diff options
Diffstat (limited to 'arch/x86/virt')
-rw-r--r-- | arch/x86/virt/Makefile | 2 | ||||
-rw-r--r-- | arch/x86/virt/svm/Makefile | 1 | ||||
-rw-r--r-- | arch/x86/virt/svm/cmdline.c | 45 | ||||
-rw-r--r-- | arch/x86/virt/svm/sev.c | 691 | ||||
-rw-r--r-- | arch/x86/virt/vmx/tdx/tdx.c | 104 | ||||
-rw-r--r-- | arch/x86/virt/vmx/tdx/tdx.h | 40 | ||||
-rw-r--r-- | arch/x86/virt/vmx/tdx/tdx_global_metadata.c | 48 | ||||
-rw-r--r-- | arch/x86/virt/vmx/tdx/tdx_global_metadata.h | 25 |
8 files changed, 760 insertions, 196 deletions
diff --git a/arch/x86/virt/Makefile b/arch/x86/virt/Makefile index 1e36502cd738..ea343fc392dc 100644 --- a/arch/x86/virt/Makefile +++ b/arch/x86/virt/Makefile @@ -1,2 +1,2 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-y += vmx/ +obj-y += svm/ vmx/ diff --git a/arch/x86/virt/svm/Makefile b/arch/x86/virt/svm/Makefile index ef2a31bdcc70..eca6d71355fa 100644 --- a/arch/x86/virt/svm/Makefile +++ b/arch/x86/virt/svm/Makefile @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_KVM_AMD_SEV) += sev.o +obj-$(CONFIG_CPU_SUP_AMD) += cmdline.o diff --git a/arch/x86/virt/svm/cmdline.c b/arch/x86/virt/svm/cmdline.c new file mode 100644 index 000000000000..affa2759fa20 --- /dev/null +++ b/arch/x86/virt/svm/cmdline.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * AMD SVM-SEV command line parsing support + * + * Copyright (C) 2023 - 2024 Advanced Micro Devices, Inc. + * + * Author: Michael Roth <michael.roth@amd.com> + */ + +#include <linux/string.h> +#include <linux/printk.h> +#include <linux/cache.h> +#include <linux/cpufeature.h> + +#include <asm/sev-common.h> + +struct sev_config sev_cfg __read_mostly; + +static int __init init_sev_config(char *str) +{ + char *s; + + while ((s = strsep(&str, ","))) { + if (!strcmp(s, "debug")) { + sev_cfg.debug = true; + continue; + } + + if (!strcmp(s, "nosnp")) { + if (!cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) { + setup_clear_cpu_cap(X86_FEATURE_SEV_SNP); + cc_platform_clear(CC_ATTR_HOST_SEV_SNP); + continue; + } else { + goto warn; + } + } + +warn: + pr_info("SEV command-line option '%s' was not recognized\n", s); + } + + return 1; +} +__setup("sev=", init_sev_config); diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c index cffe1157a90a..fc473ca12c44 100644 --- a/arch/x86/virt/svm/sev.c +++ b/arch/x86/virt/svm/sev.c @@ -18,6 +18,7 @@ #include <linux/cpumask.h> #include <linux/iommu.h> #include <linux/amd-iommu.h> +#include <linux/nospec.h> #include <asm/sev.h> #include <asm/processor.h> @@ -31,10 +32,29 @@ #include <asm/iommu.h> /* - * The RMP entry format is not architectural. The format is defined in PPR - * Family 19h Model 01h, Rev B1 processor. + * The RMP entry information as returned by the RMPREAD instruction. */ struct rmpentry { + u64 gpa; + u8 assigned :1, + rsvd1 :7; + u8 pagesize :1, + hpage_region_status :1, + rsvd2 :6; + u8 immutable :1, + rsvd3 :7; + u8 rsvd4; + u32 asid; +} __packed; + +/* + * The raw RMP entry format is not architectural. The format is defined in PPR + * Family 19h Model 01h, Rev B1 processor. This format represents the actual + * entry in the RMP table memory. The bitfield definitions are used for machines + * without the RMPREAD instruction (Zen3 and Zen4), otherwise the "hi" and "lo" + * fields are only used for dumping the raw data. + */ +struct rmpentry_raw { union { struct { u64 assigned : 1, @@ -58,12 +78,48 @@ struct rmpentry { */ #define RMPTABLE_CPU_BOOKKEEPING_SZ 0x4000 +/* + * For a non-segmented RMP table, use the maximum physical addressing as the + * segment size in order to always arrive at index 0 in the table. + */ +#define RMPTABLE_NON_SEGMENTED_SHIFT 52 + +struct rmp_segment_desc { + struct rmpentry_raw *rmp_entry; + u64 max_index; + u64 size; +}; + +/* + * Segmented RMP Table support. + * - The segment size is used for two purposes: + * - Identify the amount of memory covered by an RMP segment + * - Quickly locate an RMP segment table entry for a physical address + * + * - The RMP segment table contains pointers to an RMP table that covers + * a specific portion of memory. There can be up to 512 8-byte entries, + * one pages worth. + */ +#define RST_ENTRY_MAPPED_SIZE(x) ((x) & GENMASK_ULL(19, 0)) +#define RST_ENTRY_SEGMENT_BASE(x) ((x) & GENMASK_ULL(51, 20)) + +#define RST_SIZE SZ_4K +static struct rmp_segment_desc **rmp_segment_table __ro_after_init; +static unsigned int rst_max_index __ro_after_init = 512; + +static unsigned int rmp_segment_shift; +static u64 rmp_segment_size; +static u64 rmp_segment_mask; + +#define RST_ENTRY_INDEX(x) ((x) >> rmp_segment_shift) +#define RMP_ENTRY_INDEX(x) ((u64)(PHYS_PFN((x) & rmp_segment_mask))) + +static u64 rmp_cfg; + /* Mask to apply to a PFN to get the first PFN of a 2MB page */ #define PFN_PMD_MASK GENMASK_ULL(63, PMD_SHIFT - PAGE_SHIFT) static u64 probed_rmp_base, probed_rmp_size; -static struct rmpentry *rmptable __ro_after_init; -static u64 rmptable_max_pfn __ro_after_init; static LIST_HEAD(snp_leaked_pages_list); static DEFINE_SPINLOCK(snp_leaked_pages_list_lock); @@ -77,7 +133,7 @@ static int __mfd_enable(unsigned int cpu) { u64 val; - if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP)) + if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) return 0; rdmsrl(MSR_AMD64_SYSCFG, val); @@ -98,7 +154,7 @@ static int __snp_enable(unsigned int cpu) { u64 val; - if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP)) + if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) return 0; rdmsrl(MSR_AMD64_SYSCFG, val); @@ -116,78 +172,351 @@ static __init void snp_enable(void *arg) __snp_enable(smp_processor_id()); } -#define RMP_ADDR_MASK GENMASK_ULL(51, 13) +static void __init __snp_fixup_e820_tables(u64 pa) +{ + if (IS_ALIGNED(pa, PMD_SIZE)) + return; -bool snp_probe_rmptable_info(void) + /* + * Handle cases where the RMP table placement by the BIOS is not + * 2M aligned and the kexec kernel could try to allocate + * from within that chunk which then causes a fatal RMP fault. + * + * The e820_table needs to be updated as it is converted to + * kernel memory resources and used by KEXEC_FILE_LOAD syscall + * to load kexec segments. + * + * The e820_table_firmware needs to be updated as it is exposed + * to sysfs and used by the KEXEC_LOAD syscall to load kexec + * segments. + * + * The e820_table_kexec needs to be updated as it passed to + * the kexec-ed kernel. + */ + pa = ALIGN_DOWN(pa, PMD_SIZE); + if (e820__mapped_any(pa, pa + PMD_SIZE, E820_TYPE_RAM)) { + pr_info("Reserving start/end of RMP table on a 2MB boundary [0x%016llx]\n", pa); + e820__range_update(pa, PMD_SIZE, E820_TYPE_RAM, E820_TYPE_RESERVED); + e820__range_update_table(e820_table_kexec, pa, PMD_SIZE, E820_TYPE_RAM, E820_TYPE_RESERVED); + if (!memblock_is_region_reserved(pa, PMD_SIZE)) + memblock_reserve(pa, PMD_SIZE); + } +} + +static void __init fixup_e820_tables_for_segmented_rmp(void) { - u64 max_rmp_pfn, calc_rmp_sz, rmp_sz, rmp_base, rmp_end; + u64 pa, *rst, size, mapped_size; + unsigned int i; - rdmsrl(MSR_AMD64_RMP_BASE, rmp_base); - rdmsrl(MSR_AMD64_RMP_END, rmp_end); + __snp_fixup_e820_tables(probed_rmp_base); - if (!(rmp_base & RMP_ADDR_MASK) || !(rmp_end & RMP_ADDR_MASK)) { - pr_err("Memory for the RMP table has not been reserved by BIOS\n"); + pa = probed_rmp_base + RMPTABLE_CPU_BOOKKEEPING_SZ; + + __snp_fixup_e820_tables(pa + RST_SIZE); + + rst = early_memremap(pa, RST_SIZE); + if (!rst) + return; + + for (i = 0; i < rst_max_index; i++) { + pa = RST_ENTRY_SEGMENT_BASE(rst[i]); + mapped_size = RST_ENTRY_MAPPED_SIZE(rst[i]); + if (!mapped_size) + continue; + + __snp_fixup_e820_tables(pa); + + /* + * Mapped size in GB. Mapped size is allowed to exceed + * the segment coverage size, but gets reduced to the + * segment coverage size. + */ + mapped_size <<= 30; + if (mapped_size > rmp_segment_size) + mapped_size = rmp_segment_size; + + /* Calculate the RMP segment size (16 bytes/page mapped) */ + size = PHYS_PFN(mapped_size) << 4; + + __snp_fixup_e820_tables(pa + size); + } + + early_memunmap(rst, RST_SIZE); +} + +static void __init fixup_e820_tables_for_contiguous_rmp(void) +{ + __snp_fixup_e820_tables(probed_rmp_base); + __snp_fixup_e820_tables(probed_rmp_base + probed_rmp_size); +} + +void __init snp_fixup_e820_tables(void) +{ + if (rmp_cfg & MSR_AMD64_SEG_RMP_ENABLED) { + fixup_e820_tables_for_segmented_rmp(); + } else { + fixup_e820_tables_for_contiguous_rmp(); + } +} + +static bool __init clear_rmptable_bookkeeping(void) +{ + void *bk; + + bk = memremap(probed_rmp_base, RMPTABLE_CPU_BOOKKEEPING_SZ, MEMREMAP_WB); + if (!bk) { + pr_err("Failed to map RMP bookkeeping area\n"); return false; } - if (rmp_base > rmp_end) { - pr_err("RMP configuration not valid: base=%#llx, end=%#llx\n", rmp_base, rmp_end); + memset(bk, 0, RMPTABLE_CPU_BOOKKEEPING_SZ); + + memunmap(bk); + + return true; +} + +static bool __init alloc_rmp_segment_desc(u64 segment_pa, u64 segment_size, u64 pa) +{ + u64 rst_index, rmp_segment_size_max; + struct rmp_segment_desc *desc; + void *rmp_segment; + + /* Calculate the maximum size an RMP can be (16 bytes/page mapped) */ + rmp_segment_size_max = PHYS_PFN(rmp_segment_size) << 4; + + /* Validate the RMP segment size */ + if (segment_size > rmp_segment_size_max) { + pr_err("Invalid RMP size 0x%llx for configured segment size 0x%llx\n", + segment_size, rmp_segment_size_max); return false; } - rmp_sz = rmp_end - rmp_base + 1; + /* Validate the RMP segment table index */ + rst_index = RST_ENTRY_INDEX(pa); + if (rst_index >= rst_max_index) { + pr_err("Invalid RMP segment base address 0x%llx for configured segment size 0x%llx\n", + pa, rmp_segment_size); + return false; + } + + if (rmp_segment_table[rst_index]) { + pr_err("RMP segment descriptor already exists at index %llu\n", rst_index); + return false; + } + + rmp_segment = memremap(segment_pa, segment_size, MEMREMAP_WB); + if (!rmp_segment) { + pr_err("Failed to map RMP segment addr 0x%llx size 0x%llx\n", + segment_pa, segment_size); + return false; + } + + desc = kzalloc(sizeof(*desc), GFP_KERNEL); + if (!desc) { + memunmap(rmp_segment); + return false; + } + + desc->rmp_entry = rmp_segment; + desc->max_index = segment_size / sizeof(*desc->rmp_entry); + desc->size = segment_size; + + rmp_segment_table[rst_index] = desc; + + return true; +} + +static void __init free_rmp_segment_table(void) +{ + unsigned int i; + + for (i = 0; i < rst_max_index; i++) { + struct rmp_segment_desc *desc; + + desc = rmp_segment_table[i]; + if (!desc) + continue; + + memunmap(desc->rmp_entry); + + kfree(desc); + } + + free_page((unsigned long)rmp_segment_table); + + rmp_segment_table = NULL; +} + +/* Allocate the table used to index into the RMP segments */ +static bool __init alloc_rmp_segment_table(void) +{ + struct page *page; + + page = alloc_page(__GFP_ZERO); + if (!page) + return false; + + rmp_segment_table = page_address(page); + + return true; +} + +static bool __init setup_contiguous_rmptable(void) +{ + u64 max_rmp_pfn, calc_rmp_sz, rmptable_segment, rmptable_size, rmp_end; + + if (!probed_rmp_size) + return false; + + rmp_end = probed_rmp_base + probed_rmp_size - 1; /* - * Calculate the amount the memory that must be reserved by the BIOS to + * Calculate the amount of memory that must be reserved by the BIOS to * address the whole RAM, including the bookkeeping area. The RMP itself * must also be covered. */ max_rmp_pfn = max_pfn; - if (PHYS_PFN(rmp_end) > max_pfn) - max_rmp_pfn = PHYS_PFN(rmp_end); + if (PFN_UP(rmp_end) > max_pfn) + max_rmp_pfn = PFN_UP(rmp_end); calc_rmp_sz = (max_rmp_pfn << 4) + RMPTABLE_CPU_BOOKKEEPING_SZ; - - if (calc_rmp_sz > rmp_sz) { + if (calc_rmp_sz > probed_rmp_size) { pr_err("Memory reserved for the RMP table does not cover full system RAM (expected 0x%llx got 0x%llx)\n", - calc_rmp_sz, rmp_sz); + calc_rmp_sz, probed_rmp_size); return false; } - probed_rmp_base = rmp_base; - probed_rmp_size = rmp_sz; + if (!alloc_rmp_segment_table()) + return false; - pr_info("RMP table physical range [0x%016llx - 0x%016llx]\n", - probed_rmp_base, probed_rmp_base + probed_rmp_size - 1); + /* Map only the RMP entries */ + rmptable_segment = probed_rmp_base + RMPTABLE_CPU_BOOKKEEPING_SZ; + rmptable_size = probed_rmp_size - RMPTABLE_CPU_BOOKKEEPING_SZ; + + if (!alloc_rmp_segment_desc(rmptable_segment, rmptable_size, 0)) { + free_rmp_segment_table(); + return false; + } return true; } +static bool __init setup_segmented_rmptable(void) +{ + u64 rst_pa, *rst, pa, ram_pa_end, ram_pa_max; + unsigned int i, max_index; + + if (!probed_rmp_base) + return false; + + if (!alloc_rmp_segment_table()) + return false; + + rst_pa = probed_rmp_base + RMPTABLE_CPU_BOOKKEEPING_SZ; + rst = memremap(rst_pa, RST_SIZE, MEMREMAP_WB); + if (!rst) { + pr_err("Failed to map RMP segment table addr 0x%llx\n", rst_pa); + goto e_free; + } + + pr_info("Segmented RMP using %lluGB segments\n", rmp_segment_size >> 30); + + ram_pa_max = max_pfn << PAGE_SHIFT; + + max_index = 0; + ram_pa_end = 0; + for (i = 0; i < rst_max_index; i++) { + u64 rmp_segment, rmp_size, mapped_size; + + mapped_size = RST_ENTRY_MAPPED_SIZE(rst[i]); + if (!mapped_size) + continue; + + max_index = i; + + /* + * Mapped size in GB. Mapped size is allowed to exceed the + * segment coverage size, but gets reduced to the segment + * coverage size. + */ + mapped_size <<= 30; + if (mapped_size > rmp_segment_size) { + pr_info("RMP segment %u mapped size (0x%llx) reduced to 0x%llx\n", + i, mapped_size, rmp_segment_size); + mapped_size = rmp_segment_size; + } + + rmp_segment = RST_ENTRY_SEGMENT_BASE(rst[i]); + + /* Calculate the RMP segment size (16 bytes/page mapped) */ + rmp_size = PHYS_PFN(mapped_size) << 4; + + pa = (u64)i << rmp_segment_shift; + + /* + * Some segments may be for MMIO mapped above system RAM. These + * segments are used for Trusted I/O. + */ + if (pa < ram_pa_max) + ram_pa_end = pa + mapped_size; + + if (!alloc_rmp_segment_desc(rmp_segment, rmp_size, pa)) + goto e_unmap; + + pr_info("RMP segment %u physical address [0x%llx - 0x%llx] covering [0x%llx - 0x%llx]\n", + i, rmp_segment, rmp_segment + rmp_size - 1, pa, pa + mapped_size - 1); + } + + if (ram_pa_max > ram_pa_end) { + pr_err("Segmented RMP does not cover full system RAM (expected 0x%llx got 0x%llx)\n", + ram_pa_max, ram_pa_end); + goto e_unmap; + } + + /* Adjust the maximum index based on the found segments */ + rst_max_index = max_index + 1; + + memunmap(rst); + + return true; + +e_unmap: + memunmap(rst); + +e_free: + free_rmp_segment_table(); + + return false; +} + +static bool __init setup_rmptable(void) +{ + if (rmp_cfg & MSR_AMD64_SEG_RMP_ENABLED) { + return setup_segmented_rmptable(); + } else { + return setup_contiguous_rmptable(); + } +} + /* * Do the necessary preparations which are verified by the firmware as * described in the SNP_INIT_EX firmware command description in the SNP * firmware ABI spec. */ -static int __init snp_rmptable_init(void) +int __init snp_rmptable_init(void) { - void *rmptable_start; - u64 rmptable_size; + unsigned int i; u64 val; - if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP)) - return 0; - - if (!amd_iommu_snp_en) - return 0; + if (WARN_ON_ONCE(!cc_platform_has(CC_ATTR_HOST_SEV_SNP))) + return -ENOSYS; - if (!probed_rmp_size) - goto nosnp; + if (WARN_ON_ONCE(!amd_iommu_snp_en)) + return -ENOSYS; - rmptable_start = memremap(probed_rmp_base, probed_rmp_size, MEMREMAP_WB); - if (!rmptable_start) { - pr_err("Failed to map RMP table\n"); - return 1; - } + if (!setup_rmptable()) + return -ENOSYS; /* * Check if SEV-SNP is already enabled, this can happen in case of @@ -197,7 +526,22 @@ static int __init snp_rmptable_init(void) if (val & MSR_AMD64_SYSCFG_SNP_EN) goto skip_enable; - memset(rmptable_start, 0, probed_rmp_size); + /* Zero out the RMP bookkeeping area */ + if (!clear_rmptable_bookkeeping()) { + free_rmp_segment_table(); + return -ENOSYS; + } + + /* Zero out the RMP entries */ + for (i = 0; i < rst_max_index; i++) { + struct rmp_segment_desc *desc; + + desc = rmp_segment_table[i]; + if (!desc) + continue; + + memset(desc->rmp_entry, 0, desc->size); + } /* Flush the caches to ensure that data is written before SNP is enabled. */ wbinvd_on_all_cpus(); @@ -208,12 +552,6 @@ static int __init snp_rmptable_init(void) on_each_cpu(snp_enable, NULL, 1); skip_enable: - rmptable_start += RMPTABLE_CPU_BOOKKEEPING_SZ; - rmptable_size = probed_rmp_size - RMPTABLE_CPU_BOOKKEEPING_SZ; - - rmptable = (struct rmpentry *)rmptable_start; - rmptable_max_pfn = rmptable_size / sizeof(struct rmpentry) - 1; - cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/rmptable_init:online", __snp_enable, NULL); /* @@ -223,59 +561,214 @@ skip_enable: crash_kexec_post_notifiers = true; return 0; +} + +static void set_rmp_segment_info(unsigned int segment_shift) +{ + rmp_segment_shift = segment_shift; + rmp_segment_size = 1ULL << rmp_segment_shift; + rmp_segment_mask = rmp_segment_size - 1; +} + +#define RMP_ADDR_MASK GENMASK_ULL(51, 13) + +static bool probe_contiguous_rmptable_info(void) +{ + u64 rmp_sz, rmp_base, rmp_end; + + rdmsrl(MSR_AMD64_RMP_BASE, rmp_base); + rdmsrl(MSR_AMD64_RMP_END, rmp_end); + + if (!(rmp_base & RMP_ADDR_MASK) || !(rmp_end & RMP_ADDR_MASK)) { + pr_err("Memory for the RMP table has not been reserved by BIOS\n"); + return false; + } + + if (rmp_base > rmp_end) { + pr_err("RMP configuration not valid: base=%#llx, end=%#llx\n", rmp_base, rmp_end); + return false; + } + + rmp_sz = rmp_end - rmp_base + 1; + + /* Treat the contiguous RMP table as a single segment */ + rst_max_index = 1; + + set_rmp_segment_info(RMPTABLE_NON_SEGMENTED_SHIFT); + + probed_rmp_base = rmp_base; + probed_rmp_size = rmp_sz; + + pr_info("RMP table physical range [0x%016llx - 0x%016llx]\n", + rmp_base, rmp_end); + + return true; +} + +static bool probe_segmented_rmptable_info(void) +{ + unsigned int eax, ebx, segment_shift, segment_shift_min, segment_shift_max; + u64 rmp_base, rmp_end; + + rdmsrl(MSR_AMD64_RMP_BASE, rmp_base); + if (!(rmp_base & RMP_ADDR_MASK)) { + pr_err("Memory for the RMP table has not been reserved by BIOS\n"); + return false; + } + + rdmsrl(MSR_AMD64_RMP_END, rmp_end); + WARN_ONCE(rmp_end & RMP_ADDR_MASK, + "Segmented RMP enabled but RMP_END MSR is non-zero\n"); + + /* Obtain the min and max supported RMP segment size */ + eax = cpuid_eax(0x80000025); + segment_shift_min = eax & GENMASK(5, 0); + segment_shift_max = (eax & GENMASK(11, 6)) >> 6; + + /* Verify the segment size is within the supported limits */ + segment_shift = MSR_AMD64_RMP_SEGMENT_SHIFT(rmp_cfg); + if (segment_shift > segment_shift_max || segment_shift < segment_shift_min) { + pr_err("RMP segment size (%u) is not within advertised bounds (min=%u, max=%u)\n", + segment_shift, segment_shift_min, segment_shift_max); + return false; + } + + /* Override the max supported RST index if a hardware limit exists */ + ebx = cpuid_ebx(0x80000025); + if (ebx & BIT(10)) + rst_max_index = ebx & GENMASK(9, 0); + + set_rmp_segment_info(segment_shift); + + probed_rmp_base = rmp_base; + probed_rmp_size = 0; + + pr_info("Segmented RMP base table physical range [0x%016llx - 0x%016llx]\n", + rmp_base, rmp_base + RMPTABLE_CPU_BOOKKEEPING_SZ + RST_SIZE); -nosnp: - setup_clear_cpu_cap(X86_FEATURE_SEV_SNP); - return -ENOSYS; + return true; +} + +bool snp_probe_rmptable_info(void) +{ + if (cpu_feature_enabled(X86_FEATURE_SEGMENTED_RMP)) + rdmsrl(MSR_AMD64_RMP_CFG, rmp_cfg); + + if (rmp_cfg & MSR_AMD64_SEG_RMP_ENABLED) + return probe_segmented_rmptable_info(); + else + return probe_contiguous_rmptable_info(); } /* - * This must be called after the IOMMU has been initialized. + * About the array_index_nospec() usage below: + * + * This function can get called by exported functions like + * snp_lookup_rmpentry(), which is used by the KVM #PF handler, among + * others, and since the @pfn passed in cannot always be trusted, + * speculation should be stopped as a protective measure. */ -device_initcall(snp_rmptable_init); - -static struct rmpentry *get_rmpentry(u64 pfn) +static struct rmpentry_raw *get_raw_rmpentry(u64 pfn) { - if (WARN_ON_ONCE(pfn > rmptable_max_pfn)) + u64 paddr, rst_index, segment_index; + struct rmp_segment_desc *desc; + + if (!rmp_segment_table) + return ERR_PTR(-ENODEV); + + paddr = pfn << PAGE_SHIFT; + + rst_index = RST_ENTRY_INDEX(paddr); + if (unlikely(rst_index >= rst_max_index)) + return ERR_PTR(-EFAULT); + + rst_index = array_index_nospec(rst_index, rst_max_index); + + desc = rmp_segment_table[rst_index]; + if (unlikely(!desc)) + return ERR_PTR(-EFAULT); + + segment_index = RMP_ENTRY_INDEX(paddr); + if (unlikely(segment_index >= desc->max_index)) return ERR_PTR(-EFAULT); - return &rmptable[pfn]; + segment_index = array_index_nospec(segment_index, desc->max_index); + + return desc->rmp_entry + segment_index; } -static struct rmpentry *__snp_lookup_rmpentry(u64 pfn, int *level) +static int get_rmpentry(u64 pfn, struct rmpentry *e) { - struct rmpentry *large_entry, *entry; + struct rmpentry_raw *e_raw; - if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP)) - return ERR_PTR(-ENODEV); + if (cpu_feature_enabled(X86_FEATURE_RMPREAD)) { + int ret; - entry = get_rmpentry(pfn); - if (IS_ERR(entry)) - return entry; + /* Binutils version 2.44 supports the RMPREAD mnemonic. */ + asm volatile(".byte 0xf2, 0x0f, 0x01, 0xfd" + : "=a" (ret) + : "a" (pfn << PAGE_SHIFT), "c" (e) + : "memory", "cc"); + + return ret; + } + + e_raw = get_raw_rmpentry(pfn); + if (IS_ERR(e_raw)) + return PTR_ERR(e_raw); + + /* + * Map the raw RMP table entry onto the RMPREAD output format. + * The 2MB region status indicator (hpage_region_status field) is not + * calculated, since the overhead could be significant and the field + * is not used. + */ + memset(e, 0, sizeof(*e)); + e->gpa = e_raw->gpa << PAGE_SHIFT; + e->asid = e_raw->asid; + e->assigned = e_raw->assigned; + e->pagesize = e_raw->pagesize; + e->immutable = e_raw->immutable; + + return 0; +} + +static int __snp_lookup_rmpentry(u64 pfn, struct rmpentry *e, int *level) +{ + struct rmpentry e_large; + int ret; + + if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) + return -ENODEV; + + ret = get_rmpentry(pfn, e); + if (ret) + return ret; /* * Find the authoritative RMP entry for a PFN. This can be either a 4K * RMP entry or a special large RMP entry that is authoritative for a * whole 2M area. */ - large_entry = get_rmpentry(pfn & PFN_PMD_MASK); - if (IS_ERR(large_entry)) - return large_entry; + ret = get_rmpentry(pfn & PFN_PMD_MASK, &e_large); + if (ret) + return ret; - *level = RMP_TO_PG_LEVEL(large_entry->pagesize); + *level = RMP_TO_PG_LEVEL(e_large.pagesize); - return entry; + return 0; } int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level) { - struct rmpentry *e; + struct rmpentry e; + int ret; - e = __snp_lookup_rmpentry(pfn, level); - if (IS_ERR(e)) - return PTR_ERR(e); + ret = __snp_lookup_rmpentry(pfn, &e, level); + if (ret) + return ret; - *assigned = !!e->assigned; + *assigned = !!e.assigned; return 0; } EXPORT_SYMBOL_GPL(snp_lookup_rmpentry); @@ -288,20 +781,28 @@ EXPORT_SYMBOL_GPL(snp_lookup_rmpentry); */ static void dump_rmpentry(u64 pfn) { + struct rmpentry_raw *e_raw; u64 pfn_i, pfn_end; - struct rmpentry *e; - int level; + struct rmpentry e; + int level, ret; - e = __snp_lookup_rmpentry(pfn, &level); - if (IS_ERR(e)) { - pr_err("Failed to read RMP entry for PFN 0x%llx, error %ld\n", - pfn, PTR_ERR(e)); + ret = __snp_lookup_rmpentry(pfn, &e, &level); + if (ret) { + pr_err("Failed to read RMP entry for PFN 0x%llx, error %d\n", + pfn, ret); return; } - if (e->assigned) { + if (e.assigned) { + e_raw = get_raw_rmpentry(pfn); + if (IS_ERR(e_raw)) { + pr_err("Failed to read RMP contents for PFN 0x%llx, error %ld\n", + pfn, PTR_ERR(e_raw)); + return; + } + pr_info("PFN 0x%llx, RMP entry: [0x%016llx - 0x%016llx]\n", - pfn, e->lo, e->hi); + pfn, e_raw->lo, e_raw->hi); return; } @@ -320,16 +821,16 @@ static void dump_rmpentry(u64 pfn) pfn, pfn_i, pfn_end); while (pfn_i < pfn_end) { - e = __snp_lookup_rmpentry(pfn_i, &level); - if (IS_ERR(e)) { - pr_err("Error %ld reading RMP entry for PFN 0x%llx\n", - PTR_ERR(e), pfn_i); + e_raw = get_raw_rmpentry(pfn_i); + if (IS_ERR(e_raw)) { + pr_err("Error %ld reading RMP contents for PFN 0x%llx\n", + PTR_ERR(e_raw), pfn_i); pfn_i++; continue; } - if (e->lo || e->hi) - pr_info("PFN: 0x%llx, [0x%016llx - 0x%016llx]\n", pfn_i, e->lo, e->hi); + if (e_raw->lo || e_raw->hi) + pr_info("PFN: 0x%llx, [0x%016llx - 0x%016llx]\n", pfn_i, e_raw->lo, e_raw->hi); pfn_i++; } } @@ -363,7 +864,7 @@ int psmash(u64 pfn) unsigned long paddr = pfn << PAGE_SHIFT; int ret; - if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP)) + if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) return -ENODEV; if (!pfn_valid(pfn)) @@ -472,7 +973,7 @@ static int rmpupdate(u64 pfn, struct rmp_state *state) unsigned long paddr = pfn << PAGE_SHIFT; int ret, level; - if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP)) + if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) return -ENODEV; level = RMP_TO_PG_LEVEL(state->pagesize); @@ -558,3 +1059,13 @@ void snp_leak_pages(u64 pfn, unsigned int npages) spin_unlock(&snp_leaked_pages_list_lock); } EXPORT_SYMBOL_GPL(snp_leak_pages); + +void kdump_sev_callback(void) +{ + /* + * Do wbinvd() on remote CPUs when SNP is enabled in order to + * safely do SNP_SHUTDOWN on the local CPU. + */ + if (cc_platform_has(CC_ATTR_HOST_SEV_SNP)) + wbinvd(); +} diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c index 4d6826a76f78..7fdb37387886 100644 --- a/arch/x86/virt/vmx/tdx/tdx.c +++ b/arch/x86/virt/vmx/tdx/tdx.c @@ -27,14 +27,13 @@ #include <linux/log2.h> #include <linux/acpi.h> #include <linux/suspend.h> -#include <linux/acpi.h> #include <asm/page.h> #include <asm/special_insns.h> #include <asm/msr-index.h> #include <asm/msr.h> #include <asm/cpufeature.h> #include <asm/tdx.h> -#include <asm/intel-family.h> +#include <asm/cpu_device_id.h> #include <asm/processor.h> #include <asm/mce.h> #include "tdx.h" @@ -271,57 +270,15 @@ static int read_sys_metadata_field(u64 field_id, u64 *data) return 0; } -static int read_sys_metadata_field16(u64 field_id, - int offset, - struct tdx_tdmr_sysinfo *ts) -{ - u16 *ts_member = ((void *)ts) + offset; - u64 tmp; - int ret; - - if (WARN_ON_ONCE(MD_FIELD_ID_ELE_SIZE_CODE(field_id) != - MD_FIELD_ID_ELE_SIZE_16BIT)) - return -EINVAL; - - ret = read_sys_metadata_field(field_id, &tmp); - if (ret) - return ret; - - *ts_member = tmp; - - return 0; -} - -struct field_mapping { - u64 field_id; - int offset; -}; - -#define TD_SYSINFO_MAP(_field_id, _offset) \ - { .field_id = MD_FIELD_ID_##_field_id, \ - .offset = offsetof(struct tdx_tdmr_sysinfo, _offset) } - -/* Map TD_SYSINFO fields into 'struct tdx_tdmr_sysinfo': */ -static const struct field_mapping fields[] = { - TD_SYSINFO_MAP(MAX_TDMRS, max_tdmrs), - TD_SYSINFO_MAP(MAX_RESERVED_PER_TDMR, max_reserved_per_tdmr), - TD_SYSINFO_MAP(PAMT_4K_ENTRY_SIZE, pamt_entry_size[TDX_PS_4K]), - TD_SYSINFO_MAP(PAMT_2M_ENTRY_SIZE, pamt_entry_size[TDX_PS_2M]), - TD_SYSINFO_MAP(PAMT_1G_ENTRY_SIZE, pamt_entry_size[TDX_PS_1G]), -}; +#include "tdx_global_metadata.c" -static int get_tdx_tdmr_sysinfo(struct tdx_tdmr_sysinfo *tdmr_sysinfo) +static int check_features(struct tdx_sys_info *sysinfo) { - int ret; - int i; + u64 tdx_features0 = sysinfo->features.tdx_features0; - /* Populate 'tdmr_sysinfo' fields using the mapping structure above: */ - for (i = 0; i < ARRAY_SIZE(fields); i++) { - ret = read_sys_metadata_field16(fields[i].field_id, - fields[i].offset, - tdmr_sysinfo); - if (ret) - return ret; + if (!(tdx_features0 & TDX_FEATURES0_NO_RBP_MOD)) { + pr_err("frame pointer (RBP) clobber bug present, upgrade TDX module\n"); + return -EINVAL; } return 0; @@ -343,13 +300,13 @@ static int tdmr_size_single(u16 max_reserved_per_tdmr) } static int alloc_tdmr_list(struct tdmr_info_list *tdmr_list, - struct tdx_tdmr_sysinfo *tdmr_sysinfo) + struct tdx_sys_info_tdmr *sysinfo_tdmr) { size_t tdmr_sz, tdmr_array_sz; void *tdmr_array; - tdmr_sz = tdmr_size_single(tdmr_sysinfo->max_reserved_per_tdmr); - tdmr_array_sz = tdmr_sz * tdmr_sysinfo->max_tdmrs; + tdmr_sz = tdmr_size_single(sysinfo_tdmr->max_reserved_per_tdmr); + tdmr_array_sz = tdmr_sz * sysinfo_tdmr->max_tdmrs; /* * To keep things simple, allocate all TDMRs together. @@ -368,7 +325,7 @@ static int alloc_tdmr_list(struct tdmr_info_list *tdmr_list, * at a given index in the TDMR list. */ tdmr_list->tdmr_sz = tdmr_sz; - tdmr_list->max_tdmrs = tdmr_sysinfo->max_tdmrs; + tdmr_list->max_tdmrs = sysinfo_tdmr->max_tdmrs; tdmr_list->nr_consumed_tdmrs = 0; return 0; @@ -922,25 +879,29 @@ static int tdmrs_populate_rsvd_areas_all(struct tdmr_info_list *tdmr_list, /* * Construct a list of TDMRs on the preallocated space in @tdmr_list * to cover all TDX memory regions in @tmb_list based on the TDX module - * TDMR global information in @tdmr_sysinfo. + * TDMR global information in @sysinfo_tdmr. */ static int construct_tdmrs(struct list_head *tmb_list, struct tdmr_info_list *tdmr_list, - struct tdx_tdmr_sysinfo *tdmr_sysinfo) + struct tdx_sys_info_tdmr *sysinfo_tdmr) { + u16 pamt_entry_size[TDX_PS_NR] = { + sysinfo_tdmr->pamt_4k_entry_size, + sysinfo_tdmr->pamt_2m_entry_size, + sysinfo_tdmr->pamt_1g_entry_size, + }; int ret; ret = fill_out_tdmrs(tmb_list, tdmr_list); if (ret) return ret; - ret = tdmrs_set_up_pamt_all(tdmr_list, tmb_list, - tdmr_sysinfo->pamt_entry_size); + ret = tdmrs_set_up_pamt_all(tdmr_list, tmb_list, pamt_entry_size); if (ret) return ret; ret = tdmrs_populate_rsvd_areas_all(tdmr_list, tmb_list, - tdmr_sysinfo->max_reserved_per_tdmr); + sysinfo_tdmr->max_reserved_per_tdmr); if (ret) tdmrs_free_pamt_all(tdmr_list); @@ -1099,9 +1060,18 @@ static int init_tdmrs(struct tdmr_info_list *tdmr_list) static int init_tdx_module(void) { - struct tdx_tdmr_sysinfo tdmr_sysinfo; + struct tdx_sys_info sysinfo; int ret; + ret = get_tdx_sys_info(&sysinfo); + if (ret) + return ret; + + /* Check whether the kernel can support this module */ + ret = check_features(&sysinfo); + if (ret) + return ret; + /* * To keep things simple, assume that all TDX-protected memory * will come from the page allocator. Make sure all pages in the @@ -1118,17 +1088,13 @@ static int init_tdx_module(void) if (ret) goto out_put_tdxmem; - ret = get_tdx_tdmr_sysinfo(&tdmr_sysinfo); - if (ret) - goto err_free_tdxmem; - /* Allocate enough space for constructing TDMRs */ - ret = alloc_tdmr_list(&tdx_tdmr_list, &tdmr_sysinfo); + ret = alloc_tdmr_list(&tdx_tdmr_list, &sysinfo.tdmr); if (ret) goto err_free_tdxmem; /* Cover all TDX-usable memory regions in TDMRs */ - ret = construct_tdmrs(&tdx_memlist, &tdx_tdmr_list, &tdmr_sysinfo); + ret = construct_tdmrs(&tdx_memlist, &tdx_tdmr_list, &sysinfo.tdmr); if (ret) goto err_free_tdmrs; @@ -1427,9 +1393,9 @@ static void __init check_tdx_erratum(void) * private memory poisons that memory, and a subsequent read of * that memory triggers #MC. */ - switch (boot_cpu_data.x86_model) { - case INTEL_FAM6_SAPPHIRERAPIDS_X: - case INTEL_FAM6_EMERALDRAPIDS_X: + switch (boot_cpu_data.x86_vfm) { + case INTEL_SAPPHIRERAPIDS_X: + case INTEL_EMERALDRAPIDS_X: setup_force_cpu_bug(X86_BUG_TDX_PW_MCE); } } diff --git a/arch/x86/virt/vmx/tdx/tdx.h b/arch/x86/virt/vmx/tdx/tdx.h index b701f69485d3..4e3d533cdd61 100644 --- a/arch/x86/virt/vmx/tdx/tdx.h +++ b/arch/x86/virt/vmx/tdx/tdx.h @@ -3,6 +3,7 @@ #define _X86_VIRT_TDX_H #include <linux/bits.h> +#include "tdx_global_metadata.h" /* * This file contains both macros and data structures defined by the TDX @@ -26,35 +27,6 @@ #define PT_NDA 0x0 #define PT_RSVD 0x1 -/* - * Global scope metadata field ID. - * - * See Table "Global Scope Metadata", TDX module 1.5 ABI spec. - */ -#define MD_FIELD_ID_MAX_TDMRS 0x9100000100000008ULL -#define MD_FIELD_ID_MAX_RESERVED_PER_TDMR 0x9100000100000009ULL -#define MD_FIELD_ID_PAMT_4K_ENTRY_SIZE 0x9100000100000010ULL -#define MD_FIELD_ID_PAMT_2M_ENTRY_SIZE 0x9100000100000011ULL -#define MD_FIELD_ID_PAMT_1G_ENTRY_SIZE 0x9100000100000012ULL - -/* - * Sub-field definition of metadata field ID. - * - * See Table "MD_FIELD_ID (Metadata Field Identifier / Sequence Header) - * Definition", TDX module 1.5 ABI spec. - * - * - Bit 33:32: ELEMENT_SIZE_CODE -- size of a single element of metadata - * - * 0: 8 bits - * 1: 16 bits - * 2: 32 bits - * 3: 64 bits - */ -#define MD_FIELD_ID_ELE_SIZE_CODE(_field_id) \ - (((_field_id) & GENMASK_ULL(33, 32)) >> 32) - -#define MD_FIELD_ID_ELE_SIZE_16BIT 1 - struct tdmr_reserved_area { u64 offset; u64 size; @@ -80,6 +52,9 @@ struct tdmr_info { DECLARE_FLEX_ARRAY(struct tdmr_reserved_area, reserved_areas); } __packed __aligned(TDMR_INFO_ALIGNMENT); +/* Bit definitions of TDX_FEATURES0 metadata field */ +#define TDX_FEATURES0_NO_RBP_MOD BIT(18) + /* * Do not put any hardware-defined TDX structure representations below * this comment! @@ -99,13 +74,6 @@ struct tdx_memblock { int nid; }; -/* "TDMR info" part of "Global Scope Metadata" for constructing TDMRs */ -struct tdx_tdmr_sysinfo { - u16 max_tdmrs; - u16 max_reserved_per_tdmr; - u16 pamt_entry_size[TDX_PS_NR]; -}; - /* Warn if kernel has less than TDMR_NR_WARN TDMRs after allocation */ #define TDMR_NR_WARN 4 diff --git a/arch/x86/virt/vmx/tdx/tdx_global_metadata.c b/arch/x86/virt/vmx/tdx/tdx_global_metadata.c new file mode 100644 index 000000000000..8027a24d1c6e --- /dev/null +++ b/arch/x86/virt/vmx/tdx/tdx_global_metadata.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Automatically generated functions to read TDX global metadata. + * + * This file doesn't compile on its own as it lacks of inclusion + * of SEAMCALL wrapper primitive which reads global metadata. + * Include this file to other C file instead. + */ + +static int get_tdx_sys_info_features(struct tdx_sys_info_features *sysinfo_features) +{ + int ret = 0; + u64 val; + + if (!ret && !(ret = read_sys_metadata_field(0x0A00000300000008, &val))) + sysinfo_features->tdx_features0 = val; + + return ret; +} + +static int get_tdx_sys_info_tdmr(struct tdx_sys_info_tdmr *sysinfo_tdmr) +{ + int ret = 0; + u64 val; + + if (!ret && !(ret = read_sys_metadata_field(0x9100000100000008, &val))) + sysinfo_tdmr->max_tdmrs = val; + if (!ret && !(ret = read_sys_metadata_field(0x9100000100000009, &val))) + sysinfo_tdmr->max_reserved_per_tdmr = val; + if (!ret && !(ret = read_sys_metadata_field(0x9100000100000010, &val))) + sysinfo_tdmr->pamt_4k_entry_size = val; + if (!ret && !(ret = read_sys_metadata_field(0x9100000100000011, &val))) + sysinfo_tdmr->pamt_2m_entry_size = val; + if (!ret && !(ret = read_sys_metadata_field(0x9100000100000012, &val))) + sysinfo_tdmr->pamt_1g_entry_size = val; + + return ret; +} + +static int get_tdx_sys_info(struct tdx_sys_info *sysinfo) +{ + int ret = 0; + + ret = ret ?: get_tdx_sys_info_features(&sysinfo->features); + ret = ret ?: get_tdx_sys_info_tdmr(&sysinfo->tdmr); + + return ret; +} diff --git a/arch/x86/virt/vmx/tdx/tdx_global_metadata.h b/arch/x86/virt/vmx/tdx/tdx_global_metadata.h new file mode 100644 index 000000000000..6dd3c9695f59 --- /dev/null +++ b/arch/x86/virt/vmx/tdx/tdx_global_metadata.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Automatically generated TDX global metadata structures. */ +#ifndef _X86_VIRT_TDX_AUTO_GENERATED_TDX_GLOBAL_METADATA_H +#define _X86_VIRT_TDX_AUTO_GENERATED_TDX_GLOBAL_METADATA_H + +#include <linux/types.h> + +struct tdx_sys_info_features { + u64 tdx_features0; +}; + +struct tdx_sys_info_tdmr { + u16 max_tdmrs; + u16 max_reserved_per_tdmr; + u16 pamt_4k_entry_size; + u16 pamt_2m_entry_size; + u16 pamt_1g_entry_size; +}; + +struct tdx_sys_info { + struct tdx_sys_info_features features; + struct tdx_sys_info_tdmr tdmr; +}; + +#endif |