From 5557e831f68878ef202c0e5296235442cff9b41e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 19 Nov 2019 09:50:19 +0100 Subject: x86/mm/pat: Disambiguate PAT-disabled boot messages Right now we have these four types of PAT-disabled boot messages: x86/PAT: PAT support disabled. x86/PAT: PAT MSR is 0, disabled. x86/PAT: MTRRs disabled, skipping PAT initialization too. x86/PAT: PAT not supported by CPU. The first message is ambiguous in that it doesn't signal that PAT is off due to a boot option. The second message doesn't really make it clear that this is the MSR value during early bootup and it's the firmware environment that disabled PAT support. The third message doesn't really make it clear that we disable PAT support because CONFIG_MTRR is off in the kernel. Clarify, harmonize and fix the spelling in these user-visible messages: x86/PAT: PAT support disabled via boot option. x86/PAT: PAT support disabled by the firmware. x86/PAT: PAT support disabled because CONFIG_MTRR is disabled in the kernel. x86/PAT: PAT not supported by the CPU. Also add a fifth message, in case PAT support is disabled at build time: x86/PAT: PAT support disabled because CONFIG_X86_PAT is disabled in the kernel. Previously we'd just silently return from pat_init() without giving any indication that PAT support is off. Finally, clarify/extend some of the comments related to PAT initialization. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mtrr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h index dbff1456d215..3337d2233aef 100644 --- a/arch/x86/include/asm/mtrr.h +++ b/arch/x86/include/asm/mtrr.h @@ -86,7 +86,7 @@ static inline void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi) } static inline void mtrr_bp_init(void) { - pat_disable("MTRRs disabled, skipping PAT initialization too."); + pat_disable("PAT support disabled because CONFIG_MTRR is disabled in the kernel."); } #define mtrr_ap_init() do {} while (0) -- cgit From ecdd6ee77b73d11fcf2ca6739e4d1fe590446599 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 20 Nov 2019 15:30:44 +0100 Subject: x86/mm/pat: Standardize on memtype_*() prefix for APIs Half of our memtype APIs are memtype_ prefixed, the other half are _memtype suffixed: reserve_memtype() free_memtype() kernel_map_sync_memtype() io_reserve_memtype() io_free_memtype() memtype_check_insert() memtype_erase() memtype_lookup() memtype_copy_nth_element() Use prefixes consistently, like most other modern kernel APIs: reserve_memtype() => memtype_reserve() free_memtype() => memtype_free() kernel_map_sync_memtype() => memtype_kernel_map_sync() io_reserve_memtype() => memtype_reserve_io() io_free_memtype() => memtype_free_io() memtype_check_insert() => memtype_check_insert() memtype_erase() => memtype_erase() memtype_lookup() => memtype_lookup() memtype_copy_nth_element() => memtype_copy_nth_element() Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pat.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h index 92015c65fa2a..4a9a97d930e7 100644 --- a/arch/x86/include/asm/pat.h +++ b/arch/x86/include/asm/pat.h @@ -10,17 +10,17 @@ void pat_disable(const char *reason); extern void pat_init(void); extern void init_cache_modes(void); -extern int reserve_memtype(u64 start, u64 end, +extern int memtype_reserve(u64 start, u64 end, enum page_cache_mode req_pcm, enum 
page_cache_mode *ret_pcm); -extern int free_memtype(u64 start, u64 end); +extern int memtype_free(u64 start, u64 end); -extern int kernel_map_sync_memtype(u64 base, unsigned long size, +extern int memtype_kernel_map_sync(u64 base, unsigned long size, enum page_cache_mode pcm); -int io_reserve_memtype(resource_size_t start, resource_size_t end, +int memtype_reserve_io(resource_size_t start, resource_size_t end, enum page_cache_mode *pcm); -void io_free_memtype(resource_size_t start, resource_size_t end); +void memtype_free_io(resource_size_t start, resource_size_t end); bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn); -- cgit From eb243d1d28663c9b92010973a6a3ffa947f682ba Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 20 Nov 2019 15:33:57 +0100 Subject: x86/mm/pat: Rename <asm/pat.h> => <asm/memtype.h> pat.h is a file whose main purpose is to provide the memtype_*() APIs. PAT is the low level hardware mechanism - but the high level abstraction is memtype. So name the header <asm/memtype.h> as well - this goes hand in hand with memtype.c and memtype_interval.c. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/memtype.h | 27 +++++++++++++++++++++++++++ arch/x86/include/asm/mtrr.h | 2 +- arch/x86/include/asm/pat.h | 27 --------------------------- arch/x86/include/asm/pci.h | 2 +- 4 files changed, 29 insertions(+), 29 deletions(-) create mode 100644 arch/x86/include/asm/memtype.h delete mode 100644 arch/x86/include/asm/pat.h (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/memtype.h b/arch/x86/include/asm/memtype.h new file mode 100644 index 000000000000..ec18e38ae391 --- /dev/null +++ b/arch/x86/include/asm/memtype.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_MEMTYPE_H +#define _ASM_X86_MEMTYPE_H + +#include <linux/types.h> +#include <asm/pgtable_types.h> + +bool pat_enabled(void); +void pat_disable(const char *reason); +extern void pat_init(void); +extern void init_cache_modes(void); + +extern int memtype_reserve(u64 start, u64 end, + enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm); +extern int memtype_free(u64 start, u64 end); + +extern int memtype_kernel_map_sync(u64 base, unsigned long size, + enum page_cache_mode pcm); + +int memtype_reserve_io(resource_size_t start, resource_size_t end, + enum page_cache_mode *pcm); + +void memtype_free_io(resource_size_t start, resource_size_t end); + +bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn); + +#endif /* _ASM_X86_MEMTYPE_H */ diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h index 3337d2233aef..829df26fd7a3 100644 --- a/arch/x86/include/asm/mtrr.h +++ b/arch/x86/include/asm/mtrr.h @@ -24,7 +24,7 @@ #define _ASM_X86_MTRR_H #include <uapi/asm/mtrr.h> -#include <asm/pat.h> +#include <asm/memtype.h> /* diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h deleted file mode 100644 index 4a9a97d930e7..000000000000 --- a/arch/x86/include/asm/pat.h +++ /dev/null @@ -1,27 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_PAT_H -#define _ASM_X86_PAT_H - -#include <linux/types.h> -#include <asm/pgtable_types.h> - -bool pat_enabled(void); -void pat_disable(const char *reason); -extern void pat_init(void); -extern void init_cache_modes(void); - -extern int memtype_reserve(u64 start, u64 end, - enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm); -extern int memtype_free(u64 start, u64 end); - -extern int memtype_kernel_map_sync(u64 base, unsigned long size, - enum page_cache_mode pcm); - -int memtype_reserve_io(resource_size_t start, resource_size_t end, - enum page_cache_mode *pcm); - -void memtype_free_io(resource_size_t start, resource_size_t end); - -bool 
pat_pfn_immune_to_uc_mtrr(unsigned long pfn); - -#endif /* _ASM_X86_PAT_H */ diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index 90d0731fdcb6..c1fdd43fe187 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -9,7 +9,7 @@ #include <linux/scatterlist.h> #include <linux/numa.h> #include <asm/io.h> -#include <asm/pat.h> +#include <asm/memtype.h> #include <asm/x86_init.h> struct pci_sysdata { -- cgit From 533d49b37a2b532354d3841a142173b8321818df Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 20 Nov 2019 15:45:32 +0100 Subject: x86/mm/pat: Clean up externs Half of the declarations have an 'extern', half of them not, use 'extern' consistently. This makes grepping for APIs easier, such as: dagon:~/tip> git grep -E '\<memtype_' Signed-off-by: Ingo Molnar --- arch/x86/include/asm/memtype.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/memtype.h b/arch/x86/include/asm/memtype.h index ec18e38ae391..9c2447b3555d 100644 --- a/arch/x86/include/asm/memtype.h +++ b/arch/x86/include/asm/memtype.h @@ -5,8 +5,8 @@ #include <linux/types.h> #include <asm/pgtable_types.h> -bool pat_enabled(void); -void pat_disable(const char *reason); +extern bool pat_enabled(void); +extern void pat_disable(const char *reason); extern void pat_init(void); extern void init_cache_modes(void); @@ -17,11 +17,11 @@ extern int memtype_free(u64 start, u64 end); extern int memtype_kernel_map_sync(u64 base, unsigned long size, enum page_cache_mode pcm); -int memtype_reserve_io(resource_size_t start, resource_size_t end, +extern int memtype_reserve_io(resource_size_t start, resource_size_t end, enum page_cache_mode *pcm); -void memtype_free_io(resource_size_t start, resource_size_t end); +extern void memtype_free_io(resource_size_t start, resource_size_t end); -bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn); +extern bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn); #endif /* _ASM_X86_MEMTYPE_H */ -- cgit From 4efb56649132687b5093d90aa62887595e65a09d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 20 Nov 2019 17:38:39 +0100 Subject: x86/mm: Tabulate the page table encoding definitions I got lost in trying to figure out which bits were enabled in one of the PTE masks, so let's make it pretty obvious at the definition site already: #define PAGE_NONE __pg( 0| 0| 0|___A| 0| 0| 0|___G) #define PAGE_SHARED __pg(__PP|__RW|_USR|___A|__NX| 0| 0| 0) #define PAGE_SHARED_EXEC __pg(__PP|__RW|_USR|___A| 0| 0| 0| 0) #define PAGE_COPY_NOEXEC __pg(__PP| 0|_USR|___A|__NX| 0| 0| 0) #define PAGE_COPY_EXEC __pg(__PP| 0|_USR|___A| 0| 0| 0| 0) #define PAGE_COPY __pg(__PP| 0|_USR|___A|__NX| 0| 0| 0) #define PAGE_READONLY __pg(__PP| 0|_USR|___A|__NX| 0| 0| 0) #define PAGE_READONLY_EXEC __pg(__PP| 0|_USR|___A| 0| 0| 0| 0) #define __PAGE_KERNEL (__PP|__RW| 0|___A|__NX|___D| 0|___G) #define __PAGE_KERNEL_EXEC (__PP|__RW| 0|___A| 0|___D| 0|___G) #define _KERNPG_TABLE_NOENC (__PP|__RW| 0|___A| 0|___D| 0| 0) #define _KERNPG_TABLE (__PP|__RW| 0|___A| 0|___D| 0| 0| _ENC) #define _PAGE_TABLE_NOENC (__PP|__RW|_USR|___A| 0|___D| 0| 0) #define _PAGE_TABLE (__PP|__RW|_USR|___A| 0|___D| 0| 0| _ENC) #define __PAGE_KERNEL_RO (__PP| 0| 0|___A|__NX|___D| 0|___G) #define __PAGE_KERNEL_RX (__PP| 0| 0|___A| 0|___D| 0|___G) #define __PAGE_KERNEL_NOCACHE (__PP|__RW| 0|___A|__NX|___D| 0|___G| __NC) #define __PAGE_KERNEL_VVAR (__PP| 0|_USR|___A|__NX|___D| 0|___G) #define __PAGE_KERNEL_LARGE (__PP|__RW| 0|___A|__NX|___D|_PSE|___G) #define __PAGE_KERNEL_LARGE_EXEC (__PP|__RW| 0|___A| 0|___D|_PSE|___G) #define __PAGE_KERNEL_WP (__PP|__RW| 0|___A|__NX|___D| 0|___G| __WP) Especially security relevant bits 
like 'NX' or coherence related bits like 'G' are now super easy to read based on a single grep. We do the underscore gymnastics to not pollute the kernel's symbol namespace, and the longest line still fits into 80 columns, so this should be readable for everyone. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable_types.h | 143 ++++++++++++++++++----------------- 1 file changed, 74 insertions(+), 69 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index b5e49e6bac63..ea7400726d7a 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -110,11 +110,6 @@ #define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE) -#define _PAGE_TABLE_NOENC (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |\ - _PAGE_ACCESSED | _PAGE_DIRTY) -#define _KERNPG_TABLE_NOENC (_PAGE_PRESENT | _PAGE_RW | \ - _PAGE_ACCESSED | _PAGE_DIRTY) - /* * Set of bits not changed in pte_modify. The pte's * protection key is treated like _PAGE_RW, for @@ -136,80 +131,93 @@ */ #ifndef __ASSEMBLY__ enum page_cache_mode { - _PAGE_CACHE_MODE_WB = 0, - _PAGE_CACHE_MODE_WC = 1, + _PAGE_CACHE_MODE_WB = 0, + _PAGE_CACHE_MODE_WC = 1, _PAGE_CACHE_MODE_UC_MINUS = 2, - _PAGE_CACHE_MODE_UC = 3, - _PAGE_CACHE_MODE_WT = 4, - _PAGE_CACHE_MODE_WP = 5, - _PAGE_CACHE_MODE_NUM = 8 + _PAGE_CACHE_MODE_UC = 3, + _PAGE_CACHE_MODE_WT = 4, + _PAGE_CACHE_MODE_WP = 5, + + _PAGE_CACHE_MODE_NUM = 8 }; #endif -#define _PAGE_CACHE_MASK (_PAGE_PAT | _PAGE_PCD | _PAGE_PWT) -#define _PAGE_NOCACHE (cachemode2protval(_PAGE_CACHE_MODE_UC)) -#define _PAGE_CACHE_WP (cachemode2protval(_PAGE_CACHE_MODE_WP)) +#define _PAGE_ENC (_AT(pteval_t, sme_me_mask)) -#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED) -#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \ - _PAGE_ACCESSED | _PAGE_NX) - -#define PAGE_SHARED_EXEC __pgprot(_PAGE_PRESENT | _PAGE_RW | \ - _PAGE_USER | _PAGE_ACCESSED) -#define PAGE_COPY_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \ - _PAGE_ACCESSED | _PAGE_NX) -#define PAGE_COPY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \ - _PAGE_ACCESSED) -#define PAGE_COPY PAGE_COPY_NOEXEC -#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | \ - _PAGE_ACCESSED | _PAGE_NX) -#define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \ - _PAGE_ACCESSED) - -#define __PAGE_KERNEL_EXEC \ - (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_GLOBAL) -#define __PAGE_KERNEL (__PAGE_KERNEL_EXEC | _PAGE_NX) - -#define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW) -#define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW) -#define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_NOCACHE) -#define __PAGE_KERNEL_VVAR (__PAGE_KERNEL_RO | _PAGE_USER) -#define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE) -#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE) -#define __PAGE_KERNEL_WP (__PAGE_KERNEL | _PAGE_CACHE_WP) - -#define __PAGE_KERNEL_IO (__PAGE_KERNEL) -#define __PAGE_KERNEL_IO_NOCACHE (__PAGE_KERNEL_NOCACHE) +#define _PAGE_CACHE_MASK (_PAGE_PWT | _PAGE_PCD | _PAGE_PAT) -#ifndef __ASSEMBLY__ +#define _PAGE_NOCACHE (cachemode2protval(_PAGE_CACHE_MODE_UC)) +#define _PAGE_CACHE_WP (cachemode2protval(_PAGE_CACHE_MODE_WP)) -#define _PAGE_ENC (_AT(pteval_t, sme_me_mask)) +#define __PP _PAGE_PRESENT +#define __RW _PAGE_RW +#define _USR _PAGE_USER +#define ___A _PAGE_ACCESSED +#define ___D _PAGE_DIRTY +#define ___G _PAGE_GLOBAL +#define __NX _PAGE_NX + +#define _ENC _PAGE_ENC +#define __WP 
_PAGE_CACHE_WP +#define __NC _PAGE_NOCACHE +#define _PSE _PAGE_PSE + +#define pgprot_val(x) ((x).pgprot) +#define __pgprot(x) ((pgprot_t) { (x) } ) +#define __pg(x) __pgprot(x) + +#define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE) + +#define PAGE_NONE __pg( 0| 0| 0|___A| 0| 0| 0|___G) +#define PAGE_SHARED __pg(__PP|__RW|_USR|___A|__NX| 0| 0| 0) +#define PAGE_SHARED_EXEC __pg(__PP|__RW|_USR|___A| 0| 0| 0| 0) +#define PAGE_COPY_NOEXEC __pg(__PP| 0|_USR|___A|__NX| 0| 0| 0) +#define PAGE_COPY_EXEC __pg(__PP| 0|_USR|___A| 0| 0| 0| 0) +#define PAGE_COPY __pg(__PP| 0|_USR|___A|__NX| 0| 0| 0) +#define PAGE_READONLY __pg(__PP| 0|_USR|___A|__NX| 0| 0| 0) +#define PAGE_READONLY_EXEC __pg(__PP| 0|_USR|___A| 0| 0| 0| 0) + +#define __PAGE_KERNEL (__PP|__RW| 0|___A|__NX|___D| 0|___G) +#define __PAGE_KERNEL_EXEC (__PP|__RW| 0|___A| 0|___D| 0|___G) +#define _KERNPG_TABLE_NOENC (__PP|__RW| 0|___A| 0|___D| 0| 0) +#define _KERNPG_TABLE (__PP|__RW| 0|___A| 0|___D| 0| 0| _ENC) +#define _PAGE_TABLE_NOENC (__PP|__RW|_USR|___A| 0|___D| 0| 0) +#define _PAGE_TABLE (__PP|__RW|_USR|___A| 0|___D| 0| 0| _ENC) +#define __PAGE_KERNEL_RO (__PP| 0| 0|___A|__NX|___D| 0|___G) +#define __PAGE_KERNEL_RX (__PP| 0| 0|___A| 0|___D| 0|___G) +#define __PAGE_KERNEL_NOCACHE (__PP|__RW| 0|___A|__NX|___D| 0|___G| __NC) +#define __PAGE_KERNEL_VVAR (__PP| 0|_USR|___A|__NX|___D| 0|___G) +#define __PAGE_KERNEL_LARGE (__PP|__RW| 0|___A|__NX|___D|_PSE|___G) +#define __PAGE_KERNEL_LARGE_EXEC (__PP|__RW| 0|___A| 0|___D|_PSE|___G) +#define __PAGE_KERNEL_WP (__PP|__RW| 0|___A|__NX|___D| 0|___G| __WP) + + +#define __PAGE_KERNEL_IO __PAGE_KERNEL +#define __PAGE_KERNEL_IO_NOCACHE __PAGE_KERNEL_NOCACHE -#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | \ - _PAGE_DIRTY | _PAGE_ENC) -#define _PAGE_TABLE (_KERNPG_TABLE | _PAGE_USER) -#define __PAGE_KERNEL_ENC (__PAGE_KERNEL | _PAGE_ENC) -#define __PAGE_KERNEL_ENC_WP (__PAGE_KERNEL_WP | _PAGE_ENC) +#ifndef __ASSEMBLY__ -#define __PAGE_KERNEL_NOENC (__PAGE_KERNEL) -#define __PAGE_KERNEL_NOENC_WP (__PAGE_KERNEL_WP) +#define __PAGE_KERNEL_ENC (__PAGE_KERNEL | _ENC) +#define __PAGE_KERNEL_ENC_WP (__PAGE_KERNEL_WP | _ENC) +#define __PAGE_KERNEL_NOENC (__PAGE_KERNEL | 0) +#define __PAGE_KERNEL_NOENC_WP (__PAGE_KERNEL_WP | 0) -#define default_pgprot(x) __pgprot((x) & __default_kernel_pte_mask) +#define __pgprot_mask(x) __pgprot((x) & __default_kernel_pte_mask) -#define PAGE_KERNEL default_pgprot(__PAGE_KERNEL | _PAGE_ENC) -#define PAGE_KERNEL_NOENC default_pgprot(__PAGE_KERNEL) -#define PAGE_KERNEL_RO default_pgprot(__PAGE_KERNEL_RO | _PAGE_ENC) -#define PAGE_KERNEL_EXEC default_pgprot(__PAGE_KERNEL_EXEC | _PAGE_ENC) -#define PAGE_KERNEL_EXEC_NOENC default_pgprot(__PAGE_KERNEL_EXEC) -#define PAGE_KERNEL_RX default_pgprot(__PAGE_KERNEL_RX | _PAGE_ENC) -#define PAGE_KERNEL_NOCACHE default_pgprot(__PAGE_KERNEL_NOCACHE | _PAGE_ENC) -#define PAGE_KERNEL_LARGE default_pgprot(__PAGE_KERNEL_LARGE | _PAGE_ENC) -#define PAGE_KERNEL_LARGE_EXEC default_pgprot(__PAGE_KERNEL_LARGE_EXEC | _PAGE_ENC) -#define PAGE_KERNEL_VVAR default_pgprot(__PAGE_KERNEL_VVAR | _PAGE_ENC) +#define PAGE_KERNEL __pgprot_mask(__PAGE_KERNEL | _ENC) +#define PAGE_KERNEL_NOENC __pgprot_mask(__PAGE_KERNEL | 0) +#define PAGE_KERNEL_RO __pgprot_mask(__PAGE_KERNEL_RO | _ENC) +#define PAGE_KERNEL_EXEC __pgprot_mask(__PAGE_KERNEL_EXEC | _ENC) +#define PAGE_KERNEL_EXEC_NOENC __pgprot_mask(__PAGE_KERNEL_EXEC | 0) +#define PAGE_KERNEL_RX __pgprot_mask(__PAGE_KERNEL_RX | _ENC) +#define PAGE_KERNEL_NOCACHE 
__pgprot_mask(__PAGE_KERNEL_NOCACHE | _ENC) +#define PAGE_KERNEL_LARGE __pgprot_mask(__PAGE_KERNEL_LARGE | _ENC) +#define PAGE_KERNEL_LARGE_EXEC __pgprot_mask(__PAGE_KERNEL_LARGE_EXEC | _ENC) +#define PAGE_KERNEL_VVAR __pgprot_mask(__PAGE_KERNEL_VVAR | _ENC) -#define PAGE_KERNEL_IO default_pgprot(__PAGE_KERNEL_IO) -#define PAGE_KERNEL_IO_NOCACHE default_pgprot(__PAGE_KERNEL_IO_NOCACHE) +#define PAGE_KERNEL_IO __pgprot_mask(__PAGE_KERNEL_IO) +#define PAGE_KERNEL_IO_NOCACHE __pgprot_mask(__PAGE_KERNEL_IO_NOCACHE) #endif /* __ASSEMBLY__ */ @@ -449,9 +457,6 @@ static inline pteval_t pte_flags(pte_t pte) return native_pte_val(pte) & PTE_FLAGS_MASK; } -#define pgprot_val(x) ((x).pgprot) -#define __pgprot(x) ((pgprot_t) { (x) } ) - extern uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM]; extern uint8_t __pte2cachemode_tbl[8]; -- cgit From 1f059dfdf5d170dccbac92193be2fee3c1763384 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 28 Nov 2019 08:19:36 +0100 Subject: mm/vmalloc: Add empty <asm/vmalloc.h> headers and use them from <linux/vmalloc.h> In the x86 MM code we'd like to untangle various types of historic header dependency spaghetti, but for this we'd need to pass to the generic vmalloc code various vmalloc related defines that customarily come via the low level arch header. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/vmalloc.h | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 arch/x86/include/asm/vmalloc.h (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/vmalloc.h b/arch/x86/include/asm/vmalloc.h new file mode 100644 index 000000000000..ba6295fa5876 --- /dev/null +++ b/arch/x86/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_X86_VMALLOC_H +#define _ASM_X86_VMALLOC_H + +#endif /* _ASM_X86_VMALLOC_H */ -- cgit From 186525bd6b83efc592672e2d6185e4d7c810d2b4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 29 Nov 2019 08:17:25 +0100 Subject: mm, x86/mm: Untangle address space layout definitions from basic pgtable type definitions - Untangle the somewhat incestuous way of how VMALLOC_START is used all across the kernel, but is, on x86, defined deep inside one of the lowest level page table headers. It doesn't help that vmalloc.h only includes a single asm header: #include <asm/page.h> /* pgprot_t */ So there was no existing cross-arch way to decouple address layout definitions from page.h details. I used this: #ifndef VMALLOC_START # include <asm/vmalloc.h> #endif This way every architecture that wants to simplify page.h can do so. - Also on x86 we had a couple of LDT related inline functions that used the late-stage address space layout positions - but these could be uninlined without real trouble - the end result is cleaner this way as well. 
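As an illustration of the opt-in pattern quoted above - a sketch with a made-up 'foo' architecture and made-up addresses, not part of the patch:

   /* arch/foo/include/asm/vmalloc.h: the arch provides the layout constants */
   #ifndef _ASM_FOO_VMALLOC_H
   #define _ASM_FOO_VMALLOC_H

   #define VMALLOC_START	0xffff800000000000UL	/* hypothetical values */
   #define VMALLOC_END	0xffffc00000000000UL

   #endif

   /* generic code: only falls back to <asm/vmalloc.h> if the legacy
    * page.h/pgtable.h route has not defined the layout already */
   #ifndef VMALLOC_START
   # include <asm/vmalloc.h>
   #endif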
Signed-off-by: Ingo Molnar Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Borislav Petkov Cc: Linus Torvalds Cc: Andrew Morton Cc: Rik van Riel Cc: linux-kernel@vger.kernel.org Cc: linux-mm@kvack.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpu_entry_area.h | 10 +--- arch/x86/include/asm/mmu_context.h | 86 +++------------------------------ arch/x86/include/asm/pgtable_32_areas.h | 53 ++++++++++++++++++++ arch/x86/include/asm/pgtable_32_types.h | 57 ++-------------------- arch/x86/include/asm/pgtable_areas.h | 16 ++++++ arch/x86/include/asm/vmalloc.h | 2 + 6 files changed, 81 insertions(+), 143 deletions(-) create mode 100644 arch/x86/include/asm/pgtable_32_areas.h create mode 100644 arch/x86/include/asm/pgtable_areas.h (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h index 804734058c77..02c0078d3787 100644 --- a/arch/x86/include/asm/cpu_entry_area.h +++ b/arch/x86/include/asm/cpu_entry_area.h @@ -6,6 +6,7 @@ #include <linux/percpu-defs.h> #include <asm/processor.h> #include <asm/intel_ds.h> +#include <asm/pgtable_areas.h> #ifdef CONFIG_X86_64 @@ -134,15 +135,6 @@ DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks); extern void setup_cpu_entry_areas(void); extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags); -/* Single page reserved for the readonly IDT mapping: */ -#define CPU_ENTRY_AREA_RO_IDT CPU_ENTRY_AREA_BASE -#define CPU_ENTRY_AREA_PER_CPU (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE) - -#define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT) - -#define CPU_ENTRY_AREA_MAP_SIZE \ - (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_ARRAY_SIZE - CPU_ENTRY_AREA_BASE) - extern struct cpu_entry_area *get_cpu_entry_area(int cpu); static inline struct entry_stack *cpu_entry_stack(int cpu) diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 5f33924e200f..b243234e90cb 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -69,14 +69,6 @@ struct ldt_struct { int slot; }; -/* This is a multiple of PAGE_SIZE. */ -#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE) - -static inline void *ldt_slot_va(int slot) -{ - return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot); -} - /* * Used for LDT copy/destruction. */ @@ -99,87 +91,21 @@ static inline void destroy_context_ldt(struct mm_struct *mm) { } static inline void ldt_arch_exit_mmap(struct mm_struct *mm) { } #endif -static inline void load_mm_ldt(struct mm_struct *mm) -{ #ifdef CONFIG_MODIFY_LDT_SYSCALL - struct ldt_struct *ldt; - - /* READ_ONCE synchronizes with smp_store_release */ - ldt = READ_ONCE(mm->context.ldt); - - /* - * Any change to mm->context.ldt is followed by an IPI to all - * CPUs with the mm active. The LDT will not be freed until - * after the IPI is handled by all such CPUs. This means that, - * if the ldt_struct changes before we return, the values we see - * will be safe, and the new values will be loaded before we run - * any user code. - * - * NB: don't try to convert this to use RCU without extreme care. - * We would still need IRQs off, because we don't want to change - * the local LDT after an IPI loaded a newer value than the one - * that we can see. - */ - - if (unlikely(ldt)) { - if (static_cpu_has(X86_FEATURE_PTI)) { - if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) { - /* - * Whoops -- either the new LDT isn't mapped - * (if slot == -1) or is mapped into a bogus - * slot (if slot > 1). 
- */ - clear_LDT(); - return; - } - - /* - * If page table isolation is enabled, ldt->entries - * will not be mapped in the userspace pagetables. - * Tell the CPU to access the LDT through the alias - * at ldt_slot_va(ldt->slot). - */ - set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries); - } else { - set_ldt(ldt->entries, ldt->nr_entries); - } - } else { - clear_LDT(); - } +extern void load_mm_ldt(struct mm_struct *mm); +extern void switch_ldt(struct mm_struct *prev, struct mm_struct *next); #else +static inline void load_mm_ldt(struct mm_struct *mm) +{ clear_LDT(); -#endif } - static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next) { -#ifdef CONFIG_MODIFY_LDT_SYSCALL - /* - * Load the LDT if either the old or new mm had an LDT. - * - * An mm will never go from having an LDT to not having an LDT. Two - * mms never share an LDT, so we don't gain anything by checking to - * see whether the LDT changed. There's also no guarantee that - * prev->context.ldt actually matches LDTR, but, if LDTR is non-NULL, - * then prev->context.ldt will also be non-NULL. - * - * If we really cared, we could optimize the case where prev == next - * and we're exiting lazy mode. Most of the time, if this happens, - * we don't actually need to reload LDTR, but modify_ldt() is mostly - * used by legacy code and emulators where we don't need this level of - * performance. - * - * This uses | instead of || because it generates better code. - */ - if (unlikely((unsigned long)prev->context.ldt | - (unsigned long)next->context.ldt)) - load_mm_ldt(next); -#endif - DEBUG_LOCKS_WARN_ON(preemptible()); } +#endif -void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk); +extern void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk); /* * Init a new mm. Used on mm copies, like at fork() diff --git a/arch/x86/include/asm/pgtable_32_areas.h b/arch/x86/include/asm/pgtable_32_areas.h new file mode 100644 index 000000000000..b6355416a15a --- /dev/null +++ b/arch/x86/include/asm/pgtable_32_areas.h @@ -0,0 +1,53 @@ +#ifndef _ASM_X86_PGTABLE_32_AREAS_H +#define _ASM_X86_PGTABLE_32_AREAS_H + +#include <asm/cpu_entry_area.h> + +/* + * Just any arbitrary offset to the start of the vmalloc VM area: the + * current 8MB value just means that there will be a 8MB "hole" after the + * physical memory until the kernel virtual memory starts. That means that + * any out-of-bounds memory accesses will hopefully be caught. + * The vmalloc() routines leaves a hole of 4kB between each vmalloced + * area for the same reason. 
;) + */ +#define VMALLOC_OFFSET (8 * 1024 * 1024) + +#ifndef __ASSEMBLY__ +extern bool __vmalloc_start_set; /* set once high_memory is set */ +#endif + +#define VMALLOC_START ((unsigned long)high_memory + VMALLOC_OFFSET) +#ifdef CONFIG_X86_PAE +#define LAST_PKMAP 512 +#else +#define LAST_PKMAP 1024 +#endif + +#define CPU_ENTRY_AREA_PAGES (NR_CPUS * DIV_ROUND_UP(sizeof(struct cpu_entry_area), PAGE_SIZE)) + +/* The +1 is for the readonly IDT page: */ +#define CPU_ENTRY_AREA_BASE \ + ((FIXADDR_TOT_START - PAGE_SIZE*(CPU_ENTRY_AREA_PAGES+1)) & PMD_MASK) + +#define LDT_BASE_ADDR \ + ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK) + +#define LDT_END_ADDR (LDT_BASE_ADDR + PMD_SIZE) + +#define PKMAP_BASE \ + ((LDT_BASE_ADDR - PAGE_SIZE) & PMD_MASK) + +#ifdef CONFIG_HIGHMEM +# define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE) +#else +# define VMALLOC_END (LDT_BASE_ADDR - 2 * PAGE_SIZE) +#endif + +#define MODULES_VADDR VMALLOC_START +#define MODULES_END VMALLOC_END +#define MODULES_LEN (MODULES_VADDR - MODULES_END) + +#define MAXMEM (VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE) + +#endif /* _ASM_X86_PGTABLE_32_AREAS_H */ diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h index 0416d42e5bdd..5356a46b0373 100644 --- a/arch/x86/include/asm/pgtable_32_types.h +++ b/arch/x86/include/asm/pgtable_32_types.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_PGTABLE_32_DEFS_H -#define _ASM_X86_PGTABLE_32_DEFS_H +#ifndef _ASM_X86_PGTABLE_32_TYPES_H +#define _ASM_X86_PGTABLE_32_TYPES_H /* * The Linux x86 paging architecture is 'compile-time dual-mode', it @@ -20,55 +20,4 @@ #define PGDIR_SIZE (1UL << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE - 1)) -/* Just any arbitrary offset to the start of the vmalloc VM area: the - * current 8MB value just means that there will be a 8MB "hole" after the - * physical memory until the kernel virtual memory starts. That means that - * any out-of-bounds memory accesses will hopefully be caught. - * The vmalloc() routines leaves a hole of 4kB between each vmalloced - * area for the same reason. ;) - */ -#define VMALLOC_OFFSET (8 * 1024 * 1024) - -#ifndef __ASSEMBLY__ -extern bool __vmalloc_start_set; /* set once high_memory is set */ -#endif - -#define VMALLOC_START ((unsigned long)high_memory + VMALLOC_OFFSET) -#ifdef CONFIG_X86_PAE -#define LAST_PKMAP 512 -#else -#define LAST_PKMAP 1024 -#endif - -/* - * This is an upper bound on sizeof(struct cpu_entry_area) / PAGE_SIZE. - * Define this here and validate with BUILD_BUG_ON() in cpu_entry_area.c - * to avoid include recursion hell. 
- */ -#define CPU_ENTRY_AREA_PAGES (NR_CPUS * 43) - -/* The +1 is for the readonly IDT page: */ -#define CPU_ENTRY_AREA_BASE \ - ((FIXADDR_TOT_START - PAGE_SIZE*(CPU_ENTRY_AREA_PAGES+1)) & PMD_MASK) - -#define LDT_BASE_ADDR \ - ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK) - -#define LDT_END_ADDR (LDT_BASE_ADDR + PMD_SIZE) - -#define PKMAP_BASE \ - ((LDT_BASE_ADDR - PAGE_SIZE) & PMD_MASK) - -#ifdef CONFIG_HIGHMEM -# define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE) -#else -# define VMALLOC_END (LDT_BASE_ADDR - 2 * PAGE_SIZE) -#endif - -#define MODULES_VADDR VMALLOC_START -#define MODULES_END VMALLOC_END -#define MODULES_LEN (MODULES_VADDR - MODULES_END) - -#define MAXMEM (VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE) - -#endif /* _ASM_X86_PGTABLE_32_DEFS_H */ +#endif /* _ASM_X86_PGTABLE_32_TYPES_H */ diff --git a/arch/x86/include/asm/pgtable_areas.h b/arch/x86/include/asm/pgtable_areas.h new file mode 100644 index 000000000000..d34cce1b995c --- /dev/null +++ b/arch/x86/include/asm/pgtable_areas.h @@ -0,0 +1,16 @@ +#ifndef _ASM_X86_PGTABLE_AREAS_H +#define _ASM_X86_PGTABLE_AREAS_H + +#ifdef CONFIG_X86_32 +# include <asm/pgtable_32_areas.h> +#endif + +/* Single page reserved for the readonly IDT mapping: */ +#define CPU_ENTRY_AREA_RO_IDT CPU_ENTRY_AREA_BASE +#define CPU_ENTRY_AREA_PER_CPU (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE) + +#define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT) + +#define CPU_ENTRY_AREA_MAP_SIZE (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_ARRAY_SIZE - CPU_ENTRY_AREA_BASE) + +#endif /* _ASM_X86_PGTABLE_AREAS_H */ diff --git a/arch/x86/include/asm/vmalloc.h b/arch/x86/include/asm/vmalloc.h index ba6295fa5876..29837740b520 100644 --- a/arch/x86/include/asm/vmalloc.h +++ b/arch/x86/include/asm/vmalloc.h @@ -1,4 +1,6 @@ #ifndef _ASM_X86_VMALLOC_H #define _ASM_X86_VMALLOC_H +#include <asm/pgtable_areas.h> + #endif /* _ASM_X86_VMALLOC_H */ -- cgit From 58ec655a75731c83f403e9c04ffd66aa6b3cd4d5 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 24 Dec 2019 16:10:05 +0100 Subject: efi/libstub: Remove unused __efi_call_early() macro The macro __efi_call_early() is defined by various architectures but never used. Let's get rid of it. Signed-off-by: Ard Biesheuvel Cc: Arvind Sankar Cc: Borislav Petkov Cc: James Morse Cc: Matt Fleming Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20191224151025.32482-6-ardb@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/efi.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index d028e9acdf1c..59c19e0b6027 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -233,9 +233,6 @@ static inline bool efi_is_64bit(void) __efi_early()->call(efi_table_attr(efi_boot_services, f, \ __efi_early()->boot_services), __VA_ARGS__) -#define __efi_call_early(f, ...) \ - __efi_early()->call((unsigned long)f, __VA_ARGS__); - #define efi_call_runtime(f, ...) \ __efi_early()->call(efi_table_attr(efi_runtime_services, f, \ __efi_early()->runtime_services), __VA_ARGS__) -- cgit From a8147dba75b188bff87d4ad072db84a0b70d716d Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 24 Dec 2019 16:10:06 +0100 Subject: efi/x86: Rename efi_is_native() to efi_is_mixed() The ARM architecture does not permit combining 32-bit and 64-bit code at the same privilege level, and so EFI mixed mode is strictly an x86 concept. 
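For reference, a sketch (not part of the patch) of how the kernel/firmware bitness combinations map onto the new predicate:

   /*
    *  kernel   firmware   efi_is_mixed()
    *  64-bit   64-bit     false  (native)
    *  32-bit   32-bit     false  (native)
    *  64-bit   32-bit     true, if CONFIG_EFI_MIXED is enabled
    *  32-bit   64-bit     false  (non-native, boot services only)
    */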
In preparation of turning the 32/64 bit distinction in shared stub code to a native vs mixed one, refactor x86's current use of the helper function efi_is_native() into efi_is_mixed(). Signed-off-by: Ard Biesheuvel Cc: Arvind Sankar Cc: Borislav Petkov Cc: James Morse Cc: Matt Fleming Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20191224151025.32482-7-ardb@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/efi.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 59c19e0b6027..6094e7f49a99 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -153,17 +153,19 @@ extern u64 efi_setup; #ifdef CONFIG_EFI -static inline bool efi_is_native(void) +static inline bool efi_is_mixed(void) { - return IS_ENABLED(CONFIG_X86_64) == efi_enabled(EFI_64BIT); + if (!IS_ENABLED(CONFIG_EFI_MIXED)) + return false; + return IS_ENABLED(CONFIG_X86_64) && !efi_enabled(EFI_64BIT); } static inline bool efi_runtime_supported(void) { - if (efi_is_native()) + if (!efi_is_mixed()) return true; - if (IS_ENABLED(CONFIG_EFI_MIXED) && !efi_enabled(EFI_OLD_MEMMAP)) + if (!efi_enabled(EFI_OLD_MEMMAP)) return true; return false; -- cgit From f958efe97596837f9504fc38d75ef8e284bc0ebd Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 24 Dec 2019 16:10:09 +0100 Subject: efi/libstub: Distinguish between native/mixed not 32/64 bit Currently, we support mixed mode by casting all boot time firmware calls to 64-bit explicitly on native 64-bit systems, and to 32-bit on 32-bit systems or 64-bit systems running with 32-bit firmware. Due to this explicit awareness of the bitness in the code, we do a lot of casting even on generic code that is shared with other architectures, where mixed mode does not even exist. This casting leads to loss of coverage of type checking by the compiler, which we should try to avoid. So instead of distinguishing between 32-bit vs 64-bit, distinguish between native vs mixed, and limit all the nasty casting and pointer mangling to the code that actually deals with mixed mode. Signed-off-by: Ard Biesheuvel Cc: Arvind Sankar Cc: Borislav Petkov Cc: James Morse Cc: Matt Fleming Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20191224151025.32482-10-ardb@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/efi.h | 35 ++++++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 7 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 6094e7f49a99..c27323cb49e5 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -222,21 +222,42 @@ static inline bool efi_is_64bit(void) return __efi_early()->is64; } -#define efi_table_attr(table, attr, instance) \ - (efi_is_64bit() ? 
\ - ((table##_64_t *)(unsigned long)instance)->attr : \ - ((table##_32_t *)(unsigned long)instance)->attr) +static inline bool efi_is_native(void) +{ + if (!IS_ENABLED(CONFIG_X86_64)) + return true; + return efi_is_64bit(); +} + +#define efi_mixed_mode_cast(attr) \ + __builtin_choose_expr( \ + __builtin_types_compatible_p(u32, __typeof__(attr)), \ + (unsigned long)(attr), (attr)) + +#define efi_table_attr(table, attr, instance) ({ \ + __typeof__(((table##_t *)0)->attr) __ret; \ + if (efi_is_native()) { \ + __ret = ((table##_t *)(unsigned long)instance)->attr; \ + } else { \ + __ret = (__typeof__(__ret))efi_mixed_mode_cast( \ + ((table##_t *)(unsigned long)instance)->mixed_mode.attr);\ + } \ + __ret; \ +}) #define efi_call_proto(protocol, f, instance, ...) \ - __efi_early()->call(efi_table_attr(protocol, f, instance), \ + __efi_early()->call((unsigned long) \ + efi_table_attr(protocol, f, instance), \ instance, ##__VA_ARGS__) #define efi_call_early(f, ...) \ - __efi_early()->call(efi_table_attr(efi_boot_services, f, \ + __efi_early()->call((unsigned long) \ + efi_table_attr(efi_boot_services, f, \ __efi_early()->boot_services), __VA_ARGS__) #define efi_call_runtime(f, ...) \ - __efi_early()->call(efi_table_attr(efi_runtime_services, f, \ + __efi_early()->call((unsigned long) \ + efi_table_attr(efi_runtime_services, f, \ __efi_early()->runtime_services), __VA_ARGS__) extern bool efi_reboot_required(void); -- cgit From afc4cc71cf78a8d691023da8ebcc31c3394a1674 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 24 Dec 2019 16:10:13 +0100 Subject: efi/libstub/x86: Avoid thunking for native firmware calls We use special wrapper routines to invoke firmware services in the native case as well as the mixed mode case. For mixed mode, the need is obvious, but for the native cases, we can simply rely on the compiler to generate the indirect call, given that GCC now has support for the MS calling convention (and has had it for quite some time now). Note that on i386, the decompressor and the EFI stub are not built with -mregparm=3 like the rest of the i386 kernel, so we can safely allow the compiler to emit the indirect calls here as well. So drop all the wrappers and indirection, and switch to either native calls, or direct calls into the thunk routine for mixed mode. 
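To see what this buys us, a sketch of how the reworked efi_table_attr() behaves at a call site - the con_out lookup is hypothetical stub code, not a hunk from the patch:

   con_out = efi_table_attr(efi_system_table, con_out, sys_table);

   /* native:  a plain, fully type-checked member access:
    *    ((efi_system_table_t *)(unsigned long)sys_table)->con_out
    * mixed:   the u32 firmware pointer is widened in exactly one place:
    *    (__typeof__(__ret))efi_mixed_mode_cast(
    *        ((efi_system_table_t *)(unsigned long)sys_table)->mixed_mode.con_out)
    */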
Signed-off-by: Ard Biesheuvel Cc: Arvind Sankar Cc: Borislav Petkov Cc: James Morse Cc: Matt Fleming Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20191224151025.32482-14-ardb@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/efi.h | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index c27323cb49e5..001905daa110 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -152,6 +152,7 @@ struct efi_setup_data { extern u64 efi_setup; #ifdef CONFIG_EFI +extern efi_status_t efi64_thunk(u32, ...); static inline bool efi_is_mixed(void) { @@ -205,7 +206,6 @@ struct efi_config { u64 runtime_services; u64 boot_services; u64 text_output; - efi_status_t (*call)(unsigned long, ...); bool is64; } __packed; @@ -235,30 +235,36 @@ static inline bool efi_is_native(void) (unsigned long)(attr), (attr)) #define efi_table_attr(table, attr, instance) ({ \ - __typeof__(((table##_t *)0)->attr) __ret; \ + __typeof__(instance->attr) __ret; \ if (efi_is_native()) { \ - __ret = ((table##_t *)(unsigned long)instance)->attr; \ + __ret = instance->attr; \ } else { \ - __ret = (__typeof__(__ret))efi_mixed_mode_cast( \ - ((table##_t *)(unsigned long)instance)->mixed_mode.attr);\ + __ret = (__typeof__(__ret)) \ + efi_mixed_mode_cast(instance->mixed_mode.attr); \ } \ __ret; \ }) #define efi_call_proto(protocol, f, instance, ...) \ - __efi_early()->call((unsigned long) \ - efi_table_attr(protocol, f, instance), \ - instance, ##__VA_ARGS__) + (efi_is_native() \ + ? instance->f(instance, ##__VA_ARGS__) \ + : efi64_thunk(instance->mixed_mode.f, instance, ##__VA_ARGS__)) #define efi_call_early(f, ...) \ - __efi_early()->call((unsigned long) \ - efi_table_attr(efi_boot_services, f, \ - __efi_early()->boot_services), __VA_ARGS__) + (efi_is_native() \ + ? ((efi_boot_services_t *)(unsigned long) \ + __efi_early()->boot_services)->f(__VA_ARGS__) \ + : efi64_thunk(((efi_boot_services_t *)(unsigned long) \ + __efi_early()->boot_services)->mixed_mode.f, \ + __VA_ARGS__)) #define efi_call_runtime(f, ...) \ - __efi_early()->call((unsigned long) \ - efi_table_attr(efi_runtime_services, f, \ - __efi_early()->runtime_services), __VA_ARGS__) + (efi_is_native() \ + ? ((efi_runtime_services_t *)(unsigned long) \ + __efi_early()->runtime_services)->f(__VA_ARGS__)\ + : efi64_thunk(((efi_runtime_services_t *)(unsigned long)\ + __efi_early()->runtime_services)->mixed_mode.f, \ + __VA_ARGS__)) extern bool efi_reboot_required(void); extern bool efi_is_table_address(unsigned long phys_addr); -- cgit From c3710de5065d63f8b16d160a118cc50ae09050b4 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 24 Dec 2019 16:10:17 +0100 Subject: efi/libstub/x86: Drop __efi_early() export and efi_config struct The various pointers we stash in the efi_config struct which we retrieve using __efi_early() are simply copies of the ones in the EFI system table, which we have started accessing directly in the previous patch. So drop all the __efi_early() related plumbing, as well as all the assembly code dealing with efi_config, which allows us to move the PE/COFF entry point to C code as well. 
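The net effect at a call site, sketched for a boot services lookup (illustrative only, not a hunk from the patch):

   /* before: opaque u64 copy stashed in struct efi_config */
   efi_boot_services_t *bs = (efi_boot_services_t *)(unsigned long)
                             __efi_early()->boot_services;

   /* after: read straight out of the EFI system table */
   efi_boot_services_t *bs =
           efi_table_attr(efi_boot_services, boottime, efi_system_table());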
Signed-off-by: Ard Biesheuvel Cc: Arvind Sankar Cc: Borislav Petkov Cc: James Morse Cc: Matt Fleming Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20191224151025.32482-18-ardb@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/efi.h | 38 +++++++++----------------------------- 1 file changed, 9 insertions(+), 29 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 001905daa110..1817f350618e 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -200,32 +200,14 @@ static inline efi_status_t efi_thunk_set_virtual_address_map( /* arch specific definitions used by the stub code */ -struct efi_config { - u64 image_handle; - u64 table; - u64 runtime_services; - u64 boot_services; - u64 text_output; - bool is64; -} __packed; - -__pure const struct efi_config *__efi_early(void); - -static inline bool efi_is_64bit(void) -{ - if (!IS_ENABLED(CONFIG_X86_64)) - return false; - - if (!IS_ENABLED(CONFIG_EFI_MIXED)) - return true; - - return __efi_early()->is64; -} +__pure bool efi_is_64bit(void); static inline bool efi_is_native(void) { if (!IS_ENABLED(CONFIG_X86_64)) return true; + if (!IS_ENABLED(CONFIG_EFI_MIXED)) + return true; return efi_is_64bit(); } @@ -252,18 +234,16 @@ static inline bool efi_is_native(void) #define efi_call_early(f, ...) \ (efi_is_native() \ - ? ((efi_boot_services_t *)(unsigned long) \ - __efi_early()->boot_services)->f(__VA_ARGS__) \ - : efi64_thunk(((efi_boot_services_t *)(unsigned long) \ - __efi_early()->boot_services)->mixed_mode.f, \ + ? efi_system_table()->boottime->f(__VA_ARGS__) \ + : efi64_thunk(efi_table_attr(efi_boot_services, \ + boottime, efi_system_table())->mixed_mode.f, \ __VA_ARGS__)) #define efi_call_runtime(f, ...) \ (efi_is_native() \ - ? ((efi_runtime_services_t *)(unsigned long) \ - __efi_early()->runtime_services)->f(__VA_ARGS__)\ - : efi64_thunk(((efi_runtime_services_t *)(unsigned long)\ - __efi_early()->runtime_services)->mixed_mode.f, \ + ? efi_system_table()->runtime->f(__VA_ARGS__) \ + : efi64_thunk(efi_table_attr(efi_runtime_services, \ + runtime, efi_system_table())->mixed_mode.f, \ __VA_ARGS__)) extern bool efi_reboot_required(void); -- cgit From 47c0fd39b7b81f51cc8f767c34a57d12289bdc60 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 24 Dec 2019 16:10:21 +0100 Subject: efi/libstub: Drop protocol argument from efi_call_proto() macro After refactoring the mixed mode support code, efi_call_proto() no longer uses its protocol argument in any of its implementation, so let's remove it altogether. Signed-off-by: Ard Biesheuvel Cc: Arvind Sankar Cc: Borislav Petkov Cc: James Morse Cc: Matt Fleming Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20191224151025.32482-22-ardb@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/efi.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 1817f350618e..b7cd14e3a634 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -227,10 +227,10 @@ static inline bool efi_is_native(void) __ret; \ }) -#define efi_call_proto(protocol, f, instance, ...) \ +#define efi_call_proto(inst, func, ...) \ (efi_is_native() \ - ? instance->f(instance, ##__VA_ARGS__) \ - : efi64_thunk(instance->mixed_mode.f, instance, ##__VA_ARGS__)) + ? 
inst->func(inst, ##__VA_ARGS__) \ + : efi64_thunk(inst->mixed_mode.func, inst, ##__VA_ARGS__)) #define efi_call_early(f, ...) \ (efi_is_native() \ -- cgit From 99ea8b1db2d23ac856bf3ee0673628df088a21ea Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 24 Dec 2019 16:10:22 +0100 Subject: efi/libstub: Drop 'table' argument from efi_table_attr() macro None of the definitions of the efi_table_attr() still refer to their 'table' argument so let's get rid of it entirely. Signed-off-by: Ard Biesheuvel Cc: Arvind Sankar Cc: Borislav Petkov Cc: James Morse Cc: Matt Fleming Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20191224151025.32482-23-ardb@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/efi.h | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index b7cd14e3a634..39814a0a92f7 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -216,16 +216,11 @@ static inline bool efi_is_native(void) __builtin_types_compatible_p(u32, __typeof__(attr)), \ (unsigned long)(attr), (attr)) -#define efi_table_attr(table, attr, instance) ({ \ - __typeof__(instance->attr) __ret; \ - if (efi_is_native()) { \ - __ret = instance->attr; \ - } else { \ - __ret = (__typeof__(__ret)) \ - efi_mixed_mode_cast(instance->mixed_mode.attr); \ - } \ - __ret; \ -}) +#define efi_table_attr(inst, attr) \ + (efi_is_native() \ + ? inst->attr \ + : (__typeof__(inst->attr)) \ + efi_mixed_mode_cast(inst->mixed_mode.attr)) #define efi_call_proto(inst, func, ...) \ (efi_is_native() \ @@ -235,16 +230,14 @@ static inline bool efi_is_native(void) #define efi_call_early(f, ...) \ (efi_is_native() \ ? efi_system_table()->boottime->f(__VA_ARGS__) \ - : efi64_thunk(efi_table_attr(efi_boot_services, \ - boottime, efi_system_table())->mixed_mode.f, \ - __VA_ARGS__)) + : efi64_thunk(efi_table_attr(efi_system_table(), \ + boottime)->mixed_mode.f, __VA_ARGS__)) #define efi_call_runtime(f, ...) \ (efi_is_native() \ ? efi_system_table()->runtime->f(__VA_ARGS__) \ - : efi64_thunk(efi_table_attr(efi_runtime_services, \ - runtime, efi_system_table())->mixed_mode.f, \ - __VA_ARGS__)) + : efi64_thunk(efi_table_attr(efi_system_table(), \ + runtime)->mixed_mode.f, __VA_ARGS__)) extern bool efi_reboot_required(void); extern bool efi_is_table_address(unsigned long phys_addr); -- cgit From 966291f6344d7eb6fc3204381a426bafa20a3d18 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 24 Dec 2019 16:10:23 +0100 Subject: efi/libstub: Rename efi_call_early/_runtime macros to be more intuitive The macros efi_call_early and efi_call_runtime are used to call EFI boot services and runtime services, respectively. However, the naming is confusing, given that the early vs runtime distinction may suggest that these are used for calling the same set of services either early or late (== at runtime), while in reality, the sets of services they can be used with are completely disjoint, and efi_call_runtime is also only usable in 'early' code. So do a global sweep to replace all occurrences with efi_bs_call or efi_rt_call, respectively, where BS and RT match the idiom used by the UEFI spec to refer to boot time or runtime services. While at it, use 'func' as the macro parameter name for the function pointers, which is less likely to collide and cause weird build errors. 
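The call-site impact of the rename, sketched with an illustrative allocate_pool() invocation (the arguments are assumptions, not taken from the patch):

   /* before */
   status = efi_call_early(allocate_pool, EFI_LOADER_DATA, len, (void **)&buf);

   /* after */
   status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, len, (void **)&buf);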
Signed-off-by: Ard Biesheuvel Cc: Arvind Sankar Cc: Borislav Petkov Cc: James Morse Cc: Matt Fleming Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20191224151025.32482-24-ardb@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/efi.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 39814a0a92f7..2d1378f19b74 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -227,17 +227,17 @@ static inline bool efi_is_native(void) ? inst->func(inst, ##__VA_ARGS__) \ : efi64_thunk(inst->mixed_mode.func, inst, ##__VA_ARGS__)) -#define efi_call_early(f, ...) \ +#define efi_bs_call(func, ...) \ (efi_is_native() \ - ? efi_system_table()->boottime->f(__VA_ARGS__) \ + ? efi_system_table()->boottime->func(__VA_ARGS__) \ : efi64_thunk(efi_table_attr(efi_system_table(), \ - boottime)->mixed_mode.f, __VA_ARGS__)) + boottime)->mixed_mode.func, __VA_ARGS__)) -#define efi_call_runtime(f, ...) \ +#define efi_rt_call(func, ...) \ (efi_is_native() \ - ? efi_system_table()->runtime->f(__VA_ARGS__) \ + ? efi_system_table()->runtime->func(__VA_ARGS__) \ : efi64_thunk(efi_table_attr(efi_system_table(), \ - runtime)->mixed_mode.f, __VA_ARGS__)) + runtime)->mixed_mode.func, __VA_ARGS__)) extern bool efi_reboot_required(void); extern bool efi_is_table_address(unsigned long phys_addr); -- cgit From 6cfcd6f001b42fdbe6948cd113a5024945a8f50f Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 3 Jan 2020 12:39:36 +0100 Subject: efi/x86: Re-disable RT services for 32-bit kernels running on 64-bit EFI Commit a8147dba75b1 ("efi/x86: Rename efi_is_native() to efi_is_mixed()") renamed and refactored efi_is_native() into efi_is_mixed(), but failed to take into account that these are not diametrical opposites. Mixed mode is a construct that permits 64-bit kernels to boot on 32-bit firmware, but there is another non-native combination which is supported, i.e., 32-bit kernels booting on 64-bit firmware, but only for boot and not for runtime services. Also, mixed mode can be disabled in Kconfig, in which case the 64-bit kernel can still be booted from 32-bit firmware, but without access to runtime services. Due to this oversight, efi_runtime_supported() now incorrectly returns true for such configurations, resulting in crashes at boot. So fix this by making efi_runtime_supported() aware of this. As a side effect, some efi_thunk_xxx() stubs have become obsolete, so remove them as well. 
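Spelled out as a sketch (derived from the commit message above, not from the hunk), the support matrix that efi_runtime_supported() has to encode:

   /*
    *  kernel   firmware   boot services   runtime services
    *  64-bit   64-bit     yes             yes (native)
    *  32-bit   32-bit     yes             yes (native)
    *  64-bit   32-bit     yes             only with CONFIG_EFI_MIXED,
    *                                      and not with efi=old_map
    *  32-bit   64-bit     yes             no
    */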
Signed-off-by: Ard Biesheuvel Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Arvind Sankar Cc: Matthew Garrett Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20200103113953.9571-4-ardb@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/efi.h | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 2d1378f19b74..b35b5d423e9d 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -163,10 +163,10 @@ static inline bool efi_is_mixed(void) static inline bool efi_runtime_supported(void) { - if (!efi_is_mixed()) + if (IS_ENABLED(CONFIG_X86_64) == efi_enabled(EFI_64BIT)) return true; - if (!efi_enabled(EFI_OLD_MEMMAP)) + if (IS_ENABLED(CONFIG_EFI_MIXED) && !efi_enabled(EFI_OLD_MEMMAP)) return true; return false; @@ -176,7 +176,6 @@ extern void parse_efi_setup(u64 phys_addr, u32 data_len); extern void efifb_setup_from_dmi(struct screen_info *si, const char *opt); -#ifdef CONFIG_EFI_MIXED extern void efi_thunk_runtime_setup(void); extern efi_status_t efi_thunk_set_virtual_address_map( void *phys_set_virtual_address_map, @@ -184,19 +183,6 @@ extern efi_status_t efi_thunk_set_virtual_address_map( unsigned long descriptor_size, u32 descriptor_version, efi_memory_desc_t *virtual_map); -#else -static inline void efi_thunk_runtime_setup(void) {} -static inline efi_status_t efi_thunk_set_virtual_address_map( - void *phys_set_virtual_address_map, - unsigned long memory_map_size, - unsigned long descriptor_size, - u32 descriptor_version, - efi_memory_desc_t *virtual_map) -{ - return EFI_SUCCESS; -} -#endif /* CONFIG_EFI_MIXED */ - /* arch specific definitions used by the stub code */ -- cgit From 89ed486532c4d155565cc4b7984a918ee3c58f80 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 3 Jan 2020 12:39:38 +0100 Subject: efi/x86: Avoid redundant cast of EFI firmware service pointer All EFI firmware call prototypes have been annotated as __efiapi, permitting us to attach attributes regarding the calling convention by overriding __efiapi to an architecture specific value. On 32-bit x86, EFI firmware calls use the plain calling convention where all arguments are passed via the stack, and cleaned up by the caller. Let's add this to the __efiapi definition so we no longer need to cast the function pointers before invoking them. Signed-off-by: Ard Biesheuvel Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Arvind Sankar Cc: Matthew Garrett Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20200103113953.9571-6-ardb@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/efi.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index b35b5d423e9d..09c3fc468793 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -51,13 +51,7 @@ extern asmlinkage unsigned long efi_call_phys(void *, ...); }) -/* - * Wrap all the virtual calls in a way that forces the parameters on the stack. - */ -#define arch_efi_call_virt(p, f, args...) \ -({ \ - ((efi_##f##_t __attribute__((regparm(0)))*) p->f)(args); \ -}) +#define arch_efi_call_virt(p, f, args...) 
p->f(args) #define efi_ioremap(addr, size, type, attr) ioremap_cache(addr, size) -- cgit From 6982947045734480b8b57521e8068073fe36bd14 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 3 Jan 2020 12:39:40 +0100 Subject: efi/x86: Split SetVirtualAddressMap() wrappers into 32 and 64 bit versions Split the phys_efi_set_virtual_address_map() routine into 32 and 64 bit versions, so we can simplify them individually in subsequent patches. There is very little overlap between the logic anyway, and this has already been factored out in prolog/epilog routines which are completely different between 32 bit and 64 bit. So let's take it one step further, and get rid of the overlap completely. Signed-off-by: Ard Biesheuvel Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Arvind Sankar Cc: Matthew Garrett Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20200103113953.9571-8-ardb@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/efi.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 09c3fc468793..e29e5dc0b750 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -61,8 +61,6 @@ extern asmlinkage u64 efi_call(void *fp, ...); -#define efi_call_phys(f, args...) efi_call((f), args) - /* * struct efi_scratch - Scratch space used while switching to/from efi_mm * @phys_stack: stack used during EFI Mixed Mode @@ -115,8 +113,6 @@ extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size, extern struct efi_scratch efi_scratch; extern void __init efi_set_executable(efi_memory_desc_t *md, bool executable); extern int __init efi_memblock_x86_reserve_range(void); -extern pgd_t * __init efi_call_phys_prolog(void); -extern void __init efi_call_phys_epilog(pgd_t *save_pgd); extern void __init efi_print_memmap(void); extern void __init efi_memory_uc(u64 addr, unsigned long size); extern void __init efi_map_region(efi_memory_desc_t *md); @@ -177,6 +173,10 @@ extern efi_status_t efi_thunk_set_virtual_address_map( unsigned long descriptor_size, u32 descriptor_version, efi_memory_desc_t *virtual_map); +efi_status_t efi_set_virtual_address_map(unsigned long memory_map_size, + unsigned long descriptor_size, + u32 descriptor_version, + efi_memory_desc_t *virtual_map); /* arch specific definitions used by the stub code */ -- cgit From a46d674068b69b3897fc0d659e25f74b7ab52647 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 3 Jan 2020 12:39:41 +0100 Subject: efi/x86: Simplify i386 efi_call_phys() firmware call wrapper The variadic efi_call_phys() wrapper that exists on i386 was originally created to call into any EFI firmware runtime service, but in practice, we only use it once, to call SetVirtualAddressMap() during early boot. The flexibility provided by the variadic nature also makes it type unsafe, and makes the assembler code more complicated than needed, since it has to deal with an unknown number of arguments living on the stack. So clean this up, by renaming the helper to efi_call_svam(), and dropping the unneeded complexity. Let's also drop the reference to the efi_phys struct and grab the address from the EFI system table directly. 
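The type-safety argument in a nutshell - the old prototype is quoted from the tree, while the new one is an illustrative assumption of what a fixed-signature helper looks like, not the literal patch:

   /* old: variadic, the compiler checks neither argument count nor types */
   extern asmlinkage unsigned long efi_call_phys(void *, ...);

   /* new: one fixed signature for the single remaining caller,
    * SetVirtualAddressMap() - any mismatch is now a compile error */
   extern asmlinkage efi_status_t
   efi_call_svam(unsigned long memory_map_size, unsigned long descriptor_size,
                 u32 descriptor_version, efi_memory_desc_t *virtual_map);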
Signed-off-by: Ard Biesheuvel Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Arvind Sankar Cc: Matthew Garrett Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20200103113953.9571-9-ardb@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/efi.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index e29e5dc0b750..cb08035b89a0 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -35,9 +35,6 @@ #define ARCH_EFI_IRQ_FLAGS_MASK X86_EFLAGS_IF #ifdef CONFIG_X86_32 - -extern asmlinkage unsigned long efi_call_phys(void *, ...); - #define arch_efi_call_virt_setup() \ ({ \ kernel_fpu_begin(); \ -- cgit From ea5e1919b44f09fce72d919fbb87f9611fc700a6 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 3 Jan 2020 12:39:43 +0100 Subject: efi/x86: Simplify mixed mode call wrapper Calling 32-bit EFI runtime services from a 64-bit OS involves switching back to the flat mapping with a stack carved out of memory that is 32-bit addressable. There is no need to actually execute the 64-bit part of this routine from the flat mapping as well, as long as the entry and return addresses fit in 32 bits. There is also no need to preserve part of the calling context in global variables: we can simply push the old stack pointer value to the new stack, and keep the return address from the code32 section in EBX. While at it, move the conditional check whether to invoke the mixed mode version of SetVirtualAddressMap() into the 64-bit implementation of the wrapper routine. Signed-off-by: Ard Biesheuvel Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Arvind Sankar Cc: Matthew Garrett Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20200103113953.9571-11-ardb@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/efi.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index cb08035b89a0..e7e9c6e057f9 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -164,12 +164,6 @@ extern void parse_efi_setup(u64 phys_addr, u32 data_len); extern void efifb_setup_from_dmi(struct screen_info *si, const char *opt); extern void efi_thunk_runtime_setup(void); -extern efi_status_t efi_thunk_set_virtual_address_map( - void *phys_set_virtual_address_map, - unsigned long memory_map_size, - unsigned long descriptor_size, - u32 descriptor_version, - efi_memory_desc_t *virtual_map); efi_status_t efi_set_virtual_address_map(unsigned long memory_map_size, unsigned long descriptor_size, u32 descriptor_version, efi_memory_desc_t *virtual_map); -- cgit From 14b864f4b5c402fe1ca394042beeea6fdf54f8f5 Mon Sep 17 00:00:00 2001 From: Arvind Sankar Date: Fri, 3 Jan 2020 12:39:48 +0100 Subject: efi/x86: Check number of arguments to variadic functions On x86 we need to thunk through assembler stubs to call the EFI services for mixed mode, and for runtime services in 64-bit mode. The assembler stubs have limits on how many arguments they handle. Introduce a few macros to check that we do not try to pass too many arguments to the stubs.
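The build-time check rests on the classic variadic-macro argument-counting idiom: the arguments shift a descending sentinel list so that the count lands in a fixed parameter position. A standalone toy version (COUNT_ARGS is invented for this sketch; the actual kernel macros follow in the diff below):

#include <stdio.h>

#define COUNT_ARGS(...)  COUNT_ARGS_(0, ##__VA_ARGS__, 7, 6, 5, 4, 3, 2, 1, 0)
#define COUNT_ARGS_(_0, _1, _2, _3, _4, _5, _6, _7, n, ...) n

int main(void)
{
	printf("%d\n", COUNT_ARGS(a));		/* prints 1 */
	printf("%d\n", COUNT_ARGS(a, b, c));	/* prints 3 */
	printf("%d\n", COUNT_ARGS());		/* prints 0, via GNU ##__VA_ARGS__ */
	return 0;
}

__efi_nargs() below uses the same shifting trick, except that it places a BUILD_BUG_ON_MSG() in the overflow slot, so passing an eighth argument fails the build instead of silently miscounting.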
Signed-off-by: Arvind Sankar Signed-off-by: Ard Biesheuvel Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Matthew Garrett Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20200103113953.9571-16-ardb@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/efi.h | 54 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 52 insertions(+), 2 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index e7e9c6e057f9..cfc450085584 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -8,6 +8,7 @@ #include #include #include +#include /* * We map the EFI regions needed for runtime services non-contiguously, @@ -34,6 +35,45 @@ #define ARCH_EFI_IRQ_FLAGS_MASK X86_EFLAGS_IF +/* + * The EFI services are called through variadic functions in many cases. These + * functions are implemented in assembler and support only a fixed number of + * arguments. The macros below allow us to check at build time that we don't + * try to call them with too many arguments. + * + * __efi_nargs() will return the number of arguments if it is 7 or less, and + * cause a BUILD_BUG otherwise. The limitations of the C preprocessor make it + * impossible to calculate the exact number of arguments beyond some + * pre-defined limit. The maximum number of arguments currently supported by + * any of the thunks is 7, so this is good enough for now and can be extended + * in the obvious way if we ever need more. + */ + +#define __efi_nargs(...) __efi_nargs_(__VA_ARGS__) +#define __efi_nargs_(...) __efi_nargs__(0, ##__VA_ARGS__, \ + __efi_arg_sentinel(7), __efi_arg_sentinel(6), \ + __efi_arg_sentinel(5), __efi_arg_sentinel(4), \ + __efi_arg_sentinel(3), __efi_arg_sentinel(2), \ + __efi_arg_sentinel(1), __efi_arg_sentinel(0)) +#define __efi_nargs__(_0, _1, _2, _3, _4, _5, _6, _7, n, ...) \ + __take_second_arg(n, \ + ({ BUILD_BUG_ON_MSG(1, "__efi_nargs limit exceeded"); 8; })) +#define __efi_arg_sentinel(n) , n + +/* + * __efi_nargs_check(f, n, ...) will cause a BUILD_BUG if the ellipsis + * represents more than n arguments. + */ + +#define __efi_nargs_check(f, n, ...) \ + __efi_nargs_check_(f, __efi_nargs(__VA_ARGS__), n) +#define __efi_nargs_check_(f, p, n) __efi_nargs_check__(f, p, n) +#define __efi_nargs_check__(f, p, n) ({ \ + BUILD_BUG_ON_MSG( \ + (p) > (n), \ + #f " called with too many arguments (" #p ">" #n ")"); \ +}) + #ifdef CONFIG_X86_32 #define arch_efi_call_virt_setup() \ ({ \ kernel_fpu_begin(); \ @@ -56,7 +96,12 @@ #define EFI_LOADER_SIGNATURE "EL64" -extern asmlinkage u64 efi_call(void *fp, ...); +extern asmlinkage u64 __efi_call(void *fp, ...); + +#define efi_call(...) ({ \ + __efi_nargs_check(efi_call, 7, __VA_ARGS__); \ + __efi_call(__VA_ARGS__); \ +}) /* * struct efi_scratch - Scratch space used while switching to/from efi_mm @@ -139,7 +184,12 @@ struct efi_setup_data { extern u64 efi_setup; #ifdef CONFIG_EFI -extern efi_status_t efi64_thunk(u32, ...); +extern efi_status_t __efi64_thunk(u32, ...); + +#define efi64_thunk(...) ({ \ + __efi_nargs_check(efi64_thunk, 6, __VA_ARGS__); \ + __efi64_thunk(__VA_ARGS__); \ +}) static inline bool efi_is_mixed(void) { -- cgit From ea7d87f98fa9675076fb5ad208d889b217e83889 Mon Sep 17 00:00:00 2001 From: Arvind Sankar Date: Fri, 3 Jan 2020 12:39:49 +0100 Subject: efi/x86: Allow translating 64-bit arguments for mixed mode calls Introduce the ability to define macros to perform argument translation for the calls that need it, and define them for the boot services that we currently use.
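Both translations are spelled out in the paragraphs below. As a quick userspace illustration of the first problem (toy code, not the kernel's; fw_store_key is a made-up stand-in for a 32-bit firmware service writing through an OUT UINTN * that the 64-bit kernel sees as a u64):

#include <stdint.h>
#include <stdio.h>

static void fw_store_key(uint32_t *key)
{
	*key = 0x1234;			/* the callee writes only 32 bits */
}

int main(void)
{
	uint64_t key = 0xdeadbeefcafef00dULL;	/* stale stack contents */

	fw_store_key((uint32_t *)&key);
	/* little-endian result: 0xdeadbeef00001234, upper half is garbage */
	printf("unzeroed: %#llx\n", (unsigned long long)key);

	key = 0;		/* what the zeroing translation guarantees */
	fw_store_key((uint32_t *)&key);
	printf("zeroed:   %#llx\n", (unsigned long long)key);	/* 0x1234 */
	return 0;
}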
When calling 32-bit firmware methods in mixed mode, all output parameters that are 32-bit according to the firmware, but 64-bit in the kernel (i.e. OUT UINTN * or OUT VOID **) must be initialized in the kernel, or the upper 32 bits may contain garbage. Define macros that zero out the upper 32 bits of the output before invoking the firmware method. When a 32-bit EFI call takes 64-bit arguments, the mixed-mode call must push the two 32-bit halves as separate arguments onto the stack. This can be achieved by splitting the argument into its two halves when calling the assembler thunk. Define a macro to do this for the free_pages boot service. Signed-off-by: Arvind Sankar Signed-off-by: Ard Biesheuvel Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Matthew Garrett Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20200103113953.9571-17-ardb@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/efi.h | 71 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 66 insertions(+), 5 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index cfc450085584..166f0386719e 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -243,22 +243,83 @@ static inline bool efi_is_native(void) : (__typeof__(inst->attr)) \ efi_mixed_mode_cast(inst->mixed_mode.attr)) +/* + * The following macros allow translating arguments if necessary from native to + * mixed mode. The use cases for this are to initialize the upper 32 bits of + * output parameters, and to split a 64-bit argument required by a 32-bit + * method into two halves so that it can be thunked properly. + * + * As examples, the AllocatePool boot service returns the address of the + * allocation, but it will not set the high 32 bits of the address. To ensure + * that the full 64-bit address is initialized, we zero-init the address before + * calling the thunk. + * + * The FreePages boot service takes a 64-bit physical address even in 32-bit + * mode. For the thunk to work correctly, a native 64-bit call of + * free_pages(addr, size) + * must be translated to + * efi64_thunk(free_pages, addr & U32_MAX, addr >> 32, size) + * so that the two 32-bit halves of addr get pushed onto the stack separately. + */ + +static inline void *efi64_zero_upper(void *p) +{ + ((u32 *)p)[1] = 0; + return p; +} + +#define __efi64_argmap_free_pages(addr, size) \ + ((addr), 0, (size)) + +#define __efi64_argmap_get_memory_map(mm_size, mm, key, size, ver) \ + ((mm_size), (mm), efi64_zero_upper(key), efi64_zero_upper(size), (ver)) + +#define __efi64_argmap_allocate_pool(type, size, buffer) \ + ((type), (size), efi64_zero_upper(buffer)) + +#define __efi64_argmap_handle_protocol(handle, protocol, interface) \ + ((handle), (protocol), efi64_zero_upper(interface)) + +#define __efi64_argmap_locate_protocol(protocol, reg, interface) \ + ((protocol), (reg), efi64_zero_upper(interface)) + +/* + * The macros below handle the plumbing for the argument mapping. To add a + * mapping for a specific EFI method, simply define a macro + * __efi64_argmap_<method name>, following the examples above. + */ + +#define __efi64_thunk_map(inst, func, ...) 
\ + efi64_thunk(inst->mixed_mode.func, \ + __efi64_argmap(__efi64_argmap_ ## func(__VA_ARGS__), \ + (__VA_ARGS__))) + +#define __efi64_argmap(mapped, args) \ + __PASTE(__efi64_argmap__, __efi_nargs(__efi_eat mapped))(mapped, args) +#define __efi64_argmap__0(mapped, args) __efi_eval mapped +#define __efi64_argmap__1(mapped, args) __efi_eval args + +#define __efi_eat(...) +#define __efi_eval(...) __VA_ARGS__ + +/* The three macros below handle dispatching via the thunk if needed */ + #define efi_call_proto(inst, func, ...) \ (efi_is_native() \ ? inst->func(inst, ##__VA_ARGS__) \ - : efi64_thunk(inst->mixed_mode.func, inst, ##__VA_ARGS__)) + : __efi64_thunk_map(inst, func, inst, ##__VA_ARGS__)) #define efi_bs_call(func, ...) \ (efi_is_native() \ ? efi_system_table()->boottime->func(__VA_ARGS__) \ - : efi64_thunk(efi_table_attr(efi_system_table(), \ - boottime)->mixed_mode.func, __VA_ARGS__)) + : __efi64_thunk_map(efi_table_attr(efi_system_table(), \ + boottime), func, __VA_ARGS__)) #define efi_rt_call(func, ...) \ (efi_is_native() \ ? efi_system_table()->runtime->func(__VA_ARGS__) \ - : efi64_thunk(efi_table_attr(efi_system_table(), \ - runtime)->mixed_mode.func, __VA_ARGS__)) + : __efi64_thunk_map(efi_table_attr(efi_system_table(), \ + runtime), func, __VA_ARGS__)) extern bool efi_reboot_required(void); extern bool efi_is_table_address(unsigned long phys_addr); -- cgit From 4444f8541dad16fefd9b8807ad1451e806ef1d94 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Fri, 3 Jan 2020 12:39:50 +0100 Subject: efi: Allow disabling PCI busmastering on bridges during boot Add an option to disable the busmaster bit in the control register on all PCI bridges before calling ExitBootServices() and passing control to the runtime kernel. System firmware may configure the IOMMU to prevent malicious PCI devices from being able to attack the OS via DMA. However, since firmware can't guarantee that the OS is IOMMU-aware, it will tear down IOMMU configuration when ExitBootServices() is called. This leaves a window during which a hostile device could still cause damage before Linux configures the IOMMU again. If CONFIG_EFI_DISABLE_PCI_DMA is enabled or "efi=disable_early_pci_dma" is passed on the command line, the EFI stub will clear the busmaster bit on all PCI bridges before ExitBootServices() is called. This will prevent any malicious PCI devices from being able to perform DMA until the kernel reenables busmastering after configuring the IOMMU. This option may cause failures with some poorly behaved hardware and should not be enabled without testing. The kernel command-line options "efi=disable_early_pci_dma" or "efi=no_disable_early_pci_dma" may be used to override the default. Note that PCI devices downstream from PCI bridges are disconnected from their drivers first, using the UEFI driver model API, so that DMA can be disabled safely at the bridge level.
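The bit in question is Bus Master Enable in each bridge's PCI command register. The stub clears it through the EFI PCI I/O protocol rather than the Linux PCI core, but the underlying config-space operation looks like this (a sketch using the kernel's PCI accessors purely for illustration; the helper name is hypothetical):

#include <linux/init.h>
#include <linux/pci.h>

static void __init sketch_clear_bridge_busmaster(struct pci_dev *bridge)
{
	u16 cmd;

	pci_read_config_word(bridge, PCI_COMMAND, &cmd);
	cmd &= ~PCI_COMMAND_MASTER;	/* bit 2: Bus Master Enable */
	pci_write_config_word(bridge, PCI_COMMAND, cmd);
}

With the bit clear, the bridge no longer forwards memory requests initiated behind it, so downstream devices cannot DMA into RAM until the kernel sets the bit again.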
[ardb: disconnect PCI I/O handles first, as suggested by Arvind] Co-developed-by: Matthew Garrett Signed-off-by: Matthew Garrett Signed-off-by: Ard Biesheuvel Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Arvind Sankar Cc: Matthew Garrett Cc: linux-efi@vger.kernel.org Link: https://lkml.kernel.org/r/20200103113953.9571-18-ardb@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/efi.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 166f0386719e..383f7a0fc170 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -283,6 +283,11 @@ static inline void *efi64_zero_upper(void *p) #define __efi64_argmap_locate_protocol(protocol, reg, interface) \ ((protocol), (reg), efi64_zero_upper(interface)) +/* PCI I/O */ +#define __efi64_argmap_get_location(protocol, seg, bus, dev, func) \ + ((protocol), efi64_zero_upper(seg), efi64_zero_upper(bus), \ + efi64_zero_upper(dev), efi64_zero_upper(func)) + /* * The macros below handle the plumbing for the argument mapping. To add a * mapping for a specific EFI method, simply define a macro -- cgit From 796eb8d26a57f917bf22d781666aeab491f5c4f1 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 13 Jan 2020 18:22:33 +0100 Subject: efi/libstub/x86: Use const attribute for efi_is_64bit() Reshuffle the x86 stub code a bit so that we can tag the efi_is_64bit() function with the 'const' attribute, which permits the compiler to optimize away any redundant calls. Since we have two different entry points for 32 and 64 bit firmware in the startup code, this also simplifies the C code, as we'll enter it with the efi_is64 variable already set. Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20200113172245.27925-2-ardb@kernel.org --- arch/x86/include/asm/efi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 383f7a0fc170..0a58468a7203 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -221,7 +221,7 @@ efi_status_t efi_set_virtual_address_map(unsigned long memory_map_size, /* arch specific definitions used by the stub code */ -__pure bool efi_is_64bit(void); +__attribute_const__ bool efi_is_64bit(void); static inline bool efi_is_native(void) { -- cgit From 1f299fad1e312947c974c6a1d8a3a484f27a6111 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 13 Jan 2020 18:22:39 +0100 Subject: efi/x86: Limit EFI old memory map to SGI UV machines We carry a quirk in the x86 EFI code to switch back to an older method of mapping the EFI runtime services memory regions, because it was deemed risky at the time to implement a new method without providing a fallback to the old method in case problems arose. Such problems did arise, but they appear to be limited to SGI UV1 machines, and so these are the only ones for which the fallback gets enabled automatically (via a DMI quirk). The fallback can be enabled manually as well, by passing efi=old_map, but there is very little evidence that this is being relied upon in the field. Given that UV1 support is not enabled by default by the distros (Ubuntu, Fedora), there is no point in carrying this fallback code all the time if there are no other users. So let's move it into the UV support code, and document that efi=old_map now requires this support code to be enabled.
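A DMI quirk of the kind referred to above takes roughly the following shape; the callback, ident and match string here are illustrative stand-ins, not the actual UV1 table entry, and EFI_UV1_MEMMAP is the flag introduced in the diff below:

#include <linux/dmi.h>
#include <linux/efi.h>
#include <linux/init.h>

/* Hypothetical callback: flag the machine so EFI setup picks the old map. */
static int __init force_uv1_memmap(const struct dmi_system_id *id)
{
	pr_info("EFI: %s detected, using the UV1 memmap\n", id->ident);
	set_bit(EFI_UV1_MEMMAP, &efi.flags);
	return 0;
}

static const struct dmi_system_id uv1_memmap_quirk[] __initconst = {
	{
		.callback = force_uv1_memmap,
		.ident = "SGI UV1",		/* illustrative match only */
		.matches = { DMI_MATCH(DMI_PRODUCT_NAME, "SGI UV1"), },
	},
	{ }
};

static void __init uv1_memmap_dmi_check(void)
{
	dmi_check_system(uv1_memmap_quirk);
}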
Note that efi=old_map has been used in the past on other SGI UV machines to work around kernel regressions in production, so we keep the option to enable it by hand, but only if the kernel was built with UV support. Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20200113172245.27925-8-ardb@kernel.org --- arch/x86/include/asm/efi.h | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 0a58468a7203..86169a24b0d8 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -20,13 +20,16 @@ * This is the main reason why we're doing stable VA mappings for RT * services. * - * This flag is used in conjunction with a chicken bit called - * "efi=old_map" which can be used as a fallback to the old runtime - * services mapping method in case there's some b0rkage with a - * particular EFI implementation (haha, it is hard to hold up the - * sarcasm here...). + * SGI UV1 machines are known to be incompatible with this scheme, so we + * provide an opt-out for these machines via a DMI quirk that sets the + * attribute below. */ -#define EFI_OLD_MEMMAP EFI_ARCH_1 +#define EFI_UV1_MEMMAP EFI_ARCH_1 + +static inline bool efi_have_uv1_memmap(void) +{ + return IS_ENABLED(CONFIG_X86_UV) && efi_enabled(EFI_UV1_MEMMAP); +} #define EFI32_LOADER_SIGNATURE "EL32" #define EFI64_LOADER_SIGNATURE "EL64" @@ -119,7 +122,7 @@ struct efi_scratch { kernel_fpu_begin(); \ firmware_restrict_branch_speculation_start(); \ \ - if (!efi_enabled(EFI_OLD_MEMMAP)) \ + if (!efi_have_uv1_memmap()) \ efi_switch_mm(&efi_mm); \ }) @@ -128,7 +131,7 @@ struct efi_scratch { #define arch_efi_call_virt_teardown() \ ({ \ - if (!efi_enabled(EFI_OLD_MEMMAP)) \ + if (!efi_have_uv1_memmap()) \ efi_switch_mm(efi_scratch.prev_mm); \ \ firmware_restrict_branch_speculation_end(); \ @@ -172,6 +175,8 @@ extern void efi_delete_dummy_variable(void); extern void efi_switch_mm(struct mm_struct *mm); extern void efi_recover_from_page_fault(unsigned long phys_addr); extern void efi_free_boot_services(void); +extern pgd_t * __init efi_uv1_memmap_phys_prolog(void); +extern void __init efi_uv1_memmap_phys_epilog(pgd_t *save_pgd); struct efi_setup_data { u64 fw_vendor; @@ -203,10 +208,7 @@ static inline bool efi_runtime_supported(void) if (IS_ENABLED(CONFIG_X86_64) == efi_enabled(EFI_64BIT)) return true; - if (IS_ENABLED(CONFIG_EFI_MIXED) && !efi_enabled(EFI_OLD_MEMMAP)) - return true; - - return false; + return IS_ENABLED(CONFIG_EFI_MIXED); } extern void parse_efi_setup(u64 phys_addr, u32 data_len); -- cgit
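One detail worth noting in the final hunk: efi_have_uv1_memmap() tests IS_ENABLED(CONFIG_X86_UV) rather than hiding the code behind #ifdef, so the UV1-only paths stay visible to the compiler in every configuration while constant folding removes them when UV support is off. A toy of the same shape (both helpers below are hypothetical):

#include <linux/kconfig.h>
#include <linux/types.h>

extern bool uv1_flag_set(void);		/* runtime part of the test */
extern void uv1_slow_path(void);	/* UV1-only work */

static inline bool have_uv1(void)
{
	/* constant-folds to false when CONFIG_X86_UV=n ... */
	return IS_ENABLED(CONFIG_X86_UV) && uv1_flag_set();
}

void maybe_do_uv1_work(void)
{
	/*
	 * ... so this branch and the call are discarded at compile time,
	 * while still being parsed and type-checked in every
	 * configuration, unlike an #ifdef block.
	 */
	if (have_uv1())
		uv1_slow_path();
}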