summaryrefslogtreecommitdiff
path: root/arch/x86/mm/pat
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/mm/pat')
-rw-r--r--arch/x86/mm/pat/cpa-test.c4
-rw-r--r--arch/x86/mm/pat/memtype.c157
-rw-r--r--arch/x86/mm/pat/set_memory.c123
3 files changed, 146 insertions, 138 deletions
diff --git a/arch/x86/mm/pat/cpa-test.c b/arch/x86/mm/pat/cpa-test.c
index 423b21e80929..3d2f7f0a6ed1 100644
--- a/arch/x86/mm/pat/cpa-test.c
+++ b/arch/x86/mm/pat/cpa-test.c
@@ -136,10 +136,10 @@ static int pageattr_test(void)
failed += print_split(&sa);
for (i = 0; i < NTEST; i++) {
- unsigned long pfn = prandom_u32_max(max_pfn_mapped);
+ unsigned long pfn = get_random_u32_below(max_pfn_mapped);
addr[i] = (unsigned long)__va(pfn << PAGE_SHIFT);
- len[i] = prandom_u32_max(NPAGES);
+ len[i] = get_random_u32_below(NPAGES);
len[i] = min_t(unsigned long, len[i], max_pfn_mapped - pfn - 1);
if (len[i] == 0)
diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c
index 66a209f7eb86..46de9cf5c91d 100644
--- a/arch/x86/mm/pat/memtype.c
+++ b/arch/x86/mm/pat/memtype.c
@@ -43,6 +43,7 @@
#include <linux/rbtree.h>
#include <asm/cacheflush.h>
+#include <asm/cacheinfo.h>
#include <asm/processor.h>
#include <asm/tlbflush.h>
#include <asm/x86_init.h>
@@ -60,41 +61,34 @@
#undef pr_fmt
#define pr_fmt(fmt) "" fmt
-static bool __read_mostly pat_bp_initialized;
static bool __read_mostly pat_disabled = !IS_ENABLED(CONFIG_X86_PAT);
-static bool __initdata pat_force_disabled = !IS_ENABLED(CONFIG_X86_PAT);
-static bool __read_mostly pat_bp_enabled;
-static bool __read_mostly pat_cm_initialized;
+static u64 __ro_after_init pat_msr_val;
/*
* PAT support is enabled by default, but can be disabled for
* various user-requested or hardware-forced reasons:
*/
-void pat_disable(const char *msg_reason)
+static void __init pat_disable(const char *msg_reason)
{
if (pat_disabled)
return;
- if (pat_bp_initialized) {
- WARN_ONCE(1, "x86/PAT: PAT cannot be disabled after initialization\n");
- return;
- }
-
pat_disabled = true;
pr_info("x86/PAT: %s\n", msg_reason);
+
+ memory_caching_control &= ~CACHE_PAT;
}
static int __init nopat(char *str)
{
pat_disable("PAT support disabled via boot option.");
- pat_force_disabled = true;
return 0;
}
early_param("nopat", nopat);
bool pat_enabled(void)
{
- return pat_bp_enabled;
+ return !pat_disabled;
}
EXPORT_SYMBOL_GPL(pat_enabled);
@@ -192,7 +186,8 @@ enum {
#define CM(c) (_PAGE_CACHE_MODE_ ## c)
-static enum page_cache_mode pat_get_cache_mode(unsigned pat_val, char *msg)
+static enum page_cache_mode __init pat_get_cache_mode(unsigned int pat_val,
+ char *msg)
{
enum page_cache_mode cache;
char *cache_mode;
@@ -219,14 +214,12 @@ static enum page_cache_mode pat_get_cache_mode(unsigned pat_val, char *msg)
* configuration.
* Using lower indices is preferred, so we start with highest index.
*/
-static void __init_cache_modes(u64 pat)
+static void __init init_cache_modes(u64 pat)
{
enum page_cache_mode cache;
char pat_msg[33];
int i;
- WARN_ON_ONCE(pat_cm_initialized);
-
pat_msg[32] = 0;
for (i = 7; i >= 0; i--) {
cache = pat_get_cache_mode((pat >> (i * 8)) & 7,
@@ -234,34 +227,9 @@ static void __init_cache_modes(u64 pat)
update_cache_mode_entry(i, cache);
}
pr_info("x86/PAT: Configuration [0-7]: %s\n", pat_msg);
-
- pat_cm_initialized = true;
}
-#define PAT(x, y) ((u64)PAT_ ## y << ((x)*8))
-
-static void pat_bp_init(u64 pat)
-{
- u64 tmp_pat;
-
- if (!boot_cpu_has(X86_FEATURE_PAT)) {
- pat_disable("PAT not supported by the CPU.");
- return;
- }
-
- rdmsrl(MSR_IA32_CR_PAT, tmp_pat);
- if (!tmp_pat) {
- pat_disable("PAT support disabled by the firmware.");
- return;
- }
-
- wrmsrl(MSR_IA32_CR_PAT, pat);
- pat_bp_enabled = true;
-
- __init_cache_modes(pat);
-}
-
-static void pat_ap_init(u64 pat)
+void pat_cpu_init(void)
{
if (!boot_cpu_has(X86_FEATURE_PAT)) {
/*
@@ -271,30 +239,39 @@ static void pat_ap_init(u64 pat)
panic("x86/PAT: PAT enabled, but not supported by secondary CPU\n");
}
- wrmsrl(MSR_IA32_CR_PAT, pat);
+ wrmsrl(MSR_IA32_CR_PAT, pat_msr_val);
}
-void __init init_cache_modes(void)
+/**
+ * pat_bp_init - Initialize the PAT MSR value and PAT table
+ *
+ * This function initializes PAT MSR value and PAT table with an OS-defined
+ * value to enable additional cache attributes, WC, WT and WP.
+ *
+ * This function prepares the calls of pat_cpu_init() via cache_cpu_init()
+ * on all CPUs.
+ */
+void __init pat_bp_init(void)
{
- u64 pat = 0;
+ struct cpuinfo_x86 *c = &boot_cpu_data;
+#define PAT(p0, p1, p2, p3, p4, p5, p6, p7) \
+ (((u64)PAT_ ## p0) | ((u64)PAT_ ## p1 << 8) | \
+ ((u64)PAT_ ## p2 << 16) | ((u64)PAT_ ## p3 << 24) | \
+ ((u64)PAT_ ## p4 << 32) | ((u64)PAT_ ## p5 << 40) | \
+ ((u64)PAT_ ## p6 << 48) | ((u64)PAT_ ## p7 << 56))
- if (pat_cm_initialized)
- return;
- if (boot_cpu_has(X86_FEATURE_PAT)) {
- /*
- * CPU supports PAT. Set PAT table to be consistent with
- * PAT MSR. This case supports "nopat" boot option, and
- * virtual machine environments which support PAT without
- * MTRRs. In specific, Xen has unique setup to PAT MSR.
- *
- * If PAT MSR returns 0, it is considered invalid and emulates
- * as No PAT.
- */
- rdmsrl(MSR_IA32_CR_PAT, pat);
- }
+ if (!IS_ENABLED(CONFIG_X86_PAT))
+ pr_info_once("x86/PAT: PAT support disabled because CONFIG_X86_PAT is disabled in the kernel.\n");
+
+ if (!cpu_feature_enabled(X86_FEATURE_PAT))
+ pat_disable("PAT not supported by the CPU.");
+ else
+ rdmsrl(MSR_IA32_CR_PAT, pat_msr_val);
+
+ if (!pat_msr_val) {
+ pat_disable("PAT support disabled by the firmware.");
- if (!pat) {
/*
* No PAT. Emulate the PAT table that corresponds to the two
* cache bits, PWT (Write Through) and PCD (Cache Disable).
@@ -313,40 +290,22 @@ void __init init_cache_modes(void)
* NOTE: When WC or WP is used, it is redirected to UC- per
* the default setup in __cachemode2pte_tbl[].
*/
- pat = PAT(0, WB) | PAT(1, WT) | PAT(2, UC_MINUS) | PAT(3, UC) |
- PAT(4, WB) | PAT(5, WT) | PAT(6, UC_MINUS) | PAT(7, UC);
- } else if (!pat_force_disabled && cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) {
- /*
- * Clearly PAT is enabled underneath. Allow pat_enabled() to
- * reflect this.
- */
- pat_bp_enabled = true;
+ pat_msr_val = PAT(WB, WT, UC_MINUS, UC, WB, WT, UC_MINUS, UC);
}
- __init_cache_modes(pat);
-}
-
-/**
- * pat_init - Initialize the PAT MSR and PAT table on the current CPU
- *
- * This function initializes PAT MSR and PAT table with an OS-defined value
- * to enable additional cache attributes, WC, WT and WP.
- *
- * This function must be called on all CPUs using the specific sequence of
- * operations defined in Intel SDM. mtrr_rendezvous_handler() provides this
- * procedure for PAT.
- */
-void pat_init(void)
-{
- u64 pat;
- struct cpuinfo_x86 *c = &boot_cpu_data;
-
-#ifndef CONFIG_X86_PAT
- pr_info_once("x86/PAT: PAT support disabled because CONFIG_X86_PAT is disabled in the kernel.\n");
-#endif
-
- if (pat_disabled)
+ /*
+ * Xen PV doesn't allow to set PAT MSR, but all cache modes are
+ * supported.
+ * When running as TDX guest setting the PAT MSR won't work either
+ * due to the requirement to set CR0.CD when doing so. Rely on
+ * firmware to have set the PAT MSR correctly.
+ */
+ if (pat_disabled ||
+ cpu_feature_enabled(X86_FEATURE_XENPV) ||
+ cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) {
+ init_cache_modes(pat_msr_val);
return;
+ }
if ((c->x86_vendor == X86_VENDOR_INTEL) &&
(((c->x86 == 0x6) && (c->x86_model <= 0xd)) ||
@@ -371,8 +330,7 @@ void pat_init(void)
* NOTE: When WT or WP is used, it is redirected to UC- per
* the default setup in __cachemode2pte_tbl[].
*/
- pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) |
- PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC);
+ pat_msr_val = PAT(WB, WC, UC_MINUS, UC, WB, WC, UC_MINUS, UC);
} else {
/*
* Full PAT support. We put WT in slot 7 to improve
@@ -400,19 +358,14 @@ void pat_init(void)
* The reserved slots are unused, but mapped to their
* corresponding types in the presence of PAT errata.
*/
- pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) |
- PAT(4, WB) | PAT(5, WP) | PAT(6, UC_MINUS) | PAT(7, WT);
+ pat_msr_val = PAT(WB, WC, UC_MINUS, UC, WB, WP, UC_MINUS, WT);
}
- if (!pat_bp_initialized) {
- pat_bp_init(pat);
- pat_bp_initialized = true;
- } else {
- pat_ap_init(pat);
- }
-}
+ memory_caching_control |= CACHE_PAT;
+ init_cache_modes(pat_msr_val);
#undef PAT
+}
static DEFINE_SPINLOCK(memtype_lock); /* protects memtype accesses */
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 2e5a045731de..356758b7d4b4 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -20,6 +20,7 @@
#include <linux/kernel.h>
#include <linux/cc_platform.h>
#include <linux/set_memory.h>
+#include <linux/memregion.h>
#include <asm/e820/api.h>
#include <asm/processor.h>
@@ -219,6 +220,23 @@ within_inclusive(unsigned long addr, unsigned long start, unsigned long end)
#ifdef CONFIG_X86_64
+/*
+ * The kernel image is mapped into two places in the virtual address space
+ * (addresses without KASLR, of course):
+ *
+ * 1. The kernel direct map (0xffff880000000000)
+ * 2. The "high kernel map" (0xffffffff81000000)
+ *
+ * We actually execute out of #2. If we get the address of a kernel symbol, it
+ * points to #2, but almost all physical-to-virtual translations point to #1.
+ *
+ * This is so that we can have both a directmap of all physical memory *and*
+ * take full advantage of the the limited (s32) immediate addressing range (2G)
+ * of x86_64.
+ *
+ * See Documentation/x86/x86_64/mm.rst for more detail.
+ */
+
static inline unsigned long highmap_start_pfn(void)
{
return __pa_symbol(_text) >> PAGE_SHIFT;
@@ -330,6 +348,23 @@ void arch_invalidate_pmem(void *addr, size_t size)
EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
#endif
+#ifdef CONFIG_ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION
+bool cpu_cache_has_invalidate_memregion(void)
+{
+ return !cpu_feature_enabled(X86_FEATURE_HYPERVISOR);
+}
+EXPORT_SYMBOL_NS_GPL(cpu_cache_has_invalidate_memregion, DEVMEM);
+
+int cpu_cache_invalidate_memregion(int res_desc)
+{
+ if (WARN_ON_ONCE(!cpu_cache_has_invalidate_memregion()))
+ return -ENXIO;
+ wbinvd_on_all_cpus();
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(cpu_cache_invalidate_memregion, DEVMEM);
+#endif
+
static void __cpa_flush_all(void *arg)
{
unsigned long cache = (unsigned long)arg;
@@ -587,10 +622,6 @@ static inline pgprot_t verify_rwx(pgprot_t old, pgprot_t new, unsigned long star
{
unsigned long end;
- /* Kernel text is rw at boot up */
- if (system_state == SYSTEM_BOOTING)
- return new;
-
/*
* 32-bit has some unfixable W+X issues, like EFI code
* and writeable data being in the same page. Disable
@@ -747,11 +778,11 @@ phys_addr_t slow_virt_to_phys(void *__virt_addr)
switch (level) {
case PG_LEVEL_1G:
phys_addr = (phys_addr_t)pud_pfn(*(pud_t *)pte) << PAGE_SHIFT;
- offset = virt_addr & ~PUD_PAGE_MASK;
+ offset = virt_addr & ~PUD_MASK;
break;
case PG_LEVEL_2M:
phys_addr = (phys_addr_t)pmd_pfn(*(pmd_t *)pte) << PAGE_SHIFT;
- offset = virt_addr & ~PMD_PAGE_MASK;
+ offset = virt_addr & ~PMD_MASK;
break;
default:
phys_addr = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
@@ -1041,7 +1072,7 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
case PG_LEVEL_1G:
ref_prot = pud_pgprot(*(pud_t *)kpte);
ref_pfn = pud_pfn(*(pud_t *)kpte);
- pfninc = PMD_PAGE_SIZE >> PAGE_SHIFT;
+ pfninc = PMD_SIZE >> PAGE_SHIFT;
lpaddr = address & PUD_MASK;
lpinc = PMD_SIZE;
/*
@@ -1628,8 +1659,11 @@ repeat:
return err;
}
-static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias);
+static int __change_page_attr_set_clr(struct cpa_data *cpa, int primary);
+/*
+ * Check the directmap and "high kernel map" 'aliases'.
+ */
static int cpa_process_alias(struct cpa_data *cpa)
{
struct cpa_data alias_cpa;
@@ -1653,6 +1687,12 @@ static int cpa_process_alias(struct cpa_data *cpa)
alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
alias_cpa.curpage = 0;
+ /* Directmap always has NX set, do not modify. */
+ if (__supported_pte_mask & _PAGE_NX) {
+ alias_cpa.mask_clr.pgprot &= ~_PAGE_NX;
+ alias_cpa.mask_set.pgprot &= ~_PAGE_NX;
+ }
+
cpa->force_flush_all = 1;
ret = __change_page_attr_set_clr(&alias_cpa, 0);
@@ -1675,6 +1715,15 @@ static int cpa_process_alias(struct cpa_data *cpa)
alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
alias_cpa.curpage = 0;
+ /*
+ * [_text, _brk_end) also covers data, do not modify NX except
+ * in cases where the highmap is the primary target.
+ */
+ if (__supported_pte_mask & _PAGE_NX) {
+ alias_cpa.mask_clr.pgprot &= ~_PAGE_NX;
+ alias_cpa.mask_set.pgprot &= ~_PAGE_NX;
+ }
+
cpa->force_flush_all = 1;
/*
* The high mapping range is imprecise, so ignore the
@@ -1687,12 +1736,19 @@ static int cpa_process_alias(struct cpa_data *cpa)
return 0;
}
-static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
+static int __change_page_attr_set_clr(struct cpa_data *cpa, int primary)
{
unsigned long numpages = cpa->numpages;
unsigned long rempages = numpages;
int ret = 0;
+ /*
+ * No changes, easy!
+ */
+ if (!(pgprot_val(cpa->mask_set) | pgprot_val(cpa->mask_clr)) &&
+ !cpa->force_split)
+ return ret;
+
while (rempages) {
/*
* Store the remaining nr of pages for the large page
@@ -1705,13 +1761,13 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
if (!debug_pagealloc_enabled())
spin_lock(&cpa_lock);
- ret = __change_page_attr(cpa, checkalias);
+ ret = __change_page_attr(cpa, primary);
if (!debug_pagealloc_enabled())
spin_unlock(&cpa_lock);
if (ret)
goto out;
- if (checkalias) {
+ if (primary && !(cpa->flags & CPA_NO_CHECK_ALIAS)) {
ret = cpa_process_alias(cpa);
if (ret)
goto out;
@@ -1739,7 +1795,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
struct page **pages)
{
struct cpa_data cpa;
- int ret, cache, checkalias;
+ int ret, cache;
memset(&cpa, 0, sizeof(cpa));
@@ -1785,20 +1841,11 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
cpa.numpages = numpages;
cpa.mask_set = mask_set;
cpa.mask_clr = mask_clr;
- cpa.flags = 0;
+ cpa.flags = in_flag;
cpa.curpage = 0;
cpa.force_split = force_split;
- if (in_flag & (CPA_ARRAY | CPA_PAGES_ARRAY))
- cpa.flags |= in_flag;
-
- /* No alias checking for _NX bit modifications */
- checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX;
- /* Has caller explicitly disabled alias checking? */
- if (in_flag & CPA_NO_CHECK_ALIAS)
- checkalias = 0;
-
- ret = __change_page_attr_set_clr(&cpa, checkalias);
+ ret = __change_page_attr_set_clr(&cpa, 1);
/*
* Check whether we really changed something:
@@ -2029,6 +2076,16 @@ int set_memory_ro(unsigned long addr, int numpages)
return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_RW), 0);
}
+int set_memory_rox(unsigned long addr, int numpages)
+{
+ pgprot_t clr = __pgprot(_PAGE_RW);
+
+ if (__supported_pte_mask & _PAGE_NX)
+ clr.pgprot |= _PAGE_NX;
+
+ return change_page_attr_clear(&addr, numpages, clr, 0);
+}
+
int set_memory_rw(unsigned long addr, int numpages)
{
return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_RW), 0);
@@ -2041,11 +2098,9 @@ int set_memory_np(unsigned long addr, int numpages)
int set_memory_np_noalias(unsigned long addr, int numpages)
{
- int cpa_flags = CPA_NO_CHECK_ALIAS;
-
return change_page_attr_set_clr(&addr, numpages, __pgprot(0),
__pgprot(_PAGE_PRESENT), 0,
- cpa_flags, NULL);
+ CPA_NO_CHECK_ALIAS, NULL);
}
int set_memory_4k(unsigned long addr, int numpages)
@@ -2262,7 +2317,7 @@ static int __set_pages_p(struct page *page, int numpages)
.numpages = numpages,
.mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW),
.mask_clr = __pgprot(0),
- .flags = 0};
+ .flags = CPA_NO_CHECK_ALIAS };
/*
* No alias checking needed for setting present flag. otherwise,
@@ -2270,7 +2325,7 @@ static int __set_pages_p(struct page *page, int numpages)
* mappings (this adds to complexity if we want to do this from
* atomic context especially). Let's keep it simple!
*/
- return __change_page_attr_set_clr(&cpa, 0);
+ return __change_page_attr_set_clr(&cpa, 1);
}
static int __set_pages_np(struct page *page, int numpages)
@@ -2281,7 +2336,7 @@ static int __set_pages_np(struct page *page, int numpages)
.numpages = numpages,
.mask_set = __pgprot(0),
.mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW),
- .flags = 0};
+ .flags = CPA_NO_CHECK_ALIAS };
/*
* No alias checking needed for setting not present flag. otherwise,
@@ -2289,7 +2344,7 @@ static int __set_pages_np(struct page *page, int numpages)
* mappings (this adds to complexity if we want to do this from
* atomic context especially). Let's keep it simple!
*/
- return __change_page_attr_set_clr(&cpa, 0);
+ return __change_page_attr_set_clr(&cpa, 1);
}
int set_direct_map_invalid_noflush(struct page *page)
@@ -2360,7 +2415,7 @@ int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
.numpages = numpages,
.mask_set = __pgprot(0),
.mask_clr = __pgprot(~page_flags & (_PAGE_NX|_PAGE_RW)),
- .flags = 0,
+ .flags = CPA_NO_CHECK_ALIAS,
};
WARN_ONCE(num_online_cpus() > 1, "Don't call after initializing SMP");
@@ -2373,7 +2428,7 @@ int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
cpa.mask_set = __pgprot(_PAGE_PRESENT | page_flags);
- retval = __change_page_attr_set_clr(&cpa, 0);
+ retval = __change_page_attr_set_clr(&cpa, 1);
__flush_tlb_all();
out:
@@ -2403,12 +2458,12 @@ int __init kernel_unmap_pages_in_pgd(pgd_t *pgd, unsigned long address,
.numpages = numpages,
.mask_set = __pgprot(0),
.mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW),
- .flags = 0,
+ .flags = CPA_NO_CHECK_ALIAS,
};
WARN_ONCE(num_online_cpus() > 1, "Don't call after initializing SMP");
- retval = __change_page_attr_set_clr(&cpa, 0);
+ retval = __change_page_attr_set_clr(&cpa, 1);
__flush_tlb_all();
return retval;