From 318995b4f5fa814b9f9aa434ca845b9a2cb0ae02 Mon Sep 17 00:00:00 2001
From: Ram Pai
Date: Mon, 6 Nov 2017 00:50:46 -0800
Subject: powerpc: introduce pte_get_hash_gslot() helper

Introduce pte_get_hash_gslot(), which returns the global slot number of the
HPTE in the global hash table. This function will come in handy as we work
towards re-arranging the PTE bits in later patches.

Reviewed-by: Aneesh Kumar K.V
Signed-off-by: Ram Pai
Signed-off-by: Michael Ellerman
---
 arch/powerpc/mm/hash_utils_64.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'arch/powerpc/mm/hash_utils_64.c')

diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 655a5a9a183d..b35bd0202934 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -1592,6 +1592,24 @@ static inline void tm_flush_hash_page(int local)
 }
 #endif

+/*
+ * Return the global hash slot, corresponding to the given PTE, which contains
+ * the HPTE.
+ */
+unsigned long pte_get_hash_gslot(unsigned long vpn, unsigned long shift,
+		int ssize, real_pte_t rpte, unsigned int subpg_index)
+{
+        unsigned long hash, gslot, hidx;
+
+        hash = hpt_hash(vpn, shift, ssize);
+        hidx = __rpte_to_hidx(rpte, subpg_index);
+        if (hidx & _PTEIDX_SECONDARY)
+                hash = ~hash;
+        gslot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+        gslot += hidx & _PTEIDX_GROUP_IX;
+        return gslot;
+}
+
 /* WARNING: This is called from hash_low_64.S, if you change this prototype,
  * do not forget to update the assembly call site !
  */
-- cgit
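For reference, a minimal sketch of how a caller might pair this helper with an
HPTE invalidation, mirroring the flush_hash_page() conversion later in this
series. The wrapper name is hypothetical, and the vpn/shift/ssize/subpg_index
values are assumed to come from the usual pte_iterate_hashed_subpages() walk:

        /*
         * Illustrative sketch only: invalidate the HPTE backing one subpage
         * of a 64K PTE using pte_get_hash_gslot().
         */
        static void example_invalidate_subpage(unsigned long vpn, unsigned long shift,
                                               int ssize, real_pte_t rpte,
                                               unsigned int subpg_index,
                                               int psize, int local)
        {
                unsigned long gslot;

                /* global slot = hash bucket * HPTES_PER_GROUP + slot within the group */
                gslot = pte_get_hash_gslot(vpn, shift, ssize, rpte, subpg_index);

                /* same base and actual page size, as in the non-hugepage flush path */
                mmu_hash_ops.hpte_invalidate(gslot, vpn, psize, psize, ssize, local);
        }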
From 9d2edb18486febea930ea028dd128544cc28f3fe Mon Sep 17 00:00:00 2001
From: Ram Pai
Date: Mon, 6 Nov 2017 00:50:47 -0800
Subject: powerpc: Free up four 64K PTE bits in 4K backed HPTE pages

Rearrange 64K PTE bits to free up bits 3, 4, 5 and 6 in the 4K backed HPTE
pages. These bits continue to be used for 64K backed HPTE pages in this patch,
but will be freed up in the next patch. The bit numbers are big-endian, as
defined in ISA 3.0.

The patch makes the following change to the format of a 64K PTE backed by
4K HPTEs:

H_PAGE_BUSY moves from bit 3 to bit 9 (B bit in the figure below)
V0 which occupied bit 4 is not used anymore.
V1 which occupied bit 5 is not used anymore.
V2 which occupied bit 6 is not used anymore.
V3 which occupied bit 7 is not used anymore.

Before the patch, the 4K backed 64K PTE format was as follows

 0 1 2 3 4 5 6 7 8 9 10...........................63
 : : : : : : : : : : :                            :
 v v v v v v v v v v v                            v

,-,-,-,-,--,--,--,--,-,-,-,-,-,------------------,-,-,-,
|x|x|x|B|V0|V1|V2|V3|x| | |x|x|................|x|x|x|x| <- primary pte
'_'_'_'_'__'__'__'__'_'_'_'_'_'________________'_'_'_'_'
|S|G|I|X|S |G |I |X |S|G|I|X|..................|S|G|I|X| <- secondary pte
'_'_'_'_'__'__'__'__'_'_'_'_'__________________'_'_'_'_'

After the patch, the 4K backed 64K PTE format is as follows

 0 1 2 3 4 5 6 7 8 9 10...........................63
 : : : : : : : : : : :                            :
 v v v v v v v v v v v                            v

,-,-,-,-,--,--,--,--,-,-,-,-,-,------------------,-,-,-,
|x|x|x| |  |  |  |  |x|B| |x|x|................|.|.|.|.| <- primary pte
'_'_'_'_'__'__'__'__'_'_'_'_'_'________________'_'_'_'_'
|S|G|I|X|S |G |I |X |S|G|I|X|..................|S|G|I|X| <- secondary pte
'_'_'_'_'__'__'__'__'_'_'_'_'__________________'_'_'_'_'

The four bits S, G, I, X (one quadruplet per 4K HPTE) that cache the
hash-bucket slot value are initialized to 1,1,1,1, indicating an invalid slot.
If an HPTE gets cached in a 1111 slot (i.e. the 7th slot of the secondary hash
bucket), it is released immediately. In other words, even though 1111 is a
valid slot value in the hash bucket, we consider it invalid and release the
slot and the HPTE. This gives us the opportunity to determine the validity of
the S, G, I, X bits based on their contents and not on any of the bits V0, V1,
V2 or V3 in the primary PTE.

When we release an HPTE cached in the 1111 slot we also release a legitimate
slot in the primary hash bucket and unmap its corresponding HPTE. This is to
ensure that we do get an HPTE cached in a slot of the primary hash bucket the
next time we retry.

Though treating the 1111 slot as invalid reduces the number of available slots
in the hash bucket and may have an effect on performance, the probability of
hitting a 1111 slot is extremely low.

Compared to the current scheme, the above scheme reduces the number of false
hash table updates significantly and has the added advantage of releasing four
valuable PTE bits for other purposes.

NOTE: even though bits 3, 4, 5, 6 and 7 are not used when the 64K PTE is
backed by a 4K HPTE, they continue to be used if the PTE gets backed by a 64K
HPTE. The next patch will decouple that as well, and truly release the bits.

This idea was jointly developed by Paul Mackerras, Aneesh, Michael Ellerman
and myself.

The 4K PTE format remains unchanged currently.

The patch makes the following code changes:
a) PTE flags are split between the 64K and 4K header files.
b) __hash_page_4K() is reimplemented to reflect the above logic.

Acked-by: Balbir Singh
Reviewed-by: Aneesh Kumar K.V
Signed-off-by: Ram Pai
Signed-off-by: Michael Ellerman
---
 arch/powerpc/mm/hash_utils_64.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch/powerpc/mm/hash_utils_64.c')

diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index b35bd0202934..88102b6c664c 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -979,8 +979,9 @@ void __init hash__early_init_devtree(void)

 void __init hash__early_init_mmu(void)
 {
+#ifndef CONFIG_PPC_64K_PAGES
        /*
-        * We have code in __hash_page_64K() and elsewhere, which assumes it can
+        * We have code in __hash_page_4K() and elsewhere, which assumes it can
         * do the following:
         *   new_pte |= (slot << H_PAGE_F_GIX_SHIFT) & (H_PAGE_F_SECOND | H_PAGE_F_GIX);
         *
@@ -991,6 +992,7 @@ void __init hash__early_init_mmu(void)
         * with a BUILD_BUG_ON().
         */
        BUILD_BUG_ON(H_PAGE_F_SECOND != (1ul << (H_PAGE_F_GIX_SHIFT + 3)));
+#endif /* CONFIG_PPC_64K_PAGES */

        htab_init_page_sizes();
-- cgit
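The 1111-as-invalid convention described above can be captured in a few lines.
The helper names and the constant below are illustrative only, not the
kernel's actual interface; they assume, as in the message, that bit 3 of the
cached value selects the secondary bucket and bits 0-2 the slot in the group:

        /*
         * Illustrative sketch: the 4-bit S,G,I,X value ("hidx") caches which
         * of the 8 primary or 8 secondary hash-bucket slots holds the HPTE.
         * All-ones (0xF) would mean "secondary bucket, slot 7"; this series
         * instead reserves it to mean "no valid slot cached", so no separate
         * valid bit is needed.  Hypothetical helpers, not the real API.
         */
        #define EXAMPLE_HIDX_INVALID    0xFUL   /* 1111: treated as "not cached" */

        static bool example_hidx_is_cached(unsigned long hidx)
        {
                return (hidx & 0xFUL) != EXAMPLE_HIDX_INVALID;
        }

        static unsigned long example_hidx_encode(bool secondary, unsigned long group_ix)
        {
                /* bit 3 selects the secondary bucket, bits 0-2 the slot within it */
                return (secondary ? 0x8UL : 0x0UL) | (group_ix & 0x7UL);
        }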
From a8548686463b67d66084d51a2737dc8d49369876 Mon Sep 17 00:00:00 2001
From: Ram Pai
Date: Mon, 6 Nov 2017 00:50:51 -0800
Subject: powerpc: use helper functions to get and set hash slots

Replace redundant code in __hash_page_4K() and flush_hash_page() with the
helper functions pte_get_hash_gslot() and pte_set_hidx().

Reviewed-by: Aneesh Kumar K.V
Signed-off-by: Ram Pai
Signed-off-by: Michael Ellerman
---
 arch/powerpc/mm/hash_utils_64.c | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

(limited to 'arch/powerpc/mm/hash_utils_64.c')

diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 88102b6c664c..0c802ded6f21 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -1618,23 +1618,18 @@ unsigned long pte_get_hash_gslot(unsigned long vpn, unsigned long shift,
 void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize,
                     unsigned long flags)
 {
-       unsigned long hash, index, shift, hidx, slot;
+       unsigned long index, shift, gslot;
        int local = flags & HPTE_LOCAL_UPDATE;

        DBG_LOW("flush_hash_page(vpn=%016lx)\n", vpn);
        pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
-               hash = hpt_hash(vpn, shift, ssize);
-               hidx = __rpte_to_hidx(pte, index);
-               if (hidx & _PTEIDX_SECONDARY)
-                       hash = ~hash;
-               slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
-               slot += hidx & _PTEIDX_GROUP_IX;
-               DBG_LOW(" sub %ld: hash=%lx, hidx=%lx\n", index, slot, hidx);
+               gslot = pte_get_hash_gslot(vpn, shift, ssize, pte, index);
+               DBG_LOW(" sub %ld: gslot=%lx\n", index, gslot);
                /*
                 * We use same base page size and actual psize, because we don't
                 * use these functions for hugepage
                 */
-               mmu_hash_ops.hpte_invalidate(slot, vpn, psize, psize,
+               mmu_hash_ops.hpte_invalidate(gslot, vpn, psize, psize,
                                             ssize, local);
        } pte_iterate_hashed_end();
-- cgit

From d4748276ae14ce951a3254852dddc3675797c277 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin
Date: Sun, 24 Dec 2017 01:15:50 +1000
Subject: powerpc/64s: Improve local TLB flush for boot and MCE on POWER9

There are several cases outside the normal address space management where a
CPU's entire local TLB is to be flushed:

  1. Booting the kernel, in case something has left stale entries in the
     TLB (e.g., kexec).

  2. Machine check, to clean corrupted TLB entries.

One other place where the TLB is flushed is waking from deep idle states. The
flush is a side-effect of calling ->cpu_restore with the intention of
re-setting various SPRs. The flush itself is unnecessary because in the first
case, the TLB should not acquire new corrupted TLB entries as part of
sleep/wake (though they may be lost).

This type of TLB flush is coded inflexibly, several times for each CPU type,
and these flushes have a number of problems with ISA v3.0B:

- The current radix mode of the MMU is not taken into account; it is always
  done as a hash flush. For IS=2 (LPID-matching flush from host) and IS=3 with
  HV=0 (guest kernel flush), tlbie(l) is undefined if the R field does not
  match the current radix mode.

- ISA v3.0B hash must flush the partition and process table caches as well.

- ISA v3.0B radix must flush partition and process scoped translations,
  partition and process table caches, and also the page walk cache.

So consolidate the flushing code and implement it in C and inline asm under
the mm/ directory with the rest of the flush code.
Add ISA v3.0B cases for radix and hash, and use the radix flush in the radix
environment. Provide a way for IS=2 (LPID flush) to specify the radix mode of
the partition. Have KVM pass in the radix mode of the guest.

Take out the flushes from the early cputable/dt_cpu_ftrs detection hooks, and
move them later in the boot process, after the MMU registers are set up and
before relocation is first turned on.

The TLB flush is no longer called when restoring from deep idle states. This
was not done as a separate step because booting secondaries uses the same
cpu_restore as idle restore, which needs the TLB flush.

Signed-off-by: Nicholas Piggin
Signed-off-by: Michael Ellerman
---
 arch/powerpc/mm/hash_utils_64.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'arch/powerpc/mm/hash_utils_64.c')

diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 0c802ded6f21..69fa01e02b63 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -1051,6 +1051,10 @@ void __init hash__early_init_mmu(void)
        pr_info("Initializing hash mmu with SLB\n");
        /* Initialize SLB management */
        slb_initialize();
+
+       if (cpu_has_feature(CPU_FTR_ARCH_206)
+                       && cpu_has_feature(CPU_FTR_HVMODE))
+               tlbiel_all();
 }

 #ifdef CONFIG_SMP
@@ -1070,6 +1074,10 @@ void hash__early_init_mmu_secondary(void)
        }
        /* Initialize SLB */
        slb_initialize();
+
+       if (cpu_has_feature(CPU_FTR_ARCH_206)
+                       && cpu_has_feature(CPU_FTR_HVMODE))
+               tlbiel_all();
 }
 #endif /* CONFIG_SMP */
-- cgit
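A rough sketch of the dispatch structure this consolidation gives. The
example_* helpers below are stand-ins for the real tlbiel sequences and are
stubbed out; this illustrates the shape described in the message, not the
kernel's actual implementation:

        /* Stubs standing in for the ISA v3.0B tlbiel loops (illustration only). */
        static void example_tlbiel_all_hash(void)   { /* would flush TLB + partition/process table caches */ }
        static void example_tlbiel_all_radix(void)  { /* would flush translations + table caches + page walk cache */ }
        static void example_tlbiel_lpid_hash(void)  { }
        static void example_tlbiel_lpid_radix(void) { }

        /* Boot/MCE flush: the flush routine, not each caller, picks the right sequence. */
        static void example_local_flush_all(bool host_is_radix)
        {
                if (host_is_radix)
                        example_tlbiel_all_radix();
                else
                        example_tlbiel_all_hash();
        }

        /*
         * IS=2 (LPID-scoped) flush: the R field must match the target
         * partition's translation mode, so a hypervisor such as KVM passes in
         * the guest's mode rather than assuming its own.
         */
        static void example_local_flush_lpid(bool guest_is_radix)
        {
                if (guest_is_radix)
                        example_tlbiel_lpid_radix();
                else
                        example_tlbiel_lpid_hash();
        }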
From 1513c33d7174e87a079cfa2666cb9a3eba56a0ea Mon Sep 17 00:00:00 2001
From: Nicholas Piggin
Date: Fri, 22 Dec 2017 21:17:08 +1000
Subject: powerpc/powernv: Remove real mode access limit for early allocations

This removes the RMA limit on the powernv platform, which constrains early
allocations such as PACAs and stacks. There are still other restrictions that
must be followed, such as bolted SLB limits, but real mode addressing has no
constraints.

Signed-off-by: Nicholas Piggin
Signed-off-by: Michael Ellerman
---
 arch/powerpc/mm/hash_utils_64.c | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

(limited to 'arch/powerpc/mm/hash_utils_64.c')

diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 69fa01e02b63..a386bd854b1c 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -1848,16 +1848,22 @@ void hash__setup_initial_memory_limit(phys_addr_t first_memblock_base,
         */
        BUG_ON(first_memblock_base != 0);

-       /* On LPAR systems, the first entry is our RMA region,
-        * non-LPAR 64-bit hash MMU systems don't have a limitation
-        * on real mode access, but using the first entry works well
-        * enough. We also clamp it to 1G to avoid some funky things
+       /*
+        * On virtualized systems the first entry is our RMA region aka VRMA,
+        * non-virtualized 64-bit hash MMU systems don't have a limitation
+        * on real mode access.
+        *
+        * We also clamp it to 1G to avoid some funky things
         * such as RTAS bugs etc...
         */
-       ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000);
+       if (!early_cpu_has_feature(CPU_FTR_HVMODE)) {
+               ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000);

-       /* Finally limit subsequent allocations */
-       memblock_set_current_limit(ppc64_rma_size);
+               /* Finally limit subsequent allocations */
+               memblock_set_current_limit(ppc64_rma_size);
+       } else {
+               ppc64_rma_size = ULONG_MAX;
+       }
 }
-- cgit

From c610d65c0ad00f1b7bfe2f1eb8f867ca74741685 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin
Date: Fri, 22 Dec 2017 21:17:12 +1000
Subject: powerpc/pseries: lift RTAS limit for hash

With the previous patch to switch to 64-bit mode after returning from RTAS and
before doing any memory accesses, the RMA limit need not be clamped to 1GB to
avoid RTAS bugs.

Keep the 1GB limit for older firmware (although this is more of a kernel
concern than RTAS), and remove it starting with POWER9.

Signed-off-by: Nicholas Piggin
Signed-off-by: Michael Ellerman
---
 arch/powerpc/mm/hash_utils_64.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'arch/powerpc/mm/hash_utils_64.c')

diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index a386bd854b1c..64e704eefad1 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -1853,11 +1853,13 @@ void hash__setup_initial_memory_limit(phys_addr_t first_memblock_base,
         * non-virtualized 64-bit hash MMU systems don't have a limitation
         * on real mode access.
         *
-        * We also clamp it to 1G to avoid some funky things
-        * such as RTAS bugs etc...
+        * For guests on platforms before POWER9, we clamp it to 1G
+        * to avoid some funky things such as RTAS bugs etc...
         */
        if (!early_cpu_has_feature(CPU_FTR_HVMODE)) {
-               ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000);
+               ppc64_rma_size = first_memblock_size;
+               if (!early_cpu_has_feature(CPU_FTR_ARCH_300))
+                       ppc64_rma_size = min_t(u64, ppc64_rma_size, 0x40000000);

                /* Finally limit subsequent allocations */
                memblock_set_current_limit(ppc64_rma_size);
-- cgit

From 92e3da3cf193fd27996909956c12a23c0333da44 Mon Sep 17 00:00:00 2001
From: Ram Pai
Date: Thu, 18 Jan 2018 17:50:24 -0800
Subject: powerpc: initial pkey plumbing

Basic plumbing to initialize the pkey system. Nothing is enabled yet. A later
patch will enable it once all the infrastructure is in place.

Signed-off-by: Ram Pai
[mpe: Rework copyrights to use SPDX tags]
Signed-off-by: Michael Ellerman
---
 arch/powerpc/mm/hash_utils_64.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/powerpc/mm/hash_utils_64.c')

diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 64e704eefad1..8cd31ea9b243 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -36,6 +36,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
-- cgit

From a6590ca55f1f49912e17e4811ccae9a51bda8859 Mon Sep 17 00:00:00 2001
From: Ram Pai
Date: Thu, 18 Jan 2018 17:50:36 -0800
Subject: powerpc: Program HPTE key protection bits

Map the PTE protection key bits to the HPTE key protection bits, while
creating HPTE entries.

Acked-by: Balbir Singh
Signed-off-by: Ram Pai
Signed-off-by: Michael Ellerman
---
 arch/powerpc/mm/hash_utils_64.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/powerpc/mm/hash_utils_64.c')

diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 8cd31ea9b243..a78e24cf93ff 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -233,6 +233,7 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags)
         */
        rflags |= HPTE_R_M;

+       rflags |= pte_to_hpte_pkey_bits(pteflags);
        return rflags;
 }
-- cgit
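The pte_to_hpte_pkey_bits() helper called above lives outside this file. A
plausible sketch of such a PTE-key to HPTE-key translation is shown below;
the H_PTE_PKEY_BIT* and HPTE_R_KEY_BIT* names are assumed from the
surrounding pkey series, so treat the exact definition as an illustration
rather than the patch's actual code:

        /*
         * Sketch of a pteflags -> HPTE key-bit translation of the kind the
         * hunk above calls: each of the five software PTE pkey bits is tested
         * and the corresponding HPTE_R key bit is set.  Assumed names only.
         */
        static inline u64 example_pte_to_hpte_pkey_bits(u64 pteflags)
        {
                return (((pteflags & H_PTE_PKEY_BIT0) ? HPTE_R_KEY_BIT0 : 0x0UL) |
                        ((pteflags & H_PTE_PKEY_BIT1) ? HPTE_R_KEY_BIT1 : 0x0UL) |
                        ((pteflags & H_PTE_PKEY_BIT2) ? HPTE_R_KEY_BIT2 : 0x0UL) |
                        ((pteflags & H_PTE_PKEY_BIT3) ? HPTE_R_KEY_BIT3 : 0x0UL) |
                        ((pteflags & H_PTE_PKEY_BIT4) ? HPTE_R_KEY_BIT4 : 0x0UL));
        }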
From 087003e9ef7c1c5bec932387e47511429eff2a54 Mon Sep 17 00:00:00 2001
From: Ram Pai
Date: Thu, 18 Jan 2018 17:50:41 -0800
Subject: powerpc: introduce get_mm_addr_key() helper

The get_mm_addr_key() helper returns the pkey associated with an address in a
given mm_struct.

Signed-off-by: Ram Pai
Signed-off-by: Michael Ellerman
---
 arch/powerpc/mm/hash_utils_64.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

(limited to 'arch/powerpc/mm/hash_utils_64.c')

diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index a78e24cf93ff..462c34e7b01d 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -1581,6 +1581,30 @@ out_exit:
        local_irq_restore(flags);
 }

+#ifdef CONFIG_PPC_MEM_KEYS
+/*
+ * Return the protection key associated with the given address and the
+ * mm_struct.
+ */
+u16 get_mm_addr_key(struct mm_struct *mm, unsigned long address)
+{
+       pte_t *ptep;
+       u16 pkey = 0;
+       unsigned long flags;
+
+       if (!mm || !mm->pgd)
+               return 0;
+
+       local_irq_save(flags);
+       ptep = find_linux_pte(mm->pgd, address, NULL, NULL);
+       if (ptep)
+               pkey = pte_to_pkey_bits(pte_val(READ_ONCE(*ptep)));
+       local_irq_restore(flags);
+
+       return pkey;
+}
+#endif /* CONFIG_PPC_MEM_KEYS */
+
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 static inline void tm_flush_hash_page(int local)
 {
-- cgit
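A short illustration of the kind of caller this helper is aimed at: looking up
the key that guards a faulting address so it can be reported back to user
space. The wrapper function and the exact reporting path below are
hypothetical:

        /*
         * Hypothetical caller sketch: when a key-protected access faults,
         * fetch the pkey guarding the faulting address so it can be reported,
         * e.g. in siginfo.  Illustrative names only.
         */
        static void example_report_key_fault(unsigned long addr)
        {
                u16 pkey;

                /* Safe even early in the fault path: returns 0 if mm/pgd is not set up */
                pkey = get_mm_addr_key(current->mm, addr);

                pr_debug("key fault at %lx, protection key %u\n", addr, pkey);
                /* ...deliver SIGSEGV, reporting the offending key to the task... */
        }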
From 10527e808123f4b12db604993638b368220e7814 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V"
Date: Tue, 28 Nov 2017 14:04:40 +0530
Subject: powerpc/hash: Skip non initialized page size in init_hpte_page_sizes

One of the easiest ways to test a config with 4K HPTEs is to disable the 64K
hardware page size like below.

 int __init htab_dt_scan_page_sizes(unsigned long node,

                size -= 3; prop += 3;
                base_idx = get_idx_from_shift(base_shift);
-               if (base_idx < 0) {
+               if (base_idx < 0 || base_idx == MMU_PAGE_64K) {
                        /* skip the pte encoding also */
                        prop += lpnum * 2;
                        size -= lpnum * 2;

But then this results in errors in other parts of the code, such as MPSS
parsing, where we look at 4K base page size and 64K actual page size support.

This patch fixes MPSS parsing by ignoring the actual page sizes marked
unsupported. In reality this can happen only with a corrupt device tree, but
it is good to tighten the error check.

Signed-off-by: Aneesh Kumar K.V
Signed-off-by: Michael Ellerman
---
 arch/powerpc/mm/hash_utils_64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/powerpc/mm/hash_utils_64.c')

diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 462c34e7b01d..d97be8d20715 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -608,7 +608,7 @@ static void init_hpte_page_sizes(void)
                        continue;       /* not a supported page size */
                for (ap = bp; ap < MMU_PAGE_COUNT; ++ap) {
                        penc = mmu_psize_defs[bp].penc[ap];
-                       if (penc == -1)
+                       if (penc == -1 || !mmu_psize_defs[ap].shift)
                                continue;
                        shift = mmu_psize_defs[ap].shift - LP_SHIFT;
                        if (shift <= 0)
-- cgit

From 7339390d772ddee0447886d72c3aebc3b58d9583 Mon Sep 17 00:00:00 2001
From: David Gibson
Date: Fri, 17 Mar 2017 12:11:19 +1100
Subject: powerpc/pseries: Don't give a warning when HPT resizing isn't available

As of 438cc81a41 "powerpc/pseries: Automatically resize HPT for memory hot
add/remove", when running on the pseries platform we always attempt to use the
PAPR extension to resize the hashed page table (HPT) when we add or remove
memory.

This is fine, but when the extension isn't available we'll give a harmless but
scary warning. This patch suppresses the warning in that case. It will still
warn if the feature is supposed to be available, but didn't work.

Signed-off-by: David Gibson
Signed-off-by: Michael Ellerman
---
 arch/powerpc/mm/hash_utils_64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/powerpc/mm/hash_utils_64.c')

diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index d97be8d20715..7d07c7e17db6 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -774,7 +774,7 @@ void resize_hpt_for_hotplug(unsigned long new_mem_size)
                int rc;

                rc = mmu_hash_ops.resize_hpt(target_hpt_shift);
-               if (rc)
+               if (rc && (rc != -ENODEV))
                        printk(KERN_WARNING
                               "Unable to resize hash page table to target order %d: %d\n",
                               target_hpt_shift, rc);
-- cgit