Merge tag 'powerpc-5.12-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux

Pull powerpc updates from Michael Ellerman:

 - A large series adding wrappers for our interrupt handlers, so that
   irq/nmi/user tracking can be isolated in the wrappers rather than
   spread in each handler.

 - Conversion of the 32-bit syscall handling into C.

 - A series from Nick to streamline our TLB flushing when using the
   Radix MMU.

 - Switch to using queued spinlocks by default for 64-bit server CPUs.

 - A rework of our PCI probing so that it happens later in boot, when
   more generic infrastructure is available.

 - Two small fixes to allow 32-bit little-endian processes to run on
   64-bit kernels.

 - Other smaller features, fixes & cleanups.

Thanks to: Alexey Kardashevskiy, Ananth N Mavinakayanahalli, Aneesh Kumar
K.V, Athira Rajeev, Bhaskar Chowdhury, Cédric Le Goater, Chengyang Fan,
Christophe Leroy, Christopher M. Riedl, Fabiano Rosas, Florian Fainelli,
Frederic Barrat, Ganesh Goudar, Hari Bathini, Jiapeng Chong, Joseph J
Allen, Kajol Jain, Markus Elfring, Michal Suchanek, Nathan Lynch, Naveen
N. Rao, Nicholas Piggin, Oliver O'Halloran, Pingfan Liu, Po-Hsu Lin, Qian
Cai, Ram Pai, Randy Dunlap, Sandipan Das, Stephen Rothwell, Tyrel
Datwyler, Will Springer, Yury Norov, and Zheng Yongjun.

* tag 'powerpc-5.12-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (188 commits)
  powerpc/perf: Adds support for programming of Thresholding in P10
  powerpc/pci: Remove unimplemented prototypes
  powerpc/uaccess: Merge raw_copy_to_user_allowed() into raw_copy_to_user()
  powerpc/uaccess: Merge __put_user_size_allowed() into __put_user_size()
  powerpc/uaccess: get rid of small constant size cases in raw_copy_{to,from}_user()
  powerpc/64: Fix stack trace not displaying final frame
  powerpc/time: Remove get_tbl()
  powerpc/time: Avoid using get_tbl()
  spi: mpc52xx: Avoid using get_tbl()
  powerpc/syscall: Avoid storing 'current' in another pointer
  powerpc/32: Handle bookE debugging in C in syscall entry/exit
  powerpc/syscall: Do not check unsupported scv vector on PPC32
  powerpc/32: Remove the counter in global_dbcr0
  powerpc/32: Remove verification of MSR_PR on syscall in the ASM entry
  powerpc/syscall: implement system call entry/exit logic in C for PPC32
  powerpc/32: Always save non volatile GPRs at syscall entry
  powerpc/syscall: Change condition to check MSR_RI
  powerpc/syscall: Save r3 in regs->orig_r3
  powerpc/syscall: Use is_compat_task()
  powerpc/syscall: Make interrupt.c buildable on PPC32
  ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2021-02-22 14:34:00 -0800
committer: Linus Torvalds <torvalds@linux-foundation.org> 2021-02-22 14:34:00 -0800
commit: b12b47249688915e987a9a2a393b522f86f6b7ab (patch)
tree: eae34f7fa64474bb3123f7b69c411ade6127c41f /arch/powerpc/mm
parent: 6ff6f86bc4d02949b5688d69de1c89c310d62c44 (diff)
parent: 82d2c16b350f72aa21ac2a6860c542aa4b43a51e (diff)
14 files changed, 395 insertions, 246 deletions
diff --git a/arch/powerpc/mm/book3s32/Makefile b/arch/powerpc/mm/book3s32/Makefile
index 3f972db17761..446d9de88ce4 100644
--- a/arch/powerpc/mm/book3s32/Makefile
+++ b/arch/powerpc/mm/book3s32/Makefile
@@ -6,4 +6,6 @@ ifdef CONFIG_KASAN
 CFLAGS_mmu.o  		+= -DDISABLE_BRANCH_PROFILING
 endif
 
-obj-y += mmu.o hash_low.o mmu_context.o tlb.o nohash_low.o
+obj-y += mmu.o mmu_context.o
+obj-$(CONFIG_PPC_BOOK3S_603) += nohash_low.o
+obj-$(CONFIG_PPC_BOOK3S_604) += hash_low.o tlb.o
diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c
index 859e5bd603ac..d7eb266a3f7a 100644
--- a/arch/powerpc/mm/book3s32/mmu.c
+++ b/arch/powerpc/mm/book3s32/mmu.c
@@ -234,7 +234,7 @@ void mmu_mark_initmem_nx(void)
 		if (is_module_segment(i << 28))
 			continue;
 
-		mtsrin(mfsrin(i << 28) | 0x10000000, i << 28);
+		mtsr(mfsr(i << 28) | 0x10000000, i << 28);
 	}
 }
 
diff --git a/arch/powerpc/mm/book3s64/hash_hugetlbpage.c b/arch/powerpc/mm/book3s64/hash_hugetlbpage.c
index b5e9fff8c217..a688e1324ae5 100644
--- a/arch/powerpc/mm/book3s64/hash_hugetlbpage.c
+++ b/arch/powerpc/mm/book3s64/hash_hugetlbpage.c
@@ -16,10 +16,6 @@
 unsigned int hpage_shift;
 EXPORT_SYMBOL(hpage_shift);
 
-extern long hpte_insert_repeating(unsigned long hash, unsigned long vpn,
-				  unsigned long pa, unsigned long rlags,
-				  unsigned long vflags, int psize, int ssize);
-
 int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
 		     pte_t *ptep, unsigned long trap, unsigned long flags,
 		     int ssize, unsigned int shift, unsigned int mmu_psize)
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index 73b06adb6eeb..581b20a2feaf 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -38,6 +38,7 @@
 #include <linux/pgtable.h>
 
 #include <asm/debugfs.h>
+#include <asm/interrupt.h>
 #include <asm/processor.h>
 #include <asm/mmu.h>
 #include <asm/mmu_context.h>
@@ -1143,10 +1144,10 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
 	page = pte_page(pte);
 
 	/* page is dirty */
-	if (!test_bit(PG_arch_1, &page->flags) && !PageReserved(page)) {
+	if (!test_bit(PG_dcache_clean, &page->flags) && !PageReserved(page)) {
 		if (trap == 0x400) {
 			flush_dcache_icache_page(page);
-			set_bit(PG_arch_1, &page->flags);
+			set_bit(PG_dcache_clean, &page->flags);
 		} else
 			pp |= HPTE_R_N;
 	}
@@ -1288,7 +1289,6 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
 		 unsigned long flags)
 {
 	bool is_thp;
-	enum ctx_state prev_state = exception_enter();
 	pgd_t *pgdir;
 	unsigned long vsid;
 	pte_t *ptep;
@@ -1490,7 +1490,6 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
 	DBG_LOW(" -> rc=%d\n", rc);
 
 bail:
-	exception_exit(prev_state);
 	return rc;
 }
 EXPORT_SYMBOL_GPL(hash_page_mm);
@@ -1512,16 +1511,22 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap,
 }
 EXPORT_SYMBOL_GPL(hash_page);
 
-int __hash_page(unsigned long trap, unsigned long ea, unsigned long dsisr,
-		unsigned long msr)
+DECLARE_INTERRUPT_HANDLER_RET(__do_hash_fault);
+DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault)
 {
+	unsigned long ea = regs->dar;
+	unsigned long dsisr = regs->dsisr;
 	unsigned long access = _PAGE_PRESENT | _PAGE_READ;
 	unsigned long flags = 0;
-	struct mm_struct *mm = current->mm;
-	unsigned int region_id = get_region_id(ea);
+	struct mm_struct *mm;
+	unsigned int region_id;
+	long err;
 
+	region_id = get_region_id(ea);
 	if ((region_id == VMALLOC_REGION_ID) || (region_id == IO_REGION_ID))
 		mm = &init_mm;
+	else
+		mm = current->mm;
 
 	if (dsisr & DSISR_NOHPTE)
 		flags |= HPTE_NOHPTE_UPDATE;
@@ -1537,13 +1542,66 @@ int __hash_page(unsigned long trap, unsigned long ea, unsigned long dsisr,
 	 * 2) user space access kernel space.
 	 */
 	access |= _PAGE_PRIVILEGED;
-	if ((msr & MSR_PR) || (region_id == USER_REGION_ID))
+	if (user_mode(regs) || (region_id == USER_REGION_ID))
 		access &= ~_PAGE_PRIVILEGED;
 
-	if (trap == 0x400)
+	if (regs->trap == 0x400)
 		access |= _PAGE_EXEC;
 
-	return hash_page_mm(mm, ea, access, trap, flags);
+	err = hash_page_mm(mm, ea, access, regs->trap, flags);
+	if (unlikely(err < 0)) {
+		// failed to insert a hash PTE due to a hypervisor error
+		if (user_mode(regs)) {
+			if (IS_ENABLED(CONFIG_PPC_SUBPAGE_PROT) && err == -2)
+				_exception(SIGSEGV, regs, SEGV_ACCERR, ea);
+			else
+				_exception(SIGBUS, regs, BUS_ADRERR, ea);
+		} else {
+			bad_page_fault(regs, SIGBUS);
+		}
+		err = 0;
+	}
+
+	return err;
+}
+
+/*
+ * The _RAW interrupt entry checks for the in_nmi() case before
+ * running the full handler.
+ */
+DEFINE_INTERRUPT_HANDLER_RAW(do_hash_fault)
+{
+	unsigned long dsisr = regs->dsisr;
+	long err;
+
+	if (unlikely(dsisr & (DSISR_BAD_FAULT_64S | DSISR_KEYFAULT)))
+		goto page_fault;
+
+	/*
+	 * If we are in an "NMI" (e.g., an interrupt when soft-disabled), then
+	 * don't call hash_page, just fail the fault. This is required to
+	 * prevent re-entrancy problems in the hash code, namely perf
+	 * interrupts hitting while something holds H_PAGE_BUSY, and taking a
+	 * hash fault. See the comment in hash_preload().
+	 *
+	 * We come here as a result of a DSI at a point where we don't want
+	 * to call hash_page, such as when we are accessing memory (possibly
+	 * user memory) inside a PMU interrupt that occurred while interrupts
+	 * were soft-disabled.  We want to invoke the exception handler for
+	 * the access, or panic if there isn't a handler.
+	 */
+	if (unlikely(in_nmi())) {
+		do_bad_page_fault_segv(regs);
+		return 0;
+	}
+
+	err = __do_hash_fault(regs);
+	if (err) {
+page_fault:
+		err = hash__do_page_fault(regs);
+	}
+
+	return err;
 }
 
 #ifdef CONFIG_PPC_MM_SLICES
@@ -1843,27 +1901,6 @@ void flush_hash_range(unsigned long number, int local)
 	}
 }
 
-/*
- * low_hash_fault is called when we the low level hash code failed
- * to instert a PTE due to an hypervisor error
- */
-void low_hash_fault(struct pt_regs *regs, unsigned long address, int rc)
-{
-	enum ctx_state prev_state = exception_enter();
-
-	if (user_mode(regs)) {
-#ifdef CONFIG_PPC_SUBPAGE_PROT
-		if (rc == -2)
-			_exception(SIGSEGV, regs, SEGV_ACCERR, address);
-		else
-#endif
-			_exception(SIGBUS, regs, BUS_ADRERR, address);
-	} else
-		bad_page_fault(regs, address, SIGBUS);
-
-	exception_exit(prev_state);
-}
-
 long hpte_insert_repeating(unsigned long hash, unsigned long vpn,
 			   unsigned long pa, unsigned long rflags,
 			   unsigned long vflags, int psize, int ssize)
diff --git a/arch/powerpc/mm/book3s64/internal.h b/arch/powerpc/mm/book3s64/internal.h
index c12d78ee42f5..5045048ce244 100644
--- a/arch/powerpc/mm/book3s64/internal.h
+++ b/arch/powerpc/mm/book3s64/internal.h
@@ -15,4 +15,6 @@ static inline bool stress_slb(void)
 
 void slb_setup_new_exec(void);
 
+void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush);
+
 #endif /* ARCH_POWERPC_MM_BOOK3S64_INTERNAL_H */
diff --git a/arch/powerpc/mm/book3s64/iommu_api.c b/arch/powerpc/mm/book3s64/iommu_api.c
index 685d7bb3d26f..cd18e94d0843 100644
--- a/arch/powerpc/mm/book3s64/iommu_api.c
+++ b/arch/powerpc/mm/book3s64/iommu_api.c
@@ -129,7 +129,8 @@ good_exit:
 
 	mutex_lock(&mem_list_mutex);
 
-	list_for_each_entry_rcu(mem2, &mm->context.iommu_group_mem_list, next) {
+	list_for_each_entry_rcu(mem2, &mm->context.iommu_group_mem_list, next,
+				lockdep_is_held(&mem_list_mutex)) {
 		/* Overlap? */
 		if ((mem2->ua < (ua + (entries << PAGE_SHIFT))) &&
 				(ua < (mem2->ua +
@@ -289,6 +290,7 @@ struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm,
 {
 	struct mm_iommu_table_group_mem_t *mem, *ret = NULL;
 
+	rcu_read_lock();
 	list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
 		if ((mem->ua <= ua) &&
 				(ua + size <= mem->ua +
@@ -297,6 +299,7 @@ struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm,
 			break;
 		}
 	}
+	rcu_read_unlock();
 
 	return ret;
 }
@@ -327,7 +330,8 @@ struct mm_iommu_table_group_mem_t *mm_iommu_get(struct mm_struct *mm,
 
 	mutex_lock(&mem_list_mutex);
 
-	list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
+	list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next,
+				lockdep_is_held(&mem_list_mutex)) {
 		if ((mem->ua == ua) && (mem->entries == entries)) {
 			ret = mem;
 			++mem->used;
@@ -421,6 +425,7 @@ bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa,
 	struct mm_iommu_table_group_mem_t *mem;
 	unsigned long end;
 
+	rcu_read_lock();
 	list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
 		if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
 			continue;
@@ -437,6 +442,7 @@ bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa,
 			return true;
 		}
 	}
+	rcu_read_unlock();
 
 	return false;
 }
diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c
index 5b3a3bae21aa..9ffa65074cb0 100644
--- a/arch/powerpc/mm/book3s64/pgtable.c
+++ b/arch/powerpc/mm/book3s64/pgtable.c
@@ -20,6 +20,8 @@
 #include <mm/mmu_decl.h>
 #include <trace/events/thp.h>
 
+#include "internal.h"
+
 unsigned long __pmd_frag_nr;
 EXPORT_SYMBOL(__pmd_frag_nr);
 unsigned long __pmd_frag_size_shift;
@@ -79,10 +81,15 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
 	return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));
 }
 
-static void do_nothing(void *unused)
+static void do_serialize(void *arg)
 {
-
+	/* We've taken the IPI, so try to trim the mask while here */
+	if (radix_enabled()) {
+		struct mm_struct *mm = arg;
+		exit_lazy_flush_tlb(mm, false);
+	}
 }
+
 /*
  * Serialize against find_current_mm_pte which does lock-less
  * lookup in page tables with local interrupts disabled. For huge pages
@@ -96,7 +103,7 @@ static void do_nothing(void *unused)
 void serialize_against_pte_lookup(struct mm_struct *mm)
 {
 	smp_mb();
-	smp_call_function_many(mm_cpumask(mm), do_nothing, NULL, 1);
+	smp_call_function_many(mm_cpumask(mm), do_serialize, mm, 1);
 }
 
 /*
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index fb66d154b26c..409e61210789 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -18,6 +18,8 @@
 #include <asm/cputhreads.h>
 #include <asm/plpar_wrappers.h>
 
+#include "internal.h"
+
 #define RIC_FLUSH_TLB 0
 #define RIC_FLUSH_PWC 1
 #define RIC_FLUSH_ALL 2
@@ -627,15 +629,6 @@ void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmadd
 }
 EXPORT_SYMBOL(radix__local_flush_tlb_page);
 
-static bool mm_is_singlethreaded(struct mm_struct *mm)
-{
-	if (atomic_read(&mm->context.copros) > 0)
-		return false;
-	if (atomic_read(&mm->mm_users) <= 1 && current->mm == mm)
-		return true;
-	return false;
-}
-
 static bool mm_needs_flush_escalation(struct mm_struct *mm)
 {
 	/*
@@ -648,21 +641,24 @@ static bool mm_needs_flush_escalation(struct mm_struct *mm)
 	return false;
 }
 
-#ifdef CONFIG_SMP
-static void do_exit_flush_lazy_tlb(void *arg)
+/*
+ * If always_flush is true, then flush even if this CPU can't be removed
+ * from mm_cpumask.
+ */
+void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush)
 {
-	struct mm_struct *mm = arg;
 	unsigned long pid = mm->context.id;
+	int cpu = smp_processor_id();
 
 	/*
 	 * A kthread could have done a mmget_not_zero() after the flushing CPU
-	 * checked mm_is_singlethreaded, and be in the process of
-	 * kthread_use_mm when interrupted here. In that case, current->mm will
-	 * be set to mm, because kthread_use_mm() setting ->mm and switching to
-	 * the mm is done with interrupts off.
+	 * checked mm_cpumask, and be in the process of kthread_use_mm when
+	 * interrupted here. In that case, current->mm will be set to mm,
+	 * because kthread_use_mm() setting ->mm and switching to the mm is
+	 * done with interrupts off.
 	 */
 	if (current->mm == mm)
-		goto out_flush;
+		goto out;
 
 	if (current->active_mm == mm) {
 		WARN_ON_ONCE(current->mm != NULL);
@@ -673,11 +669,30 @@ static void do_exit_flush_lazy_tlb(void *arg)
 		mmdrop(mm);
 	}
 
-	atomic_dec(&mm->context.active_cpus);
-	cpumask_clear_cpu(smp_processor_id(), mm_cpumask(mm));
+	/*
+	 * This IPI may be initiated from any source including those not
+	 * running the mm, so there may be a racing IPI that comes after
+	 * this one which finds the cpumask already clear. Check and avoid
+	 * underflowing the active_cpus count in that case. The race should
+	 * not otherwise be a problem, but the TLB must be flushed because
+	 * that's what the caller expects.
+	 */
+	if (cpumask_test_cpu(cpu, mm_cpumask(mm))) {
+		atomic_dec(&mm->context.active_cpus);
+		cpumask_clear_cpu(cpu, mm_cpumask(mm));
+		always_flush = true;
+	}
 
-out_flush:
-	_tlbiel_pid(pid, RIC_FLUSH_ALL);
+out:
+	if (always_flush)
+		_tlbiel_pid(pid, RIC_FLUSH_ALL);
+}
+
+#ifdef CONFIG_SMP
+static void do_exit_flush_lazy_tlb(void *arg)
+{
+	struct mm_struct *mm = arg;
+	exit_lazy_flush_tlb(mm, true);
 }
 
 static void exit_flush_lazy_tlbs(struct mm_struct *mm)
@@ -693,9 +708,110 @@ static void exit_flush_lazy_tlbs(struct mm_struct *mm)
 				(void *)mm, 1);
 }
 
+#else /* CONFIG_SMP */
+static inline void exit_flush_lazy_tlbs(struct mm_struct *mm) { }
+#endif /* CONFIG_SMP */
+
+static DEFINE_PER_CPU(unsigned int, mm_cpumask_trim_clock);
+
+/*
+ * Interval between flushes at which we send out IPIs to check whether the
+ * mm_cpumask can be trimmed for the case where it's not a single-threaded
+ * process flushing its own mm. The intent is to reduce the cost of later
+ * flushes. Don't want this to be so low that it adds noticeable cost to TLB
+ * flushing, or so high that it doesn't help reduce global TLBIEs.
+ */
+static unsigned long tlb_mm_cpumask_trim_timer = 1073;
+
+static bool tick_and_test_trim_clock(void)
+{
+	if (__this_cpu_inc_return(mm_cpumask_trim_clock) ==
+			tlb_mm_cpumask_trim_timer) {
+		__this_cpu_write(mm_cpumask_trim_clock, 0);
+		return true;
+	}
+	return false;
+}
+
+enum tlb_flush_type {
+	FLUSH_TYPE_NONE,
+	FLUSH_TYPE_LOCAL,
+	FLUSH_TYPE_GLOBAL,
+};
+
+static enum tlb_flush_type flush_type_needed(struct mm_struct *mm, bool fullmm)
+{
+	int active_cpus = atomic_read(&mm->context.active_cpus);
+	int cpu = smp_processor_id();
+
+	if (active_cpus == 0)
+		return FLUSH_TYPE_NONE;
+	if (active_cpus == 1 && cpumask_test_cpu(cpu, mm_cpumask(mm))) {
+		if (current->mm != mm) {
+			/*
+			 * Asynchronous flush sources may trim down to nothing
+			 * if the process is not running, so occasionally try
+			 * to trim.
+			 */
+			if (tick_and_test_trim_clock()) {
+				exit_lazy_flush_tlb(mm, true);
+				return FLUSH_TYPE_NONE;
+			}
+		}
+		return FLUSH_TYPE_LOCAL;
+	}
+
+	/* Coprocessors require TLBIE to invalidate nMMU. */
+	if (atomic_read(&mm->context.copros) > 0)
+		return FLUSH_TYPE_GLOBAL;
+
+	/*
+	 * In the fullmm case there's no point doing the exit_flush_lazy_tlbs
+	 * because the mm is being taken down anyway, and a TLBIE tends to
+	 * be faster than an IPI+TLBIEL.
+	 */
+	if (fullmm)
+		return FLUSH_TYPE_GLOBAL;
+
+	/*
+	 * If we are running the only thread of a single-threaded process,
+	 * then we should almost always be able to trim off the rest of the
+	 * CPU mask (except in the case of use_mm() races), so always try
+	 * trimming the mask.
+	 */
+	if (atomic_read(&mm->mm_users) <= 1 && current->mm == mm) {
+		exit_flush_lazy_tlbs(mm);
+		/*
+		 * use_mm() race could prevent IPIs from being able to clear
+		 * the cpumask here, however those users are established
+		 * after our first check (and so after the PTEs are removed),
+		 * and the TLB still gets flushed by the IPI, so this CPU
+		 * will only require a local flush.
+		 */
+		return FLUSH_TYPE_LOCAL;
+	}
+
+	/*
+	 * Occasionally try to trim down the cpumask. It's possible this can
+	 * bring the mask to zero, which results in no flush.
+	 */
+	if (tick_and_test_trim_clock()) {
+		exit_flush_lazy_tlbs(mm);
+		if (current->mm == mm)
+			return FLUSH_TYPE_LOCAL;
+		if (cpumask_test_cpu(cpu, mm_cpumask(mm)))
+			exit_lazy_flush_tlb(mm, true);
+		return FLUSH_TYPE_NONE;
+	}
+
+	return FLUSH_TYPE_GLOBAL;
+}
+
+#ifdef CONFIG_SMP
 void radix__flush_tlb_mm(struct mm_struct *mm)
 {
 	unsigned long pid;
+	enum tlb_flush_type type;
 
 	pid = mm->context.id;
 	if (unlikely(pid == MMU_NO_CONTEXT))
@@ -703,16 +819,15 @@ void radix__flush_tlb_mm(struct mm_struct *mm)
 
 	preempt_disable();
 	/*
-	 * Order loads of mm_cpumask vs previous stores to clear ptes before
-	 * the invalidate. See barrier in switch_mm_irqs_off
+	 * Order loads of mm_cpumask (in flush_type_needed) vs previous
+	 * stores to clear ptes before the invalidate. See barrier in
+	 * switch_mm_irqs_off
 	 */
 	smp_mb();
-	if (!mm_is_thread_local(mm)) {
-		if (unlikely(mm_is_singlethreaded(mm))) {
-			exit_flush_lazy_tlbs(mm);
-			goto local;
-		}
-
+	type = flush_type_needed(mm, false);
+	if (type == FLUSH_TYPE_LOCAL) {
+		_tlbiel_pid(pid, RIC_FLUSH_TLB);
+	} else if (type == FLUSH_TYPE_GLOBAL) {
 		if (!mmu_has_feature(MMU_FTR_GTSE)) {
 			unsigned long tgt = H_RPTI_TARGET_CMMU;
 
@@ -728,9 +843,6 @@ void radix__flush_tlb_mm(struct mm_struct *mm)
 		} else {
 			_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB);
 		}
-	} else {
-local:
-		_tlbiel_pid(pid, RIC_FLUSH_TLB);
 	}
 	preempt_enable();
 }
@@ -739,6 +851,7 @@ EXPORT_SYMBOL(radix__flush_tlb_mm);
 static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
 {
 	unsigned long pid;
+	enum tlb_flush_type type;
 
 	pid = mm->context.id;
 	if (unlikely(pid == MMU_NO_CONTEXT))
@@ -746,13 +859,10 @@ static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
 
 	preempt_disable();
 	smp_mb(); /* see radix__flush_tlb_mm */
-	if (!mm_is_thread_local(mm)) {
-		if (unlikely(mm_is_singlethreaded(mm))) {
-			if (!fullmm) {
-				exit_flush_lazy_tlbs(mm);
-				goto local;
-			}
-		}
+	type = flush_type_needed(mm, fullmm);
+	if (type == FLUSH_TYPE_LOCAL) {
+		_tlbiel_pid(pid, RIC_FLUSH_ALL);
+	} else if (type == FLUSH_TYPE_GLOBAL) {
 		if (!mmu_has_feature(MMU_FTR_GTSE)) {
 			unsigned long tgt = H_RPTI_TARGET_CMMU;
 			unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
@@ -766,9 +876,6 @@ static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
 			_tlbie_pid(pid, RIC_FLUSH_ALL);
 		else
 			_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL);
-	} else {
-local:
-		_tlbiel_pid(pid, RIC_FLUSH_ALL);
 	}
 	preempt_enable();
 }
@@ -783,6 +890,7 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
 				 int psize)
 {
 	unsigned long pid;
+	enum tlb_flush_type type;
 
 	pid = mm->context.id;
 	if (unlikely(pid == MMU_NO_CONTEXT))
@@ -790,11 +898,10 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
 
 	preempt_disable();
 	smp_mb(); /* see radix__flush_tlb_mm */
-	if (!mm_is_thread_local(mm)) {
-		if (unlikely(mm_is_singlethreaded(mm))) {
-			exit_flush_lazy_tlbs(mm);
-			goto local;
-		}
+	type = flush_type_needed(mm, false);
+	if (type == FLUSH_TYPE_LOCAL) {
+		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
+	} else if (type == FLUSH_TYPE_GLOBAL) {
 		if (!mmu_has_feature(MMU_FTR_GTSE)) {
 			unsigned long tgt, pg_sizes, size;
 
@@ -811,9 +918,6 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
 			_tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
 		else
 			_tlbiel_va_multicast(mm, vmaddr, pid, psize, RIC_FLUSH_TLB);
-	} else {
-local:
-		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
 	}
 	preempt_enable();
 }
@@ -828,8 +932,6 @@ void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
 }
 EXPORT_SYMBOL(radix__flush_tlb_page);
 
-#else /* CONFIG_SMP */
-static inline void exit_flush_lazy_tlbs(struct mm_struct *mm) { }
 #endif /* CONFIG_SMP */
 
 static void do_tlbiel_kernel(void *info)
@@ -893,7 +995,9 @@ static inline void __radix__flush_tlb_range(struct mm_struct *mm,
 	unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift;
 	unsigned long page_size = 1UL << page_shift;
 	unsigned long nr_pages = (end - start) >> page_shift;
-	bool local, full;
+	bool fullmm = (end == TLB_FLUSH_ALL);
+	bool flush_pid;
+	enum tlb_flush_type type;
 
 	pid = mm->context.id;
 	if (unlikely(pid == MMU_NO_CONTEXT))
@@ -901,24 +1005,18 @@ static inline void __radix__flush_tlb_range(struct mm_struct *mm,
 
 	preempt_disable();
 	smp_mb(); /* see radix__flush_tlb_mm */
-	if (!mm_is_thread_local(mm)) {
-		if (unlikely(mm_is_singlethreaded(mm))) {
-			if (end != TLB_FLUSH_ALL) {
-				exit_flush_lazy_tlbs(mm);
-				goto is_local;
-			}
-		}
-		local = false;
-		full = (end == TLB_FLUSH_ALL ||
-				nr_pages > tlb_single_page_flush_ceiling);
-	} else {
-is_local:
-		local = true;
-		full = (end == TLB_FLUSH_ALL ||
-				nr_pages > tlb_local_single_page_flush_ceiling);
-	}
+	type = flush_type_needed(mm, fullmm);
+	if (type == FLUSH_TYPE_NONE)
+		goto out;
+
+	if (fullmm)
+		flush_pid = true;
+	else if (type == FLUSH_TYPE_GLOBAL)
+		flush_pid = nr_pages > tlb_single_page_flush_ceiling;
+	else
+		flush_pid = nr_pages > tlb_local_single_page_flush_ceiling;
 
-	if (!mmu_has_feature(MMU_FTR_GTSE) && !local) {
+	if (!mmu_has_feature(MMU_FTR_GTSE) && type == FLUSH_TYPE_GLOBAL) {
 		unsigned long tgt = H_RPTI_TARGET_CMMU;
 		unsigned long pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize);
 
@@ -928,8 +1026,8 @@ is_local:
 			tgt |= H_RPTI_TARGET_NMMU;
 		pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB, pg_sizes,
 				       start, end);
-	} else if (full) {
-		if (local) {
+	} else if (flush_pid) {
+		if (type == FLUSH_TYPE_LOCAL) {
 			_tlbiel_pid(pid, RIC_FLUSH_TLB);
 		} else {
 			if (cputlb_use_tlbie()) {
@@ -952,7 +1050,7 @@ is_local:
 				hflush = true;
 		}
 
-		if (local) {
+		if (type == FLUSH_TYPE_LOCAL) {
 			asm volatile("ptesync": : :"memory");
 			__tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);
 			if (hflush)
@@ -974,6 +1072,7 @@ is_local:
 					hstart, hend, pid, PMD_SIZE, MMU_PAGE_2M, false);
 		}
 	}
+out:
 	preempt_enable();
 }
 
@@ -1085,32 +1184,30 @@ static __always_inline void __radix__flush_tlb_range_psize(struct mm_struct *mm,
 	unsigned int page_shift = mmu_psize_defs[psize].shift;
 	unsigned long page_size = 1UL << page_shift;
 	unsigned long nr_pages = (end - start) >> page_shift;
-	bool local, full;
+	bool fullmm = (end == TLB_FLUSH_ALL);
+	bool flush_pid;
+	enum tlb_flush_type type;
 
 	pid = mm->context.id;
 	if (unlikely(pid == MMU_NO_CONTEXT))
 		return;
 
+	fullmm = (end == TLB_FLUSH_ALL);
+
 	preempt_disable();
 	smp_mb(); /* see radix__flush_tlb_mm */
-	if (!mm_is_thread_local(mm)) {
-		if (unlikely(mm_is_singlethreaded(mm))) {
-			if (end != TLB_FLUSH_ALL) {
-				exit_flush_lazy_tlbs(mm);
-				goto is_local;
-			}
-		}
-		local = false;
-		full = (end == TLB_FLUSH_ALL ||
-				nr_pages > tlb_single_page_flush_ceiling);
-	} else {
-is_local:
-		local = true;
-		full = (end == TLB_FLUSH_ALL ||
-				nr_pages > tlb_local_single_page_flush_ceiling);
-	}
+	type = flush_type_needed(mm, fullmm);
+	if (type == FLUSH_TYPE_NONE)
+		goto out;
+
+	if (fullmm)
+		flush_pid = true;
+	else if (type == FLUSH_TYPE_GLOBAL)
+		flush_pid = nr_pages > tlb_single_page_flush_ceiling;
+	else
+		flush_pid = nr_pages > tlb_local_single_page_flush_ceiling;
 
-	if (!mmu_has_feature(MMU_FTR_GTSE) && !local) {
+	if (!mmu_has_feature(MMU_FTR_GTSE) && type == FLUSH_TYPE_GLOBAL) {
 		unsigned long tgt = H_RPTI_TARGET_CMMU;
 		unsigned long type = H_RPTI_TYPE_TLB;
 		unsigned long pg_sizes = psize_to_rpti_pgsize(psize);
@@ -1120,8 +1217,8 @@ is_local:
 		if (atomic_read(&mm->context.copros) > 0)
 			tgt |= H_RPTI_TARGET_NMMU;
 		pseries_rpt_invalidate(pid, tgt, type, pg_sizes, start, end);
-	} else if (full) {
-		if (local) {
+	} else if (flush_pid) {
+		if (type == FLUSH_TYPE_LOCAL) {
 			_tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
 		} else {
 			if (cputlb_use_tlbie()) {
@@ -1137,7 +1234,7 @@ is_local:
 
 		}
 	} else {
-		if (local)
+		if (type == FLUSH_TYPE_LOCAL)
 			_tlbiel_va_range(start, end, pid, page_size, psize, also_pwc);
 		else if (cputlb_use_tlbie())
 			_tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
@@ -1145,6 +1242,7 @@ is_local:
 			_tlbiel_va_range_multicast(mm,
 					start, end, pid, page_size, psize, also_pwc);
 	}
+out:
 	preempt_enable();
 }
 
@@ -1164,6 +1262,7 @@ static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long
 void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
 {
 	unsigned long pid, end;
+	enum tlb_flush_type type;
 
 	pid = mm->context.id;
 	if (unlikely(pid == MMU_NO_CONTEXT))
@@ -1180,11 +1279,10 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
 	/* Otherwise first do the PWC, then iterate the pages. */
 	preempt_disable();
 	smp_mb(); /* see radix__flush_tlb_mm */
-	if (!mm_is_thread_local(mm)) {
-		if (unlikely(mm_is_singlethreaded(mm))) {
-			exit_flush_lazy_tlbs(mm);
-			goto local;
-		}
+	type = flush_type_needed(mm, false);
+	if (type == FLUSH_TYPE_LOCAL) {
+		_tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
+	} else if (type == FLUSH_TYPE_GLOBAL) {
 		if (!mmu_has_feature(MMU_FTR_GTSE)) {
 			unsigned long tgt, type, pg_sizes;
 
@@ -1202,9 +1300,6 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
 		else
 			_tlbiel_va_range_multicast(mm,
 					addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
-	} else {
-local:
-		_tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
 	}
 
 	preempt_enable();
diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c
index 584567970c11..c91bd85eb90e 100644
--- a/arch/powerpc/mm/book3s64/slb.c
+++ b/arch/powerpc/mm/book3s64/slb.c
@@ -10,6 +10,7 @@
  */
 
 #include <asm/asm-prototypes.h>
+#include <asm/interrupt.h>
 #include <asm/mmu.h>
 #include <asm/mmu_context.h>
 #include <asm/paca.h>
@@ -813,8 +814,9 @@ static long slb_allocate_user(struct mm_struct *mm, unsigned long ea)
 	return slb_insert_entry(ea, context, flags, ssize, false);
 }
 
-long do_slb_fault(struct pt_regs *regs, unsigned long ea)
+DEFINE_INTERRUPT_HANDLER_RAW(do_slb_fault)
 {
+	unsigned long ea = regs->dar;
 	unsigned long id = get_region_id(ea);
 
 	/* IRQs are not reconciled here, so can't check irqs_disabled */
@@ -824,19 +826,21 @@ long do_slb_fault(struct pt_regs *regs, unsigned long ea)
 		return -EINVAL;
 
 	/*
-	 * SLB kernel faults must be very careful not to touch anything
-	 * that is not bolted. E.g., PACA and global variables are okay,
-	 * mm->context stuff is not.
-	 *
-	 * SLB user faults can access all of kernel memory, but must be
-	 * careful not to touch things like IRQ state because it is not
-	 * "reconciled" here. The difficulty is that we must use
-	 * fast_exception_return to return from kernel SLB faults without
-	 * looking at possible non-bolted memory. We could test user vs
-	 * kernel faults in the interrupt handler asm and do a full fault,
-	 * reconcile, ret_from_except for user faults which would make them
-	 * first class kernel code. But for performance it's probably nicer
-	 * if they go via fast_exception_return too.
+	 * SLB kernel faults must be very careful not to touch anything that is
+	 * not bolted. E.g., PACA and global variables are okay, mm->context
+	 * stuff is not. SLB user faults may access all of memory (and induce
+	 * one recursive SLB kernel fault), so the kernel fault must not
+	 * trample on the user fault state at those points.
+	 */
+
+	/*
+	 * This is a raw interrupt handler, for performance, so that
+	 * fast_interrupt_return can be used. The handler must not touch local
+	 * irq state, or schedule. We could test for usermode and upgrade to a
+	 * normal process context (synchronous) interrupt for those, which
+	 * would make them first-class kernel code and able to be traced and
+	 * instrumented, although performance would suffer a bit, it would
+	 * probably be a good tradeoff.
 	 */
 	if (id >= LINEAR_MAP_REGION_ID) {
 		long err;
@@ -865,13 +869,15 @@ long do_slb_fault(struct pt_regs *regs, unsigned long ea)
 	}
 }
 
-void do_bad_slb_fault(struct pt_regs *regs, unsigned long ea, long err)
+DEFINE_INTERRUPT_HANDLER(do_bad_slb_fault)
 {
+	int err = regs->result;
+
 	if (err == -EFAULT) {
 		if (user_mode(regs))
-			_exception(SIGSEGV, regs, SEGV_BNDERR, ea);
+			_exception(SIGSEGV, regs, SEGV_BNDERR, regs->dar);
 		else
-			bad_page_fault(regs, ea, SIGSEGV);
+			bad_page_fault(regs, SIGSEGV);
 	} else if (err == -EINVAL) {
 		unrecoverable_exception(regs);
 	} else {
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 8961b44f350c..bb368257b55c 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -34,6 +34,7 @@
 #include <linux/uaccess.h>
 
 #include <asm/firmware.h>
+#include <asm/interrupt.h>
 #include <asm/page.h>
 #include <asm/mmu.h>
 #include <asm/mmu_context.h>
@@ -377,18 +378,16 @@ static void sanity_check_fault(bool is_write, bool is_user,
 
 /*
  * For 600- and 800-family processors, the error_code parameter is DSISR
- * for a data fault, SRR1 for an instruction fault. For 400-family processors
- * the error_code parameter is ESR for a data fault, 0 for an instruction
- * fault.
- * For 64-bit processors, the error_code parameter is
- *  - DSISR for a non-SLB data access fault,
- *  - SRR1 & 0x08000000 for a non-SLB instruction access fault
- *  - 0 any SLB fault.
+ * for a data fault, SRR1 for an instruction fault.
+ * For 400-family processors the error_code parameter is ESR for a data fault,
+ * 0 for an instruction fault.
+ * For 64-bit processors, the error_code parameter is DSISR for a data access
+ * fault, SRR1 & 0x08000000 for an instruction access fault.
  *
  * The return value is 0 if the fault was handled, or the signal
  * number if this is a kernel fault that can't be handled here.
  */
-static int __do_page_fault(struct pt_regs *regs, unsigned long address,
+static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
 			   unsigned long error_code)
 {
 	struct vm_area_struct * vma;
@@ -435,9 +434,7 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,
 		return bad_area_nosemaphore(regs, address);
 	}
 
-	/* We restore the interrupt state now */
-	if (!arch_irq_disabled_regs(regs))
-		local_irq_enable();
+	interrupt_cond_local_irq_enable(regs);
 
 	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 
@@ -540,34 +537,51 @@ retry:
 
 	return 0;
 }
-NOKPROBE_SYMBOL(__do_page_fault);
+NOKPROBE_SYMBOL(___do_page_fault);
 
-int do_page_fault(struct pt_regs *regs, unsigned long address,
-		  unsigned long error_code)
+static long __do_page_fault(struct pt_regs *regs)
 {
 	const struct exception_table_entry *entry;
-	enum ctx_state prev_state = exception_enter();
-	int rc = __do_page_fault(regs, address, error_code);
-	exception_exit(prev_state);
-	if (likely(!rc))
-		return 0;
+	long err;
+
+	err = ___do_page_fault(regs, regs->dar, regs->dsisr);
+	if (likely(!err))
+		return err;
 
 	entry = search_exception_tables(regs->nip);
-	if (unlikely(!entry))
-		return rc;
+	if (likely(entry)) {
+		instruction_pointer_set(regs, extable_fixup(entry));
+		return 0;
+	} else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64)) {
+		__bad_page_fault(regs, err);
+		return 0;
+	} else {
+		/* 32-bit and 64e handle the bad page fault in asm */
+		return err;
+	}
+}
+NOKPROBE_SYMBOL(__do_page_fault);
 
-	instruction_pointer_set(regs, extable_fixup(entry));
+DEFINE_INTERRUPT_HANDLER_RET(do_page_fault)
+{
+	return __do_page_fault(regs);
+}
 
-	return 0;
+#ifdef CONFIG_PPC_BOOK3S_64
+/* Same as do_page_fault but interrupt entry has already run in do_hash_fault */
+long hash__do_page_fault(struct pt_regs *regs)
+{
+	return __do_page_fault(regs);
 }
-NOKPROBE_SYMBOL(do_page_fault);
+NOKPROBE_SYMBOL(hash__do_page_fault);
+#endif
 
 /*
  * bad_page_fault is called when we have a bad access from the kernel.
  * It is called from the DSI and ISI handlers in head.S and from some
  * of the procedures in traps.c.
  */
-void __bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
+void __bad_page_fault(struct pt_regs *regs, int sig)
 {
 	int is_write = page_fault_is_write(regs->dsisr);
 
@@ -605,7 +619,7 @@ void __bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
 	die("Kernel access of bad area", regs, sig);
 }
 
-void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
+void bad_page_fault(struct pt_regs *regs, int sig)
 {
 	const struct exception_table_entry *entry;
 
@@ -614,5 +628,12 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
 	if (entry)
 		instruction_pointer_set(regs, extable_fixup(entry));
 	else
-		__bad_page_fault(regs, address, sig);
+		__bad_page_fault(regs, sig);
 }
+
+#ifdef CONFIG_PPC_BOOK3S_64
+DEFINE_INTERRUPT_HANDLER(do_bad_page_fault_segv)
+{
+	bad_page_fault(regs, SIGSEGV);
+}
+#endif
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 8b3cc4d688e8..d142b76d507d 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -217,7 +217,7 @@ void __init pseries_add_gpage(u64 addr, u64 page_size, unsigned long number_of_p
 	}
 }
 
-int __init pseries_alloc_bootmem_huge_page(struct hstate *hstate)
+static int __init pseries_alloc_bootmem_huge_page(struct hstate *hstate)
 {
 	struct huge_bootmem_page *m;
 	if (nr_gpages == 0)
@@ -663,24 +663,6 @@ static int __init hugetlbpage_init(void)
 
 arch_initcall(hugetlbpage_init);
 
-void flush_dcache_icache_hugepage(struct page *page)
-{
-	int i;
-	void *start;
-
-	BUG_ON(!PageCompound(page));
-
-	for (i = 0; i < compound_nr(page); i++) {
-		if (!PageHighMem(page)) {
-			__flush_dcache_icache(page_address(page+i));
-		} else {
-			start = kmap_atomic(page+i);
-			__flush_dcache_icache(start);
-			kunmap_atomic(start);
-		}
-	}
-}
-
 void __init gigantic_hugetlb_cma_reserve(void)
 {
 	unsigned long order = 0;
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index afab328d0887..4e8ce6d85232 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -91,27 +91,6 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end)
 	return -ENODEV;
 }
 
-#define FLUSH_CHUNK_SIZE SZ_1G
-/**
- * flush_dcache_range_chunked(): Write any modified data cache blocks out to
- * memory and invalidate them, in chunks of up to FLUSH_CHUNK_SIZE
- * Does not invalidate the corresponding instruction cache blocks.
- *
- * @start: the start address
- * @stop: the stop address (exclusive)
- * @chunk: the max size of the chunks
- */
-static void flush_dcache_range_chunked(unsigned long start, unsigned long stop,
-				       unsigned long chunk)
-{
-	unsigned long i;
-
-	for (i = start; i < stop; i += chunk) {
-		flush_dcache_range(i, min(stop, i + chunk));
-		cond_resched();
-	}
-}
-
 int __ref arch_create_linear_mapping(int nid, u64 start, u64 size,
 				     struct mhp_params *params)
 {
@@ -136,7 +115,6 @@ void __ref arch_remove_linear_mapping(u64 start, u64 size)
 
 	/* Remove htab bolted mappings for this section of memory */
 	start = (unsigned long)__va(start);
-	flush_dcache_range_chunked(start, start + size, FLUSH_CHUNK_SIZE);
 
 	mutex_lock(&linear_mapping_mutex);
 	ret = remove_section_mapping(start, start + size);
@@ -489,19 +467,35 @@ void flush_dcache_page(struct page *page)
 	if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
 		return;
 	/* avoid an atomic op if possible */
-	if (test_bit(PG_arch_1, &page->flags))
-		clear_bit(PG_arch_1, &page->flags);
+	if (test_bit(PG_dcache_clean, &page->flags))
+		clear_bit(PG_dcache_clean, &page->flags);
 }
 EXPORT_SYMBOL(flush_dcache_page);
 
-void flush_dcache_icache_page(struct page *page)
+static void flush_dcache_icache_hugepage(struct page *page)
 {
-#ifdef CONFIG_HUGETLB_PAGE
-	if (PageCompound(page)) {
-		flush_dcache_icache_hugepage(page);
-		return;
+	int i;
+	void *start;
+
+	BUG_ON(!PageCompound(page));
+
+	for (i = 0; i < compound_nr(page); i++) {
+		if (!PageHighMem(page)) {
+			__flush_dcache_icache(page_address(page+i));
+		} else {
+			start = kmap_atomic(page+i);
+			__flush_dcache_icache(start);
+			kunmap_atomic(start);
+		}
 	}
-#endif
+}
+
+void flush_dcache_icache_page(struct page *page)
+{
+
+	if (PageCompound(page))
+		return flush_dcache_icache_hugepage(page);
+
 #if defined(CONFIG_PPC_8xx) || defined(CONFIG_PPC64)
 	/* On 8xx there is no need to kmap since highmem is not supported */
 	__flush_dcache_icache(page_address(page));
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 15555c95cebc..354611940118 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -26,6 +26,7 @@
 #include <asm/tlbflush.h>
 #include <asm/tlb.h>
 #include <asm/hugetlb.h>
+#include <asm/pte-walk.h>
 
 static inline int is_exec_fault(void)
 {
@@ -81,9 +82,9 @@ static pte_t set_pte_filter_hash(pte_t pte)
 		struct page *pg = maybe_pte_to_page(pte);
 		if (!pg)
 			return pte;
-		if (!test_bit(PG_arch_1, &pg->flags)) {
+		if (!test_bit(PG_dcache_clean, &pg->flags)) {
 			flush_dcache_icache_page(pg);
-			set_bit(PG_arch_1, &pg->flags);
+			set_bit(PG_dcache_clean, &pg->flags);
 		}
 	}
 	return pte;
@@ -116,13 +117,13 @@ static inline pte_t set_pte_filter(pte_t pte)
 		return pte;
 
 	/* If the page clean, we move on */
-	if (test_bit(PG_arch_1, &pg->flags))
+	if (test_bit(PG_dcache_clean, &pg->flags))
 		return pte;
 
 	/* If it's an exec fault, we flush the cache and make it clean */
 	if (is_exec_fault()) {
 		flush_dcache_icache_page(pg);
-		set_bit(PG_arch_1, &pg->flags);
+		set_bit(PG_dcache_clean, &pg->flags);
 		return pte;
 	}
 
@@ -161,12 +162,12 @@ static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma,
 		goto bail;
 
 	/* If the page is already clean, we move on */
-	if (test_bit(PG_arch_1, &pg->flags))
+	if (test_bit(PG_dcache_clean, &pg->flags))
 		goto bail;
 
-	/* Clean the page and set PG_arch_1 */
+	/* Clean the page and set PG_dcache_clean */
 	flush_dcache_icache_page(pg);
-	set_bit(PG_arch_1, &pg->flags);
+	set_bit(PG_dcache_clean, &pg->flags);
 
  bail:
 	return pte_mkexec(pte);
diff --git a/arch/powerpc/mm/ptdump/segment_regs.c b/arch/powerpc/mm/ptdump/segment_regs.c
index dde2fe8de4b2..565048a0c9be 100644
--- a/arch/powerpc/mm/ptdump/segment_regs.c
+++ b/arch/powerpc/mm/ptdump/segment_regs.c
@@ -10,7 +10,7 @@
 
 static void seg_show(struct seq_file *m, int i)
 {
-	u32 val = mfsrin(i << 28);
+	u32 val = mfsr(i << 28);
 
 	seq_printf(m, "0x%01x0000000-0x%01xfffffff ", i, i);
 	seq_printf(m, "Kern key %d ", (val >> 30) & 1);
author	Linus Torvalds <torvalds@linux-foundation.org>	2021-02-22 14:34:00 -0800
committer	Linus Torvalds <torvalds@linux-foundation.org>	2021-02-22 14:34:00 -0800
commit	b12b47249688915e987a9a2a393b522f86f6b7ab (patch)
tree	eae34f7fa64474bb3123f7b69c411ade6127c41f /arch/powerpc/mm
parent	6ff6f86bc4d02949b5688d69de1c89c310d62c44 (diff)
parent	82d2c16b350f72aa21ac2a6860c542aa4b43a51e (diff)