summaryrefslogtreecommitdiff
path: root/arch/s390/include/asm/pgtable.h
diff options
context:
space:
mode:
authorGerald Schaefer <gerald.schaefer@linux.ibm.com>2023-02-06 17:48:21 +0100
committerHeiko Carstens <hca@linux.ibm.com>2023-02-14 11:45:39 +0100
commit0807b856521f3313d3912ebb52a9144215c4ff08 (patch)
tree373a6c2ab0ddef29ae5d749a82929fb5899972fa /arch/s390/include/asm/pgtable.h
parentd939474b3d92624744a334c9e5f58ce3934584b5 (diff)
s390/mm: add support for RDP (Reset DAT-Protection)
RDP instruction allows to reset DAT-protection bit in a PTE, with less CPU synchronization overhead than IPTE instruction. In particular, IPTE can cause machine-wide synchronization overhead, and excessive IPTE usage can negatively impact machine performance. RDP can be used instead of IPTE, if the new PTE only differs in SW bits and _PAGE_PROTECT HW bit, for PTE protection changes from RO to RW. SW PTE bit changes are allowed, e.g. for dirty and young tracking, but none of the other HW-defined part of the PTE must change. This is because the architecture forbids such changes to an active and valid PTE, which is why invalidation with IPTE is always used first, before writing a new entry. The RDP optimization helps mainly for fault-driven SW dirty-bit tracking. Writable PTEs are initially always mapped with HW _PAGE_PROTECT bit set, to allow SW dirty-bit accounting on first write protection fault, where the DAT-protection would then be reset. The reset is now done with RDP instead of IPTE, if RDP instruction is available. RDP cannot always guarantee that the DAT-protection reset is propagated to all CPUs immediately. This means that spurious TLB protection faults on other CPUs can now occur. For this, common code provides a flush_tlb_fix_spurious_fault() handler, which will now be used to do a CPU-local TLB flush. However, this will clear the whole TLB of a CPU, and not just the affected entry. For more fine-grained flushing, by simply doing a (local) RDP again, flush_tlb_fix_spurious_fault() would need to also provide the PTE pointer. Note that spurious TLB protection faults cannot really be distinguished from racing pagetable updates, where another thread already installed the correct PTE. In such a case, the local TLB flush would be unnecessary overhead, but overall reduction of CPU synchronization overhead by not using IPTE is still expected to be beneficial. Reviewed-by: Alexander Gordeev <agordeev@linux.ibm.com> Signed-off-by: Gerald Schaefer <gerald.schaefer@linux.ibm.com> Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Diffstat (limited to 'arch/s390/include/asm/pgtable.h')
-rw-r--r--arch/s390/include/asm/pgtable.h62
1 files changed, 61 insertions, 1 deletions
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 0f1eba005f6d..b87ca864d27d 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -182,6 +182,8 @@ static inline int is_module_addr(void *addr)
#define _PAGE_SOFT_DIRTY 0x000
#endif
+#define _PAGE_SW_BITS 0xffUL /* All SW bits */
+
#define _PAGE_SWP_EXCLUSIVE _PAGE_LARGE /* SW pte exclusive swap bit */
/* Set of bits not changed in pte_modify */
@@ -189,6 +191,12 @@ static inline int is_module_addr(void *addr)
_PAGE_YOUNG | _PAGE_SOFT_DIRTY)
/*
+ * Mask of bits that must not be changed with RDP. Allow only _PAGE_PROTECT
+ * HW bit and all SW bits.
+ */
+#define _PAGE_RDP_MASK ~(_PAGE_PROTECT | _PAGE_SW_BITS)
+
+/*
* handle_pte_fault uses pte_present and pte_none to find out the pte type
* WITHOUT holding the page table lock. The _PAGE_PRESENT bit is used to
* distinguish present from not-present ptes. It is changed only with the page
@@ -1052,6 +1060,19 @@ static inline pte_t pte_mkhuge(pte_t pte)
#define IPTE_NODAT 0x400
#define IPTE_GUEST_ASCE 0x800
+static __always_inline void __ptep_rdp(unsigned long addr, pte_t *ptep,
+ unsigned long opt, unsigned long asce,
+ int local)
+{
+ unsigned long pto;
+
+ pto = __pa(ptep) & ~(PTRS_PER_PTE * sizeof(pte_t) - 1);
+ asm volatile(".insn rrf,0xb98b0000,%[r1],%[r2],%[asce],%[m4]"
+ : "+m" (*ptep)
+ : [r1] "a" (pto), [r2] "a" ((addr & PAGE_MASK) | opt),
+ [asce] "a" (asce), [m4] "i" (local));
+}
+
static __always_inline void __ptep_ipte(unsigned long address, pte_t *ptep,
unsigned long opt, unsigned long asce,
int local)
@@ -1202,6 +1223,42 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm,
ptep_xchg_lazy(mm, addr, ptep, pte_wrprotect(pte));
}
+/*
+ * Check if PTEs only differ in _PAGE_PROTECT HW bit, but also allow SW PTE
+ * bits in the comparison. Those might change e.g. because of dirty and young
+ * tracking.
+ */
+static inline int pte_allow_rdp(pte_t old, pte_t new)
+{
+ /*
+ * Only allow changes from RO to RW
+ */
+ if (!(pte_val(old) & _PAGE_PROTECT) || pte_val(new) & _PAGE_PROTECT)
+ return 0;
+
+ return (pte_val(old) & _PAGE_RDP_MASK) == (pte_val(new) & _PAGE_RDP_MASK);
+}
+
+static inline void flush_tlb_fix_spurious_fault(struct vm_area_struct *vma,
+ unsigned long address)
+{
+ /*
+ * RDP might not have propagated the PTE protection reset to all CPUs,
+ * so there could be spurious TLB protection faults.
+ * NOTE: This will also be called when a racing pagetable update on
+ * another thread already installed the correct PTE. Both cases cannot
+ * really be distinguished.
+ * Therefore, only do the local TLB flush when RDP can be used, to avoid
+ * unnecessary overhead.
+ */
+ if (MACHINE_HAS_RDP)
+ asm volatile("ptlb" : : : "memory");
+}
+#define flush_tlb_fix_spurious_fault flush_tlb_fix_spurious_fault
+
+void ptep_reset_dat_prot(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
+ pte_t new);
+
#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
static inline int ptep_set_access_flags(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep,
@@ -1209,7 +1266,10 @@ static inline int ptep_set_access_flags(struct vm_area_struct *vma,
{
if (pte_same(*ptep, entry))
return 0;
- ptep_xchg_direct(vma->vm_mm, addr, ptep, entry);
+ if (MACHINE_HAS_RDP && !mm_has_pgste(vma->vm_mm) && pte_allow_rdp(*ptep, entry))
+ ptep_reset_dat_prot(vma->vm_mm, addr, ptep, entry);
+ else
+ ptep_xchg_direct(vma->vm_mm, addr, ptep, entry);
return 1;
}