diff options
Diffstat (limited to 'arch/sparc/include/asm/tsb.h')
| -rw-r--r-- | arch/sparc/include/asm/tsb.h | 230 |
1 files changed, 123 insertions, 107 deletions
diff --git a/arch/sparc/include/asm/tsb.h b/arch/sparc/include/asm/tsb.h index e696432b950d..239be259e166 100644 --- a/arch/sparc/include/asm/tsb.h +++ b/arch/sparc/include/asm/tsb.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _SPARC64_TSB_H #define _SPARC64_TSB_H @@ -58,7 +59,7 @@ * The kernel TSB is locked into the TLB by virtue of being in the * kernel image, so we don't play these games for swapper_tsb access. */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct tsb_ldquad_phys_patch_entry { unsigned int addr; unsigned int sun4u_insn; @@ -133,107 +134,124 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end; sub TSB, 0x8, TSB; \ TSB_STORE(TSB, TAG); - /* Do a kernel page table walk. Leaves physical PTE pointer in - * REG1. Jumps to FAIL_LABEL on early page table walk termination. - * VADDR will not be clobbered, but REG2 will. + /* Do a kernel page table walk. Leaves valid PTE value in + * REG1. Jumps to FAIL_LABEL on early page table walk + * termination. VADDR will not be clobbered, but REG2 will. + * + * There are two masks we must apply to propagate bits from + * the virtual address into the PTE physical address field + * when dealing with huge pages. This is because the page + * table boundaries do not match the huge page size(s) the + * hardware supports. + * + * In these cases we propagate the bits that are below the + * page table level where we saw the huge page mapping, but + * are still within the relevant physical bits for the huge + * page size in question. So for PMD mappings (which fall on + * bit 23, for 8MB per PMD) we must propagate bit 22 for a + * 4MB huge page. For huge PUDs (which fall on bit 33, for + * 8GB per PUD), we have to accommodate 256MB and 2GB huge + * pages. So for those we propagate bits 32 to 28. */ #define KERN_PGTABLE_WALK(VADDR, REG1, REG2, FAIL_LABEL) \ sethi %hi(swapper_pg_dir), REG1; \ or REG1, %lo(swapper_pg_dir), REG1; \ sllx VADDR, 64 - (PGDIR_SHIFT + PGDIR_BITS), REG2; \ srlx REG2, 64 - PAGE_SHIFT, REG2; \ - andn REG2, 0x3, REG2; \ - lduw [REG1 + REG2], REG1; \ + andn REG2, 0x7, REG2; \ + ldx [REG1 + REG2], REG1; \ brz,pn REG1, FAIL_LABEL; \ - sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \ + sllx VADDR, 64 - (PUD_SHIFT + PUD_BITS), REG2; \ + srlx REG2, 64 - PAGE_SHIFT, REG2; \ + andn REG2, 0x7, REG2; \ + ldxa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \ + brz,pn REG1, FAIL_LABEL; \ + sethi %uhi(_PAGE_PUD_HUGE), REG2; \ + brz,pn REG1, FAIL_LABEL; \ + sllx REG2, 32, REG2; \ + andcc REG1, REG2, %g0; \ + sethi %hi(0xf8000000), REG2; \ + bne,pt %xcc, 697f; \ + sllx REG2, 1, REG2; \ + sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \ srlx REG2, 64 - PAGE_SHIFT, REG2; \ - sllx REG1, PGD_PADDR_SHIFT, REG1; \ - andn REG2, 0x3, REG2; \ - lduwa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \ + andn REG2, 0x7, REG2; \ + ldxa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \ + sethi %uhi(_PAGE_PMD_HUGE), REG2; \ brz,pn REG1, FAIL_LABEL; \ - sllx VADDR, 64 - PMD_SHIFT, REG2; \ - srlx REG2, 64 - (PAGE_SHIFT - 1), REG2; \ - sllx REG1, PMD_PADDR_SHIFT, REG1; \ + sllx REG2, 32, REG2; \ + andcc REG1, REG2, %g0; \ + be,pn %xcc, 698f; \ + sethi %hi(0x400000), REG2; \ +697: brgez,pn REG1, FAIL_LABEL; \ + andn REG1, REG2, REG1; \ + and VADDR, REG2, REG2; \ + ba,pt %xcc, 699f; \ + or REG1, REG2, REG1; \ +698: sllx VADDR, 64 - PMD_SHIFT, REG2; \ + srlx REG2, 64 - PAGE_SHIFT, REG2; \ andn REG2, 0x7, REG2; \ - add REG1, REG2, REG1; + ldxa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \ + brgez,pn REG1, FAIL_LABEL; \ + nop; \ +699: - /* These macros exists only to make the PMD translator below - * easier to read. It hides the ELF section switch for the - * sun4v code patching. + /* PUD has been loaded into REG1, interpret the value, seeing + * if it is a HUGE PUD or a normal one. If it is not valid + * then jump to FAIL_LABEL. If it is a HUGE PUD, and it + * translates to a valid PTE, branch to PTE_LABEL. + * + * We have to propagate bits [32:22] from the virtual address + * to resolve at 4M granularity. */ -#define OR_PTE_BIT_1INSN(REG, NAME) \ -661: or REG, _PAGE_##NAME##_4U, REG; \ - .section .sun4v_1insn_patch, "ax"; \ - .word 661b; \ - or REG, _PAGE_##NAME##_4V, REG; \ - .previous; - -#define OR_PTE_BIT_2INSN(REG, TMP, NAME) \ -661: sethi %hi(_PAGE_##NAME##_4U), TMP; \ - or REG, TMP, REG; \ - .section .sun4v_2insn_patch, "ax"; \ - .word 661b; \ - mov -1, TMP; \ - or REG, _PAGE_##NAME##_4V, REG; \ - .previous; - - /* Load into REG the PTE value for VALID, CACHE, and SZHUGE. */ -#define BUILD_PTE_VALID_SZHUGE_CACHE(REG) \ -661: sethi %uhi(_PAGE_VALID|_PAGE_SZHUGE_4U), REG; \ - .section .sun4v_1insn_patch, "ax"; \ - .word 661b; \ - sethi %uhi(_PAGE_VALID), REG; \ - .previous; \ - sllx REG, 32, REG; \ -661: or REG, _PAGE_CP_4U|_PAGE_CV_4U, REG; \ - .section .sun4v_1insn_patch, "ax"; \ - .word 661b; \ - or REG, _PAGE_CP_4V|_PAGE_CV_4V|_PAGE_SZHUGE_4V, REG; \ - .previous; +#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) +#define USER_PGTABLE_CHECK_PUD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \ +700: ba 700f; \ + nop; \ + .section .pud_huge_patch, "ax"; \ + .word 700b; \ + nop; \ + .previous; \ + brz,pn REG1, FAIL_LABEL; \ + sethi %uhi(_PAGE_PUD_HUGE), REG2; \ + sllx REG2, 32, REG2; \ + andcc REG1, REG2, %g0; \ + be,pt %xcc, 700f; \ + sethi %hi(0xffe00000), REG2; \ + sllx REG2, 1, REG2; \ + brgez,pn REG1, FAIL_LABEL; \ + andn REG1, REG2, REG1; \ + and VADDR, REG2, REG2; \ + brlz,pt REG1, PTE_LABEL; \ + or REG1, REG2, REG1; \ +700: +#else +#define USER_PGTABLE_CHECK_PUD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \ + brz,pn REG1, FAIL_LABEL; \ + nop; +#endif /* PMD has been loaded into REG1, interpret the value, seeing * if it is a HUGE PMD or a normal one. If it is not valid * then jump to FAIL_LABEL. If it is a HUGE PMD, and it * translates to a valid PTE, branch to PTE_LABEL. * - * We translate the PMD by hand, one bit at a time, - * constructing the huge PTE. - * - * So we construct the PTE in REG2 as follows: - * - * 1) Extract the PMD PFN from REG1 and place it into REG2. - * - * 2) Translate PMD protection bits in REG1 into REG2, one bit - * at a time using andcc tests on REG1 and OR's into REG2. - * - * Only two bits to be concerned with here, EXEC and WRITE. - * Now REG1 is freed up and we can use it as a temporary. - * - * 3) Construct the VALID, CACHE, and page size PTE bits in - * REG1, OR with REG2 to form final PTE. + * We have to propagate the 4MB bit of the virtual address + * because we are fabricating 8MB pages using 4MB hw pages. */ -#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) #define USER_PGTABLE_CHECK_PMD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \ - brz,pn REG1, FAIL_LABEL; \ - andcc REG1, PMD_ISHUGE, %g0; \ - be,pt %xcc, 700f; \ - and REG1, PMD_HUGE_PRESENT|PMD_HUGE_ACCESSED, REG2; \ - cmp REG2, PMD_HUGE_PRESENT|PMD_HUGE_ACCESSED; \ - bne,pn %xcc, FAIL_LABEL; \ - andn REG1, PMD_HUGE_PROTBITS, REG2; \ - sllx REG2, PMD_PADDR_SHIFT, REG2; \ - /* REG2 now holds PFN << PAGE_SHIFT */ \ - andcc REG1, PMD_HUGE_WRITE, %g0; \ - bne,a,pt %xcc, 1f; \ - OR_PTE_BIT_1INSN(REG2, W); \ -1: andcc REG1, PMD_HUGE_EXEC, %g0; \ - be,pt %xcc, 1f; \ - nop; \ - OR_PTE_BIT_2INSN(REG2, REG1, EXEC); \ - /* REG1 can now be clobbered, build final PTE */ \ -1: BUILD_PTE_VALID_SZHUGE_CACHE(REG1); \ - ba,pt %xcc, PTE_LABEL; \ - or REG1, REG2, REG1; \ + brz,pn REG1, FAIL_LABEL; \ + sethi %uhi(_PAGE_PMD_HUGE), REG2; \ + sllx REG2, 32, REG2; \ + andcc REG1, REG2, %g0; \ + be,pt %xcc, 700f; \ + sethi %hi(4 * 1024 * 1024), REG2; \ + brgez,pn REG1, FAIL_LABEL; \ + andn REG1, REG2, REG1; \ + and VADDR, REG2, REG2; \ + brlz,pt REG1, PTE_LABEL; \ + or REG1, REG2, REG1; \ 700: #else #define USER_PGTABLE_CHECK_PMD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \ @@ -253,18 +271,22 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end; #define USER_PGTABLE_WALK_TL1(VADDR, PHYS_PGD, REG1, REG2, FAIL_LABEL) \ sllx VADDR, 64 - (PGDIR_SHIFT + PGDIR_BITS), REG2; \ srlx REG2, 64 - PAGE_SHIFT, REG2; \ - andn REG2, 0x3, REG2; \ - lduwa [PHYS_PGD + REG2] ASI_PHYS_USE_EC, REG1; \ + andn REG2, 0x7, REG2; \ + ldxa [PHYS_PGD + REG2] ASI_PHYS_USE_EC, REG1; \ + brz,pn REG1, FAIL_LABEL; \ + sllx VADDR, 64 - (PUD_SHIFT + PUD_BITS), REG2; \ + srlx REG2, 64 - PAGE_SHIFT, REG2; \ + andn REG2, 0x7, REG2; \ + ldxa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \ + USER_PGTABLE_CHECK_PUD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, 800f) \ brz,pn REG1, FAIL_LABEL; \ sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \ srlx REG2, 64 - PAGE_SHIFT, REG2; \ - sllx REG1, PGD_PADDR_SHIFT, REG1; \ - andn REG2, 0x3, REG2; \ - lduwa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \ + andn REG2, 0x7, REG2; \ + ldxa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \ USER_PGTABLE_CHECK_PMD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, 800f) \ sllx VADDR, 64 - PMD_SHIFT, REG2; \ - srlx REG2, 64 - (PAGE_SHIFT - 1), REG2; \ - sllx REG1, PMD_PADDR_SHIFT, REG1; \ + srlx REG2, 64 - PAGE_SHIFT, REG2; \ andn REG2, 0x7, REG2; \ add REG1, REG2, REG1; \ ldxa [REG1] ASI_PHYS_USE_EC, REG1; \ @@ -306,8 +328,6 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end; (KERNEL_TSB_SIZE_BYTES / 16) #define KERNEL_TSB4M_NENTRIES 4096 -#define KTSB_PHYS_SHIFT 15 - /* Do a kernel TSB lookup at tl>0 on VADDR+TAG, branch to OK_LABEL * on TSB hit. REG1, REG2, REG3, and REG4 are used as temporaries * and the found TTE will be left in REG1. REG3 and REG4 must @@ -316,17 +336,15 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end; * VADDR and TAG will be preserved and not clobbered by this macro. */ #define KERN_TSB_LOOKUP_TL1(VADDR, TAG, REG1, REG2, REG3, REG4, OK_LABEL) \ -661: sethi %hi(swapper_tsb), REG1; \ - or REG1, %lo(swapper_tsb), REG1; \ +661: sethi %uhi(swapper_tsb), REG1; \ + sethi %hi(swapper_tsb), REG2; \ + or REG1, %ulo(swapper_tsb), REG1; \ + or REG2, %lo(swapper_tsb), REG2; \ .section .swapper_tsb_phys_patch, "ax"; \ .word 661b; \ .previous; \ -661: nop; \ - .section .tsb_ldquad_phys_patch, "ax"; \ - .word 661b; \ - sllx REG1, KTSB_PHYS_SHIFT, REG1; \ - sllx REG1, KTSB_PHYS_SHIFT, REG1; \ - .previous; \ + sllx REG1, 32, REG1; \ + or REG1, REG2, REG1; \ srlx VADDR, PAGE_SHIFT, REG2; \ and REG2, (KERNEL_TSB_NENTRIES - 1), REG2; \ sllx REG2, 4, REG2; \ @@ -341,17 +359,15 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end; * we can make use of that for the index computation. */ #define KERN_TSB4M_LOOKUP_TL1(TAG, REG1, REG2, REG3, REG4, OK_LABEL) \ -661: sethi %hi(swapper_4m_tsb), REG1; \ - or REG1, %lo(swapper_4m_tsb), REG1; \ +661: sethi %uhi(swapper_4m_tsb), REG1; \ + sethi %hi(swapper_4m_tsb), REG2; \ + or REG1, %ulo(swapper_4m_tsb), REG1; \ + or REG2, %lo(swapper_4m_tsb), REG2; \ .section .swapper_4m_tsb_phys_patch, "ax"; \ .word 661b; \ .previous; \ -661: nop; \ - .section .tsb_ldquad_phys_patch, "ax"; \ - .word 661b; \ - sllx REG1, KTSB_PHYS_SHIFT, REG1; \ - sllx REG1, KTSB_PHYS_SHIFT, REG1; \ - .previous; \ + sllx REG1, 32, REG1; \ + or REG1, REG2, REG1; \ and TAG, (KERNEL_TSB4M_NENTRIES - 1), REG2; \ sllx REG2, 4, REG2; \ add REG1, REG2, REG2; \ |
