From d6da67378198e4caa37404f87851659553b936b9 Mon Sep 17 00:00:00 2001
From: Alexander Gordeev
Date: Tue, 19 Jul 2022 07:16:34 +0200
Subject: s390/crash: move copy_to_user_real() to crash_dump.c

Function copy_to_user_real() does not really belong in maccess.c. It is
only used for copying oldmem to user space, so move it next to its users
in crash_dump.c.

Acked-by: Heiko Carstens
Tested-by: Alexander Egorenkov
Link: https://lore.kernel.org/r/e8de968d40202d87caa09aef12e9c67ec23a1c1a.1658206891.git.agordeev@linux.ibm.com
Signed-off-by: Alexander Gordeev
---
 arch/s390/mm/maccess.c | 26 --------------------------
 1 file changed, 26 deletions(-)

(limited to 'arch/s390/mm')

diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c
index 421efa46946b..d6d84e02f35a 100644
--- a/arch/s390/mm/maccess.c
+++ b/arch/s390/mm/maccess.c
@@ -171,32 +171,6 @@ void memcpy_absolute(void *dest, void *src, size_t count)
 	arch_local_irq_restore(flags);
 }
 
-/*
- * Copy memory from kernel (real) to user (virtual)
- */
-int copy_to_user_real(void __user *dest, unsigned long src, unsigned long count)
-{
-	int offs = 0, size, rc;
-	char *buf;
-
-	buf = (char *) __get_free_page(GFP_KERNEL);
-	if (!buf)
-		return -ENOMEM;
-	rc = -EFAULT;
-	while (offs < count) {
-		size = min(PAGE_SIZE, count - offs);
-		if (memcpy_real(buf, src + offs, size))
-			goto out;
-		if (copy_to_user(dest + offs, buf, size))
-			goto out;
-		offs += size;
-	}
-	rc = 0;
-out:
-	free_page((unsigned long) buf);
-	return rc;
-}
-
 /*
  * Check if physical address is within prefix or zero page
  */
-- cgit

From 7d06fed77b7d8fc9f6cc41b4e3f2823d32532ad8 Mon Sep 17 00:00:00 2001
From: Alexander Gordeev
Date: Wed, 20 Jul 2022 08:22:01 +0200
Subject: s390/smp: rework absolute lowcore access

Temporarily unsetting the prefix page in the memcpy_absolute() routine
poses a risk of executing a code path with an unexpectedly disabled
prefix page. This rework avoids uninstalling the prefix page and
disabling normal and machine check interrupts when accessing absolute
zero memory.

Although the memcpy_absolute() routine can access the whole of memory,
it is only used to update the absolute zero lowcore. This rework
therefore introduces a new mechanism for absolute zero lowcore access
and scraps the memcpy_absolute() routine for good.

Instead, an area is reserved in virtual memory that is used for
absolute lowcore access only. That area holds an array of 8KB virtual
mappings - one per CPU. Whenever a CPU is brought online, the
corresponding entry is mapped to the real address of the previously
installed prefix page.

Absolute zero lowcore access then works like this: a CPU calls the new
primitive get_abs_lowcore() to obtain its 8KB mapping as a pointer to
struct lowcore. Virtual address references through that pointer get
translated to the real addresses of the prefix page, which in turn get
swapped with the absolute zero memory addresses due to prefixing. Once
the pointer is no longer needed it must be released with the
put_abs_lowcore() primitive:

	struct lowcore *abs_lc;
	unsigned long flags;

	abs_lc = get_abs_lowcore(&flags);
	abs_lc->... = ...;
	put_abs_lowcore(abs_lc, flags);

To ensure the described mechanism works, large segment- and
region-table entries must be avoided for the 8KB mappings. Failure to
do so results in usage of the Region-Frame Absolute Address (RFAA) or
Segment-Frame Absolute Address (SFAA) large-page fields. In that case
absolute addresses would be used to address the prefix page instead of
the real ones, and the prefixing would get bypassed.
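The get_abs_lowcore()/put_abs_lowcore() primitives themselves live outside
of arch/s390/mm and are therefore not shown in this view (limited to
'arch/s390/mm'). The sketch below only illustrates how the per-CPU 8KB
slots described above could be wired up on top of the vmem_map_4k_page()
helper this patch adds to vmem.c; the ABS_LOWCORE_SLOT_SIZE constant, the
abs_lowcore_map_cpu() helper and the exact interrupt handling are
assumptions made for the example, not the actual implementation:

	#include <linux/smp.h>		/* smp_processor_id() */
	#include <linux/irqflags.h>	/* local_irq_save()/local_irq_restore() */
	#include <asm/lowcore.h>	/* struct lowcore */
	#include <asm/pgtable.h>	/* PAGE_KERNEL */

	/*
	 * Illustrative sketch, not the code of this patch. Assumes each CPU
	 * owns an 8KB (two page) slot in the area starting at __abs_lowcore.
	 */
	#define ABS_LOWCORE_SLOT_SIZE	(2 * PAGE_SIZE)	/* assumed per-CPU stride */

	/* Called when a CPU comes online: back its slot with the prefix pages. */
	static int abs_lowcore_map_cpu(int cpu, unsigned long prefix)
	{
		unsigned long addr = __abs_lowcore + cpu * ABS_LOWCORE_SLOT_SIZE;
		int rc;

		rc = vmem_map_4k_page(addr, prefix, PAGE_KERNEL);
		if (rc)
			return rc;
		return vmem_map_4k_page(addr + PAGE_SIZE, prefix + PAGE_SIZE, PAGE_KERNEL);
	}

	/* Return the calling CPU's mapping; interrupts stay off until put_abs_lowcore(). */
	static struct lowcore *get_abs_lowcore(unsigned long *flags)
	{
		local_irq_save(*flags);
		return (struct lowcore *)(__abs_lowcore +
					  smp_processor_id() * ABS_LOWCORE_SLOT_SIZE);
	}

	static void put_abs_lowcore(struct lowcore *abs_lc, unsigned long flags)
	{
		local_irq_restore(flags);
	}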
Reviewed-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/mm/init.c | 2 +- arch/s390/mm/maccess.c | 67 ++++++++++++++++++--------------------- arch/s390/mm/vmem.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 116 insertions(+), 38 deletions(-) (limited to 'arch/s390/mm') diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 6a0ac00d5a42..7b6873ac99d1 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -38,7 +38,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index d6d84e02f35a..b8451ddbb3d6 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -15,6 +15,7 @@ #include #include #include +#include #include static notrace long s390_kernel_write_odd(void *dst, const void *src, size_t size) @@ -148,46 +149,20 @@ int memcpy_real(void *dest, unsigned long src, size_t count) } /* - * Copy memory in absolute mode (kernel to kernel) + * Find CPU that owns swapped prefix page */ -void memcpy_absolute(void *dest, void *src, size_t count) -{ - unsigned long cr0, flags, prefix; - - flags = arch_local_irq_save(); - __ctl_store(cr0, 0, 0); - __ctl_clear_bit(0, 28); /* disable lowcore protection */ - prefix = store_prefix(); - if (prefix) { - local_mcck_disable(); - set_prefix(0); - memcpy(dest, src, count); - set_prefix(prefix); - local_mcck_enable(); - } else { - memcpy(dest, src, count); - } - __ctl_load(cr0, 0, 0); - arch_local_irq_restore(flags); -} - -/* - * Check if physical address is within prefix or zero page - */ -static int is_swapped(phys_addr_t addr) +static int get_swapped_owner(phys_addr_t addr) { phys_addr_t lc; int cpu; - if (addr < sizeof(struct lowcore)) - return 1; for_each_online_cpu(cpu) { lc = virt_to_phys(lowcore_ptr[cpu]); if (addr > lc + sizeof(struct lowcore) - 1 || addr < lc) continue; - return 1; + return cpu; } - return 0; + return -1; } /* @@ -200,17 +175,35 @@ void *xlate_dev_mem_ptr(phys_addr_t addr) { void *ptr = phys_to_virt(addr); void *bounce = ptr; + struct lowcore *abs_lc; + unsigned long flags; unsigned long size; + int this_cpu, cpu; cpus_read_lock(); - preempt_disable(); - if (is_swapped(addr)) { - size = PAGE_SIZE - (addr & ~PAGE_MASK); - bounce = (void *) __get_free_page(GFP_ATOMIC); - if (bounce) - memcpy_absolute(bounce, ptr, size); + this_cpu = get_cpu(); + if (addr >= sizeof(struct lowcore)) { + cpu = get_swapped_owner(addr); + if (cpu < 0) + goto out; + } + bounce = (void *)__get_free_page(GFP_ATOMIC); + if (!bounce) + goto out; + size = PAGE_SIZE - (addr & ~PAGE_MASK); + if (addr < sizeof(struct lowcore)) { + abs_lc = get_abs_lowcore(&flags); + ptr = (void *)abs_lc + addr; + memcpy(bounce, ptr, size); + put_abs_lowcore(abs_lc, flags); + } else if (cpu == this_cpu) { + ptr = (void *)(addr - virt_to_phys(lowcore_ptr[cpu])); + memcpy(bounce, ptr, size); + } else { + memcpy(bounce, ptr, size); } - preempt_enable(); +out: + put_cpu(); cpus_read_unlock(); return bounce; } diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index c2583f921ca8..203ba2bfea59 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -560,6 +560,91 @@ int vmem_add_mapping(unsigned long start, unsigned long size) return ret; } +/* + * Allocate new or return existing page-table entry, but do not map it + * to any physical address. If missing, allocate segment- and region- + * table entries along. 
Meeting a large segment- or region-table entry + * while traversing is an error, since the function is expected to be + * called against virtual regions reserverd for 4KB mappings only. + */ +static pte_t *vmem_get_alloc_pte(unsigned long addr) +{ + pte_t *ptep = NULL; + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + pgd = pgd_offset_k(addr); + if (pgd_none(*pgd)) { + p4d = vmem_crst_alloc(_REGION2_ENTRY_EMPTY); + if (!p4d) + goto out; + pgd_populate(&init_mm, pgd, p4d); + } + p4d = p4d_offset(pgd, addr); + if (p4d_none(*p4d)) { + pud = vmem_crst_alloc(_REGION3_ENTRY_EMPTY); + if (!pud) + goto out; + p4d_populate(&init_mm, p4d, pud); + } + pud = pud_offset(p4d, addr); + if (pud_none(*pud)) { + pmd = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY); + if (!pmd) + goto out; + pud_populate(&init_mm, pud, pmd); + } else if (WARN_ON_ONCE(pud_large(*pud))) { + goto out; + } + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) { + pte = vmem_pte_alloc(); + if (!pte) + goto out; + pmd_populate(&init_mm, pmd, pte); + } else if (WARN_ON_ONCE(pmd_large(*pmd))) { + goto out; + } + ptep = pte_offset_kernel(pmd, addr); +out: + return ptep; +} + +int vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot) +{ + pte_t *ptep, pte; + int rc = 0; + + if (!IS_ALIGNED(addr, PAGE_SIZE)) + return -EINVAL; + mutex_lock(&vmem_mutex); + ptep = vmem_get_alloc_pte(addr); + if (!ptep) { + rc = -ENOMEM; + goto out; + } + __ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL); + pte = mk_pte_phys(phys, prot); + set_pte(ptep, pte); +out: + mutex_unlock(&vmem_mutex); + return rc; +} + +void vmem_unmap_4k_page(unsigned long addr) +{ + pte_t *ptep; + + mutex_lock(&vmem_mutex); + ptep = virt_to_kpte(addr); + __ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL); + pte_clear(&init_mm, addr, ptep); + mutex_unlock(&vmem_mutex); +} + /* * map whole physical memory to virtual memory (identity mapping) * we reserve enough space in the vmalloc area for vmemmap to hotplug -- cgit From e409b7f19172a3c154de62de4baf32a2c25a375a Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Wed, 20 Jul 2022 08:32:13 +0200 Subject: s390/smp,ptdump: add absolute lowcore markers Add "Lowcore Area Start" and "Lowcore Area End" markers that fence pages where absolute lowcore resides. 
Reviewed-by: Heiko Carstens
Signed-off-by: Alexander Gordeev
---
 arch/s390/mm/dump_pagetables.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'arch/s390/mm')

diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c
index 9f9af5298dd6..393d6ef4589e 100644
--- a/arch/s390/mm/dump_pagetables.c
+++ b/arch/s390/mm/dump_pagetables.c
@@ -8,6 +8,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
@@ -39,6 +40,8 @@ enum address_markers_idx {
 	VMALLOC_END_NR,
 	MODULES_NR,
 	MODULES_END_NR,
+	ABS_LOWCORE_NR,
+	ABS_LOWCORE_END_NR,
 };
 
 static struct addr_marker address_markers[] = {
@@ -62,6 +65,8 @@ static struct addr_marker address_markers[] = {
 	[VMALLOC_END_NR] = {0, "vmalloc Area End"},
 	[MODULES_NR] = {0, "Modules Area Start"},
 	[MODULES_END_NR] = {0, "Modules Area End"},
+	[ABS_LOWCORE_NR] = {0, "Lowcore Area Start"},
+	[ABS_LOWCORE_END_NR] = {0, "Lowcore Area End"},
 	{ -1, NULL }
 };
 
@@ -278,6 +283,8 @@ static int pt_dump_init(void)
 	address_markers[IDENTITY_AFTER_END_NR].start_address = ident_map_size;
 	address_markers[MODULES_NR].start_address = MODULES_VADDR;
 	address_markers[MODULES_END_NR].start_address = MODULES_END;
+	address_markers[ABS_LOWCORE_NR].start_address = __abs_lowcore;
+	address_markers[ABS_LOWCORE_END_NR].start_address = __abs_lowcore + ABS_LOWCORE_MAP_SIZE;
 	address_markers[VMEMMAP_NR].start_address = (unsigned long) vmemmap;
 	address_markers[VMEMMAP_END_NR].start_address = (unsigned long)vmemmap + vmemmap_size;
 	address_markers[VMALLOC_NR].start_address = VMALLOC_START;
-- cgit

From 3fb39cb7c5145a10e1a0221c057e92fb8855efbb Mon Sep 17 00:00:00 2001
From: Alexander Gordeev
Date: Sat, 6 Aug 2022 09:13:28 +0200
Subject: Revert "s390/smp,ptdump: add absolute lowcore markers"

This reverts commit e409b7f19172a3c154de62de4baf32a2c25a375a.

Commit 7d06fed77b7d ("s390/smp: rework absolute lowcore access")
introduced taking a mutex lock with interrupts disabled. This commit is
a follow-up to it and needs to be reverted as well.
Signed-off-by: Alexander Gordeev
---
 arch/s390/mm/dump_pagetables.c | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'arch/s390/mm')

diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c
index 393d6ef4589e..9f9af5298dd6 100644
--- a/arch/s390/mm/dump_pagetables.c
+++ b/arch/s390/mm/dump_pagetables.c
@@ -8,7 +8,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
@@ -40,8 +39,6 @@
 	VMALLOC_END_NR,
 	MODULES_NR,
 	MODULES_END_NR,
-	ABS_LOWCORE_NR,
-	ABS_LOWCORE_END_NR,
 };
 
 static struct addr_marker address_markers[] = {
@@ -65,8 +62,6 @@ static struct addr_marker address_markers[] = {
 	[VMALLOC_END_NR] = {0, "vmalloc Area End"},
 	[MODULES_NR] = {0, "Modules Area Start"},
 	[MODULES_END_NR] = {0, "Modules Area End"},
-	[ABS_LOWCORE_NR] = {0, "Lowcore Area Start"},
-	[ABS_LOWCORE_END_NR] = {0, "Lowcore Area End"},
 	{ -1, NULL }
 };
 
@@ -283,8 +278,6 @@ static int pt_dump_init(void)
 	address_markers[IDENTITY_AFTER_END_NR].start_address = ident_map_size;
 	address_markers[MODULES_NR].start_address = MODULES_VADDR;
 	address_markers[MODULES_END_NR].start_address = MODULES_END;
-	address_markers[ABS_LOWCORE_NR].start_address = __abs_lowcore;
-	address_markers[ABS_LOWCORE_END_NR].start_address = __abs_lowcore + ABS_LOWCORE_MAP_SIZE;
 	address_markers[VMEMMAP_NR].start_address = (unsigned long) vmemmap;
 	address_markers[VMEMMAP_END_NR].start_address = (unsigned long)vmemmap + vmemmap_size;
 	address_markers[VMALLOC_NR].start_address = VMALLOC_START;
-- cgit

From 5e441f61f509617a3f57fcb156b7aa2870cc8752 Mon Sep 17 00:00:00 2001
From: Alexander Gordeev
Date: Sat, 6 Aug 2022 09:24:07 +0200
Subject: Revert "s390/smp: rework absolute lowcore access"

This reverts commit 7d06fed77b7d8fc9f6cc41b4e3f2823d32532ad8.

That commit introduced vmem_mutex locking in the vmem_map_4k_page()
function, which is called from smp_reinit_ipl_cpu() with interrupts
disabled. Although this is a pre-SMP early initcall, so no other CPUs
run in parallel and no other code takes vmem_mutex at that boot stage,
it still needs to be fixed.
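The problem described above is the usual sleeping-lock-in-atomic-context
pattern: vmem_map_4k_page() takes vmem_mutex, and mutex_lock() may sleep,
which is not allowed with interrupts disabled even when the lock can never
be contended that early in the boot process. A hypothetical, simplified
illustration of the call pattern being reverted (the function below is made
up for the example and is not the real smp_reinit_ipl_cpu() code):

	static int __init reinit_ipl_cpu_example(void)
	{
		unsigned long addr = __abs_lowcore;			/* assumed target slot */
		unsigned long prefix = virt_to_phys(lowcore_ptr[0]);	/* assumed prefix page */
		unsigned long flags;

		local_irq_save(flags);
		/*
		 * Invalid: vmem_map_4k_page() ends up in mutex_lock(&vmem_mutex),
		 * a sleeping lock, while interrupts are disabled. Even though no
		 * other CPU or code path can take vmem_mutex in a pre-SMP early
		 * initcall, debug checks such as might_sleep() rightfully complain.
		 */
		vmem_map_4k_page(addr, prefix, PAGE_KERNEL);
		local_irq_restore(flags);
		return 0;
	}
	early_initcall(reinit_ipl_cpu_example);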
Signed-off-by: Alexander Gordeev --- arch/s390/mm/init.c | 2 +- arch/s390/mm/maccess.c | 67 +++++++++++++++++++++------------------ arch/s390/mm/vmem.c | 85 -------------------------------------------------- 3 files changed, 38 insertions(+), 116 deletions(-) (limited to 'arch/s390/mm') diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 7b6873ac99d1..6a0ac00d5a42 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -38,7 +38,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index b8451ddbb3d6..d6d84e02f35a 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -15,7 +15,6 @@ #include #include #include -#include #include static notrace long s390_kernel_write_odd(void *dst, const void *src, size_t size) @@ -149,20 +148,46 @@ int memcpy_real(void *dest, unsigned long src, size_t count) } /* - * Find CPU that owns swapped prefix page + * Copy memory in absolute mode (kernel to kernel) */ -static int get_swapped_owner(phys_addr_t addr) +void memcpy_absolute(void *dest, void *src, size_t count) +{ + unsigned long cr0, flags, prefix; + + flags = arch_local_irq_save(); + __ctl_store(cr0, 0, 0); + __ctl_clear_bit(0, 28); /* disable lowcore protection */ + prefix = store_prefix(); + if (prefix) { + local_mcck_disable(); + set_prefix(0); + memcpy(dest, src, count); + set_prefix(prefix); + local_mcck_enable(); + } else { + memcpy(dest, src, count); + } + __ctl_load(cr0, 0, 0); + arch_local_irq_restore(flags); +} + +/* + * Check if physical address is within prefix or zero page + */ +static int is_swapped(phys_addr_t addr) { phys_addr_t lc; int cpu; + if (addr < sizeof(struct lowcore)) + return 1; for_each_online_cpu(cpu) { lc = virt_to_phys(lowcore_ptr[cpu]); if (addr > lc + sizeof(struct lowcore) - 1 || addr < lc) continue; - return cpu; + return 1; } - return -1; + return 0; } /* @@ -175,35 +200,17 @@ void *xlate_dev_mem_ptr(phys_addr_t addr) { void *ptr = phys_to_virt(addr); void *bounce = ptr; - struct lowcore *abs_lc; - unsigned long flags; unsigned long size; - int this_cpu, cpu; cpus_read_lock(); - this_cpu = get_cpu(); - if (addr >= sizeof(struct lowcore)) { - cpu = get_swapped_owner(addr); - if (cpu < 0) - goto out; - } - bounce = (void *)__get_free_page(GFP_ATOMIC); - if (!bounce) - goto out; - size = PAGE_SIZE - (addr & ~PAGE_MASK); - if (addr < sizeof(struct lowcore)) { - abs_lc = get_abs_lowcore(&flags); - ptr = (void *)abs_lc + addr; - memcpy(bounce, ptr, size); - put_abs_lowcore(abs_lc, flags); - } else if (cpu == this_cpu) { - ptr = (void *)(addr - virt_to_phys(lowcore_ptr[cpu])); - memcpy(bounce, ptr, size); - } else { - memcpy(bounce, ptr, size); + preempt_disable(); + if (is_swapped(addr)) { + size = PAGE_SIZE - (addr & ~PAGE_MASK); + bounce = (void *) __get_free_page(GFP_ATOMIC); + if (bounce) + memcpy_absolute(bounce, ptr, size); } -out: - put_cpu(); + preempt_enable(); cpus_read_unlock(); return bounce; } diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 203ba2bfea59..c2583f921ca8 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -560,91 +560,6 @@ int vmem_add_mapping(unsigned long start, unsigned long size) return ret; } -/* - * Allocate new or return existing page-table entry, but do not map it - * to any physical address. If missing, allocate segment- and region- - * table entries along. 
Meeting a large segment- or region-table entry - * while traversing is an error, since the function is expected to be - * called against virtual regions reserverd for 4KB mappings only. - */ -static pte_t *vmem_get_alloc_pte(unsigned long addr) -{ - pte_t *ptep = NULL; - pgd_t *pgd; - p4d_t *p4d; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - - pgd = pgd_offset_k(addr); - if (pgd_none(*pgd)) { - p4d = vmem_crst_alloc(_REGION2_ENTRY_EMPTY); - if (!p4d) - goto out; - pgd_populate(&init_mm, pgd, p4d); - } - p4d = p4d_offset(pgd, addr); - if (p4d_none(*p4d)) { - pud = vmem_crst_alloc(_REGION3_ENTRY_EMPTY); - if (!pud) - goto out; - p4d_populate(&init_mm, p4d, pud); - } - pud = pud_offset(p4d, addr); - if (pud_none(*pud)) { - pmd = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY); - if (!pmd) - goto out; - pud_populate(&init_mm, pud, pmd); - } else if (WARN_ON_ONCE(pud_large(*pud))) { - goto out; - } - pmd = pmd_offset(pud, addr); - if (pmd_none(*pmd)) { - pte = vmem_pte_alloc(); - if (!pte) - goto out; - pmd_populate(&init_mm, pmd, pte); - } else if (WARN_ON_ONCE(pmd_large(*pmd))) { - goto out; - } - ptep = pte_offset_kernel(pmd, addr); -out: - return ptep; -} - -int vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot) -{ - pte_t *ptep, pte; - int rc = 0; - - if (!IS_ALIGNED(addr, PAGE_SIZE)) - return -EINVAL; - mutex_lock(&vmem_mutex); - ptep = vmem_get_alloc_pte(addr); - if (!ptep) { - rc = -ENOMEM; - goto out; - } - __ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL); - pte = mk_pte_phys(phys, prot); - set_pte(ptep, pte); -out: - mutex_unlock(&vmem_mutex); - return rc; -} - -void vmem_unmap_4k_page(unsigned long addr) -{ - pte_t *ptep; - - mutex_lock(&vmem_mutex); - ptep = virt_to_kpte(addr); - __ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL); - pte_clear(&init_mm, addr, ptep); - mutex_unlock(&vmem_mutex); -} - /* * map whole physical memory to virtual memory (identity mapping) * we reserve enough space in the vmalloc area for vmemmap to hotplug -- cgit