author		Heiko Carstens <hca@linux.ibm.com>	2023-08-25 14:29:48 +0200
committer	Heiko Carstens <hca@linux.ibm.com>	2023-08-30 11:03:27 +0200
commit		c0f1d478121131c2a97cab24148bf7ebb7ed3434
tree		eddcdda865a0d66f371590e057025128c32af7d5
parent		b6f10e2f66e43b903b1f37b643d353fe364190cd
s390/mm: simplify kernel mapping setup
The kernel mapping is set up in two stages: the decompressor maps all
pages with RWX permissions, and the kernel then changes all mappings to
their final permissions, where most of the mappings are changed from
RWX to RWNX.

Change this and map all pages RWNX from the beginning, but without
enabling noexec via control register modification. This means that
effectively all pages are used with RWX permissions, like before. Once
the final permissions have been applied to the kernel mapping, enable
noexec via control register modification. This allows removing quite a
bit of non-obvious code.

Reviewed-by: Alexander Gordeev <agordeev@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
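In short, the no-execute semantics are carried in the page-table
entries from the start, but stay dormant until control register 0
bit 20 is set. A minimal sketch of the new ordering (illustrative
only; the example_* helpers are hypothetical stand-ins, while
MACHINE_HAS_NX and ctl_set_bit() are the interfaces the patch
actually uses):

	/*
	 * Sketch of the new ordering; example_* helpers are hypothetical.
	 */
	static void example_map_all_pages_rwnx(void)
	{
		/* decompressor: install all mappings with the NOEXEC bit set */
	}

	static void example_apply_final_permissions(void)
	{
		/* kernel: RO for rodata, ROX for text, and so on */
	}

	static void example_kernel_mapping_setup(void)
	{
		example_map_all_pages_rwnx();		/* NOEXEC bits set, but inert */
		example_apply_final_permissions();	/* final permissions in place */
		if (MACHINE_HAS_NX)			/* only now honor the NOEXEC bits */
			ctl_set_bit(0, 20);
	}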
-rw-r--r--	arch/s390/boot/startup.c	4
-rw-r--r--	arch/s390/boot/vmem.c		12
-rw-r--r--	arch/s390/kernel/early.c	4
-rw-r--r--	arch/s390/mm/vmem.c		109
4 files changed, 15 insertions(+), 114 deletions(-)
diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
index caf562be3531..d3e48bd9c394 100644
--- a/arch/s390/boot/startup.c
+++ b/arch/s390/boot/startup.c
@@ -53,10 +53,8 @@ static void detect_facilities(void)
}
if (test_facility(78))
machine.has_edat2 = 1;
- if (test_facility(130)) {
+ if (test_facility(130))
machine.has_nx = 1;
- __ctl_set_bit(0, 20);
- }
}
static void setup_lpp(void)
diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c
index c67f59db7a51..01257ce3b89c 100644
--- a/arch/s390/boot/vmem.c
+++ b/arch/s390/boot/vmem.c
@@ -287,7 +287,9 @@ static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long e
if (kasan_pte_populate_zero_shadow(pte, mode))
continue;
entry = __pte(_pa(addr, PAGE_SIZE, mode));
- entry = set_pte_bit(entry, PAGE_KERNEL_EXEC);
+ entry = set_pte_bit(entry, PAGE_KERNEL);
+ if (!machine.has_nx)
+ entry = clear_pte_bit(entry, __pgprot(_PAGE_NOEXEC));
set_pte(pte, entry);
pages++;
}
@@ -311,7 +313,9 @@ static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long e
continue;
if (can_large_pmd(pmd, addr, next)) {
entry = __pmd(_pa(addr, _SEGMENT_SIZE, mode));
- entry = set_pmd_bit(entry, SEGMENT_KERNEL_EXEC);
+ entry = set_pmd_bit(entry, SEGMENT_KERNEL);
+ if (!machine.has_nx)
+ entry = clear_pmd_bit(entry, __pgprot(_SEGMENT_ENTRY_NOEXEC));
set_pmd(pmd, entry);
pages++;
continue;
@@ -342,7 +346,9 @@ static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long e
continue;
if (can_large_pud(pud, addr, next)) {
entry = __pud(_pa(addr, _REGION3_SIZE, mode));
- entry = set_pud_bit(entry, REGION3_KERNEL_EXEC);
+ entry = set_pud_bit(entry, REGION3_KERNEL);
+ if (!machine.has_nx)
+ entry = clear_pud_bit(entry, __pgprot(_REGION_ENTRY_NOEXEC));
set_pud(pud, entry);
pages++;
continue;
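
Why clear the bit at all? On machines without facility 130 the NOEXEC
bits are not defined and must not be left set in the entries, which is
also how the kernel treats them elsewhere when the facility is absent.
A condensed sketch of the pattern used at all three levels above
(example_boot_pte() is a hypothetical wrapper; the helpers and
protection values are the ones from the hunks):

	/*
	 * Hypothetical wrapper condensing the PTE-level pattern: default
	 * to the non-executable protection, drop the NOEXEC bit where the
	 * facility (and thus the bit) does not exist.
	 */
	static pte_t example_boot_pte(unsigned long pa)
	{
		pte_t entry = __pte(pa);

		entry = set_pte_bit(entry, PAGE_KERNEL);
		if (!machine.has_nx)
			entry = clear_pte_bit(entry, __pgprot(_PAGE_NOEXEC));
		return entry;
	}
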
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 3dccc6eb24a8..442ce0489e1a 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -232,10 +232,8 @@ static __init void detect_machine_facilities(void)
S390_lowcore.machine_flags |= MACHINE_FLAG_VX;
__ctl_set_bit(0, 17);
}
- if (test_facility(130)) {
+ if (test_facility(130))
S390_lowcore.machine_flags |= MACHINE_FLAG_NX;
- __ctl_set_bit(0, 20);
- }
if (test_facility(133))
S390_lowcore.machine_flags |= MACHINE_FLAG_GS;
if (test_facility(139) && (tod_clock_base.tod >> 63)) {
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 0ae363cb47bc..261eea21ca10 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -5,7 +5,6 @@
#include <linux/memory_hotplug.h>
#include <linux/memblock.h>
-#include <linux/kasan.h>
#include <linux/pfn.h>
#include <linux/mm.h>
#include <linux/init.h>
@@ -650,108 +649,8 @@ void vmem_unmap_4k_page(unsigned long addr)
mutex_unlock(&vmem_mutex);
}
-static int __init memblock_region_cmp(const void *a, const void *b)
-{
- const struct memblock_region *r1 = a;
- const struct memblock_region *r2 = b;
-
- if (r1->base < r2->base)
- return -1;
- if (r1->base > r2->base)
- return 1;
- return 0;
-}
-
-static void __init memblock_region_swap(void *a, void *b, int size)
-{
- swap(*(struct memblock_region *)a, *(struct memblock_region *)b);
-}
-
-#ifdef CONFIG_KASAN
-#define __sha(x) ((unsigned long)kasan_mem_to_shadow((void *)x))
-
-static inline int set_memory_kasan(unsigned long start, unsigned long end)
-{
- start = PAGE_ALIGN_DOWN(__sha(start));
- end = PAGE_ALIGN(__sha(end));
- return set_memory_rwnx(start, (end - start) >> PAGE_SHIFT);
-}
-#endif
-
-/*
- * map whole physical memory to virtual memory (identity mapping)
- * we reserve enough space in the vmalloc area for vmemmap to hotplug
- * additional memory segments.
- */
void __init vmem_map_init(void)
{
- struct memblock_region memory_rwx_regions[] = {
- {
- .base = 0,
- .size = sizeof(struct lowcore),
- .flags = MEMBLOCK_NONE,
-#ifdef CONFIG_NUMA
- .nid = NUMA_NO_NODE,
-#endif
- },
- {
- .base = __pa(_stext),
- .size = _etext - _stext,
- .flags = MEMBLOCK_NONE,
-#ifdef CONFIG_NUMA
- .nid = NUMA_NO_NODE,
-#endif
- },
- {
- .base = __pa(_sinittext),
- .size = _einittext - _sinittext,
- .flags = MEMBLOCK_NONE,
-#ifdef CONFIG_NUMA
- .nid = NUMA_NO_NODE,
-#endif
- },
- {
- .base = __stext_amode31,
- .size = __etext_amode31 - __stext_amode31,
- .flags = MEMBLOCK_NONE,
-#ifdef CONFIG_NUMA
- .nid = NUMA_NO_NODE,
-#endif
- },
- };
- struct memblock_type memory_rwx = {
- .regions = memory_rwx_regions,
- .cnt = ARRAY_SIZE(memory_rwx_regions),
- .max = ARRAY_SIZE(memory_rwx_regions),
- };
- phys_addr_t base, end;
- u64 i;
-
- /*
- * Set RW+NX attribute on all memory, except regions enumerated with
- * memory_rwx exclude type. These regions need different attributes,
- * which are enforced afterwards.
- *
- * __for_each_mem_range() iterate and exclude types should be sorted.
- * The relative location of _stext and _sinittext is hardcoded in the
- * linker script. However a location of __stext_amode31 and the kernel
- * image itself are chosen dynamically. Thus, sort the exclude type.
- */
- sort(&memory_rwx_regions,
- ARRAY_SIZE(memory_rwx_regions), sizeof(memory_rwx_regions[0]),
- memblock_region_cmp, memblock_region_swap);
- __for_each_mem_range(i, &memblock.memory, &memory_rwx,
- NUMA_NO_NODE, MEMBLOCK_NONE, &base, &end, NULL) {
- set_memory_rwnx((unsigned long)__va(base),
- (end - base) >> PAGE_SHIFT);
- }
-
-#ifdef CONFIG_KASAN
- for_each_mem_range(i, &base, &end) {
- set_memory_kasan((unsigned long)__va(base),
- (unsigned long)__va(end));
- }
-#endif
set_memory_rox((unsigned long)_stext,
(unsigned long)(_etext - _stext) >> PAGE_SHIFT);
set_memory_ro((unsigned long)_etext,
@@ -762,14 +661,14 @@ void __init vmem_map_init(void)
(__etext_amode31 - __stext_amode31) >> PAGE_SHIFT);
/* lowcore must be executable for LPSWE */
- if (static_key_enabled(&cpu_has_bear))
- set_memory_nx(0, 1);
- set_memory_nx(PAGE_SIZE, 1);
+ if (!static_key_enabled(&cpu_has_bear))
+ set_memory_x(0, 1);
if (debug_pagealloc_enabled()) {
set_memory_4k((unsigned long)__va(0),
ident_map_size >> PAGE_SHIFT);
}
-
+ if (MACHINE_HAS_NX)
+ ctl_set_bit(0, 20);
pr_info("Write protected kernel read-only data: %luk\n",
(unsigned long)(__end_rodata - _stext) >> 10);
}