From 4dc890df63996c4b18bcebfbd69b141a0beb5d03 Mon Sep 17 00:00:00 2001
From: Alexander Gordeev
Date: Fri, 8 Mar 2024 15:54:46 +0100
Subject: s390/sysinfo: allow response buffer in normal memory

As of commit cd4386a931b63 ("s390/cpcmd,vmcp: avoid GFP_DMA allocations")
the Diagnose Code 8 response buffer does not have to be allocated below 2GB.

Reviewed-by: Heiko Carstens
Signed-off-by: Alexander Gordeev
Signed-off-by: Heiko Carstens
---
 arch/s390/kernel/sysinfo.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c
index 1b1be3110cfc..2be30a96696a 100644
--- a/arch/s390/kernel/sysinfo.c
+++ b/arch/s390/kernel/sysinfo.c
@@ -397,7 +397,7 @@ static void service_level_vm_print(struct seq_file *m,
 {
 	char *query_buffer, *str;

-	query_buffer = kmalloc(1024, GFP_KERNEL | GFP_DMA);
+	query_buffer = kmalloc(1024, GFP_KERNEL);
 	if (!query_buffer)
 		return;
 	cpcmd("QUERY CPLEVEL", query_buffer, 1024, NULL);
--
cgit

From 367c50f78451d3bd7ad70bc5c89f9ba6dec46ca9 Mon Sep 17 00:00:00 2001
From: Mete Durlu
Date: Wed, 6 Mar 2024 12:31:52 +0100
Subject: s390/vtime: fix average steal time calculation

The current average steal timer calculation produces volatile and
inflated values. The only user of this value so far is KVM, which uses
it to decide whether or not to yield a vCPU that is seeing steal time.
KVM compares the average steal timer against a threshold: if the
threshold is exceeded, it does not allow CPU polling and yields the CPU
to the host, otherwise it keeps the CPU by polling. Since KVM's steal
time threshold is very low by default (10%), it is most likely not
affected much by the bloated average steal timer values, because the
operating region is pretty small. However, there might be new users in
the future who rely on this number. Fix the average steal timer
calculation by changing the formula from:

avg_steal_timer = avg_steal_timer / 2 + steal_timer;

to the following:

avg_steal_timer = (avg_steal_timer + steal_timer) / 2;

This ensures that avg_steal_timer is actually a naive average of steal
timer values. It now closely follows the steal timer values, but in a
smoother manner.

Fixes: 152e9b8676c6 ("s390/vtime: steal time exponential moving average")
Signed-off-by: Mete Durlu
Acked-by: Heiko Carstens
Acked-by: Christian Borntraeger
Signed-off-by: Heiko Carstens
---
 arch/s390/kernel/vtime.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index e0a88dcaf5cb..24a18e5ef6e8 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -210,13 +210,13 @@ void vtime_flush(struct task_struct *tsk)
 		virt_timer_expire();

 	steal = S390_lowcore.steal_timer;
-	avg_steal = S390_lowcore.avg_steal_timer / 2;
+	avg_steal = S390_lowcore.avg_steal_timer;
 	if ((s64) steal > 0) {
 		S390_lowcore.steal_timer = 0;
 		account_steal_time(cputime_to_nsecs(steal));
 		avg_steal += steal;
 	}
-	S390_lowcore.avg_steal_timer = avg_steal;
+	S390_lowcore.avg_steal_timer = avg_steal / 2;
 }
--
cgit
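The difference between the two recurrences is easiest to see numerically. The following is a standalone sketch, not part of the patch: plain userspace C with a made-up constant steal value, iterating both formulas. The old formula converges to twice the actual steal time, the new one to the steal time itself.

#include <stdio.h>

/*
 * Compare the old and new avg_steal_timer recurrences for a constant
 * per-interval steal time. The value 1000 is an arbitrary sample.
 */
int main(void)
{
	unsigned long steal = 1000;
	unsigned long old_avg = 0, new_avg = 0;
	int i;

	for (i = 0; i < 20; i++) {
		old_avg = old_avg / 2 + steal;		/* pre-patch formula */
		new_avg = (new_avg + steal) / 2;	/* patched formula */
	}
	/* old_avg approaches 2 * steal, new_avg approaches steal */
	printf("old: %lu, new: %lu\n", old_avg, new_avg);
	return 0;
}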
From 8b19e145e82f1bf1419541de7ad823f5cb7a4ec3 Mon Sep 17 00:00:00 2001
From: Halil Pasic
Date: Thu, 7 Mar 2024 13:28:05 +0100
Subject: s390/cio: introduce bitwise dma types and helper functions

Introduce dma32_t and dma64_t bitwise types, which are supposed to be
used for 31 and 64 bit DMA capable addresses. This allows the use of
sparse (make C=1) for type checking, so that incorrect usages can
easily be found.

Also add a couple of helper functions which

- convert virtual to DMA addresses and vice versa
- allow for simple logical and arithmetic operations on DMA addresses
- convert DMA addresses to plain u32 and u64 values

All helper functions exist to avoid excessive casting in C code.

Signed-off-by: Halil Pasic
Co-developed-by: Heiko Carstens
Reviewed-by: Steffen Maier
Signed-off-by: Heiko Carstens
---
 arch/s390/include/asm/dma-types.h | 103 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 103 insertions(+)
 create mode 100644 arch/s390/include/asm/dma-types.h

diff --git a/arch/s390/include/asm/dma-types.h b/arch/s390/include/asm/dma-types.h
new file mode 100644
index 000000000000..7d7f71022aa9
--- /dev/null
+++ b/arch/s390/include/asm/dma-types.h
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_S390_DMA_TYPES_H_
+#define _ASM_S390_DMA_TYPES_H_
+
+#include
+#include
+
+/*
+ * typedef dma32_t
+ * Contains a 31 bit absolute address to a DMA capable piece of storage.
+ *
+ * For CIO, DMA addresses are always absolute addresses. These addresses tend
+ * to be used in architectured memory blocks (like ORB, IDAW, MIDAW). Under
+ * certain circumstances 31 bit wide addresses must be used because the
+ * address must fit in 31 bits.
+ *
+ * This type is to be used when such fields can be modelled as 32 bit wide.
+ */
+typedef u32 __bitwise dma32_t;
+
+/*
+ * typedef dma64_t
+ * Contains a 64 bit absolute address to a DMA capable piece of storage.
+ *
+ * For CIO, DMA addresses are always absolute addresses. These addresses tend
+ * to be used in architectured memory blocks (like ORB, IDAW, MIDAW).
+ *
+ * This type is to be used to model such 64 bit wide fields.
+ */
+typedef u64 __bitwise dma64_t;
+
+/*
+ * Although DMA addresses should be obtained using the DMA API, in cases when
+ * it is known that the first argument holds a virtual address that points to
+ * DMA-able 31 bit addressable storage, then this function can be safely used.
+ */
+static inline dma32_t virt_to_dma32(void *ptr)
+{
+	return (__force dma32_t)__pa(ptr);
+}
+
+static inline void *dma32_to_virt(dma32_t addr)
+{
+	return __va((__force unsigned long)addr);
+}
+
+static inline dma32_t u32_to_dma32(u32 addr)
+{
+	return (__force dma32_t)addr;
+}
+
+static inline u32 dma32_to_u32(dma32_t addr)
+{
+	return (__force u32)addr;
+}
+
+static inline dma32_t dma32_add(dma32_t a, u32 b)
+{
+	return (__force dma32_t)((__force u32)a + b);
+}
+
+static inline dma32_t dma32_and(dma32_t a, u32 b)
+{
+	return (__force dma32_t)((__force u32)a & b);
+}
+
+/*
+ * Although DMA addresses should be obtained using the DMA API, in cases when
+ * it is known that the first argument holds a virtual address that points to
+ * DMA-able storage, then this function can be safely used.
+ */
+static inline dma64_t virt_to_dma64(void *ptr)
+{
+	return (__force dma64_t)__pa(ptr);
+}
+
+static inline void *dma64_to_virt(dma64_t addr)
+{
+	return __va((__force unsigned long)addr);
+}
+
+static inline dma64_t u64_to_dma64(u64 addr)
+{
+	return (__force dma64_t)addr;
+}
+
+static inline u64 dma64_to_u64(dma64_t addr)
+{
+	return (__force u64)addr;
+}
+
+static inline dma64_t dma64_add(dma64_t a, u64 b)
+{
+	return (__force dma64_t)((__force u64)a + b);
+}
+
+static inline dma64_t dma64_and(dma64_t a, u64 b)
+{
+	return (__force dma64_t)((__force u64)a & b);
+}
+
+#endif /* _ASM_S390_DMA_TYPES_H_ */
--
cgit
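To illustrate what the bitwise annotation buys: the same __CHECKER__ conditionals that the kernel uses can be reproduced outside the tree. This is a standalone sketch with a made-up dummy_dma32_t type and sample value; a normal compiler accepts everything, while sparse flags the commented-out direct assignment but accepts the __force-based helper.

#include <stdint.h>
#include <stdio.h>

/*
 * Same trick as the kernel's __bitwise/__force: only sparse (which
 * defines __CHECKER__) sees the attributes, a normal compiler treats
 * the type as a plain integer.
 */
#ifdef __CHECKER__
#define __bitwise __attribute__((bitwise))
#define __force __attribute__((force))
#else
#define __bitwise
#define __force
#endif

typedef uint32_t __bitwise dummy_dma32_t;	/* stand-in for dma32_t */

static inline dummy_dma32_t u32_to_dummy(uint32_t addr)
{
	return (__force dummy_dma32_t)addr;	/* explicit, sparse-clean */
}

int main(void)
{
	dummy_dma32_t cda;

	/* cda = 0x2000;  <-- sparse: incorrect type in assignment */
	cda = u32_to_dummy(0x2000);		/* accepted */
	printf("%u\n", (__force uint32_t)cda);
	return 0;
}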
From 1bcf7f48b7d40547eda6bfd1ea0bfed9cf9ec54f Mon Sep 17 00:00:00 2001
From: Heiko Carstens
Date: Thu, 7 Mar 2024 13:28:06 +0100
Subject: s390/cio: use bitwise types to allow for type checking

Change the types of I/O structure members which contain physical
addresses to the dma32_t and dma64_t bitwise types. This allows the
use of sparse (aka "make C=1") to find incorrect usage of physical
addresses.

Signed-off-by: Heiko Carstens
---
 arch/s390/include/asm/cio.h  |  7 ++++---
 arch/s390/include/asm/eadm.h |  5 +++--
 arch/s390/include/asm/fcx.h  | 13 +++++++------
 arch/s390/include/asm/qdio.h | 17 +++++++++--------
 arch/s390/include/asm/scsw.h |  7 ++++---
 5 files changed, 27 insertions(+), 22 deletions(-)

diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h
index 1c4f585dd39b..80aeff80b2dc 100644
--- a/arch/s390/include/asm/cio.h
+++ b/arch/s390/include/asm/cio.h
@@ -7,6 +7,7 @@

 #include
 #include
+#include
 #include
 #include

@@ -32,7 +33,7 @@ struct ccw1 {
 	__u8  cmd_code;
 	__u8  flags;
 	__u16 count;
-	__u32 cda;
+	dma32_t cda;
 } __attribute__ ((packed,aligned(8)));

 /**
@@ -152,8 +153,8 @@ struct sublog {
 struct esw0 {
 	struct sublog sublog;
 	struct erw erw;
-	__u32  faddr[2];
-	__u32  saddr;
+	dma32_t faddr[2];
+	dma32_t saddr;
 } __attribute__ ((packed));

 /**
diff --git a/arch/s390/include/asm/eadm.h b/arch/s390/include/asm/eadm.h
index 06f795855af7..c4589ec4505e 100644
--- a/arch/s390/include/asm/eadm.h
+++ b/arch/s390/include/asm/eadm.h
@@ -5,6 +5,7 @@
 #include
 #include
 #include
+#include

 struct arqb {
 	u64 data;
@@ -45,7 +46,7 @@ struct msb {
 	u16:12;
 	u16 bs:4;
 	u32 blk_count;
-	u64 data_addr;
+	dma64_t data_addr;
 	u64 scm_addr;
 	u64:64;
 } __packed;
@@ -54,7 +55,7 @@ struct aidaw {
 	u8 flags;
 	u32 :24;
 	u32 :32;
-	u64 data_addr;
+	dma64_t data_addr;
 } __packed;

 #define MSB_OC_CLEAR	0
diff --git a/arch/s390/include/asm/fcx.h b/arch/s390/include/asm/fcx.h
index 29784b4b44f6..80f82a739b45 100644
--- a/arch/s390/include/asm/fcx.h
+++ b/arch/s390/include/asm/fcx.h
@@ -10,6 +10,7 @@
 #define _ASM_S390_FCX_H

 #include
+#include

 #define TCW_FORMAT_DEFAULT		0
 #define TCW_TIDAW_FORMAT_DEFAULT	0
@@ -43,16 +44,16 @@ struct tcw {
 	u32 r:1;
 	u32 w:1;
 	u32 :16;
-	u64 output;
-	u64 input;
-	u64 tsb;
-	u64 tccb;
+	dma64_t output;
+	dma64_t input;
+	dma64_t tsb;
+	dma64_t tccb;
 	u32 output_count;
 	u32 input_count;
 	u32 :32;
 	u32 :32;
 	u32 :32;
-	u32 intrg;
+	dma32_t intrg;
 } __attribute__ ((packed, aligned(64)));

 #define TIDAW_FLAGS_LAST	(1 << (7 - 0))
@@ -73,7 +74,7 @@ struct tidaw {
 	u32 flags:8;
 	u32 :24;
 	u32 count;
-	u64 addr;
+	dma64_t addr;
 } __attribute__ ((packed, aligned(16)));

 /**
diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h
index 2f983e0b95e0..69c4ead0c332 100644
--- a/arch/s390/include/asm/qdio.h
+++ b/arch/s390/include/asm/qdio.h
@@ -9,8 +9,9 @@
 #define __QDIO_H__

 #include
-#include
+#include
 #include
+#include

 /* only use 4 queues to save some cachelines */
 #define QDIO_MAX_QUEUES_PER_IRQ	4
@@ -34,9 +35,9 @@
  * @dkey: access key for SLSB
  */
 struct qdesfmt0 {
-	u64 sliba;
-	u64 sla;
-	u64 slsba;
+	dma64_t sliba;
+	dma64_t sla;
+	dma64_t slsba;
 	u32	 : 32;
 	u32 akey : 4;
 	u32 bkey : 4;
@@ -74,7 +75,7 @@ struct qdr {
 	/* private: */
 	u32 res[9];
 	/* public: */
-	u64 qiba;
+	dma64_t qiba;
 	u32	 : 32;
 	u32 qkey : 4;
 	u32	 : 28;
@@ -146,7 +147,7 @@ struct qaob {
 	u8 flags;
 	u16 cbtbs;
 	u8 sb_count;
-	u64 sba[QDIO_MAX_ELEMENTS_PER_BUFFER];
+	dma64_t sba[QDIO_MAX_ELEMENTS_PER_BUFFER];
 	u16 dcount[QDIO_MAX_ELEMENTS_PER_BUFFER];
 	u64 user0;
 	u64 res4[2];
@@ -208,7 +209,7 @@ struct qdio_buffer_element {
 	u8 scount;
 	u8 sflags;
 	u32 length;
-	u64 addr;
+	dma64_t addr;
 } __attribute__ ((packed, aligned(16)));

 /**
@@ -224,7 +225,7 @@ struct qdio_buffer {
  * @sbal: absolute SBAL address
  */
 struct sl_element {
-	u64 sbal;
+	dma64_t sbal;
 } __attribute__ ((packed));

 /**
diff --git a/arch/s390/include/asm/scsw.h b/arch/s390/include/asm/scsw.h
index 322bdcd4b616..56003e26cdbf 100644
--- a/arch/s390/include/asm/scsw.h
+++ b/arch/s390/include/asm/scsw.h
@@ -11,6 +11,7 @@

 #include
 #include
+#include
 #include

 /**
@@ -53,7 +54,7 @@ struct cmd_scsw {
 	__u32 fctl : 3;
 	__u32 actl : 7;
 	__u32 stctl : 5;
-	__u32 cpa;
+	dma32_t cpa;
 	__u32 dstat : 8;
 	__u32 cstat : 8;
 	__u32 count : 16;
@@ -93,7 +94,7 @@ struct tm_scsw {
 	u32 fctl:3;
 	u32 actl:7;
 	u32 stctl:5;
-	u32 tcw;
+	dma32_t tcw;
 	u32 dstat:8;
 	u32 cstat:8;
 	u32 fcxs:8;
@@ -125,7 +126,7 @@ struct eadm_scsw {
 	u32 fctl:3;
 	u32 actl:7;
 	u32 stctl:5;
-	u32 aob;
+	dma32_t aob;
 	u32 dstat:8;
 	u32 cstat:8;
 	u32:16;
--
cgit
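With ccw1.cda now a dma32_t, filling in a channel program has to go through the dma-types helpers. A hedged sketch of the resulting idiom (kernel context assumed; build_ccw() is a made-up helper name, not part of the patch):

#include <asm/cio.h>
#include <asm/dma-types.h>

/*
 * Hypothetical helper: a plain integer or pointer assignment to cda no
 * longer type-checks under sparse, so the conversion is spelled out.
 */
static void build_ccw(struct ccw1 *ccw, u8 cmd, void *data, u16 len)
{
	ccw->cmd_code = cmd;
	ccw->flags = 0;
	ccw->count = len;
	ccw->cda = virt_to_dma32(data);	/* was: (__u32)(unsigned long)data */
}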
From e3e9bda38e6d9f2af50b521741071d6406b40152 Mon Sep 17 00:00:00 2001
From: Halil Pasic
Date: Thu, 7 Mar 2024 13:28:08 +0100
Subject: s390/virtio_ccw: use DMA handle from DMA API

Change ccw_device_dma_zalloc() so that, like before, it returns a
virtual address which can be used to access data. In addition, pass a
new dma32_t pointer type handle, which correlates to the returned
virtual address. This pointer is used to directly pass/set the DMA
handle as returned by the DMA API.

Signed-off-by: Halil Pasic
Reviewed-by: Eric Farman
Signed-off-by: Heiko Carstens
---
 arch/s390/include/asm/ccwdev.h | 3 ++-
 arch/s390/include/asm/cio.h    | 2 ++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h
index 91d261751d25..436365ff6c19 100644
--- a/arch/s390/include/asm/ccwdev.h
+++ b/arch/s390/include/asm/ccwdev.h
@@ -217,7 +217,8 @@ extern void ccw_device_destroy_console(struct ccw_device *);
 extern int ccw_device_enable_console(struct ccw_device *);
 extern void ccw_device_wait_idle(struct ccw_device *);

-extern void *ccw_device_dma_zalloc(struct ccw_device *cdev, size_t size);
+extern void *ccw_device_dma_zalloc(struct ccw_device *cdev, size_t size,
+				   dma32_t *dma_handle);
 extern void ccw_device_dma_free(struct ccw_device *cdev,
 				void *cpu_addr, size_t size);

diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h
index 80aeff80b2dc..b6b619f340a5 100644
--- a/arch/s390/include/asm/cio.h
+++ b/arch/s390/include/asm/cio.h
@@ -365,6 +365,8 @@ extern struct device *cio_get_dma_css_dev(void);

 void *cio_gp_dma_zalloc(struct gen_pool *gp_dma, struct device *dma_dev,
 			size_t size);
+void *__cio_gp_dma_zalloc(struct gen_pool *gp_dma, struct device *dma_dev,
+			  size_t size, dma32_t *dma_handle);
 void cio_gp_dma_free(struct gen_pool *gp_dma, void *cpu_addr, size_t size);
 void cio_gp_dma_destroy(struct gen_pool *gp_dma, struct device *dma_dev);
 struct gen_pool *cio_gp_dma_create(struct device *dma_dev, int nr_pages);
--
cgit
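A hedged usage sketch of the changed allocator (kernel context assumed; the alloc_indicators() name and error handling are illustrative, not taken from the patch):

#include <asm/ccwdev.h>
#include <asm/dma-types.h>

/*
 * Allocate a DMA-capable buffer and keep both views of it: the virtual
 * address for CPU access, and the dma32_t handle for channel programs.
 */
static void *alloc_indicators(struct ccw_device *cdev, dma32_t *handle)
{
	void *indicators;

	indicators = ccw_device_dma_zalloc(cdev, sizeof(u64), handle);
	if (!indicators)
		return NULL;
	/* *handle can now be stored directly into e.g. a ccw1.cda field */
	return indicators;
}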
From 57bc3c2219b0c4ada8072a39c55c14197e2bbda9 Mon Sep 17 00:00:00 2001
From: Heiko Carstens
Date: Thu, 7 Mar 2024 13:28:12 +0100
Subject: s390/cio,idal: code cleanup

Adjust coding style, partially refactor code, and use kcalloc()
instead of kmalloc() to allocate an idaw array.

Signed-off-by: Heiko Carstens
---
 arch/s390/include/asm/idals.h | 134 ++++++++++++++++++++----------------------
 1 file changed, 64 insertions(+), 70 deletions(-)

diff --git a/arch/s390/include/asm/idals.h b/arch/s390/include/asm/idals.h
index 59fcc3c72edf..462109777547 100644
--- a/arch/s390/include/asm/idals.h
+++ b/arch/s390/include/asm/idals.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* 
+/*
  * Author(s)......: Holger Smolinski
  *		    Martin Schwidefsky
  * Bugreports.to..:
@@ -17,32 +17,34 @@
 #include
 #include
 #include
-#include
 #include
+#include

-#define IDA_SIZE_LOG 12 /* 11 for 2k , 12 for 4k */
-#define IDA_BLOCK_SIZE (1L<<IDA_SIZE_LOG)
-
-#define IDA_2K_SIZE_LOG 11
-#define IDA_2K_BLOCK_SIZE (1L << IDA_2K_SIZE_LOG)
+#define IDA_SIZE_SHIFT		12
+#define IDA_BLOCK_SIZE		(1UL << IDA_SIZE_SHIFT)
+
+#define IDA_2K_SIZE_SHIFT	11
+#define IDA_2K_BLOCK_SIZE	(1UL << IDA_2K_SIZE_SHIFT)

 /*
  * Test if an address/length pair needs an idal list.
  */
-static inline int
-idal_is_needed(void *vaddr, unsigned int length)
+static inline bool idal_is_needed(void *vaddr, unsigned int length)
 {
 	return ((__pa(vaddr) + length - 1) >> 31) != 0;
 }

-
 /*
  * Return the number of idal words needed for an address/length pair.
  */
 static inline unsigned int idal_nr_words(void *vaddr, unsigned int length)
 {
-	return ((__pa(vaddr) & (IDA_BLOCK_SIZE-1)) + length +
-		(IDA_BLOCK_SIZE-1)) >> IDA_SIZE_LOG;
+	unsigned int cidaw;
+
+	cidaw = __pa(vaddr) & (IDA_BLOCK_SIZE - 1);
+	cidaw += length + IDA_BLOCK_SIZE - 1;
+	cidaw >>= IDA_SIZE_SHIFT;
+	return cidaw;
 }

 /*
@@ -50,23 +52,24 @@ static inline unsigned int idal_nr_words(void *vaddr, unsigned int length)
  */
 static inline unsigned int idal_2k_nr_words(void *vaddr, unsigned int length)
 {
-	return ((__pa(vaddr) & (IDA_2K_BLOCK_SIZE - 1)) + length +
-		(IDA_2K_BLOCK_SIZE - 1)) >> IDA_2K_SIZE_LOG;
+	unsigned int cidaw;
+
+	cidaw = __pa(vaddr) & (IDA_2K_BLOCK_SIZE - 1);
+	cidaw += length + IDA_2K_BLOCK_SIZE - 1;
+	cidaw >>= IDA_2K_SIZE_SHIFT;
+	return cidaw;
 }

 /*
  * Create the list of idal words for an address/length pair.
  */
-static inline unsigned long *idal_create_words(unsigned long *idaws,
-					       void *vaddr, unsigned int length)
+static inline unsigned long *idal_create_words(unsigned long *idaws, void *vaddr, unsigned int length)
 {
-	unsigned long paddr;
+	unsigned long paddr = __pa(vaddr);
 	unsigned int cidaw;

-	paddr = __pa(vaddr);
-	cidaw = ((paddr & (IDA_BLOCK_SIZE-1)) + length +
-		 (IDA_BLOCK_SIZE-1)) >> IDA_SIZE_LOG;
 	*idaws++ = paddr;
+	cidaw = idal_nr_words(vaddr, length);
 	paddr &= -IDA_BLOCK_SIZE;
 	while (--cidaw > 0) {
 		paddr += IDA_BLOCK_SIZE;
@@ -79,8 +82,7 @@ static inline unsigned long *idal_create_words(unsigned long *idaws,
  * Sets the address of the data in CCW.
  * If necessary it allocates an IDAL and sets the appropriate flags.
  */
-static inline int
-set_normalized_cda(struct ccw1 * ccw, void *vaddr)
+static inline int set_normalized_cda(struct ccw1 *ccw, void *vaddr)
 {
 	unsigned int nridaws;
 	unsigned long *idal;
@@ -89,26 +91,24 @@ set_normalized_cda(struct ccw1 * ccw, void *vaddr)
 		return -EINVAL;
 	nridaws = idal_nr_words(vaddr, ccw->count);
 	if (nridaws > 0) {
-		idal = kmalloc(nridaws * sizeof(unsigned long),
-			       GFP_ATOMIC | GFP_DMA );
-		if (idal == NULL)
+		idal = kcalloc(nridaws, sizeof(*idal), GFP_ATOMIC | GFP_DMA);
+		if (!idal)
 			return -ENOMEM;
 		idal_create_words(idal, vaddr, ccw->count);
 		ccw->flags |= CCW_FLAG_IDA;
 		vaddr = idal;
 	}
-	ccw->cda = (__u32)(unsigned long) vaddr;
+	ccw->cda = (__u32)(unsigned long)vaddr;
 	return 0;
 }

 /*
  * Releases any allocated IDAL related to the CCW.
  */
-static inline void
-clear_normalized_cda(struct ccw1 * ccw)
+static inline void clear_normalized_cda(struct ccw1 *ccw)
 {
 	if (ccw->flags & CCW_FLAG_IDA) {
-		kfree((void *)(unsigned long) ccw->cda);
+		kfree((void *)(unsigned long)ccw->cda);
 		ccw->flags &= ~CCW_FLAG_IDA;
 	}
 	ccw->cda = 0;
@@ -126,86 +126,81 @@ struct idal_buffer {
 /*
  * Allocate an idal buffer
  */
-static inline struct idal_buffer *
-idal_buffer_alloc(size_t size, int page_order)
+static inline struct idal_buffer *idal_buffer_alloc(size_t size, int page_order)
 {
-	struct idal_buffer *ib;
 	int nr_chunks, nr_ptrs, i;
+	struct idal_buffer *ib;

-	nr_ptrs = (size + IDA_BLOCK_SIZE - 1) >> IDA_SIZE_LOG;
-	nr_chunks = (4096 << page_order) >> IDA_SIZE_LOG;
+	nr_ptrs = (size + IDA_BLOCK_SIZE - 1) >> IDA_SIZE_SHIFT;
+	nr_chunks = (PAGE_SIZE << page_order) >> IDA_SIZE_SHIFT;
 	ib = kmalloc(struct_size(ib, data, nr_ptrs), GFP_DMA | GFP_KERNEL);
-	if (ib == NULL)
+	if (!ib)
 		return ERR_PTR(-ENOMEM);
 	ib->size = size;
 	ib->page_order = page_order;
 	for (i = 0; i < nr_ptrs; i++) {
-		if ((i & (nr_chunks - 1)) != 0) {
-			ib->data[i] = ib->data[i-1] + IDA_BLOCK_SIZE;
-			continue;
-		}
-		ib->data[i] = (void *)
-			__get_free_pages(GFP_KERNEL, page_order);
-		if (ib->data[i] != NULL)
+		if (i & (nr_chunks - 1)) {
+			ib->data[i] = ib->data[i - 1] + IDA_BLOCK_SIZE;
 			continue;
-		// Not enough memory
-		while (i >= nr_chunks) {
-			i -= nr_chunks;
-			free_pages((unsigned long) ib->data[i],
-				   ib->page_order);
 		}
-		kfree(ib);
-		return ERR_PTR(-ENOMEM);
+		ib->data[i] = (void *)__get_free_pages(GFP_KERNEL, page_order);
+		if (!ib->data[i])
+			goto error;
 	}
 	return ib;
+error:
+	while (i >= nr_chunks) {
+		i -= nr_chunks;
+		free_pages((unsigned long)ib->data[i], ib->page_order);
+	}
+	kfree(ib);
+	return ERR_PTR(-ENOMEM);
 }

 /*
  * Free an idal buffer.
*/ -static inline void -idal_buffer_free(struct idal_buffer *ib) +static inline void idal_buffer_free(struct idal_buffer *ib) { int nr_chunks, nr_ptrs, i; - nr_ptrs = (ib->size + IDA_BLOCK_SIZE - 1) >> IDA_SIZE_LOG; - nr_chunks = (4096 << ib->page_order) >> IDA_SIZE_LOG; + nr_ptrs = (ib->size + IDA_BLOCK_SIZE - 1) >> IDA_SIZE_SHIFT; + nr_chunks = (PAGE_SIZE << ib->page_order) >> IDA_SIZE_SHIFT; for (i = 0; i < nr_ptrs; i += nr_chunks) - free_pages((unsigned long) ib->data[i], ib->page_order); + free_pages((unsigned long)ib->data[i], ib->page_order); kfree(ib); } /* * Test if a idal list is really needed. */ -static inline int -__idal_buffer_is_needed(struct idal_buffer *ib) +static inline bool __idal_buffer_is_needed(struct idal_buffer *ib) { - return ib->size > (4096ul << ib->page_order) || - idal_is_needed(ib->data[0], ib->size); + if (ib->size > (PAGE_SIZE << ib->page_order)) + return true; + return idal_is_needed(ib->data[0], ib->size); } /* * Set channel data address to idal buffer. */ -static inline void -idal_buffer_set_cda(struct idal_buffer *ib, struct ccw1 *ccw) +static inline void idal_buffer_set_cda(struct idal_buffer *ib, struct ccw1 *ccw) { if (__idal_buffer_is_needed(ib)) { - // setup idals; - ccw->cda = (u32)(addr_t) ib->data; + /* Setup idals */ + ccw->cda = (u32)(addr_t)ib->data; ccw->flags |= CCW_FLAG_IDA; - } else - // we do not need idals - use direct addressing - ccw->cda = (u32)(addr_t) ib->data[0]; + } else { + /* No idals needed - use direct addressing. */ + ccw->cda = (u32)(addr_t)ib->data[0]; + } ccw->count = ib->size; } /* * Copy count bytes from an idal buffer to user memory */ -static inline size_t -idal_buffer_to_user(struct idal_buffer *ib, void __user *to, size_t count) +static inline size_t idal_buffer_to_user(struct idal_buffer *ib, void __user *to, size_t count) { size_t left; int i; @@ -215,7 +210,7 @@ idal_buffer_to_user(struct idal_buffer *ib, void __user *to, size_t count) left = copy_to_user(to, ib->data[i], IDA_BLOCK_SIZE); if (left) return left + count - IDA_BLOCK_SIZE; - to = (void __user *) to + IDA_BLOCK_SIZE; + to = (void __user *)to + IDA_BLOCK_SIZE; count -= IDA_BLOCK_SIZE; } return copy_to_user(to, ib->data[i], count); @@ -224,8 +219,7 @@ idal_buffer_to_user(struct idal_buffer *ib, void __user *to, size_t count) /* * Copy count bytes from user memory to an idal buffer */ -static inline size_t -idal_buffer_from_user(struct idal_buffer *ib, const void __user *from, size_t count) +static inline size_t idal_buffer_from_user(struct idal_buffer *ib, const void __user *from, size_t count) { size_t left; int i; @@ -235,7 +229,7 @@ idal_buffer_from_user(struct idal_buffer *ib, const void __user *from, size_t co left = copy_from_user(ib->data[i], from, IDA_BLOCK_SIZE); if (left) return left + count - IDA_BLOCK_SIZE; - from = (void __user *) from + IDA_BLOCK_SIZE; + from = (void __user *)from + IDA_BLOCK_SIZE; count -= IDA_BLOCK_SIZE; } return copy_from_user(ib->data[i], from, count); -- cgit From 0c9580cebb5065c7ef25d48f61222aaf46932a0f Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 7 Mar 2024 13:28:13 +0100 Subject: s390/cio,idal: remove superfluous virt_to_phys() conversion Only the last 12 bits of virtual / physical addresses are used when masking with IDA_BLOCK_SIZE - 1. Given that the bits are the same regardless of virtual or physical address, remove the virtual to physical address conversion. 
From 0c9580cebb5065c7ef25d48f61222aaf46932a0f Mon Sep 17 00:00:00 2001
From: Heiko Carstens
Date: Thu, 7 Mar 2024 13:28:13 +0100
Subject: s390/cio,idal: remove superfluous virt_to_phys() conversion

Only the last 12 bits of virtual / physical addresses are used when
masking with IDA_BLOCK_SIZE - 1. Given that these bits are identical
for virtual and physical addresses, remove the virtual to physical
address conversion.

Signed-off-by: Heiko Carstens
---
 arch/s390/include/asm/idals.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/s390/include/asm/idals.h b/arch/s390/include/asm/idals.h
index 462109777547..1ceefc9f6fbe 100644
--- a/arch/s390/include/asm/idals.h
+++ b/arch/s390/include/asm/idals.h
@@ -41,7 +41,7 @@ static inline unsigned int idal_nr_words(void *vaddr, unsigned int length)
 {
 	unsigned int cidaw;

-	cidaw = __pa(vaddr) & (IDA_BLOCK_SIZE - 1);
+	cidaw = (unsigned long)vaddr & (IDA_BLOCK_SIZE - 1);
 	cidaw += length + IDA_BLOCK_SIZE - 1;
 	cidaw >>= IDA_SIZE_SHIFT;
 	return cidaw;
@@ -54,7 +54,7 @@ static inline unsigned int idal_2k_nr_words(void *vaddr, unsigned int length)
 {
 	unsigned int cidaw;

-	cidaw = __pa(vaddr) & (IDA_2K_BLOCK_SIZE - 1);
+	cidaw = (unsigned long)vaddr & (IDA_2K_BLOCK_SIZE - 1);
 	cidaw += length + IDA_2K_BLOCK_SIZE - 1;
 	cidaw >>= IDA_2K_SIZE_SHIFT;
 	return cidaw;
--
cgit
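The reasoning here is that virtual-to-physical translation happens at page (4K) granularity, so the low 12 bits never change. A standalone sketch with made-up addresses (any page-aligned translation offset behaves the same):

#include <stdio.h>

#define IDA_BLOCK_SIZE 0x1000UL	/* 4K */

int main(void)
{
	unsigned long virt = 0x3ffe1800UL;		/* made-up address */
	unsigned long phys = virt + 0x80000000UL;	/* page-aligned offset */

	/* Translation shifts whole 4K pages, so the low 12 bits match. */
	printf("virt & mask: %#lx\n", virt & (IDA_BLOCK_SIZE - 1));
	printf("phys & mask: %#lx\n", phys & (IDA_BLOCK_SIZE - 1));
	return 0;
}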
From e1f51be68d9906e652d4678c4df1d34a9d8ad9ef Mon Sep 17 00:00:00 2001
From: Heiko Carstens
Date: Thu, 7 Mar 2024 13:28:14 +0100
Subject: s390/cio,idal: fix virtual vs physical address confusion

Fix virtual vs physical address confusion. This does not fix a bug
since virtual and physical address spaces are currently the same.

Signed-off-by: Heiko Carstens
---
 arch/s390/include/asm/idals.h | 68 ++++++++++++++++++++++++++++---------------
 1 file changed, 45 insertions(+), 23 deletions(-)

diff --git a/arch/s390/include/asm/idals.h b/arch/s390/include/asm/idals.h
index 1ceefc9f6fbe..ac68c657b28c 100644
--- a/arch/s390/include/asm/idals.h
+++ b/arch/s390/include/asm/idals.h
@@ -18,6 +18,7 @@
 #include
 #include
 #include
+#include
 #include

 #define IDA_SIZE_SHIFT		12
@@ -31,7 +32,9 @@
  */
 static inline bool idal_is_needed(void *vaddr, unsigned int length)
 {
-	return ((__pa(vaddr) + length - 1) >> 31) != 0;
+	dma64_t paddr = virt_to_dma64(vaddr);
+
+	return (((__force unsigned long)(paddr) + length - 1) >> 31) != 0;
 }

 /*
@@ -63,16 +66,16 @@ static inline unsigned int idal_2k_nr_words(void *vaddr, unsigned int length)
 /*
  * Create the list of idal words for an address/length pair.
  */
-static inline unsigned long *idal_create_words(unsigned long *idaws, void *vaddr, unsigned int length)
+static inline dma64_t *idal_create_words(dma64_t *idaws, void *vaddr, unsigned int length)
 {
-	unsigned long paddr = __pa(vaddr);
+	dma64_t paddr = virt_to_dma64(vaddr);
 	unsigned int cidaw;

 	*idaws++ = paddr;
 	cidaw = idal_nr_words(vaddr, length);
-	paddr &= -IDA_BLOCK_SIZE;
+	paddr = dma64_and(paddr, -IDA_BLOCK_SIZE);
 	while (--cidaw > 0) {
-		paddr += IDA_BLOCK_SIZE;
+		paddr = dma64_add(paddr, IDA_BLOCK_SIZE);
 		*idaws++ = paddr;
 	}
 	return idaws;
@@ -85,7 +88,7 @@ static inline unsigned long *idal_create_words(unsigned long *idaws, void *vaddr
 static inline int set_normalized_cda(struct ccw1 *ccw, void *vaddr)
 {
 	unsigned int nridaws;
-	unsigned long *idal;
+	dma64_t *idal;

 	if (ccw->flags & CCW_FLAG_IDA)
 		return -EINVAL;
@@ -98,7 +101,7 @@ static inline int set_normalized_cda(struct ccw1 *ccw, void *vaddr)
 		ccw->flags |= CCW_FLAG_IDA;
 		vaddr = idal;
 	}
-	ccw->cda = (__u32)(unsigned long)vaddr;
+	ccw->cda = virt_to_dma32(vaddr);
 	return 0;
 }

@@ -108,7 +111,7 @@ static inline int set_normalized_cda(struct ccw1 *ccw, void *vaddr)
 static inline void clear_normalized_cda(struct ccw1 *ccw)
 {
 	if (ccw->flags & CCW_FLAG_IDA) {
-		kfree((void *)(unsigned long)ccw->cda);
+		kfree(dma32_to_virt(ccw->cda));
 		ccw->flags &= ~CCW_FLAG_IDA;
 	}
 	ccw->cda = 0;
@@ -120,7 +123,7 @@ static inline void clear_normalized_cda(struct ccw1 *ccw)
 struct idal_buffer {
 	size_t size;
 	size_t page_order;
-	void *data[];
+	dma64_t data[];
 };

 /*
@@ -130,6 +133,7 @@ static inline struct idal_buffer *idal_buffer_alloc(size_t size, int page_order)
 {
 	int nr_chunks, nr_ptrs, i;
 	struct idal_buffer *ib;
+	void *vaddr;

 	nr_ptrs = (size + IDA_BLOCK_SIZE - 1) >> IDA_SIZE_SHIFT;
 	nr_chunks = (PAGE_SIZE << page_order) >> IDA_SIZE_SHIFT;
@@ -140,18 +144,20 @@ static inline struct idal_buffer *idal_buffer_alloc(size_t size, int page_order)
 	ib->page_order = page_order;
 	for (i = 0; i < nr_ptrs; i++) {
 		if (i & (nr_chunks - 1)) {
-			ib->data[i] = ib->data[i - 1] + IDA_BLOCK_SIZE;
+			ib->data[i] = dma64_add(ib->data[i - 1], IDA_BLOCK_SIZE);
 			continue;
 		}
-		ib->data[i] = (void *)__get_free_pages(GFP_KERNEL, page_order);
-		if (!ib->data[i])
+		vaddr = (void *)__get_free_pages(GFP_KERNEL, page_order);
+		if (!vaddr)
 			goto error;
+		ib->data[i] = virt_to_dma64(vaddr);
 	}
 	return ib;
 error:
 	while (i >= nr_chunks) {
 		i -= nr_chunks;
-		free_pages((unsigned long)ib->data[i], ib->page_order);
+		vaddr = dma64_to_virt(ib->data[i]);
+		free_pages((unsigned long)vaddr, ib->page_order);
 	}
 	kfree(ib);
 	return ERR_PTR(-ENOMEM);
@@ -163,11 +169,14 @@ error:
 static inline void idal_buffer_free(struct idal_buffer *ib)
 {
 	int nr_chunks, nr_ptrs, i;
+	void *vaddr;

 	nr_ptrs = (ib->size + IDA_BLOCK_SIZE - 1) >> IDA_SIZE_SHIFT;
 	nr_chunks = (PAGE_SIZE << ib->page_order) >> IDA_SIZE_SHIFT;
-	for (i = 0; i < nr_ptrs; i += nr_chunks)
-		free_pages((unsigned long)ib->data[i], ib->page_order);
+	for (i = 0; i < nr_ptrs; i += nr_chunks) {
+		vaddr = dma64_to_virt(ib->data[i]);
+		free_pages((unsigned long)vaddr, ib->page_order);
+	}
 	kfree(ib);
 }

@@ -178,7 +187,7 @@ static inline bool __idal_buffer_is_needed(struct idal_buffer *ib)
 {
 	if (ib->size > (PAGE_SIZE << ib->page_order))
 		return true;
-	return idal_is_needed(ib->data[0], ib->size);
+	return idal_is_needed(dma64_to_virt(ib->data[0]), ib->size);
 }

 /*
@@ -186,13 +195,20 @@ static inline bool __idal_buffer_is_needed(struct idal_buffer *ib)
  */
 static inline void idal_buffer_set_cda(struct idal_buffer *ib, struct ccw1 *ccw)
 {
+	void *vaddr;
+
 	if (__idal_buffer_is_needed(ib)) {
 		/* Setup idals */
-		ccw->cda = (u32)(addr_t)ib->data;
+		ccw->cda = virt_to_dma32(ib->data);
 		ccw->flags |= CCW_FLAG_IDA;
 	} else {
-		/* No idals needed - use direct addressing. */
-		ccw->cda = (u32)(addr_t)ib->data[0];
+		/*
+		 * No idals needed - use direct addressing. Convert from
+		 * dma64_t to virt and then to dma32_t only because of type
+		 * checking. The physical address is known to be below 2GB.
+		 */
+		vaddr = dma64_to_virt(ib->data[0]);
+		ccw->cda = virt_to_dma32(vaddr);
 	}
 	ccw->count = ib->size;
 }

@@ -203,17 +219,20 @@ static inline void idal_buffer_set_cda(struct idal_buffer *ib, struct ccw1 *ccw)
 static inline size_t idal_buffer_to_user(struct idal_buffer *ib, void __user *to, size_t count)
 {
 	size_t left;
+	void *vaddr;
 	int i;

 	BUG_ON(count > ib->size);
 	for (i = 0; count > IDA_BLOCK_SIZE; i++) {
-		left = copy_to_user(to, ib->data[i], IDA_BLOCK_SIZE);
+		vaddr = dma64_to_virt(ib->data[i]);
+		left = copy_to_user(to, vaddr, IDA_BLOCK_SIZE);
 		if (left)
 			return left + count - IDA_BLOCK_SIZE;
 		to = (void __user *)to + IDA_BLOCK_SIZE;
 		count -= IDA_BLOCK_SIZE;
 	}
-	return copy_to_user(to, ib->data[i], count);
+	vaddr = dma64_to_virt(ib->data[i]);
+	return copy_to_user(to, vaddr, count);
 }

 /*
@@ -222,17 +241,20 @@ static inline size_t idal_buffer_to_user(struct idal_buffer *ib, void __user *to
 static inline size_t idal_buffer_from_user(struct idal_buffer *ib, const void __user *from, size_t count)
 {
 	size_t left;
+	void *vaddr;
 	int i;

 	BUG_ON(count > ib->size);
 	for (i = 0; count > IDA_BLOCK_SIZE; i++) {
-		left = copy_from_user(ib->data[i], from, IDA_BLOCK_SIZE);
+		vaddr = dma64_to_virt(ib->data[i]);
+		left = copy_from_user(vaddr, from, IDA_BLOCK_SIZE);
 		if (left)
 			return left + count - IDA_BLOCK_SIZE;
 		from = (void __user *)from + IDA_BLOCK_SIZE;
 		count -= IDA_BLOCK_SIZE;
 	}
-	return copy_from_user(ib->data[i], from, count);
+	vaddr = dma64_to_virt(ib->data[i]);
+	return copy_from_user(vaddr, from, count);
 }

 #endif
--
cgit
From 5f58bde7264704fec2058afc219a3557f02f3d8f Mon Sep 17 00:00:00 2001
From: Heiko Carstens
Date: Thu, 7 Mar 2024 13:28:27 +0100
Subject: s390/mm: provide simple ARCH_HAS_DEBUG_VIRTUAL support

Provide a very simple ARCH_HAS_DEBUG_VIRTUAL implementation.

For now errors are only reported for the following cases:

- Trying to translate a vmalloc or module address to a physical address

- Translating a virtual address that is supposed to be within ZONE_DMA,
  where the resulting physical address is larger than two GiB

Reviewed-by: Alexander Gordeev
Signed-off-by: Heiko Carstens
---
 arch/s390/Kconfig                 |  1 +
 arch/s390/Makefile                |  1 +
 arch/s390/configs/debug_defconfig |  1 +
 arch/s390/include/asm/dma-types.h |  2 +-
 arch/s390/include/asm/page.h      | 30 ++++++++++++++++++++++++++++--
 arch/s390/mm/Makefile             |  1 +
 arch/s390/mm/physaddr.c           | 15 +++++++++++++++
 7 files changed, 48 insertions(+), 3 deletions(-)
 create mode 100644 arch/s390/mm/physaddr.c

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 9e52461f35cb..6f0ee7db3aac 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -63,6 +63,7 @@ config S390
 	select ARCH_ENABLE_MEMORY_HOTREMOVE
 	select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2
 	select ARCH_HAS_CURRENT_STACK_POINTER
+	select ARCH_HAS_DEBUG_VIRTUAL
 	select ARCH_HAS_DEBUG_VM_PGTABLE
 	select ARCH_HAS_DEBUG_WX
 	select ARCH_HAS_DEVMEM_IS_ALLOWED
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index 2a58e1864931..2dbb2d2f22f9 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -29,6 +29,7 @@ KBUILD_AFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),$(aflags_dwarf))
 endif
 KBUILD_CFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -O2 -mpacked-stack
 KBUILD_CFLAGS_DECOMPRESSOR += -DDISABLE_BRANCH_PROFILING -D__NO_FORTIFY
+KBUILD_CFLAGS_DECOMPRESSOR += -D__DECOMPRESSOR
 KBUILD_CFLAGS_DECOMPRESSOR += -fno-delete-null-pointer-checks -msoft-float -mbackchain
 KBUILD_CFLAGS_DECOMPRESSOR += -fno-asynchronous-unwind-tables
 KBUILD_CFLAGS_DECOMPRESSOR += -ffreestanding
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index 4032e6e136ac..f1fb01cd5a25 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -810,6 +810,7 @@ CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER=y
 CONFIG_DEBUG_STACK_USAGE=y
 CONFIG_DEBUG_VM=y
 CONFIG_DEBUG_VM_PGFLAGS=y
+CONFIG_DEBUG_VIRTUAL=y
 CONFIG_DEBUG_MEMORY_INIT=y
 CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m
 CONFIG_DEBUG_PER_CPU_MAPS=y
diff --git a/arch/s390/include/asm/dma-types.h b/arch/s390/include/asm/dma-types.h
index 7d7f71022aa9..5c5734e6946c 100644
--- a/arch/s390/include/asm/dma-types.h
+++ b/arch/s390/include/asm/dma-types.h
@@ -37,7 +37,7 @@ typedef u64 __bitwise dma64_t;
  */
 static inline dma32_t virt_to_dma32(void *ptr)
 {
-	return (__force dma32_t)__pa(ptr);
+	return (__force dma32_t)__pa32(ptr);
 }

 static inline void *dma32_to_virt(dma32_t addr)
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index ded9548d11d9..9381879f7ecf 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -181,9 +181,35 @@ int arch_make_page_accessible(struct page *page);
 #define __PAGE_OFFSET		0x0UL
 #define PAGE_OFFSET		0x0UL

-#define __pa(x)			((unsigned long)(x))
+#define __pa_nodebug(x)		((unsigned long)(x))
+
+#ifdef __DECOMPRESSOR
+
+#define __pa(x)			__pa_nodebug(x)
+#define __pa32(x)		__pa(x)
 #define __va(x)			((void *)(unsigned long)(x))

+#else /* __DECOMPRESSOR */
+
+#ifdef CONFIG_DEBUG_VIRTUAL
+
+unsigned long __phys_addr(unsigned long x, bool is_31bit);
+
+#else /* CONFIG_DEBUG_VIRTUAL */
+
+static inline unsigned long __phys_addr(unsigned long x, bool is_31bit)
+{
+	return __pa_nodebug(x);
+}
+
+#endif /* CONFIG_DEBUG_VIRTUAL */
+
+#define __pa(x)			__phys_addr((unsigned long)(x), false)
+#define __pa32(x)		__phys_addr((unsigned long)(x), true)
+#define __va(x)			((void *)(unsigned long)(x))
+
+#endif /* __DECOMPRESSOR */
+
 #define phys_to_pfn(phys)	((phys) >> PAGE_SHIFT)
 #define pfn_to_phys(pfn)	((pfn) << PAGE_SHIFT)

@@ -205,7 +231,7 @@ static inline unsigned long virt_to_pfn(const void *kaddr)
 #define virt_to_page(kaddr)	pfn_to_page(virt_to_pfn(kaddr))
 #define page_to_virt(page)	pfn_to_virt(page_to_pfn(page))

-#define virt_addr_valid(kaddr)	pfn_valid(virt_to_pfn(kaddr))
+#define virt_addr_valid(kaddr)	pfn_valid(phys_to_pfn(__pa_nodebug(kaddr)))

 #define VM_DATA_DEFAULT_FLAGS	VM_DATA_FLAGS_NON_EXEC

diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile
index 352ff520fd94..f6c2db7a8669 100644
--- a/arch/s390/mm/Makefile
+++ b/arch/s390/mm/Makefile
@@ -7,6 +7,7 @@ obj-y		:= init.o fault.o extmem.o mmap.o vmem.o maccess.o
 obj-y		+= page-states.o pageattr.o pgtable.o pgalloc.o extable.o

 obj-$(CONFIG_CMM)		+= cmm.o
+obj-$(CONFIG_DEBUG_VIRTUAL)	+= physaddr.o
 obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
 obj-$(CONFIG_PTDUMP_CORE)	+= dump_pagetables.o
 obj-$(CONFIG_PGSTE)		+= gmap.o
diff --git a/arch/s390/mm/physaddr.c b/arch/s390/mm/physaddr.c
new file mode 100644
index 000000000000..59de866c72d9
--- /dev/null
+++ b/arch/s390/mm/physaddr.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+#include
+#include
+#include
+#include
+
+unsigned long __phys_addr(unsigned long x, bool is_31bit)
+{
+	VIRTUAL_BUG_ON(is_vmalloc_or_module_addr((void *)(x)));
+	x = __pa_nodebug(x);
+	if (is_31bit)
+		VIRTUAL_BUG_ON(x >> 31);
+	return x;
+}
+EXPORT_SYMBOL(__phys_addr);
--
cgit
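With CONFIG_DEBUG_VIRTUAL enabled, misuse becomes loud at runtime. A hedged illustration of what now trips the check (kernel context assumed; this function is not part of the patch):

#include <linux/vmalloc.h>
#include <asm/page.h>

/*
 * Both statements compile, but with CONFIG_DEBUG_VIRTUAL __pa() goes
 * through __phys_addr() and hits VIRTUAL_BUG_ON(): vmalloc memory is
 * not physically contiguous, so the direct translation is invalid.
 */
static void debug_virtual_demo(void)
{
	void *p = vmalloc(PAGE_SIZE);
	unsigned long pa;

	if (!p)
		return;
	pa = __pa(p);	/* VIRTUAL_BUG_ON(is_vmalloc_or_module_addr(p)) fires */
	(void)pa;
	vfree(p);
}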
From c239c83ed5c558be3b5926c7f11639f02c8acd00 Mon Sep 17 00:00:00 2001
From: Sven Schnelle
Date: Tue, 20 Feb 2024 14:21:14 +0100
Subject: s390/entry: add CIF_SIE flag and remove sie64a() address check

When a program check, interrupt or machine check is triggered, the PSW
address is compared to a certain range of the sie64a() function to
figure out whether SIE was interrupted and a cleanup of SIE is needed.

This doesn't work with kprobes: when kprobes probes an instruction, it
copies the instruction to the kprobes instruction page and overwrites
the original instruction with an undefined instruction (opcode 00).
When this instruction is hit later, kprobes single-steps the
instruction on the kprobes instruction page. However, if this
instruction is a relative branch instruction, it will now point to a
different location in memory due to having been moved to the kprobes
instruction page. If the new branch target points into sie64a(), the
kernel assumes it interrupted SIE when processing the breakpoint and
will crash trying to access the SIE control block.

Instead of comparing the address, introduce a new CIF_SIE flag which
indicates whether SIE was interrupted.

Signed-off-by: Sven Schnelle
Suggested-by: Heiko Carstens
Reviewed-by: Heiko Carstens
Signed-off-by: Heiko Carstens
---
 arch/s390/include/asm/processor.h |  2 ++
 arch/s390/kernel/entry.S          | 17 ++++++++++++++---
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 7cf00cf8fb0b..db9982f0e8cd 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -14,11 +14,13 @@

 #include

+#define CIF_SIE			0	/* CPU needs SIE exit cleanup */
 #define CIF_NOHZ_DELAY		2	/* delay HZ disable for a tick */
 #define CIF_ENABLED_WAIT	5	/* in enabled wait state */
 #define CIF_MCCK_GUEST		6	/* machine check happening in guest */
 #define CIF_DEDICATED_CPU	7	/* this CPU is dedicated */

+#define _CIF_SIE		BIT(CIF_SIE)
 #define _CIF_NOHZ_DELAY		BIT(CIF_NOHZ_DELAY)
 #define _CIF_ENABLED_WAIT	BIT(CIF_ENABLED_WAIT)
 #define _CIF_MCCK_GUEST		BIT(CIF_MCCK_GUEST)

diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index fc5277eab554..4e0ff79ffee3 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -146,6 +146,7 @@ _LPP_OFFSET	= __LC_LPP
 	lg	%r9,__SF_SIE_CONTROL(%r15)	# get control block pointer
 	ni	__SIE_PROG0C+3(%r9),0xfe	# no longer in SIE
 	lctlg	%c1,%c1,__LC_KERNEL_ASCE	# load primary asce
+	ni	__LC_CPU_FLAGS+7,255-_CIF_SIE
 	larl	%r9,sie_exit			# skip forward to sie_exit
 	.endm
 #endif
@@ -214,6 +215,7 @@ SYM_FUNC_START(__sie64a)
 	lg	%r14,__LC_GMAP			# get gmap pointer
 	ltgr	%r14,%r14
 	jz	.Lsie_gmap
+	oi	__LC_CPU_FLAGS+7,_CIF_SIE
 	lctlg	%c1,%c1,__GMAP_ASCE(%r14)	# load primary asce
 .Lsie_gmap:
 	lg	%r14,__SF_SIE_CONTROL(%r15)	# get control block pointer
@@ -235,6 +237,7 @@ SYM_FUNC_START(__sie64a)
 	ni	__SIE_PROG0C+3(%r14),0xfe	# no longer in SIE
 	lctlg	%c1,%c1,__LC_KERNEL_ASCE	# load primary asce
 .Lsie_done:
+	ni	__LC_CPU_FLAGS+7,255-_CIF_SIE
 # some program checks are suppressing. C code (e.g. do_protection_exception)
 # will rewind the PSW by the ILC, which is often 4 bytes in case of SIE. There
 # are some corner cases (e.g. runtime instrumentation) where ILC is unpredictable.
@@ -346,7 +349,8 @@ SYM_CODE_START(pgm_check_handler)
 .Lpgm_skip_asce:
 #if IS_ENABLED(CONFIG_KVM)
 	# cleanup critical section for program checks in __sie64a
-	OUTSIDE	%r9,.Lsie_gmap,.Lsie_done,1f
+	TSTMSK	__LC_CPU_FLAGS,_CIF_SIE
+	jz	1f
 	BPENTER	__SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST
 	SIEEXIT
 	lghi	%r10,_PIF_GUEST_FAULT
@@ -416,7 +420,8 @@ SYM_CODE_START(\name)
 	tmhh	%r8,0x0001			# interrupting from user ?
 	jnz	1f
 #if IS_ENABLED(CONFIG_KVM)
-	OUTSIDE	%r9,.Lsie_gmap,.Lsie_done,0f
+	TSTMSK	__LC_CPU_FLAGS,_CIF_SIE
+	jz	0f
 	BPENTER	__SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST
 	SIEEXIT
 #endif
@@ -513,7 +518,13 @@ SYM_CODE_START(mcck_int_handler)
 	TSTMSK	__LC_MCCK_CODE,MCCK_CODE_PSW_IA_VALID
 	jno	.Lmcck_panic
 #if IS_ENABLED(CONFIG_KVM)
-	OUTSIDE	%r9,.Lsie_gmap,.Lsie_done,.Lmcck_user
+	TSTMSK	__LC_CPU_FLAGS,_CIF_SIE
+	jz	.Lmcck_user
+	# Need to compare the address instead of a CIF_SIE* flag.
+	# Otherwise there would be a race between setting the flag
+	# and entering SIE (or leaving and clearing the flag). This
+	# would cause machine checks targeted at the guest to be
+	# handled by the host.
 	OUTSIDE	%r9,.Lsie_entry,.Lsie_leave,4f
 	oi	__LC_CPU_FLAGS+7, _CIF_MCCK_GUEST
 4:	BPENTER	__SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST
--
cgit
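A note on the "oi __LC_CPU_FLAGS+7,_CIF_SIE" idiom: OI/NI operate on a single byte, and on big-endian s390 the byte at offset +7 is the least significant byte of the 64-bit cpu_flags word, so the byte-wide OR is equivalent to a 64-bit OR for flag bits below 256. A standalone sketch of that equivalence (userspace C, made-up flags word):

#include <stdio.h>
#include <stdint.h>
#include <string.h>

#define _CIF_SIE (1UL << 0)	/* BIT(CIF_SIE) from the patch */

int main(void)
{
	uint64_t cpu_flags = 0;
	unsigned char bytes[8];

	/* 64-bit view: what C code would conceptually do */
	cpu_flags |= _CIF_SIE;

	/* byte view: what the byte-wide OI on offset +7 does on
	 * big-endian storage */
	memset(bytes, 0, sizeof(bytes));
	bytes[7] |= _CIF_SIE;

	/* On a big-endian machine both representations are identical. */
	printf("flags=%#llx, byte7=%#x\n",
	       (unsigned long long)cpu_flags, bytes[7]);
	return 0;
}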
From 29e5bc0f023a1110d1539d75ccac3b4cdcadec5e Mon Sep 17 00:00:00 2001
From: Sven Schnelle
Date: Tue, 20 Feb 2024 15:18:56 +0100
Subject: s390/entry: remove OUTSIDE macro

With only one OUTSIDE user left, remove the macro and move the code
directly to the machine check handler. This has the advantage that it
is much easier to determine which registers are used.

Signed-off-by: Sven Schnelle
Reviewed-by: Heiko Carstens
Signed-off-by: Heiko Carstens
---
 arch/s390/kernel/entry.S | 29 ++++-------------------------
 1 file changed, 4 insertions(+), 25 deletions(-)

diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 4e0ff79ffee3..a0543db0bcae 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -119,29 +119,6 @@ _LPP_OFFSET	= __LC_LPP
 	.endm

 #if IS_ENABLED(CONFIG_KVM)
-	/*
-	 * The OUTSIDE macro jumps to the provided label in case the value
-	 * in the provided register is outside of the provided range. The
-	 * macro is useful for checking whether a PSW stored in a register
-	 * pair points inside or outside of a block of instructions.
-	 * @reg: register to check
-	 * @start: start of the range
-	 * @end: end of the range
-	 * @outside_label: jump here if @reg is outside of [@start..@end)
-	 */
-	.macro OUTSIDE reg,start,end,outside_label
-	lgr	%r14,\reg
-	larl	%r13,\start
-	slgr	%r14,%r13
-	clgfrl	%r14,.Lrange_size\@
-	jhe	\outside_label
-	.section .rodata, "a"
-	.balign 4
-.Lrange_size\@:
-	.long	\end - \start
-	.previous
-	.endm
-
 	.macro SIEEXIT
 	lg	%r9,__SF_SIE_CONTROL(%r15)	# get control block pointer
 	ni	__SIE_PROG0C+3(%r9),0xfe	# no longer in SIE
@@ -214,7 +191,6 @@ SYM_FUNC_START(__sie64a)
 	lg	%r14,__SF_SIE_CONTROL(%r15)	# get control block pointer
 	ni	__SIE_PROG0C+3(%r14),0xfe	# no longer in SIE
 	lctlg	%c1,%c1,__LC_KERNEL_ASCE	# load primary asce
-.Lsie_done:
 	ni	__LC_CPU_FLAGS+7,255-_CIF_SIE
 # some program checks are suppressing. C code (e.g. do_protection_exception)
 # will rewind the PSW by the ILC, which is often 4 bytes in case of SIE. There
@@ -525,7 +501,10 @@ SYM_CODE_START(mcck_int_handler)
 	# and entering SIE (or leaving and clearing the flag). This
 	# would cause machine checks targeted at the guest to be
 	# handled by the host.
-	OUTSIDE	%r9,.Lsie_entry,.Lsie_leave,4f
+	larl	%r14,.Lsie_entry
+	clgrjl	%r9,%r14, 4f
+	larl	%r14,.Lsie_leave
+	clgrjhe	%r9,%r14, 4f
 	oi	__LC_CPU_FLAGS+7, _CIF_MCCK_GUEST
 4:	BPENTER	__SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST
 	SIEEXIT
--
cgit
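The removed macro relied on a classic trick: after subtracting the range start (slgr), a single unsigned compare against the range size (clgfrl/jhe) tests both bounds at once, since an address below the start wraps around to a huge value. A standalone C illustration with made-up numbers:

#include <stdio.h>

/*
 * Equivalent of the OUTSIDE macro's slgr/clgfrl/jhe sequence: one
 * unsigned compare catches both "below start" and "at or above end".
 */
static int outside(unsigned long addr, unsigned long start, unsigned long size)
{
	return (addr - start) >= size;
}

int main(void)
{
	unsigned long start = 0x1000, size = 0x200;	/* made-up range */

	printf("%d %d %d\n",
	       outside(0x0fff, start, size),	/* 1: below range  */
	       outside(0x11ff, start, size),	/* 0: inside range */
	       outside(0x1200, start, size));	/* 1: at end       */
	return 0;
}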
From 64c3431808bdab2ccef97d7a444018c416b080b5 Mon Sep 17 00:00:00 2001
From: Sven Schnelle
Date: Wed, 13 Mar 2024 09:51:22 +0100
Subject: s390/entry: compare gmap asce to determine guest/host fault

With the current implementation, there are some corner cases where a
host fault would be treated as a guest fault, for example when the sie
instruction itself causes a program check. Therefore store the gmap
asce in pt_regs, and compare the primary asce of the fault against it,
instead of matching instruction addresses.

Suggested-by: Heiko Carstens
Signed-off-by: Sven Schnelle
Signed-off-by: Heiko Carstens
---
 arch/s390/include/asm/ptrace.h |  2 --
 arch/s390/kernel/entry.S       | 31 +++++++++++++++----------------
 arch/s390/mm/fault.c           |  4 +++-
 3 files changed, 18 insertions(+), 19 deletions(-)

diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
index 788bc4467445..2ad9324f6338 100644
--- a/arch/s390/include/asm/ptrace.h
+++ b/arch/s390/include/asm/ptrace.h
@@ -14,13 +14,11 @@
 #define PIF_SYSCALL			0	/* inside a system call */
 #define PIF_EXECVE_PGSTE_RESTART	1	/* restart execve for PGSTE binaries */
 #define PIF_SYSCALL_RET_SET		2	/* return value was set via ptrace */
-#define PIF_GUEST_FAULT			3	/* indicates program check in sie64a */
 #define PIF_FTRACE_FULL_REGS		4	/* all register contents valid (ftrace) */

 #define _PIF_SYSCALL			BIT(PIF_SYSCALL)
 #define _PIF_EXECVE_PGSTE_RESTART	BIT(PIF_EXECVE_PGSTE_RESTART)
 #define _PIF_SYSCALL_RET_SET		BIT(PIF_SYSCALL_RET_SET)
-#define _PIF_GUEST_FAULT		BIT(PIF_GUEST_FAULT)
 #define _PIF_FTRACE_FULL_REGS		BIT(PIF_FTRACE_FULL_REGS)

 #define PSW32_MASK_PER		_AC(0x40000000, UL)

diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index a0543db0bcae..787394978bc0 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -119,8 +119,8 @@ _LPP_OFFSET	= __LC_LPP
 	.endm

 #if IS_ENABLED(CONFIG_KVM)
-	.macro SIEEXIT
-	lg	%r9,__SF_SIE_CONTROL(%r15)	# get control block pointer
+	.macro SIEEXIT sie_control
+	lg	%r9,\sie_control		# get control block pointer
 	ni	__SIE_PROG0C+3(%r9),0xfe	# no longer in SIE
 	lctlg	%c1,%c1,__LC_KERNEL_ASCE	# load primary asce
 	ni	__LC_CPU_FLAGS+7,255-_CIF_SIE
@@ -316,21 +316,13 @@ SYM_CODE_START(pgm_check_handler)
 	stpt	__LC_SYS_ENTER_TIMER
 	BPOFF
 	stmg	%r8,%r15,__LC_SAVE_AREA_SYNC
-	lghi	%r10,0
+	lgr	%r10,%r15
 	lmg	%r8,%r9,__LC_PGM_OLD_PSW
 	tmhh	%r8,0x0001			# coming from user space?
 	jno	.Lpgm_skip_asce
 	lctlg	%c1,%c1,__LC_KERNEL_ASCE
 	j	3f				# -> fault in user space
 .Lpgm_skip_asce:
-#if IS_ENABLED(CONFIG_KVM)
-	# cleanup critical section for program checks in __sie64a
-	TSTMSK	__LC_CPU_FLAGS,_CIF_SIE
-	jz	1f
-	BPENTER	__SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST
-	SIEEXIT
-	lghi	%r10,_PIF_GUEST_FAULT
-#endif
 1:	tmhh	%r8,0x4000			# PER bit set in old PSW ?
 	jnz	2f				# -> enabled, can't be a double fault
 	tm	__LC_PGM_ILC+3,0x80		# check for per exception
 	jz	1f
@@ -341,13 +333,20 @@ SYM_CODE_START(pgm_check_handler)
 	CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,4f
 3:	lg	%r15,__LC_KERNEL_STACK
 4:	la	%r11,STACK_FRAME_OVERHEAD(%r15)
-	stg	%r10,__PT_FLAGS(%r11)
+	xc	__PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
 	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
 	stmg	%r0,%r7,__PT_R0(%r11)
 	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
 	mvc	__PT_LAST_BREAK(8,%r11),__LC_PGM_LAST_BREAK
-	stmg	%r8,%r9,__PT_PSW(%r11)
-
+	stctg	%c1,%c1,__PT_CR1(%r11)
+#if IS_ENABLED(CONFIG_KVM)
+	lg	%r12,__LC_GMAP
+	clc	__GMAP_ASCE(8,%r12), __PT_CR1(%r11)
+	jne	5f
+	BPENTER	__SF_SIE_FLAGS(%r10),_TIF_ISOLATE_BP_GUEST
+	SIEEXIT __SF_SIE_CONTROL(%r10)
+#endif
+5:	stmg	%r8,%r9,__PT_PSW(%r11)
 	# clear user controlled registers to prevent speculative use
 	xgr	%r0,%r0
 	xgr	%r1,%r1
@@ -399,7 +398,7 @@ SYM_CODE_START(\name)
 	TSTMSK	__LC_CPU_FLAGS,_CIF_SIE
 	jz	0f
 	BPENTER	__SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST
-	SIEEXIT
+	SIEEXIT __SF_SIE_CONTROL(%r15)
 #endif
 0:	CHECK_STACK __LC_SAVE_AREA_ASYNC
 	aghi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
@@ -507,7 +506,7 @@ SYM_CODE_START(mcck_int_handler)
 	clgrjhe	%r9,%r14, 4f
 	oi	__LC_CPU_FLAGS+7, _CIF_MCCK_GUEST
 4:	BPENTER	__SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST
-	SIEEXIT
+	SIEEXIT __SF_SIE_CONTROL(%r15)
 #endif
 .Lmcck_user:
 	lg	%r15,__LC_MCCK_STACK

diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index ac4c78546d97..c421dd44ffbe 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -67,13 +67,15 @@ early_initcall(fault_init);
 static enum fault_type get_fault_type(struct pt_regs *regs)
 {
 	union teid teid = { .val = regs->int_parm_long };
+	struct gmap *gmap;

 	if (likely(teid.as == PSW_BITS_AS_PRIMARY)) {
 		if (user_mode(regs))
 			return USER_FAULT;
 		if (!IS_ENABLED(CONFIG_PGSTE))
 			return KERNEL_FAULT;
-		if (test_pt_regs_flag(regs, PIF_GUEST_FAULT))
+		gmap = (struct gmap *)S390_lowcore.gmap;
+		if (regs->cr1 == gmap->asce)
 			return GMAP_FAULT;
 		return KERNEL_FAULT;
 	}
--
cgit