diff options
Diffstat (limited to 'arch/s390')
120 files changed, 4038 insertions, 3458 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 3be9c832dec1..a8c259059adf 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -4,6 +4,9 @@ config MMU config ZONE_DMA def_bool y +config CPU_BIG_ENDIAN + def_bool y + config LOCKDEP_SUPPORT def_bool y @@ -59,6 +62,9 @@ config PCI_QUIRKS config ARCH_SUPPORTS_UPROBES def_bool y +config DEBUG_RODATA + def_bool y + config S390 def_bool y select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE @@ -101,6 +107,7 @@ config S390 select ARCH_SUPPORTS_NUMA_BALANCING select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF + select ARCH_WANTS_DYNAMIC_TASK_STRUCT select ARCH_WANTS_PROT_NUMA_PROT_NONE select ARCH_WANT_IPC_PARSE_VERSION select BUILDTIME_EXTABLE_SORT @@ -116,16 +123,19 @@ config S390 select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_EARLY_PFN_TO_NID select HAVE_ARCH_JUMP_LABEL + select CPU_NO_EFFICIENT_FFS if !HAVE_MARCH_Z9_109_FEATURES select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_SOFT_DIRTY select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE - select HAVE_BPF_JIT if PACK_STACK && HAVE_MARCH_Z196_FEATURES + select HAVE_EBPF_JIT if PACK_STACK && HAVE_MARCH_Z196_FEATURES select HAVE_CMPXCHG_DOUBLE select HAVE_CMPXCHG_LOCAL select HAVE_DEBUG_KMEMLEAK + select HAVE_DMA_API_DEBUG select HAVE_DYNAMIC_FTRACE select HAVE_DYNAMIC_FTRACE_WITH_REGS + select HAVE_EXIT_THREAD select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER @@ -157,6 +167,7 @@ config S390 select TTY select VIRT_CPU_ACCOUNTING select VIRT_TO_BUS + select HAVE_NMI config SCHED_OMIT_FRAME_POINTER @@ -203,7 +214,7 @@ config HAVE_MARCH_Z13_FEATURES choice prompt "Processor type" - default MARCH_Z900 + default MARCH_Z196 config MARCH_Z900 bool "IBM zSeries model z800 and z900" @@ -254,12 +265,12 @@ config MARCH_ZEC12 older machines. config MARCH_Z13 - bool "IBM z13" + bool "IBM z13s and z13" select HAVE_MARCH_Z13_FEATURES help - Select this to enable optimizations for IBM z13 (2964 series). - The kernel will be slightly faster but will not work on older - machines. + Select this to enable optimizations for IBM z13s and z13 (2965 and + 2964 series). The kernel will be slightly faster but will not work on + older machines. endchoice @@ -605,8 +616,6 @@ config PCI_NR_MSI PCI devices. source "drivers/pci/Kconfig" -source "drivers/pci/pcie/Kconfig" -source "drivers/pci/hotplug/Kconfig" endif # PCI @@ -619,10 +628,6 @@ config HAS_IOMEM config IOMMU_HELPER def_bool PCI -config HAS_DMA - def_bool PCI - select HAVE_DMA_API_DEBUG - config NEED_SG_DMA_LENGTH def_bool PCI diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile index fac6ac9790fa..1dd210347e12 100644 --- a/arch/s390/boot/compressed/Makefile +++ b/arch/s390/boot/compressed/Makefile @@ -22,7 +22,6 @@ OBJECTS += $(obj)/head.o $(obj)/misc.o $(obj)/piggy.o LDFLAGS_vmlinux := --oformat $(LD_BFD) -e startup -T $(obj)/vmlinux: $(obj)/vmlinux.lds $(OBJECTS) $(call if_changed,ld) - @: sed-sizes := -e 's/^\([0-9a-fA-F]*\) . \(__bss_start\|_end\)$$/\#define SZ\2 0x\1/p' diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig index 0ac42cc4f880..d5ec71b2ed02 100644 --- a/arch/s390/configs/default_defconfig +++ b/arch/s390/configs/default_defconfig @@ -1,8 +1,7 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y -CONFIG_FHANDLE=y CONFIG_AUDIT=y -CONFIG_NO_HZ=y +CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y @@ -13,19 +12,19 @@ CONFIG_TASK_IO_ACCOUNTING=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_NUMA_BALANCING=y -CONFIG_CGROUP_FREEZER=y -CONFIG_CGROUP_PIDS=y -CONFIG_CGROUP_DEVICE=y -CONFIG_CPUSETS=y -CONFIG_CGROUP_CPUACCT=y CONFIG_MEMCG=y CONFIG_MEMCG_SWAP=y -CONFIG_MEMCG_KMEM=y -CONFIG_CGROUP_HUGETLB=y -CONFIG_CGROUP_PERF=y +CONFIG_BLK_CGROUP=y CONFIG_CFS_BANDWIDTH=y CONFIG_RT_GROUP_SCHED=y -CONFIG_BLK_CGROUP=y +CONFIG_CGROUP_PIDS=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_HUGETLB=y +CONFIG_CPUSETS=y +CONFIG_CGROUP_DEVICE=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_PERF=y +CONFIG_CHECKPOINT_RESTORE=y CONFIG_NAMESPACES=y CONFIG_USER_NS=y CONFIG_SCHED_AUTOGROUP=y @@ -55,7 +54,6 @@ CONFIG_UNIXWARE_DISKLABEL=y CONFIG_CFQ_GROUP_IOSCHED=y CONFIG_DEFAULT_DEADLINE=y CONFIG_LIVEPATCH=y -CONFIG_MARCH_Z196=y CONFIG_TUNE_ZEC12=y CONFIG_NR_CPUS=256 CONFIG_NUMA=y @@ -65,6 +63,15 @@ CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_KSM=y CONFIG_TRANSPARENT_HUGEPAGE=y +CONFIG_CLEANCACHE=y +CONFIG_FRONTSWAP=y +CONFIG_CMA=y +CONFIG_MEM_SOFT_DIRTY=y +CONFIG_ZPOOL=m +CONFIG_ZBUD=m +CONFIG_ZSMALLOC=m +CONFIG_ZSMALLOC_STAT=y +CONFIG_IDLE_PAGE_TRACKING=y CONFIG_PCI=y CONFIG_PCI_DEBUG=y CONFIG_HOTPLUG_PCI=y @@ -452,6 +459,7 @@ CONFIG_HW_RANDOM_VIRTIO=m CONFIG_RAW_DRIVER=m CONFIG_HANGCHECK_TIMER=m CONFIG_TN3270_FS=y +# CONFIG_HWMON is not set CONFIG_WATCHDOG=y CONFIG_WATCHDOG_NOWAYOUT=y CONFIG_SOFT_WATCHDOG=m @@ -537,6 +545,8 @@ CONFIG_DLM=m CONFIG_PRINTK_TIME=y CONFIG_DYNAMIC_DEBUG=y CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF4=y +CONFIG_GDB_SCRIPTS=y CONFIG_FRAME_WARN=1024 CONFIG_READABLE_ASM=y CONFIG_UNUSED_SYMBOLS=y @@ -555,13 +565,17 @@ CONFIG_SLUB_DEBUG_ON=y CONFIG_SLUB_STATS=y CONFIG_DEBUG_STACK_USAGE=y CONFIG_DEBUG_VM=y +CONFIG_DEBUG_VM_VMACACHE=y CONFIG_DEBUG_VM_RB=y +CONFIG_DEBUG_VM_PGFLAGS=y CONFIG_DEBUG_MEMORY_INIT=y CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m CONFIG_DEBUG_PER_CPU_MAPS=y CONFIG_DEBUG_SHIRQ=y CONFIG_DETECT_HUNG_TASK=y +CONFIG_WQ_WATCHDOG=y CONFIG_PANIC_ON_OOPS=y +CONFIG_DEBUG_TIMEKEEPING=y CONFIG_TIMER_STATS=y CONFIG_DEBUG_RT_MUTEXES=y CONFIG_DEBUG_WW_MUTEX_SLOWPATH=y @@ -596,6 +610,8 @@ CONFIG_FTRACE_SYSCALLS=y CONFIG_STACK_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y CONFIG_UPROBE_EVENT=y +CONFIG_FUNCTION_PROFILER=y +CONFIG_TRACE_ENUM_MAP_FILE=y CONFIG_LKDTM=m CONFIG_TEST_LIST_SORT=y CONFIG_KPROBES_SANITY_TEST=y @@ -607,7 +623,6 @@ CONFIG_TEST_STRING_HELPERS=y CONFIG_TEST_KSTRTOX=y CONFIG_DMA_API_DEBUG=y CONFIG_TEST_BPF=m -# CONFIG_STRICT_DEVMEM is not set CONFIG_S390_PTDUMP=y CONFIG_ENCRYPTED_KEYS=m CONFIG_SECURITY=y @@ -651,7 +666,6 @@ CONFIG_CRYPTO_SEED=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m -CONFIG_CRYPTO_ZLIB=y CONFIG_CRYPTO_LZO=m CONFIG_CRYPTO_LZ4=m CONFIG_CRYPTO_LZ4HC=m @@ -664,7 +678,7 @@ CONFIG_CRYPTO_SHA512_S390=m CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_GHASH_S390=m -CONFIG_ASYMMETRIC_KEY_TYPE=m +CONFIG_ASYMMETRIC_KEY_TYPE=y CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m CONFIG_X509_CERTIFICATE_PARSER=m CONFIG_CRC7=m diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig index a31dcd56f7c0..f46a35115d2d 100644 --- a/arch/s390/configs/gcov_defconfig +++ b/arch/s390/configs/gcov_defconfig @@ -1,8 +1,7 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y -CONFIG_FHANDLE=y CONFIG_AUDIT=y -CONFIG_NO_HZ=y +CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y @@ -13,17 +12,17 @@ CONFIG_TASK_IO_ACCOUNTING=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_NUMA_BALANCING=y -CONFIG_CGROUP_FREEZER=y -CONFIG_CGROUP_PIDS=y -CONFIG_CGROUP_DEVICE=y -CONFIG_CPUSETS=y -CONFIG_CGROUP_CPUACCT=y CONFIG_MEMCG=y CONFIG_MEMCG_SWAP=y -CONFIG_MEMCG_KMEM=y +CONFIG_BLK_CGROUP=y +CONFIG_CGROUP_PIDS=y +CONFIG_CGROUP_FREEZER=y CONFIG_CGROUP_HUGETLB=y +CONFIG_CPUSETS=y +CONFIG_CGROUP_DEVICE=y +CONFIG_CGROUP_CPUACCT=y CONFIG_CGROUP_PERF=y -CONFIG_BLK_CGROUP=y +CONFIG_CHECKPOINT_RESTORE=y CONFIG_NAMESPACES=y CONFIG_USER_NS=y CONFIG_SCHED_AUTOGROUP=y @@ -53,7 +52,6 @@ CONFIG_SOLARIS_X86_PARTITION=y CONFIG_UNIXWARE_DISKLABEL=y CONFIG_CFQ_GROUP_IOSCHED=y CONFIG_DEFAULT_DEADLINE=y -CONFIG_MARCH_Z196=y CONFIG_TUNE_ZEC12=y CONFIG_NR_CPUS=256 CONFIG_NUMA=y @@ -62,6 +60,14 @@ CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_KSM=y CONFIG_TRANSPARENT_HUGEPAGE=y +CONFIG_CLEANCACHE=y +CONFIG_FRONTSWAP=y +CONFIG_CMA=y +CONFIG_ZSWAP=y +CONFIG_ZBUD=m +CONFIG_ZSMALLOC=m +CONFIG_ZSMALLOC_STAT=y +CONFIG_IDLE_PAGE_TRACKING=y CONFIG_PCI=y CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_S390=y @@ -530,6 +536,8 @@ CONFIG_NLS_UTF8=m CONFIG_DLM=m CONFIG_PRINTK_TIME=y CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF4=y +CONFIG_GDB_SCRIPTS=y # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_FRAME_WARN=1024 CONFIG_UNUSED_SYMBOLS=y @@ -547,13 +555,13 @@ CONFIG_LATENCYTOP=y CONFIG_DEBUG_STRICT_USER_COPY_CHECKS=y CONFIG_BLK_DEV_IO_TRACE=y # CONFIG_KPROBE_EVENT is not set +CONFIG_TRACE_ENUM_MAP_FILE=y CONFIG_LKDTM=m CONFIG_RBTREE_TEST=m CONFIG_INTERVAL_TREE_TEST=m CONFIG_PERCPU_TEST=m CONFIG_ATOMIC64_SELFTEST=y CONFIG_TEST_BPF=m -# CONFIG_STRICT_DEVMEM is not set CONFIG_S390_PTDUMP=y CONFIG_ENCRYPTED_KEYS=m CONFIG_SECURITY=y @@ -597,8 +605,6 @@ CONFIG_CRYPTO_SEED=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m -CONFIG_CRYPTO_ZLIB=y -CONFIG_CRYPTO_LZO=m CONFIG_CRYPTO_LZ4=m CONFIG_CRYPTO_LZ4HC=m CONFIG_CRYPTO_USER_API_HASH=m @@ -610,7 +616,7 @@ CONFIG_CRYPTO_SHA512_S390=m CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_GHASH_S390=m -CONFIG_ASYMMETRIC_KEY_TYPE=m +CONFIG_ASYMMETRIC_KEY_TYPE=y CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m CONFIG_X509_CERTIFICATE_PARSER=m CONFIG_CRC7=m diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig index 7b73bf353345..ba0f2a58b8cd 100644 --- a/arch/s390/configs/performance_defconfig +++ b/arch/s390/configs/performance_defconfig @@ -1,8 +1,7 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y -CONFIG_FHANDLE=y CONFIG_AUDIT=y -CONFIG_NO_HZ=y +CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y @@ -14,17 +13,17 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_NUMA_BALANCING=y # CONFIG_NUMA_BALANCING_DEFAULT_ENABLED is not set -CONFIG_CGROUP_FREEZER=y -CONFIG_CGROUP_PIDS=y -CONFIG_CGROUP_DEVICE=y -CONFIG_CPUSETS=y -CONFIG_CGROUP_CPUACCT=y CONFIG_MEMCG=y CONFIG_MEMCG_SWAP=y -CONFIG_MEMCG_KMEM=y +CONFIG_BLK_CGROUP=y +CONFIG_CGROUP_PIDS=y +CONFIG_CGROUP_FREEZER=y CONFIG_CGROUP_HUGETLB=y +CONFIG_CPUSETS=y +CONFIG_CGROUP_DEVICE=y +CONFIG_CGROUP_CPUACCT=y CONFIG_CGROUP_PERF=y -CONFIG_BLK_CGROUP=y +CONFIG_CHECKPOINT_RESTORE=y CONFIG_NAMESPACES=y CONFIG_USER_NS=y CONFIG_SCHED_AUTOGROUP=y @@ -53,7 +52,6 @@ CONFIG_UNIXWARE_DISKLABEL=y CONFIG_CFQ_GROUP_IOSCHED=y CONFIG_DEFAULT_DEADLINE=y CONFIG_LIVEPATCH=y -CONFIG_MARCH_Z196=y CONFIG_TUNE_ZEC12=y CONFIG_NR_CPUS=512 CONFIG_NUMA=y @@ -62,6 +60,14 @@ CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_KSM=y CONFIG_TRANSPARENT_HUGEPAGE=y +CONFIG_CLEANCACHE=y +CONFIG_FRONTSWAP=y +CONFIG_CMA=y +CONFIG_ZSWAP=y +CONFIG_ZBUD=m +CONFIG_ZSMALLOC=m +CONFIG_ZSMALLOC_STAT=y +CONFIG_IDLE_PAGE_TRACKING=y CONFIG_PCI=y CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_S390=y @@ -447,6 +453,7 @@ CONFIG_HW_RANDOM_VIRTIO=m CONFIG_RAW_DRIVER=m CONFIG_HANGCHECK_TIMER=m CONFIG_TN3270_FS=y +# CONFIG_HWMON is not set CONFIG_WATCHDOG=y CONFIG_WATCHDOG_NOWAYOUT=y CONFIG_SOFT_WATCHDOG=m @@ -530,6 +537,8 @@ CONFIG_NLS_UTF8=m CONFIG_DLM=m CONFIG_PRINTK_TIME=y CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF4=y +CONFIG_GDB_SCRIPTS=y # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_FRAME_WARN=1024 CONFIG_UNUSED_SYMBOLS=y @@ -546,11 +555,12 @@ CONFIG_FTRACE_SYSCALLS=y CONFIG_STACK_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y CONFIG_UPROBE_EVENT=y +CONFIG_FUNCTION_PROFILER=y +CONFIG_TRACE_ENUM_MAP_FILE=y CONFIG_LKDTM=m CONFIG_PERCPU_TEST=m CONFIG_ATOMIC64_SELFTEST=y CONFIG_TEST_BPF=m -# CONFIG_STRICT_DEVMEM is not set CONFIG_S390_PTDUMP=y CONFIG_ENCRYPTED_KEYS=m CONFIG_SECURITY=y @@ -594,8 +604,6 @@ CONFIG_CRYPTO_SEED=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m -CONFIG_CRYPTO_ZLIB=y -CONFIG_CRYPTO_LZO=m CONFIG_CRYPTO_LZ4=m CONFIG_CRYPTO_LZ4HC=m CONFIG_CRYPTO_USER_API_HASH=m @@ -607,7 +615,7 @@ CONFIG_CRYPTO_SHA512_S390=m CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_GHASH_S390=m -CONFIG_ASYMMETRIC_KEY_TYPE=m +CONFIG_ASYMMETRIC_KEY_TYPE=y CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m CONFIG_X509_CERTIFICATE_PARSER=m CONFIG_CRC7=m diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index 1719843a55a2..4366a3e3e754 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -1,5 +1,5 @@ # CONFIG_SWAP is not set -CONFIG_NO_HZ=y +CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y CONFIG_BLK_DEV_INITRD=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y @@ -7,7 +7,6 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_PARTITION_ADVANCED=y CONFIG_IBM_PARTITION=y CONFIG_DEFAULT_DEADLINE=y -CONFIG_MARCH_Z196=y CONFIG_TUNE_ZEC12=y # CONFIG_COMPAT is not set CONFIG_NR_CPUS=2 @@ -64,7 +63,6 @@ CONFIG_PANIC_ON_OOPS=y # CONFIG_SCHED_DEBUG is not set CONFIG_RCU_CPU_STALL_TIMEOUT=60 # CONFIG_FTRACE is not set -# CONFIG_STRICT_DEVMEM is not set # CONFIG_PFAULT is not set # CONFIG_S390_HYPFS_FS is not set # CONFIG_VIRTUALIZATION is not set diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index 0b9b95f3c703..7554a8bb2adc 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -27,7 +27,8 @@ #include <linux/cpufeature.h> #include <linux/init.h> #include <linux/spinlock.h> -#include "crypt_s390.h" +#include <crypto/xts.h> +#include <asm/cpacf.h> #define AES_KEYLEN_128 1 #define AES_KEYLEN_192 2 @@ -144,16 +145,16 @@ static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) switch (sctx->key_len) { case 16: - crypt_s390_km(KM_AES_128_ENCRYPT, &sctx->key, out, in, - AES_BLOCK_SIZE); + cpacf_km(CPACF_KM_AES_128_ENC, &sctx->key, out, in, + AES_BLOCK_SIZE); break; case 24: - crypt_s390_km(KM_AES_192_ENCRYPT, &sctx->key, out, in, - AES_BLOCK_SIZE); + cpacf_km(CPACF_KM_AES_192_ENC, &sctx->key, out, in, + AES_BLOCK_SIZE); break; case 32: - crypt_s390_km(KM_AES_256_ENCRYPT, &sctx->key, out, in, - AES_BLOCK_SIZE); + cpacf_km(CPACF_KM_AES_256_ENC, &sctx->key, out, in, + AES_BLOCK_SIZE); break; } } @@ -169,16 +170,16 @@ static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) switch (sctx->key_len) { case 16: - crypt_s390_km(KM_AES_128_DECRYPT, &sctx->key, out, in, - AES_BLOCK_SIZE); + cpacf_km(CPACF_KM_AES_128_DEC, &sctx->key, out, in, + AES_BLOCK_SIZE); break; case 24: - crypt_s390_km(KM_AES_192_DECRYPT, &sctx->key, out, in, - AES_BLOCK_SIZE); + cpacf_km(CPACF_KM_AES_192_DEC, &sctx->key, out, in, + AES_BLOCK_SIZE); break; case 32: - crypt_s390_km(KM_AES_256_DECRYPT, &sctx->key, out, in, - AES_BLOCK_SIZE); + cpacf_km(CPACF_KM_AES_256_DEC, &sctx->key, out, in, + AES_BLOCK_SIZE); break; } } @@ -211,7 +212,7 @@ static void fallback_exit_cip(struct crypto_tfm *tfm) static struct crypto_alg aes_alg = { .cra_name = "aes", .cra_driver_name = "aes-s390", - .cra_priority = CRYPT_S390_PRIORITY, + .cra_priority = 300, .cra_flags = CRYPTO_ALG_TYPE_CIPHER | CRYPTO_ALG_NEED_FALLBACK, .cra_blocksize = AES_BLOCK_SIZE, @@ -297,16 +298,16 @@ static int ecb_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, switch (key_len) { case 16: - sctx->enc = KM_AES_128_ENCRYPT; - sctx->dec = KM_AES_128_DECRYPT; + sctx->enc = CPACF_KM_AES_128_ENC; + sctx->dec = CPACF_KM_AES_128_DEC; break; case 24: - sctx->enc = KM_AES_192_ENCRYPT; - sctx->dec = KM_AES_192_DECRYPT; + sctx->enc = CPACF_KM_AES_192_ENC; + sctx->dec = CPACF_KM_AES_192_DEC; break; case 32: - sctx->enc = KM_AES_256_ENCRYPT; - sctx->dec = KM_AES_256_DECRYPT; + sctx->enc = CPACF_KM_AES_256_ENC; + sctx->dec = CPACF_KM_AES_256_DEC; break; } @@ -325,7 +326,7 @@ static int ecb_aes_crypt(struct blkcipher_desc *desc, long func, void *param, u8 *out = walk->dst.virt.addr; u8 *in = walk->src.virt.addr; - ret = crypt_s390_km(func, param, out, in, n); + ret = cpacf_km(func, param, out, in, n); if (ret < 0 || ret != n) return -EIO; @@ -392,7 +393,7 @@ static void fallback_exit_blk(struct crypto_tfm *tfm) static struct crypto_alg ecb_aes_alg = { .cra_name = "ecb(aes)", .cra_driver_name = "ecb-aes-s390", - .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, + .cra_priority = 400, /* combo: aes + ecb */ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | CRYPTO_ALG_NEED_FALLBACK, .cra_blocksize = AES_BLOCK_SIZE, @@ -426,16 +427,16 @@ static int cbc_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, switch (key_len) { case 16: - sctx->enc = KMC_AES_128_ENCRYPT; - sctx->dec = KMC_AES_128_DECRYPT; + sctx->enc = CPACF_KMC_AES_128_ENC; + sctx->dec = CPACF_KMC_AES_128_DEC; break; case 24: - sctx->enc = KMC_AES_192_ENCRYPT; - sctx->dec = KMC_AES_192_DECRYPT; + sctx->enc = CPACF_KMC_AES_192_ENC; + sctx->dec = CPACF_KMC_AES_192_DEC; break; case 32: - sctx->enc = KMC_AES_256_ENCRYPT; - sctx->dec = KMC_AES_256_DECRYPT; + sctx->enc = CPACF_KMC_AES_256_ENC; + sctx->dec = CPACF_KMC_AES_256_DEC; break; } @@ -464,7 +465,7 @@ static int cbc_aes_crypt(struct blkcipher_desc *desc, long func, u8 *out = walk->dst.virt.addr; u8 *in = walk->src.virt.addr; - ret = crypt_s390_kmc(func, ¶m, out, in, n); + ret = cpacf_kmc(func, ¶m, out, in, n); if (ret < 0 || ret != n) return -EIO; @@ -508,7 +509,7 @@ static int cbc_aes_decrypt(struct blkcipher_desc *desc, static struct crypto_alg cbc_aes_alg = { .cra_name = "cbc(aes)", .cra_driver_name = "cbc-aes-s390", - .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, + .cra_priority = 400, /* combo: aes + cbc */ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | CRYPTO_ALG_NEED_FALLBACK, .cra_blocksize = AES_BLOCK_SIZE, @@ -587,11 +588,16 @@ static int xts_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, { struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm); u32 *flags = &tfm->crt_flags; + int err; + + err = xts_check_key(tfm, in_key, key_len); + if (err) + return err; switch (key_len) { case 32: - xts_ctx->enc = KM_XTS_128_ENCRYPT; - xts_ctx->dec = KM_XTS_128_DECRYPT; + xts_ctx->enc = CPACF_KM_XTS_128_ENC; + xts_ctx->dec = CPACF_KM_XTS_128_DEC; memcpy(xts_ctx->key + 16, in_key, 16); memcpy(xts_ctx->pcc_key + 16, in_key + 16, 16); break; @@ -601,8 +607,8 @@ static int xts_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, xts_fallback_setkey(tfm, in_key, key_len); break; case 64: - xts_ctx->enc = KM_XTS_256_ENCRYPT; - xts_ctx->dec = KM_XTS_256_DECRYPT; + xts_ctx->enc = CPACF_KM_XTS_256_ENC; + xts_ctx->dec = CPACF_KM_XTS_256_DEC; memcpy(xts_ctx->key, in_key, 32); memcpy(xts_ctx->pcc_key, in_key + 32, 32); break; @@ -637,7 +643,8 @@ static int xts_aes_crypt(struct blkcipher_desc *desc, long func, memset(pcc_param.xts, 0, sizeof(pcc_param.xts)); memcpy(pcc_param.tweak, walk->iv, sizeof(pcc_param.tweak)); memcpy(pcc_param.key, xts_ctx->pcc_key, 32); - ret = crypt_s390_pcc(func, &pcc_param.key[offset]); + /* remove decipher modifier bit from 'func' and call PCC */ + ret = cpacf_pcc(func & 0x7f, &pcc_param.key[offset]); if (ret < 0) return -EIO; @@ -649,7 +656,7 @@ static int xts_aes_crypt(struct blkcipher_desc *desc, long func, out = walk->dst.virt.addr; in = walk->src.virt.addr; - ret = crypt_s390_km(func, &xts_param.key[offset], out, in, n); + ret = cpacf_km(func, &xts_param.key[offset], out, in, n); if (ret < 0 || ret != n) return -EIO; @@ -715,7 +722,7 @@ static void xts_fallback_exit(struct crypto_tfm *tfm) static struct crypto_alg xts_aes_alg = { .cra_name = "xts(aes)", .cra_driver_name = "xts-aes-s390", - .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, + .cra_priority = 400, /* combo: aes + xts */ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | CRYPTO_ALG_NEED_FALLBACK, .cra_blocksize = AES_BLOCK_SIZE, @@ -745,16 +752,16 @@ static int ctr_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, switch (key_len) { case 16: - sctx->enc = KMCTR_AES_128_ENCRYPT; - sctx->dec = KMCTR_AES_128_DECRYPT; + sctx->enc = CPACF_KMCTR_AES_128_ENC; + sctx->dec = CPACF_KMCTR_AES_128_DEC; break; case 24: - sctx->enc = KMCTR_AES_192_ENCRYPT; - sctx->dec = KMCTR_AES_192_DECRYPT; + sctx->enc = CPACF_KMCTR_AES_192_ENC; + sctx->dec = CPACF_KMCTR_AES_192_DEC; break; case 32: - sctx->enc = KMCTR_AES_256_ENCRYPT; - sctx->dec = KMCTR_AES_256_DECRYPT; + sctx->enc = CPACF_KMCTR_AES_256_ENC; + sctx->dec = CPACF_KMCTR_AES_256_DEC; break; } @@ -798,8 +805,7 @@ static int ctr_aes_crypt(struct blkcipher_desc *desc, long func, n = __ctrblk_init(ctrptr, nbytes); else n = AES_BLOCK_SIZE; - ret = crypt_s390_kmctr(func, sctx->key, out, in, - n, ctrptr); + ret = cpacf_kmctr(func, sctx->key, out, in, n, ctrptr); if (ret < 0 || ret != n) { if (ctrptr == ctrblk) spin_unlock(&ctrblk_lock); @@ -831,8 +837,8 @@ static int ctr_aes_crypt(struct blkcipher_desc *desc, long func, if (nbytes) { out = walk->dst.virt.addr; in = walk->src.virt.addr; - ret = crypt_s390_kmctr(func, sctx->key, buf, in, - AES_BLOCK_SIZE, ctrbuf); + ret = cpacf_kmctr(func, sctx->key, buf, in, + AES_BLOCK_SIZE, ctrbuf); if (ret < 0 || ret != AES_BLOCK_SIZE) return -EIO; memcpy(out, buf, nbytes); @@ -869,7 +875,7 @@ static int ctr_aes_decrypt(struct blkcipher_desc *desc, static struct crypto_alg ctr_aes_alg = { .cra_name = "ctr(aes)", .cra_driver_name = "ctr-aes-s390", - .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, + .cra_priority = 400, /* combo: aes + ctr */ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, .cra_blocksize = 1, .cra_ctxsize = sizeof(struct s390_aes_ctx), @@ -893,11 +899,11 @@ static int __init aes_s390_init(void) { int ret; - if (crypt_s390_func_available(KM_AES_128_ENCRYPT, CRYPT_S390_MSA)) + if (cpacf_query(CPACF_KM, CPACF_KM_AES_128_ENC)) keylen_flag |= AES_KEYLEN_128; - if (crypt_s390_func_available(KM_AES_192_ENCRYPT, CRYPT_S390_MSA)) + if (cpacf_query(CPACF_KM, CPACF_KM_AES_192_ENC)) keylen_flag |= AES_KEYLEN_192; - if (crypt_s390_func_available(KM_AES_256_ENCRYPT, CRYPT_S390_MSA)) + if (cpacf_query(CPACF_KM, CPACF_KM_AES_256_ENC)) keylen_flag |= AES_KEYLEN_256; if (!keylen_flag) @@ -920,22 +926,17 @@ static int __init aes_s390_init(void) if (ret) goto cbc_aes_err; - if (crypt_s390_func_available(KM_XTS_128_ENCRYPT, - CRYPT_S390_MSA | CRYPT_S390_MSA4) && - crypt_s390_func_available(KM_XTS_256_ENCRYPT, - CRYPT_S390_MSA | CRYPT_S390_MSA4)) { + if (cpacf_query(CPACF_KM, CPACF_KM_XTS_128_ENC) && + cpacf_query(CPACF_KM, CPACF_KM_XTS_256_ENC)) { ret = crypto_register_alg(&xts_aes_alg); if (ret) goto xts_aes_err; xts_aes_alg_reg = 1; } - if (crypt_s390_func_available(KMCTR_AES_128_ENCRYPT, - CRYPT_S390_MSA | CRYPT_S390_MSA4) && - crypt_s390_func_available(KMCTR_AES_192_ENCRYPT, - CRYPT_S390_MSA | CRYPT_S390_MSA4) && - crypt_s390_func_available(KMCTR_AES_256_ENCRYPT, - CRYPT_S390_MSA | CRYPT_S390_MSA4)) { + if (cpacf_query(CPACF_KMCTR, CPACF_KMCTR_AES_128_ENC) && + cpacf_query(CPACF_KMCTR, CPACF_KMCTR_AES_192_ENC) && + cpacf_query(CPACF_KMCTR, CPACF_KMCTR_AES_256_ENC)) { ctrblk = (u8 *) __get_free_page(GFP_KERNEL); if (!ctrblk) { ret = -ENOMEM; diff --git a/arch/s390/crypto/crypt_s390.h b/arch/s390/crypto/crypt_s390.h deleted file mode 100644 index d9c4c313fbc6..000000000000 --- a/arch/s390/crypto/crypt_s390.h +++ /dev/null @@ -1,493 +0,0 @@ -/* - * Cryptographic API. - * - * Support for s390 cryptographic instructions. - * - * Copyright IBM Corp. 2003, 2015 - * Author(s): Thomas Spatzier - * Jan Glauber (jan.glauber@de.ibm.com) - * Harald Freudenberger (freude@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - */ -#ifndef _CRYPTO_ARCH_S390_CRYPT_S390_H -#define _CRYPTO_ARCH_S390_CRYPT_S390_H - -#include <asm/errno.h> -#include <asm/facility.h> - -#define CRYPT_S390_OP_MASK 0xFF00 -#define CRYPT_S390_FUNC_MASK 0x00FF - -#define CRYPT_S390_PRIORITY 300 -#define CRYPT_S390_COMPOSITE_PRIORITY 400 - -#define CRYPT_S390_MSA 0x1 -#define CRYPT_S390_MSA3 0x2 -#define CRYPT_S390_MSA4 0x4 -#define CRYPT_S390_MSA5 0x8 - -/* s390 cryptographic operations */ -enum crypt_s390_operations { - CRYPT_S390_KM = 0x0100, - CRYPT_S390_KMC = 0x0200, - CRYPT_S390_KIMD = 0x0300, - CRYPT_S390_KLMD = 0x0400, - CRYPT_S390_KMAC = 0x0500, - CRYPT_S390_KMCTR = 0x0600, - CRYPT_S390_PPNO = 0x0700 -}; - -/* - * function codes for KM (CIPHER MESSAGE) instruction - * 0x80 is the decipher modifier bit - */ -enum crypt_s390_km_func { - KM_QUERY = CRYPT_S390_KM | 0x0, - KM_DEA_ENCRYPT = CRYPT_S390_KM | 0x1, - KM_DEA_DECRYPT = CRYPT_S390_KM | 0x1 | 0x80, - KM_TDEA_128_ENCRYPT = CRYPT_S390_KM | 0x2, - KM_TDEA_128_DECRYPT = CRYPT_S390_KM | 0x2 | 0x80, - KM_TDEA_192_ENCRYPT = CRYPT_S390_KM | 0x3, - KM_TDEA_192_DECRYPT = CRYPT_S390_KM | 0x3 | 0x80, - KM_AES_128_ENCRYPT = CRYPT_S390_KM | 0x12, - KM_AES_128_DECRYPT = CRYPT_S390_KM | 0x12 | 0x80, - KM_AES_192_ENCRYPT = CRYPT_S390_KM | 0x13, - KM_AES_192_DECRYPT = CRYPT_S390_KM | 0x13 | 0x80, - KM_AES_256_ENCRYPT = CRYPT_S390_KM | 0x14, - KM_AES_256_DECRYPT = CRYPT_S390_KM | 0x14 | 0x80, - KM_XTS_128_ENCRYPT = CRYPT_S390_KM | 0x32, - KM_XTS_128_DECRYPT = CRYPT_S390_KM | 0x32 | 0x80, - KM_XTS_256_ENCRYPT = CRYPT_S390_KM | 0x34, - KM_XTS_256_DECRYPT = CRYPT_S390_KM | 0x34 | 0x80, -}; - -/* - * function codes for KMC (CIPHER MESSAGE WITH CHAINING) - * instruction - */ -enum crypt_s390_kmc_func { - KMC_QUERY = CRYPT_S390_KMC | 0x0, - KMC_DEA_ENCRYPT = CRYPT_S390_KMC | 0x1, - KMC_DEA_DECRYPT = CRYPT_S390_KMC | 0x1 | 0x80, - KMC_TDEA_128_ENCRYPT = CRYPT_S390_KMC | 0x2, - KMC_TDEA_128_DECRYPT = CRYPT_S390_KMC | 0x2 | 0x80, - KMC_TDEA_192_ENCRYPT = CRYPT_S390_KMC | 0x3, - KMC_TDEA_192_DECRYPT = CRYPT_S390_KMC | 0x3 | 0x80, - KMC_AES_128_ENCRYPT = CRYPT_S390_KMC | 0x12, - KMC_AES_128_DECRYPT = CRYPT_S390_KMC | 0x12 | 0x80, - KMC_AES_192_ENCRYPT = CRYPT_S390_KMC | 0x13, - KMC_AES_192_DECRYPT = CRYPT_S390_KMC | 0x13 | 0x80, - KMC_AES_256_ENCRYPT = CRYPT_S390_KMC | 0x14, - KMC_AES_256_DECRYPT = CRYPT_S390_KMC | 0x14 | 0x80, - KMC_PRNG = CRYPT_S390_KMC | 0x43, -}; - -/* - * function codes for KMCTR (CIPHER MESSAGE WITH COUNTER) - * instruction - */ -enum crypt_s390_kmctr_func { - KMCTR_QUERY = CRYPT_S390_KMCTR | 0x0, - KMCTR_DEA_ENCRYPT = CRYPT_S390_KMCTR | 0x1, - KMCTR_DEA_DECRYPT = CRYPT_S390_KMCTR | 0x1 | 0x80, - KMCTR_TDEA_128_ENCRYPT = CRYPT_S390_KMCTR | 0x2, - KMCTR_TDEA_128_DECRYPT = CRYPT_S390_KMCTR | 0x2 | 0x80, - KMCTR_TDEA_192_ENCRYPT = CRYPT_S390_KMCTR | 0x3, - KMCTR_TDEA_192_DECRYPT = CRYPT_S390_KMCTR | 0x3 | 0x80, - KMCTR_AES_128_ENCRYPT = CRYPT_S390_KMCTR | 0x12, - KMCTR_AES_128_DECRYPT = CRYPT_S390_KMCTR | 0x12 | 0x80, - KMCTR_AES_192_ENCRYPT = CRYPT_S390_KMCTR | 0x13, - KMCTR_AES_192_DECRYPT = CRYPT_S390_KMCTR | 0x13 | 0x80, - KMCTR_AES_256_ENCRYPT = CRYPT_S390_KMCTR | 0x14, - KMCTR_AES_256_DECRYPT = CRYPT_S390_KMCTR | 0x14 | 0x80, -}; - -/* - * function codes for KIMD (COMPUTE INTERMEDIATE MESSAGE DIGEST) - * instruction - */ -enum crypt_s390_kimd_func { - KIMD_QUERY = CRYPT_S390_KIMD | 0, - KIMD_SHA_1 = CRYPT_S390_KIMD | 1, - KIMD_SHA_256 = CRYPT_S390_KIMD | 2, - KIMD_SHA_512 = CRYPT_S390_KIMD | 3, - KIMD_GHASH = CRYPT_S390_KIMD | 65, -}; - -/* - * function codes for KLMD (COMPUTE LAST MESSAGE DIGEST) - * instruction - */ -enum crypt_s390_klmd_func { - KLMD_QUERY = CRYPT_S390_KLMD | 0, - KLMD_SHA_1 = CRYPT_S390_KLMD | 1, - KLMD_SHA_256 = CRYPT_S390_KLMD | 2, - KLMD_SHA_512 = CRYPT_S390_KLMD | 3, -}; - -/* - * function codes for KMAC (COMPUTE MESSAGE AUTHENTICATION CODE) - * instruction - */ -enum crypt_s390_kmac_func { - KMAC_QUERY = CRYPT_S390_KMAC | 0, - KMAC_DEA = CRYPT_S390_KMAC | 1, - KMAC_TDEA_128 = CRYPT_S390_KMAC | 2, - KMAC_TDEA_192 = CRYPT_S390_KMAC | 3 -}; - -/* - * function codes for PPNO (PERFORM PSEUDORANDOM NUMBER - * OPERATION) instruction - */ -enum crypt_s390_ppno_func { - PPNO_QUERY = CRYPT_S390_PPNO | 0, - PPNO_SHA512_DRNG_GEN = CRYPT_S390_PPNO | 3, - PPNO_SHA512_DRNG_SEED = CRYPT_S390_PPNO | 0x83 -}; - -/** - * crypt_s390_km: - * @func: the function code passed to KM; see crypt_s390_km_func - * @param: address of parameter block; see POP for details on each func - * @dest: address of destination memory area - * @src: address of source memory area - * @src_len: length of src operand in bytes - * - * Executes the KM (CIPHER MESSAGE) operation of the CPU. - * - * Returns -1 for failure, 0 for the query func, number of processed - * bytes for encryption/decryption funcs - */ -static inline int crypt_s390_km(long func, void *param, - u8 *dest, const u8 *src, long src_len) -{ - register long __func asm("0") = func & CRYPT_S390_FUNC_MASK; - register void *__param asm("1") = param; - register const u8 *__src asm("2") = src; - register long __src_len asm("3") = src_len; - register u8 *__dest asm("4") = dest; - int ret; - - asm volatile( - "0: .insn rre,0xb92e0000,%3,%1\n" /* KM opcode */ - "1: brc 1,0b\n" /* handle partial completion */ - " la %0,0\n" - "2:\n" - EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) - : "=d" (ret), "+a" (__src), "+d" (__src_len), "+a" (__dest) - : "d" (__func), "a" (__param), "0" (-1) : "cc", "memory"); - if (ret < 0) - return ret; - return (func & CRYPT_S390_FUNC_MASK) ? src_len - __src_len : __src_len; -} - -/** - * crypt_s390_kmc: - * @func: the function code passed to KM; see crypt_s390_kmc_func - * @param: address of parameter block; see POP for details on each func - * @dest: address of destination memory area - * @src: address of source memory area - * @src_len: length of src operand in bytes - * - * Executes the KMC (CIPHER MESSAGE WITH CHAINING) operation of the CPU. - * - * Returns -1 for failure, 0 for the query func, number of processed - * bytes for encryption/decryption funcs - */ -static inline int crypt_s390_kmc(long func, void *param, - u8 *dest, const u8 *src, long src_len) -{ - register long __func asm("0") = func & CRYPT_S390_FUNC_MASK; - register void *__param asm("1") = param; - register const u8 *__src asm("2") = src; - register long __src_len asm("3") = src_len; - register u8 *__dest asm("4") = dest; - int ret; - - asm volatile( - "0: .insn rre,0xb92f0000,%3,%1\n" /* KMC opcode */ - "1: brc 1,0b\n" /* handle partial completion */ - " la %0,0\n" - "2:\n" - EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) - : "=d" (ret), "+a" (__src), "+d" (__src_len), "+a" (__dest) - : "d" (__func), "a" (__param), "0" (-1) : "cc", "memory"); - if (ret < 0) - return ret; - return (func & CRYPT_S390_FUNC_MASK) ? src_len - __src_len : __src_len; -} - -/** - * crypt_s390_kimd: - * @func: the function code passed to KM; see crypt_s390_kimd_func - * @param: address of parameter block; see POP for details on each func - * @src: address of source memory area - * @src_len: length of src operand in bytes - * - * Executes the KIMD (COMPUTE INTERMEDIATE MESSAGE DIGEST) operation - * of the CPU. - * - * Returns -1 for failure, 0 for the query func, number of processed - * bytes for digest funcs - */ -static inline int crypt_s390_kimd(long func, void *param, - const u8 *src, long src_len) -{ - register long __func asm("0") = func & CRYPT_S390_FUNC_MASK; - register void *__param asm("1") = param; - register const u8 *__src asm("2") = src; - register long __src_len asm("3") = src_len; - int ret; - - asm volatile( - "0: .insn rre,0xb93e0000,%1,%1\n" /* KIMD opcode */ - "1: brc 1,0b\n" /* handle partial completion */ - " la %0,0\n" - "2:\n" - EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) - : "=d" (ret), "+a" (__src), "+d" (__src_len) - : "d" (__func), "a" (__param), "0" (-1) : "cc", "memory"); - if (ret < 0) - return ret; - return (func & CRYPT_S390_FUNC_MASK) ? src_len - __src_len : __src_len; -} - -/** - * crypt_s390_klmd: - * @func: the function code passed to KM; see crypt_s390_klmd_func - * @param: address of parameter block; see POP for details on each func - * @src: address of source memory area - * @src_len: length of src operand in bytes - * - * Executes the KLMD (COMPUTE LAST MESSAGE DIGEST) operation of the CPU. - * - * Returns -1 for failure, 0 for the query func, number of processed - * bytes for digest funcs - */ -static inline int crypt_s390_klmd(long func, void *param, - const u8 *src, long src_len) -{ - register long __func asm("0") = func & CRYPT_S390_FUNC_MASK; - register void *__param asm("1") = param; - register const u8 *__src asm("2") = src; - register long __src_len asm("3") = src_len; - int ret; - - asm volatile( - "0: .insn rre,0xb93f0000,%1,%1\n" /* KLMD opcode */ - "1: brc 1,0b\n" /* handle partial completion */ - " la %0,0\n" - "2:\n" - EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) - : "=d" (ret), "+a" (__src), "+d" (__src_len) - : "d" (__func), "a" (__param), "0" (-1) : "cc", "memory"); - if (ret < 0) - return ret; - return (func & CRYPT_S390_FUNC_MASK) ? src_len - __src_len : __src_len; -} - -/** - * crypt_s390_kmac: - * @func: the function code passed to KM; see crypt_s390_klmd_func - * @param: address of parameter block; see POP for details on each func - * @src: address of source memory area - * @src_len: length of src operand in bytes - * - * Executes the KMAC (COMPUTE MESSAGE AUTHENTICATION CODE) operation - * of the CPU. - * - * Returns -1 for failure, 0 for the query func, number of processed - * bytes for digest funcs - */ -static inline int crypt_s390_kmac(long func, void *param, - const u8 *src, long src_len) -{ - register long __func asm("0") = func & CRYPT_S390_FUNC_MASK; - register void *__param asm("1") = param; - register const u8 *__src asm("2") = src; - register long __src_len asm("3") = src_len; - int ret; - - asm volatile( - "0: .insn rre,0xb91e0000,%1,%1\n" /* KLAC opcode */ - "1: brc 1,0b\n" /* handle partial completion */ - " la %0,0\n" - "2:\n" - EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) - : "=d" (ret), "+a" (__src), "+d" (__src_len) - : "d" (__func), "a" (__param), "0" (-1) : "cc", "memory"); - if (ret < 0) - return ret; - return (func & CRYPT_S390_FUNC_MASK) ? src_len - __src_len : __src_len; -} - -/** - * crypt_s390_kmctr: - * @func: the function code passed to KMCTR; see crypt_s390_kmctr_func - * @param: address of parameter block; see POP for details on each func - * @dest: address of destination memory area - * @src: address of source memory area - * @src_len: length of src operand in bytes - * @counter: address of counter value - * - * Executes the KMCTR (CIPHER MESSAGE WITH COUNTER) operation of the CPU. - * - * Returns -1 for failure, 0 for the query func, number of processed - * bytes for encryption/decryption funcs - */ -static inline int crypt_s390_kmctr(long func, void *param, u8 *dest, - const u8 *src, long src_len, u8 *counter) -{ - register long __func asm("0") = func & CRYPT_S390_FUNC_MASK; - register void *__param asm("1") = param; - register const u8 *__src asm("2") = src; - register long __src_len asm("3") = src_len; - register u8 *__dest asm("4") = dest; - register u8 *__ctr asm("6") = counter; - int ret = -1; - - asm volatile( - "0: .insn rrf,0xb92d0000,%3,%1,%4,0\n" /* KMCTR opcode */ - "1: brc 1,0b\n" /* handle partial completion */ - " la %0,0\n" - "2:\n" - EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) - : "+d" (ret), "+a" (__src), "+d" (__src_len), "+a" (__dest), - "+a" (__ctr) - : "d" (__func), "a" (__param) : "cc", "memory"); - if (ret < 0) - return ret; - return (func & CRYPT_S390_FUNC_MASK) ? src_len - __src_len : __src_len; -} - -/** - * crypt_s390_ppno: - * @func: the function code passed to PPNO; see crypt_s390_ppno_func - * @param: address of parameter block; see POP for details on each func - * @dest: address of destination memory area - * @dest_len: size of destination memory area in bytes - * @seed: address of seed data - * @seed_len: size of seed data in bytes - * - * Executes the PPNO (PERFORM PSEUDORANDOM NUMBER OPERATION) - * operation of the CPU. - * - * Returns -1 for failure, 0 for the query func, number of random - * bytes stored in dest buffer for generate function - */ -static inline int crypt_s390_ppno(long func, void *param, - u8 *dest, long dest_len, - const u8 *seed, long seed_len) -{ - register long __func asm("0") = func & CRYPT_S390_FUNC_MASK; - register void *__param asm("1") = param; /* param block (240 bytes) */ - register u8 *__dest asm("2") = dest; /* buf for recv random bytes */ - register long __dest_len asm("3") = dest_len; /* requested random bytes */ - register const u8 *__seed asm("4") = seed; /* buf with seed data */ - register long __seed_len asm("5") = seed_len; /* bytes in seed buf */ - int ret = -1; - - asm volatile ( - "0: .insn rre,0xb93c0000,%1,%5\n" /* PPNO opcode */ - "1: brc 1,0b\n" /* handle partial completion */ - " la %0,0\n" - "2:\n" - EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) - : "+d" (ret), "+a"(__dest), "+d"(__dest_len) - : "d"(__func), "a"(__param), "a"(__seed), "d"(__seed_len) - : "cc", "memory"); - if (ret < 0) - return ret; - return (func & CRYPT_S390_FUNC_MASK) ? dest_len - __dest_len : 0; -} - -/** - * crypt_s390_func_available: - * @func: the function code of the specific function; 0 if op in general - * - * Tests if a specific crypto function is implemented on the machine. - * - * Returns 1 if func available; 0 if func or op in general not available - */ -static inline int crypt_s390_func_available(int func, - unsigned int facility_mask) -{ - unsigned char status[16]; - int ret; - - if (facility_mask & CRYPT_S390_MSA && !test_facility(17)) - return 0; - if (facility_mask & CRYPT_S390_MSA3 && !test_facility(76)) - return 0; - if (facility_mask & CRYPT_S390_MSA4 && !test_facility(77)) - return 0; - if (facility_mask & CRYPT_S390_MSA5 && !test_facility(57)) - return 0; - - switch (func & CRYPT_S390_OP_MASK) { - case CRYPT_S390_KM: - ret = crypt_s390_km(KM_QUERY, &status, NULL, NULL, 0); - break; - case CRYPT_S390_KMC: - ret = crypt_s390_kmc(KMC_QUERY, &status, NULL, NULL, 0); - break; - case CRYPT_S390_KIMD: - ret = crypt_s390_kimd(KIMD_QUERY, &status, NULL, 0); - break; - case CRYPT_S390_KLMD: - ret = crypt_s390_klmd(KLMD_QUERY, &status, NULL, 0); - break; - case CRYPT_S390_KMAC: - ret = crypt_s390_kmac(KMAC_QUERY, &status, NULL, 0); - break; - case CRYPT_S390_KMCTR: - ret = crypt_s390_kmctr(KMCTR_QUERY, &status, - NULL, NULL, 0, NULL); - break; - case CRYPT_S390_PPNO: - ret = crypt_s390_ppno(PPNO_QUERY, &status, - NULL, 0, NULL, 0); - break; - default: - return 0; - } - if (ret < 0) - return 0; - func &= CRYPT_S390_FUNC_MASK; - func &= 0x7f; /* mask modifier bit */ - return (status[func >> 3] & (0x80 >> (func & 7))) != 0; -} - -/** - * crypt_s390_pcc: - * @func: the function code passed to KM; see crypt_s390_km_func - * @param: address of parameter block; see POP for details on each func - * - * Executes the PCC (PERFORM CRYPTOGRAPHIC COMPUTATION) operation of the CPU. - * - * Returns -1 for failure, 0 for success. - */ -static inline int crypt_s390_pcc(long func, void *param) -{ - register long __func asm("0") = func & 0x7f; /* encrypt or decrypt */ - register void *__param asm("1") = param; - int ret = -1; - - asm volatile( - "0: .insn rre,0xb92c0000,0,0\n" /* PCC opcode */ - "1: brc 1,0b\n" /* handle partial completion */ - " la %0,0\n" - "2:\n" - EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) - : "+d" (ret) - : "d" (__func), "a" (__param) : "cc", "memory"); - return ret; -} - -#endif /* _CRYPTO_ARCH_S390_CRYPT_S390_H */ diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c index fba1c10a2dd0..697e71a75fc2 100644 --- a/arch/s390/crypto/des_s390.c +++ b/arch/s390/crypto/des_s390.c @@ -20,8 +20,7 @@ #include <linux/crypto.h> #include <crypto/algapi.h> #include <crypto/des.h> - -#include "crypt_s390.h" +#include <asm/cpacf.h> #define DES3_KEY_SIZE (3 * DES_KEY_SIZE) @@ -54,20 +53,20 @@ static void des_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm); - crypt_s390_km(KM_DEA_ENCRYPT, ctx->key, out, in, DES_BLOCK_SIZE); + cpacf_km(CPACF_KM_DEA_ENC, ctx->key, out, in, DES_BLOCK_SIZE); } static void des_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm); - crypt_s390_km(KM_DEA_DECRYPT, ctx->key, out, in, DES_BLOCK_SIZE); + cpacf_km(CPACF_KM_DEA_DEC, ctx->key, out, in, DES_BLOCK_SIZE); } static struct crypto_alg des_alg = { .cra_name = "des", .cra_driver_name = "des-s390", - .cra_priority = CRYPT_S390_PRIORITY, + .cra_priority = 300, .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = DES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct s390_des_ctx), @@ -95,7 +94,7 @@ static int ecb_desall_crypt(struct blkcipher_desc *desc, long func, u8 *out = walk->dst.virt.addr; u8 *in = walk->src.virt.addr; - ret = crypt_s390_km(func, key, out, in, n); + ret = cpacf_km(func, key, out, in, n); if (ret < 0 || ret != n) return -EIO; @@ -128,7 +127,7 @@ static int cbc_desall_crypt(struct blkcipher_desc *desc, long func, u8 *out = walk->dst.virt.addr; u8 *in = walk->src.virt.addr; - ret = crypt_s390_kmc(func, ¶m, out, in, n); + ret = cpacf_kmc(func, ¶m, out, in, n); if (ret < 0 || ret != n) return -EIO; @@ -149,7 +148,7 @@ static int ecb_des_encrypt(struct blkcipher_desc *desc, struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_desall_crypt(desc, KM_DEA_ENCRYPT, ctx->key, &walk); + return ecb_desall_crypt(desc, CPACF_KM_DEA_ENC, ctx->key, &walk); } static int ecb_des_decrypt(struct blkcipher_desc *desc, @@ -160,13 +159,13 @@ static int ecb_des_decrypt(struct blkcipher_desc *desc, struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_desall_crypt(desc, KM_DEA_DECRYPT, ctx->key, &walk); + return ecb_desall_crypt(desc, CPACF_KM_DEA_DEC, ctx->key, &walk); } static struct crypto_alg ecb_des_alg = { .cra_name = "ecb(des)", .cra_driver_name = "ecb-des-s390", - .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, + .cra_priority = 400, /* combo: des + ecb */ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, .cra_blocksize = DES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct s390_des_ctx), @@ -190,7 +189,7 @@ static int cbc_des_encrypt(struct blkcipher_desc *desc, struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return cbc_desall_crypt(desc, KMC_DEA_ENCRYPT, &walk); + return cbc_desall_crypt(desc, CPACF_KMC_DEA_ENC, &walk); } static int cbc_des_decrypt(struct blkcipher_desc *desc, @@ -200,13 +199,13 @@ static int cbc_des_decrypt(struct blkcipher_desc *desc, struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return cbc_desall_crypt(desc, KMC_DEA_DECRYPT, &walk); + return cbc_desall_crypt(desc, CPACF_KMC_DEA_DEC, &walk); } static struct crypto_alg cbc_des_alg = { .cra_name = "cbc(des)", .cra_driver_name = "cbc-des-s390", - .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, + .cra_priority = 400, /* combo: des + cbc */ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, .cra_blocksize = DES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct s390_des_ctx), @@ -258,20 +257,20 @@ static void des3_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm); - crypt_s390_km(KM_TDEA_192_ENCRYPT, ctx->key, dst, src, DES_BLOCK_SIZE); + cpacf_km(CPACF_KM_TDEA_192_ENC, ctx->key, dst, src, DES_BLOCK_SIZE); } static void des3_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm); - crypt_s390_km(KM_TDEA_192_DECRYPT, ctx->key, dst, src, DES_BLOCK_SIZE); + cpacf_km(CPACF_KM_TDEA_192_DEC, ctx->key, dst, src, DES_BLOCK_SIZE); } static struct crypto_alg des3_alg = { .cra_name = "des3_ede", .cra_driver_name = "des3_ede-s390", - .cra_priority = CRYPT_S390_PRIORITY, + .cra_priority = 300, .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = DES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct s390_des_ctx), @@ -295,7 +294,7 @@ static int ecb_des3_encrypt(struct blkcipher_desc *desc, struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_desall_crypt(desc, KM_TDEA_192_ENCRYPT, ctx->key, &walk); + return ecb_desall_crypt(desc, CPACF_KM_TDEA_192_ENC, ctx->key, &walk); } static int ecb_des3_decrypt(struct blkcipher_desc *desc, @@ -306,13 +305,13 @@ static int ecb_des3_decrypt(struct blkcipher_desc *desc, struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_desall_crypt(desc, KM_TDEA_192_DECRYPT, ctx->key, &walk); + return ecb_desall_crypt(desc, CPACF_KM_TDEA_192_DEC, ctx->key, &walk); } static struct crypto_alg ecb_des3_alg = { .cra_name = "ecb(des3_ede)", .cra_driver_name = "ecb-des3_ede-s390", - .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, + .cra_priority = 400, /* combo: des3 + ecb */ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, .cra_blocksize = DES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct s390_des_ctx), @@ -336,7 +335,7 @@ static int cbc_des3_encrypt(struct blkcipher_desc *desc, struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return cbc_desall_crypt(desc, KMC_TDEA_192_ENCRYPT, &walk); + return cbc_desall_crypt(desc, CPACF_KMC_TDEA_192_ENC, &walk); } static int cbc_des3_decrypt(struct blkcipher_desc *desc, @@ -346,13 +345,13 @@ static int cbc_des3_decrypt(struct blkcipher_desc *desc, struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return cbc_desall_crypt(desc, KMC_TDEA_192_DECRYPT, &walk); + return cbc_desall_crypt(desc, CPACF_KMC_TDEA_192_DEC, &walk); } static struct crypto_alg cbc_des3_alg = { .cra_name = "cbc(des3_ede)", .cra_driver_name = "cbc-des3_ede-s390", - .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, + .cra_priority = 400, /* combo: des3 + cbc */ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, .cra_blocksize = DES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct s390_des_ctx), @@ -407,8 +406,7 @@ static int ctr_desall_crypt(struct blkcipher_desc *desc, long func, n = __ctrblk_init(ctrptr, nbytes); else n = DES_BLOCK_SIZE; - ret = crypt_s390_kmctr(func, ctx->key, out, in, - n, ctrptr); + ret = cpacf_kmctr(func, ctx->key, out, in, n, ctrptr); if (ret < 0 || ret != n) { if (ctrptr == ctrblk) spin_unlock(&ctrblk_lock); @@ -438,8 +436,8 @@ static int ctr_desall_crypt(struct blkcipher_desc *desc, long func, if (nbytes) { out = walk->dst.virt.addr; in = walk->src.virt.addr; - ret = crypt_s390_kmctr(func, ctx->key, buf, in, - DES_BLOCK_SIZE, ctrbuf); + ret = cpacf_kmctr(func, ctx->key, buf, in, + DES_BLOCK_SIZE, ctrbuf); if (ret < 0 || ret != DES_BLOCK_SIZE) return -EIO; memcpy(out, buf, nbytes); @@ -458,7 +456,7 @@ static int ctr_des_encrypt(struct blkcipher_desc *desc, struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return ctr_desall_crypt(desc, KMCTR_DEA_ENCRYPT, ctx, &walk); + return ctr_desall_crypt(desc, CPACF_KMCTR_DEA_ENC, ctx, &walk); } static int ctr_des_decrypt(struct blkcipher_desc *desc, @@ -469,13 +467,13 @@ static int ctr_des_decrypt(struct blkcipher_desc *desc, struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return ctr_desall_crypt(desc, KMCTR_DEA_DECRYPT, ctx, &walk); + return ctr_desall_crypt(desc, CPACF_KMCTR_DEA_DEC, ctx, &walk); } static struct crypto_alg ctr_des_alg = { .cra_name = "ctr(des)", .cra_driver_name = "ctr-des-s390", - .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, + .cra_priority = 400, /* combo: des + ctr */ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, .cra_blocksize = 1, .cra_ctxsize = sizeof(struct s390_des_ctx), @@ -501,7 +499,7 @@ static int ctr_des3_encrypt(struct blkcipher_desc *desc, struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return ctr_desall_crypt(desc, KMCTR_TDEA_192_ENCRYPT, ctx, &walk); + return ctr_desall_crypt(desc, CPACF_KMCTR_TDEA_192_ENC, ctx, &walk); } static int ctr_des3_decrypt(struct blkcipher_desc *desc, @@ -512,13 +510,13 @@ static int ctr_des3_decrypt(struct blkcipher_desc *desc, struct blkcipher_walk walk; blkcipher_walk_init(&walk, dst, src, nbytes); - return ctr_desall_crypt(desc, KMCTR_TDEA_192_DECRYPT, ctx, &walk); + return ctr_desall_crypt(desc, CPACF_KMCTR_TDEA_192_DEC, ctx, &walk); } static struct crypto_alg ctr_des3_alg = { .cra_name = "ctr(des3_ede)", .cra_driver_name = "ctr-des3_ede-s390", - .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY, + .cra_priority = 400, /* combo: des3 + ede */ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, .cra_blocksize = 1, .cra_ctxsize = sizeof(struct s390_des_ctx), @@ -540,8 +538,8 @@ static int __init des_s390_init(void) { int ret; - if (!crypt_s390_func_available(KM_DEA_ENCRYPT, CRYPT_S390_MSA) || - !crypt_s390_func_available(KM_TDEA_192_ENCRYPT, CRYPT_S390_MSA)) + if (!cpacf_query(CPACF_KM, CPACF_KM_DEA_ENC) || + !cpacf_query(CPACF_KM, CPACF_KM_TDEA_192_ENC)) return -EOPNOTSUPP; ret = crypto_register_alg(&des_alg); @@ -563,10 +561,8 @@ static int __init des_s390_init(void) if (ret) goto cbc_des3_err; - if (crypt_s390_func_available(KMCTR_DEA_ENCRYPT, - CRYPT_S390_MSA | CRYPT_S390_MSA4) && - crypt_s390_func_available(KMCTR_TDEA_192_ENCRYPT, - CRYPT_S390_MSA | CRYPT_S390_MSA4)) { + if (cpacf_query(CPACF_KMCTR, CPACF_KMCTR_DEA_ENC) && + cpacf_query(CPACF_KMCTR, CPACF_KMCTR_TDEA_192_ENC)) { ret = crypto_register_alg(&ctr_des_alg); if (ret) goto ctr_des_err; diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c index 26e14efd30a7..ab68de72e795 100644 --- a/arch/s390/crypto/ghash_s390.c +++ b/arch/s390/crypto/ghash_s390.c @@ -10,8 +10,7 @@ #include <crypto/internal/hash.h> #include <linux/module.h> #include <linux/cpufeature.h> - -#include "crypt_s390.h" +#include <asm/cpacf.h> #define GHASH_BLOCK_SIZE 16 #define GHASH_DIGEST_SIZE 16 @@ -72,8 +71,8 @@ static int ghash_update(struct shash_desc *desc, src += n; if (!dctx->bytes) { - ret = crypt_s390_kimd(KIMD_GHASH, dctx, buf, - GHASH_BLOCK_SIZE); + ret = cpacf_kimd(CPACF_KIMD_GHASH, dctx, buf, + GHASH_BLOCK_SIZE); if (ret != GHASH_BLOCK_SIZE) return -EIO; } @@ -81,7 +80,7 @@ static int ghash_update(struct shash_desc *desc, n = srclen & ~(GHASH_BLOCK_SIZE - 1); if (n) { - ret = crypt_s390_kimd(KIMD_GHASH, dctx, src, n); + ret = cpacf_kimd(CPACF_KIMD_GHASH, dctx, src, n); if (ret != n) return -EIO; src += n; @@ -106,7 +105,7 @@ static int ghash_flush(struct ghash_desc_ctx *dctx) memset(pos, 0, dctx->bytes); - ret = crypt_s390_kimd(KIMD_GHASH, dctx, buf, GHASH_BLOCK_SIZE); + ret = cpacf_kimd(CPACF_KIMD_GHASH, dctx, buf, GHASH_BLOCK_SIZE); if (ret != GHASH_BLOCK_SIZE) return -EIO; @@ -137,7 +136,7 @@ static struct shash_alg ghash_alg = { .base = { .cra_name = "ghash", .cra_driver_name = "ghash-s390", - .cra_priority = CRYPT_S390_PRIORITY, + .cra_priority = 300, .cra_flags = CRYPTO_ALG_TYPE_SHASH, .cra_blocksize = GHASH_BLOCK_SIZE, .cra_ctxsize = sizeof(struct ghash_ctx), @@ -147,8 +146,7 @@ static struct shash_alg ghash_alg = { static int __init ghash_mod_init(void) { - if (!crypt_s390_func_available(KIMD_GHASH, - CRYPT_S390_MSA | CRYPT_S390_MSA4)) + if (!cpacf_query(CPACF_KIMD, CPACF_KIMD_GHASH)) return -EOPNOTSUPP; return crypto_register_shash(&ghash_alg); diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c index b8045b97f4fb..41527b113f5a 100644 --- a/arch/s390/crypto/prng.c +++ b/arch/s390/crypto/prng.c @@ -23,8 +23,7 @@ #include <asm/debug.h> #include <asm/uaccess.h> #include <asm/timex.h> - -#include "crypt_s390.h" +#include <asm/cpacf.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("IBM Corporation"); @@ -136,8 +135,8 @@ static int generate_entropy(u8 *ebuf, size_t nbytes) else h = ebuf; /* generate sha256 from this page */ - if (crypt_s390_kimd(KIMD_SHA_256, h, - pg, PAGE_SIZE) != PAGE_SIZE) { + if (cpacf_kimd(CPACF_KIMD_SHA_256, h, + pg, PAGE_SIZE) != PAGE_SIZE) { prng_errorflag = PRNG_GEN_ENTROPY_FAILED; ret = -EIO; goto out; @@ -164,9 +163,9 @@ static void prng_tdes_add_entropy(void) int ret; for (i = 0; i < 16; i++) { - ret = crypt_s390_kmc(KMC_PRNG, prng_data->prngws.parm_block, - (char *)entropy, (char *)entropy, - sizeof(entropy)); + ret = cpacf_kmc(CPACF_KMC_PRNG, prng_data->prngws.parm_block, + (char *)entropy, (char *)entropy, + sizeof(entropy)); BUG_ON(ret < 0 || ret != sizeof(entropy)); memcpy(prng_data->prngws.parm_block, entropy, sizeof(entropy)); } @@ -311,9 +310,8 @@ static int __init prng_sha512_selftest(void) memset(&ws, 0, sizeof(ws)); /* initial seed */ - ret = crypt_s390_ppno(PPNO_SHA512_DRNG_SEED, - &ws, NULL, 0, - seed, sizeof(seed)); + ret = cpacf_ppno(CPACF_PPNO_SHA512_DRNG_SEED, &ws, NULL, 0, + seed, sizeof(seed)); if (ret < 0) { pr_err("The prng self test seed operation for the " "SHA-512 mode failed with rc=%d\n", ret); @@ -331,18 +329,16 @@ static int __init prng_sha512_selftest(void) } /* generate random bytes */ - ret = crypt_s390_ppno(PPNO_SHA512_DRNG_GEN, - &ws, buf, sizeof(buf), - NULL, 0); + ret = cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN, + &ws, buf, sizeof(buf), NULL, 0); if (ret < 0) { pr_err("The prng self test generate operation for " "the SHA-512 mode failed with rc=%d\n", ret); prng_errorflag = PRNG_SELFTEST_FAILED; return -EIO; } - ret = crypt_s390_ppno(PPNO_SHA512_DRNG_GEN, - &ws, buf, sizeof(buf), - NULL, 0); + ret = cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN, + &ws, buf, sizeof(buf), NULL, 0); if (ret < 0) { pr_err("The prng self test generate operation for " "the SHA-512 mode failed with rc=%d\n", ret); @@ -396,9 +392,8 @@ static int __init prng_sha512_instantiate(void) get_tod_clock_ext(seed + 48); /* initial seed of the ppno drng */ - ret = crypt_s390_ppno(PPNO_SHA512_DRNG_SEED, - &prng_data->ppnows, NULL, 0, - seed, sizeof(seed)); + ret = cpacf_ppno(CPACF_PPNO_SHA512_DRNG_SEED, + &prng_data->ppnows, NULL, 0, seed, sizeof(seed)); if (ret < 0) { prng_errorflag = PRNG_SEED_FAILED; ret = -EIO; @@ -409,11 +404,9 @@ static int __init prng_sha512_instantiate(void) bytes for the FIPS 140-2 Conditional Self Test */ if (fips_enabled) { prng_data->prev = prng_data->buf + prng_chunk_size; - ret = crypt_s390_ppno(PPNO_SHA512_DRNG_GEN, - &prng_data->ppnows, - prng_data->prev, - prng_chunk_size, - NULL, 0); + ret = cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN, + &prng_data->ppnows, + prng_data->prev, prng_chunk_size, NULL, 0); if (ret < 0 || ret != prng_chunk_size) { prng_errorflag = PRNG_GEN_FAILED; ret = -EIO; @@ -447,9 +440,8 @@ static int prng_sha512_reseed(void) return ret; /* do a reseed of the ppno drng with this bytestring */ - ret = crypt_s390_ppno(PPNO_SHA512_DRNG_SEED, - &prng_data->ppnows, NULL, 0, - seed, sizeof(seed)); + ret = cpacf_ppno(CPACF_PPNO_SHA512_DRNG_SEED, + &prng_data->ppnows, NULL, 0, seed, sizeof(seed)); if (ret) { prng_errorflag = PRNG_RESEED_FAILED; return -EIO; @@ -471,9 +463,8 @@ static int prng_sha512_generate(u8 *buf, size_t nbytes) } /* PPNO generate */ - ret = crypt_s390_ppno(PPNO_SHA512_DRNG_GEN, - &prng_data->ppnows, buf, nbytes, - NULL, 0); + ret = cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN, + &prng_data->ppnows, buf, nbytes, NULL, 0); if (ret < 0 || ret != nbytes) { prng_errorflag = PRNG_GEN_FAILED; return -EIO; @@ -555,8 +546,8 @@ static ssize_t prng_tdes_read(struct file *file, char __user *ubuf, * Note: you can still get strict X9.17 conformity by setting * prng_chunk_size to 8 bytes. */ - tmp = crypt_s390_kmc(KMC_PRNG, prng_data->prngws.parm_block, - prng_data->buf, prng_data->buf, n); + tmp = cpacf_kmc(CPACF_KMC_PRNG, prng_data->prngws.parm_block, + prng_data->buf, prng_data->buf, n); if (tmp < 0 || tmp != n) { ret = -EIO; break; @@ -669,11 +660,13 @@ static const struct file_operations prng_tdes_fops = { static struct miscdevice prng_sha512_dev = { .name = "prandom", .minor = MISC_DYNAMIC_MINOR, + .mode = 0644, .fops = &prng_sha512_fops, }; static struct miscdevice prng_tdes_dev = { .name = "prandom", .minor = MISC_DYNAMIC_MINOR, + .mode = 0644, .fops = &prng_tdes_fops, }; @@ -813,14 +806,13 @@ static int __init prng_init(void) int ret; /* check if the CPU has a PRNG */ - if (!crypt_s390_func_available(KMC_PRNG, CRYPT_S390_MSA)) + if (!cpacf_query(CPACF_KMC, CPACF_KMC_PRNG)) return -EOPNOTSUPP; /* choose prng mode */ if (prng_mode != PRNG_MODE_TDES) { /* check for MSA5 support for PPNO operations */ - if (!crypt_s390_func_available(PPNO_SHA512_DRNG_GEN, - CRYPT_S390_MSA5)) { + if (!cpacf_query(CPACF_PPNO, CPACF_PPNO_SHA512_DRNG_GEN)) { if (prng_mode == PRNG_MODE_SHA512) { pr_err("The prng module cannot " "start in SHA-512 mode\n"); diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c index 9208eadae9f0..5fbf91bbb478 100644 --- a/arch/s390/crypto/sha1_s390.c +++ b/arch/s390/crypto/sha1_s390.c @@ -28,8 +28,8 @@ #include <linux/module.h> #include <linux/cpufeature.h> #include <crypto/sha.h> +#include <asm/cpacf.h> -#include "crypt_s390.h" #include "sha.h" static int sha1_init(struct shash_desc *desc) @@ -42,7 +42,7 @@ static int sha1_init(struct shash_desc *desc) sctx->state[3] = SHA1_H3; sctx->state[4] = SHA1_H4; sctx->count = 0; - sctx->func = KIMD_SHA_1; + sctx->func = CPACF_KIMD_SHA_1; return 0; } @@ -66,7 +66,7 @@ static int sha1_import(struct shash_desc *desc, const void *in) sctx->count = ictx->count; memcpy(sctx->state, ictx->state, sizeof(ictx->state)); memcpy(sctx->buf, ictx->buffer, sizeof(ictx->buffer)); - sctx->func = KIMD_SHA_1; + sctx->func = CPACF_KIMD_SHA_1; return 0; } @@ -82,7 +82,7 @@ static struct shash_alg alg = { .base = { .cra_name = "sha1", .cra_driver_name= "sha1-s390", - .cra_priority = CRYPT_S390_PRIORITY, + .cra_priority = 300, .cra_flags = CRYPTO_ALG_TYPE_SHASH, .cra_blocksize = SHA1_BLOCK_SIZE, .cra_module = THIS_MODULE, @@ -91,7 +91,7 @@ static struct shash_alg alg = { static int __init sha1_s390_init(void) { - if (!crypt_s390_func_available(KIMD_SHA_1, CRYPT_S390_MSA)) + if (!cpacf_query(CPACF_KIMD, CPACF_KIMD_SHA_1)) return -EOPNOTSUPP; return crypto_register_shash(&alg); } diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c index 667888f5c964..10aac0b11988 100644 --- a/arch/s390/crypto/sha256_s390.c +++ b/arch/s390/crypto/sha256_s390.c @@ -18,8 +18,8 @@ #include <linux/module.h> #include <linux/cpufeature.h> #include <crypto/sha.h> +#include <asm/cpacf.h> -#include "crypt_s390.h" #include "sha.h" static int sha256_init(struct shash_desc *desc) @@ -35,7 +35,7 @@ static int sha256_init(struct shash_desc *desc) sctx->state[6] = SHA256_H6; sctx->state[7] = SHA256_H7; sctx->count = 0; - sctx->func = KIMD_SHA_256; + sctx->func = CPACF_KIMD_SHA_256; return 0; } @@ -59,7 +59,7 @@ static int sha256_import(struct shash_desc *desc, const void *in) sctx->count = ictx->count; memcpy(sctx->state, ictx->state, sizeof(ictx->state)); memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf)); - sctx->func = KIMD_SHA_256; + sctx->func = CPACF_KIMD_SHA_256; return 0; } @@ -75,7 +75,7 @@ static struct shash_alg sha256_alg = { .base = { .cra_name = "sha256", .cra_driver_name= "sha256-s390", - .cra_priority = CRYPT_S390_PRIORITY, + .cra_priority = 300, .cra_flags = CRYPTO_ALG_TYPE_SHASH, .cra_blocksize = SHA256_BLOCK_SIZE, .cra_module = THIS_MODULE, @@ -95,7 +95,7 @@ static int sha224_init(struct shash_desc *desc) sctx->state[6] = SHA224_H6; sctx->state[7] = SHA224_H7; sctx->count = 0; - sctx->func = KIMD_SHA_256; + sctx->func = CPACF_KIMD_SHA_256; return 0; } @@ -112,7 +112,7 @@ static struct shash_alg sha224_alg = { .base = { .cra_name = "sha224", .cra_driver_name= "sha224-s390", - .cra_priority = CRYPT_S390_PRIORITY, + .cra_priority = 300, .cra_flags = CRYPTO_ALG_TYPE_SHASH, .cra_blocksize = SHA224_BLOCK_SIZE, .cra_module = THIS_MODULE, @@ -123,7 +123,7 @@ static int __init sha256_s390_init(void) { int ret; - if (!crypt_s390_func_available(KIMD_SHA_256, CRYPT_S390_MSA)) + if (!cpacf_query(CPACF_KIMD, CPACF_KIMD_SHA_256)) return -EOPNOTSUPP; ret = crypto_register_shash(&sha256_alg); if (ret < 0) diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c index 2ba66b1518f0..ea85757be407 100644 --- a/arch/s390/crypto/sha512_s390.c +++ b/arch/s390/crypto/sha512_s390.c @@ -19,9 +19,9 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/cpufeature.h> +#include <asm/cpacf.h> #include "sha.h" -#include "crypt_s390.h" static int sha512_init(struct shash_desc *desc) { @@ -36,7 +36,7 @@ static int sha512_init(struct shash_desc *desc) *(__u64 *)&ctx->state[12] = 0x1f83d9abfb41bd6bULL; *(__u64 *)&ctx->state[14] = 0x5be0cd19137e2179ULL; ctx->count = 0; - ctx->func = KIMD_SHA_512; + ctx->func = CPACF_KIMD_SHA_512; return 0; } @@ -64,7 +64,7 @@ static int sha512_import(struct shash_desc *desc, const void *in) memcpy(sctx->state, ictx->state, sizeof(ictx->state)); memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf)); - sctx->func = KIMD_SHA_512; + sctx->func = CPACF_KIMD_SHA_512; return 0; } @@ -80,7 +80,7 @@ static struct shash_alg sha512_alg = { .base = { .cra_name = "sha512", .cra_driver_name= "sha512-s390", - .cra_priority = CRYPT_S390_PRIORITY, + .cra_priority = 300, .cra_flags = CRYPTO_ALG_TYPE_SHASH, .cra_blocksize = SHA512_BLOCK_SIZE, .cra_module = THIS_MODULE, @@ -102,7 +102,7 @@ static int sha384_init(struct shash_desc *desc) *(__u64 *)&ctx->state[12] = 0xdb0c2e0d64f98fa7ULL; *(__u64 *)&ctx->state[14] = 0x47b5481dbefa4fa4ULL; ctx->count = 0; - ctx->func = KIMD_SHA_512; + ctx->func = CPACF_KIMD_SHA_512; return 0; } @@ -119,7 +119,7 @@ static struct shash_alg sha384_alg = { .base = { .cra_name = "sha384", .cra_driver_name= "sha384-s390", - .cra_priority = CRYPT_S390_PRIORITY, + .cra_priority = 300, .cra_flags = CRYPTO_ALG_TYPE_SHASH, .cra_blocksize = SHA384_BLOCK_SIZE, .cra_ctxsize = sizeof(struct s390_sha_ctx), @@ -133,7 +133,7 @@ static int __init init(void) { int ret; - if (!crypt_s390_func_available(KIMD_SHA_512, CRYPT_S390_MSA)) + if (!cpacf_query(CPACF_KIMD, CPACF_KIMD_SHA_512)) return -EOPNOTSUPP; if ((ret = crypto_register_shash(&sha512_alg)) < 0) goto out; diff --git a/arch/s390/crypto/sha_common.c b/arch/s390/crypto/sha_common.c index 8620b0ec9c42..8e908166c3ee 100644 --- a/arch/s390/crypto/sha_common.c +++ b/arch/s390/crypto/sha_common.c @@ -15,8 +15,8 @@ #include <crypto/internal/hash.h> #include <linux/module.h> +#include <asm/cpacf.h> #include "sha.h" -#include "crypt_s390.h" int s390_sha_update(struct shash_desc *desc, const u8 *data, unsigned int len) { @@ -35,7 +35,7 @@ int s390_sha_update(struct shash_desc *desc, const u8 *data, unsigned int len) /* process one stored block */ if (index) { memcpy(ctx->buf + index, data, bsize - index); - ret = crypt_s390_kimd(ctx->func, ctx->state, ctx->buf, bsize); + ret = cpacf_kimd(ctx->func, ctx->state, ctx->buf, bsize); if (ret != bsize) return -EIO; data += bsize - index; @@ -45,8 +45,8 @@ int s390_sha_update(struct shash_desc *desc, const u8 *data, unsigned int len) /* process as many blocks as possible */ if (len >= bsize) { - ret = crypt_s390_kimd(ctx->func, ctx->state, data, - len & ~(bsize - 1)); + ret = cpacf_kimd(ctx->func, ctx->state, data, + len & ~(bsize - 1)); if (ret != (len & ~(bsize - 1))) return -EIO; data += ret; @@ -89,7 +89,7 @@ int s390_sha_final(struct shash_desc *desc, u8 *out) bits = ctx->count * 8; memcpy(ctx->buf + end - 8, &bits, sizeof(bits)); - ret = crypt_s390_kimd(ctx->func, ctx->state, ctx->buf, end); + ret = cpacf_kimd(ctx->func, ctx->state, ctx->buf, end); if (ret != end) return -EIO; diff --git a/arch/s390/defconfig b/arch/s390/defconfig index e24f2af4c73b..3f571ea89509 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -1,8 +1,8 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y -CONFIG_FHANDLE=y +CONFIG_USELIB=y CONFIG_AUDIT=y -CONFIG_NO_HZ=y +CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y CONFIG_TASKSTATS=y CONFIG_TASK_DELAY_ACCT=y @@ -11,19 +11,19 @@ CONFIG_TASK_IO_ACCOUNTING=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_CGROUPS=y -CONFIG_CGROUP_FREEZER=y -CONFIG_CGROUP_PIDS=y -CONFIG_CGROUP_DEVICE=y -CONFIG_CPUSETS=y -CONFIG_CGROUP_CPUACCT=y CONFIG_MEMCG=y CONFIG_MEMCG_SWAP=y -CONFIG_MEMCG_KMEM=y -CONFIG_CGROUP_HUGETLB=y -CONFIG_CGROUP_PERF=y +CONFIG_BLK_CGROUP=y CONFIG_CGROUP_SCHED=y CONFIG_RT_GROUP_SCHED=y -CONFIG_BLK_CGROUP=y +CONFIG_CGROUP_PIDS=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_HUGETLB=y +CONFIG_CPUSETS=y +CONFIG_CGROUP_DEVICE=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_PERF=y +CONFIG_CHECKPOINT_RESTORE=y CONFIG_NAMESPACES=y CONFIG_USER_NS=y CONFIG_BLK_DEV_INITRD=y @@ -44,7 +44,6 @@ CONFIG_PARTITION_ADVANCED=y CONFIG_IBM_PARTITION=y CONFIG_DEFAULT_DEADLINE=y CONFIG_LIVEPATCH=y -CONFIG_MARCH_Z196=y CONFIG_NR_CPUS=256 CONFIG_NUMA=y CONFIG_HZ_100=y @@ -52,6 +51,14 @@ CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_KSM=y CONFIG_TRANSPARENT_HUGEPAGE=y +CONFIG_CLEANCACHE=y +CONFIG_FRONTSWAP=y +CONFIG_CMA=y +CONFIG_ZSWAP=y +CONFIG_ZBUD=m +CONFIG_ZSMALLOC=m +CONFIG_ZSMALLOC_STAT=y +CONFIG_IDLE_PAGE_TRACKING=y CONFIG_CRASH_DUMP=y CONFIG_BINFMT_MISC=m CONFIG_HIBERNATION=y @@ -61,7 +68,6 @@ CONFIG_UNIX=y CONFIG_NET_KEY=y CONFIG_INET=y CONFIG_IP_MULTICAST=y -# CONFIG_INET_LRO is not set CONFIG_L2TP=m CONFIG_L2TP_DEBUGFS=m CONFIG_VLAN_8021Q=y @@ -144,6 +150,9 @@ CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_HUGETLBFS=y # CONFIG_NETWORK_FILESYSTEMS is not set +CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF4=y +CONFIG_GDB_SCRIPTS=y CONFIG_UNUSED_SYMBOLS=y CONFIG_DEBUG_SECTION_MISMATCH=y CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y @@ -158,20 +167,21 @@ CONFIG_LOCK_STAT=y CONFIG_DEBUG_LOCKDEP=y CONFIG_DEBUG_ATOMIC_SLEEP=y CONFIG_DEBUG_LIST=y -CONFIG_DEBUG_PI_LIST=y CONFIG_DEBUG_SG=y CONFIG_DEBUG_NOTIFIERS=y CONFIG_RCU_CPU_STALL_TIMEOUT=60 CONFIG_RCU_TRACE=y CONFIG_LATENCYTOP=y CONFIG_DEBUG_STRICT_USER_COPY_CHECKS=y -CONFIG_TRACER_SNAPSHOT=y +CONFIG_SCHED_TRACER=y +CONFIG_FTRACE_SYSCALLS=y CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y CONFIG_STACK_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y CONFIG_UPROBE_EVENT=y +CONFIG_FUNCTION_PROFILER=y +CONFIG_TRACE_ENUM_MAP_FILE=y CONFIG_KPROBES_SANITY_TEST=y -# CONFIG_STRICT_DEVMEM is not set CONFIG_S390_PTDUMP=y CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_AUTHENC=m @@ -212,8 +222,6 @@ CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_DEFLATE=m -CONFIG_CRYPTO_ZLIB=m -CONFIG_CRYPTO_LZO=m CONFIG_CRYPTO_LZ4=m CONFIG_CRYPTO_LZ4HC=m CONFIG_CRYPTO_ANSI_CPRNG=m diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index 0f3da2cb2bd6..255c7eec4481 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -278,8 +278,8 @@ static int hypfs_fill_super(struct super_block *sb, void *data, int silent) sbi->uid = current_uid(); sbi->gid = current_gid(); sb->s_fs_info = sbi; - sb->s_blocksize = PAGE_CACHE_SIZE; - sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + sb->s_blocksize = PAGE_SIZE; + sb->s_blocksize_bits = PAGE_SHIFT; sb->s_magic = HYPFS_MAGIC; sb->s_op = &hypfs_s_ops; if (hypfs_parse_options(data, sb)) diff --git a/arch/s390/include/asm/cache.h b/arch/s390/include/asm/cache.h index 4d7ccac5fd1d..22da3b34c655 100644 --- a/arch/s390/include/asm/cache.h +++ b/arch/s390/include/asm/cache.h @@ -15,4 +15,7 @@ #define __read_mostly __attribute__((__section__(".data..read_mostly"))) +/* Read-only memory is marked before mark_rodata_ro() is called. */ +#define __ro_after_init __read_mostly + #endif diff --git a/arch/s390/include/asm/checksum.h b/arch/s390/include/asm/checksum.h index 740364856355..d7f100c53f07 100644 --- a/arch/s390/include/asm/checksum.h +++ b/arch/s390/include/asm/checksum.h @@ -91,8 +91,7 @@ static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) * returns a 32-bit checksum */ static inline __wsum -csum_tcpudp_nofold(__be32 saddr, __be32 daddr, - unsigned short len, unsigned short proto, +csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __wsum sum) { __u32 csum = (__force __u32)sum; @@ -118,8 +117,7 @@ csum_tcpudp_nofold(__be32 saddr, __be32 daddr, */ static inline __sum16 -csum_tcpudp_magic(__be32 saddr, __be32 daddr, - unsigned short len, unsigned short proto, +csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __wsum sum) { return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum)); diff --git a/arch/s390/include/asm/clp.h b/arch/s390/include/asm/clp.h index a0e71a501f7c..5687d62fb0cb 100644 --- a/arch/s390/include/asm/clp.h +++ b/arch/s390/include/asm/clp.h @@ -4,14 +4,23 @@ /* CLP common request & response block size */ #define CLP_BLK_SIZE PAGE_SIZE +#define CLP_LPS_BASE 0 +#define CLP_LPS_PCI 2 + struct clp_req_hdr { u16 len; u16 cmd; + u32 fmt : 4; + u32 reserved1 : 28; + u64 reserved2; } __packed; struct clp_rsp_hdr { u16 len; u16 rsp; + u32 fmt : 4; + u32 reserved1 : 28; + u64 reserved2; } __packed; /* CLP Response Codes */ @@ -25,4 +34,22 @@ struct clp_rsp_hdr { #define CLP_RC_NODATA 0x0080 /* No data available */ #define CLP_RC_FC_UNKNOWN 0x0100 /* Function code not recognized */ +/* Store logical-processor characteristics request */ +struct clp_req_slpc { + struct clp_req_hdr hdr; +} __packed; + +struct clp_rsp_slpc { + struct clp_rsp_hdr hdr; + u32 reserved2[4]; + u32 lpif[8]; + u32 reserved3[8]; + u32 lpic[8]; +} __packed; + +struct clp_req_rsp_slpc { + struct clp_req_slpc request; + struct clp_rsp_slpc response; +} __packed; + #endif diff --git a/arch/s390/include/asm/cpacf.h b/arch/s390/include/asm/cpacf.h new file mode 100644 index 000000000000..1a82cf26ee11 --- /dev/null +++ b/arch/s390/include/asm/cpacf.h @@ -0,0 +1,410 @@ +/* + * CP Assist for Cryptographic Functions (CPACF) + * + * Copyright IBM Corp. 2003, 2016 + * Author(s): Thomas Spatzier + * Jan Glauber + * Harald Freudenberger (freude@de.ibm.com) + * Martin Schwidefsky <schwidefsky@de.ibm.com> + */ +#ifndef _ASM_S390_CPACF_H +#define _ASM_S390_CPACF_H + +#include <asm/facility.h> + +/* + * Instruction opcodes for the CPACF instructions + */ +#define CPACF_KMAC 0xb91e /* MSA */ +#define CPACF_KM 0xb92e /* MSA */ +#define CPACF_KMC 0xb92f /* MSA */ +#define CPACF_KIMD 0xb93e /* MSA */ +#define CPACF_KLMD 0xb93f /* MSA */ +#define CPACF_PCC 0xb92c /* MSA4 */ +#define CPACF_KMCTR 0xb92d /* MSA4 */ +#define CPACF_PPNO 0xb93c /* MSA5 */ + +/* + * Function codes for the KM (CIPHER MESSAGE) + * instruction (0x80 is the decipher modifier bit) + */ +#define CPACF_KM_QUERY 0x00 +#define CPACF_KM_DEA_ENC 0x01 +#define CPACF_KM_DEA_DEC 0x81 +#define CPACF_KM_TDEA_128_ENC 0x02 +#define CPACF_KM_TDEA_128_DEC 0x82 +#define CPACF_KM_TDEA_192_ENC 0x03 +#define CPACF_KM_TDEA_192_DEC 0x83 +#define CPACF_KM_AES_128_ENC 0x12 +#define CPACF_KM_AES_128_DEC 0x92 +#define CPACF_KM_AES_192_ENC 0x13 +#define CPACF_KM_AES_192_DEC 0x93 +#define CPACF_KM_AES_256_ENC 0x14 +#define CPACF_KM_AES_256_DEC 0x94 +#define CPACF_KM_XTS_128_ENC 0x32 +#define CPACF_KM_XTS_128_DEC 0xb2 +#define CPACF_KM_XTS_256_ENC 0x34 +#define CPACF_KM_XTS_256_DEC 0xb4 + +/* + * Function codes for the KMC (CIPHER MESSAGE WITH CHAINING) + * instruction (0x80 is the decipher modifier bit) + */ +#define CPACF_KMC_QUERY 0x00 +#define CPACF_KMC_DEA_ENC 0x01 +#define CPACF_KMC_DEA_DEC 0x81 +#define CPACF_KMC_TDEA_128_ENC 0x02 +#define CPACF_KMC_TDEA_128_DEC 0x82 +#define CPACF_KMC_TDEA_192_ENC 0x03 +#define CPACF_KMC_TDEA_192_DEC 0x83 +#define CPACF_KMC_AES_128_ENC 0x12 +#define CPACF_KMC_AES_128_DEC 0x92 +#define CPACF_KMC_AES_192_ENC 0x13 +#define CPACF_KMC_AES_192_DEC 0x93 +#define CPACF_KMC_AES_256_ENC 0x14 +#define CPACF_KMC_AES_256_DEC 0x94 +#define CPACF_KMC_PRNG 0x43 + +/* + * Function codes for the KMCTR (CIPHER MESSAGE WITH COUNTER) + * instruction (0x80 is the decipher modifier bit) + */ +#define CPACF_KMCTR_QUERY 0x00 +#define CPACF_KMCTR_DEA_ENC 0x01 +#define CPACF_KMCTR_DEA_DEC 0x81 +#define CPACF_KMCTR_TDEA_128_ENC 0x02 +#define CPACF_KMCTR_TDEA_128_DEC 0x82 +#define CPACF_KMCTR_TDEA_192_ENC 0x03 +#define CPACF_KMCTR_TDEA_192_DEC 0x83 +#define CPACF_KMCTR_AES_128_ENC 0x12 +#define CPACF_KMCTR_AES_128_DEC 0x92 +#define CPACF_KMCTR_AES_192_ENC 0x13 +#define CPACF_KMCTR_AES_192_DEC 0x93 +#define CPACF_KMCTR_AES_256_ENC 0x14 +#define CPACF_KMCTR_AES_256_DEC 0x94 + +/* + * Function codes for the KIMD (COMPUTE INTERMEDIATE MESSAGE DIGEST) + * instruction (0x80 is the decipher modifier bit) + */ +#define CPACF_KIMD_QUERY 0x00 +#define CPACF_KIMD_SHA_1 0x01 +#define CPACF_KIMD_SHA_256 0x02 +#define CPACF_KIMD_SHA_512 0x03 +#define CPACF_KIMD_GHASH 0x41 + +/* + * Function codes for the KLMD (COMPUTE LAST MESSAGE DIGEST) + * instruction (0x80 is the decipher modifier bit) + */ +#define CPACF_KLMD_QUERY 0x00 +#define CPACF_KLMD_SHA_1 0x01 +#define CPACF_KLMD_SHA_256 0x02 +#define CPACF_KLMD_SHA_512 0x03 + +/* + * function codes for the KMAC (COMPUTE MESSAGE AUTHENTICATION CODE) + * instruction (0x80 is the decipher modifier bit) + */ +#define CPACF_KMAC_QUERY 0x00 +#define CPACF_KMAC_DEA 0x01 +#define CPACF_KMAC_TDEA_128 0x02 +#define CPACF_KMAC_TDEA_192 0x03 + +/* + * Function codes for the PPNO (PERFORM PSEUDORANDOM NUMBER OPERATION) + * instruction (0x80 is the decipher modifier bit) + */ +#define CPACF_PPNO_QUERY 0x00 +#define CPACF_PPNO_SHA512_DRNG_GEN 0x03 +#define CPACF_PPNO_SHA512_DRNG_SEED 0x83 + +/** + * cpacf_query() - check if a specific CPACF function is available + * @opcode: the opcode of the crypto instruction + * @func: the function code to test for + * + * Executes the query function for the given crypto instruction @opcode + * and checks if @func is available + * + * Returns 1 if @func is available for @opcode, 0 otherwise + */ +static inline void __cpacf_query(unsigned int opcode, unsigned char *status) +{ + typedef struct { unsigned char _[16]; } status_type; + register unsigned long r0 asm("0") = 0; /* query function */ + register unsigned long r1 asm("1") = (unsigned long) status; + + asm volatile( + /* Parameter registers are ignored, but may not be 0 */ + "0: .insn rrf,%[opc] << 16,2,2,2,0\n" + " brc 1,0b\n" /* handle partial completion */ + : "=m" (*(status_type *) status) + : [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (opcode) + : "cc"); +} + +static inline int cpacf_query(unsigned int opcode, unsigned int func) +{ + unsigned char status[16]; + + switch (opcode) { + case CPACF_KMAC: + case CPACF_KM: + case CPACF_KMC: + case CPACF_KIMD: + case CPACF_KLMD: + if (!test_facility(17)) /* check for MSA */ + return 0; + break; + case CPACF_PCC: + case CPACF_KMCTR: + if (!test_facility(77)) /* check for MSA4 */ + return 0; + break; + case CPACF_PPNO: + if (!test_facility(57)) /* check for MSA5 */ + return 0; + break; + default: + BUG(); + } + __cpacf_query(opcode, status); + return (status[func >> 3] & (0x80 >> (func & 7))) != 0; +} + +/** + * cpacf_km() - executes the KM (CIPHER MESSAGE) instruction + * @func: the function code passed to KM; see CPACF_KM_xxx defines + * @param: address of parameter block; see POP for details on each func + * @dest: address of destination memory area + * @src: address of source memory area + * @src_len: length of src operand in bytes + * + * Returns 0 for the query func, number of processed bytes for + * encryption/decryption funcs + */ +static inline int cpacf_km(long func, void *param, + u8 *dest, const u8 *src, long src_len) +{ + register unsigned long r0 asm("0") = (unsigned long) func; + register unsigned long r1 asm("1") = (unsigned long) param; + register unsigned long r2 asm("2") = (unsigned long) src; + register unsigned long r3 asm("3") = (unsigned long) src_len; + register unsigned long r4 asm("4") = (unsigned long) dest; + + asm volatile( + "0: .insn rre,%[opc] << 16,%[dst],%[src]\n" + " brc 1,0b\n" /* handle partial completion */ + : [src] "+a" (r2), [len] "+d" (r3), [dst] "+a" (r4) + : [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (CPACF_KM) + : "cc", "memory"); + + return src_len - r3; +} + +/** + * cpacf_kmc() - executes the KMC (CIPHER MESSAGE WITH CHAINING) instruction + * @func: the function code passed to KM; see CPACF_KMC_xxx defines + * @param: address of parameter block; see POP for details on each func + * @dest: address of destination memory area + * @src: address of source memory area + * @src_len: length of src operand in bytes + * + * Returns 0 for the query func, number of processed bytes for + * encryption/decryption funcs + */ +static inline int cpacf_kmc(long func, void *param, + u8 *dest, const u8 *src, long src_len) +{ + register unsigned long r0 asm("0") = (unsigned long) func; + register unsigned long r1 asm("1") = (unsigned long) param; + register unsigned long r2 asm("2") = (unsigned long) src; + register unsigned long r3 asm("3") = (unsigned long) src_len; + register unsigned long r4 asm("4") = (unsigned long) dest; + + asm volatile( + "0: .insn rre,%[opc] << 16,%[dst],%[src]\n" + " brc 1,0b\n" /* handle partial completion */ + : [src] "+a" (r2), [len] "+d" (r3), [dst] "+a" (r4) + : [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (CPACF_KMC) + : "cc", "memory"); + + return src_len - r3; +} + +/** + * cpacf_kimd() - executes the KIMD (COMPUTE INTERMEDIATE MESSAGE DIGEST) + * instruction + * @func: the function code passed to KM; see CPACF_KIMD_xxx defines + * @param: address of parameter block; see POP for details on each func + * @src: address of source memory area + * @src_len: length of src operand in bytes + * + * Returns 0 for the query func, number of processed bytes for digest funcs + */ +static inline int cpacf_kimd(long func, void *param, + const u8 *src, long src_len) +{ + register unsigned long r0 asm("0") = (unsigned long) func; + register unsigned long r1 asm("1") = (unsigned long) param; + register unsigned long r2 asm("2") = (unsigned long) src; + register unsigned long r3 asm("3") = (unsigned long) src_len; + + asm volatile( + "0: .insn rre,%[opc] << 16,0,%[src]\n" + " brc 1,0b\n" /* handle partial completion */ + : [src] "+a" (r2), [len] "+d" (r3) + : [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (CPACF_KIMD) + : "cc", "memory"); + + return src_len - r3; +} + +/** + * cpacf_klmd() - executes the KLMD (COMPUTE LAST MESSAGE DIGEST) instruction + * @func: the function code passed to KM; see CPACF_KLMD_xxx defines + * @param: address of parameter block; see POP for details on each func + * @src: address of source memory area + * @src_len: length of src operand in bytes + * + * Returns 0 for the query func, number of processed bytes for digest funcs + */ +static inline int cpacf_klmd(long func, void *param, + const u8 *src, long src_len) +{ + register unsigned long r0 asm("0") = (unsigned long) func; + register unsigned long r1 asm("1") = (unsigned long) param; + register unsigned long r2 asm("2") = (unsigned long) src; + register unsigned long r3 asm("3") = (unsigned long) src_len; + + asm volatile( + "0: .insn rre,%[opc] << 16,0,%[src]\n" + " brc 1,0b\n" /* handle partial completion */ + : [src] "+a" (r2), [len] "+d" (r3) + : [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (CPACF_KLMD) + : "cc", "memory"); + + return src_len - r3; +} + +/** + * cpacf_kmac() - executes the KMAC (COMPUTE MESSAGE AUTHENTICATION CODE) + * instruction + * @func: the function code passed to KM; see CPACF_KMAC_xxx defines + * @param: address of parameter block; see POP for details on each func + * @src: address of source memory area + * @src_len: length of src operand in bytes + * + * Returns 0 for the query func, number of processed bytes for digest funcs + */ +static inline int cpacf_kmac(long func, void *param, + const u8 *src, long src_len) +{ + register unsigned long r0 asm("0") = (unsigned long) func; + register unsigned long r1 asm("1") = (unsigned long) param; + register unsigned long r2 asm("2") = (unsigned long) src; + register unsigned long r3 asm("3") = (unsigned long) src_len; + + asm volatile( + "0: .insn rre,%[opc] << 16,0,%[src]\n" + " brc 1,0b\n" /* handle partial completion */ + : [src] "+a" (r2), [len] "+d" (r3) + : [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (CPACF_KMAC) + : "cc", "memory"); + + return src_len - r3; +} + +/** + * cpacf_kmctr() - executes the KMCTR (CIPHER MESSAGE WITH COUNTER) instruction + * @func: the function code passed to KMCTR; see CPACF_KMCTR_xxx defines + * @param: address of parameter block; see POP for details on each func + * @dest: address of destination memory area + * @src: address of source memory area + * @src_len: length of src operand in bytes + * @counter: address of counter value + * + * Returns 0 for the query func, number of processed bytes for + * encryption/decryption funcs + */ +static inline int cpacf_kmctr(long func, void *param, u8 *dest, + const u8 *src, long src_len, u8 *counter) +{ + register unsigned long r0 asm("0") = (unsigned long) func; + register unsigned long r1 asm("1") = (unsigned long) param; + register unsigned long r2 asm("2") = (unsigned long) src; + register unsigned long r3 asm("3") = (unsigned long) src_len; + register unsigned long r4 asm("4") = (unsigned long) dest; + register unsigned long r6 asm("6") = (unsigned long) counter; + + asm volatile( + "0: .insn rrf,%[opc] << 16,%[dst],%[src],%[ctr],0\n" + " brc 1,0b\n" /* handle partial completion */ + : [src] "+a" (r2), [len] "+d" (r3), + [dst] "+a" (r4), [ctr] "+a" (r6) + : [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (CPACF_KMCTR) + : "cc", "memory"); + + return src_len - r3; +} + +/** + * cpacf_ppno() - executes the PPNO (PERFORM PSEUDORANDOM NUMBER OPERATION) + * instruction + * @func: the function code passed to PPNO; see CPACF_PPNO_xxx defines + * @param: address of parameter block; see POP for details on each func + * @dest: address of destination memory area + * @dest_len: size of destination memory area in bytes + * @seed: address of seed data + * @seed_len: size of seed data in bytes + * + * Returns 0 for the query func, number of random bytes stored in + * dest buffer for generate function + */ +static inline int cpacf_ppno(long func, void *param, + u8 *dest, long dest_len, + const u8 *seed, long seed_len) +{ + register unsigned long r0 asm("0") = (unsigned long) func; + register unsigned long r1 asm("1") = (unsigned long) param; + register unsigned long r2 asm("2") = (unsigned long) dest; + register unsigned long r3 asm("3") = (unsigned long) dest_len; + register unsigned long r4 asm("4") = (unsigned long) seed; + register unsigned long r5 asm("5") = (unsigned long) seed_len; + + asm volatile ( + "0: .insn rre,%[opc] << 16,%[dst],%[seed]\n" + " brc 1,0b\n" /* handle partial completion */ + : [dst] "+a" (r2), [dlen] "+d" (r3) + : [fc] "d" (r0), [pba] "a" (r1), + [seed] "a" (r4), [slen] "d" (r5), [opc] "i" (CPACF_PPNO) + : "cc", "memory"); + + return dest_len - r3; +} + +/** + * cpacf_pcc() - executes the PCC (PERFORM CRYPTOGRAPHIC COMPUTATION) + * instruction + * @func: the function code passed to PCC; see CPACF_KM_xxx defines + * @param: address of parameter block; see POP for details on each func + * + * Returns 0. + */ +static inline int cpacf_pcc(long func, void *param) +{ + register unsigned long r0 asm("0") = (unsigned long) func; + register unsigned long r1 asm("1") = (unsigned long) param; + + asm volatile( + "0: .insn rre,%[opc] << 16,0,0\n" /* PCC opcode */ + " brc 1,0b\n" /* handle partial completion */ + : + : [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (CPACF_PCC) + : "cc", "memory"); + + return 0; +} + +#endif /* _ASM_S390_CPACF_H */ diff --git a/arch/s390/include/asm/device.h b/arch/s390/include/asm/device.h index d8f9872b0e2d..4a9f35e0973f 100644 --- a/arch/s390/include/asm/device.h +++ b/arch/s390/include/asm/device.h @@ -3,5 +3,9 @@ * * This file is released under the GPLv2 */ -#include <asm-generic/device.h> +struct dev_archdata { + struct dma_map_ops *dma_ops; +}; +struct pdev_archdata { +}; diff --git a/arch/s390/include/asm/dma-mapping.h b/arch/s390/include/asm/dma-mapping.h index e64bfcb9702f..3249b7464889 100644 --- a/arch/s390/include/asm/dma-mapping.h +++ b/arch/s390/include/asm/dma-mapping.h @@ -11,11 +11,13 @@ #define DMA_ERROR_CODE (~(dma_addr_t) 0x0) -extern struct dma_map_ops s390_dma_ops; +extern struct dma_map_ops s390_pci_dma_ops; static inline struct dma_map_ops *get_dma_ops(struct device *dev) { - return &s390_dma_ops; + if (dev && dev->archdata.dma_ops) + return dev->archdata.dma_ops; + return &dma_noop_ops; } static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size, diff --git a/arch/s390/include/asm/fpu/api.h b/arch/s390/include/asm/fpu/api.h index 5e04f3cbd320..8ae236b0f80b 100644 --- a/arch/s390/include/asm/fpu/api.h +++ b/arch/s390/include/asm/fpu/api.h @@ -22,7 +22,7 @@ static inline int test_fp_ctl(u32 fpc) " la %0,0\n" "1:\n" EX_TABLE(0b,1b) - : "=d" (rc), "=d" (orig_fpc) + : "=d" (rc), "=&d" (orig_fpc) : "d" (fpc), "0" (-EINVAL)); return rc; } diff --git a/arch/s390/include/asm/fpu/types.h b/arch/s390/include/asm/fpu/types.h index 14a8b0c14f87..fe937c9b6471 100644 --- a/arch/s390/include/asm/fpu/types.h +++ b/arch/s390/include/asm/fpu/types.h @@ -11,11 +11,13 @@ #include <asm/sigcontext.h> struct fpu { - __u32 fpc; /* Floating-point control */ + __u32 fpc; /* Floating-point control */ + void *regs; /* Pointer to the current save area */ union { - void *regs; - freg_t *fprs; /* Floating-point register save area */ - __vector128 *vxrs; /* Vector register save area */ + /* Floating-point register save area */ + freg_t fprs[__NUM_FPRS]; + /* Vector register save area */ + __vector128 vxrs[__NUM_VXRS]; }; }; diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h index 836c56290499..64053d9ac3f2 100644 --- a/arch/s390/include/asm/ftrace.h +++ b/arch/s390/include/asm/ftrace.h @@ -12,7 +12,9 @@ #ifndef __ASSEMBLY__ -#define ftrace_return_address(n) __builtin_return_address(n) +unsigned long return_address(int depth); + +#define ftrace_return_address(n) return_address(n) void _mcount(void); void ftrace_caller(void); diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h new file mode 100644 index 000000000000..d054c1b07a3c --- /dev/null +++ b/arch/s390/include/asm/gmap.h @@ -0,0 +1,64 @@ +/* + * KVM guest address space mapping code + * + * Copyright IBM Corp. 2007, 2016 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#ifndef _ASM_S390_GMAP_H +#define _ASM_S390_GMAP_H + +/** + * struct gmap_struct - guest address space + * @crst_list: list of all crst tables used in the guest address space + * @mm: pointer to the parent mm_struct + * @guest_to_host: radix tree with guest to host address translation + * @host_to_guest: radix tree with pointer to segment table entries + * @guest_table_lock: spinlock to protect all entries in the guest page table + * @table: pointer to the page directory + * @asce: address space control element for gmap page table + * @pfault_enabled: defines if pfaults are applicable for the guest + */ +struct gmap { + struct list_head list; + struct list_head crst_list; + struct mm_struct *mm; + struct radix_tree_root guest_to_host; + struct radix_tree_root host_to_guest; + spinlock_t guest_table_lock; + unsigned long *table; + unsigned long asce; + unsigned long asce_end; + void *private; + bool pfault_enabled; +}; + +/** + * struct gmap_notifier - notify function block for page invalidation + * @notifier_call: address of callback function + */ +struct gmap_notifier { + struct list_head list; + void (*notifier_call)(struct gmap *gmap, unsigned long gaddr); +}; + +struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit); +void gmap_free(struct gmap *gmap); +void gmap_enable(struct gmap *gmap); +void gmap_disable(struct gmap *gmap); +int gmap_map_segment(struct gmap *gmap, unsigned long from, + unsigned long to, unsigned long len); +int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len); +unsigned long __gmap_translate(struct gmap *, unsigned long gaddr); +unsigned long gmap_translate(struct gmap *, unsigned long gaddr); +int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr); +int gmap_fault(struct gmap *, unsigned long gaddr, unsigned int fault_flags); +void gmap_discard(struct gmap *, unsigned long from, unsigned long to); +void __gmap_zap(struct gmap *, unsigned long gaddr); +void gmap_unlink(struct mm_struct *, unsigned long *table, unsigned long vmaddr); + +void gmap_register_ipte_notifier(struct gmap_notifier *); +void gmap_unregister_ipte_notifier(struct gmap_notifier *); +int gmap_ipte_notify(struct gmap *, unsigned long start, unsigned long len); + +#endif /* _ASM_S390_GMAP_H */ diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 8959ebb6d2c9..ac82e8eb936d 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -20,6 +20,7 @@ #include <linux/kvm_types.h> #include <linux/kvm_host.h> #include <linux/kvm.h> +#include <linux/seqlock.h> #include <asm/debug.h> #include <asm/cpu.h> #include <asm/fpu/api.h> @@ -37,7 +38,7 @@ */ #define KVM_NR_IRQCHIPS 1 #define KVM_IRQCHIP_NUM_PINS 4096 -#define KVM_HALT_POLL_NS_DEFAULT 0 +#define KVM_HALT_POLL_NS_DEFAULT 80000 /* s390-specific vcpu->requests bit members */ #define KVM_REQ_ENABLE_IBS 8 @@ -229,17 +230,11 @@ struct kvm_s390_itdb { __u8 data[256]; } __packed; -struct kvm_s390_vregs { - __vector128 vrs[32]; - __u8 reserved200[512]; /* for future vector expansion */ -} __packed; - struct sie_page { struct kvm_s390_sie_block sie_block; __u8 reserved200[1024]; /* 0x0200 */ struct kvm_s390_itdb itdb; /* 0x0600 */ - __u8 reserved700[1280]; /* 0x0700 */ - struct kvm_s390_vregs vregs; /* 0x0c00 */ + __u8 reserved700[2304]; /* 0x0700 */ } __packed; struct kvm_vcpu_stat { @@ -250,8 +245,10 @@ struct kvm_vcpu_stat { u32 exit_stop_request; u32 exit_validity; u32 exit_instruction; + u32 exit_pei; u32 halt_successful_poll; u32 halt_attempted_poll; + u32 halt_poll_invalid; u32 halt_wakeup; u32 instruction_lctl; u32 instruction_lctlg; @@ -467,7 +464,7 @@ struct kvm_s390_irq_payload { struct kvm_s390_local_interrupt { spinlock_t lock; struct kvm_s390_float_interrupt *float_int; - wait_queue_head_t *wq; + struct swait_queue_head *wq; atomic_t *cpuflags; DECLARE_BITMAP(sigp_emerg_pending, KVM_MAX_VCPUS); struct kvm_s390_irq_payload irq; @@ -549,15 +546,20 @@ struct kvm_vcpu_arch { struct kvm_s390_local_interrupt local_int; struct hrtimer ckc_timer; struct kvm_s390_pgm_info pgm; - union { - struct cpuid cpu_id; - u64 stidp_data; - }; struct gmap *gmap; struct kvm_guestdbg_info_arch guestdbg; unsigned long pfault_token; unsigned long pfault_select; unsigned long pfault_compare; + bool cputm_enabled; + /* + * The seqcount protects updates to cputm_start and sie_block.cputm, + * this way we can have non-blocking reads with consistent values. + * Only the owning VCPU thread (vcpu->cpu) is allowed to change these + * values and to start/stop/enable/disable cpu timer accounting. + */ + seqcount_t cputm_seqcount; + __u64 cputm_start; }; struct kvm_vm_stat { @@ -596,16 +598,12 @@ struct s390_io_adapter { #define S390_ARCH_FAC_MASK_SIZE_U64 \ (S390_ARCH_FAC_MASK_SIZE_BYTE / sizeof(u64)) -struct kvm_s390_fac { - /* facility list requested by guest */ - __u64 list[S390_ARCH_FAC_LIST_SIZE_U64]; - /* facility mask supported by kvm & hosting machine */ - __u64 mask[S390_ARCH_FAC_LIST_SIZE_U64]; -}; - struct kvm_s390_cpu_model { - struct kvm_s390_fac *fac; - struct cpuid cpu_id; + /* facility mask supported by kvm & hosting machine */ + __u64 fac_mask[S390_ARCH_FAC_LIST_SIZE_U64]; + /* facility list requested by guest (in dma page) */ + __u64 *fac_list; + u64 cpuid; unsigned short ibc; }; @@ -623,6 +621,16 @@ struct kvm_s390_crypto_cb { __u8 reserved80[128]; /* 0x0080 */ }; +/* + * sie_page2 has to be allocated as DMA because fac_list and crycb need + * 31bit addresses in the sie control block. + */ +struct sie_page2 { + __u64 fac_list[S390_ARCH_FAC_LIST_SIZE_U64]; /* 0x0000 */ + struct kvm_s390_crypto_cb crycb; /* 0x0800 */ + u8 reserved900[0x1000 - 0x900]; /* 0x0900 */ +} __packed; + struct kvm_arch{ void *sca; int use_esca; @@ -643,6 +651,7 @@ struct kvm_arch{ int ipte_lock_count; struct mutex ipte_mutex; spinlock_t start_stop_lock; + struct sie_page2 *sie_page2; struct kvm_s390_cpu_model model; struct kvm_s390_crypto crypto; u64 epoch; @@ -689,4 +698,6 @@ static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm, static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} +void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu); + #endif diff --git a/arch/s390/include/asm/livepatch.h b/arch/s390/include/asm/livepatch.h index a52b6cca873d..2c1213785892 100644 --- a/arch/s390/include/asm/livepatch.h +++ b/arch/s390/include/asm/livepatch.h @@ -19,25 +19,14 @@ #include <linux/module.h> -#ifdef CONFIG_LIVEPATCH static inline int klp_check_compiler_support(void) { return 0; } -static inline int klp_write_module_reloc(struct module *mod, unsigned long - type, unsigned long loc, unsigned long value) -{ - /* not supported yet */ - return -ENOSYS; -} - static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip) { regs->psw.addr = ip; } -#else -#error Include linux/livepatch.h, not asm/livepatch.h -#endif #endif diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index d29ad9545b41..081b2ad99d73 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -11,7 +11,7 @@ typedef struct { spinlock_t list_lock; struct list_head pgtable_list; struct list_head gmap_list; - unsigned long asce_bits; + unsigned long asce; unsigned long asce_limit; unsigned long vdso_base; /* The mmu context allocates 4K page tables. */ diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index e485817f7b1a..c837b79b455d 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -26,12 +26,28 @@ static inline int init_new_context(struct task_struct *tsk, mm->context.has_pgste = 0; mm->context.use_skey = 0; #endif - if (mm->context.asce_limit == 0) { + switch (mm->context.asce_limit) { + case 1UL << 42: + /* + * forked 3-level task, fall through to set new asce with new + * mm->pgd + */ + case 0: /* context created by exec, set asce limit to 4TB */ - mm->context.asce_bits = _ASCE_TABLE_LENGTH | - _ASCE_USER_BITS | _ASCE_TYPE_REGION3; mm->context.asce_limit = STACK_TOP_MAX; - } else if (mm->context.asce_limit == (1UL << 31)) { + mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | _ASCE_TYPE_REGION3; + break; + case 1UL << 53: + /* forked 4-level task, set new asce with new mm->pgd */ + mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | _ASCE_TYPE_REGION2; + break; + case 1UL << 31: + /* forked 2-level compat task, set new asce with new mm->pgd */ + mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT; + /* pgd_alloc() did not increase mm->nr_pmds */ mm_inc_nr_pmds(mm); } crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm)); @@ -42,7 +58,7 @@ static inline int init_new_context(struct task_struct *tsk, static inline void set_user_asce(struct mm_struct *mm) { - S390_lowcore.user_asce = mm->context.asce_bits | __pa(mm->pgd); + S390_lowcore.user_asce = mm->context.asce; if (current->thread.mm_segment.ar4) __ctl_load(S390_lowcore.user_asce, 7, 7); set_cpu_flag(CIF_ASCE); @@ -71,7 +87,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, { int cpu = smp_processor_id(); - S390_lowcore.user_asce = next->context.asce_bits | __pa(next->pgd); + S390_lowcore.user_asce = next->context.asce; if (prev == next) return; if (MACHINE_HAS_TLB_LC) @@ -136,4 +152,16 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm, { } +static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, + bool write, bool execute, bool foreign) +{ + /* by default, allow everything */ + return true; +} + +static inline bool arch_pte_access_permitted(pte_t pte, bool write) +{ + /* by default, allow everything */ + return true; +} #endif /* __S390_MMU_CONTEXT_H */ diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index c873e682b67f..0da91c4d30fd 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -9,7 +9,6 @@ #include <linux/pci.h> #include <linux/mutex.h> #include <asm-generic/pci.h> -#include <asm-generic/pci-dma-compat.h> #include <asm/pci_clp.h> #include <asm/pci_debug.h> @@ -32,20 +31,42 @@ int pci_proc_domain(struct pci_bus *); #define ZPCI_FC_BLOCKED 0x20 #define ZPCI_FC_DMA_ENABLED 0x10 +#define ZPCI_FMB_DMA_COUNTER_VALID (1 << 23) + +struct zpci_fmb_fmt0 { + u64 dma_rbytes; + u64 dma_wbytes; +}; + +struct zpci_fmb_fmt1 { + u64 rx_bytes; + u64 rx_packets; + u64 tx_bytes; + u64 tx_packets; +}; + +struct zpci_fmb_fmt2 { + u64 consumed_work_units; + u64 max_work_units; +}; + struct zpci_fmb { - u32 format : 8; - u32 dma_valid : 1; - u32 : 23; + u32 format : 8; + u32 fmt_ind : 24; u32 samples; u64 last_update; - /* hardware counters */ + /* common counters */ u64 ld_ops; u64 st_ops; u64 stb_ops; u64 rpcit_ops; - u64 dma_rbytes; - u64 dma_wbytes; -} __packed __aligned(16); + /* format specific counters */ + union { + struct zpci_fmb_fmt0 fmt0; + struct zpci_fmb_fmt1 fmt1; + struct zpci_fmb_fmt2 fmt2; + }; +} __packed __aligned(128); enum zpci_state { ZPCI_FN_STATE_RESERVED, @@ -66,7 +87,6 @@ struct s390_domain; /* Private data per function */ struct zpci_dev { - struct pci_dev *pdev; struct pci_bus *bus; struct list_head entry; /* list of all zpci_devices, needed for hotplug, etc. */ @@ -192,7 +212,7 @@ int zpci_fmb_disable_device(struct zpci_dev *); /* Debug */ int zpci_debug_init(void); void zpci_debug_exit(void); -void zpci_debug_init_device(struct zpci_dev *); +void zpci_debug_init_device(struct zpci_dev *, const char *); void zpci_debug_exit_device(struct zpci_dev *); void zpci_debug_info(struct zpci_dev *, struct seq_file *); diff --git a/arch/s390/include/asm/pci_clp.h b/arch/s390/include/asm/pci_clp.h index dd78f92f1cce..e75c64cbcf08 100644 --- a/arch/s390/include/asm/pci_clp.h +++ b/arch/s390/include/asm/pci_clp.h @@ -49,9 +49,6 @@ struct clp_fh_list_entry { /* List PCI functions request */ struct clp_req_list_pci { struct clp_req_hdr hdr; - u32 fmt : 4; /* cmd request block format */ - u32 : 28; - u64 reserved1; u64 resume_token; u64 reserved2; } __packed; @@ -59,9 +56,6 @@ struct clp_req_list_pci { /* List PCI functions response */ struct clp_rsp_list_pci { struct clp_rsp_hdr hdr; - u32 fmt : 4; /* cmd request block format */ - u32 : 28; - u64 reserved1; u64 resume_token; u32 reserved2; u16 max_fn; @@ -73,9 +67,6 @@ struct clp_rsp_list_pci { /* Query PCI function request */ struct clp_req_query_pci { struct clp_req_hdr hdr; - u32 fmt : 4; /* cmd request block format */ - u32 : 28; - u64 reserved1; u32 fh; /* function handle */ u32 reserved2; u64 reserved3; @@ -84,9 +75,6 @@ struct clp_req_query_pci { /* Query PCI function response */ struct clp_rsp_query_pci { struct clp_rsp_hdr hdr; - u32 fmt : 4; /* cmd request block format */ - u32 : 28; - u64 : 64; u16 vfn; /* virtual fn number */ u16 : 7; u16 util_str_avail : 1; /* utility string available? */ @@ -108,21 +96,15 @@ struct clp_rsp_query_pci { /* Query PCI function group request */ struct clp_req_query_pci_grp { struct clp_req_hdr hdr; - u32 fmt : 4; /* cmd request block format */ - u32 : 28; - u64 reserved1; - u32 : 24; + u32 reserved2 : 24; u32 pfgid : 8; /* function group id */ - u32 reserved2; - u64 reserved3; + u32 reserved3; + u64 reserved4; } __packed; /* Query PCI function group response */ struct clp_rsp_query_pci_grp { struct clp_rsp_hdr hdr; - u32 fmt : 4; /* cmd request block format */ - u32 : 28; - u64 reserved1; u16 : 4; u16 noi : 12; /* number of interrupts */ u8 version; @@ -141,9 +123,6 @@ struct clp_rsp_query_pci_grp { /* Set PCI function request */ struct clp_req_set_pci { struct clp_req_hdr hdr; - u32 fmt : 4; /* cmd request block format */ - u32 : 28; - u64 reserved1; u32 fh; /* function handle */ u16 reserved2; u8 oc; /* operation controls */ @@ -154,9 +133,6 @@ struct clp_req_set_pci { /* Set PCI function response */ struct clp_rsp_set_pci { struct clp_rsp_hdr hdr; - u32 fmt : 4; /* cmd request block format */ - u32 : 28; - u64 reserved1; u32 fh; /* function handle */ u32 reserved3; u64 reserved4; diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h index 6d6556ca24aa..90240dfef76a 100644 --- a/arch/s390/include/asm/percpu.h +++ b/arch/s390/include/asm/percpu.h @@ -178,7 +178,6 @@ ret__; \ }) -#define this_cpu_cmpxchg_double_4 arch_this_cpu_cmpxchg_double #define this_cpu_cmpxchg_double_8 arch_this_cpu_cmpxchg_double #include <asm-generic/percpu.h> diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h index f897ec73dc8c..1f7ff85c5e4c 100644 --- a/arch/s390/include/asm/perf_event.h +++ b/arch/s390/include/asm/perf_event.h @@ -21,7 +21,7 @@ #define PMU_F_ERR_LSDA 0x0200 #define PMU_F_ERR_MASK (PMU_F_ERR_IBE|PMU_F_ERR_LSDA) -/* Perf defintions for PMU event attributes in sysfs */ +/* Perf definitions for PMU event attributes in sysfs */ extern __init const struct attribute_group **cpumf_cf_event_group(void); extern ssize_t cpumf_events_sysfs_show(struct device *dev, struct device_attribute *attr, diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index d7cc79fb6191..da34cb6b1f3b 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -23,10 +23,6 @@ void page_table_free(struct mm_struct *, unsigned long *); void page_table_free_rcu(struct mmu_gather *, unsigned long *, unsigned long); extern int page_table_allocate_pgste; -int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, - unsigned long key, bool nq); -unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr); - static inline void clear_table(unsigned long *s, unsigned long val, size_t n) { typedef struct { char _[n]; } addrtype; @@ -56,8 +52,8 @@ static inline unsigned long pgd_entry_type(struct mm_struct *mm) return _REGION2_ENTRY_EMPTY; } -int crst_table_upgrade(struct mm_struct *, unsigned long limit); -void crst_table_downgrade(struct mm_struct *, unsigned long limit); +int crst_table_upgrade(struct mm_struct *); +void crst_table_downgrade(struct mm_struct *); static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) { diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 64ead8091248..18d2beb89340 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -298,15 +298,15 @@ static inline int is_module_addr(void *addr) /* * Segment table entry encoding (R = read-only, I = invalid, y = young bit): - * dy..R...I...wr + * dy..R...I...rw * prot-none, clean, old 00..1...1...00 * prot-none, clean, young 01..1...1...00 * prot-none, dirty, old 10..1...1...00 * prot-none, dirty, young 11..1...1...00 - * read-only, clean, old 00..1...1...01 - * read-only, clean, young 01..1...0...01 - * read-only, dirty, old 10..1...1...01 - * read-only, dirty, young 11..1...0...01 + * read-only, clean, old 00..1...1...10 + * read-only, clean, young 01..1...0...10 + * read-only, dirty, old 10..1...1...10 + * read-only, dirty, young 11..1...0...10 * read-write, clean, old 00..1...1...11 * read-write, clean, young 01..1...0...11 * read-write, dirty, old 10..0...1...11 @@ -520,15 +520,6 @@ static inline int pmd_bad(pmd_t pmd) return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS) != 0; } -#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS -extern int pmdp_set_access_flags(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp, - pmd_t entry, int dirty); - -#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH -extern int pmdp_clear_flush_young(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp); - #define __HAVE_ARCH_PMD_WRITE static inline int pmd_write(pmd_t pmd) { @@ -631,208 +622,6 @@ static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd) return pmd; } -static inline pgste_t pgste_get_lock(pte_t *ptep) -{ - unsigned long new = 0; -#ifdef CONFIG_PGSTE - unsigned long old; - - preempt_disable(); - asm( - " lg %0,%2\n" - "0: lgr %1,%0\n" - " nihh %0,0xff7f\n" /* clear PCL bit in old */ - " oihh %1,0x0080\n" /* set PCL bit in new */ - " csg %0,%1,%2\n" - " jl 0b\n" - : "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE]) - : "Q" (ptep[PTRS_PER_PTE]) : "cc", "memory"); -#endif - return __pgste(new); -} - -static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste) -{ -#ifdef CONFIG_PGSTE - asm( - " nihh %1,0xff7f\n" /* clear PCL bit */ - " stg %1,%0\n" - : "=Q" (ptep[PTRS_PER_PTE]) - : "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE]) - : "cc", "memory"); - preempt_enable(); -#endif -} - -static inline pgste_t pgste_get(pte_t *ptep) -{ - unsigned long pgste = 0; -#ifdef CONFIG_PGSTE - pgste = *(unsigned long *)(ptep + PTRS_PER_PTE); -#endif - return __pgste(pgste); -} - -static inline void pgste_set(pte_t *ptep, pgste_t pgste) -{ -#ifdef CONFIG_PGSTE - *(pgste_t *)(ptep + PTRS_PER_PTE) = pgste; -#endif -} - -static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste, - struct mm_struct *mm) -{ -#ifdef CONFIG_PGSTE - unsigned long address, bits, skey; - - if (!mm_use_skey(mm) || pte_val(*ptep) & _PAGE_INVALID) - return pgste; - address = pte_val(*ptep) & PAGE_MASK; - skey = (unsigned long) page_get_storage_key(address); - bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); - /* Transfer page changed & referenced bit to guest bits in pgste */ - pgste_val(pgste) |= bits << 48; /* GR bit & GC bit */ - /* Copy page access key and fetch protection bit to pgste */ - pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT); - pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; -#endif - return pgste; - -} - -static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry, - struct mm_struct *mm) -{ -#ifdef CONFIG_PGSTE - unsigned long address; - unsigned long nkey; - - if (!mm_use_skey(mm) || pte_val(entry) & _PAGE_INVALID) - return; - VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID)); - address = pte_val(entry) & PAGE_MASK; - /* - * Set page access key and fetch protection bit from pgste. - * The guest C/R information is still in the PGSTE, set real - * key C/R to 0. - */ - nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56; - nkey |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48; - page_set_storage_key(address, nkey, 0); -#endif -} - -static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry) -{ - if ((pte_val(entry) & _PAGE_PRESENT) && - (pte_val(entry) & _PAGE_WRITE) && - !(pte_val(entry) & _PAGE_INVALID)) { - if (!MACHINE_HAS_ESOP) { - /* - * Without enhanced suppression-on-protection force - * the dirty bit on for all writable ptes. - */ - pte_val(entry) |= _PAGE_DIRTY; - pte_val(entry) &= ~_PAGE_PROTECT; - } - if (!(pte_val(entry) & _PAGE_PROTECT)) - /* This pte allows write access, set user-dirty */ - pgste_val(pgste) |= PGSTE_UC_BIT; - } - *ptep = entry; - return pgste; -} - -/** - * struct gmap_struct - guest address space - * @crst_list: list of all crst tables used in the guest address space - * @mm: pointer to the parent mm_struct - * @guest_to_host: radix tree with guest to host address translation - * @host_to_guest: radix tree with pointer to segment table entries - * @guest_table_lock: spinlock to protect all entries in the guest page table - * @table: pointer to the page directory - * @asce: address space control element for gmap page table - * @pfault_enabled: defines if pfaults are applicable for the guest - */ -struct gmap { - struct list_head list; - struct list_head crst_list; - struct mm_struct *mm; - struct radix_tree_root guest_to_host; - struct radix_tree_root host_to_guest; - spinlock_t guest_table_lock; - unsigned long *table; - unsigned long asce; - unsigned long asce_end; - void *private; - bool pfault_enabled; -}; - -/** - * struct gmap_notifier - notify function block for page invalidation - * @notifier_call: address of callback function - */ -struct gmap_notifier { - struct list_head list; - void (*notifier_call)(struct gmap *gmap, unsigned long gaddr); -}; - -struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit); -void gmap_free(struct gmap *gmap); -void gmap_enable(struct gmap *gmap); -void gmap_disable(struct gmap *gmap); -int gmap_map_segment(struct gmap *gmap, unsigned long from, - unsigned long to, unsigned long len); -int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len); -unsigned long __gmap_translate(struct gmap *, unsigned long gaddr); -unsigned long gmap_translate(struct gmap *, unsigned long gaddr); -int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr); -int gmap_fault(struct gmap *, unsigned long gaddr, unsigned int fault_flags); -void gmap_discard(struct gmap *, unsigned long from, unsigned long to); -void __gmap_zap(struct gmap *, unsigned long gaddr); -bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *); - - -void gmap_register_ipte_notifier(struct gmap_notifier *); -void gmap_unregister_ipte_notifier(struct gmap_notifier *); -int gmap_ipte_notify(struct gmap *, unsigned long start, unsigned long len); -void gmap_do_ipte_notify(struct mm_struct *, unsigned long addr, pte_t *); - -static inline pgste_t pgste_ipte_notify(struct mm_struct *mm, - unsigned long addr, - pte_t *ptep, pgste_t pgste) -{ -#ifdef CONFIG_PGSTE - if (pgste_val(pgste) & PGSTE_IN_BIT) { - pgste_val(pgste) &= ~PGSTE_IN_BIT; - gmap_do_ipte_notify(mm, addr, ptep); - } -#endif - return pgste; -} - -/* - * Certain architectures need to do special things when PTEs - * within a page table are directly modified. Thus, the following - * hook is made available. - */ -static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t entry) -{ - pgste_t pgste; - - if (mm_has_pgste(mm)) { - pgste = pgste_get_lock(ptep); - pgste_val(pgste) &= ~_PGSTE_GPS_ZERO; - pgste_set_key(ptep, pgste, entry, mm); - pgste = pgste_set_pte(ptep, pgste, entry); - pgste_set_unlock(ptep, pgste); - } else { - *ptep = entry; - } -} - /* * query functions pte_write/pte_dirty/pte_young only work if * pte_present() is true. Undefined behaviour if not.. @@ -998,96 +787,30 @@ static inline void __ptep_ipte_range(unsigned long address, int nr, pte_t *ptep) } while (nr != 255); } -static inline void ptep_flush_direct(struct mm_struct *mm, - unsigned long address, pte_t *ptep) -{ - int active, count; - - if (pte_val(*ptep) & _PAGE_INVALID) - return; - active = (mm == current->active_mm) ? 1 : 0; - count = atomic_add_return(0x10000, &mm->context.attach_count); - if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active && - cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) - __ptep_ipte_local(address, ptep); - else - __ptep_ipte(address, ptep); - atomic_sub(0x10000, &mm->context.attach_count); -} - -static inline void ptep_flush_lazy(struct mm_struct *mm, - unsigned long address, pte_t *ptep) -{ - int active, count; - - if (pte_val(*ptep) & _PAGE_INVALID) - return; - active = (mm == current->active_mm) ? 1 : 0; - count = atomic_add_return(0x10000, &mm->context.attach_count); - if ((count & 0xffff) <= active) { - pte_val(*ptep) |= _PAGE_INVALID; - mm->context.flush_mm = 1; - } else - __ptep_ipte(address, ptep); - atomic_sub(0x10000, &mm->context.attach_count); -} - /* - * Get (and clear) the user dirty bit for a pte. + * This is hard to understand. ptep_get_and_clear and ptep_clear_flush + * both clear the TLB for the unmapped pte. The reason is that + * ptep_get_and_clear is used in common code (e.g. change_pte_range) + * to modify an active pte. The sequence is + * 1) ptep_get_and_clear + * 2) set_pte_at + * 3) flush_tlb_range + * On s390 the tlb needs to get flushed with the modification of the pte + * if the pte is active. The only way how this can be implemented is to + * have ptep_get_and_clear do the tlb flush. In exchange flush_tlb_range + * is a nop. */ -static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm, - unsigned long addr, - pte_t *ptep) -{ - pgste_t pgste; - pte_t pte; - int dirty; - - if (!mm_has_pgste(mm)) - return 0; - pgste = pgste_get_lock(ptep); - dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT); - pgste_val(pgste) &= ~PGSTE_UC_BIT; - pte = *ptep; - if (dirty && (pte_val(pte) & _PAGE_PRESENT)) { - pgste = pgste_ipte_notify(mm, addr, ptep, pgste); - __ptep_ipte(addr, ptep); - if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE)) - pte_val(pte) |= _PAGE_PROTECT; - else - pte_val(pte) |= _PAGE_INVALID; - *ptep = pte; - } - pgste_set_unlock(ptep, pgste); - return dirty; -} +pte_t ptep_xchg_direct(struct mm_struct *, unsigned long, pte_t *, pte_t); +pte_t ptep_xchg_lazy(struct mm_struct *, unsigned long, pte_t *, pte_t); #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { - pgste_t pgste; - pte_t pte, oldpte; - int young; - - if (mm_has_pgste(vma->vm_mm)) { - pgste = pgste_get_lock(ptep); - pgste = pgste_ipte_notify(vma->vm_mm, addr, ptep, pgste); - } - - oldpte = pte = *ptep; - ptep_flush_direct(vma->vm_mm, addr, ptep); - young = pte_young(pte); - pte = pte_mkold(pte); - - if (mm_has_pgste(vma->vm_mm)) { - pgste = pgste_update_all(&oldpte, pgste, vma->vm_mm); - pgste = pgste_set_pte(ptep, pgste, pte); - pgste_set_unlock(ptep, pgste); - } else - *ptep = pte; + pte_t pte = *ptep; - return young; + pte = ptep_xchg_direct(vma->vm_mm, addr, ptep, pte_mkold(pte)); + return pte_young(pte); } #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH @@ -1097,104 +820,22 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma, return ptep_test_and_clear_young(vma, address, ptep); } -/* - * This is hard to understand. ptep_get_and_clear and ptep_clear_flush - * both clear the TLB for the unmapped pte. The reason is that - * ptep_get_and_clear is used in common code (e.g. change_pte_range) - * to modify an active pte. The sequence is - * 1) ptep_get_and_clear - * 2) set_pte_at - * 3) flush_tlb_range - * On s390 the tlb needs to get flushed with the modification of the pte - * if the pte is active. The only way how this can be implemented is to - * have ptep_get_and_clear do the tlb flush. In exchange flush_tlb_range - * is a nop. - */ #define __HAVE_ARCH_PTEP_GET_AND_CLEAR static inline pte_t ptep_get_and_clear(struct mm_struct *mm, - unsigned long address, pte_t *ptep) + unsigned long addr, pte_t *ptep) { - pgste_t pgste; - pte_t pte; - - if (mm_has_pgste(mm)) { - pgste = pgste_get_lock(ptep); - pgste = pgste_ipte_notify(mm, address, ptep, pgste); - } - - pte = *ptep; - ptep_flush_lazy(mm, address, ptep); - pte_val(*ptep) = _PAGE_INVALID; - - if (mm_has_pgste(mm)) { - pgste = pgste_update_all(&pte, pgste, mm); - pgste_set_unlock(ptep, pgste); - } - return pte; + return ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID)); } #define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION -static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, - unsigned long address, - pte_t *ptep) -{ - pgste_t pgste; - pte_t pte; - - if (mm_has_pgste(mm)) { - pgste = pgste_get_lock(ptep); - pgste_ipte_notify(mm, address, ptep, pgste); - } - - pte = *ptep; - ptep_flush_lazy(mm, address, ptep); - - if (mm_has_pgste(mm)) { - pgste = pgste_update_all(&pte, pgste, mm); - pgste_set(ptep, pgste); - } - return pte; -} - -static inline void ptep_modify_prot_commit(struct mm_struct *mm, - unsigned long address, - pte_t *ptep, pte_t pte) -{ - pgste_t pgste; - - if (mm_has_pgste(mm)) { - pgste = pgste_get(ptep); - pgste_set_key(ptep, pgste, pte, mm); - pgste = pgste_set_pte(ptep, pgste, pte); - pgste_set_unlock(ptep, pgste); - } else - *ptep = pte; -} +pte_t ptep_modify_prot_start(struct mm_struct *, unsigned long, pte_t *); +void ptep_modify_prot_commit(struct mm_struct *, unsigned long, pte_t *, pte_t); #define __HAVE_ARCH_PTEP_CLEAR_FLUSH static inline pte_t ptep_clear_flush(struct vm_area_struct *vma, - unsigned long address, pte_t *ptep) + unsigned long addr, pte_t *ptep) { - pgste_t pgste; - pte_t pte; - - if (mm_has_pgste(vma->vm_mm)) { - pgste = pgste_get_lock(ptep); - pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste); - } - - pte = *ptep; - ptep_flush_direct(vma->vm_mm, address, ptep); - pte_val(*ptep) = _PAGE_INVALID; - - if (mm_has_pgste(vma->vm_mm)) { - if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) == - _PGSTE_GPS_USAGE_UNUSED) - pte_val(pte) |= _PAGE_UNUSED; - pgste = pgste_update_all(&pte, pgste, vma->vm_mm); - pgste_set_unlock(ptep, pgste); - } - return pte; + return ptep_xchg_direct(vma->vm_mm, addr, ptep, __pte(_PAGE_INVALID)); } /* @@ -1206,80 +847,66 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma, */ #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, - unsigned long address, + unsigned long addr, pte_t *ptep, int full) { - pgste_t pgste; - pte_t pte; - - if (!full && mm_has_pgste(mm)) { - pgste = pgste_get_lock(ptep); - pgste = pgste_ipte_notify(mm, address, ptep, pgste); - } - - pte = *ptep; - if (!full) - ptep_flush_lazy(mm, address, ptep); - pte_val(*ptep) = _PAGE_INVALID; - - if (!full && mm_has_pgste(mm)) { - pgste = pgste_update_all(&pte, pgste, mm); - pgste_set_unlock(ptep, pgste); + if (full) { + pte_t pte = *ptep; + *ptep = __pte(_PAGE_INVALID); + return pte; } - return pte; + return ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID)); } #define __HAVE_ARCH_PTEP_SET_WRPROTECT -static inline pte_t ptep_set_wrprotect(struct mm_struct *mm, - unsigned long address, pte_t *ptep) +static inline void ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) { - pgste_t pgste; pte_t pte = *ptep; - if (pte_write(pte)) { - if (mm_has_pgste(mm)) { - pgste = pgste_get_lock(ptep); - pgste = pgste_ipte_notify(mm, address, ptep, pgste); - } - - ptep_flush_lazy(mm, address, ptep); - pte = pte_wrprotect(pte); - - if (mm_has_pgste(mm)) { - pgste = pgste_set_pte(ptep, pgste, pte); - pgste_set_unlock(ptep, pgste); - } else - *ptep = pte; - } - return pte; + if (pte_write(pte)) + ptep_xchg_lazy(mm, addr, ptep, pte_wrprotect(pte)); } #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS static inline int ptep_set_access_flags(struct vm_area_struct *vma, - unsigned long address, pte_t *ptep, + unsigned long addr, pte_t *ptep, pte_t entry, int dirty) { - pgste_t pgste; - pte_t oldpte; - - oldpte = *ptep; - if (pte_same(oldpte, entry)) + if (pte_same(*ptep, entry)) return 0; - if (mm_has_pgste(vma->vm_mm)) { - pgste = pgste_get_lock(ptep); - pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste); - } + ptep_xchg_direct(vma->vm_mm, addr, ptep, entry); + return 1; +} - ptep_flush_direct(vma->vm_mm, address, ptep); +/* + * Additional functions to handle KVM guest page tables + */ +void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t entry); +void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep); +void ptep_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep); +void ptep_zap_unused(struct mm_struct *mm, unsigned long addr, + pte_t *ptep , int reset); +void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep); + +bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long address); +int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, + unsigned char key, bool nq); +unsigned char get_guest_storage_key(struct mm_struct *mm, unsigned long addr); - if (mm_has_pgste(vma->vm_mm)) { - if (pte_val(oldpte) & _PAGE_INVALID) - pgste_set_key(ptep, pgste, entry, vma->vm_mm); - pgste = pgste_set_pte(ptep, pgste, entry); - pgste_set_unlock(ptep, pgste); - } else +/* + * Certain architectures need to do special things when PTEs + * within a page table are directly modified. Thus, the following + * hook is made available. + */ +static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t entry) +{ + if (mm_has_pgste(mm)) + ptep_set_pte_at(mm, addr, ptep, entry); + else *ptep = entry; - return 1; } /* @@ -1476,54 +1103,51 @@ static inline void __pmdp_idte_local(unsigned long address, pmd_t *pmdp) : "cc" ); } -static inline void pmdp_flush_direct(struct mm_struct *mm, - unsigned long address, pmd_t *pmdp) -{ - int active, count; +pmd_t pmdp_xchg_direct(struct mm_struct *, unsigned long, pmd_t *, pmd_t); +pmd_t pmdp_xchg_lazy(struct mm_struct *, unsigned long, pmd_t *, pmd_t); - if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID) - return; - if (!MACHINE_HAS_IDTE) { - __pmdp_csp(pmdp); - return; - } - active = (mm == current->active_mm) ? 1 : 0; - count = atomic_add_return(0x10000, &mm->context.attach_count); - if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active && - cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) - __pmdp_idte_local(address, pmdp); - else - __pmdp_idte(address, pmdp); - atomic_sub(0x10000, &mm->context.attach_count); -} +#ifdef CONFIG_TRANSPARENT_HUGEPAGE -static inline void pmdp_flush_lazy(struct mm_struct *mm, - unsigned long address, pmd_t *pmdp) +#define __HAVE_ARCH_PGTABLE_DEPOSIT +void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, + pgtable_t pgtable); + +#define __HAVE_ARCH_PGTABLE_WITHDRAW +pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); + +#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS +static inline int pmdp_set_access_flags(struct vm_area_struct *vma, + unsigned long addr, pmd_t *pmdp, + pmd_t entry, int dirty) { - int active, count; + VM_BUG_ON(addr & ~HPAGE_MASK); - if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID) - return; - active = (mm == current->active_mm) ? 1 : 0; - count = atomic_add_return(0x10000, &mm->context.attach_count); - if ((count & 0xffff) <= active) { - pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID; - mm->context.flush_mm = 1; - } else if (MACHINE_HAS_IDTE) - __pmdp_idte(address, pmdp); - else - __pmdp_csp(pmdp); - atomic_sub(0x10000, &mm->context.attach_count); + entry = pmd_mkyoung(entry); + if (dirty) + entry = pmd_mkdirty(entry); + if (pmd_val(*pmdp) == pmd_val(entry)) + return 0; + pmdp_xchg_direct(vma->vm_mm, addr, pmdp, entry); + return 1; } -#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG +static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, + unsigned long addr, pmd_t *pmdp) +{ + pmd_t pmd = *pmdp; -#define __HAVE_ARCH_PGTABLE_DEPOSIT -extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, - pgtable_t pgtable); + pmd = pmdp_xchg_direct(vma->vm_mm, addr, pmdp, pmd_mkold(pmd)); + return pmd_young(pmd); +} -#define __HAVE_ARCH_PGTABLE_WITHDRAW -extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); +#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH +static inline int pmdp_clear_flush_young(struct vm_area_struct *vma, + unsigned long addr, pmd_t *pmdp) +{ + VM_BUG_ON(addr & ~HPAGE_MASK); + return pmdp_test_and_clear_young(vma, addr, pmdp); +} static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t entry) @@ -1539,66 +1163,48 @@ static inline pmd_t pmd_mkhuge(pmd_t pmd) return pmd; } -#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG -static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp) -{ - pmd_t pmd; - - pmd = *pmdp; - pmdp_flush_direct(vma->vm_mm, address, pmdp); - *pmdp = pmd_mkold(pmd); - return pmd_young(pmd); -} - #define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, - unsigned long address, pmd_t *pmdp) + unsigned long addr, pmd_t *pmdp) { - pmd_t pmd = *pmdp; - - pmdp_flush_direct(mm, address, pmdp); - pmd_clear(pmdp); - return pmd; + return pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_INVALID)); } #define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL static inline pmd_t pmdp_huge_get_and_clear_full(struct mm_struct *mm, - unsigned long address, + unsigned long addr, pmd_t *pmdp, int full) { - pmd_t pmd = *pmdp; - - if (!full) - pmdp_flush_lazy(mm, address, pmdp); - pmd_clear(pmdp); - return pmd; + if (full) { + pmd_t pmd = *pmdp; + *pmdp = __pmd(_SEGMENT_ENTRY_INVALID); + return pmd; + } + return pmdp_xchg_lazy(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_INVALID)); } #define __HAVE_ARCH_PMDP_HUGE_CLEAR_FLUSH static inline pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp) + unsigned long addr, pmd_t *pmdp) { - return pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp); + return pmdp_huge_get_and_clear(vma->vm_mm, addr, pmdp); } #define __HAVE_ARCH_PMDP_INVALIDATE static inline void pmdp_invalidate(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp) + unsigned long addr, pmd_t *pmdp) { - pmdp_flush_direct(vma->vm_mm, address, pmdp); + pmdp_xchg_direct(vma->vm_mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_INVALID)); } #define __HAVE_ARCH_PMDP_SET_WRPROTECT static inline void pmdp_set_wrprotect(struct mm_struct *mm, - unsigned long address, pmd_t *pmdp) + unsigned long addr, pmd_t *pmdp) { pmd_t pmd = *pmdp; - if (pmd_write(pmd)) { - pmdp_flush_direct(mm, address, pmdp); - set_pmd_at(mm, address, pmdp, pmd_wrprotect(pmd)); - } + if (pmd_write(pmd)) + pmd = pmdp_xchg_lazy(mm, addr, pmdp, pmd_wrprotect(pmd)); } static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, @@ -1617,6 +1223,7 @@ static inline int pmd_trans_huge(pmd_t pmd) return pmd_val(pmd) & _SEGMENT_ENTRY_LARGE; } +#define has_transparent_hugepage has_transparent_hugepage static inline int has_transparent_hugepage(void) { return MACHINE_HAS_HPAGE ? 1 : 0; diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 1c4fe129486d..9d4d311d7e52 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -105,7 +105,6 @@ typedef struct { * Thread structure */ struct thread_struct { - struct fpu fpu; /* FP and VX register save area */ unsigned int acrs[NUM_ACRS]; unsigned long ksp; /* kernel stack pointer */ mm_segment_t mm_segment; @@ -120,6 +119,11 @@ struct thread_struct { /* cpu runtime instrumentation */ struct runtime_instr_cb *ri_cb; unsigned char trap_tdb[256]; /* Transaction abort diagnose block */ + /* + * Warning: 'fpu' is dynamically-sized. It *MUST* be at + * the end. + */ + struct fpu fpu; /* FP and VX register save area */ }; /* Flag to disable transactions. */ @@ -155,10 +159,9 @@ struct stack_frame { #define ARCH_MIN_TASKALIGN 8 -extern __vector128 init_task_fpu_regs[__NUM_VXRS]; #define INIT_THREAD { \ .ksp = sizeof(init_stack) + (unsigned long) &init_stack, \ - .fpu.regs = (void *)&init_task_fpu_regs, \ + .fpu.regs = (void *) init_task.thread.fpu.fprs, \ } /* @@ -175,7 +178,7 @@ extern __vector128 init_task_fpu_regs[__NUM_VXRS]; regs->psw.mask = PSW_USER_BITS | PSW_MASK_BA; \ regs->psw.addr = new_psw; \ regs->gprs[15] = new_stackp; \ - crst_table_downgrade(current->mm, 1UL << 31); \ + crst_table_downgrade(current->mm); \ execve_tail(); \ } while (0) @@ -184,6 +187,10 @@ struct task_struct; struct mm_struct; struct seq_file; +typedef int (*dump_trace_func_t)(void *data, unsigned long address); +void dump_trace(dump_trace_func_t func, void *data, + struct task_struct *task, unsigned long sp); + void show_cacheinfo(struct seq_file *m); /* Free all resources held by a thread. */ @@ -203,6 +210,14 @@ unsigned long get_wchan(struct task_struct *p); /* Has task runtime instrumentation enabled ? */ #define is_ri_task(tsk) (!!(tsk)->thread.ri_cb) +static inline unsigned long current_stack_pointer(void) +{ + unsigned long sp; + + asm volatile("la %0,0(15)" : "=a" (sp)); + return sp; +} + static inline unsigned short stap(void) { unsigned short cpu_address; diff --git a/arch/s390/include/asm/rwsem.h b/arch/s390/include/asm/rwsem.h index 4b43ee7e6776..c75e4471e618 100644 --- a/arch/s390/include/asm/rwsem.h +++ b/arch/s390/include/asm/rwsem.h @@ -31,7 +31,7 @@ * This should be totally fair - if anything is waiting, a process that wants a * lock will go to the back of the queue. When the currently active lock is * released, if there's a writer at the front of the queue, then that and only - * that will be woken up; if there's a bunch of consequtive readers at the + * that will be woken up; if there's a bunch of consecutive readers at the * front, then they'll all be woken up, but no other readers will be. */ @@ -90,7 +90,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) /* * lock for writing */ -static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) +static inline long ___down_write(struct rw_semaphore *sem) { signed long old, new, tmp; @@ -104,13 +104,23 @@ static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) : "=&d" (old), "=&d" (new), "=Q" (sem->count) : "Q" (sem->count), "m" (tmp) : "cc", "memory"); - if (old != 0) - rwsem_down_write_failed(sem); + + return old; } static inline void __down_write(struct rw_semaphore *sem) { - __down_write_nested(sem, 0); + if (___down_write(sem)) + rwsem_down_write_failed(sem); +} + +static inline int __down_write_killable(struct rw_semaphore *sem) +{ + if (___down_write(sem)) + if (IS_ERR(rwsem_down_write_failed_killable(sem))) + return -EINTR; + + return 0; } /* diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index bab456be9a4f..e4f6f73afe2f 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -69,9 +69,22 @@ struct sclp_info { unsigned int max_cores; unsigned long hsa_size; unsigned long facilities; + unsigned int hmfai; }; extern struct sclp_info sclp; +struct zpci_report_error_header { + u8 version; /* Interface version byte */ + u8 action; /* Action qualifier byte + * 1: Deconfigure and repair action requested + * (OpenCrypto Problem Call Home) + * 2: Informational Report + * (OpenCrypto Successful Diagnostics Execution) + */ + u16 length; /* Length of Subsequent Data (up to 4K – SCLP header */ + u8 data[0]; /* Subsequent Data passed verbatim to SCLP ET 24 */ +} __packed; + int sclp_get_core_info(struct sclp_core_info *info); int sclp_core_configure(u8 core); int sclp_core_deconfigure(u8 core); @@ -83,6 +96,7 @@ int sclp_chp_read_info(struct sclp_chp_info *info); void sclp_get_ipl_info(struct sclp_ipl_info *info); int sclp_pci_configure(u32 fid); int sclp_pci_deconfigure(u32 fid); +int sclp_pci_report(struct zpci_report_error_header *report, u32 fh, u32 fid); int memcpy_hsa_kernel(void *dest, unsigned long src, size_t count); int memcpy_hsa_user(void __user *dest, unsigned long src, size_t count); void sclp_early_detect(void); diff --git a/arch/s390/include/asm/seccomp.h b/arch/s390/include/asm/seccomp.h index 781a9cf9b002..e10f8337367b 100644 --- a/arch/s390/include/asm/seccomp.h +++ b/arch/s390/include/asm/seccomp.h @@ -13,4 +13,6 @@ #define __NR_seccomp_exit_32 __NR_exit #define __NR_seccomp_sigreturn_32 __NR_sigreturn +#include <asm-generic/seccomp.h> + #endif /* _ASM_S390_SECCOMP_H */ diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 69837225119e..c0f0efbb6ab5 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -101,6 +101,8 @@ extern void pfault_fini(void); #define pfault_fini() do { } while (0) #endif /* CONFIG_PFAULT */ +void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault); + extern void cmma_init(void); extern void (*_machine_restart)(char *command); diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h index ec60cf7fa0a2..1c8f33fca356 100644 --- a/arch/s390/include/asm/sigp.h +++ b/arch/s390/include/asm/sigp.h @@ -27,6 +27,7 @@ /* SIGP cpu status bits */ +#define SIGP_STATUS_INVALID_ORDER 0x00000002UL #define SIGP_STATUS_CHECK_STOP 0x00000010UL #define SIGP_STATUS_STOPPED 0x00000040UL #define SIGP_STATUS_EXT_CALL_PENDING 0x00000080UL diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index 2fffc2c27581..f15c0398c363 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -62,6 +62,7 @@ static inline struct thread_info *current_thread_info(void) } void arch_release_task_struct(struct task_struct *tsk); +int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); #define THREAD_SIZE_ORDER THREAD_ORDER diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index ca148f7c3eaa..a2e6ef32e054 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -110,8 +110,7 @@ static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce) static inline void __tlb_flush_kernel(void) { if (MACHINE_HAS_IDTE) - __tlb_flush_idte((unsigned long) init_mm.pgd | - init_mm.context.asce_bits); + __tlb_flush_idte(init_mm.context.asce); else __tlb_flush_global(); } @@ -133,8 +132,7 @@ static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce) static inline void __tlb_flush_kernel(void) { if (MACHINE_HAS_TLB_LC) - __tlb_flush_idte_local((unsigned long) init_mm.pgd | - init_mm.context.asce_bits); + __tlb_flush_idte_local(init_mm.context.asce); else __tlb_flush_local(); } @@ -148,8 +146,7 @@ static inline void __tlb_flush_mm(struct mm_struct * mm) * only ran on the local cpu. */ if (MACHINE_HAS_IDTE && list_empty(&mm->context.gmap_list)) - __tlb_flush_asce(mm, (unsigned long) mm->pgd | - mm->context.asce_bits); + __tlb_flush_asce(mm, mm->context.asce); else __tlb_flush_full(mm); } diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 9dd4cc47ddc7..e0900ddf91dd 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -79,18 +79,12 @@ struct exception_table_entry int insn, fixup; }; -static inline unsigned long extable_insn(const struct exception_table_entry *x) -{ - return (unsigned long)&x->insn + x->insn; -} - static inline unsigned long extable_fixup(const struct exception_table_entry *x) { return (unsigned long)&x->fixup + x->fixup; } -#define ARCH_HAS_SORT_EXTABLE -#define ARCH_HAS_SEARCH_EXTABLE +#define ARCH_HAS_RELATIVE_EXTABLE /** * __copy_from_user: - Copy a block of data from user space, with less checking. diff --git a/arch/s390/include/asm/xor.h b/arch/s390/include/asm/xor.h index c82eb12a5b18..c988df744a70 100644 --- a/arch/s390/include/asm/xor.h +++ b/arch/s390/include/asm/xor.h @@ -1 +1,20 @@ -#include <asm-generic/xor.h> +/* + * Optimited xor routines + * + * Copyright IBM Corp. 2016 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ +#ifndef _ASM_S390_XOR_H +#define _ASM_S390_XOR_H + +extern struct xor_block_template xor_block_xc; + +#undef XOR_TRY_TEMPLATES +#define XOR_TRY_TEMPLATES \ +do { \ + xor_speed(&xor_block_xc); \ +} while (0) + +#define XOR_SELECT_TEMPLATE(FASTEST) (&xor_block_xc) + +#endif /* _ASM_S390_XOR_H */ diff --git a/arch/s390/include/uapi/asm/clp.h b/arch/s390/include/uapi/asm/clp.h new file mode 100644 index 000000000000..ab72d9d24373 --- /dev/null +++ b/arch/s390/include/uapi/asm/clp.h @@ -0,0 +1,28 @@ +/* + * ioctl interface for /dev/clp + * + * Copyright IBM Corp. 2016 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#ifndef _ASM_CLP_H +#define _ASM_CLP_H + +#include <linux/types.h> +#include <linux/ioctl.h> + +struct clp_req { + unsigned int c : 1; + unsigned int r : 1; + unsigned int lps : 6; + unsigned int cmd : 8; + unsigned int : 16; + unsigned int reserved; + __u64 data_p; +}; + +#define CLP_IOCTL_MAGIC 'c' + +#define CLP_SYNC _IOWR(CLP_IOCTL_MAGIC, 0xC1, struct clp_req) + +#endif diff --git a/arch/s390/include/uapi/asm/dasd.h b/arch/s390/include/uapi/asm/dasd.h index 5812a3b2df9e..1340311dab77 100644 --- a/arch/s390/include/uapi/asm/dasd.h +++ b/arch/s390/include/uapi/asm/dasd.h @@ -187,6 +187,36 @@ typedef struct format_data_t { #define DASD_FMT_INT_INVAL 4 /* invalidate tracks */ #define DASD_FMT_INT_COMPAT 8 /* use OS/390 compatible disk layout */ +/* + * struct format_check_t + * represents all data necessary to evaluate the format of + * different tracks of a dasd + */ +typedef struct format_check_t { + /* Input */ + struct format_data_t expect; + + /* Output */ + unsigned int result; /* Error indication (DASD_FMT_ERR_*) */ + unsigned int unit; /* Track that is in error */ + unsigned int rec; /* Record that is in error */ + unsigned int num_records; /* Records in the track in error */ + unsigned int blksize; /* Blocksize of first record in error */ + unsigned int key_length; /* Key length of first record in error */ +} format_check_t; + +/* Values returned in format_check_t when a format error is detected: */ +/* Too few records were found on a single track */ +#define DASD_FMT_ERR_TOO_FEW_RECORDS 1 +/* Too many records were found on a single track */ +#define DASD_FMT_ERR_TOO_MANY_RECORDS 2 +/* Blocksize/data-length of a record was wrong */ +#define DASD_FMT_ERR_BLKSIZE 3 +/* A record ID is defined by cylinder, head, and record number (CHR). */ +/* On mismatch, this error is set */ +#define DASD_FMT_ERR_RECORD_ID 4 +/* If key-length was != 0 */ +#define DASD_FMT_ERR_KEY_LENGTH 5 /* * struct attrib_data_t @@ -288,6 +318,8 @@ struct dasd_snid_ioctl_data { /* Get Sense Path Group ID (SNID) data */ #define BIODASDSNID _IOWR(DASD_IOCTL_LETTER, 1, struct dasd_snid_ioctl_data) +/* Check device format according to format_check_t */ +#define BIODASDCHECKFMT _IOWR(DASD_IOCTL_LETTER, 2, format_check_t) #define BIODASDSYMMIO _IOWR(DASD_IOCTL_LETTER, 240, dasd_symmio_parms_t) diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index fe84bd5fe7ce..3b8e99ef9d58 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h @@ -25,6 +25,7 @@ #define KVM_DEV_FLIC_APF_DISABLE_WAIT 5 #define KVM_DEV_FLIC_ADAPTER_REGISTER 6 #define KVM_DEV_FLIC_ADAPTER_MODIFY 7 +#define KVM_DEV_FLIC_CLEAR_IO_IRQ 8 /* * We can have up to 4*64k pending subchannels + 8 adapter interrupts, * as well as up to ASYNC_PF_PER_VCPU*KVM_MAX_VCPUS pfault done interrupts. @@ -154,6 +155,7 @@ struct kvm_guest_debug_arch { #define KVM_SYNC_PFAULT (1UL << 5) #define KVM_SYNC_VRS (1UL << 6) #define KVM_SYNC_RICCB (1UL << 7) +#define KVM_SYNC_FPRS (1UL << 8) /* definition of registers in kvm_run */ struct kvm_sync_regs { __u64 prefix; /* prefix register */ @@ -168,9 +170,12 @@ struct kvm_sync_regs { __u64 pft; /* pfault token [PFAULT] */ __u64 pfs; /* pfault select [PFAULT] */ __u64 pfc; /* pfault compare [PFAULT] */ - __u64 vrs[32][2]; /* vector registers */ + union { + __u64 vrs[32][2]; /* vector registers (KVM_SYNC_VRS) */ + __u64 fprs[16]; /* fp registers (KVM_SYNC_FPRS) */ + }; __u8 reserved[512]; /* for future vector expansion */ - __u32 fpc; /* only valid with vector registers */ + __u32 fpc; /* valid on KVM_SYNC_VRS or KVM_SYNC_FPRS */ __u8 padding[52]; /* riccb needs to be 64byte aligned */ __u8 riccb[64]; /* runtime instrumentation controls block */ }; diff --git a/arch/s390/include/uapi/asm/sie.h b/arch/s390/include/uapi/asm/sie.h index ee69c0854c88..8fb5d4a6dd25 100644 --- a/arch/s390/include/uapi/asm/sie.h +++ b/arch/s390/include/uapi/asm/sie.h @@ -7,6 +7,7 @@ { 0x9c, "DIAG (0x9c) time slice end directed" }, \ { 0x204, "DIAG (0x204) logical-cpu utilization" }, \ { 0x258, "DIAG (0x258) page-reference services" }, \ + { 0x288, "DIAG (0x288) watchdog functions" }, \ { 0x308, "DIAG (0x308) ipl functions" }, \ { 0x500, "DIAG (0x500) KVM virtio functions" }, \ { 0x501, "DIAG (0x501) KVM breakpoint" } @@ -15,14 +16,19 @@ { 0x01, "SIGP sense" }, \ { 0x02, "SIGP external call" }, \ { 0x03, "SIGP emergency signal" }, \ + { 0x04, "SIGP start" }, \ { 0x05, "SIGP stop" }, \ { 0x06, "SIGP restart" }, \ { 0x09, "SIGP stop and store status" }, \ { 0x0b, "SIGP initial cpu reset" }, \ + { 0x0c, "SIGP cpu reset" }, \ { 0x0d, "SIGP set prefix" }, \ { 0x0e, "SIGP store status at address" }, \ { 0x12, "SIGP set architecture" }, \ - { 0x15, "SIGP sense running" } + { 0x13, "SIGP conditional emergency signal" }, \ + { 0x15, "SIGP sense running" }, \ + { 0x16, "SIGP set multithreading"}, \ + { 0x17, "SIGP store additional status ait address"} #define icpt_prog_codes \ { 0x0001, "Prog Operation" }, \ diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h index d02e89d14fef..41b51c2f4f1b 100644 --- a/arch/s390/include/uapi/asm/socket.h +++ b/arch/s390/include/uapi/asm/socket.h @@ -94,4 +94,6 @@ #define SO_ATTACH_REUSEPORT_CBPF 51 #define SO_ATTACH_REUSEPORT_EBPF 52 +#define SO_CNX_ADVICE 53 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h index ab3aa6875a59..4384bc797a54 100644 --- a/arch/s390/include/uapi/asm/unistd.h +++ b/arch/s390/include/uapi/asm/unistd.h @@ -311,7 +311,9 @@ #define __NR_shutdown 373 #define __NR_mlock2 374 #define __NR_copy_file_range 375 -#define NR_syscalls 376 +#define __NR_preadv2 376 +#define __NR_pwritev2 377 +#define NR_syscalls 378 /* * There are some system calls that are not present on 64 bit, some diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 53bbc9e8b281..1f95cc1faeb7 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -12,6 +12,7 @@ #include <asm/idle.h> #include <asm/vdso.h> #include <asm/pgtable.h> +#include <asm/gmap.h> /* * Make sure that the compiler is new enough. We want a compiler that diff --git a/arch/s390/kernel/cache.c b/arch/s390/kernel/cache.c index 8ba32436effe..77a84bd78be2 100644 --- a/arch/s390/kernel/cache.c +++ b/arch/s390/kernel/cache.c @@ -72,7 +72,6 @@ void show_cacheinfo(struct seq_file *m) if (!test_facility(34)) return; - get_online_cpus(); this_cpu_ci = get_cpu_cacheinfo(cpumask_any(cpu_online_mask)); for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) { cache = this_cpu_ci->info_list + idx; @@ -86,7 +85,6 @@ void show_cacheinfo(struct seq_file *m) seq_printf(m, "associativity=%d", cache->ways_of_associativity); seq_puts(m, "\n"); } - put_online_cpus(); } static inline enum cache_type get_cache_type(struct cache_info *ci, int level) diff --git a/arch/s390/kernel/cpcmd.c b/arch/s390/kernel/cpcmd.c index 7f768914fb4f..7f48e568ac64 100644 --- a/arch/s390/kernel/cpcmd.c +++ b/arch/s390/kernel/cpcmd.c @@ -96,8 +96,7 @@ int cpcmd(const char *cmd, char *response, int rlen, int *response_code) (((unsigned long)response + rlen) >> 31)) { lowbuf = kmalloc(rlen, GFP_KERNEL | GFP_DMA); if (!lowbuf) { - pr_warning("The cpcmd kernel function failed to " - "allocate a response buffer\n"); + pr_warn("The cpcmd kernel function failed to allocate a response buffer\n"); return -ENOMEM; } spin_lock_irqsave(&cpcmd_lock, flags); diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index 3986c9f62191..29df8484282b 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -173,7 +173,7 @@ int copy_oldmem_kernel(void *dst, void *src, size_t count) /* * Copy memory of the old, dumped system to a user space virtual address */ -int copy_oldmem_user(void __user *dst, void *src, size_t count) +static int copy_oldmem_user(void __user *dst, void *src, size_t count) { unsigned long from, len; int rc; diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index c890a5589e59..aa12de72fd47 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -699,8 +699,7 @@ debug_info_t *debug_register_mode(const char *name, int pages_per_area, /* Since debugfs currently does not support uid/gid other than root, */ /* we do not allow gid/uid != 0 until we get support for that. */ if ((uid != 0) || (gid != 0)) - pr_warning("Root becomes the owner of all s390dbf files " - "in sysfs\n"); + pr_warn("Root becomes the owner of all s390dbf files in sysfs\n"); BUG_ON(!initialized); mutex_lock(&debug_mutex); @@ -1307,8 +1306,7 @@ debug_input_level_fn(debug_info_t * id, struct debug_view *view, new_level = debug_get_uint(str); } if(new_level < 0) { - pr_warning("%s is not a valid level for a debug " - "feature\n", str); + pr_warn("%s is not a valid level for a debug feature\n", str); rc = -EINVAL; } else { debug_set_level(id, new_level); diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index 62973efd214a..8cb9bfdd3ea8 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -1920,23 +1920,16 @@ static int print_insn(char *buffer, unsigned char *code, unsigned long addr) } if (separator) ptr += sprintf(ptr, "%c", separator); - /* - * Use four '%' characters below because of the - * following two conversions: - * - * 1) sprintf: %%%%r -> %%r - * 2) printk : %%r -> %r - */ if (operand->flags & OPERAND_GPR) - ptr += sprintf(ptr, "%%%%r%i", value); + ptr += sprintf(ptr, "%%r%i", value); else if (operand->flags & OPERAND_FPR) - ptr += sprintf(ptr, "%%%%f%i", value); + ptr += sprintf(ptr, "%%f%i", value); else if (operand->flags & OPERAND_AR) - ptr += sprintf(ptr, "%%%%a%i", value); + ptr += sprintf(ptr, "%%a%i", value); else if (operand->flags & OPERAND_CR) - ptr += sprintf(ptr, "%%%%c%i", value); + ptr += sprintf(ptr, "%%c%i", value); else if (operand->flags & OPERAND_VR) - ptr += sprintf(ptr, "%%%%v%i", value); + ptr += sprintf(ptr, "%%v%i", value); else if (operand->flags & OPERAND_PCREL) ptr += sprintf(ptr, "%lx", (signed int) value + addr); diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 02bd02ff648b..69f9908ac44c 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -11,6 +11,7 @@ #include <linux/export.h> #include <linux/kdebug.h> #include <linux/ptrace.h> +#include <linux/mm.h> #include <linux/module.h> #include <linux/sched.h> #include <asm/processor.h> @@ -19,28 +20,28 @@ #include <asm/ipl.h> /* - * For show_trace we have tree different stack to consider: + * For dump_trace we have tree different stack to consider: * - the panic stack which is used if the kernel stack has overflown * - the asynchronous interrupt stack (cpu related) * - the synchronous kernel stack (process related) - * The stack trace can start at any of the three stack and can potentially + * The stack trace can start at any of the three stacks and can potentially * touch all of them. The order is: panic stack, async stack, sync stack. */ static unsigned long -__show_trace(unsigned long sp, unsigned long low, unsigned long high) +__dump_trace(dump_trace_func_t func, void *data, unsigned long sp, + unsigned long low, unsigned long high) { struct stack_frame *sf; struct pt_regs *regs; - unsigned long addr; while (1) { if (sp < low || sp > high - sizeof(*sf)) return sp; sf = (struct stack_frame *) sp; - addr = sf->gprs[8]; - printk("([<%016lx>] %pSR)\n", addr, (void *)addr); /* Follow the backchain. */ while (1) { + if (func(data, sf->gprs[8])) + return sp; low = sp; sp = sf->back_chain; if (!sp) @@ -48,46 +49,82 @@ __show_trace(unsigned long sp, unsigned long low, unsigned long high) if (sp <= low || sp > high - sizeof(*sf)) return sp; sf = (struct stack_frame *) sp; - addr = sf->gprs[8]; - printk(" [<%016lx>] %pSR\n", addr, (void *)addr); } /* Zero backchain detected, check for interrupt frame. */ sp = (unsigned long) (sf + 1); if (sp <= low || sp > high - sizeof(*regs)) return sp; regs = (struct pt_regs *) sp; - addr = regs->psw.addr; - printk(" [<%016lx>] %pSR\n", addr, (void *)addr); + if (!user_mode(regs)) { + if (func(data, regs->psw.addr)) + return sp; + } low = sp; sp = regs->gprs[15]; } } -static void show_trace(struct task_struct *task, unsigned long *stack) +void dump_trace(dump_trace_func_t func, void *data, struct task_struct *task, + unsigned long sp) { - const unsigned long frame_size = - STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); - register unsigned long __r15 asm ("15"); - unsigned long sp; + unsigned long frame_size; - sp = (unsigned long) stack; - if (!sp) - sp = task ? task->thread.ksp : __r15; - printk("Call Trace:\n"); + frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); #ifdef CONFIG_CHECK_STACK - sp = __show_trace(sp, + sp = __dump_trace(func, data, sp, S390_lowcore.panic_stack + frame_size - 4096, S390_lowcore.panic_stack + frame_size); #endif - sp = __show_trace(sp, + sp = __dump_trace(func, data, sp, S390_lowcore.async_stack + frame_size - ASYNC_SIZE, S390_lowcore.async_stack + frame_size); if (task) - __show_trace(sp, (unsigned long) task_stack_page(task), - (unsigned long) task_stack_page(task) + THREAD_SIZE); + __dump_trace(func, data, sp, + (unsigned long)task_stack_page(task), + (unsigned long)task_stack_page(task) + THREAD_SIZE); else - __show_trace(sp, S390_lowcore.thread_info, + __dump_trace(func, data, sp, + S390_lowcore.thread_info, S390_lowcore.thread_info + THREAD_SIZE); +} +EXPORT_SYMBOL_GPL(dump_trace); + +struct return_address_data { + unsigned long address; + int depth; +}; + +static int __return_address(void *data, unsigned long address) +{ + struct return_address_data *rd = data; + + if (rd->depth--) + return 0; + rd->address = address; + return 1; +} + +unsigned long return_address(int depth) +{ + struct return_address_data rd = { .depth = depth + 2 }; + + dump_trace(__return_address, &rd, NULL, current_stack_pointer()); + return rd.address; +} +EXPORT_SYMBOL_GPL(return_address); + +static int show_address(void *data, unsigned long address) +{ + printk("([<%016lx>] %pSR)\n", address, (void *)address); + return 0; +} + +static void show_trace(struct task_struct *task, unsigned long sp) +{ + if (!sp) + sp = task ? task->thread.ksp : current_stack_pointer(); + printk("Call Trace:\n"); + dump_trace(show_address, NULL, task, sp); if (!task) task = current; debug_show_held_locks(task); @@ -95,15 +132,16 @@ static void show_trace(struct task_struct *task, unsigned long *stack) void show_stack(struct task_struct *task, unsigned long *sp) { - register unsigned long *__r15 asm ("15"); unsigned long *stack; int i; - if (!sp) - stack = task ? (unsigned long *) task->thread.ksp : __r15; - else - stack = sp; - + stack = sp; + if (!stack) { + if (!task) + stack = (unsigned long *)current_stack_pointer(); + else + stack = (unsigned long *)task->thread.ksp; + } for (i = 0; i < 20; i++) { if (((addr_t) stack & (THREAD_SIZE-1)) == 0) break; @@ -112,7 +150,7 @@ void show_stack(struct task_struct *task, unsigned long *sp) printk("%016lx ", *stack++); } printk("\n"); - show_trace(task, sp); + show_trace(task, (unsigned long)sp); } static void show_last_breaking_event(struct pt_regs *regs) @@ -121,13 +159,9 @@ static void show_last_breaking_event(struct pt_regs *regs) printk(" [<%016lx>] %pSR\n", regs->args[0], (void *)regs->args[0]); } -static inline int mask_bits(struct pt_regs *regs, unsigned long bits) -{ - return (regs->psw.mask & bits) / ((~bits + 1) & bits); -} - void show_registers(struct pt_regs *regs) { + struct psw_bits *psw = &psw_bits(regs->psw); char *mode; mode = user_mode(regs) ? "User" : "Krnl"; @@ -136,13 +170,9 @@ void show_registers(struct pt_regs *regs) printk(" (%pSR)", (void *)regs->psw.addr); printk("\n"); printk(" R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x " - "P:%x AS:%x CC:%x PM:%x", mask_bits(regs, PSW_MASK_PER), - mask_bits(regs, PSW_MASK_DAT), mask_bits(regs, PSW_MASK_IO), - mask_bits(regs, PSW_MASK_EXT), mask_bits(regs, PSW_MASK_KEY), - mask_bits(regs, PSW_MASK_MCHECK), mask_bits(regs, PSW_MASK_WAIT), - mask_bits(regs, PSW_MASK_PSTATE), mask_bits(regs, PSW_MASK_ASC), - mask_bits(regs, PSW_MASK_CC), mask_bits(regs, PSW_MASK_PM)); - printk(" EA:%x", mask_bits(regs, PSW_MASK_EA | PSW_MASK_BA)); + "P:%x AS:%x CC:%x PM:%x", psw->r, psw->t, psw->i, psw->e, + psw->key, psw->m, psw->w, psw->p, psw->as, psw->cc, psw->pm); + printk(" RI:%x EA:%x", psw->ri, psw->eaba); printk("\n%s GPRS: %016lx %016lx %016lx %016lx\n", mode, regs->gprs[0], regs->gprs[1], regs->gprs[2], regs->gprs[3]); printk(" %016lx %016lx %016lx %016lx\n", @@ -160,7 +190,7 @@ void show_regs(struct pt_regs *regs) show_registers(regs); /* Show stack backtrace if pt_regs is from kernel mode */ if (!user_mode(regs)) - show_trace(NULL, (unsigned long *) regs->gprs[15]); + show_trace(NULL, regs->gprs[15]); show_last_breaking_event(regs); } @@ -184,9 +214,8 @@ void die(struct pt_regs *regs, const char *str) #ifdef CONFIG_SMP printk("SMP "); #endif -#ifdef CONFIG_DEBUG_PAGEALLOC - printk("DEBUG_PAGEALLOC"); -#endif + if (debug_pagealloc_enabled()) + printk("DEBUG_PAGEALLOC"); printk("\n"); notify_die(DIE_OOPS, str, regs, 0, regs->int_code & 0xffff, SIGSEGV); print_modules(); diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index c55576bbaa1f..a0684de5a93b 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -448,7 +448,6 @@ void __init startup_init(void) rescue_initrd(); clear_bss_section(); init_kernel_storage_key(); - lockdep_init(); lockdep_off(); setup_lowcore_early(); setup_facility_list(); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index cd5a191381b9..2d47f9cfcb36 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -186,6 +186,7 @@ ENTRY(__switch_to) stg %r5,__LC_THREAD_INFO # store thread info of next stg %r15,__LC_KERNEL_STACK # store end of kernel stack lg %r15,__THREAD_ksp(%r1) # load kernel stack of next + /* c4 is used in guest detection: arch/s390/kernel/perf_cpum_sf.c */ lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 4 mvc __LC_CURRENT_PID(4,%r0),__TASK_pid(%r3) # store pid of next lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task @@ -1199,114 +1200,12 @@ cleanup_critical: .quad .Lpsw_idle_lpsw .Lcleanup_save_fpu_regs: - TSTMSK __LC_CPU_FLAGS,_CIF_FPU - bor %r14 - clg %r9,BASED(.Lcleanup_save_fpu_regs_done) - jhe 5f - clg %r9,BASED(.Lcleanup_save_fpu_regs_fp) - jhe 4f - clg %r9,BASED(.Lcleanup_save_fpu_regs_vx_high) - jhe 3f - clg %r9,BASED(.Lcleanup_save_fpu_regs_vx_low) - jhe 2f - clg %r9,BASED(.Lcleanup_save_fpu_fpc_end) - jhe 1f - lg %r2,__LC_CURRENT - aghi %r2,__TASK_thread -0: # Store floating-point controls - stfpc __THREAD_FPU_fpc(%r2) -1: # Load register save area and check if VX is active - lg %r3,__THREAD_FPU_regs(%r2) - TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_VX - jz 4f # no VX -> store FP regs -2: # Store vector registers (V0-V15) - VSTM %v0,%v15,0,%r3 # vstm 0,15,0(3) -3: # Store vector registers (V16-V31) - VSTM %v16,%v31,256,%r3 # vstm 16,31,256(3) - j 5f # -> done, set CIF_FPU flag -4: # Store floating-point registers - std 0,0(%r3) - std 1,8(%r3) - std 2,16(%r3) - std 3,24(%r3) - std 4,32(%r3) - std 5,40(%r3) - std 6,48(%r3) - std 7,56(%r3) - std 8,64(%r3) - std 9,72(%r3) - std 10,80(%r3) - std 11,88(%r3) - std 12,96(%r3) - std 13,104(%r3) - std 14,112(%r3) - std 15,120(%r3) -5: # Set CIF_FPU flag - oi __LC_CPU_FLAGS+7,_CIF_FPU - lg %r9,48(%r11) # return from save_fpu_regs + larl %r9,save_fpu_regs br %r14 -.Lcleanup_save_fpu_fpc_end: - .quad .Lsave_fpu_regs_fpc_end -.Lcleanup_save_fpu_regs_vx_low: - .quad .Lsave_fpu_regs_vx_low -.Lcleanup_save_fpu_regs_vx_high: - .quad .Lsave_fpu_regs_vx_high -.Lcleanup_save_fpu_regs_fp: - .quad .Lsave_fpu_regs_fp -.Lcleanup_save_fpu_regs_done: - .quad .Lsave_fpu_regs_done .Lcleanup_load_fpu_regs: - TSTMSK __LC_CPU_FLAGS,_CIF_FPU - bnor %r14 - clg %r9,BASED(.Lcleanup_load_fpu_regs_done) - jhe 1f - clg %r9,BASED(.Lcleanup_load_fpu_regs_fp) - jhe 2f - clg %r9,BASED(.Lcleanup_load_fpu_regs_vx_high) - jhe 3f - clg %r9,BASED(.Lcleanup_load_fpu_regs_vx) - jhe 4f - lg %r4,__LC_CURRENT - aghi %r4,__TASK_thread - lfpc __THREAD_FPU_fpc(%r4) - TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_VX - lg %r4,__THREAD_FPU_regs(%r4) # %r4 <- reg save area - jz 2f # -> no VX, load FP regs -4: # Load V0 ..V15 registers - VLM %v0,%v15,0,%r4 -3: # Load V16..V31 registers - VLM %v16,%v31,256,%r4 - j 1f -2: # Load floating-point registers - ld 0,0(%r4) - ld 1,8(%r4) - ld 2,16(%r4) - ld 3,24(%r4) - ld 4,32(%r4) - ld 5,40(%r4) - ld 6,48(%r4) - ld 7,56(%r4) - ld 8,64(%r4) - ld 9,72(%r4) - ld 10,80(%r4) - ld 11,88(%r4) - ld 12,96(%r4) - ld 13,104(%r4) - ld 14,112(%r4) - ld 15,120(%r4) -1: # Clear CIF_FPU bit - ni __LC_CPU_FLAGS+7,255-_CIF_FPU - lg %r9,48(%r11) # return from load_fpu_regs + larl %r9,load_fpu_regs br %r14 -.Lcleanup_load_fpu_regs_vx: - .quad .Lload_fpu_regs_vx -.Lcleanup_load_fpu_regs_vx_high: - .quad .Lload_fpu_regs_vx_high -.Lcleanup_load_fpu_regs_fp: - .quad .Lload_fpu_regs_fp -.Lcleanup_load_fpu_regs_done: - .quad .Lload_fpu_regs_done /* * Integer constants diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index b7019ab74070..bedd2f55d860 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -1,6 +1,7 @@ #ifndef _ENTRY_H #define _ENTRY_H +#include <linux/percpu.h> #include <linux/types.h> #include <linux/signal.h> #include <asm/ptrace.h> @@ -75,4 +76,7 @@ long sys_s390_personality(unsigned int personality); long sys_s390_runtime_instr(int command, int signum); long sys_s390_pci_mmio_write(unsigned long, const void __user *, size_t); long sys_s390_pci_mmio_read(unsigned long, void __user *, size_t); + +DECLARE_PER_CPU(u64, mt_cycles[8]); + #endif /* _ENTRY_H */ diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index f20abdb5630a..d14069d4b88d 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -2064,12 +2064,5 @@ void s390_reset_system(void) S390_lowcore.program_new_psw.addr = (unsigned long) s390_base_pgm_handler; - /* - * Clear subchannel ID and number to signal new kernel that no CCW or - * SCSI IPL has been done (for kexec and kdump) - */ - S390_lowcore.subchannel_id = 0; - S390_lowcore.subchannel_nr = 0; - do_reset_calls(); } diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index f41d5208aaf7..c373a1d41d10 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -164,8 +164,7 @@ void do_softirq_own_stack(void) { unsigned long old, new; - /* Get current stack pointer. */ - asm volatile("la %0,0(15)" : "=a" (old)); + old = current_stack_pointer(); /* Check against async. stack address range. */ new = S390_lowcore.async_stack; if (((new - old) >> (PAGE_SHIFT + THREAD_ORDER)) != 0) { diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 2f1b7217c25c..0e64f08d3d69 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -43,13 +43,13 @@ static int machine_kdump_pm_cb(struct notifier_block *nb, unsigned long action, switch (action) { case PM_SUSPEND_PREPARE: case PM_HIBERNATION_PREPARE: - if (crashk_res.start) - crash_map_reserved_pages(); + if (kexec_crash_image) + arch_kexec_unprotect_crashkres(); break; case PM_POST_SUSPEND: case PM_POST_HIBERNATION: - if (crashk_res.start) - crash_unmap_reserved_pages(); + if (kexec_crash_image) + arch_kexec_protect_crashkres(); break; default: return NOTIFY_DONE; @@ -60,6 +60,8 @@ static int machine_kdump_pm_cb(struct notifier_block *nb, unsigned long action, static int __init machine_kdump_pm_init(void) { pm_notifier(machine_kdump_pm_cb, 0); + /* Create initial mapping for crashkernel memory */ + arch_kexec_unprotect_crashkres(); return 0; } arch_initcall(machine_kdump_pm_init); @@ -146,6 +148,8 @@ static int kdump_csum_valid(struct kimage *image) #endif } +#ifdef CONFIG_CRASH_DUMP + /* * Map or unmap crashkernel memory */ @@ -167,21 +171,25 @@ static void crash_map_pages(int enable) } /* - * Map crashkernel memory + * Unmap crashkernel memory */ -void crash_map_reserved_pages(void) +void arch_kexec_protect_crashkres(void) { - crash_map_pages(1); + if (crashk_res.end) + crash_map_pages(0); } /* - * Unmap crashkernel memory + * Map crashkernel memory */ -void crash_unmap_reserved_pages(void) +void arch_kexec_unprotect_crashkres(void) { - crash_map_pages(0); + if (crashk_res.end) + crash_map_pages(1); } +#endif + /* * Give back memory to hypervisor before new kdump is loaded */ diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index 7873e171457c..fbc07891f9e7 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -51,6 +51,10 @@ void *module_alloc(unsigned long size) void module_arch_freeing_init(struct module *mod) { + if (is_livepatch_module(mod) && + mod->state == MODULE_STATE_LIVE) + return; + vfree(mod->arch.syminfo); mod->arch.syminfo = NULL; } @@ -425,7 +429,5 @@ int module_finalize(const Elf_Ehdr *hdr, struct module *me) { jump_label_apply_nops(me); - vfree(me->arch.syminfo); - me->arch.syminfo = NULL; return 0; } diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 929c147e07b4..7ec63b1d920d 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -383,7 +383,7 @@ static int __hw_perf_event_init(struct perf_event *event) /* Validate the counter that is assigned to this event. * Because the counter facility can use numerous counters at the - * same time without constraints, it is not necessary to explicity + * same time without constraints, it is not necessary to explicitly * validate event groups (event->group_leader != event). */ err = validate_event(hwc); @@ -649,6 +649,8 @@ static int cpumf_pmu_commit_txn(struct pmu *pmu) /* Performance monitoring unit for s390x */ static struct pmu cpumf_pmu = { + .task_ctx_nr = perf_sw_context, + .capabilities = PERF_PMU_CAP_NO_INTERRUPT, .pmu_enable = cpumf_pmu_enable, .pmu_disable = cpumf_pmu_disable, .event_init = cpumf_pmu_event_init, @@ -665,17 +667,21 @@ static struct pmu cpumf_pmu = { static int cpumf_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) { - unsigned int cpu = (long) hcpu; int flags; switch (action & ~CPU_TASKS_FROZEN) { case CPU_ONLINE: + case CPU_DOWN_FAILED: flags = PMC_INIT; - smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1); + local_irq_disable(); + setup_pmc_cpu(&flags); + local_irq_enable(); break; case CPU_DOWN_PREPARE: flags = PMC_RELEASE; - smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1); + local_irq_disable(); + setup_pmc_cpu(&flags); + local_irq_enable(); break; default: break; @@ -704,12 +710,6 @@ static int __init cpumf_pmu_init(void) goto out; } - /* The CPU measurement counter facility does not have overflow - * interrupts to do sampling. Sampling must be provided by - * external means, for example, by timers. - */ - cpumf_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; - cpumf_pmu.attr_groups = cpumf_cf_event_group(); rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", PERF_TYPE_RAW); if (rc) { diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 3d8da1e742c2..a8e832166417 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -1022,10 +1022,13 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr) /* * A non-zero guest program parameter indicates a guest * sample. - * Note that some early samples might be misaccounted to - * the host. + * Note that some early samples or samples from guests without + * lpp usage would be misaccounted to the host. We use the asn + * value as a heuristic to detect most of these guest samples. + * If the value differs from the host hpp value, we assume + * it to be a KVM guest. */ - if (sfr->basic.gpp) + if (sfr->basic.gpp || sfr->basic.prim_asn != (u16) sfr->basic.hpp) sde_regs->in_guest = 1; overflow = 0; @@ -1507,7 +1510,6 @@ static void cpumf_measurement_alert(struct ext_code ext_code, static int cpumf_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) { - unsigned int cpu = (long) hcpu; int flags; /* Ignore the notification if no events are scheduled on the PMU. @@ -1518,13 +1520,17 @@ static int cpumf_pmu_notifier(struct notifier_block *self, switch (action & ~CPU_TASKS_FROZEN) { case CPU_ONLINE: - case CPU_ONLINE_FROZEN: + case CPU_DOWN_FAILED: flags = PMC_INIT; - smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1); + local_irq_disable(); + setup_pmc_cpu(&flags); + local_irq_enable(); break; case CPU_DOWN_PREPARE: flags = PMC_RELEASE; - smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1); + local_irq_disable(); + setup_pmc_cpu(&flags); + local_irq_enable(); break; default: break; diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index 0943b11a2f6e..87035fa58bbe 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c @@ -222,67 +222,23 @@ static int __init service_level_perf_register(void) } arch_initcall(service_level_perf_register); -/* See also arch/s390/kernel/traps.c */ -static unsigned long __store_trace(struct perf_callchain_entry *entry, - unsigned long sp, - unsigned long low, unsigned long high) +static int __perf_callchain_kernel(void *data, unsigned long address) { - struct stack_frame *sf; - struct pt_regs *regs; - - while (1) { - if (sp < low || sp > high - sizeof(*sf)) - return sp; - sf = (struct stack_frame *) sp; - perf_callchain_store(entry, sf->gprs[8]); - /* Follow the backchain. */ - while (1) { - low = sp; - sp = sf->back_chain; - if (!sp) - break; - if (sp <= low || sp > high - sizeof(*sf)) - return sp; - sf = (struct stack_frame *) sp; - perf_callchain_store(entry, sf->gprs[8]); - } - /* Zero backchain detected, check for interrupt frame. */ - sp = (unsigned long) (sf + 1); - if (sp <= low || sp > high - sizeof(*regs)) - return sp; - regs = (struct pt_regs *) sp; - perf_callchain_store(entry, sf->gprs[8]); - low = sp; - sp = regs->gprs[15]; - } + struct perf_callchain_entry_ctx *entry = data; + + perf_callchain_store(entry, address); + return 0; } -void perf_callchain_kernel(struct perf_callchain_entry *entry, +void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { - unsigned long head, frame_size; - struct stack_frame *head_sf; - if (user_mode(regs)) return; - - frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); - head = regs->gprs[15]; - head_sf = (struct stack_frame *) head; - - if (!head_sf || !head_sf->back_chain) - return; - - head = head_sf->back_chain; - head = __store_trace(entry, head, - S390_lowcore.async_stack + frame_size - ASYNC_SIZE, - S390_lowcore.async_stack + frame_size); - - __store_trace(entry, head, S390_lowcore.thread_info, - S390_lowcore.thread_info + THREAD_SIZE); + dump_trace(__perf_callchain_kernel, entry, NULL, regs->gprs[15]); } -/* Perf defintions for PMU event attributes in sysfs */ +/* Perf definitions for PMU event attributes in sysfs */ ssize_t cpumf_events_sysfs_show(struct device *dev, struct device_attribute *attr, char *page) { diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 2bba7df4ac51..bba4fa74b321 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -7,6 +7,7 @@ * Denis Joseph Barrow, */ +#include <linux/elf-randomize.h> #include <linux/compiler.h> #include <linux/cpu.h> #include <linux/sched.h> @@ -37,9 +38,6 @@ asmlinkage void ret_from_fork(void) asm ("ret_from_fork"); -/* FPU save area for the init task */ -__vector128 init_task_fpu_regs[__NUM_VXRS] __init_task_data; - /* * Return saved PC of a blocked thread. used in kernel/sched. * resume in entry.S does not create a new stack frame, it @@ -70,9 +68,10 @@ extern void kernel_thread_starter(void); /* * Free current thread data structures etc.. */ -void exit_thread(void) +void exit_thread(struct task_struct *tsk) { - exit_thread_runtime_instr(); + if (tsk == current) + exit_thread_runtime_instr(); } void flush_thread(void) @@ -85,35 +84,19 @@ void release_thread(struct task_struct *dead_task) void arch_release_task_struct(struct task_struct *tsk) { - /* Free either the floating-point or the vector register save area */ - kfree(tsk->thread.fpu.regs); } int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) { - size_t fpu_regs_size; - - *dst = *src; - - /* - * If the vector extension is available, it is enabled for all tasks, - * and, thus, the FPU register save area must be allocated accordingly. - */ - fpu_regs_size = MACHINE_HAS_VX ? sizeof(__vector128) * __NUM_VXRS - : sizeof(freg_t) * __NUM_FPRS; - dst->thread.fpu.regs = kzalloc(fpu_regs_size, GFP_KERNEL|__GFP_REPEAT); - if (!dst->thread.fpu.regs) - return -ENOMEM; - /* * Save the floating-point or vector register state of the current * task and set the CIF_FPU flag to lazy restore the FPU register * state when returning to user space. */ save_fpu_regs(); - dst->thread.fpu.fpc = current->thread.fpu.fpc; - memcpy(dst->thread.fpu.regs, current->thread.fpu.regs, fpu_regs_size); + memcpy(dst, src, arch_task_struct_size); + dst->thread.fpu.regs = dst->thread.fpu.fprs; return 0; } diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 647128d5b983..de7451065c34 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -6,6 +6,7 @@ #define KMSG_COMPONENT "cpu" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#include <linux/cpufeature.h> #include <linux/kernel.h> #include <linux/init.h> #include <linux/seq_file.h> @@ -84,7 +85,6 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_puts(m, "\n"); show_cacheinfo(m); } - get_online_cpus(); if (cpu_online(n)) { struct cpuid *id = &per_cpu(cpu_id, n); seq_printf(m, "processor %li: " @@ -93,23 +93,31 @@ static int show_cpuinfo(struct seq_file *m, void *v) "machine = %04X\n", n, id->version, id->ident, id->machine); } - put_online_cpus(); return 0; } +static inline void *c_update(loff_t *pos) +{ + if (*pos) + *pos = cpumask_next(*pos - 1, cpu_online_mask); + return *pos < nr_cpu_ids ? (void *)*pos + 1 : NULL; +} + static void *c_start(struct seq_file *m, loff_t *pos) { - return *pos < nr_cpu_ids ? (void *)((unsigned long) *pos + 1) : NULL; + get_online_cpus(); + return c_update(pos); } static void *c_next(struct seq_file *m, void *v, loff_t *pos) { ++*pos; - return c_start(m, pos); + return c_update(pos); } static void c_stop(struct seq_file *m, void *v) { + put_online_cpus(); } const struct seq_operations cpuinfo_op = { diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 9220db5c996a..f31939147ccd 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -327,6 +327,7 @@ static void __init setup_lowcore(void) + PAGE_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); lc->current_task = (unsigned long) init_thread_union.thread_info.task; lc->thread_info = (unsigned long) &init_thread_union; + lc->lpp = LPP_MAGIC; lc->machine_flags = S390_lowcore.machine_flags; lc->stfl_fac_list = S390_lowcore.stfl_fac_list; memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list, @@ -374,17 +375,17 @@ static void __init setup_lowcore(void) static struct resource code_resource = { .name = "Kernel code", - .flags = IORESOURCE_BUSY | IORESOURCE_MEM, + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, }; static struct resource data_resource = { .name = "Kernel data", - .flags = IORESOURCE_BUSY | IORESOURCE_MEM, + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, }; static struct resource bss_resource = { .name = "Kernel bss", - .flags = IORESOURCE_BUSY | IORESOURCE_MEM, + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, }; static struct resource __initdata *standard_resources[] = { @@ -408,7 +409,7 @@ static void __init setup_resources(void) for_each_memblock(memory, reg) { res = alloc_bootmem_low(sizeof(*res)); - res->flags = IORESOURCE_BUSY | IORESOURCE_MEM; + res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM; res->name = "System RAM"; res->start = reg->base; @@ -779,6 +780,7 @@ static int __init setup_hwcaps(void) strcpy(elf_platform, "zEC12"); break; case 0x2964: + case 0x2965: strcpy(elf_platform, "z13"); break; } @@ -807,6 +809,22 @@ static void __init setup_randomness(void) } /* + * Find the correct size for the task_struct. This depends on + * the size of the struct fpu at the end of the thread_struct + * which is embedded in the task_struct. + */ +static void __init setup_task_size(void) +{ + int task_size = sizeof(struct task_struct); + + if (!MACHINE_HAS_VX) { + task_size -= sizeof(__vector128) * __NUM_VXRS; + task_size += sizeof(freg_t) * __NUM_FPRS; + } + arch_task_struct_size = task_size; +} + +/* * Setup function called from init/main.c just after the banner * was printed. */ @@ -844,6 +862,7 @@ void __init setup_arch(char **cmdline_p) os_info_init(); setup_ipl(); + setup_task_size(); /* Do some memory reservations *before* memory is added to memblock */ reserve_memory_end(); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 3c65a8eae34d..7b89a7572100 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -798,7 +798,7 @@ static void smp_start_secondary(void *cpuvoid) set_cpu_online(smp_processor_id(), true); inc_irq_stat(CPU_RST); local_irq_enable(); - cpu_startup_entry(CPUHP_ONLINE); + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } /* Upping and downing of CPUs */ @@ -832,7 +832,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle) pcpu_attach_task(pcpu, tidle); pcpu_start_fn(pcpu, smp_start_secondary, NULL); /* Wait until cpu puts itself in the online & active maps */ - while (!cpu_online(cpu) || !cpu_active(cpu)) + while (!cpu_online(cpu)) cpu_relax(); return 0; } diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c index 8f64ebd63767..44f84b23d4e5 100644 --- a/arch/s390/kernel/stacktrace.c +++ b/arch/s390/kernel/stacktrace.c @@ -10,78 +10,39 @@ #include <linux/kallsyms.h> #include <linux/module.h> -static unsigned long save_context_stack(struct stack_trace *trace, - unsigned long sp, - unsigned long low, - unsigned long high, - int savesched) +static int __save_address(void *data, unsigned long address, int nosched) { - struct stack_frame *sf; - struct pt_regs *regs; - unsigned long addr; + struct stack_trace *trace = data; - while(1) { - if (sp < low || sp > high) - return sp; - sf = (struct stack_frame *)sp; - while(1) { - addr = sf->gprs[8]; - if (!trace->skip) - trace->entries[trace->nr_entries++] = addr; - else - trace->skip--; - if (trace->nr_entries >= trace->max_entries) - return sp; - low = sp; - sp = sf->back_chain; - if (!sp) - break; - if (sp <= low || sp > high - sizeof(*sf)) - return sp; - sf = (struct stack_frame *)sp; - } - /* Zero backchain detected, check for interrupt frame. */ - sp = (unsigned long)(sf + 1); - if (sp <= low || sp > high - sizeof(*regs)) - return sp; - regs = (struct pt_regs *)sp; - addr = regs->psw.addr; - if (savesched || !in_sched_functions(addr)) { - if (!trace->skip) - trace->entries[trace->nr_entries++] = addr; - else - trace->skip--; - } - if (trace->nr_entries >= trace->max_entries) - return sp; - low = sp; - sp = regs->gprs[15]; + if (nosched && in_sched_functions(address)) + return 0; + if (trace->skip > 0) { + trace->skip--; + return 0; } + if (trace->nr_entries < trace->max_entries) { + trace->entries[trace->nr_entries++] = address; + return 0; + } + return 1; } -static void __save_stack_trace(struct stack_trace *trace, unsigned long sp) +static int save_address(void *data, unsigned long address) { - unsigned long new_sp, frame_size; + return __save_address(data, address, 0); +} - frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); - new_sp = save_context_stack(trace, sp, - S390_lowcore.panic_stack + frame_size - PAGE_SIZE, - S390_lowcore.panic_stack + frame_size, 1); - new_sp = save_context_stack(trace, new_sp, - S390_lowcore.async_stack + frame_size - ASYNC_SIZE, - S390_lowcore.async_stack + frame_size, 1); - save_context_stack(trace, new_sp, - S390_lowcore.thread_info, - S390_lowcore.thread_info + THREAD_SIZE, 1); +static int save_address_nosched(void *data, unsigned long address) +{ + return __save_address(data, address, 1); } void save_stack_trace(struct stack_trace *trace) { - register unsigned long r15 asm ("15"); unsigned long sp; - sp = r15; - __save_stack_trace(trace, sp); + sp = current_stack_pointer(); + dump_trace(save_address, trace, NULL, sp); if (trace->nr_entries < trace->max_entries) trace->entries[trace->nr_entries++] = ULONG_MAX; } @@ -89,16 +50,12 @@ EXPORT_SYMBOL_GPL(save_stack_trace); void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) { - unsigned long sp, low, high; + unsigned long sp; sp = tsk->thread.ksp; - if (tsk == current) { - /* Get current stack pointer. */ - asm volatile("la %0,0(15)" : "=a" (sp)); - } - low = (unsigned long) task_stack_page(tsk); - high = (unsigned long) task_pt_regs(tsk); - save_context_stack(trace, sp, low, high, 0); + if (tsk == current) + sp = current_stack_pointer(); + dump_trace(save_address_nosched, trace, tsk, sp); if (trace->nr_entries < trace->max_entries) trace->entries[trace->nr_entries++] = ULONG_MAX; } @@ -109,7 +66,7 @@ void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace) unsigned long sp; sp = kernel_stack_pointer(regs); - __save_stack_trace(trace, sp); + dump_trace(save_address, trace, NULL, sp); if (trace->nr_entries < trace->max_entries) trace->entries[trace->nr_entries++] = ULONG_MAX; } diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 293d8b98fd52..9b59e6212d8f 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -384,3 +384,5 @@ SYSCALL(sys_recvmsg,compat_sys_recvmsg) SYSCALL(sys_shutdown,sys_shutdown) SYSCALL(sys_mlock2,compat_sys_mlock2) SYSCALL(sys_copy_file_range,compat_sys_copy_file_range) /* 375 */ +SYSCALL(sys_preadv2,compat_sys_preadv2) +SYSCALL(sys_pwritev2,compat_sys_pwritev2) diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 99f84ac31307..9409d32f285e 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -499,8 +499,7 @@ static void etr_reset(void) if (etr_port0_online && etr_port1_online) set_bit(CLOCK_SYNC_ETR, &clock_sync_flags); } else if (etr_port0_online || etr_port1_online) { - pr_warning("The real or virtual hardware system does " - "not provide an ETR interface\n"); + pr_warn("The real or virtual hardware system does not provide an ETR interface\n"); etr_port0_online = etr_port1_online = 0; } } @@ -1433,7 +1432,7 @@ device_initcall(etr_init_sysfs); /* * Server Time Protocol (STP) code. */ -static int stp_online; +static bool stp_online; static struct stp_sstpi stp_info; static void *stp_page; @@ -1444,11 +1443,7 @@ static struct timer_list stp_timer; static int __init early_parse_stp(char *p) { - if (strncmp(p, "off", 3) == 0) - stp_online = 0; - else if (strncmp(p, "on", 2) == 0) - stp_online = 1; - return 0; + return kstrtobool(p, &stp_online); } early_param("stp", early_parse_stp); @@ -1464,8 +1459,7 @@ static void __init stp_reset(void) if (rc == 0) set_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags); else if (stp_online) { - pr_warning("The real or virtual hardware system does " - "not provide an STP interface\n"); + pr_warn("The real or virtual hardware system does not provide an STP interface\n"); free_page((unsigned long) stp_page); stp_page = NULL; stp_online = 0; diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 40b8102fdadb..64298a867589 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -37,7 +37,7 @@ static void set_topology_timer(void); static void topology_work_fn(struct work_struct *work); static struct sysinfo_15_1_x *tl_info; -static int topology_enabled = 1; +static bool topology_enabled = true; static DECLARE_WORK(topology_work, topology_work_fn); /* @@ -444,10 +444,7 @@ static const struct cpumask *cpu_book_mask(int cpu) static int __init early_parse_topology(char *p) { - if (strncmp(p, "off", 3)) - return 0; - topology_enabled = 0; - return 0; + return kstrtobool(p, &topology_enabled); } early_param("topology", early_parse_topology); diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 017eb03daee2..dd97a3e8a34a 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -22,8 +22,6 @@ #include <asm/fpu/api.h> #include "entry.h" -int show_unhandled_signals = 1; - static inline void __user *get_trap_ip(struct pt_regs *regs) { unsigned long address; @@ -35,21 +33,6 @@ static inline void __user *get_trap_ip(struct pt_regs *regs) return (void __user *) (address - (regs->int_code >> 16)); } -static inline void report_user_fault(struct pt_regs *regs, int signr) -{ - if ((task_pid_nr(current) > 1) && !show_unhandled_signals) - return; - if (!unhandled_signal(current, signr)) - return; - if (!printk_ratelimit()) - return; - printk("User process fault: interruption code %04x ilc:%d ", - regs->int_code & 0xffff, regs->int_code >> 17); - print_vma_addr("in ", regs->psw.addr); - printk("\n"); - show_regs(regs); -} - int is_valid_bugaddr(unsigned long addr) { return 1; @@ -65,7 +48,7 @@ void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str) info.si_code = si_code; info.si_addr = get_trap_ip(regs); force_sig_info(si_signo, &info, current); - report_user_fault(regs, si_signo); + report_user_fault(regs, si_signo, 0); } else { const struct exception_table_entry *fixup; fixup = search_exception_tables(regs->psw.addr); @@ -111,7 +94,7 @@ NOKPROBE_SYMBOL(do_per_trap); void default_trap_handler(struct pt_regs *regs) { if (user_mode(regs)) { - report_user_fault(regs, SIGSEGV); + report_user_fault(regs, SIGSEGV, 0); do_exit(SIGSEGV); } else die(regs, "Unknown program exception"); diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 94495cac8be3..5904abf6b1ae 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -216,7 +216,8 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) * it at vdso_base which is the "natural" base for it, but we might * fail and end up putting it elsewhere. */ - down_write(&mm->mmap_sem); + if (down_write_killable(&mm->mmap_sem)) + return -EINTR; vdso_base = get_unmapped_area(NULL, 0, vdso_pages << PAGE_SHIFT, 0, 0); if (IS_ERR_VALUE(vdso_base)) { rc = vdso_base; diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 445657fe658c..0f41a8286378 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -28,6 +28,7 @@ SECTIONS LOCK_TEXT KPROBES_TEXT IRQENTRY_TEXT + SOFTIRQENTRY_TEXT *(.fixup) *(.gnu.warning) } :text = 0x0700 diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index dafc44f519c3..856e30d8463f 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -18,6 +18,8 @@ #include <asm/cpu_mf.h> #include <asm/smp.h> +#include "entry.h" + static void virt_timer_expire(void); static LIST_HEAD(virt_timer_list); diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig index 5ea5af3c7db7..b1900239b0ab 100644 --- a/arch/s390/kvm/Kconfig +++ b/arch/s390/kvm/Kconfig @@ -28,6 +28,7 @@ config KVM select HAVE_KVM_IRQCHIP select HAVE_KVM_IRQFD select HAVE_KVM_IRQ_ROUTING + select HAVE_KVM_INVALID_WAKEUPS select SRCU select KVM_VFIO ---help--- diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 05f7de9869a9..1ea4095b67d7 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -14,6 +14,7 @@ #include <linux/kvm.h> #include <linux/kvm_host.h> #include <asm/pgalloc.h> +#include <asm/gmap.h> #include <asm/virtio-ccw.h> #include "kvm-s390.h" #include "trace.h" diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index d30db40437dc..66938d283b77 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -373,7 +373,7 @@ void ipte_unlock(struct kvm_vcpu *vcpu) } static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, ar_t ar, - int write) + enum gacc_mode mode) { union alet alet; struct ale ale; @@ -454,7 +454,7 @@ static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, ar_t ar, } } - if (ale.fo == 1 && write) + if (ale.fo == 1 && mode == GACC_STORE) return PGM_PROTECTION; asce->val = aste.asce; @@ -477,25 +477,28 @@ enum { }; static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce, - ar_t ar, int write) + ar_t ar, enum gacc_mode mode) { int rc; - psw_t *psw = &vcpu->arch.sie_block->gpsw; + struct psw_bits psw = psw_bits(vcpu->arch.sie_block->gpsw); struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; struct trans_exc_code_bits *tec_bits; memset(pgm, 0, sizeof(*pgm)); tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code; - tec_bits->fsi = write ? FSI_STORE : FSI_FETCH; - tec_bits->as = psw_bits(*psw).as; + tec_bits->fsi = mode == GACC_STORE ? FSI_STORE : FSI_FETCH; + tec_bits->as = psw.as; - if (!psw_bits(*psw).t) { + if (!psw.t) { asce->val = 0; asce->r = 1; return 0; } - switch (psw_bits(vcpu->arch.sie_block->gpsw).as) { + if (mode == GACC_IFETCH) + psw.as = psw.as == PSW_AS_HOME ? PSW_AS_HOME : PSW_AS_PRIMARY; + + switch (psw.as) { case PSW_AS_PRIMARY: asce->val = vcpu->arch.sie_block->gcr[1]; return 0; @@ -506,7 +509,7 @@ static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce, asce->val = vcpu->arch.sie_block->gcr[13]; return 0; case PSW_AS_ACCREG: - rc = ar_translation(vcpu, asce, ar, write); + rc = ar_translation(vcpu, asce, ar, mode); switch (rc) { case PGM_ALEN_TRANSLATION: case PGM_ALE_SEQUENCE: @@ -538,7 +541,7 @@ static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val) * @gva: guest virtual address * @gpa: points to where guest physical (absolute) address should be stored * @asce: effective asce - * @write: indicates if access is a write access + * @mode: indicates the access mode to be used * * Translate a guest virtual address into a guest absolute address by means * of dynamic address translation as specified by the architecture. @@ -554,7 +557,7 @@ static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val) */ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva, unsigned long *gpa, const union asce asce, - int write) + enum gacc_mode mode) { union vaddress vaddr = {.addr = gva}; union raddress raddr = {.addr = gva}; @@ -699,7 +702,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva, real_address: raddr.addr = kvm_s390_real_to_abs(vcpu, raddr.addr); absolute_address: - if (write && dat_protection) + if (mode == GACC_STORE && dat_protection) return PGM_PROTECTION; if (kvm_is_error_gpa(vcpu->kvm, raddr.addr)) return PGM_ADDRESSING; @@ -728,7 +731,7 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu, static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, unsigned long *pages, unsigned long nr_pages, - const union asce asce, int write) + const union asce asce, enum gacc_mode mode) { struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; psw_t *psw = &vcpu->arch.sie_block->gpsw; @@ -740,13 +743,13 @@ static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, while (nr_pages) { ga = kvm_s390_logical_to_effective(vcpu, ga); tec_bits->addr = ga >> PAGE_SHIFT; - if (write && lap_enabled && is_low_address(ga)) { + if (mode == GACC_STORE && lap_enabled && is_low_address(ga)) { pgm->code = PGM_PROTECTION; return pgm->code; } ga &= PAGE_MASK; if (psw_bits(*psw).t) { - rc = guest_translate(vcpu, ga, pages, asce, write); + rc = guest_translate(vcpu, ga, pages, asce, mode); if (rc < 0) return rc; if (rc == PGM_PROTECTION) @@ -768,7 +771,7 @@ static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, } int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, - unsigned long len, int write) + unsigned long len, enum gacc_mode mode) { psw_t *psw = &vcpu->arch.sie_block->gpsw; unsigned long _len, nr_pages, gpa, idx; @@ -780,7 +783,7 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, if (!len) return 0; - rc = get_vcpu_asce(vcpu, &asce, ar, write); + rc = get_vcpu_asce(vcpu, &asce, ar, mode); if (rc) return rc; nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1; @@ -792,11 +795,11 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, need_ipte_lock = psw_bits(*psw).t && !asce.r; if (need_ipte_lock) ipte_lock(vcpu); - rc = guest_page_range(vcpu, ga, pages, nr_pages, asce, write); + rc = guest_page_range(vcpu, ga, pages, nr_pages, asce, mode); for (idx = 0; idx < nr_pages && !rc; idx++) { gpa = *(pages + idx) + (ga & ~PAGE_MASK); _len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len); - if (write) + if (mode == GACC_STORE) rc = kvm_write_guest(vcpu->kvm, gpa, data, _len); else rc = kvm_read_guest(vcpu->kvm, gpa, data, _len); @@ -812,7 +815,7 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, } int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, - void *data, unsigned long len, int write) + void *data, unsigned long len, enum gacc_mode mode) { unsigned long _len, gpa; int rc = 0; @@ -820,7 +823,7 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, while (len && !rc) { gpa = kvm_s390_real_to_abs(vcpu, gra); _len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len); - if (write) + if (mode) rc = write_guest_abs(vcpu, gpa, data, _len); else rc = read_guest_abs(vcpu, gpa, data, _len); @@ -841,7 +844,7 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, * has to take care of this. */ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, - unsigned long *gpa, int write) + unsigned long *gpa, enum gacc_mode mode) { struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; psw_t *psw = &vcpu->arch.sie_block->gpsw; @@ -851,19 +854,19 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, gva = kvm_s390_logical_to_effective(vcpu, gva); tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code; - rc = get_vcpu_asce(vcpu, &asce, ar, write); + rc = get_vcpu_asce(vcpu, &asce, ar, mode); tec->addr = gva >> PAGE_SHIFT; if (rc) return rc; if (is_low_address(gva) && low_address_protection_enabled(vcpu, asce)) { - if (write) { + if (mode == GACC_STORE) { rc = pgm->code = PGM_PROTECTION; return rc; } } if (psw_bits(*psw).t && !asce.r) { /* Use DAT? */ - rc = guest_translate(vcpu, gva, gpa, asce, write); + rc = guest_translate(vcpu, gva, gpa, asce, mode); if (rc > 0) { if (rc == PGM_PROTECTION) tec->b61 = 1; @@ -883,7 +886,7 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, * check_gva_range - test a range of guest virtual addresses for accessibility */ int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, - unsigned long length, int is_write) + unsigned long length, enum gacc_mode mode) { unsigned long gpa; unsigned long currlen; @@ -892,7 +895,7 @@ int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, ipte_lock(vcpu); while (length > 0 && !rc) { currlen = min(length, PAGE_SIZE - (gva % PAGE_SIZE)); - rc = guest_translate_address(vcpu, gva, ar, &gpa, is_write); + rc = guest_translate_address(vcpu, gva, ar, &gpa, mode); gva += currlen; length -= currlen; } diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h index ef03726cc661..df0a79dd8159 100644 --- a/arch/s390/kvm/gaccess.h +++ b/arch/s390/kvm/gaccess.h @@ -155,16 +155,22 @@ int read_guest_lc(struct kvm_vcpu *vcpu, unsigned long gra, void *data, return kvm_read_guest(vcpu->kvm, gpa, data, len); } +enum gacc_mode { + GACC_FETCH, + GACC_STORE, + GACC_IFETCH, +}; + int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, - ar_t ar, unsigned long *gpa, int write); + ar_t ar, unsigned long *gpa, enum gacc_mode mode); int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, - unsigned long length, int is_write); + unsigned long length, enum gacc_mode mode); int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, - unsigned long len, int write); + unsigned long len, enum gacc_mode mode); int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, - void *data, unsigned long len, int write); + void *data, unsigned long len, enum gacc_mode mode); /** * write_guest - copy data from kernel space to guest space @@ -215,7 +221,7 @@ static inline __must_check int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, unsigned long len) { - return access_guest(vcpu, ga, ar, data, len, 1); + return access_guest(vcpu, ga, ar, data, len, GACC_STORE); } /** @@ -235,7 +241,27 @@ static inline __must_check int read_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, unsigned long len) { - return access_guest(vcpu, ga, ar, data, len, 0); + return access_guest(vcpu, ga, ar, data, len, GACC_FETCH); +} + +/** + * read_guest_instr - copy instruction data from guest space to kernel space + * @vcpu: virtual cpu + * @data: destination address in kernel space + * @len: number of bytes to copy + * + * Copy @len bytes from the current psw address (guest space) to @data (kernel + * space). + * + * The behaviour of read_guest_instr is identical to read_guest, except that + * instruction data will be read from primary space when in home-space or + * address-space mode. + */ +static inline __must_check +int read_guest_instr(struct kvm_vcpu *vcpu, void *data, unsigned long len) +{ + return access_guest(vcpu, vcpu->arch.sie_block->gpsw.addr, 0, data, len, + GACC_IFETCH); } /** diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c index d697312ce9ee..e8c6843b9600 100644 --- a/arch/s390/kvm/guestdbg.c +++ b/arch/s390/kvm/guestdbg.c @@ -17,7 +17,7 @@ /* * Extends the address range given by *start and *stop to include the address * range starting with estart and the length len. Takes care of overflowing - * intervals and tries to minimize the overall intervall size. + * intervals and tries to minimize the overall interval size. */ static void extend_address_range(u64 *start, u64 *stop, u64 estart, int len) { @@ -72,7 +72,7 @@ static void enable_all_hw_bp(struct kvm_vcpu *vcpu) return; /* - * If the guest is not interrested in branching events, we can savely + * If the guest is not interested in branching events, we can safely * limit them to the PER address range. */ if (!(*cr9 & PER_EVENT_BRANCH)) diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index d53c10753c46..252157181302 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -38,17 +38,32 @@ static const intercept_handler_t instruction_handlers[256] = { [0xeb] = kvm_s390_handle_eb, }; -void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilc) +u8 kvm_s390_get_ilen(struct kvm_vcpu *vcpu) { struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block; + u8 ilen = 0; - /* Use the length of the EXECUTE instruction if necessary */ - if (sie_block->icptstatus & 1) { - ilc = (sie_block->icptstatus >> 4) & 0x6; - if (!ilc) - ilc = 4; + switch (vcpu->arch.sie_block->icptcode) { + case ICPT_INST: + case ICPT_INSTPROGI: + case ICPT_OPEREXC: + case ICPT_PARTEXEC: + case ICPT_IOINST: + /* instruction only stored for these icptcodes */ + ilen = insn_length(vcpu->arch.sie_block->ipa >> 8); + /* Use the length of the EXECUTE instruction if necessary */ + if (sie_block->icptstatus & 1) { + ilen = (sie_block->icptstatus >> 4) & 0x6; + if (!ilen) + ilen = 4; + } + break; + case ICPT_PROGI: + /* bit 1+2 of pgmilc are the ilc, so we directly get ilen */ + ilen = vcpu->arch.sie_block->pgmilc & 0x6; + break; } - sie_block->gpsw.addr = __rewind_psw(sie_block->gpsw, ilc); + return ilen; } static int handle_noop(struct kvm_vcpu *vcpu) @@ -121,11 +136,13 @@ static int handle_instruction(struct kvm_vcpu *vcpu) return -EOPNOTSUPP; } -static void __extract_prog_irq(struct kvm_vcpu *vcpu, - struct kvm_s390_pgm_info *pgm_info) +static int inject_prog_on_prog_intercept(struct kvm_vcpu *vcpu) { - memset(pgm_info, 0, sizeof(struct kvm_s390_pgm_info)); - pgm_info->code = vcpu->arch.sie_block->iprcc; + struct kvm_s390_pgm_info pgm_info = { + .code = vcpu->arch.sie_block->iprcc, + /* the PSW has already been rewound */ + .flags = KVM_S390_PGM_FLAGS_NO_REWIND, + }; switch (vcpu->arch.sie_block->iprcc & ~PGM_PER) { case PGM_AFX_TRANSLATION: @@ -138,7 +155,7 @@ static void __extract_prog_irq(struct kvm_vcpu *vcpu, case PGM_PRIMARY_AUTHORITY: case PGM_SECONDARY_AUTHORITY: case PGM_SPACE_SWITCH: - pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc; + pgm_info.trans_exc_code = vcpu->arch.sie_block->tecmc; break; case PGM_ALEN_TRANSLATION: case PGM_ALE_SEQUENCE: @@ -146,7 +163,7 @@ static void __extract_prog_irq(struct kvm_vcpu *vcpu, case PGM_ASTE_SEQUENCE: case PGM_ASTE_VALIDITY: case PGM_EXTENDED_AUTHORITY: - pgm_info->exc_access_id = vcpu->arch.sie_block->eai; + pgm_info.exc_access_id = vcpu->arch.sie_block->eai; break; case PGM_ASCE_TYPE: case PGM_PAGE_TRANSLATION: @@ -154,32 +171,33 @@ static void __extract_prog_irq(struct kvm_vcpu *vcpu, case PGM_REGION_SECOND_TRANS: case PGM_REGION_THIRD_TRANS: case PGM_SEGMENT_TRANSLATION: - pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc; - pgm_info->exc_access_id = vcpu->arch.sie_block->eai; - pgm_info->op_access_id = vcpu->arch.sie_block->oai; + pgm_info.trans_exc_code = vcpu->arch.sie_block->tecmc; + pgm_info.exc_access_id = vcpu->arch.sie_block->eai; + pgm_info.op_access_id = vcpu->arch.sie_block->oai; break; case PGM_MONITOR: - pgm_info->mon_class_nr = vcpu->arch.sie_block->mcn; - pgm_info->mon_code = vcpu->arch.sie_block->tecmc; + pgm_info.mon_class_nr = vcpu->arch.sie_block->mcn; + pgm_info.mon_code = vcpu->arch.sie_block->tecmc; break; case PGM_VECTOR_PROCESSING: case PGM_DATA: - pgm_info->data_exc_code = vcpu->arch.sie_block->dxc; + pgm_info.data_exc_code = vcpu->arch.sie_block->dxc; break; case PGM_PROTECTION: - pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc; - pgm_info->exc_access_id = vcpu->arch.sie_block->eai; + pgm_info.trans_exc_code = vcpu->arch.sie_block->tecmc; + pgm_info.exc_access_id = vcpu->arch.sie_block->eai; break; default: break; } if (vcpu->arch.sie_block->iprcc & PGM_PER) { - pgm_info->per_code = vcpu->arch.sie_block->perc; - pgm_info->per_atmid = vcpu->arch.sie_block->peratmid; - pgm_info->per_address = vcpu->arch.sie_block->peraddr; - pgm_info->per_access_id = vcpu->arch.sie_block->peraid; + pgm_info.per_code = vcpu->arch.sie_block->perc; + pgm_info.per_atmid = vcpu->arch.sie_block->peratmid; + pgm_info.per_address = vcpu->arch.sie_block->peraddr; + pgm_info.per_access_id = vcpu->arch.sie_block->peraid; } + return kvm_s390_inject_prog_irq(vcpu, &pgm_info); } /* @@ -208,7 +226,6 @@ static int handle_itdb(struct kvm_vcpu *vcpu) static int handle_prog(struct kvm_vcpu *vcpu) { - struct kvm_s390_pgm_info pgm_info; psw_t psw; int rc; @@ -234,8 +251,7 @@ static int handle_prog(struct kvm_vcpu *vcpu) if (rc) return rc; - __extract_prog_irq(vcpu, &pgm_info); - return kvm_s390_inject_prog_irq(vcpu, &pgm_info); + return inject_prog_on_prog_intercept(vcpu); } /** @@ -302,7 +318,7 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu) /* Make sure that the source is paged-in */ rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg2], - reg2, &srcaddr, 0); + reg2, &srcaddr, GACC_FETCH); if (rc) return kvm_s390_inject_prog_cond(vcpu, rc); rc = kvm_arch_fault_in_page(vcpu, srcaddr, 0); @@ -311,20 +327,22 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu) /* Make sure that the destination is paged-in */ rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg1], - reg1, &dstaddr, 1); + reg1, &dstaddr, GACC_STORE); if (rc) return kvm_s390_inject_prog_cond(vcpu, rc); rc = kvm_arch_fault_in_page(vcpu, dstaddr, 1); if (rc != 0) return rc; - kvm_s390_rewind_psw(vcpu, 4); + kvm_s390_retry_instr(vcpu); return 0; } static int handle_partial_execution(struct kvm_vcpu *vcpu) { + vcpu->stat.exit_pei++; + if (vcpu->arch.sie_block->ipa == 0xb254) /* MVPG */ return handle_mvpg_pei(vcpu); if (vcpu->arch.sie_block->ipa >> 8 == 0xae) /* SIGP */ diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index f88ca72c3a77..5a80af740d3e 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -23,6 +23,7 @@ #include <asm/uaccess.h> #include <asm/sclp.h> #include <asm/isc.h> +#include <asm/gmap.h> #include "kvm-s390.h" #include "gaccess.h" #include "trace-s390.h" @@ -182,8 +183,9 @@ static int cpu_timer_interrupts_enabled(struct kvm_vcpu *vcpu) static int cpu_timer_irq_pending(struct kvm_vcpu *vcpu) { - return (vcpu->arch.sie_block->cputm >> 63) && - cpu_timer_interrupts_enabled(vcpu); + if (!cpu_timer_interrupts_enabled(vcpu)) + return 0; + return kvm_s390_get_cpu_timer(vcpu) >> 63; } static inline int is_ioirq(unsigned long irq_type) @@ -335,23 +337,6 @@ static void set_intercept_indicators(struct kvm_vcpu *vcpu) set_intercept_indicators_stop(vcpu); } -static u16 get_ilc(struct kvm_vcpu *vcpu) -{ - switch (vcpu->arch.sie_block->icptcode) { - case ICPT_INST: - case ICPT_INSTPROGI: - case ICPT_OPEREXC: - case ICPT_PARTEXEC: - case ICPT_IOINST: - /* last instruction only stored for these icptcodes */ - return insn_length(vcpu->arch.sie_block->ipa >> 8); - case ICPT_PROGI: - return vcpu->arch.sie_block->pgmilc; - default: - return 0; - } -} - static int __must_check __deliver_cpu_timer(struct kvm_vcpu *vcpu) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; @@ -588,7 +573,7 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu) struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; struct kvm_s390_pgm_info pgm_info; int rc = 0, nullifying = false; - u16 ilc = get_ilc(vcpu); + u16 ilen; spin_lock(&li->lock); pgm_info = li->irq.pgm; @@ -596,8 +581,9 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu) memset(&li->irq.pgm, 0, sizeof(pgm_info)); spin_unlock(&li->lock); - VCPU_EVENT(vcpu, 3, "deliver: program irq code 0x%x, ilc:%d", - pgm_info.code, ilc); + ilen = pgm_info.flags & KVM_S390_PGM_FLAGS_ILC_MASK; + VCPU_EVENT(vcpu, 3, "deliver: program irq code 0x%x, ilen:%d", + pgm_info.code, ilen); vcpu->stat.deliver_program_int++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_PROGRAM_INT, pgm_info.code, 0); @@ -681,10 +667,11 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu) (u8 *) __LC_PER_ACCESS_ID); } - if (nullifying && vcpu->arch.sie_block->icptcode == ICPT_INST) - kvm_s390_rewind_psw(vcpu, ilc); + if (nullifying && !(pgm_info.flags & KVM_S390_PGM_FLAGS_NO_REWIND)) + kvm_s390_rewind_psw(vcpu, ilen); - rc |= put_guest_lc(vcpu, ilc, (u16 *) __LC_PGM_ILC); + /* bit 1+2 of the target are the ilc, so we can directly use ilen */ + rc |= put_guest_lc(vcpu, ilen, (u16 *) __LC_PGM_ILC); rc |= put_guest_lc(vcpu, vcpu->arch.sie_block->gbea, (u64 *) __LC_LAST_BREAK); rc |= put_guest_lc(vcpu, pgm_info.code, @@ -923,9 +910,35 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) return ckc_irq_pending(vcpu) || cpu_timer_irq_pending(vcpu); } +static u64 __calculate_sltime(struct kvm_vcpu *vcpu) +{ + u64 now, cputm, sltime = 0; + + if (ckc_interrupts_enabled(vcpu)) { + now = kvm_s390_get_tod_clock_fast(vcpu->kvm); + sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now); + /* already expired or overflow? */ + if (!sltime || vcpu->arch.sie_block->ckc <= now) + return 0; + if (cpu_timer_interrupts_enabled(vcpu)) { + cputm = kvm_s390_get_cpu_timer(vcpu); + /* already expired? */ + if (cputm >> 63) + return 0; + return min(sltime, tod_to_ns(cputm)); + } + } else if (cpu_timer_interrupts_enabled(vcpu)) { + sltime = kvm_s390_get_cpu_timer(vcpu); + /* already expired? */ + if (sltime >> 63) + return 0; + } + return sltime; +} + int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) { - u64 now, sltime; + u64 sltime; vcpu->stat.exit_wait_state++; @@ -938,22 +951,20 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) return -EOPNOTSUPP; /* disabled wait */ } - if (!ckc_interrupts_enabled(vcpu)) { + if (!ckc_interrupts_enabled(vcpu) && + !cpu_timer_interrupts_enabled(vcpu)) { VCPU_EVENT(vcpu, 3, "%s", "enabled wait w/o timer"); __set_cpu_idle(vcpu); goto no_timer; } - now = kvm_s390_get_tod_clock_fast(vcpu->kvm); - sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now); - - /* underflow */ - if (vcpu->arch.sie_block->ckc < now) + sltime = __calculate_sltime(vcpu); + if (!sltime) return 0; __set_cpu_idle(vcpu); hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL); - VCPU_EVENT(vcpu, 4, "enabled wait via clock comparator: %llu ns", sltime); + VCPU_EVENT(vcpu, 4, "enabled wait: %llu ns", sltime); no_timer: srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); kvm_vcpu_block(vcpu); @@ -966,13 +977,18 @@ no_timer: void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu) { - if (waitqueue_active(&vcpu->wq)) { + /* + * We cannot move this into the if, as the CPU might be already + * in kvm_vcpu_block without having the waitqueue set (polling) + */ + vcpu->valid_wakeup = true; + if (swait_active(&vcpu->wq)) { /* * The vcpu gave up the cpu voluntarily, mark it as a good * yield-candidate. */ vcpu->preempted = true; - wake_up_interruptible(&vcpu->wq); + swake_up(&vcpu->wq); vcpu->stat.halt_wakeup++; } } @@ -980,18 +996,16 @@ void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu) enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer) { struct kvm_vcpu *vcpu; - u64 now, sltime; + u64 sltime; vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer); - now = kvm_s390_get_tod_clock_fast(vcpu->kvm); - sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now); + sltime = __calculate_sltime(vcpu); /* * If the monotonic clock runs faster than the tod clock we might be * woken up too early and have to go back to sleep to avoid deadlocks. */ - if (vcpu->arch.sie_block->ckc > now && - hrtimer_forward_now(timer, ns_to_ktime(sltime))) + if (sltime && hrtimer_forward_now(timer, ns_to_ktime(sltime))) return HRTIMER_RESTART; kvm_s390_vcpu_wakeup(vcpu); return HRTIMER_NORESTART; @@ -1059,8 +1073,16 @@ static int __inject_prog(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT, irq->u.pgm.code, 0); + if (!(irq->u.pgm.flags & KVM_S390_PGM_FLAGS_ILC_VALID)) { + /* auto detection if no valid ILC was given */ + irq->u.pgm.flags &= ~KVM_S390_PGM_FLAGS_ILC_MASK; + irq->u.pgm.flags |= kvm_s390_get_ilen(vcpu); + irq->u.pgm.flags |= KVM_S390_PGM_FLAGS_ILC_VALID; + } + if (irq->u.pgm.code == PGM_PER) { li->irq.pgm.code |= PGM_PER; + li->irq.pgm.flags = irq->u.pgm.flags; /* only modify PER related information */ li->irq.pgm.per_address = irq->u.pgm.per_address; li->irq.pgm.per_code = irq->u.pgm.per_code; @@ -1069,6 +1091,7 @@ static int __inject_prog(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) } else if (!(irq->u.pgm.code & PGM_PER)) { li->irq.pgm.code = (li->irq.pgm.code & PGM_PER) | irq->u.pgm.code; + li->irq.pgm.flags = irq->u.pgm.flags; /* only modify non-PER information */ li->irq.pgm.trans_exc_code = irq->u.pgm.trans_exc_code; li->irq.pgm.mon_code = irq->u.pgm.mon_code; @@ -2016,6 +2039,27 @@ static int modify_io_adapter(struct kvm_device *dev, return ret; } +static int clear_io_irq(struct kvm *kvm, struct kvm_device_attr *attr) + +{ + const u64 isc_mask = 0xffUL << 24; /* all iscs set */ + u32 schid; + + if (attr->flags) + return -EINVAL; + if (attr->attr != sizeof(schid)) + return -EINVAL; + if (copy_from_user(&schid, (void __user *) attr->addr, sizeof(schid))) + return -EFAULT; + kfree(kvm_s390_get_io_int(kvm, isc_mask, schid)); + /* + * If userspace is conforming to the architecture, we can have at most + * one pending I/O interrupt per subchannel, so this is effectively a + * clear all. + */ + return 0; +} + static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { int r = 0; @@ -2049,6 +2093,9 @@ static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) case KVM_DEV_FLIC_ADAPTER_MODIFY: r = modify_io_adapter(dev, attr); break; + case KVM_DEV_FLIC_CLEAR_IO_IRQ: + r = clear_io_irq(dev->kvm, attr); + break; default: r = -EINVAL; } @@ -2056,6 +2103,23 @@ static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) return r; } +static int flic_has_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + switch (attr->group) { + case KVM_DEV_FLIC_GET_ALL_IRQS: + case KVM_DEV_FLIC_ENQUEUE: + case KVM_DEV_FLIC_CLEAR_IRQS: + case KVM_DEV_FLIC_APF_ENABLE: + case KVM_DEV_FLIC_APF_DISABLE_WAIT: + case KVM_DEV_FLIC_ADAPTER_REGISTER: + case KVM_DEV_FLIC_ADAPTER_MODIFY: + case KVM_DEV_FLIC_CLEAR_IO_IRQ: + return 0; + } + return -ENXIO; +} + static int flic_create(struct kvm_device *dev, u32 type) { if (!dev) @@ -2077,6 +2141,7 @@ struct kvm_device_ops kvm_flic_ops = { .name = "kvm-flic", .get_attr = flic_get_attr, .set_attr = flic_set_attr, + .has_attr = flic_has_attr, .create = flic_create, .destroy = flic_destroy, }; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 03dfe9c667f4..43f2a2b80490 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -30,6 +30,7 @@ #include <asm/lowcore.h> #include <asm/etr.h> #include <asm/pgtable.h> +#include <asm/gmap.h> #include <asm/nmi.h> #include <asm/switch_to.h> #include <asm/isc.h> @@ -60,10 +61,12 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "exit_external_request", VCPU_STAT(exit_external_request) }, { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) }, { "exit_instruction", VCPU_STAT(exit_instruction) }, + { "exit_pei", VCPU_STAT(exit_pei) }, { "exit_program_interruption", VCPU_STAT(exit_program_interruption) }, { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) }, { "halt_successful_poll", VCPU_STAT(halt_successful_poll) }, { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) }, + { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) }, { "halt_wakeup", VCPU_STAT(halt_wakeup) }, { "instruction_lctlg", VCPU_STAT(instruction_lctlg) }, { "instruction_lctl", VCPU_STAT(instruction_lctl) }, @@ -117,9 +120,9 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { }; /* upper facilities limit for kvm */ -unsigned long kvm_s390_fac_list_mask[] = { - 0xffe6fffbfcfdfc40UL, - 0x005e800000000000UL, +unsigned long kvm_s390_fac_list_mask[16] = { + 0xffe6000000000000UL, + 0x005e000000000000UL, }; unsigned long kvm_s390_fac_list_mask_size(void) @@ -158,6 +161,8 @@ static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val, kvm->arch.epoch -= *delta; kvm_for_each_vcpu(i, vcpu, kvm) { vcpu->arch.sie_block->epoch -= *delta; + if (vcpu->arch.cputm_enabled) + vcpu->arch.cputm_start += *delta; } } return NOTIFY_OK; @@ -274,16 +279,17 @@ static void kvm_s390_sync_dirty_log(struct kvm *kvm, unsigned long address; struct gmap *gmap = kvm->arch.gmap; - down_read(&gmap->mm->mmap_sem); /* Loop over all guest pages */ last_gfn = memslot->base_gfn + memslot->npages; for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) { address = gfn_to_hva_memslot(memslot, cur_gfn); - if (gmap_test_and_clear_dirty(address, gmap)) + if (test_and_clear_guest_dirty(gmap->mm, address)) mark_page_dirty(kvm, cur_gfn); + if (fatal_signal_pending(current)) + return; + cond_resched(); } - up_read(&gmap->mm->mmap_sem); } /* Section: vm related */ @@ -352,8 +358,8 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) if (atomic_read(&kvm->online_vcpus)) { r = -EBUSY; } else if (MACHINE_HAS_VX) { - set_kvm_facility(kvm->arch.model.fac->mask, 129); - set_kvm_facility(kvm->arch.model.fac->list, 129); + set_kvm_facility(kvm->arch.model.fac_mask, 129); + set_kvm_facility(kvm->arch.model.fac_list, 129); r = 0; } else r = -EINVAL; @@ -367,8 +373,8 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) if (atomic_read(&kvm->online_vcpus)) { r = -EBUSY; } else if (test_facility(64)) { - set_kvm_facility(kvm->arch.model.fac->mask, 64); - set_kvm_facility(kvm->arch.model.fac->list, 64); + set_kvm_facility(kvm->arch.model.fac_mask, 64); + set_kvm_facility(kvm->arch.model.fac_list, 64); r = 0; } mutex_unlock(&kvm->lock); @@ -634,6 +640,7 @@ static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr) static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr) { struct kvm_s390_vm_cpu_processor *proc; + u16 lowest_ibc, unblocked_ibc; int ret = 0; mutex_lock(&kvm->lock); @@ -648,10 +655,18 @@ static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr) } if (!copy_from_user(proc, (void __user *)attr->addr, sizeof(*proc))) { - memcpy(&kvm->arch.model.cpu_id, &proc->cpuid, - sizeof(struct cpuid)); - kvm->arch.model.ibc = proc->ibc; - memcpy(kvm->arch.model.fac->list, proc->fac_list, + kvm->arch.model.cpuid = proc->cpuid; + lowest_ibc = sclp.ibc >> 16 & 0xfff; + unblocked_ibc = sclp.ibc & 0xfff; + if (lowest_ibc && proc->ibc) { + if (proc->ibc > unblocked_ibc) + kvm->arch.model.ibc = unblocked_ibc; + else if (proc->ibc < lowest_ibc) + kvm->arch.model.ibc = lowest_ibc; + else + kvm->arch.model.ibc = proc->ibc; + } + memcpy(kvm->arch.model.fac_list, proc->fac_list, S390_ARCH_FAC_LIST_SIZE_BYTE); } else ret = -EFAULT; @@ -683,9 +698,10 @@ static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr) ret = -ENOMEM; goto out; } - memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid)); + proc->cpuid = kvm->arch.model.cpuid; proc->ibc = kvm->arch.model.ibc; - memcpy(&proc->fac_list, kvm->arch.model.fac->list, S390_ARCH_FAC_LIST_SIZE_BYTE); + memcpy(&proc->fac_list, kvm->arch.model.fac_list, + S390_ARCH_FAC_LIST_SIZE_BYTE); if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc))) ret = -EFAULT; kfree(proc); @@ -705,7 +721,7 @@ static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr) } get_cpu_id((struct cpuid *) &mach->cpuid); mach->ibc = sclp.ibc; - memcpy(&mach->fac_mask, kvm->arch.model.fac->mask, + memcpy(&mach->fac_mask, kvm->arch.model.fac_mask, S390_ARCH_FAC_LIST_SIZE_BYTE); memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list, S390_ARCH_FAC_LIST_SIZE_BYTE); @@ -1076,22 +1092,21 @@ static void kvm_s390_set_crycb_format(struct kvm *kvm) kvm->arch.crypto.crycbd |= CRYCB_FORMAT1; } -static void kvm_s390_get_cpu_id(struct cpuid *cpu_id) +static u64 kvm_s390_get_initial_cpuid(void) { - get_cpu_id(cpu_id); - cpu_id->version = 0xff; + struct cpuid cpuid; + + get_cpu_id(&cpuid); + cpuid.version = 0xff; + return *((u64 *) &cpuid); } -static int kvm_s390_crypto_init(struct kvm *kvm) +static void kvm_s390_crypto_init(struct kvm *kvm) { if (!test_kvm_facility(kvm, 76)) - return 0; - - kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb), - GFP_KERNEL | GFP_DMA); - if (!kvm->arch.crypto.crycb) - return -ENOMEM; + return; + kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb; kvm_s390_set_crycb_format(kvm); /* Enable AES/DEA protected key functions by default */ @@ -1101,8 +1116,6 @@ static int kvm_s390_crypto_init(struct kvm *kvm) sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask)); get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask, sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask)); - - return 0; } static void sca_dispose(struct kvm *kvm) @@ -1156,37 +1169,30 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (!kvm->arch.dbf) goto out_err; - /* - * The architectural maximum amount of facilities is 16 kbit. To store - * this amount, 2 kbyte of memory is required. Thus we need a full - * page to hold the guest facility list (arch.model.fac->list) and the - * facility mask (arch.model.fac->mask). Its address size has to be - * 31 bits and word aligned. - */ - kvm->arch.model.fac = - (struct kvm_s390_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA); - if (!kvm->arch.model.fac) + kvm->arch.sie_page2 = + (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA); + if (!kvm->arch.sie_page2) goto out_err; /* Populate the facility mask initially. */ - memcpy(kvm->arch.model.fac->mask, S390_lowcore.stfle_fac_list, + memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list, S390_ARCH_FAC_LIST_SIZE_BYTE); for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) { if (i < kvm_s390_fac_list_mask_size()) - kvm->arch.model.fac->mask[i] &= kvm_s390_fac_list_mask[i]; + kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i]; else - kvm->arch.model.fac->mask[i] = 0UL; + kvm->arch.model.fac_mask[i] = 0UL; } /* Populate the facility list initially. */ - memcpy(kvm->arch.model.fac->list, kvm->arch.model.fac->mask, + kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list; + memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask, S390_ARCH_FAC_LIST_SIZE_BYTE); - kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id); + kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid(); kvm->arch.model.ibc = sclp.ibc & 0x0fff; - if (kvm_s390_crypto_init(kvm) < 0) - goto out_err; + kvm_s390_crypto_init(kvm); spin_lock_init(&kvm->arch.float_int.lock); for (i = 0; i < FIRQ_LIST_COUNT; i++) @@ -1222,8 +1228,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) return 0; out_err: - kfree(kvm->arch.crypto.crycb); - free_page((unsigned long)kvm->arch.model.fac); + free_page((unsigned long)kvm->arch.sie_page2); debug_unregister(kvm->arch.dbf); sca_dispose(kvm); KVM_EVENT(3, "creation of vm failed: %d", rc); @@ -1269,10 +1274,9 @@ static void kvm_free_vcpus(struct kvm *kvm) void kvm_arch_destroy_vm(struct kvm *kvm) { kvm_free_vcpus(kvm); - free_page((unsigned long)kvm->arch.model.fac); sca_dispose(kvm); debug_unregister(kvm->arch.dbf); - kfree(kvm->arch.crypto.crycb); + free_page((unsigned long)kvm->arch.sie_page2); if (!kvm_is_ucontrol(kvm)) gmap_free(kvm->arch.gmap); kvm_s390_destroy_adapters(kvm); @@ -1414,8 +1418,13 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) KVM_SYNC_PFAULT; if (test_kvm_facility(vcpu->kvm, 64)) vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB; - if (test_kvm_facility(vcpu->kvm, 129)) + /* fprs can be synchronized via vrs, even if the guest has no vx. With + * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format. + */ + if (MACHINE_HAS_VX) vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS; + else + vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS; if (kvm_is_ucontrol(vcpu->kvm)) return __kvm_ucontrol_vcpu_init(vcpu); @@ -1423,6 +1432,93 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) return 0; } +/* needs disabled preemption to protect from TOD sync and vcpu_load/put */ +static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu) +{ + WARN_ON_ONCE(vcpu->arch.cputm_start != 0); + raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); + vcpu->arch.cputm_start = get_tod_clock_fast(); + raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); +} + +/* needs disabled preemption to protect from TOD sync and vcpu_load/put */ +static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu) +{ + WARN_ON_ONCE(vcpu->arch.cputm_start == 0); + raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); + vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start; + vcpu->arch.cputm_start = 0; + raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); +} + +/* needs disabled preemption to protect from TOD sync and vcpu_load/put */ +static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu) +{ + WARN_ON_ONCE(vcpu->arch.cputm_enabled); + vcpu->arch.cputm_enabled = true; + __start_cpu_timer_accounting(vcpu); +} + +/* needs disabled preemption to protect from TOD sync and vcpu_load/put */ +static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu) +{ + WARN_ON_ONCE(!vcpu->arch.cputm_enabled); + __stop_cpu_timer_accounting(vcpu); + vcpu->arch.cputm_enabled = false; +} + +static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu) +{ + preempt_disable(); /* protect from TOD sync and vcpu_load/put */ + __enable_cpu_timer_accounting(vcpu); + preempt_enable(); +} + +static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu) +{ + preempt_disable(); /* protect from TOD sync and vcpu_load/put */ + __disable_cpu_timer_accounting(vcpu); + preempt_enable(); +} + +/* set the cpu timer - may only be called from the VCPU thread itself */ +void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm) +{ + preempt_disable(); /* protect from TOD sync and vcpu_load/put */ + raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount); + if (vcpu->arch.cputm_enabled) + vcpu->arch.cputm_start = get_tod_clock_fast(); + vcpu->arch.sie_block->cputm = cputm; + raw_write_seqcount_end(&vcpu->arch.cputm_seqcount); + preempt_enable(); +} + +/* update and get the cpu timer - can also be called from other VCPU threads */ +__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu) +{ + unsigned int seq; + __u64 value; + + if (unlikely(!vcpu->arch.cputm_enabled)) + return vcpu->arch.sie_block->cputm; + + preempt_disable(); /* protect from TOD sync and vcpu_load/put */ + do { + seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount); + /* + * If the writer would ever execute a read in the critical + * section, e.g. in irq context, we have a deadlock. + */ + WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu); + value = vcpu->arch.sie_block->cputm; + /* if cputm_start is 0, accounting is being started/stopped */ + if (likely(vcpu->arch.cputm_start)) + value -= get_tod_clock_fast() - vcpu->arch.cputm_start; + } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1)); + preempt_enable(); + return value; +} + void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { /* Save host register state */ @@ -1430,10 +1526,10 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc; vcpu->arch.host_fpregs.regs = current->thread.fpu.regs; - /* Depending on MACHINE_HAS_VX, data stored to vrs either - * has vector register or floating point register format. - */ - current->thread.fpu.regs = vcpu->run->s.regs.vrs; + if (MACHINE_HAS_VX) + current->thread.fpu.regs = vcpu->run->s.regs.vrs; + else + current->thread.fpu.regs = vcpu->run->s.regs.fprs; current->thread.fpu.fpc = vcpu->run->s.regs.fpc; if (test_fp_ctl(current->thread.fpu.fpc)) /* User space provided an invalid FPC, let's clear it */ @@ -1443,10 +1539,16 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) restore_access_regs(vcpu->run->s.regs.acrs); gmap_enable(vcpu->arch.gmap); atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); + if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu)) + __start_cpu_timer_accounting(vcpu); + vcpu->cpu = cpu; } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { + vcpu->cpu = -1; + if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu)) + __stop_cpu_timer_accounting(vcpu); atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); gmap_disable(vcpu->arch.gmap); @@ -1468,7 +1570,7 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->gpsw.mask = 0UL; vcpu->arch.sie_block->gpsw.addr = 0UL; kvm_s390_set_prefix(vcpu, 0); - vcpu->arch.sie_block->cputm = 0UL; + kvm_s390_set_cpu_timer(vcpu, 0); vcpu->arch.sie_block->ckc = 0UL; vcpu->arch.sie_block->todpr = 0; memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64)); @@ -1536,9 +1638,9 @@ static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu) { struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model; - vcpu->arch.cpu_id = model->cpu_id; vcpu->arch.sie_block->ibc = model->ibc; - vcpu->arch.sie_block->fac = (int) (long) model->fac->list; + if (test_kvm_facility(vcpu->kvm, 7)) + vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list; } int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) @@ -1556,11 +1658,14 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) kvm_s390_vcpu_setup_model(vcpu); - vcpu->arch.sie_block->ecb = 6; + vcpu->arch.sie_block->ecb = 0x02; + if (test_kvm_facility(vcpu->kvm, 9)) + vcpu->arch.sie_block->ecb |= 0x04; if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73)) vcpu->arch.sie_block->ecb |= 0x10; - vcpu->arch.sie_block->ecb2 = 8; + if (test_kvm_facility(vcpu->kvm, 8)) + vcpu->arch.sie_block->ecb2 |= 0x08; vcpu->arch.sie_block->eca = 0xC1002000U; if (sclp.has_siif) vcpu->arch.sie_block->eca |= 1; @@ -1616,6 +1721,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, vcpu->arch.local_int.float_int = &kvm->arch.float_int; vcpu->arch.local_int.wq = &vcpu->wq; vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags; + seqcount_init(&vcpu->arch.cputm_seqcount); rc = kvm_vcpu_init(vcpu, kvm, id); if (rc) @@ -1715,7 +1821,7 @@ static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, (u64 __user *)reg->addr); break; case KVM_REG_S390_CPU_TIMER: - r = put_user(vcpu->arch.sie_block->cputm, + r = put_user(kvm_s390_get_cpu_timer(vcpu), (u64 __user *)reg->addr); break; case KVM_REG_S390_CLOCK_COMP: @@ -1753,6 +1859,7 @@ static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) { int r = -EINVAL; + __u64 val; switch (reg->id) { case KVM_REG_S390_TODPR: @@ -1764,8 +1871,9 @@ static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, (u64 __user *)reg->addr); break; case KVM_REG_S390_CPU_TIMER: - r = get_user(vcpu->arch.sie_block->cputm, - (u64 __user *)reg->addr); + r = get_user(val, (u64 __user *)reg->addr); + if (!r) + kvm_s390_set_cpu_timer(vcpu, val); break; case KVM_REG_S390_CLOCK_COMP: r = get_user(vcpu->arch.sie_block->ckc, @@ -2158,8 +2266,10 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu) static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu) { - psw_t *psw = &vcpu->arch.sie_block->gpsw; - u8 opcode; + struct kvm_s390_pgm_info pgm_info = { + .code = PGM_ADDRESSING, + }; + u8 opcode, ilen; int rc; VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); @@ -2173,12 +2283,21 @@ static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu) * to look up the current opcode to get the length of the instruction * to be able to forward the PSW. */ - rc = read_guest(vcpu, psw->addr, 0, &opcode, 1); - if (rc) - return kvm_s390_inject_prog_cond(vcpu, rc); - psw->addr = __rewind_psw(*psw, -insn_length(opcode)); - - return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + rc = read_guest_instr(vcpu, &opcode, 1); + ilen = insn_length(opcode); + if (rc < 0) { + return rc; + } else if (rc) { + /* Instruction-Fetching Exceptions - we can't detect the ilen. + * Forward by arbitrary ilc, injection will take care of + * nullification if necessary. + */ + pgm_info = vcpu->arch.pgm; + ilen = 4; + } + pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID; + kvm_s390_forward_psw(vcpu, ilen); + return kvm_s390_inject_prog_irq(vcpu, &pgm_info); } static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) @@ -2244,10 +2363,12 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) */ local_irq_disable(); __kvm_guest_enter(); + __disable_cpu_timer_accounting(vcpu); local_irq_enable(); exit_reason = sie64a(vcpu->arch.sie_block, vcpu->run->s.regs.gprs); local_irq_disable(); + __enable_cpu_timer_accounting(vcpu); __kvm_guest_exit(); local_irq_enable(); vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); @@ -2271,7 +2392,7 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); } if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) { - vcpu->arch.sie_block->cputm = kvm_run->s.regs.cputm; + kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm); vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc; vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr; vcpu->arch.sie_block->pp = kvm_run->s.regs.pp; @@ -2293,7 +2414,7 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr; kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu); memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128); - kvm_run->s.regs.cputm = vcpu->arch.sie_block->cputm; + kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu); kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc; kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr; kvm_run->s.regs.pp = vcpu->arch.sie_block->pp; @@ -2325,6 +2446,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) } sync_regs(vcpu, kvm_run); + enable_cpu_timer_accounting(vcpu); might_fault(); rc = __vcpu_run(vcpu); @@ -2344,6 +2466,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) rc = 0; } + disable_cpu_timer_accounting(vcpu); store_regs(vcpu, kvm_run); if (vcpu->sigset_active) @@ -2364,7 +2487,7 @@ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa) unsigned char archmode = 1; freg_t fprs[NUM_FPRS]; unsigned int px; - u64 clkcomp; + u64 clkcomp, cputm; int rc; px = kvm_s390_get_prefix(vcpu); @@ -2386,7 +2509,7 @@ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa) fprs, 128); } else { rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA, - vcpu->run->s.regs.vrs, 128); + vcpu->run->s.regs.fprs, 128); } rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA, vcpu->run->s.regs.gprs, 128); @@ -2398,8 +2521,9 @@ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa) &vcpu->run->s.regs.fpc, 4); rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA, &vcpu->arch.sie_block->todpr, 4); + cputm = kvm_s390_get_cpu_timer(vcpu); rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA, - &vcpu->arch.sie_block->cputm, 8); + &cputm, 8); clkcomp = vcpu->arch.sie_block->ckc >> 8; rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA, &clkcomp, 8); @@ -2605,7 +2729,8 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu, switch (mop->op) { case KVM_S390_MEMOP_LOGICAL_READ: if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { - r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, false); + r = check_gva_range(vcpu, mop->gaddr, mop->ar, + mop->size, GACC_FETCH); break; } r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size); @@ -2616,7 +2741,8 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu, break; case KVM_S390_MEMOP_LOGICAL_WRITE: if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { - r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, true); + r = check_gva_range(vcpu, mop->gaddr, mop->ar, + mop->size, GACC_STORE); break; } if (copy_from_user(tmpbuf, uaddr, mop->size)) { @@ -2861,13 +2987,31 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, return; } +static inline unsigned long nonhyp_mask(int i) +{ + unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30; + + return 0x0000ffffffffffffUL >> (nonhyp_fai << 4); +} + +void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) +{ + vcpu->valid_wakeup = false; +} + static int __init kvm_s390_init(void) { + int i; + if (!sclp.has_sief2) { pr_info("SIE not available\n"); return -ENODEV; } + for (i = 0; i < 16; i++) + kvm_s390_fac_list_mask[i] |= + S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i); + return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); } diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index df1abada1f36..8621ab00ec8e 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -19,6 +19,7 @@ #include <linux/kvm.h> #include <linux/kvm_host.h> #include <asm/facility.h> +#include <asm/processor.h> typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu); @@ -53,6 +54,11 @@ static inline int is_vcpu_stopped(struct kvm_vcpu *vcpu) return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOPPED; } +static inline int is_vcpu_idle(struct kvm_vcpu *vcpu) +{ + return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_WAIT; +} + static inline int kvm_is_ucontrol(struct kvm *kvm) { #ifdef CONFIG_KVM_S390_UCONTROL @@ -154,8 +160,8 @@ static inline void kvm_s390_set_psw_cc(struct kvm_vcpu *vcpu, unsigned long cc) /* test availability of facility in a kvm instance */ static inline int test_kvm_facility(struct kvm *kvm, unsigned long nr) { - return __test_facility(nr, kvm->arch.model.fac->mask) && - __test_facility(nr, kvm->arch.model.fac->list); + return __test_facility(nr, kvm->arch.model.fac_mask) && + __test_facility(nr, kvm->arch.model.fac_list); } static inline int set_kvm_facility(u64 *fac_list, unsigned long nr) @@ -212,8 +218,22 @@ int kvm_s390_reinject_io_int(struct kvm *kvm, int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked); /* implemented in intercept.c */ -void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilc); +u8 kvm_s390_get_ilen(struct kvm_vcpu *vcpu); int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu); +static inline void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilen) +{ + struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block; + + sie_block->gpsw.addr = __rewind_psw(sie_block->gpsw, ilen); +} +static inline void kvm_s390_forward_psw(struct kvm_vcpu *vcpu, int ilen) +{ + kvm_s390_rewind_psw(vcpu, -ilen); +} +static inline void kvm_s390_retry_instr(struct kvm_vcpu *vcpu) +{ + kvm_s390_rewind_psw(vcpu, kvm_s390_get_ilen(vcpu)); +} /* implemented in priv.c */ int is_valid_psw(psw_t *psw); @@ -248,6 +268,8 @@ int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu); void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu); unsigned long kvm_s390_fac_list_mask_size(void); extern unsigned long kvm_s390_fac_list_mask[]; +void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm); +__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu); /* implemented in diag.c */ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index ed74e86d9b9e..95916fa7c670 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -23,6 +23,7 @@ #include <asm/sysinfo.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> +#include <asm/gmap.h> #include <asm/io.h> #include <asm/ptrace.h> #include <asm/compat.h> @@ -173,7 +174,7 @@ static int handle_skey(struct kvm_vcpu *vcpu) if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); - kvm_s390_rewind_psw(vcpu, 4); + kvm_s390_retry_instr(vcpu); VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation"); return 0; } @@ -184,7 +185,7 @@ static int handle_ipte_interlock(struct kvm_vcpu *vcpu) if (psw_bits(vcpu->arch.sie_block->gpsw).p) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); wait_event(vcpu->kvm->arch.ipte_wq, !ipte_lock_held(vcpu)); - kvm_s390_rewind_psw(vcpu, 4); + kvm_s390_retry_instr(vcpu); VCPU_EVENT(vcpu, 4, "%s", "retrying ipte interlock operation"); return 0; } @@ -354,7 +355,7 @@ static int handle_stfl(struct kvm_vcpu *vcpu) * We need to shift the lower 32 facility bits (bit 0-31) from a u64 * into a u32 memory representation. They will remain bits 0-31. */ - fac = *vcpu->kvm->arch.model.fac->list >> 32; + fac = *vcpu->kvm->arch.model.fac_list >> 32; rc = write_guest_lc(vcpu, offsetof(struct lowcore, stfl_fac_list), &fac, sizeof(fac)); if (rc) @@ -438,7 +439,7 @@ static int handle_lpswe(struct kvm_vcpu *vcpu) static int handle_stidp(struct kvm_vcpu *vcpu) { - u64 stidp_data = vcpu->arch.stidp_data; + u64 stidp_data = vcpu->kvm->arch.model.cpuid; u64 operand2; int rc; ar_t ar; @@ -669,8 +670,9 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) if (vcpu->run->s.regs.gprs[reg1] & PFMF_RESERVED) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - /* Only provide non-quiescing support if the host supports it */ - if (vcpu->run->s.regs.gprs[reg1] & PFMF_NQ && !test_facility(14)) + /* Only provide non-quiescing support if enabled for the guest */ + if (vcpu->run->s.regs.gprs[reg1] & PFMF_NQ && + !test_kvm_facility(vcpu->kvm, 14)) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); /* No support for conditional-SSKE */ @@ -743,7 +745,7 @@ static int handle_essa(struct kvm_vcpu *vcpu) { /* entries expected to be 1FF */ int entries = (vcpu->arch.sie_block->cbrlo & ~PAGE_MASK) >> 3; - unsigned long *cbrlo, cbrle; + unsigned long *cbrlo; struct gmap *gmap; int i; @@ -759,22 +761,14 @@ static int handle_essa(struct kvm_vcpu *vcpu) if (((vcpu->arch.sie_block->ipb & 0xf0000000) >> 28) > 6) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - /* Rewind PSW to repeat the ESSA instruction */ - kvm_s390_rewind_psw(vcpu, 4); + /* Retry the ESSA instruction */ + kvm_s390_retry_instr(vcpu); vcpu->arch.sie_block->cbrlo &= PAGE_MASK; /* reset nceo */ cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo); down_read(&gmap->mm->mmap_sem); - for (i = 0; i < entries; ++i) { - cbrle = cbrlo[i]; - if (unlikely(cbrle & ~PAGE_MASK || cbrle < 2 * PAGE_SIZE)) - /* invalid entry */ - break; - /* try to free backing */ - __gmap_zap(gmap, cbrle); - } + for (i = 0; i < entries; ++i) + __gmap_zap(gmap, cbrlo[i]); up_read(&gmap->mm->mmap_sem); - if (i < entries) - return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); return 0; } @@ -981,11 +975,12 @@ static int handle_tprot(struct kvm_vcpu *vcpu) return -EOPNOTSUPP; if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT) ipte_lock(vcpu); - ret = guest_translate_address(vcpu, address1, ar, &gpa, 1); + ret = guest_translate_address(vcpu, address1, ar, &gpa, GACC_STORE); if (ret == PGM_PROTECTION) { /* Write protected? Try again with read-only... */ cc = 1; - ret = guest_translate_address(vcpu, address1, ar, &gpa, 0); + ret = guest_translate_address(vcpu, address1, ar, &gpa, + GACC_FETCH); } if (ret) { if (ret == PGM_ADDRESSING || ret == PGM_TRANSLATION_SPEC) { diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index 77c22d685c7a..28ea0cab1f1b 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -240,6 +240,12 @@ static int __sigp_sense_running(struct kvm_vcpu *vcpu, struct kvm_s390_local_interrupt *li; int rc; + if (!test_kvm_facility(vcpu->kvm, 9)) { + *reg &= 0xffffffff00000000UL; + *reg |= SIGP_STATUS_INVALID_ORDER; + return SIGP_CC_STATUS_STORED; + } + li = &dst_vcpu->arch.local_int; if (atomic_read(li->cpuflags) & CPUSTAT_RUNNING) { /* running */ diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile index 0e8fefe5b0ce..1d1af31e8354 100644 --- a/arch/s390/lib/Makefile +++ b/arch/s390/lib/Makefile @@ -3,7 +3,7 @@ # lib-y += delay.o string.o uaccess.o find.o -obj-y += mem.o +obj-y += mem.o xor.o lib-$(CONFIG_SMP) += spinlock.o lib-$(CONFIG_KPROBES) += probes.o lib-$(CONFIG_UPROBES) += probes.o diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c index d4549c964589..e5f50a7d2f4e 100644 --- a/arch/s390/lib/spinlock.c +++ b/arch/s390/lib/spinlock.c @@ -105,6 +105,7 @@ void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags) if (_raw_compare_and_swap(&lp->lock, 0, cpu)) return; local_irq_restore(flags); + continue; } /* Check if the lock owner is running. */ if (first_diag && cpu_is_preempted(~owner)) { diff --git a/arch/s390/lib/xor.c b/arch/s390/lib/xor.c new file mode 100644 index 000000000000..7d94e3ec34a9 --- /dev/null +++ b/arch/s390/lib/xor.c @@ -0,0 +1,134 @@ +/* + * Optimized xor_block operation for RAID4/5 + * + * Copyright IBM Corp. 2016 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#include <linux/types.h> +#include <linux/module.h> +#include <linux/raid/xor.h> + +static void xor_xc_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) +{ + asm volatile( + " larl 1,2f\n" + " aghi %0,-1\n" + " jm 3f\n" + " srlg 0,%0,8\n" + " ltgr 0,0\n" + " jz 1f\n" + "0: xc 0(256,%1),0(%2)\n" + " la %1,256(%1)\n" + " la %2,256(%2)\n" + " brctg 0,0b\n" + "1: ex %0,0(1)\n" + " j 3f\n" + "2: xc 0(1,%1),0(%2)\n" + "3:\n" + : : "d" (bytes), "a" (p1), "a" (p2) + : "0", "1", "cc", "memory"); +} + +static void xor_xc_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3) +{ + asm volatile( + " larl 1,2f\n" + " aghi %0,-1\n" + " jm 3f\n" + " srlg 0,%0,8\n" + " ltgr 0,0\n" + " jz 1f\n" + "0: xc 0(256,%1),0(%2)\n" + " xc 0(256,%1),0(%3)\n" + " la %1,256(%1)\n" + " la %2,256(%2)\n" + " la %3,256(%3)\n" + " brctg 0,0b\n" + "1: ex %0,0(1)\n" + " ex %0,6(1)\n" + " j 3f\n" + "2: xc 0(1,%1),0(%2)\n" + " xc 0(1,%1),0(%3)\n" + "3:\n" + : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3) + : : "0", "1", "cc", "memory"); +} + +static void xor_xc_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3, unsigned long *p4) +{ + asm volatile( + " larl 1,2f\n" + " aghi %0,-1\n" + " jm 3f\n" + " srlg 0,%0,8\n" + " ltgr 0,0\n" + " jz 1f\n" + "0: xc 0(256,%1),0(%2)\n" + " xc 0(256,%1),0(%3)\n" + " xc 0(256,%1),0(%4)\n" + " la %1,256(%1)\n" + " la %2,256(%2)\n" + " la %3,256(%3)\n" + " la %4,256(%4)\n" + " brctg 0,0b\n" + "1: ex %0,0(1)\n" + " ex %0,6(1)\n" + " ex %0,12(1)\n" + " j 3f\n" + "2: xc 0(1,%1),0(%2)\n" + " xc 0(1,%1),0(%3)\n" + " xc 0(1,%1),0(%4)\n" + "3:\n" + : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4) + : : "0", "1", "cc", "memory"); +} + +static void xor_xc_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3, unsigned long *p4, unsigned long *p5) +{ + /* Get around a gcc oddity */ + register unsigned long *reg7 asm ("7") = p5; + + asm volatile( + " larl 1,2f\n" + " aghi %0,-1\n" + " jm 3f\n" + " srlg 0,%0,8\n" + " ltgr 0,0\n" + " jz 1f\n" + "0: xc 0(256,%1),0(%2)\n" + " xc 0(256,%1),0(%3)\n" + " xc 0(256,%1),0(%4)\n" + " xc 0(256,%1),0(%5)\n" + " la %1,256(%1)\n" + " la %2,256(%2)\n" + " la %3,256(%3)\n" + " la %4,256(%4)\n" + " la %5,256(%5)\n" + " brctg 0,0b\n" + "1: ex %0,0(1)\n" + " ex %0,6(1)\n" + " ex %0,12(1)\n" + " ex %0,18(1)\n" + " j 3f\n" + "2: xc 0(1,%1),0(%2)\n" + " xc 0(1,%1),0(%3)\n" + " xc 0(1,%1),0(%4)\n" + " xc 0(1,%1),0(%5)\n" + "3:\n" + : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4), + "+a" (reg7) + : : "0", "1", "cc", "memory"); +} + +struct xor_block_template xor_block_xc = { + .name = "xc", + .do_2 = xor_xc_2, + .do_3 = xor_xc_3, + .do_4 = xor_xc_4, + .do_5 = xor_xc_5, +}; +EXPORT_SYMBOL(xor_block_xc); diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile index 839592ca265c..0aa0ad165d8b 100644 --- a/arch/s390/mm/Makefile +++ b/arch/s390/mm/Makefile @@ -2,9 +2,11 @@ # Makefile for the linux s390-specific parts of the memory manager. # -obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o maccess.o -obj-y += page-states.o gup.o extable.o pageattr.o mem_detect.o +obj-y := init.o fault.o extmem.o mmap.o vmem.o maccess.o +obj-y += page-states.o gup.o pageattr.o mem_detect.o +obj-y += pgtable.o pgalloc.o obj-$(CONFIG_CMM) += cmm.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_S390_PTDUMP) += dump_pagetables.o +obj-$(CONFIG_PGSTE) += gmap.o diff --git a/arch/s390/mm/extable.c b/arch/s390/mm/extable.c deleted file mode 100644 index 18c8b819b0aa..000000000000 --- a/arch/s390/mm/extable.c +++ /dev/null @@ -1,85 +0,0 @@ -#include <linux/module.h> -#include <linux/sort.h> -#include <asm/uaccess.h> - -/* - * Search one exception table for an entry corresponding to the - * given instruction address, and return the address of the entry, - * or NULL if none is found. - * We use a binary search, and thus we assume that the table is - * already sorted. - */ -const struct exception_table_entry * -search_extable(const struct exception_table_entry *first, - const struct exception_table_entry *last, - unsigned long value) -{ - const struct exception_table_entry *mid; - unsigned long addr; - - while (first <= last) { - mid = ((last - first) >> 1) + first; - addr = extable_insn(mid); - if (addr < value) - first = mid + 1; - else if (addr > value) - last = mid - 1; - else - return mid; - } - return NULL; -} - -/* - * The exception table needs to be sorted so that the binary - * search that we use to find entries in it works properly. - * This is used both for the kernel exception table and for - * the exception tables of modules that get loaded. - * - */ -static int cmp_ex(const void *a, const void *b) -{ - const struct exception_table_entry *x = a, *y = b; - - /* This compare is only valid after normalization. */ - return x->insn - y->insn; -} - -void sort_extable(struct exception_table_entry *start, - struct exception_table_entry *finish) -{ - struct exception_table_entry *p; - int i; - - /* Normalize entries to being relative to the start of the section */ - for (p = start, i = 0; p < finish; p++, i += 8) { - p->insn += i; - p->fixup += i + 4; - } - sort(start, finish - start, sizeof(*start), cmp_ex, NULL); - /* Denormalize all entries */ - for (p = start, i = 0; p < finish; p++, i += 8) { - p->insn -= i; - p->fixup -= i + 4; - } -} - -#ifdef CONFIG_MODULES -/* - * If the exception table is sorted, any referring to the module init - * will be at the beginning or the end. - */ -void trim_init_extable(struct module *m) -{ - /* Trim the beginning */ - while (m->num_exentries && - within_module_init(extable_insn(&m->extable[0]), m)) { - m->extable++; - m->num_exentries--; - } - /* Trim the end */ - while (m->num_exentries && - within_module_init(extable_insn(&m->extable[m->num_exentries-1]), m)) - m->num_exentries--; -} -#endif /* CONFIG_MODULES */ diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c index a1bf4ad8925d..02042b6b66bf 100644 --- a/arch/s390/mm/extmem.c +++ b/arch/s390/mm/extmem.c @@ -265,7 +265,7 @@ query_segment_type (struct dcss_segment *seg) goto out_free; } if (diag_cc > 1) { - pr_warning("Querying a DCSS type failed with rc=%ld\n", vmrc); + pr_warn("Querying a DCSS type failed with rc=%ld\n", vmrc); rc = dcss_diag_translate_rc (vmrc); goto out_free; } @@ -457,8 +457,7 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long goto out_resource; } if (diag_cc > 1) { - pr_warning("Loading DCSS %s failed with rc=%ld\n", name, - end_addr); + pr_warn("Loading DCSS %s failed with rc=%ld\n", name, end_addr); rc = dcss_diag_translate_rc(end_addr); dcss_diag(&purgeseg_scode, seg->dcss_name, &dummy, &dummy); @@ -574,8 +573,7 @@ segment_modify_shared (char *name, int do_nonshared) goto out_unlock; } if (atomic_read (&seg->ref_count) != 1) { - pr_warning("DCSS %s is in use and cannot be reloaded\n", - name); + pr_warn("DCSS %s is in use and cannot be reloaded\n", name); rc = -EAGAIN; goto out_unlock; } @@ -588,8 +586,8 @@ segment_modify_shared (char *name, int do_nonshared) seg->res->flags |= IORESOURCE_READONLY; if (request_resource(&iomem_resource, seg->res)) { - pr_warning("DCSS %s overlaps with used memory resources " - "and cannot be reloaded\n", name); + pr_warn("DCSS %s overlaps with used memory resources and cannot be reloaded\n", + name); rc = -EBUSY; kfree(seg->res); goto out_del_mem; @@ -607,8 +605,8 @@ segment_modify_shared (char *name, int do_nonshared) goto out_del_res; } if (diag_cc > 1) { - pr_warning("Reloading DCSS %s failed with rc=%ld\n", name, - end_addr); + pr_warn("Reloading DCSS %s failed with rc=%ld\n", + name, end_addr); rc = dcss_diag_translate_rc(end_addr); goto out_del_res; } diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 791a4146052c..19288c1b36d3 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -32,6 +32,7 @@ #include <asm/asm-offsets.h> #include <asm/diag.h> #include <asm/pgtable.h> +#include <asm/gmap.h> #include <asm/irq.h> #include <asm/mmu_context.h> #include <asm/facility.h> @@ -183,6 +184,8 @@ static void dump_fault_info(struct pt_regs *regs) { unsigned long asce; + pr_alert("Failing address: %016lx TEID: %016lx\n", + regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long); pr_alert("Fault in "); switch (regs->int_parm_long & 3) { case 3: @@ -218,7 +221,9 @@ static void dump_fault_info(struct pt_regs *regs) dump_pagetable(asce, regs->int_parm_long & __FAIL_ADDR_MASK); } -static inline void report_user_fault(struct pt_regs *regs, long signr) +int show_unhandled_signals = 1; + +void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault) { if ((task_pid_nr(current) > 1) && !show_unhandled_signals) return; @@ -230,9 +235,8 @@ static inline void report_user_fault(struct pt_regs *regs, long signr) regs->int_code & 0xffff, regs->int_code >> 17); print_vma_addr(KERN_CONT "in ", regs->psw.addr); printk(KERN_CONT "\n"); - printk(KERN_ALERT "failing address: %016lx TEID: %016lx\n", - regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long); - dump_fault_info(regs); + if (is_mm_fault) + dump_fault_info(regs); show_regs(regs); } @@ -244,8 +248,9 @@ static noinline void do_sigsegv(struct pt_regs *regs, int si_code) { struct siginfo si; - report_user_fault(regs, SIGSEGV); + report_user_fault(regs, SIGSEGV, 1); si.si_signo = SIGSEGV; + si.si_errno = 0; si.si_code = si_code; si.si_addr = (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK); force_sig_info(SIGSEGV, &si, current); @@ -272,8 +277,6 @@ static noinline void do_no_context(struct pt_regs *regs) else printk(KERN_ALERT "Unable to handle kernel paging request" " in virtual user address space\n"); - printk(KERN_ALERT "failing address: %016lx TEID: %016lx\n", - regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long); dump_fault_info(regs); die(regs, "Oops"); do_exit(SIGKILL); @@ -629,6 +632,29 @@ void pfault_fini(void) static DEFINE_SPINLOCK(pfault_lock); static LIST_HEAD(pfault_list); +#define PF_COMPLETE 0x0080 + +/* + * The mechanism of our pfault code: if Linux is running as guest, runs a user + * space process and the user space process accesses a page that the host has + * paged out we get a pfault interrupt. + * + * This allows us, within the guest, to schedule a different process. Without + * this mechanism the host would have to suspend the whole virtual cpu until + * the page has been paged in. + * + * So when we get such an interrupt then we set the state of the current task + * to uninterruptible and also set the need_resched flag. Both happens within + * interrupt context(!). If we later on want to return to user space we + * recognize the need_resched flag and then call schedule(). It's not very + * obvious how this works... + * + * Of course we have a lot of additional fun with the completion interrupt (-> + * host signals that a page of a process has been paged in and the process can + * continue to run). This interrupt can arrive on any cpu and, since we have + * virtual cpus, actually appear before the interrupt that signals that a page + * is missing. + */ static void pfault_interrupt(struct ext_code ext_code, unsigned int param32, unsigned long param64) { @@ -637,10 +663,9 @@ static void pfault_interrupt(struct ext_code ext_code, pid_t pid; /* - * Get the external interruption subcode & pfault - * initial/completion signal bit. VM stores this - * in the 'cpu address' field associated with the - * external interrupt. + * Get the external interruption subcode & pfault initial/completion + * signal bit. VM stores this in the 'cpu address' field associated + * with the external interrupt. */ subcode = ext_code.subcode; if ((subcode & 0xff00) != __SUBCODE_MASK) @@ -656,7 +681,7 @@ static void pfault_interrupt(struct ext_code ext_code, if (!tsk) return; spin_lock(&pfault_lock); - if (subcode & 0x0080) { + if (subcode & PF_COMPLETE) { /* signal bit is set -> a page has been swapped in by VM */ if (tsk->thread.pfault_wait == 1) { /* Initial interrupt was faster than the completion @@ -685,8 +710,7 @@ static void pfault_interrupt(struct ext_code ext_code, goto out; if (tsk->thread.pfault_wait == 1) { /* Already on the list with a reference: put to sleep */ - __set_task_state(tsk, TASK_UNINTERRUPTIBLE); - set_tsk_need_resched(tsk); + goto block; } else if (tsk->thread.pfault_wait == -1) { /* Completion interrupt was faster than the initial * interrupt (pfault_wait == -1). Set pfault_wait @@ -701,7 +725,11 @@ static void pfault_interrupt(struct ext_code ext_code, get_task_struct(tsk); tsk->thread.pfault_wait = 1; list_add(&tsk->thread.list, &pfault_list); - __set_task_state(tsk, TASK_UNINTERRUPTIBLE); +block: + /* Since this must be a userspace fault, there + * is no kernel task state to trample. Rely on the + * return to userspace schedule() to block. */ + __set_current_state(TASK_UNINTERRUPTIBLE); set_tsk_need_resched(tsk); } } diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c new file mode 100644 index 000000000000..cace818d86eb --- /dev/null +++ b/arch/s390/mm/gmap.c @@ -0,0 +1,774 @@ +/* + * KVM guest address space mapping code + * + * Copyright IBM Corp. 2007, 2016 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/swap.h> +#include <linux/smp.h> +#include <linux/spinlock.h> +#include <linux/slab.h> +#include <linux/swapops.h> +#include <linux/ksm.h> +#include <linux/mman.h> + +#include <asm/pgtable.h> +#include <asm/pgalloc.h> +#include <asm/gmap.h> +#include <asm/tlb.h> + +/** + * gmap_alloc - allocate a guest address space + * @mm: pointer to the parent mm_struct + * @limit: maximum address of the gmap address space + * + * Returns a guest address space structure. + */ +struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit) +{ + struct gmap *gmap; + struct page *page; + unsigned long *table; + unsigned long etype, atype; + + if (limit < (1UL << 31)) { + limit = (1UL << 31) - 1; + atype = _ASCE_TYPE_SEGMENT; + etype = _SEGMENT_ENTRY_EMPTY; + } else if (limit < (1UL << 42)) { + limit = (1UL << 42) - 1; + atype = _ASCE_TYPE_REGION3; + etype = _REGION3_ENTRY_EMPTY; + } else if (limit < (1UL << 53)) { + limit = (1UL << 53) - 1; + atype = _ASCE_TYPE_REGION2; + etype = _REGION2_ENTRY_EMPTY; + } else { + limit = -1UL; + atype = _ASCE_TYPE_REGION1; + etype = _REGION1_ENTRY_EMPTY; + } + gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL); + if (!gmap) + goto out; + INIT_LIST_HEAD(&gmap->crst_list); + INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL); + INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC); + spin_lock_init(&gmap->guest_table_lock); + gmap->mm = mm; + page = alloc_pages(GFP_KERNEL, 2); + if (!page) + goto out_free; + page->index = 0; + list_add(&page->lru, &gmap->crst_list); + table = (unsigned long *) page_to_phys(page); + crst_table_init(table, etype); + gmap->table = table; + gmap->asce = atype | _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | __pa(table); + gmap->asce_end = limit; + down_write(&mm->mmap_sem); + list_add(&gmap->list, &mm->context.gmap_list); + up_write(&mm->mmap_sem); + return gmap; + +out_free: + kfree(gmap); +out: + return NULL; +} +EXPORT_SYMBOL_GPL(gmap_alloc); + +static void gmap_flush_tlb(struct gmap *gmap) +{ + if (MACHINE_HAS_IDTE) + __tlb_flush_asce(gmap->mm, gmap->asce); + else + __tlb_flush_global(); +} + +static void gmap_radix_tree_free(struct radix_tree_root *root) +{ + struct radix_tree_iter iter; + unsigned long indices[16]; + unsigned long index; + void **slot; + int i, nr; + + /* A radix tree is freed by deleting all of its entries */ + index = 0; + do { + nr = 0; + radix_tree_for_each_slot(slot, root, &iter, index) { + indices[nr] = iter.index; + if (++nr == 16) + break; + } + for (i = 0; i < nr; i++) { + index = indices[i]; + radix_tree_delete(root, index); + } + } while (nr > 0); +} + +/** + * gmap_free - free a guest address space + * @gmap: pointer to the guest address space structure + */ +void gmap_free(struct gmap *gmap) +{ + struct page *page, *next; + + /* Flush tlb. */ + if (MACHINE_HAS_IDTE) + __tlb_flush_asce(gmap->mm, gmap->asce); + else + __tlb_flush_global(); + + /* Free all segment & region tables. */ + list_for_each_entry_safe(page, next, &gmap->crst_list, lru) + __free_pages(page, 2); + gmap_radix_tree_free(&gmap->guest_to_host); + gmap_radix_tree_free(&gmap->host_to_guest); + down_write(&gmap->mm->mmap_sem); + list_del(&gmap->list); + up_write(&gmap->mm->mmap_sem); + kfree(gmap); +} +EXPORT_SYMBOL_GPL(gmap_free); + +/** + * gmap_enable - switch primary space to the guest address space + * @gmap: pointer to the guest address space structure + */ +void gmap_enable(struct gmap *gmap) +{ + S390_lowcore.gmap = (unsigned long) gmap; +} +EXPORT_SYMBOL_GPL(gmap_enable); + +/** + * gmap_disable - switch back to the standard primary address space + * @gmap: pointer to the guest address space structure + */ +void gmap_disable(struct gmap *gmap) +{ + S390_lowcore.gmap = 0UL; +} +EXPORT_SYMBOL_GPL(gmap_disable); + +/* + * gmap_alloc_table is assumed to be called with mmap_sem held + */ +static int gmap_alloc_table(struct gmap *gmap, unsigned long *table, + unsigned long init, unsigned long gaddr) +{ + struct page *page; + unsigned long *new; + + /* since we dont free the gmap table until gmap_free we can unlock */ + page = alloc_pages(GFP_KERNEL, 2); + if (!page) + return -ENOMEM; + new = (unsigned long *) page_to_phys(page); + crst_table_init(new, init); + spin_lock(&gmap->mm->page_table_lock); + if (*table & _REGION_ENTRY_INVALID) { + list_add(&page->lru, &gmap->crst_list); + *table = (unsigned long) new | _REGION_ENTRY_LENGTH | + (*table & _REGION_ENTRY_TYPE_MASK); + page->index = gaddr; + page = NULL; + } + spin_unlock(&gmap->mm->page_table_lock); + if (page) + __free_pages(page, 2); + return 0; +} + +/** + * __gmap_segment_gaddr - find virtual address from segment pointer + * @entry: pointer to a segment table entry in the guest address space + * + * Returns the virtual address in the guest address space for the segment + */ +static unsigned long __gmap_segment_gaddr(unsigned long *entry) +{ + struct page *page; + unsigned long offset, mask; + + offset = (unsigned long) entry / sizeof(unsigned long); + offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE; + mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1); + page = virt_to_page((void *)((unsigned long) entry & mask)); + return page->index + offset; +} + +/** + * __gmap_unlink_by_vmaddr - unlink a single segment via a host address + * @gmap: pointer to the guest address space structure + * @vmaddr: address in the host process address space + * + * Returns 1 if a TLB flush is required + */ +static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr) +{ + unsigned long *entry; + int flush = 0; + + spin_lock(&gmap->guest_table_lock); + entry = radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT); + if (entry) { + flush = (*entry != _SEGMENT_ENTRY_INVALID); + *entry = _SEGMENT_ENTRY_INVALID; + } + spin_unlock(&gmap->guest_table_lock); + return flush; +} + +/** + * __gmap_unmap_by_gaddr - unmap a single segment via a guest address + * @gmap: pointer to the guest address space structure + * @gaddr: address in the guest address space + * + * Returns 1 if a TLB flush is required + */ +static int __gmap_unmap_by_gaddr(struct gmap *gmap, unsigned long gaddr) +{ + unsigned long vmaddr; + + vmaddr = (unsigned long) radix_tree_delete(&gmap->guest_to_host, + gaddr >> PMD_SHIFT); + return vmaddr ? __gmap_unlink_by_vmaddr(gmap, vmaddr) : 0; +} + +/** + * gmap_unmap_segment - unmap segment from the guest address space + * @gmap: pointer to the guest address space structure + * @to: address in the guest address space + * @len: length of the memory area to unmap + * + * Returns 0 if the unmap succeeded, -EINVAL if not. + */ +int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len) +{ + unsigned long off; + int flush; + + if ((to | len) & (PMD_SIZE - 1)) + return -EINVAL; + if (len == 0 || to + len < to) + return -EINVAL; + + flush = 0; + down_write(&gmap->mm->mmap_sem); + for (off = 0; off < len; off += PMD_SIZE) + flush |= __gmap_unmap_by_gaddr(gmap, to + off); + up_write(&gmap->mm->mmap_sem); + if (flush) + gmap_flush_tlb(gmap); + return 0; +} +EXPORT_SYMBOL_GPL(gmap_unmap_segment); + +/** + * gmap_map_segment - map a segment to the guest address space + * @gmap: pointer to the guest address space structure + * @from: source address in the parent address space + * @to: target address in the guest address space + * @len: length of the memory area to map + * + * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not. + */ +int gmap_map_segment(struct gmap *gmap, unsigned long from, + unsigned long to, unsigned long len) +{ + unsigned long off; + int flush; + + if ((from | to | len) & (PMD_SIZE - 1)) + return -EINVAL; + if (len == 0 || from + len < from || to + len < to || + from + len - 1 > TASK_MAX_SIZE || to + len - 1 > gmap->asce_end) + return -EINVAL; + + flush = 0; + down_write(&gmap->mm->mmap_sem); + for (off = 0; off < len; off += PMD_SIZE) { + /* Remove old translation */ + flush |= __gmap_unmap_by_gaddr(gmap, to + off); + /* Store new translation */ + if (radix_tree_insert(&gmap->guest_to_host, + (to + off) >> PMD_SHIFT, + (void *) from + off)) + break; + } + up_write(&gmap->mm->mmap_sem); + if (flush) + gmap_flush_tlb(gmap); + if (off >= len) + return 0; + gmap_unmap_segment(gmap, to, len); + return -ENOMEM; +} +EXPORT_SYMBOL_GPL(gmap_map_segment); + +/** + * __gmap_translate - translate a guest address to a user space address + * @gmap: pointer to guest mapping meta data structure + * @gaddr: guest address + * + * Returns user space address which corresponds to the guest address or + * -EFAULT if no such mapping exists. + * This function does not establish potentially missing page table entries. + * The mmap_sem of the mm that belongs to the address space must be held + * when this function gets called. + */ +unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr) +{ + unsigned long vmaddr; + + vmaddr = (unsigned long) + radix_tree_lookup(&gmap->guest_to_host, gaddr >> PMD_SHIFT); + return vmaddr ? (vmaddr | (gaddr & ~PMD_MASK)) : -EFAULT; +} +EXPORT_SYMBOL_GPL(__gmap_translate); + +/** + * gmap_translate - translate a guest address to a user space address + * @gmap: pointer to guest mapping meta data structure + * @gaddr: guest address + * + * Returns user space address which corresponds to the guest address or + * -EFAULT if no such mapping exists. + * This function does not establish potentially missing page table entries. + */ +unsigned long gmap_translate(struct gmap *gmap, unsigned long gaddr) +{ + unsigned long rc; + + down_read(&gmap->mm->mmap_sem); + rc = __gmap_translate(gmap, gaddr); + up_read(&gmap->mm->mmap_sem); + return rc; +} +EXPORT_SYMBOL_GPL(gmap_translate); + +/** + * gmap_unlink - disconnect a page table from the gmap shadow tables + * @gmap: pointer to guest mapping meta data structure + * @table: pointer to the host page table + * @vmaddr: vm address associated with the host page table + */ +void gmap_unlink(struct mm_struct *mm, unsigned long *table, + unsigned long vmaddr) +{ + struct gmap *gmap; + int flush; + + list_for_each_entry(gmap, &mm->context.gmap_list, list) { + flush = __gmap_unlink_by_vmaddr(gmap, vmaddr); + if (flush) + gmap_flush_tlb(gmap); + } +} + +/** + * gmap_link - set up shadow page tables to connect a host to a guest address + * @gmap: pointer to guest mapping meta data structure + * @gaddr: guest address + * @vmaddr: vm address + * + * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT + * if the vm address is already mapped to a different guest segment. + * The mmap_sem of the mm that belongs to the address space must be held + * when this function gets called. + */ +int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) +{ + struct mm_struct *mm; + unsigned long *table; + spinlock_t *ptl; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + int rc; + + /* Create higher level tables in the gmap page table */ + table = gmap->table; + if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) { + table += (gaddr >> 53) & 0x7ff; + if ((*table & _REGION_ENTRY_INVALID) && + gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY, + gaddr & 0xffe0000000000000UL)) + return -ENOMEM; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + } + if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) { + table += (gaddr >> 42) & 0x7ff; + if ((*table & _REGION_ENTRY_INVALID) && + gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY, + gaddr & 0xfffffc0000000000UL)) + return -ENOMEM; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + } + if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) { + table += (gaddr >> 31) & 0x7ff; + if ((*table & _REGION_ENTRY_INVALID) && + gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY, + gaddr & 0xffffffff80000000UL)) + return -ENOMEM; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + } + table += (gaddr >> 20) & 0x7ff; + /* Walk the parent mm page table */ + mm = gmap->mm; + pgd = pgd_offset(mm, vmaddr); + VM_BUG_ON(pgd_none(*pgd)); + pud = pud_offset(pgd, vmaddr); + VM_BUG_ON(pud_none(*pud)); + pmd = pmd_offset(pud, vmaddr); + VM_BUG_ON(pmd_none(*pmd)); + /* large pmds cannot yet be handled */ + if (pmd_large(*pmd)) + return -EFAULT; + /* Link gmap segment table entry location to page table. */ + rc = radix_tree_preload(GFP_KERNEL); + if (rc) + return rc; + ptl = pmd_lock(mm, pmd); + spin_lock(&gmap->guest_table_lock); + if (*table == _SEGMENT_ENTRY_INVALID) { + rc = radix_tree_insert(&gmap->host_to_guest, + vmaddr >> PMD_SHIFT, table); + if (!rc) + *table = pmd_val(*pmd); + } else + rc = 0; + spin_unlock(&gmap->guest_table_lock); + spin_unlock(ptl); + radix_tree_preload_end(); + return rc; +} + +/** + * gmap_fault - resolve a fault on a guest address + * @gmap: pointer to guest mapping meta data structure + * @gaddr: guest address + * @fault_flags: flags to pass down to handle_mm_fault() + * + * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT + * if the vm address is already mapped to a different guest segment. + */ +int gmap_fault(struct gmap *gmap, unsigned long gaddr, + unsigned int fault_flags) +{ + unsigned long vmaddr; + int rc; + bool unlocked; + + down_read(&gmap->mm->mmap_sem); + +retry: + unlocked = false; + vmaddr = __gmap_translate(gmap, gaddr); + if (IS_ERR_VALUE(vmaddr)) { + rc = vmaddr; + goto out_up; + } + if (fixup_user_fault(current, gmap->mm, vmaddr, fault_flags, + &unlocked)) { + rc = -EFAULT; + goto out_up; + } + /* + * In the case that fixup_user_fault unlocked the mmap_sem during + * faultin redo __gmap_translate to not race with a map/unmap_segment. + */ + if (unlocked) + goto retry; + + rc = __gmap_link(gmap, gaddr, vmaddr); +out_up: + up_read(&gmap->mm->mmap_sem); + return rc; +} +EXPORT_SYMBOL_GPL(gmap_fault); + +/* + * this function is assumed to be called with mmap_sem held + */ +void __gmap_zap(struct gmap *gmap, unsigned long gaddr) +{ + unsigned long vmaddr; + spinlock_t *ptl; + pte_t *ptep; + + /* Find the vm address for the guest address */ + vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host, + gaddr >> PMD_SHIFT); + if (vmaddr) { + vmaddr |= gaddr & ~PMD_MASK; + /* Get pointer to the page table entry */ + ptep = get_locked_pte(gmap->mm, vmaddr, &ptl); + if (likely(ptep)) + ptep_zap_unused(gmap->mm, vmaddr, ptep, 0); + pte_unmap_unlock(ptep, ptl); + } +} +EXPORT_SYMBOL_GPL(__gmap_zap); + +void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to) +{ + unsigned long gaddr, vmaddr, size; + struct vm_area_struct *vma; + + down_read(&gmap->mm->mmap_sem); + for (gaddr = from; gaddr < to; + gaddr = (gaddr + PMD_SIZE) & PMD_MASK) { + /* Find the vm address for the guest address */ + vmaddr = (unsigned long) + radix_tree_lookup(&gmap->guest_to_host, + gaddr >> PMD_SHIFT); + if (!vmaddr) + continue; + vmaddr |= gaddr & ~PMD_MASK; + /* Find vma in the parent mm */ + vma = find_vma(gmap->mm, vmaddr); + size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK)); + zap_page_range(vma, vmaddr, size, NULL); + } + up_read(&gmap->mm->mmap_sem); +} +EXPORT_SYMBOL_GPL(gmap_discard); + +static LIST_HEAD(gmap_notifier_list); +static DEFINE_SPINLOCK(gmap_notifier_lock); + +/** + * gmap_register_ipte_notifier - register a pte invalidation callback + * @nb: pointer to the gmap notifier block + */ +void gmap_register_ipte_notifier(struct gmap_notifier *nb) +{ + spin_lock(&gmap_notifier_lock); + list_add(&nb->list, &gmap_notifier_list); + spin_unlock(&gmap_notifier_lock); +} +EXPORT_SYMBOL_GPL(gmap_register_ipte_notifier); + +/** + * gmap_unregister_ipte_notifier - remove a pte invalidation callback + * @nb: pointer to the gmap notifier block + */ +void gmap_unregister_ipte_notifier(struct gmap_notifier *nb) +{ + spin_lock(&gmap_notifier_lock); + list_del_init(&nb->list); + spin_unlock(&gmap_notifier_lock); +} +EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier); + +/** + * gmap_ipte_notify - mark a range of ptes for invalidation notification + * @gmap: pointer to guest mapping meta data structure + * @gaddr: virtual address in the guest address space + * @len: size of area + * + * Returns 0 if for each page in the given range a gmap mapping exists and + * the invalidation notification could be set. If the gmap mapping is missing + * for one or more pages -EFAULT is returned. If no memory could be allocated + * -ENOMEM is returned. This function establishes missing page table entries. + */ +int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len) +{ + unsigned long addr; + spinlock_t *ptl; + pte_t *ptep; + bool unlocked; + int rc = 0; + + if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK)) + return -EINVAL; + down_read(&gmap->mm->mmap_sem); + while (len) { + unlocked = false; + /* Convert gmap address and connect the page tables */ + addr = __gmap_translate(gmap, gaddr); + if (IS_ERR_VALUE(addr)) { + rc = addr; + break; + } + /* Get the page mapped */ + if (fixup_user_fault(current, gmap->mm, addr, FAULT_FLAG_WRITE, + &unlocked)) { + rc = -EFAULT; + break; + } + /* While trying to map mmap_sem got unlocked. Let us retry */ + if (unlocked) + continue; + rc = __gmap_link(gmap, gaddr, addr); + if (rc) + break; + /* Walk the process page table, lock and get pte pointer */ + ptep = get_locked_pte(gmap->mm, addr, &ptl); + VM_BUG_ON(!ptep); + /* Set notification bit in the pgste of the pte */ + if ((pte_val(*ptep) & (_PAGE_INVALID | _PAGE_PROTECT)) == 0) { + ptep_set_notify(gmap->mm, addr, ptep); + gaddr += PAGE_SIZE; + len -= PAGE_SIZE; + } + pte_unmap_unlock(ptep, ptl); + } + up_read(&gmap->mm->mmap_sem); + return rc; +} +EXPORT_SYMBOL_GPL(gmap_ipte_notify); + +/** + * ptep_notify - call all invalidation callbacks for a specific pte. + * @mm: pointer to the process mm_struct + * @addr: virtual address in the process address space + * @pte: pointer to the page table entry + * + * This function is assumed to be called with the page table lock held + * for the pte to notify. + */ +void ptep_notify(struct mm_struct *mm, unsigned long vmaddr, pte_t *pte) +{ + unsigned long offset, gaddr; + unsigned long *table; + struct gmap_notifier *nb; + struct gmap *gmap; + + offset = ((unsigned long) pte) & (255 * sizeof(pte_t)); + offset = offset * (4096 / sizeof(pte_t)); + spin_lock(&gmap_notifier_lock); + list_for_each_entry(gmap, &mm->context.gmap_list, list) { + table = radix_tree_lookup(&gmap->host_to_guest, + vmaddr >> PMD_SHIFT); + if (!table) + continue; + gaddr = __gmap_segment_gaddr(table) + offset; + list_for_each_entry(nb, &gmap_notifier_list, list) + nb->notifier_call(gmap, gaddr); + } + spin_unlock(&gmap_notifier_lock); +} +EXPORT_SYMBOL_GPL(ptep_notify); + +static inline void thp_split_mm(struct mm_struct *mm) +{ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + struct vm_area_struct *vma; + unsigned long addr; + + for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) { + for (addr = vma->vm_start; + addr < vma->vm_end; + addr += PAGE_SIZE) + follow_page(vma, addr, FOLL_SPLIT); + vma->vm_flags &= ~VM_HUGEPAGE; + vma->vm_flags |= VM_NOHUGEPAGE; + } + mm->def_flags |= VM_NOHUGEPAGE; +#endif +} + +/* + * switch on pgstes for its userspace process (for kvm) + */ +int s390_enable_sie(void) +{ + struct mm_struct *mm = current->mm; + + /* Do we have pgstes? if yes, we are done */ + if (mm_has_pgste(mm)) + return 0; + /* Fail if the page tables are 2K */ + if (!mm_alloc_pgste(mm)) + return -EINVAL; + down_write(&mm->mmap_sem); + mm->context.has_pgste = 1; + /* split thp mappings and disable thp for future mappings */ + thp_split_mm(mm); + up_write(&mm->mmap_sem); + return 0; +} +EXPORT_SYMBOL_GPL(s390_enable_sie); + +/* + * Enable storage key handling from now on and initialize the storage + * keys with the default key. + */ +static int __s390_enable_skey(pte_t *pte, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + /* + * Remove all zero page mappings, + * after establishing a policy to forbid zero page mappings + * following faults for that page will get fresh anonymous pages + */ + if (is_zero_pfn(pte_pfn(*pte))) + ptep_xchg_direct(walk->mm, addr, pte, __pte(_PAGE_INVALID)); + /* Clear storage key */ + ptep_zap_key(walk->mm, addr, pte); + return 0; +} + +int s390_enable_skey(void) +{ + struct mm_walk walk = { .pte_entry = __s390_enable_skey }; + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + int rc = 0; + + down_write(&mm->mmap_sem); + if (mm_use_skey(mm)) + goto out_up; + + mm->context.use_skey = 1; + for (vma = mm->mmap; vma; vma = vma->vm_next) { + if (ksm_madvise(vma, vma->vm_start, vma->vm_end, + MADV_UNMERGEABLE, &vma->vm_flags)) { + mm->context.use_skey = 0; + rc = -ENOMEM; + goto out_up; + } + } + mm->def_flags &= ~VM_MERGEABLE; + + walk.mm = mm; + walk_page_range(0, TASK_SIZE, &walk); + +out_up: + up_write(&mm->mmap_sem); + return rc; +} +EXPORT_SYMBOL_GPL(s390_enable_skey); + +/* + * Reset CMMA state, make all pages stable again. + */ +static int __s390_reset_cmma(pte_t *pte, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + ptep_zap_unused(walk->mm, addr, pte, 1); + return 0; +} + +void s390_reset_cmma(struct mm_struct *mm) +{ + struct mm_walk walk = { .pte_entry = __s390_reset_cmma }; + + down_write(&mm->mmap_sem); + walk.mm = mm; + walk_page_range(0, TASK_SIZE, &walk); + up_write(&mm->mmap_sem); +} +EXPORT_SYMBOL_GPL(s390_reset_cmma); diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c index 13dab0c1645c..a8a6765f1a51 100644 --- a/arch/s390/mm/gup.c +++ b/arch/s390/mm/gup.c @@ -20,9 +20,9 @@ static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { + struct page *head, *page; unsigned long mask; pte_t *ptep, pte; - struct page *page; mask = (write ? _PAGE_PROTECT : 0) | _PAGE_INVALID | _PAGE_SPECIAL; @@ -37,12 +37,14 @@ static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr, return 0; VM_BUG_ON(!pfn_valid(pte_pfn(pte))); page = pte_page(pte); - if (!page_cache_get_speculative(page)) + head = compound_head(page); + if (!page_cache_get_speculative(head)) return 0; if (unlikely(pte_val(pte) != pte_val(*ptep))) { - put_page(page); + put_page(head); return 0; } + VM_BUG_ON_PAGE(compound_head(page) != head, page); pages[*nr] = page; (*nr)++; @@ -210,7 +212,6 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write, int get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages) { - struct mm_struct *mm = current->mm; int nr, ret; might_sleep(); @@ -222,8 +223,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, /* Try to get the remaining pages with get_user_pages */ start += nr << PAGE_SHIFT; pages += nr; - ret = get_user_pages_unlocked(current, mm, start, - nr_pages - nr, write, 0, pages); + ret = get_user_pages_unlocked(start, nr_pages - nr, write, 0, pages); /* Have to be a bit careful with return values */ if (nr > 0) ret = (ret < 0) ? nr : ret + nr; diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index f81096b6940d..1b5e8983f4f3 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -105,11 +105,10 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { pmd_t *pmdp = (pmd_t *) ptep; - pte_t pte = huge_ptep_get(ptep); + pmd_t old; - pmdp_flush_direct(mm, addr, pmdp); - pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY; - return pte; + old = pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY)); + return __pmd_to_pte(old); } pte_t *huge_pte_alloc(struct mm_struct *mm, diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 73e290337092..2489b2e917c8 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -89,7 +89,8 @@ void __init paging_init(void) asce_bits = _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH; pgd_type = _REGION3_ENTRY_EMPTY; } - S390_lowcore.kernel_asce = (__pa(init_mm.pgd) & PAGE_MASK) | asce_bits; + init_mm.context.asce = (__pa(init_mm.pgd) & PAGE_MASK) | asce_bits; + S390_lowcore.kernel_asce = init_mm.context.asce; clear_table((unsigned long *) init_mm.pgd, pgd_type, sizeof(unsigned long)*2048); vmem_map_init(); @@ -108,6 +109,13 @@ void __init paging_init(void) free_area_init_nodes(max_zone_pfns); } +void mark_rodata_ro(void) +{ + /* Text and rodata are already protected. Nothing to do here. */ + pr_info("Write protecting the kernel read-only data: %luk\n", + ((unsigned long)&_eshared - (unsigned long)&_stext) >> 10); +} + void __init mem_init(void) { if (MACHINE_HAS_TLB_LC) @@ -126,9 +134,6 @@ void __init mem_init(void) setup_zero_pages(); /* Setup zeroed pages. */ mem_init_print_info(NULL); - printk("Write protected kernel read-only data: %#lx - %#lx\n", - (unsigned long)&_stext, - PFN_ALIGN((unsigned long)&_eshared) - 1); } void free_initmem(void) diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index 45c4daa49930..eb9df2822da1 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -22,6 +22,7 @@ * Started by Ingo Molnar <mingo@elte.hu> */ +#include <linux/elf-randomize.h> #include <linux/personality.h> #include <linux/mm.h> #include <linux/mman.h> @@ -174,7 +175,7 @@ int s390_mmap_check(unsigned long addr, unsigned long len, unsigned long flags) if (!(flags & MAP_FIXED)) addr = 0; if ((addr + len) >= TASK_SIZE) - return crst_table_upgrade(current->mm, TASK_MAX_SIZE); + return crst_table_upgrade(current->mm); return 0; } @@ -191,7 +192,7 @@ s390_get_unmapped_area(struct file *filp, unsigned long addr, return area; if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < TASK_MAX_SIZE) { /* Upgrade the page table to 4 levels and retry. */ - rc = crst_table_upgrade(mm, TASK_MAX_SIZE); + rc = crst_table_upgrade(mm); if (rc) return (unsigned long) rc; area = arch_get_unmapped_area(filp, addr, len, pgoff, flags); @@ -213,7 +214,7 @@ s390_get_unmapped_area_topdown(struct file *filp, const unsigned long addr, return area; if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < TASK_MAX_SIZE) { /* Upgrade the page table to 4 levels and retry. */ - rc = crst_table_upgrade(mm, TASK_MAX_SIZE); + rc = crst_table_upgrade(mm); if (rc) return (unsigned long) rc; area = arch_get_unmapped_area_topdown(filp, addr, len, diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index 749c98407b41..f2a5c29a97e9 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -65,19 +65,17 @@ static pte_t *walk_page_table(unsigned long addr) static void change_page_attr(unsigned long addr, int numpages, pte_t (*set) (pte_t)) { - pte_t *ptep, pte; + pte_t *ptep; int i; for (i = 0; i < numpages; i++) { ptep = walk_page_table(addr); if (WARN_ON_ONCE(!ptep)) break; - pte = *ptep; - pte = set(pte); - __ptep_ipte(addr, ptep); - *ptep = pte; + *ptep = set(*ptep); addr += PAGE_SIZE; } + __tlb_flush_kernel(); } int set_memory_ro(unsigned long addr, int numpages) diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c new file mode 100644 index 000000000000..e2565d2d0c32 --- /dev/null +++ b/arch/s390/mm/pgalloc.c @@ -0,0 +1,331 @@ +/* + * Page table allocation functions + * + * Copyright IBM Corp. 2016 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#include <linux/mm.h> +#include <linux/sysctl.h> +#include <asm/mmu_context.h> +#include <asm/pgalloc.h> +#include <asm/gmap.h> +#include <asm/tlb.h> +#include <asm/tlbflush.h> + +#ifdef CONFIG_PGSTE + +static int page_table_allocate_pgste_min = 0; +static int page_table_allocate_pgste_max = 1; +int page_table_allocate_pgste = 0; +EXPORT_SYMBOL(page_table_allocate_pgste); + +static struct ctl_table page_table_sysctl[] = { + { + .procname = "allocate_pgste", + .data = &page_table_allocate_pgste, + .maxlen = sizeof(int), + .mode = S_IRUGO | S_IWUSR, + .proc_handler = proc_dointvec, + .extra1 = &page_table_allocate_pgste_min, + .extra2 = &page_table_allocate_pgste_max, + }, + { } +}; + +static struct ctl_table page_table_sysctl_dir[] = { + { + .procname = "vm", + .maxlen = 0, + .mode = 0555, + .child = page_table_sysctl, + }, + { } +}; + +static int __init page_table_register_sysctl(void) +{ + return register_sysctl_table(page_table_sysctl_dir) ? 0 : -ENOMEM; +} +__initcall(page_table_register_sysctl); + +#endif /* CONFIG_PGSTE */ + +unsigned long *crst_table_alloc(struct mm_struct *mm) +{ + struct page *page = alloc_pages(GFP_KERNEL, 2); + + if (!page) + return NULL; + return (unsigned long *) page_to_phys(page); +} + +void crst_table_free(struct mm_struct *mm, unsigned long *table) +{ + free_pages((unsigned long) table, 2); +} + +static void __crst_table_upgrade(void *arg) +{ + struct mm_struct *mm = arg; + + if (current->active_mm == mm) { + clear_user_asce(); + set_user_asce(mm); + } + __tlb_flush_local(); +} + +int crst_table_upgrade(struct mm_struct *mm) +{ + unsigned long *table, *pgd; + + /* upgrade should only happen from 3 to 4 levels */ + BUG_ON(mm->context.asce_limit != (1UL << 42)); + + table = crst_table_alloc(mm); + if (!table) + return -ENOMEM; + + spin_lock_bh(&mm->page_table_lock); + pgd = (unsigned long *) mm->pgd; + crst_table_init(table, _REGION2_ENTRY_EMPTY); + pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd); + mm->pgd = (pgd_t *) table; + mm->context.asce_limit = 1UL << 53; + mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | _ASCE_TYPE_REGION2; + mm->task_size = mm->context.asce_limit; + spin_unlock_bh(&mm->page_table_lock); + + on_each_cpu(__crst_table_upgrade, mm, 0); + return 0; +} + +void crst_table_downgrade(struct mm_struct *mm) +{ + pgd_t *pgd; + + /* downgrade should only happen from 3 to 2 levels (compat only) */ + BUG_ON(mm->context.asce_limit != (1UL << 42)); + + if (current->active_mm == mm) { + clear_user_asce(); + __tlb_flush_mm(mm); + } + + pgd = mm->pgd; + mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN); + mm->context.asce_limit = 1UL << 31; + mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT; + mm->task_size = mm->context.asce_limit; + crst_table_free(mm, (unsigned long *) pgd); + + if (current->active_mm == mm) + set_user_asce(mm); +} + +static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits) +{ + unsigned int old, new; + + do { + old = atomic_read(v); + new = old ^ bits; + } while (atomic_cmpxchg(v, old, new) != old); + return new; +} + +/* + * page table entry allocation/free routines. + */ +unsigned long *page_table_alloc(struct mm_struct *mm) +{ + unsigned long *table; + struct page *page; + unsigned int mask, bit; + + /* Try to get a fragment of a 4K page as a 2K page table */ + if (!mm_alloc_pgste(mm)) { + table = NULL; + spin_lock_bh(&mm->context.list_lock); + if (!list_empty(&mm->context.pgtable_list)) { + page = list_first_entry(&mm->context.pgtable_list, + struct page, lru); + mask = atomic_read(&page->_mapcount); + mask = (mask | (mask >> 4)) & 3; + if (mask != 3) { + table = (unsigned long *) page_to_phys(page); + bit = mask & 1; /* =1 -> second 2K */ + if (bit) + table += PTRS_PER_PTE; + atomic_xor_bits(&page->_mapcount, 1U << bit); + list_del(&page->lru); + } + } + spin_unlock_bh(&mm->context.list_lock); + if (table) + return table; + } + /* Allocate a fresh page */ + page = alloc_page(GFP_KERNEL); + if (!page) + return NULL; + if (!pgtable_page_ctor(page)) { + __free_page(page); + return NULL; + } + /* Initialize page table */ + table = (unsigned long *) page_to_phys(page); + if (mm_alloc_pgste(mm)) { + /* Return 4K page table with PGSTEs */ + atomic_set(&page->_mapcount, 3); + clear_table(table, _PAGE_INVALID, PAGE_SIZE/2); + clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2); + } else { + /* Return the first 2K fragment of the page */ + atomic_set(&page->_mapcount, 1); + clear_table(table, _PAGE_INVALID, PAGE_SIZE); + spin_lock_bh(&mm->context.list_lock); + list_add(&page->lru, &mm->context.pgtable_list); + spin_unlock_bh(&mm->context.list_lock); + } + return table; +} + +void page_table_free(struct mm_struct *mm, unsigned long *table) +{ + struct page *page; + unsigned int bit, mask; + + page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + if (!mm_alloc_pgste(mm)) { + /* Free 2K page table fragment of a 4K page */ + bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)); + spin_lock_bh(&mm->context.list_lock); + mask = atomic_xor_bits(&page->_mapcount, 1U << bit); + if (mask & 3) + list_add(&page->lru, &mm->context.pgtable_list); + else + list_del(&page->lru); + spin_unlock_bh(&mm->context.list_lock); + if (mask != 0) + return; + } + + pgtable_page_dtor(page); + atomic_set(&page->_mapcount, -1); + __free_page(page); +} + +void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table, + unsigned long vmaddr) +{ + struct mm_struct *mm; + struct page *page; + unsigned int bit, mask; + + mm = tlb->mm; + page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + if (mm_alloc_pgste(mm)) { + gmap_unlink(mm, table, vmaddr); + table = (unsigned long *) (__pa(table) | 3); + tlb_remove_table(tlb, table); + return; + } + bit = (__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)); + spin_lock_bh(&mm->context.list_lock); + mask = atomic_xor_bits(&page->_mapcount, 0x11U << bit); + if (mask & 3) + list_add_tail(&page->lru, &mm->context.pgtable_list); + else + list_del(&page->lru); + spin_unlock_bh(&mm->context.list_lock); + table = (unsigned long *) (__pa(table) | (1U << bit)); + tlb_remove_table(tlb, table); +} + +static void __tlb_remove_table(void *_table) +{ + unsigned int mask = (unsigned long) _table & 3; + void *table = (void *)((unsigned long) _table ^ mask); + struct page *page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + + switch (mask) { + case 0: /* pmd or pud */ + free_pages((unsigned long) table, 2); + break; + case 1: /* lower 2K of a 4K page table */ + case 2: /* higher 2K of a 4K page table */ + if (atomic_xor_bits(&page->_mapcount, mask << 4) != 0) + break; + /* fallthrough */ + case 3: /* 4K page table with pgstes */ + pgtable_page_dtor(page); + atomic_set(&page->_mapcount, -1); + __free_page(page); + break; + } +} + +static void tlb_remove_table_smp_sync(void *arg) +{ + /* Simply deliver the interrupt */ +} + +static void tlb_remove_table_one(void *table) +{ + /* + * This isn't an RCU grace period and hence the page-tables cannot be + * assumed to be actually RCU-freed. + * + * It is however sufficient for software page-table walkers that rely + * on IRQ disabling. See the comment near struct mmu_table_batch. + */ + smp_call_function(tlb_remove_table_smp_sync, NULL, 1); + __tlb_remove_table(table); +} + +static void tlb_remove_table_rcu(struct rcu_head *head) +{ + struct mmu_table_batch *batch; + int i; + + batch = container_of(head, struct mmu_table_batch, rcu); + + for (i = 0; i < batch->nr; i++) + __tlb_remove_table(batch->tables[i]); + + free_page((unsigned long)batch); +} + +void tlb_table_flush(struct mmu_gather *tlb) +{ + struct mmu_table_batch **batch = &tlb->batch; + + if (*batch) { + call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu); + *batch = NULL; + } +} + +void tlb_remove_table(struct mmu_gather *tlb, void *table) +{ + struct mmu_table_batch **batch = &tlb->batch; + + tlb->mm->context.flush_mm = 1; + if (*batch == NULL) { + *batch = (struct mmu_table_batch *) + __get_free_page(GFP_NOWAIT | __GFP_NOWARN); + if (*batch == NULL) { + __tlb_flush_mm_lazy(tlb->mm); + tlb_remove_table_one(table); + return; + } + (*batch)->nr = 0; + } + (*batch)->tables[(*batch)->nr++] = table; + if ((*batch)->nr == MAX_TABLE_BATCH) + tlb_flush_mmu(tlb); +} diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 5109827883ac..9f0ce0e6eeb4 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -24,591 +24,397 @@ #include <asm/tlbflush.h> #include <asm/mmu_context.h> -unsigned long *crst_table_alloc(struct mm_struct *mm) -{ - struct page *page = alloc_pages(GFP_KERNEL, 2); - - if (!page) - return NULL; - return (unsigned long *) page_to_phys(page); +static inline pte_t ptep_flush_direct(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + int active, count; + pte_t old; + + old = *ptep; + if (unlikely(pte_val(old) & _PAGE_INVALID)) + return old; + active = (mm == current->active_mm) ? 1 : 0; + count = atomic_add_return(0x10000, &mm->context.attach_count); + if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active && + cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) + __ptep_ipte_local(addr, ptep); + else + __ptep_ipte(addr, ptep); + atomic_sub(0x10000, &mm->context.attach_count); + return old; } -void crst_table_free(struct mm_struct *mm, unsigned long *table) +static inline pte_t ptep_flush_lazy(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) { - free_pages((unsigned long) table, 2); + int active, count; + pte_t old; + + old = *ptep; + if (unlikely(pte_val(old) & _PAGE_INVALID)) + return old; + active = (mm == current->active_mm) ? 1 : 0; + count = atomic_add_return(0x10000, &mm->context.attach_count); + if ((count & 0xffff) <= active) { + pte_val(*ptep) |= _PAGE_INVALID; + mm->context.flush_mm = 1; + } else + __ptep_ipte(addr, ptep); + atomic_sub(0x10000, &mm->context.attach_count); + return old; } -static void __crst_table_upgrade(void *arg) +static inline pgste_t pgste_get_lock(pte_t *ptep) { - struct mm_struct *mm = arg; + unsigned long new = 0; +#ifdef CONFIG_PGSTE + unsigned long old; - if (current->active_mm == mm) { - clear_user_asce(); - set_user_asce(mm); - } - __tlb_flush_local(); + preempt_disable(); + asm( + " lg %0,%2\n" + "0: lgr %1,%0\n" + " nihh %0,0xff7f\n" /* clear PCL bit in old */ + " oihh %1,0x0080\n" /* set PCL bit in new */ + " csg %0,%1,%2\n" + " jl 0b\n" + : "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE]) + : "Q" (ptep[PTRS_PER_PTE]) : "cc", "memory"); +#endif + return __pgste(new); } -int crst_table_upgrade(struct mm_struct *mm, unsigned long limit) +static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste) { - unsigned long *table, *pgd; - unsigned long entry; - int flush; - - BUG_ON(limit > TASK_MAX_SIZE); - flush = 0; -repeat: - table = crst_table_alloc(mm); - if (!table) - return -ENOMEM; - spin_lock_bh(&mm->page_table_lock); - if (mm->context.asce_limit < limit) { - pgd = (unsigned long *) mm->pgd; - if (mm->context.asce_limit <= (1UL << 31)) { - entry = _REGION3_ENTRY_EMPTY; - mm->context.asce_limit = 1UL << 42; - mm->context.asce_bits = _ASCE_TABLE_LENGTH | - _ASCE_USER_BITS | - _ASCE_TYPE_REGION3; - } else { - entry = _REGION2_ENTRY_EMPTY; - mm->context.asce_limit = 1UL << 53; - mm->context.asce_bits = _ASCE_TABLE_LENGTH | - _ASCE_USER_BITS | - _ASCE_TYPE_REGION2; - } - crst_table_init(table, entry); - pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd); - mm->pgd = (pgd_t *) table; - mm->task_size = mm->context.asce_limit; - table = NULL; - flush = 1; - } - spin_unlock_bh(&mm->page_table_lock); - if (table) - crst_table_free(mm, table); - if (mm->context.asce_limit < limit) - goto repeat; - if (flush) - on_each_cpu(__crst_table_upgrade, mm, 0); - return 0; +#ifdef CONFIG_PGSTE + asm( + " nihh %1,0xff7f\n" /* clear PCL bit */ + " stg %1,%0\n" + : "=Q" (ptep[PTRS_PER_PTE]) + : "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE]) + : "cc", "memory"); + preempt_enable(); +#endif } -void crst_table_downgrade(struct mm_struct *mm, unsigned long limit) +static inline pgste_t pgste_get(pte_t *ptep) { - pgd_t *pgd; - - if (current->active_mm == mm) { - clear_user_asce(); - __tlb_flush_mm(mm); - } - while (mm->context.asce_limit > limit) { - pgd = mm->pgd; - switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) { - case _REGION_ENTRY_TYPE_R2: - mm->context.asce_limit = 1UL << 42; - mm->context.asce_bits = _ASCE_TABLE_LENGTH | - _ASCE_USER_BITS | - _ASCE_TYPE_REGION3; - break; - case _REGION_ENTRY_TYPE_R3: - mm->context.asce_limit = 1UL << 31; - mm->context.asce_bits = _ASCE_TABLE_LENGTH | - _ASCE_USER_BITS | - _ASCE_TYPE_SEGMENT; - break; - default: - BUG(); - } - mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN); - mm->task_size = mm->context.asce_limit; - crst_table_free(mm, (unsigned long *) pgd); - } - if (current->active_mm == mm) - set_user_asce(mm); + unsigned long pgste = 0; +#ifdef CONFIG_PGSTE + pgste = *(unsigned long *)(ptep + PTRS_PER_PTE); +#endif + return __pgste(pgste); } +static inline void pgste_set(pte_t *ptep, pgste_t pgste) +{ #ifdef CONFIG_PGSTE + *(pgste_t *)(ptep + PTRS_PER_PTE) = pgste; +#endif +} -/** - * gmap_alloc - allocate a guest address space - * @mm: pointer to the parent mm_struct - * @limit: maximum address of the gmap address space - * - * Returns a guest address space structure. - */ -struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit) +static inline pgste_t pgste_update_all(pte_t pte, pgste_t pgste, + struct mm_struct *mm) { - struct gmap *gmap; - struct page *page; - unsigned long *table; - unsigned long etype, atype; - - if (limit < (1UL << 31)) { - limit = (1UL << 31) - 1; - atype = _ASCE_TYPE_SEGMENT; - etype = _SEGMENT_ENTRY_EMPTY; - } else if (limit < (1UL << 42)) { - limit = (1UL << 42) - 1; - atype = _ASCE_TYPE_REGION3; - etype = _REGION3_ENTRY_EMPTY; - } else if (limit < (1UL << 53)) { - limit = (1UL << 53) - 1; - atype = _ASCE_TYPE_REGION2; - etype = _REGION2_ENTRY_EMPTY; - } else { - limit = -1UL; - atype = _ASCE_TYPE_REGION1; - etype = _REGION1_ENTRY_EMPTY; - } - gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL); - if (!gmap) - goto out; - INIT_LIST_HEAD(&gmap->crst_list); - INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL); - INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC); - spin_lock_init(&gmap->guest_table_lock); - gmap->mm = mm; - page = alloc_pages(GFP_KERNEL, 2); - if (!page) - goto out_free; - page->index = 0; - list_add(&page->lru, &gmap->crst_list); - table = (unsigned long *) page_to_phys(page); - crst_table_init(table, etype); - gmap->table = table; - gmap->asce = atype | _ASCE_TABLE_LENGTH | - _ASCE_USER_BITS | __pa(table); - gmap->asce_end = limit; - down_write(&mm->mmap_sem); - list_add(&gmap->list, &mm->context.gmap_list); - up_write(&mm->mmap_sem); - return gmap; - -out_free: - kfree(gmap); -out: - return NULL; +#ifdef CONFIG_PGSTE + unsigned long address, bits, skey; + + if (!mm_use_skey(mm) || pte_val(pte) & _PAGE_INVALID) + return pgste; + address = pte_val(pte) & PAGE_MASK; + skey = (unsigned long) page_get_storage_key(address); + bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); + /* Transfer page changed & referenced bit to guest bits in pgste */ + pgste_val(pgste) |= bits << 48; /* GR bit & GC bit */ + /* Copy page access key and fetch protection bit to pgste */ + pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT); + pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; +#endif + return pgste; + } -EXPORT_SYMBOL_GPL(gmap_alloc); -static void gmap_flush_tlb(struct gmap *gmap) +static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry, + struct mm_struct *mm) { - if (MACHINE_HAS_IDTE) - __tlb_flush_asce(gmap->mm, gmap->asce); - else - __tlb_flush_global(); +#ifdef CONFIG_PGSTE + unsigned long address; + unsigned long nkey; + + if (!mm_use_skey(mm) || pte_val(entry) & _PAGE_INVALID) + return; + VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID)); + address = pte_val(entry) & PAGE_MASK; + /* + * Set page access key and fetch protection bit from pgste. + * The guest C/R information is still in the PGSTE, set real + * key C/R to 0. + */ + nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56; + nkey |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48; + page_set_storage_key(address, nkey, 0); +#endif } -static void gmap_radix_tree_free(struct radix_tree_root *root) +static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry) { - struct radix_tree_iter iter; - unsigned long indices[16]; - unsigned long index; - void **slot; - int i, nr; - - /* A radix tree is freed by deleting all of its entries */ - index = 0; - do { - nr = 0; - radix_tree_for_each_slot(slot, root, &iter, index) { - indices[nr] = iter.index; - if (++nr == 16) - break; - } - for (i = 0; i < nr; i++) { - index = indices[i]; - radix_tree_delete(root, index); +#ifdef CONFIG_PGSTE + if ((pte_val(entry) & _PAGE_PRESENT) && + (pte_val(entry) & _PAGE_WRITE) && + !(pte_val(entry) & _PAGE_INVALID)) { + if (!MACHINE_HAS_ESOP) { + /* + * Without enhanced suppression-on-protection force + * the dirty bit on for all writable ptes. + */ + pte_val(entry) |= _PAGE_DIRTY; + pte_val(entry) &= ~_PAGE_PROTECT; } - } while (nr > 0); + if (!(pte_val(entry) & _PAGE_PROTECT)) + /* This pte allows write access, set user-dirty */ + pgste_val(pgste) |= PGSTE_UC_BIT; + } +#endif + *ptep = entry; + return pgste; } -/** - * gmap_free - free a guest address space - * @gmap: pointer to the guest address space structure - */ -void gmap_free(struct gmap *gmap) +static inline pgste_t pgste_ipte_notify(struct mm_struct *mm, + unsigned long addr, + pte_t *ptep, pgste_t pgste) { - struct page *page, *next; - - /* Flush tlb. */ - if (MACHINE_HAS_IDTE) - __tlb_flush_asce(gmap->mm, gmap->asce); - else - __tlb_flush_global(); - - /* Free all segment & region tables. */ - list_for_each_entry_safe(page, next, &gmap->crst_list, lru) - __free_pages(page, 2); - gmap_radix_tree_free(&gmap->guest_to_host); - gmap_radix_tree_free(&gmap->host_to_guest); - down_write(&gmap->mm->mmap_sem); - list_del(&gmap->list); - up_write(&gmap->mm->mmap_sem); - kfree(gmap); +#ifdef CONFIG_PGSTE + if (pgste_val(pgste) & PGSTE_IN_BIT) { + pgste_val(pgste) &= ~PGSTE_IN_BIT; + ptep_notify(mm, addr, ptep); + } +#endif + return pgste; } -EXPORT_SYMBOL_GPL(gmap_free); -/** - * gmap_enable - switch primary space to the guest address space - * @gmap: pointer to the guest address space structure - */ -void gmap_enable(struct gmap *gmap) +static inline pgste_t ptep_xchg_start(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) { - S390_lowcore.gmap = (unsigned long) gmap; + pgste_t pgste = __pgste(0); + + if (mm_has_pgste(mm)) { + pgste = pgste_get_lock(ptep); + pgste = pgste_ipte_notify(mm, addr, ptep, pgste); + } + return pgste; } -EXPORT_SYMBOL_GPL(gmap_enable); -/** - * gmap_disable - switch back to the standard primary address space - * @gmap: pointer to the guest address space structure - */ -void gmap_disable(struct gmap *gmap) +static inline void ptep_xchg_commit(struct mm_struct *mm, + unsigned long addr, pte_t *ptep, + pgste_t pgste, pte_t old, pte_t new) { - S390_lowcore.gmap = 0UL; + if (mm_has_pgste(mm)) { + if (pte_val(old) & _PAGE_INVALID) + pgste_set_key(ptep, pgste, new, mm); + if (pte_val(new) & _PAGE_INVALID) { + pgste = pgste_update_all(old, pgste, mm); + if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) == + _PGSTE_GPS_USAGE_UNUSED) + pte_val(old) |= _PAGE_UNUSED; + } + pgste = pgste_set_pte(ptep, pgste, new); + pgste_set_unlock(ptep, pgste); + } else { + *ptep = new; + } } -EXPORT_SYMBOL_GPL(gmap_disable); -/* - * gmap_alloc_table is assumed to be called with mmap_sem held - */ -static int gmap_alloc_table(struct gmap *gmap, unsigned long *table, - unsigned long init, unsigned long gaddr) +pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t new) { - struct page *page; - unsigned long *new; - - /* since we dont free the gmap table until gmap_free we can unlock */ - page = alloc_pages(GFP_KERNEL, 2); - if (!page) - return -ENOMEM; - new = (unsigned long *) page_to_phys(page); - crst_table_init(new, init); - spin_lock(&gmap->mm->page_table_lock); - if (*table & _REGION_ENTRY_INVALID) { - list_add(&page->lru, &gmap->crst_list); - *table = (unsigned long) new | _REGION_ENTRY_LENGTH | - (*table & _REGION_ENTRY_TYPE_MASK); - page->index = gaddr; - page = NULL; - } - spin_unlock(&gmap->mm->page_table_lock); - if (page) - __free_pages(page, 2); - return 0; + pgste_t pgste; + pte_t old; + + pgste = ptep_xchg_start(mm, addr, ptep); + old = ptep_flush_direct(mm, addr, ptep); + ptep_xchg_commit(mm, addr, ptep, pgste, old, new); + return old; } +EXPORT_SYMBOL(ptep_xchg_direct); -/** - * __gmap_segment_gaddr - find virtual address from segment pointer - * @entry: pointer to a segment table entry in the guest address space - * - * Returns the virtual address in the guest address space for the segment - */ -static unsigned long __gmap_segment_gaddr(unsigned long *entry) +pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t new) { - struct page *page; - unsigned long offset, mask; - - offset = (unsigned long) entry / sizeof(unsigned long); - offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE; - mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1); - page = virt_to_page((void *)((unsigned long) entry & mask)); - return page->index + offset; + pgste_t pgste; + pte_t old; + + pgste = ptep_xchg_start(mm, addr, ptep); + old = ptep_flush_lazy(mm, addr, ptep); + ptep_xchg_commit(mm, addr, ptep, pgste, old, new); + return old; } +EXPORT_SYMBOL(ptep_xchg_lazy); -/** - * __gmap_unlink_by_vmaddr - unlink a single segment via a host address - * @gmap: pointer to the guest address space structure - * @vmaddr: address in the host process address space - * - * Returns 1 if a TLB flush is required - */ -static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr) +pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) { - unsigned long *entry; - int flush = 0; - - spin_lock(&gmap->guest_table_lock); - entry = radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT); - if (entry) { - flush = (*entry != _SEGMENT_ENTRY_INVALID); - *entry = _SEGMENT_ENTRY_INVALID; + pgste_t pgste; + pte_t old; + + pgste = ptep_xchg_start(mm, addr, ptep); + old = ptep_flush_lazy(mm, addr, ptep); + if (mm_has_pgste(mm)) { + pgste = pgste_update_all(old, pgste, mm); + pgste_set(ptep, pgste); } - spin_unlock(&gmap->guest_table_lock); - return flush; + return old; } +EXPORT_SYMBOL(ptep_modify_prot_start); -/** - * __gmap_unmap_by_gaddr - unmap a single segment via a guest address - * @gmap: pointer to the guest address space structure - * @gaddr: address in the guest address space - * - * Returns 1 if a TLB flush is required - */ -static int __gmap_unmap_by_gaddr(struct gmap *gmap, unsigned long gaddr) +void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) { - unsigned long vmaddr; + pgste_t pgste; - vmaddr = (unsigned long) radix_tree_delete(&gmap->guest_to_host, - gaddr >> PMD_SHIFT); - return vmaddr ? __gmap_unlink_by_vmaddr(gmap, vmaddr) : 0; + if (mm_has_pgste(mm)) { + pgste = pgste_get(ptep); + pgste_set_key(ptep, pgste, pte, mm); + pgste = pgste_set_pte(ptep, pgste, pte); + pgste_set_unlock(ptep, pgste); + } else { + *ptep = pte; + } } +EXPORT_SYMBOL(ptep_modify_prot_commit); -/** - * gmap_unmap_segment - unmap segment from the guest address space - * @gmap: pointer to the guest address space structure - * @to: address in the guest address space - * @len: length of the memory area to unmap - * - * Returns 0 if the unmap succeeded, -EINVAL if not. - */ -int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len) +static inline pmd_t pmdp_flush_direct(struct mm_struct *mm, + unsigned long addr, pmd_t *pmdp) { - unsigned long off; - int flush; - - if ((to | len) & (PMD_SIZE - 1)) - return -EINVAL; - if (len == 0 || to + len < to) - return -EINVAL; - - flush = 0; - down_write(&gmap->mm->mmap_sem); - for (off = 0; off < len; off += PMD_SIZE) - flush |= __gmap_unmap_by_gaddr(gmap, to + off); - up_write(&gmap->mm->mmap_sem); - if (flush) - gmap_flush_tlb(gmap); - return 0; + int active, count; + pmd_t old; + + old = *pmdp; + if (pmd_val(old) & _SEGMENT_ENTRY_INVALID) + return old; + if (!MACHINE_HAS_IDTE) { + __pmdp_csp(pmdp); + return old; + } + active = (mm == current->active_mm) ? 1 : 0; + count = atomic_add_return(0x10000, &mm->context.attach_count); + if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active && + cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) + __pmdp_idte_local(addr, pmdp); + else + __pmdp_idte(addr, pmdp); + atomic_sub(0x10000, &mm->context.attach_count); + return old; +} + +static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm, + unsigned long addr, pmd_t *pmdp) +{ + int active, count; + pmd_t old; + + old = *pmdp; + if (pmd_val(old) & _SEGMENT_ENTRY_INVALID) + return old; + active = (mm == current->active_mm) ? 1 : 0; + count = atomic_add_return(0x10000, &mm->context.attach_count); + if ((count & 0xffff) <= active) { + pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID; + mm->context.flush_mm = 1; + } else if (MACHINE_HAS_IDTE) + __pmdp_idte(addr, pmdp); + else + __pmdp_csp(pmdp); + atomic_sub(0x10000, &mm->context.attach_count); + return old; } -EXPORT_SYMBOL_GPL(gmap_unmap_segment); - -/** - * gmap_mmap_segment - map a segment to the guest address space - * @gmap: pointer to the guest address space structure - * @from: source address in the parent address space - * @to: target address in the guest address space - * @len: length of the memory area to map - * - * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not. - */ -int gmap_map_segment(struct gmap *gmap, unsigned long from, - unsigned long to, unsigned long len) + +pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp, pmd_t new) { - unsigned long off; - int flush; - - if ((from | to | len) & (PMD_SIZE - 1)) - return -EINVAL; - if (len == 0 || from + len < from || to + len < to || - from + len - 1 > TASK_MAX_SIZE || to + len - 1 > gmap->asce_end) - return -EINVAL; - - flush = 0; - down_write(&gmap->mm->mmap_sem); - for (off = 0; off < len; off += PMD_SIZE) { - /* Remove old translation */ - flush |= __gmap_unmap_by_gaddr(gmap, to + off); - /* Store new translation */ - if (radix_tree_insert(&gmap->guest_to_host, - (to + off) >> PMD_SHIFT, - (void *) from + off)) - break; - } - up_write(&gmap->mm->mmap_sem); - if (flush) - gmap_flush_tlb(gmap); - if (off >= len) - return 0; - gmap_unmap_segment(gmap, to, len); - return -ENOMEM; + pmd_t old; + + old = pmdp_flush_direct(mm, addr, pmdp); + *pmdp = new; + return old; } -EXPORT_SYMBOL_GPL(gmap_map_segment); - -/** - * __gmap_translate - translate a guest address to a user space address - * @gmap: pointer to guest mapping meta data structure - * @gaddr: guest address - * - * Returns user space address which corresponds to the guest address or - * -EFAULT if no such mapping exists. - * This function does not establish potentially missing page table entries. - * The mmap_sem of the mm that belongs to the address space must be held - * when this function gets called. - */ -unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr) +EXPORT_SYMBOL(pmdp_xchg_direct); + +pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp, pmd_t new) { - unsigned long vmaddr; + pmd_t old; - vmaddr = (unsigned long) - radix_tree_lookup(&gmap->guest_to_host, gaddr >> PMD_SHIFT); - return vmaddr ? (vmaddr | (gaddr & ~PMD_MASK)) : -EFAULT; + old = pmdp_flush_lazy(mm, addr, pmdp); + *pmdp = new; + return old; } -EXPORT_SYMBOL_GPL(__gmap_translate); - -/** - * gmap_translate - translate a guest address to a user space address - * @gmap: pointer to guest mapping meta data structure - * @gaddr: guest address - * - * Returns user space address which corresponds to the guest address or - * -EFAULT if no such mapping exists. - * This function does not establish potentially missing page table entries. - */ -unsigned long gmap_translate(struct gmap *gmap, unsigned long gaddr) +EXPORT_SYMBOL(pmdp_xchg_lazy); + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, + pgtable_t pgtable) { - unsigned long rc; + struct list_head *lh = (struct list_head *) pgtable; - down_read(&gmap->mm->mmap_sem); - rc = __gmap_translate(gmap, gaddr); - up_read(&gmap->mm->mmap_sem); - return rc; + assert_spin_locked(pmd_lockptr(mm, pmdp)); + + /* FIFO */ + if (!pmd_huge_pte(mm, pmdp)) + INIT_LIST_HEAD(lh); + else + list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp)); + pmd_huge_pte(mm, pmdp) = pgtable; } -EXPORT_SYMBOL_GPL(gmap_translate); -/** - * gmap_unlink - disconnect a page table from the gmap shadow tables - * @gmap: pointer to guest mapping meta data structure - * @table: pointer to the host page table - * @vmaddr: vm address associated with the host page table - */ -static void gmap_unlink(struct mm_struct *mm, unsigned long *table, - unsigned long vmaddr) +pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) { - struct gmap *gmap; - int flush; + struct list_head *lh; + pgtable_t pgtable; + pte_t *ptep; + + assert_spin_locked(pmd_lockptr(mm, pmdp)); - list_for_each_entry(gmap, &mm->context.gmap_list, list) { - flush = __gmap_unlink_by_vmaddr(gmap, vmaddr); - if (flush) - gmap_flush_tlb(gmap); + /* FIFO */ + pgtable = pmd_huge_pte(mm, pmdp); + lh = (struct list_head *) pgtable; + if (list_empty(lh)) + pmd_huge_pte(mm, pmdp) = NULL; + else { + pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next; + list_del(lh); } + ptep = (pte_t *) pgtable; + pte_val(*ptep) = _PAGE_INVALID; + ptep++; + pte_val(*ptep) = _PAGE_INVALID; + return pgtable; } +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ -/** - * gmap_link - set up shadow page tables to connect a host to a guest address - * @gmap: pointer to guest mapping meta data structure - * @gaddr: guest address - * @vmaddr: vm address - * - * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT - * if the vm address is already mapped to a different guest segment. - * The mmap_sem of the mm that belongs to the address space must be held - * when this function gets called. - */ -int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) +#ifdef CONFIG_PGSTE +void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t entry) { - struct mm_struct *mm; - unsigned long *table; - spinlock_t *ptl; - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - int rc; - - /* Create higher level tables in the gmap page table */ - table = gmap->table; - if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) { - table += (gaddr >> 53) & 0x7ff; - if ((*table & _REGION_ENTRY_INVALID) && - gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY, - gaddr & 0xffe0000000000000UL)) - return -ENOMEM; - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - } - if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) { - table += (gaddr >> 42) & 0x7ff; - if ((*table & _REGION_ENTRY_INVALID) && - gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY, - gaddr & 0xfffffc0000000000UL)) - return -ENOMEM; - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - } - if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) { - table += (gaddr >> 31) & 0x7ff; - if ((*table & _REGION_ENTRY_INVALID) && - gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY, - gaddr & 0xffffffff80000000UL)) - return -ENOMEM; - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - } - table += (gaddr >> 20) & 0x7ff; - /* Walk the parent mm page table */ - mm = gmap->mm; - pgd = pgd_offset(mm, vmaddr); - VM_BUG_ON(pgd_none(*pgd)); - pud = pud_offset(pgd, vmaddr); - VM_BUG_ON(pud_none(*pud)); - pmd = pmd_offset(pud, vmaddr); - VM_BUG_ON(pmd_none(*pmd)); - /* large pmds cannot yet be handled */ - if (pmd_large(*pmd)) - return -EFAULT; - /* Link gmap segment table entry location to page table. */ - rc = radix_tree_preload(GFP_KERNEL); - if (rc) - return rc; - ptl = pmd_lock(mm, pmd); - spin_lock(&gmap->guest_table_lock); - if (*table == _SEGMENT_ENTRY_INVALID) { - rc = radix_tree_insert(&gmap->host_to_guest, - vmaddr >> PMD_SHIFT, table); - if (!rc) - *table = pmd_val(*pmd); - } else - rc = 0; - spin_unlock(&gmap->guest_table_lock); - spin_unlock(ptl); - radix_tree_preload_end(); - return rc; + pgste_t pgste; + + /* the mm_has_pgste() check is done in set_pte_at() */ + pgste = pgste_get_lock(ptep); + pgste_val(pgste) &= ~_PGSTE_GPS_ZERO; + pgste_set_key(ptep, pgste, entry, mm); + pgste = pgste_set_pte(ptep, pgste, entry); + pgste_set_unlock(ptep, pgste); } -/** - * gmap_fault - resolve a fault on a guest address - * @gmap: pointer to guest mapping meta data structure - * @gaddr: guest address - * @fault_flags: flags to pass down to handle_mm_fault() - * - * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT - * if the vm address is already mapped to a different guest segment. - */ -int gmap_fault(struct gmap *gmap, unsigned long gaddr, - unsigned int fault_flags) +void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - unsigned long vmaddr; - int rc; - bool unlocked; - - down_read(&gmap->mm->mmap_sem); - -retry: - unlocked = false; - vmaddr = __gmap_translate(gmap, gaddr); - if (IS_ERR_VALUE(vmaddr)) { - rc = vmaddr; - goto out_up; - } - if (fixup_user_fault(current, gmap->mm, vmaddr, fault_flags, - &unlocked)) { - rc = -EFAULT; - goto out_up; - } - /* - * In the case that fixup_user_fault unlocked the mmap_sem during - * faultin redo __gmap_translate to not race with a map/unmap_segment. - */ - if (unlocked) - goto retry; + pgste_t pgste; - rc = __gmap_link(gmap, gaddr, vmaddr); -out_up: - up_read(&gmap->mm->mmap_sem); - return rc; + pgste = pgste_get_lock(ptep); + pgste_val(pgste) |= PGSTE_IN_BIT; + pgste_set_unlock(ptep, pgste); } -EXPORT_SYMBOL_GPL(gmap_fault); -static void gmap_zap_swap_entry(swp_entry_t entry, struct mm_struct *mm) +static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry) { if (!non_swap_entry(entry)) dec_mm_counter(mm, MM_SWAPENTS); @@ -620,225 +426,99 @@ static void gmap_zap_swap_entry(swp_entry_t entry, struct mm_struct *mm) free_swap_and_cache(entry); } -/* - * this function is assumed to be called with mmap_sem held - */ -void __gmap_zap(struct gmap *gmap, unsigned long gaddr) +void ptep_zap_unused(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, int reset) { - unsigned long vmaddr, ptev, pgstev; - pte_t *ptep, pte; - spinlock_t *ptl; + unsigned long pgstev; pgste_t pgste; + pte_t pte; - /* Find the vm address for the guest address */ - vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host, - gaddr >> PMD_SHIFT); - if (!vmaddr) - return; - vmaddr |= gaddr & ~PMD_MASK; - /* Get pointer to the page table entry */ - ptep = get_locked_pte(gmap->mm, vmaddr, &ptl); - if (unlikely(!ptep)) - return; - pte = *ptep; - if (!pte_swap(pte)) - goto out_pte; /* Zap unused and logically-zero pages */ pgste = pgste_get_lock(ptep); pgstev = pgste_val(pgste); - ptev = pte_val(pte); - if (((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED) || - ((pgstev & _PGSTE_GPS_ZERO) && (ptev & _PAGE_INVALID))) { - gmap_zap_swap_entry(pte_to_swp_entry(pte), gmap->mm); - pte_clear(gmap->mm, vmaddr, ptep); - } + pte = *ptep; + if (!reset && pte_swap(pte) && + ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED || + (pgstev & _PGSTE_GPS_ZERO))) { + ptep_zap_swap_entry(mm, pte_to_swp_entry(pte)); + pte_clear(mm, addr, ptep); + } + if (reset) + pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK; pgste_set_unlock(ptep, pgste); -out_pte: - pte_unmap_unlock(ptep, ptl); } -EXPORT_SYMBOL_GPL(__gmap_zap); -void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to) +void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - unsigned long gaddr, vmaddr, size; - struct vm_area_struct *vma; - - down_read(&gmap->mm->mmap_sem); - for (gaddr = from; gaddr < to; - gaddr = (gaddr + PMD_SIZE) & PMD_MASK) { - /* Find the vm address for the guest address */ - vmaddr = (unsigned long) - radix_tree_lookup(&gmap->guest_to_host, - gaddr >> PMD_SHIFT); - if (!vmaddr) - continue; - vmaddr |= gaddr & ~PMD_MASK; - /* Find vma in the parent mm */ - vma = find_vma(gmap->mm, vmaddr); - size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK)); - zap_page_range(vma, vmaddr, size, NULL); - } - up_read(&gmap->mm->mmap_sem); -} -EXPORT_SYMBOL_GPL(gmap_discard); - -static LIST_HEAD(gmap_notifier_list); -static DEFINE_SPINLOCK(gmap_notifier_lock); + unsigned long ptev; + pgste_t pgste; -/** - * gmap_register_ipte_notifier - register a pte invalidation callback - * @nb: pointer to the gmap notifier block - */ -void gmap_register_ipte_notifier(struct gmap_notifier *nb) -{ - spin_lock(&gmap_notifier_lock); - list_add(&nb->list, &gmap_notifier_list); - spin_unlock(&gmap_notifier_lock); + /* Clear storage key */ + pgste = pgste_get_lock(ptep); + pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT | + PGSTE_GR_BIT | PGSTE_GC_BIT); + ptev = pte_val(*ptep); + if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE)) + page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 1); + pgste_set_unlock(ptep, pgste); } -EXPORT_SYMBOL_GPL(gmap_register_ipte_notifier); -/** - * gmap_unregister_ipte_notifier - remove a pte invalidation callback - * @nb: pointer to the gmap notifier block - */ -void gmap_unregister_ipte_notifier(struct gmap_notifier *nb) -{ - spin_lock(&gmap_notifier_lock); - list_del_init(&nb->list); - spin_unlock(&gmap_notifier_lock); -} -EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier); - -/** - * gmap_ipte_notify - mark a range of ptes for invalidation notification - * @gmap: pointer to guest mapping meta data structure - * @gaddr: virtual address in the guest address space - * @len: size of area - * - * Returns 0 if for each page in the given range a gmap mapping exists and - * the invalidation notification could be set. If the gmap mapping is missing - * for one or more pages -EFAULT is returned. If no memory could be allocated - * -ENOMEM is returned. This function establishes missing page table entries. +/* + * Test and reset if a guest page is dirty */ -int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len) +bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr) { - unsigned long addr; spinlock_t *ptl; - pte_t *ptep, entry; pgste_t pgste; - bool unlocked; - int rc = 0; - - if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK)) - return -EINVAL; - down_read(&gmap->mm->mmap_sem); - while (len) { - unlocked = false; - /* Convert gmap address and connect the page tables */ - addr = __gmap_translate(gmap, gaddr); - if (IS_ERR_VALUE(addr)) { - rc = addr; - break; - } - /* Get the page mapped */ - if (fixup_user_fault(current, gmap->mm, addr, FAULT_FLAG_WRITE, - &unlocked)) { - rc = -EFAULT; - break; - } - /* While trying to map mmap_sem got unlocked. Let us retry */ - if (unlocked) - continue; - rc = __gmap_link(gmap, gaddr, addr); - if (rc) - break; - /* Walk the process page table, lock and get pte pointer */ - ptep = get_locked_pte(gmap->mm, addr, &ptl); - VM_BUG_ON(!ptep); - /* Set notification bit in the pgste of the pte */ - entry = *ptep; - if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_PROTECT)) == 0) { - pgste = pgste_get_lock(ptep); - pgste_val(pgste) |= PGSTE_IN_BIT; - pgste_set_unlock(ptep, pgste); - gaddr += PAGE_SIZE; - len -= PAGE_SIZE; - } - pte_unmap_unlock(ptep, ptl); - } - up_read(&gmap->mm->mmap_sem); - return rc; -} -EXPORT_SYMBOL_GPL(gmap_ipte_notify); - -/** - * gmap_do_ipte_notify - call all invalidation callbacks for a specific pte. - * @mm: pointer to the process mm_struct - * @addr: virtual address in the process address space - * @pte: pointer to the page table entry - * - * This function is assumed to be called with the page table lock held - * for the pte to notify. - */ -void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long vmaddr, pte_t *pte) -{ - unsigned long offset, gaddr; - unsigned long *table; - struct gmap_notifier *nb; - struct gmap *gmap; - - offset = ((unsigned long) pte) & (255 * sizeof(pte_t)); - offset = offset * (4096 / sizeof(pte_t)); - spin_lock(&gmap_notifier_lock); - list_for_each_entry(gmap, &mm->context.gmap_list, list) { - table = radix_tree_lookup(&gmap->host_to_guest, - vmaddr >> PMD_SHIFT); - if (!table) - continue; - gaddr = __gmap_segment_gaddr(table) + offset; - list_for_each_entry(nb, &gmap_notifier_list, list) - nb->notifier_call(gmap, gaddr); + pte_t *ptep; + pte_t pte; + bool dirty; + + ptep = get_locked_pte(mm, addr, &ptl); + if (unlikely(!ptep)) + return false; + + pgste = pgste_get_lock(ptep); + dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT); + pgste_val(pgste) &= ~PGSTE_UC_BIT; + pte = *ptep; + if (dirty && (pte_val(pte) & _PAGE_PRESENT)) { + pgste = pgste_ipte_notify(mm, addr, ptep, pgste); + __ptep_ipte(addr, ptep); + if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE)) + pte_val(pte) |= _PAGE_PROTECT; + else + pte_val(pte) |= _PAGE_INVALID; + *ptep = pte; } - spin_unlock(&gmap_notifier_lock); + pgste_set_unlock(ptep, pgste); + + spin_unlock(ptl); + return dirty; } -EXPORT_SYMBOL_GPL(gmap_do_ipte_notify); +EXPORT_SYMBOL_GPL(test_and_clear_guest_dirty); int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, - unsigned long key, bool nq) + unsigned char key, bool nq) { + unsigned long keyul; spinlock_t *ptl; pgste_t old, new; pte_t *ptep; - bool unlocked; down_read(&mm->mmap_sem); -retry: - unlocked = false; ptep = get_locked_pte(mm, addr, &ptl); if (unlikely(!ptep)) { up_read(&mm->mmap_sem); return -EFAULT; } - if (!(pte_val(*ptep) & _PAGE_INVALID) && - (pte_val(*ptep) & _PAGE_PROTECT)) { - pte_unmap_unlock(ptep, ptl); - /* - * We do not really care about unlocked. We will retry either - * way. But this allows fixup_user_fault to enable userfaultfd. - */ - if (fixup_user_fault(current, mm, addr, FAULT_FLAG_WRITE, - &unlocked)) { - up_read(&mm->mmap_sem); - return -EFAULT; - } - goto retry; - } new = old = pgste_get_lock(ptep); pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT | PGSTE_ACC_BITS | PGSTE_FP_BIT); - pgste_val(new) |= (key & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48; - pgste_val(new) |= (key & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; + keyul = (unsigned long) key; + pgste_val(new) |= (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48; + pgste_val(new) |= (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; if (!(pte_val(*ptep) & _PAGE_INVALID)) { unsigned long address, bits, skey; @@ -863,13 +543,12 @@ retry: } EXPORT_SYMBOL(set_guest_storage_key); -unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr) +unsigned char get_guest_storage_key(struct mm_struct *mm, unsigned long addr) { + unsigned char key; spinlock_t *ptl; pgste_t pgste; pte_t *ptep; - uint64_t physaddr; - unsigned long key = 0; down_read(&mm->mmap_sem); ptep = get_locked_pte(mm, addr, &ptl); @@ -880,13 +559,12 @@ unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr) pgste = pgste_get_lock(ptep); if (pte_val(*ptep) & _PAGE_INVALID) { - key |= (pgste_val(pgste) & PGSTE_ACC_BITS) >> 56; + key = (pgste_val(pgste) & PGSTE_ACC_BITS) >> 56; key |= (pgste_val(pgste) & PGSTE_FP_BIT) >> 56; key |= (pgste_val(pgste) & PGSTE_GR_BIT) >> 48; key |= (pgste_val(pgste) & PGSTE_GC_BIT) >> 48; } else { - physaddr = pte_val(*ptep) & PAGE_MASK; - key = page_get_storage_key(physaddr); + key = page_get_storage_key(pte_val(*ptep) & PAGE_MASK); /* Reflect guest's logical view, not physical */ if (pgste_val(pgste) & PGSTE_GR_BIT) @@ -901,471 +579,4 @@ unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr) return key; } EXPORT_SYMBOL(get_guest_storage_key); - -static int page_table_allocate_pgste_min = 0; -static int page_table_allocate_pgste_max = 1; -int page_table_allocate_pgste = 0; -EXPORT_SYMBOL(page_table_allocate_pgste); - -static struct ctl_table page_table_sysctl[] = { - { - .procname = "allocate_pgste", - .data = &page_table_allocate_pgste, - .maxlen = sizeof(int), - .mode = S_IRUGO | S_IWUSR, - .proc_handler = proc_dointvec, - .extra1 = &page_table_allocate_pgste_min, - .extra2 = &page_table_allocate_pgste_max, - }, - { } -}; - -static struct ctl_table page_table_sysctl_dir[] = { - { - .procname = "vm", - .maxlen = 0, - .mode = 0555, - .child = page_table_sysctl, - }, - { } -}; - -static int __init page_table_register_sysctl(void) -{ - return register_sysctl_table(page_table_sysctl_dir) ? 0 : -ENOMEM; -} -__initcall(page_table_register_sysctl); - -#else /* CONFIG_PGSTE */ - -static inline void gmap_unlink(struct mm_struct *mm, unsigned long *table, - unsigned long vmaddr) -{ -} - -#endif /* CONFIG_PGSTE */ - -static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits) -{ - unsigned int old, new; - - do { - old = atomic_read(v); - new = old ^ bits; - } while (atomic_cmpxchg(v, old, new) != old); - return new; -} - -/* - * page table entry allocation/free routines. - */ -unsigned long *page_table_alloc(struct mm_struct *mm) -{ - unsigned long *table; - struct page *page; - unsigned int mask, bit; - - /* Try to get a fragment of a 4K page as a 2K page table */ - if (!mm_alloc_pgste(mm)) { - table = NULL; - spin_lock_bh(&mm->context.list_lock); - if (!list_empty(&mm->context.pgtable_list)) { - page = list_first_entry(&mm->context.pgtable_list, - struct page, lru); - mask = atomic_read(&page->_mapcount); - mask = (mask | (mask >> 4)) & 3; - if (mask != 3) { - table = (unsigned long *) page_to_phys(page); - bit = mask & 1; /* =1 -> second 2K */ - if (bit) - table += PTRS_PER_PTE; - atomic_xor_bits(&page->_mapcount, 1U << bit); - list_del(&page->lru); - } - } - spin_unlock_bh(&mm->context.list_lock); - if (table) - return table; - } - /* Allocate a fresh page */ - page = alloc_page(GFP_KERNEL|__GFP_REPEAT); - if (!page) - return NULL; - if (!pgtable_page_ctor(page)) { - __free_page(page); - return NULL; - } - /* Initialize page table */ - table = (unsigned long *) page_to_phys(page); - if (mm_alloc_pgste(mm)) { - /* Return 4K page table with PGSTEs */ - atomic_set(&page->_mapcount, 3); - clear_table(table, _PAGE_INVALID, PAGE_SIZE/2); - clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2); - } else { - /* Return the first 2K fragment of the page */ - atomic_set(&page->_mapcount, 1); - clear_table(table, _PAGE_INVALID, PAGE_SIZE); - spin_lock_bh(&mm->context.list_lock); - list_add(&page->lru, &mm->context.pgtable_list); - spin_unlock_bh(&mm->context.list_lock); - } - return table; -} - -void page_table_free(struct mm_struct *mm, unsigned long *table) -{ - struct page *page; - unsigned int bit, mask; - - page = pfn_to_page(__pa(table) >> PAGE_SHIFT); - if (!mm_alloc_pgste(mm)) { - /* Free 2K page table fragment of a 4K page */ - bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)); - spin_lock_bh(&mm->context.list_lock); - mask = atomic_xor_bits(&page->_mapcount, 1U << bit); - if (mask & 3) - list_add(&page->lru, &mm->context.pgtable_list); - else - list_del(&page->lru); - spin_unlock_bh(&mm->context.list_lock); - if (mask != 0) - return; - } - - pgtable_page_dtor(page); - atomic_set(&page->_mapcount, -1); - __free_page(page); -} - -void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table, - unsigned long vmaddr) -{ - struct mm_struct *mm; - struct page *page; - unsigned int bit, mask; - - mm = tlb->mm; - page = pfn_to_page(__pa(table) >> PAGE_SHIFT); - if (mm_alloc_pgste(mm)) { - gmap_unlink(mm, table, vmaddr); - table = (unsigned long *) (__pa(table) | 3); - tlb_remove_table(tlb, table); - return; - } - bit = (__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)); - spin_lock_bh(&mm->context.list_lock); - mask = atomic_xor_bits(&page->_mapcount, 0x11U << bit); - if (mask & 3) - list_add_tail(&page->lru, &mm->context.pgtable_list); - else - list_del(&page->lru); - spin_unlock_bh(&mm->context.list_lock); - table = (unsigned long *) (__pa(table) | (1U << bit)); - tlb_remove_table(tlb, table); -} - -static void __tlb_remove_table(void *_table) -{ - unsigned int mask = (unsigned long) _table & 3; - void *table = (void *)((unsigned long) _table ^ mask); - struct page *page = pfn_to_page(__pa(table) >> PAGE_SHIFT); - - switch (mask) { - case 0: /* pmd or pud */ - free_pages((unsigned long) table, 2); - break; - case 1: /* lower 2K of a 4K page table */ - case 2: /* higher 2K of a 4K page table */ - if (atomic_xor_bits(&page->_mapcount, mask << 4) != 0) - break; - /* fallthrough */ - case 3: /* 4K page table with pgstes */ - pgtable_page_dtor(page); - atomic_set(&page->_mapcount, -1); - __free_page(page); - break; - } -} - -static void tlb_remove_table_smp_sync(void *arg) -{ - /* Simply deliver the interrupt */ -} - -static void tlb_remove_table_one(void *table) -{ - /* - * This isn't an RCU grace period and hence the page-tables cannot be - * assumed to be actually RCU-freed. - * - * It is however sufficient for software page-table walkers that rely - * on IRQ disabling. See the comment near struct mmu_table_batch. - */ - smp_call_function(tlb_remove_table_smp_sync, NULL, 1); - __tlb_remove_table(table); -} - -static void tlb_remove_table_rcu(struct rcu_head *head) -{ - struct mmu_table_batch *batch; - int i; - - batch = container_of(head, struct mmu_table_batch, rcu); - - for (i = 0; i < batch->nr; i++) - __tlb_remove_table(batch->tables[i]); - - free_page((unsigned long)batch); -} - -void tlb_table_flush(struct mmu_gather *tlb) -{ - struct mmu_table_batch **batch = &tlb->batch; - - if (*batch) { - call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu); - *batch = NULL; - } -} - -void tlb_remove_table(struct mmu_gather *tlb, void *table) -{ - struct mmu_table_batch **batch = &tlb->batch; - - tlb->mm->context.flush_mm = 1; - if (*batch == NULL) { - *batch = (struct mmu_table_batch *) - __get_free_page(GFP_NOWAIT | __GFP_NOWARN); - if (*batch == NULL) { - __tlb_flush_mm_lazy(tlb->mm); - tlb_remove_table_one(table); - return; - } - (*batch)->nr = 0; - } - (*batch)->tables[(*batch)->nr++] = table; - if ((*batch)->nr == MAX_TABLE_BATCH) - tlb_flush_mmu(tlb); -} - -#ifdef CONFIG_TRANSPARENT_HUGEPAGE -static inline void thp_split_vma(struct vm_area_struct *vma) -{ - unsigned long addr; - - for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) - follow_page(vma, addr, FOLL_SPLIT); -} - -static inline void thp_split_mm(struct mm_struct *mm) -{ - struct vm_area_struct *vma; - - for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) { - thp_split_vma(vma); - vma->vm_flags &= ~VM_HUGEPAGE; - vma->vm_flags |= VM_NOHUGEPAGE; - } - mm->def_flags |= VM_NOHUGEPAGE; -} -#else -static inline void thp_split_mm(struct mm_struct *mm) -{ -} -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ - -/* - * switch on pgstes for its userspace process (for kvm) - */ -int s390_enable_sie(void) -{ - struct mm_struct *mm = current->mm; - - /* Do we have pgstes? if yes, we are done */ - if (mm_has_pgste(mm)) - return 0; - /* Fail if the page tables are 2K */ - if (!mm_alloc_pgste(mm)) - return -EINVAL; - down_write(&mm->mmap_sem); - mm->context.has_pgste = 1; - /* split thp mappings and disable thp for future mappings */ - thp_split_mm(mm); - up_write(&mm->mmap_sem); - return 0; -} -EXPORT_SYMBOL_GPL(s390_enable_sie); - -/* - * Enable storage key handling from now on and initialize the storage - * keys with the default key. - */ -static int __s390_enable_skey(pte_t *pte, unsigned long addr, - unsigned long next, struct mm_walk *walk) -{ - unsigned long ptev; - pgste_t pgste; - - pgste = pgste_get_lock(pte); - /* - * Remove all zero page mappings, - * after establishing a policy to forbid zero page mappings - * following faults for that page will get fresh anonymous pages - */ - if (is_zero_pfn(pte_pfn(*pte))) { - ptep_flush_direct(walk->mm, addr, pte); - pte_val(*pte) = _PAGE_INVALID; - } - /* Clear storage key */ - pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT | - PGSTE_GR_BIT | PGSTE_GC_BIT); - ptev = pte_val(*pte); - if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE)) - page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 1); - pgste_set_unlock(pte, pgste); - return 0; -} - -int s390_enable_skey(void) -{ - struct mm_walk walk = { .pte_entry = __s390_enable_skey }; - struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; - int rc = 0; - - down_write(&mm->mmap_sem); - if (mm_use_skey(mm)) - goto out_up; - - mm->context.use_skey = 1; - for (vma = mm->mmap; vma; vma = vma->vm_next) { - if (ksm_madvise(vma, vma->vm_start, vma->vm_end, - MADV_UNMERGEABLE, &vma->vm_flags)) { - mm->context.use_skey = 0; - rc = -ENOMEM; - goto out_up; - } - } - mm->def_flags &= ~VM_MERGEABLE; - - walk.mm = mm; - walk_page_range(0, TASK_SIZE, &walk); - -out_up: - up_write(&mm->mmap_sem); - return rc; -} -EXPORT_SYMBOL_GPL(s390_enable_skey); - -/* - * Reset CMMA state, make all pages stable again. - */ -static int __s390_reset_cmma(pte_t *pte, unsigned long addr, - unsigned long next, struct mm_walk *walk) -{ - pgste_t pgste; - - pgste = pgste_get_lock(pte); - pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK; - pgste_set_unlock(pte, pgste); - return 0; -} - -void s390_reset_cmma(struct mm_struct *mm) -{ - struct mm_walk walk = { .pte_entry = __s390_reset_cmma }; - - down_write(&mm->mmap_sem); - walk.mm = mm; - walk_page_range(0, TASK_SIZE, &walk); - up_write(&mm->mmap_sem); -} -EXPORT_SYMBOL_GPL(s390_reset_cmma); - -/* - * Test and reset if a guest page is dirty - */ -bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *gmap) -{ - pte_t *pte; - spinlock_t *ptl; - bool dirty = false; - - pte = get_locked_pte(gmap->mm, address, &ptl); - if (unlikely(!pte)) - return false; - - if (ptep_test_and_clear_user_dirty(gmap->mm, address, pte)) - dirty = true; - - spin_unlock(ptl); - return dirty; -} -EXPORT_SYMBOL_GPL(gmap_test_and_clear_dirty); - -#ifdef CONFIG_TRANSPARENT_HUGEPAGE -int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address, - pmd_t *pmdp) -{ - VM_BUG_ON(address & ~HPAGE_PMD_MASK); - /* No need to flush TLB - * On s390 reference bits are in storage key and never in TLB */ - return pmdp_test_and_clear_young(vma, address, pmdp); -} - -int pmdp_set_access_flags(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp, - pmd_t entry, int dirty) -{ - VM_BUG_ON(address & ~HPAGE_PMD_MASK); - - entry = pmd_mkyoung(entry); - if (dirty) - entry = pmd_mkdirty(entry); - if (pmd_same(*pmdp, entry)) - return 0; - pmdp_invalidate(vma, address, pmdp); - set_pmd_at(vma->vm_mm, address, pmdp, entry); - return 1; -} - -void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, - pgtable_t pgtable) -{ - struct list_head *lh = (struct list_head *) pgtable; - - assert_spin_locked(pmd_lockptr(mm, pmdp)); - - /* FIFO */ - if (!pmd_huge_pte(mm, pmdp)) - INIT_LIST_HEAD(lh); - else - list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp)); - pmd_huge_pte(mm, pmdp) = pgtable; -} - -pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) -{ - struct list_head *lh; - pgtable_t pgtable; - pte_t *ptep; - - assert_spin_locked(pmd_lockptr(mm, pmdp)); - - /* FIFO */ - pgtable = pmd_huge_pte(mm, pmdp); - lh = (struct list_head *) pgtable; - if (list_empty(lh)) - pmd_huge_pte(mm, pmdp) = NULL; - else { - pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next; - list_del(lh); - } - ptep = (pte_t *) pgtable; - pte_val(*ptep) = _PAGE_INVALID; - ptep++; - pte_val(*ptep) = _PAGE_INVALID; - return pgtable; -} -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +#endif diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index ef7d6c8fea66..d48cf25cfe99 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -56,7 +56,7 @@ static inline pmd_t *vmem_pmd_alloc(void) return pmd; } -static pte_t __ref *vmem_pte_alloc(unsigned long address) +static pte_t __ref *vmem_pte_alloc(void) { pte_t *pte; @@ -94,16 +94,15 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) pgd_populate(&init_mm, pg_dir, pu_dir); } pu_dir = pud_offset(pg_dir, address); -#ifndef CONFIG_DEBUG_PAGEALLOC if (MACHINE_HAS_EDAT2 && pud_none(*pu_dir) && address && - !(address & ~PUD_MASK) && (address + PUD_SIZE <= end)) { + !(address & ~PUD_MASK) && (address + PUD_SIZE <= end) && + !debug_pagealloc_enabled()) { pud_val(*pu_dir) = __pa(address) | _REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE | (ro ? _REGION_ENTRY_PROTECT : 0); address += PUD_SIZE; continue; } -#endif if (pud_none(*pu_dir)) { pm_dir = vmem_pmd_alloc(); if (!pm_dir) @@ -111,9 +110,9 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) pud_populate(&init_mm, pu_dir, pm_dir); } pm_dir = pmd_offset(pu_dir, address); -#ifndef CONFIG_DEBUG_PAGEALLOC if (MACHINE_HAS_EDAT1 && pmd_none(*pm_dir) && address && - !(address & ~PMD_MASK) && (address + PMD_SIZE <= end)) { + !(address & ~PMD_MASK) && (address + PMD_SIZE <= end) && + !debug_pagealloc_enabled()) { pmd_val(*pm_dir) = __pa(address) | _SEGMENT_ENTRY | _SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_YOUNG | @@ -121,9 +120,8 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) address += PMD_SIZE; continue; } -#endif if (pmd_none(*pm_dir)) { - pt_dir = vmem_pte_alloc(address); + pt_dir = vmem_pte_alloc(); if (!pt_dir) goto out; pmd_populate(&init_mm, pm_dir, pt_dir); @@ -235,7 +233,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) address = (address + PMD_SIZE) & PMD_MASK; continue; } - pt_dir = vmem_pte_alloc(address); + pt_dir = vmem_pte_alloc(); if (!pt_dir) goto out; pmd_populate(&init_mm, pm_dir, pt_dir); @@ -372,7 +370,7 @@ void __init vmem_map_init(void) ro_end = (unsigned long)&_eshared & PAGE_MASK; for_each_memblock(memory, reg) { start = reg->base; - end = reg->base + reg->size - 1; + end = reg->base + reg->size; if (start >= ro_end || end <= ro_start) vmem_add_mem(start, end - start, 0); else if (start >= ro_start && end <= ro_end) diff --git a/arch/s390/net/bpf_jit.h b/arch/s390/net/bpf_jit.h index f010c93a88b1..fda605dbc1b4 100644 --- a/arch/s390/net/bpf_jit.h +++ b/arch/s390/net/bpf_jit.h @@ -37,7 +37,7 @@ extern u8 sk_load_word[], sk_load_half[], sk_load_byte[]; * | | | * +---------------+ | * | 8 byte skbp | | - * R15+170 -> +---------------+ | + * R15+176 -> +---------------+ | * | 8 byte hlen | | * R15+168 -> +---------------+ | * | 4 byte align | | @@ -58,7 +58,7 @@ extern u8 sk_load_word[], sk_load_half[], sk_load_byte[]; #define STK_OFF (STK_SPACE - STK_160_UNUSED) #define STK_OFF_TMP 160 /* Offset of tmp buffer on stack */ #define STK_OFF_HLEN 168 /* Offset of SKB header length on stack */ -#define STK_OFF_SKBP 170 /* Offset of SKB pointer on stack */ +#define STK_OFF_SKBP 176 /* Offset of SKB pointer on stack */ #define STK_OFF_R6 (160 - 11 * 8) /* Offset of r6 on stack */ #define STK_OFF_TCCNT (160 - 12 * 8) /* Offset of tail_call_cnt on stack */ diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 3c0bfc1f2694..bee281f3163d 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -45,7 +45,7 @@ struct bpf_jit { int labels[1]; /* Labels for local jumps */ }; -#define BPF_SIZE_MAX 0x7ffff /* Max size for program (20 bit signed displ) */ +#define BPF_SIZE_MAX 0xffff /* Max size for program (16 bit branches) */ #define SEEN_SKB 1 /* skb access */ #define SEEN_MEM 2 /* use mem[] for temporary storage */ @@ -54,16 +54,17 @@ struct bpf_jit { #define SEEN_FUNC 16 /* calls C functions */ #define SEEN_TAIL_CALL 32 /* code uses tail calls */ #define SEEN_SKB_CHANGE 64 /* code changes skb data */ +#define SEEN_REG_AX 128 /* code uses constant blinding */ #define SEEN_STACK (SEEN_FUNC | SEEN_MEM | SEEN_SKB) /* * s390 registers */ -#define REG_W0 (__MAX_BPF_REG+0) /* Work register 1 (even) */ -#define REG_W1 (__MAX_BPF_REG+1) /* Work register 2 (odd) */ -#define REG_SKB_DATA (__MAX_BPF_REG+2) /* SKB data register */ -#define REG_L (__MAX_BPF_REG+3) /* Literal pool register */ -#define REG_15 (__MAX_BPF_REG+4) /* Register 15 */ +#define REG_W0 (MAX_BPF_JIT_REG + 0) /* Work register 1 (even) */ +#define REG_W1 (MAX_BPF_JIT_REG + 1) /* Work register 2 (odd) */ +#define REG_SKB_DATA (MAX_BPF_JIT_REG + 2) /* SKB data register */ +#define REG_L (MAX_BPF_JIT_REG + 3) /* Literal pool register */ +#define REG_15 (MAX_BPF_JIT_REG + 4) /* Register 15 */ #define REG_0 REG_W0 /* Register 0 */ #define REG_1 REG_W1 /* Register 1 */ #define REG_2 BPF_REG_1 /* Register 2 */ @@ -88,6 +89,8 @@ static const int reg2hex[] = { [BPF_REG_9] = 10, /* BPF stack pointer */ [BPF_REG_FP] = 13, + /* Register for blinding (shared with REG_SKB_DATA) */ + [BPF_REG_AX] = 12, /* SKB data pointer */ [REG_SKB_DATA] = 12, /* Work registers for s390x backend */ @@ -385,7 +388,7 @@ static void save_restore_regs(struct bpf_jit *jit, int op) /* * For SKB access %b1 contains the SKB pointer. For "bpf_jit.S" * we store the SKB header length on the stack and the SKB data - * pointer in REG_SKB_DATA. + * pointer in REG_SKB_DATA if BPF_REG_AX is not used. */ static void emit_load_skb_data_hlen(struct bpf_jit *jit) { @@ -397,9 +400,10 @@ static void emit_load_skb_data_hlen(struct bpf_jit *jit) offsetof(struct sk_buff, data_len)); /* stg %w1,ST_OFF_HLEN(%r0,%r15) */ EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, REG_15, STK_OFF_HLEN); - /* lg %skb_data,data_off(%b1) */ - EMIT6_DISP_LH(0xe3000000, 0x0004, REG_SKB_DATA, REG_0, - BPF_REG_1, offsetof(struct sk_buff, data)); + if (!(jit->seen & SEEN_REG_AX)) + /* lg %skb_data,data_off(%b1) */ + EMIT6_DISP_LH(0xe3000000, 0x0004, REG_SKB_DATA, REG_0, + BPF_REG_1, offsetof(struct sk_buff, data)); } /* @@ -446,7 +450,7 @@ static void bpf_jit_prologue(struct bpf_jit *jit) emit_load_skb_data_hlen(jit); if (jit->seen & SEEN_SKB_CHANGE) /* stg %b1,ST_OFF_SKBP(%r0,%r15) */ - EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, REG_15, + EMIT6_DISP_LH(0xe3000000, 0x0024, BPF_REG_1, REG_0, REG_15, STK_OFF_SKBP); } @@ -487,6 +491,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i s32 imm = insn->imm; s16 off = insn->off; + if (dst_reg == BPF_REG_AX || src_reg == BPF_REG_AX) + jit->seen |= SEEN_REG_AX; switch (insn->code) { /* * BPF_MOV @@ -1188,7 +1194,7 @@ call_fn: /* * Implicit input: * BPF_REG_6 (R7) : skb pointer - * REG_SKB_DATA (R12): skb data pointer + * REG_SKB_DATA (R12): skb data pointer (if no BPF_REG_AX) * * Calculated input: * BPF_REG_2 (R3) : offset of byte(s) to fetch in skb @@ -1209,6 +1215,11 @@ call_fn: /* agfr %b2,%src (%src is s32 here) */ EMIT4(0xb9180000, BPF_REG_2, src_reg); + /* Reload REG_SKB_DATA if BPF_REG_AX is used */ + if (jit->seen & SEEN_REG_AX) + /* lg %skb_data,data_off(%b6) */ + EMIT6_DISP_LH(0xe3000000, 0x0004, REG_SKB_DATA, REG_0, + BPF_REG_6, offsetof(struct sk_buff, data)); /* basr %b5,%w1 (%b5 is call saved) */ EMIT2(0x0d00, BPF_REG_5, REG_W1); @@ -1262,37 +1273,62 @@ void bpf_jit_compile(struct bpf_prog *fp) /* * Compile eBPF program "fp" */ -void bpf_int_jit_compile(struct bpf_prog *fp) +struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) { + struct bpf_prog *tmp, *orig_fp = fp; struct bpf_binary_header *header; + bool tmp_blinded = false; struct bpf_jit jit; int pass; if (!bpf_jit_enable) - return; + return orig_fp; + + tmp = bpf_jit_blind_constants(fp); + /* + * If blinding was requested and we failed during blinding, + * we must fall back to the interpreter. + */ + if (IS_ERR(tmp)) + return orig_fp; + if (tmp != fp) { + tmp_blinded = true; + fp = tmp; + } + memset(&jit, 0, sizeof(jit)); jit.addrs = kcalloc(fp->len + 1, sizeof(*jit.addrs), GFP_KERNEL); - if (jit.addrs == NULL) - return; + if (jit.addrs == NULL) { + fp = orig_fp; + goto out; + } /* * Three initial passes: * - 1/2: Determine clobbered registers * - 3: Calculate program size and addrs arrray */ for (pass = 1; pass <= 3; pass++) { - if (bpf_jit_prog(&jit, fp)) + if (bpf_jit_prog(&jit, fp)) { + fp = orig_fp; goto free_addrs; + } } /* * Final pass: Allocate and generate program */ - if (jit.size >= BPF_SIZE_MAX) + if (jit.size >= BPF_SIZE_MAX) { + fp = orig_fp; goto free_addrs; + } header = bpf_jit_binary_alloc(jit.size, &jit.prg_buf, 2, jit_fill_hole); - if (!header) + if (!header) { + fp = orig_fp; goto free_addrs; - if (bpf_jit_prog(&jit, fp)) + } + if (bpf_jit_prog(&jit, fp)) { + fp = orig_fp; goto free_addrs; + } if (bpf_jit_enable > 1) { bpf_jit_dump(fp->len, jit.size, pass, jit.prg_buf); if (jit.prg_buf) @@ -1305,6 +1341,11 @@ void bpf_int_jit_compile(struct bpf_prog *fp) } free_addrs: kfree(jit.addrs); +out: + if (tmp_blinded) + bpf_jit_prog_release_other(fp, fp == orig_fp ? + tmp : orig_fp); + return fp; } /* diff --git a/arch/s390/oprofile/Makefile b/arch/s390/oprofile/Makefile index 1bd23017191e..496e4a7ee00e 100644 --- a/arch/s390/oprofile/Makefile +++ b/arch/s390/oprofile/Makefile @@ -6,5 +6,5 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \ oprofilefs.o oprofile_stats.o \ timer_int.o ) -oprofile-y := $(DRIVER_OBJS) init.o backtrace.o +oprofile-y := $(DRIVER_OBJS) init.o oprofile-y += hwsampler.o diff --git a/arch/s390/oprofile/backtrace.c b/arch/s390/oprofile/backtrace.c deleted file mode 100644 index 1884e1759529..000000000000 --- a/arch/s390/oprofile/backtrace.c +++ /dev/null @@ -1,78 +0,0 @@ -/* - * S390 Version - * Copyright IBM Corp. 2005 - * Author(s): Andreas Krebbel <Andreas.Krebbel@de.ibm.com> - */ - -#include <linux/oprofile.h> - -#include <asm/processor.h> /* for struct stack_frame */ - -static unsigned long -__show_trace(unsigned int *depth, unsigned long sp, - unsigned long low, unsigned long high) -{ - struct stack_frame *sf; - struct pt_regs *regs; - - while (*depth) { - if (sp < low || sp > high - sizeof(*sf)) - return sp; - sf = (struct stack_frame *) sp; - (*depth)--; - oprofile_add_trace(sf->gprs[8]); - - /* Follow the backchain. */ - while (*depth) { - low = sp; - sp = sf->back_chain; - if (!sp) - break; - if (sp <= low || sp > high - sizeof(*sf)) - return sp; - sf = (struct stack_frame *) sp; - (*depth)--; - oprofile_add_trace(sf->gprs[8]); - - } - - if (*depth == 0) - break; - - /* Zero backchain detected, check for interrupt frame. */ - sp = (unsigned long) (sf + 1); - if (sp <= low || sp > high - sizeof(*regs)) - return sp; - regs = (struct pt_regs *) sp; - (*depth)--; - oprofile_add_trace(sf->gprs[8]); - low = sp; - sp = regs->gprs[15]; - } - return sp; -} - -void s390_backtrace(struct pt_regs * const regs, unsigned int depth) -{ - unsigned long head, frame_size; - struct stack_frame* head_sf; - - if (user_mode(regs)) - return; - - frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); - head = regs->gprs[15]; - head_sf = (struct stack_frame*)head; - - if (!head_sf->back_chain) - return; - - head = head_sf->back_chain; - - head = __show_trace(&depth, head, - S390_lowcore.async_stack + frame_size - ASYNC_SIZE, - S390_lowcore.async_stack + frame_size); - - __show_trace(&depth, head, S390_lowcore.thread_info, - S390_lowcore.thread_info + THREAD_SIZE); -} diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c index 9cfa2ffaa9d6..791935a65800 100644 --- a/arch/s390/oprofile/init.c +++ b/arch/s390/oprofile/init.c @@ -20,8 +20,6 @@ #include "../../../drivers/oprofile/oprof.h" -extern void s390_backtrace(struct pt_regs * const regs, unsigned int depth); - #include "hwsampler.h" #include "op_counter.h" @@ -456,6 +454,7 @@ static int oprofile_hwsampler_init(struct oprofile_operations *ops) case 0x2097: case 0x2098: ops->cpu_type = "s390/z10"; break; case 0x2817: case 0x2818: ops->cpu_type = "s390/z196"; break; case 0x2827: case 0x2828: ops->cpu_type = "s390/zEC12"; break; + case 0x2964: case 0x2965: ops->cpu_type = "s390/z13"; break; default: return -ENODEV; } } @@ -494,6 +493,24 @@ static void oprofile_hwsampler_exit(void) hwsampler_shutdown(); } +static int __s390_backtrace(void *data, unsigned long address) +{ + unsigned int *depth = data; + + if (*depth == 0) + return 1; + (*depth)--; + oprofile_add_trace(address); + return 0; +} + +static void s390_backtrace(struct pt_regs *regs, unsigned int depth) +{ + if (user_mode(regs)) + return; + dump_trace(__s390_backtrace, &depth, NULL, regs->gprs[15]); +} + int __init oprofile_arch_init(struct oprofile_operations *ops) { ops->backtrace = s390_backtrace; diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 8f19c8f9d660..871af75c69c2 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -637,12 +637,11 @@ static void zpci_cleanup_bus_resources(struct zpci_dev *zdev) int pcibios_add_device(struct pci_dev *pdev) { - struct zpci_dev *zdev = to_zpci(pdev); struct resource *res; int i; - zdev->pdev = pdev; pdev->dev.groups = zpci_attr_groups; + pdev->dev.archdata.dma_ops = &s390_pci_dma_ops; zpci_map_resources(pdev); for (i = 0; i < PCI_BAR_COUNT; i++) { @@ -664,8 +663,7 @@ int pcibios_enable_device(struct pci_dev *pdev, int mask) { struct zpci_dev *zdev = to_zpci(pdev); - zdev->pdev = pdev; - zpci_debug_init_device(zdev); + zpci_debug_init_device(zdev, dev_name(&pdev->dev)); zpci_fmb_enable_device(zdev); return pci_enable_resources(pdev, mask); @@ -677,7 +675,6 @@ void pcibios_disable_device(struct pci_dev *pdev) zpci_fmb_disable_device(zdev); zpci_debug_exit_device(zdev); - zdev->pdev = NULL; } #ifdef CONFIG_HIBERNATE_CALLBACKS @@ -864,8 +861,11 @@ static inline int barsize(u8 size) static int zpci_mem_init(void) { + BUILD_BUG_ON(!is_power_of_2(__alignof__(struct zpci_fmb)) || + __alignof__(struct zpci_fmb) < sizeof(struct zpci_fmb)); + zdev_fmb_cache = kmem_cache_create("PCI_FMB_cache", sizeof(struct zpci_fmb), - 16, 0, NULL); + __alignof__(struct zpci_fmb), 0, NULL); if (!zdev_fmb_cache) goto error_fmb; diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index d6e411ed8b1f..1a4512c8544a 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -8,13 +8,19 @@ #define KMSG_COMPONENT "zpci" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#include <linux/compat.h> #include <linux/kernel.h> +#include <linux/miscdevice.h> #include <linux/slab.h> #include <linux/err.h> #include <linux/delay.h> #include <linux/pci.h> +#include <linux/uaccess.h> #include <asm/pci_debug.h> #include <asm/pci_clp.h> +#include <asm/compat.h> +#include <asm/clp.h> +#include <uapi/asm/clp.h> static inline void zpci_err_clp(unsigned int rsp, int rc) { @@ -27,21 +33,43 @@ static inline void zpci_err_clp(unsigned int rsp, int rc) } /* - * Call Logical Processor - * Retry logic is handled by the caller. + * Call Logical Processor with c=1, lps=0 and command 1 + * to get the bit mask of installed logical processors */ -static inline u8 clp_instr(void *data) +static inline int clp_get_ilp(unsigned long *ilp) +{ + unsigned long mask; + int cc = 3; + + asm volatile ( + " .insn rrf,0xb9a00000,%[mask],%[cmd],8,0\n" + "0: ipm %[cc]\n" + " srl %[cc],28\n" + "1:\n" + EX_TABLE(0b, 1b) + : [cc] "+d" (cc), [mask] "=d" (mask) : [cmd] "a" (1) + : "cc"); + *ilp = mask; + return cc; +} + +/* + * Call Logical Processor with c=0, the give constant lps and an lpcb request. + */ +static inline int clp_req(void *data, unsigned int lps) { struct { u8 _[CLP_BLK_SIZE]; } *req = data; u64 ignored; - u8 cc; + int cc = 3; asm volatile ( - " .insn rrf,0xb9a00000,%[ign],%[req],0x0,0x2\n" - " ipm %[cc]\n" + " .insn rrf,0xb9a00000,%[ign],%[req],0,%[lps]\n" + "0: ipm %[cc]\n" " srl %[cc],28\n" - : [cc] "=d" (cc), [ign] "=d" (ignored), "+m" (*req) - : [req] "a" (req) + "1:\n" + EX_TABLE(0b, 1b) + : [cc] "+d" (cc), [ign] "=d" (ignored), "+m" (*req) + : [req] "a" (req), [lps] "i" (lps) : "cc"); return cc; } @@ -90,7 +118,7 @@ static int clp_query_pci_fngrp(struct zpci_dev *zdev, u8 pfgid) rrb->response.hdr.len = sizeof(rrb->response); rrb->request.pfgid = pfgid; - rc = clp_instr(rrb); + rc = clp_req(rrb, CLP_LPS_PCI); if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) clp_store_query_pci_fngrp(zdev, &rrb->response); else { @@ -143,13 +171,12 @@ static int clp_query_pci_fn(struct zpci_dev *zdev, u32 fh) rrb->response.hdr.len = sizeof(rrb->response); rrb->request.fh = fh; - rc = clp_instr(rrb); + rc = clp_req(rrb, CLP_LPS_PCI); if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) { rc = clp_store_query_pci_fn(zdev, &rrb->response); if (rc) goto out; - if (rrb->response.pfgid) - rc = clp_query_pci_fngrp(zdev, rrb->response.pfgid); + rc = clp_query_pci_fngrp(zdev, rrb->response.pfgid); } else { zpci_err("Q PCI FN:\n"); zpci_err_clp(rrb->response.hdr.rsp, rc); @@ -214,7 +241,7 @@ static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command) rrb->request.oc = command; rrb->request.ndas = nr_dma_as; - rc = clp_instr(rrb); + rc = clp_req(rrb, CLP_LPS_PCI); if (rrb->response.hdr.rsp == CLP_RC_SETPCIFN_BUSY) { retries--; if (retries < 0) @@ -280,7 +307,7 @@ static int clp_list_pci(struct clp_req_rsp_list_pci *rrb, rrb->request.resume_token = resume_token; /* Get PCI function handle list */ - rc = clp_instr(rrb); + rc = clp_req(rrb, CLP_LPS_PCI); if (rc || rrb->response.hdr.rsp != CLP_RC_OK) { zpci_err("List PCI FN:\n"); zpci_err_clp(rrb->response.hdr.rsp, rc); @@ -391,3 +418,198 @@ int clp_rescan_pci_devices_simple(void) clp_free_block(rrb); return rc; } + +static int clp_base_slpc(struct clp_req *req, struct clp_req_rsp_slpc *lpcb) +{ + unsigned long limit = PAGE_SIZE - sizeof(lpcb->request); + + if (lpcb->request.hdr.len != sizeof(lpcb->request) || + lpcb->response.hdr.len > limit) + return -EINVAL; + return clp_req(lpcb, CLP_LPS_BASE) ? -EOPNOTSUPP : 0; +} + +static int clp_base_command(struct clp_req *req, struct clp_req_hdr *lpcb) +{ + switch (lpcb->cmd) { + case 0x0001: /* store logical-processor characteristics */ + return clp_base_slpc(req, (void *) lpcb); + default: + return -EINVAL; + } +} + +static int clp_pci_slpc(struct clp_req *req, struct clp_req_rsp_slpc *lpcb) +{ + unsigned long limit = PAGE_SIZE - sizeof(lpcb->request); + + if (lpcb->request.hdr.len != sizeof(lpcb->request) || + lpcb->response.hdr.len > limit) + return -EINVAL; + return clp_req(lpcb, CLP_LPS_PCI) ? -EOPNOTSUPP : 0; +} + +static int clp_pci_list(struct clp_req *req, struct clp_req_rsp_list_pci *lpcb) +{ + unsigned long limit = PAGE_SIZE - sizeof(lpcb->request); + + if (lpcb->request.hdr.len != sizeof(lpcb->request) || + lpcb->response.hdr.len > limit) + return -EINVAL; + if (lpcb->request.reserved2 != 0) + return -EINVAL; + return clp_req(lpcb, CLP_LPS_PCI) ? -EOPNOTSUPP : 0; +} + +static int clp_pci_query(struct clp_req *req, + struct clp_req_rsp_query_pci *lpcb) +{ + unsigned long limit = PAGE_SIZE - sizeof(lpcb->request); + + if (lpcb->request.hdr.len != sizeof(lpcb->request) || + lpcb->response.hdr.len > limit) + return -EINVAL; + if (lpcb->request.reserved2 != 0 || lpcb->request.reserved3 != 0) + return -EINVAL; + return clp_req(lpcb, CLP_LPS_PCI) ? -EOPNOTSUPP : 0; +} + +static int clp_pci_query_grp(struct clp_req *req, + struct clp_req_rsp_query_pci_grp *lpcb) +{ + unsigned long limit = PAGE_SIZE - sizeof(lpcb->request); + + if (lpcb->request.hdr.len != sizeof(lpcb->request) || + lpcb->response.hdr.len > limit) + return -EINVAL; + if (lpcb->request.reserved2 != 0 || lpcb->request.reserved3 != 0 || + lpcb->request.reserved4 != 0) + return -EINVAL; + return clp_req(lpcb, CLP_LPS_PCI) ? -EOPNOTSUPP : 0; +} + +static int clp_pci_command(struct clp_req *req, struct clp_req_hdr *lpcb) +{ + switch (lpcb->cmd) { + case 0x0001: /* store logical-processor characteristics */ + return clp_pci_slpc(req, (void *) lpcb); + case 0x0002: /* list PCI functions */ + return clp_pci_list(req, (void *) lpcb); + case 0x0003: /* query PCI function */ + return clp_pci_query(req, (void *) lpcb); + case 0x0004: /* query PCI function group */ + return clp_pci_query_grp(req, (void *) lpcb); + default: + return -EINVAL; + } +} + +static int clp_normal_command(struct clp_req *req) +{ + struct clp_req_hdr *lpcb; + void __user *uptr; + int rc; + + rc = -EINVAL; + if (req->lps != 0 && req->lps != 2) + goto out; + + rc = -ENOMEM; + lpcb = clp_alloc_block(GFP_KERNEL); + if (!lpcb) + goto out; + + rc = -EFAULT; + uptr = (void __force __user *)(unsigned long) req->data_p; + if (copy_from_user(lpcb, uptr, PAGE_SIZE) != 0) + goto out_free; + + rc = -EINVAL; + if (lpcb->fmt != 0 || lpcb->reserved1 != 0 || lpcb->reserved2 != 0) + goto out_free; + + switch (req->lps) { + case 0: + rc = clp_base_command(req, lpcb); + break; + case 2: + rc = clp_pci_command(req, lpcb); + break; + } + if (rc) + goto out_free; + + rc = -EFAULT; + if (copy_to_user(uptr, lpcb, PAGE_SIZE) != 0) + goto out_free; + + rc = 0; + +out_free: + clp_free_block(lpcb); +out: + return rc; +} + +static int clp_immediate_command(struct clp_req *req) +{ + void __user *uptr; + unsigned long ilp; + int exists; + + if (req->cmd > 1 || clp_get_ilp(&ilp) != 0) + return -EINVAL; + + uptr = (void __force __user *)(unsigned long) req->data_p; + if (req->cmd == 0) { + /* Command code 0: test for a specific processor */ + exists = test_bit_inv(req->lps, &ilp); + return put_user(exists, (int __user *) uptr); + } + /* Command code 1: return bit mask of installed processors */ + return put_user(ilp, (unsigned long __user *) uptr); +} + +static long clp_misc_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + struct clp_req req; + void __user *argp; + + if (cmd != CLP_SYNC) + return -EINVAL; + + argp = is_compat_task() ? compat_ptr(arg) : (void __user *) arg; + if (copy_from_user(&req, argp, sizeof(req))) + return -EFAULT; + if (req.r != 0) + return -EINVAL; + return req.c ? clp_immediate_command(&req) : clp_normal_command(&req); +} + +static int clp_misc_release(struct inode *inode, struct file *filp) +{ + return 0; +} + +static const struct file_operations clp_misc_fops = { + .owner = THIS_MODULE, + .open = nonseekable_open, + .release = clp_misc_release, + .unlocked_ioctl = clp_misc_ioctl, + .compat_ioctl = clp_misc_ioctl, + .llseek = no_llseek, +}; + +static struct miscdevice clp_misc_device = { + .minor = MISC_DYNAMIC_MINOR, + .name = "clp", + .fops = &clp_misc_fops, +}; + +static int __init clp_misc_init(void) +{ + return misc_register(&clp_misc_device); +} + +device_initcall(clp_misc_init); diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c index 4129b0a5fd78..38993b156924 100644 --- a/arch/s390/pci/pci_debug.c +++ b/arch/s390/pci/pci_debug.c @@ -1,5 +1,5 @@ /* - * Copyright IBM Corp. 2012 + * Copyright IBM Corp. 2012,2015 * * Author(s): * Jan Glauber <jang@linux.vnet.ibm.com> @@ -23,22 +23,45 @@ EXPORT_SYMBOL_GPL(pci_debug_msg_id); debug_info_t *pci_debug_err_id; EXPORT_SYMBOL_GPL(pci_debug_err_id); -static char *pci_perf_names[] = { - /* hardware counters */ +static char *pci_common_names[] = { "Load operations", "Store operations", "Store block operations", "Refresh operations", +}; + +static char *pci_fmt0_names[] = { "DMA read bytes", "DMA write bytes", }; +static char *pci_fmt1_names[] = { + "Received bytes", + "Received packets", + "Transmitted bytes", + "Transmitted packets", +}; + +static char *pci_fmt2_names[] = { + "Consumed work units", + "Maximum work units", +}; + static char *pci_sw_names[] = { "Allocated pages", "Mapped pages", "Unmapped pages", }; +static void pci_fmb_show(struct seq_file *m, char *name[], int length, + u64 *data) +{ + int i; + + for (i = 0; i < length; i++, data++) + seq_printf(m, "%26s:\t%llu\n", name[i], *data); +} + static void pci_sw_counter_show(struct seq_file *m) { struct zpci_dev *zdev = m->private; @@ -53,8 +76,6 @@ static void pci_sw_counter_show(struct seq_file *m) static int pci_perf_show(struct seq_file *m, void *v) { struct zpci_dev *zdev = m->private; - u64 *stat; - int i; if (!zdev) return 0; @@ -72,15 +93,27 @@ static int pci_perf_show(struct seq_file *m, void *v) seq_printf(m, "Samples: %u\n", zdev->fmb->samples); seq_printf(m, "Last update TOD: %Lx\n", zdev->fmb->last_update); - /* hardware counters */ - stat = (u64 *) &zdev->fmb->ld_ops; - for (i = 0; i < 4; i++) - seq_printf(m, "%26s:\t%llu\n", - pci_perf_names[i], *(stat + i)); - if (zdev->fmb->dma_valid) - for (i = 4; i < 6; i++) - seq_printf(m, "%26s:\t%llu\n", - pci_perf_names[i], *(stat + i)); + pci_fmb_show(m, pci_common_names, ARRAY_SIZE(pci_common_names), + &zdev->fmb->ld_ops); + + switch (zdev->fmb->format) { + case 0: + if (!(zdev->fmb->fmt_ind & ZPCI_FMB_DMA_COUNTER_VALID)) + break; + pci_fmb_show(m, pci_fmt0_names, ARRAY_SIZE(pci_fmt0_names), + &zdev->fmb->fmt0.dma_rbytes); + break; + case 1: + pci_fmb_show(m, pci_fmt1_names, ARRAY_SIZE(pci_fmt1_names), + &zdev->fmb->fmt1.rx_bytes); + break; + case 2: + pci_fmb_show(m, pci_fmt2_names, ARRAY_SIZE(pci_fmt2_names), + &zdev->fmb->fmt2.consumed_work_units); + break; + default: + seq_puts(m, "Unknown format\n"); + } pci_sw_counter_show(m); mutex_unlock(&zdev->lock); @@ -128,10 +161,9 @@ static const struct file_operations debugfs_pci_perf_fops = { .release = single_release, }; -void zpci_debug_init_device(struct zpci_dev *zdev) +void zpci_debug_init_device(struct zpci_dev *zdev, const char *name) { - zdev->debugfs_dev = debugfs_create_dir(dev_name(&zdev->pdev->dev), - debugfs_root); + zdev->debugfs_dev = debugfs_create_dir(name, debugfs_root); if (IS_ERR(zdev->debugfs_dev)) zdev->debugfs_dev = NULL; diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index 4638b93c7632..1ea8c07eab84 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -217,27 +217,29 @@ void dma_cleanup_tables(unsigned long *table) dma_free_cpu_table(table); } -static unsigned long __dma_alloc_iommu(struct zpci_dev *zdev, +static unsigned long __dma_alloc_iommu(struct device *dev, unsigned long start, int size) { + struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); unsigned long boundary_size; - boundary_size = ALIGN(dma_get_seg_boundary(&zdev->pdev->dev) + 1, + boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, PAGE_SIZE) >> PAGE_SHIFT; return iommu_area_alloc(zdev->iommu_bitmap, zdev->iommu_pages, start, size, 0, boundary_size, 0); } -static unsigned long dma_alloc_iommu(struct zpci_dev *zdev, int size) +static unsigned long dma_alloc_iommu(struct device *dev, int size) { + struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); unsigned long offset, flags; int wrap = 0; spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags); - offset = __dma_alloc_iommu(zdev, zdev->next_bit, size); + offset = __dma_alloc_iommu(dev, zdev->next_bit, size); if (offset == -1) { /* wrap-around */ - offset = __dma_alloc_iommu(zdev, 0, size); + offset = __dma_alloc_iommu(dev, 0, size); wrap = 1; } @@ -251,8 +253,9 @@ static unsigned long dma_alloc_iommu(struct zpci_dev *zdev, int size) return offset; } -static void dma_free_iommu(struct zpci_dev *zdev, unsigned long offset, int size) +static void dma_free_iommu(struct device *dev, unsigned long offset, int size) { + struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); unsigned long flags; spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags); @@ -293,7 +296,7 @@ static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page, /* This rounds up number of pages based on size and offset */ nr_pages = iommu_num_pages(pa, size, PAGE_SIZE); - iommu_page_index = dma_alloc_iommu(zdev, nr_pages); + iommu_page_index = dma_alloc_iommu(dev, nr_pages); if (iommu_page_index == -1) { ret = -ENOSPC; goto out_err; @@ -319,7 +322,7 @@ static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page, return dma_addr + (offset & ~PAGE_MASK); out_free: - dma_free_iommu(zdev, iommu_page_index, nr_pages); + dma_free_iommu(dev, iommu_page_index, nr_pages); out_err: zpci_err("map error:\n"); zpci_err_dma(ret, pa); @@ -346,7 +349,7 @@ static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr, atomic64_add(npages, &zdev->unmapped_pages); iommu_page_index = (dma_addr - zdev->start_dma) >> PAGE_SHIFT; - dma_free_iommu(zdev, iommu_page_index, npages); + dma_free_iommu(dev, iommu_page_index, npages); } static void *s390_dma_alloc(struct device *dev, size_t size, @@ -454,7 +457,7 @@ int zpci_dma_init_device(struct zpci_dev *zdev) zdev->dma_table = dma_alloc_cpu_table(); if (!zdev->dma_table) { rc = -ENOMEM; - goto out_clean; + goto out; } /* @@ -474,18 +477,22 @@ int zpci_dma_init_device(struct zpci_dev *zdev) zdev->iommu_bitmap = vzalloc(zdev->iommu_pages / 8); if (!zdev->iommu_bitmap) { rc = -ENOMEM; - goto out_reg; + goto free_dma_table; } rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, (u64) zdev->dma_table); if (rc) - goto out_reg; - return 0; + goto free_bitmap; -out_reg: + return 0; +free_bitmap: + vfree(zdev->iommu_bitmap); + zdev->iommu_bitmap = NULL; +free_dma_table: dma_free_cpu_table(zdev->dma_table); -out_clean: + zdev->dma_table = NULL; +out: return rc; } @@ -544,7 +551,7 @@ static int __init dma_debug_do_init(void) } fs_initcall(dma_debug_do_init); -struct dma_map_ops s390_dma_ops = { +struct dma_map_ops s390_pci_dma_ops = { .alloc = s390_dma_alloc, .free = s390_dma_free, .map_sg = s390_dma_map_sg, @@ -555,7 +562,7 @@ struct dma_map_ops s390_dma_ops = { .is_phys = 0, /* dma_supported is unconditionally true without a callback */ }; -EXPORT_SYMBOL_GPL(s390_dma_ops); +EXPORT_SYMBOL_GPL(s390_pci_dma_ops); static int __init s390_iommu_setup(char *str) { diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index b0e04751c5d5..fb2a9a560fdc 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -46,11 +46,14 @@ struct zpci_ccdf_avail { static void __zpci_event_error(struct zpci_ccdf_err *ccdf) { struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid); - struct pci_dev *pdev = zdev ? zdev->pdev : NULL; + struct pci_dev *pdev = NULL; zpci_err("error CCDF:\n"); zpci_err_hex(ccdf, sizeof(*ccdf)); + if (zdev) + pdev = pci_get_slot(zdev->bus, ZPCI_DEVFN); + pr_err("%s: Event 0x%x reports an error for PCI function 0x%x\n", pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid); @@ -58,6 +61,7 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf) return; pdev->error_state = pci_channel_io_perm_failure; + pci_dev_put(pdev); } void zpci_event_error(void *data) @@ -69,9 +73,12 @@ void zpci_event_error(void *data) static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) { struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid); - struct pci_dev *pdev = zdev ? zdev->pdev : NULL; + struct pci_dev *pdev = NULL; int ret; + if (zdev) + pdev = pci_get_slot(zdev->bus, ZPCI_DEVFN); + pr_info("%s: Event 0x%x reconfigured PCI function 0x%x\n", pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid); zpci_err("avail CCDF:\n"); @@ -138,6 +145,8 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) default: break; } + if (pdev) + pci_dev_put(pdev); } void zpci_event_availability(void *data) diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c index f37a5808883d..ed484dc84d14 100644 --- a/arch/s390/pci/pci_sysfs.c +++ b/arch/s390/pci/pci_sysfs.c @@ -12,6 +12,8 @@ #include <linux/stat.h> #include <linux/pci.h> +#include <asm/sclp.h> + #define zpci_attr(name, fmt, member) \ static ssize_t name##_show(struct device *dev, \ struct device_attribute *attr, char *buf) \ @@ -77,8 +79,29 @@ static ssize_t util_string_read(struct file *filp, struct kobject *kobj, sizeof(zdev->util_str)); } static BIN_ATTR_RO(util_string, CLP_UTIL_STR_LEN); + +static ssize_t report_error_write(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, char *buf, + loff_t off, size_t count) +{ + struct zpci_report_error_header *report = (void *) buf; + struct device *dev = kobj_to_dev(kobj); + struct pci_dev *pdev = to_pci_dev(dev); + struct zpci_dev *zdev = to_zpci(pdev); + int ret; + + if (off || (count < sizeof(*report))) + return -EINVAL; + + ret = sclp_pci_report(report, zdev->fh, zdev->fid); + + return ret ? ret : count; +} +static BIN_ATTR(report_error, S_IWUSR, NULL, report_error_write, PAGE_SIZE); + static struct bin_attribute *zpci_bin_attrs[] = { &bin_attr_util_string, + &bin_attr_report_error, NULL, }; |