summaryrefslogtreecommitdiff
path: root/arch/s390
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390')
-rw-r--r--arch/s390/Kconfig29
-rw-r--r--arch/s390/boot/compressed/Makefile1
-rw-r--r--arch/s390/configs/default_defconfig44
-rw-r--r--arch/s390/configs/gcov_defconfig34
-rw-r--r--arch/s390/configs/performance_defconfig36
-rw-r--r--arch/s390/configs/zfcpdump_defconfig4
-rw-r--r--arch/s390/crypto/aes_s390.c123
-rw-r--r--arch/s390/crypto/crypt_s390.h493
-rw-r--r--arch/s390/crypto/des_s390.c72
-rw-r--r--arch/s390/crypto/ghash_s390.c16
-rw-r--r--arch/s390/crypto/prng.c62
-rw-r--r--arch/s390/crypto/sha1_s390.c10
-rw-r--r--arch/s390/crypto/sha256_s390.c14
-rw-r--r--arch/s390/crypto/sha512_s390.c14
-rw-r--r--arch/s390/crypto/sha_common.c10
-rw-r--r--arch/s390/defconfig44
-rw-r--r--arch/s390/hypfs/inode.c4
-rw-r--r--arch/s390/include/asm/cache.h3
-rw-r--r--arch/s390/include/asm/checksum.h6
-rw-r--r--arch/s390/include/asm/clp.h27
-rw-r--r--arch/s390/include/asm/cpacf.h410
-rw-r--r--arch/s390/include/asm/device.h6
-rw-r--r--arch/s390/include/asm/dma-mapping.h6
-rw-r--r--arch/s390/include/asm/fpu/api.h2
-rw-r--r--arch/s390/include/asm/fpu/types.h10
-rw-r--r--arch/s390/include/asm/ftrace.h4
-rw-r--r--arch/s390/include/asm/gmap.h64
-rw-r--r--arch/s390/include/asm/kvm_host.h55
-rw-r--r--arch/s390/include/asm/livepatch.h11
-rw-r--r--arch/s390/include/asm/mmu.h2
-rw-r--r--arch/s390/include/asm/mmu_context.h40
-rw-r--r--arch/s390/include/asm/pci.h40
-rw-r--r--arch/s390/include/asm/pci_clp.h30
-rw-r--r--arch/s390/include/asm/percpu.h1
-rw-r--r--arch/s390/include/asm/perf_event.h2
-rw-r--r--arch/s390/include/asm/pgalloc.h8
-rw-r--r--arch/s390/include/asm/pgtable.h637
-rw-r--r--arch/s390/include/asm/processor.h23
-rw-r--r--arch/s390/include/asm/rwsem.h20
-rw-r--r--arch/s390/include/asm/sclp.h14
-rw-r--r--arch/s390/include/asm/seccomp.h2
-rw-r--r--arch/s390/include/asm/setup.h2
-rw-r--r--arch/s390/include/asm/sigp.h1
-rw-r--r--arch/s390/include/asm/thread_info.h1
-rw-r--r--arch/s390/include/asm/tlbflush.h9
-rw-r--r--arch/s390/include/asm/uaccess.h8
-rw-r--r--arch/s390/include/asm/xor.h21
-rw-r--r--arch/s390/include/uapi/asm/clp.h28
-rw-r--r--arch/s390/include/uapi/asm/dasd.h32
-rw-r--r--arch/s390/include/uapi/asm/kvm.h9
-rw-r--r--arch/s390/include/uapi/asm/sie.h8
-rw-r--r--arch/s390/include/uapi/asm/socket.h2
-rw-r--r--arch/s390/include/uapi/asm/unistd.h4
-rw-r--r--arch/s390/kernel/asm-offsets.c1
-rw-r--r--arch/s390/kernel/cache.c2
-rw-r--r--arch/s390/kernel/cpcmd.c3
-rw-r--r--arch/s390/kernel/crash_dump.c2
-rw-r--r--arch/s390/kernel/debug.c6
-rw-r--r--arch/s390/kernel/dis.c17
-rw-r--r--arch/s390/kernel/dumpstack.c123
-rw-r--r--arch/s390/kernel/early.c1
-rw-r--r--arch/s390/kernel/entry.S107
-rw-r--r--arch/s390/kernel/entry.h4
-rw-r--r--arch/s390/kernel/ipl.c7
-rw-r--r--arch/s390/kernel/irq.c3
-rw-r--r--arch/s390/kernel/machine_kexec.c28
-rw-r--r--arch/s390/kernel/module.c6
-rw-r--r--arch/s390/kernel/perf_cpum_cf.c20
-rw-r--r--arch/s390/kernel/perf_cpum_sf.c20
-rw-r--r--arch/s390/kernel/perf_event.c60
-rw-r--r--arch/s390/kernel/process.c29
-rw-r--r--arch/s390/kernel/processor.c16
-rw-r--r--arch/s390/kernel/setup.c27
-rw-r--r--arch/s390/kernel/smp.c4
-rw-r--r--arch/s390/kernel/stacktrace.c93
-rw-r--r--arch/s390/kernel/syscalls.S2
-rw-r--r--arch/s390/kernel/time.c14
-rw-r--r--arch/s390/kernel/topology.c7
-rw-r--r--arch/s390/kernel/traps.c21
-rw-r--r--arch/s390/kernel/vdso.c3
-rw-r--r--arch/s390/kernel/vmlinux.lds.S1
-rw-r--r--arch/s390/kernel/vtime.c2
-rw-r--r--arch/s390/kvm/Kconfig1
-rw-r--r--arch/s390/kvm/diag.c1
-rw-r--r--arch/s390/kvm/gaccess.c57
-rw-r--r--arch/s390/kvm/gaccess.h38
-rw-r--r--arch/s390/kvm/guestdbg.c4
-rw-r--r--arch/s390/kvm/intercept.c80
-rw-r--r--arch/s390/kvm/interrupt.c145
-rw-r--r--arch/s390/kvm/kvm-s390.c300
-rw-r--r--arch/s390/kvm/kvm-s390.h28
-rw-r--r--arch/s390/kvm/priv.c37
-rw-r--r--arch/s390/kvm/sigp.c6
-rw-r--r--arch/s390/lib/Makefile2
-rw-r--r--arch/s390/lib/spinlock.c1
-rw-r--r--arch/s390/lib/xor.c134
-rw-r--r--arch/s390/mm/Makefile6
-rw-r--r--arch/s390/mm/extable.c85
-rw-r--r--arch/s390/mm/extmem.c16
-rw-r--r--arch/s390/mm/fault.c58
-rw-r--r--arch/s390/mm/gmap.c774
-rw-r--r--arch/s390/mm/gup.c12
-rw-r--r--arch/s390/mm/hugetlbpage.c7
-rw-r--r--arch/s390/mm/init.c13
-rw-r--r--arch/s390/mm/mmap.c7
-rw-r--r--arch/s390/mm/pageattr.c8
-rw-r--r--arch/s390/mm/pgalloc.c331
-rw-r--r--arch/s390/mm/pgtable.c1549
-rw-r--r--arch/s390/mm/vmem.c18
-rw-r--r--arch/s390/net/bpf_jit.h4
-rw-r--r--arch/s390/net/bpf_jit_comp.c81
-rw-r--r--arch/s390/oprofile/Makefile2
-rw-r--r--arch/s390/oprofile/backtrace.c78
-rw-r--r--arch/s390/oprofile/init.c21
-rw-r--r--arch/s390/pci/pci.c12
-rw-r--r--arch/s390/pci/pci_clp.c250
-rw-r--r--arch/s390/pci/pci_debug.c66
-rw-r--r--arch/s390/pci/pci_dma.c41
-rw-r--r--arch/s390/pci/pci_event.c13
-rw-r--r--arch/s390/pci/pci_sysfs.c23
120 files changed, 4038 insertions, 3458 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 3be9c832dec1..a8c259059adf 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -4,6 +4,9 @@ config MMU
config ZONE_DMA
def_bool y
+config CPU_BIG_ENDIAN
+ def_bool y
+
config LOCKDEP_SUPPORT
def_bool y
@@ -59,6 +62,9 @@ config PCI_QUIRKS
config ARCH_SUPPORTS_UPROBES
def_bool y
+config DEBUG_RODATA
+ def_bool y
+
config S390
def_bool y
select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
@@ -101,6 +107,7 @@ config S390
select ARCH_SUPPORTS_NUMA_BALANCING
select ARCH_USE_BUILTIN_BSWAP
select ARCH_USE_CMPXCHG_LOCKREF
+ select ARCH_WANTS_DYNAMIC_TASK_STRUCT
select ARCH_WANTS_PROT_NUMA_PROT_NONE
select ARCH_WANT_IPC_PARSE_VERSION
select BUILDTIME_EXTABLE_SORT
@@ -116,16 +123,19 @@ config S390
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_EARLY_PFN_TO_NID
select HAVE_ARCH_JUMP_LABEL
+ select CPU_NO_EFFICIENT_FFS if !HAVE_MARCH_Z9_109_FEATURES
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_SOFT_DIRTY
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
- select HAVE_BPF_JIT if PACK_STACK && HAVE_MARCH_Z196_FEATURES
+ select HAVE_EBPF_JIT if PACK_STACK && HAVE_MARCH_Z196_FEATURES
select HAVE_CMPXCHG_DOUBLE
select HAVE_CMPXCHG_LOCAL
select HAVE_DEBUG_KMEMLEAK
+ select HAVE_DMA_API_DEBUG
select HAVE_DYNAMIC_FTRACE
select HAVE_DYNAMIC_FTRACE_WITH_REGS
+ select HAVE_EXIT_THREAD
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_TRACER
@@ -157,6 +167,7 @@ config S390
select TTY
select VIRT_CPU_ACCOUNTING
select VIRT_TO_BUS
+ select HAVE_NMI
config SCHED_OMIT_FRAME_POINTER
@@ -203,7 +214,7 @@ config HAVE_MARCH_Z13_FEATURES
choice
prompt "Processor type"
- default MARCH_Z900
+ default MARCH_Z196
config MARCH_Z900
bool "IBM zSeries model z800 and z900"
@@ -254,12 +265,12 @@ config MARCH_ZEC12
older machines.
config MARCH_Z13
- bool "IBM z13"
+ bool "IBM z13s and z13"
select HAVE_MARCH_Z13_FEATURES
help
- Select this to enable optimizations for IBM z13 (2964 series).
- The kernel will be slightly faster but will not work on older
- machines.
+ Select this to enable optimizations for IBM z13s and z13 (2965 and
+ 2964 series). The kernel will be slightly faster but will not work on
+ older machines.
endchoice
@@ -605,8 +616,6 @@ config PCI_NR_MSI
PCI devices.
source "drivers/pci/Kconfig"
-source "drivers/pci/pcie/Kconfig"
-source "drivers/pci/hotplug/Kconfig"
endif # PCI
@@ -619,10 +628,6 @@ config HAS_IOMEM
config IOMMU_HELPER
def_bool PCI
-config HAS_DMA
- def_bool PCI
- select HAVE_DMA_API_DEBUG
-
config NEED_SG_DMA_LENGTH
def_bool PCI
diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile
index fac6ac9790fa..1dd210347e12 100644
--- a/arch/s390/boot/compressed/Makefile
+++ b/arch/s390/boot/compressed/Makefile
@@ -22,7 +22,6 @@ OBJECTS += $(obj)/head.o $(obj)/misc.o $(obj)/piggy.o
LDFLAGS_vmlinux := --oformat $(LD_BFD) -e startup -T
$(obj)/vmlinux: $(obj)/vmlinux.lds $(OBJECTS)
$(call if_changed,ld)
- @:
sed-sizes := -e 's/^\([0-9a-fA-F]*\) . \(__bss_start\|_end\)$$/\#define SZ\2 0x\1/p'
diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig
index 0ac42cc4f880..d5ec71b2ed02 100644
--- a/arch/s390/configs/default_defconfig
+++ b/arch/s390/configs/default_defconfig
@@ -1,8 +1,7 @@
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
-CONFIG_FHANDLE=y
CONFIG_AUDIT=y
-CONFIG_NO_HZ=y
+CONFIG_NO_HZ_IDLE=y
CONFIG_HIGH_RES_TIMERS=y
CONFIG_BSD_PROCESS_ACCT=y
CONFIG_BSD_PROCESS_ACCT_V3=y
@@ -13,19 +12,19 @@ CONFIG_TASK_IO_ACCOUNTING=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_NUMA_BALANCING=y
-CONFIG_CGROUP_FREEZER=y
-CONFIG_CGROUP_PIDS=y
-CONFIG_CGROUP_DEVICE=y
-CONFIG_CPUSETS=y
-CONFIG_CGROUP_CPUACCT=y
CONFIG_MEMCG=y
CONFIG_MEMCG_SWAP=y
-CONFIG_MEMCG_KMEM=y
-CONFIG_CGROUP_HUGETLB=y
-CONFIG_CGROUP_PERF=y
+CONFIG_BLK_CGROUP=y
CONFIG_CFS_BANDWIDTH=y
CONFIG_RT_GROUP_SCHED=y
-CONFIG_BLK_CGROUP=y
+CONFIG_CGROUP_PIDS=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_HUGETLB=y
+CONFIG_CPUSETS=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_PERF=y
+CONFIG_CHECKPOINT_RESTORE=y
CONFIG_NAMESPACES=y
CONFIG_USER_NS=y
CONFIG_SCHED_AUTOGROUP=y
@@ -55,7 +54,6 @@ CONFIG_UNIXWARE_DISKLABEL=y
CONFIG_CFQ_GROUP_IOSCHED=y
CONFIG_DEFAULT_DEADLINE=y
CONFIG_LIVEPATCH=y
-CONFIG_MARCH_Z196=y
CONFIG_TUNE_ZEC12=y
CONFIG_NR_CPUS=256
CONFIG_NUMA=y
@@ -65,6 +63,15 @@ CONFIG_MEMORY_HOTPLUG=y
CONFIG_MEMORY_HOTREMOVE=y
CONFIG_KSM=y
CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_CLEANCACHE=y
+CONFIG_FRONTSWAP=y
+CONFIG_CMA=y
+CONFIG_MEM_SOFT_DIRTY=y
+CONFIG_ZPOOL=m
+CONFIG_ZBUD=m
+CONFIG_ZSMALLOC=m
+CONFIG_ZSMALLOC_STAT=y
+CONFIG_IDLE_PAGE_TRACKING=y
CONFIG_PCI=y
CONFIG_PCI_DEBUG=y
CONFIG_HOTPLUG_PCI=y
@@ -452,6 +459,7 @@ CONFIG_HW_RANDOM_VIRTIO=m
CONFIG_RAW_DRIVER=m
CONFIG_HANGCHECK_TIMER=m
CONFIG_TN3270_FS=y
+# CONFIG_HWMON is not set
CONFIG_WATCHDOG=y
CONFIG_WATCHDOG_NOWAYOUT=y
CONFIG_SOFT_WATCHDOG=m
@@ -537,6 +545,8 @@ CONFIG_DLM=m
CONFIG_PRINTK_TIME=y
CONFIG_DYNAMIC_DEBUG=y
CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF4=y
+CONFIG_GDB_SCRIPTS=y
CONFIG_FRAME_WARN=1024
CONFIG_READABLE_ASM=y
CONFIG_UNUSED_SYMBOLS=y
@@ -555,13 +565,17 @@ CONFIG_SLUB_DEBUG_ON=y
CONFIG_SLUB_STATS=y
CONFIG_DEBUG_STACK_USAGE=y
CONFIG_DEBUG_VM=y
+CONFIG_DEBUG_VM_VMACACHE=y
CONFIG_DEBUG_VM_RB=y
+CONFIG_DEBUG_VM_PGFLAGS=y
CONFIG_DEBUG_MEMORY_INIT=y
CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m
CONFIG_DEBUG_PER_CPU_MAPS=y
CONFIG_DEBUG_SHIRQ=y
CONFIG_DETECT_HUNG_TASK=y
+CONFIG_WQ_WATCHDOG=y
CONFIG_PANIC_ON_OOPS=y
+CONFIG_DEBUG_TIMEKEEPING=y
CONFIG_TIMER_STATS=y
CONFIG_DEBUG_RT_MUTEXES=y
CONFIG_DEBUG_WW_MUTEX_SLOWPATH=y
@@ -596,6 +610,8 @@ CONFIG_FTRACE_SYSCALLS=y
CONFIG_STACK_TRACER=y
CONFIG_BLK_DEV_IO_TRACE=y
CONFIG_UPROBE_EVENT=y
+CONFIG_FUNCTION_PROFILER=y
+CONFIG_TRACE_ENUM_MAP_FILE=y
CONFIG_LKDTM=m
CONFIG_TEST_LIST_SORT=y
CONFIG_KPROBES_SANITY_TEST=y
@@ -607,7 +623,6 @@ CONFIG_TEST_STRING_HELPERS=y
CONFIG_TEST_KSTRTOX=y
CONFIG_DMA_API_DEBUG=y
CONFIG_TEST_BPF=m
-# CONFIG_STRICT_DEVMEM is not set
CONFIG_S390_PTDUMP=y
CONFIG_ENCRYPTED_KEYS=m
CONFIG_SECURITY=y
@@ -651,7 +666,6 @@ CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
-CONFIG_CRYPTO_ZLIB=y
CONFIG_CRYPTO_LZO=m
CONFIG_CRYPTO_LZ4=m
CONFIG_CRYPTO_LZ4HC=m
@@ -664,7 +678,7 @@ CONFIG_CRYPTO_SHA512_S390=m
CONFIG_CRYPTO_DES_S390=m
CONFIG_CRYPTO_AES_S390=m
CONFIG_CRYPTO_GHASH_S390=m
-CONFIG_ASYMMETRIC_KEY_TYPE=m
+CONFIG_ASYMMETRIC_KEY_TYPE=y
CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m
CONFIG_X509_CERTIFICATE_PARSER=m
CONFIG_CRC7=m
diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig
index a31dcd56f7c0..f46a35115d2d 100644
--- a/arch/s390/configs/gcov_defconfig
+++ b/arch/s390/configs/gcov_defconfig
@@ -1,8 +1,7 @@
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
-CONFIG_FHANDLE=y
CONFIG_AUDIT=y
-CONFIG_NO_HZ=y
+CONFIG_NO_HZ_IDLE=y
CONFIG_HIGH_RES_TIMERS=y
CONFIG_BSD_PROCESS_ACCT=y
CONFIG_BSD_PROCESS_ACCT_V3=y
@@ -13,17 +12,17 @@ CONFIG_TASK_IO_ACCOUNTING=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_NUMA_BALANCING=y
-CONFIG_CGROUP_FREEZER=y
-CONFIG_CGROUP_PIDS=y
-CONFIG_CGROUP_DEVICE=y
-CONFIG_CPUSETS=y
-CONFIG_CGROUP_CPUACCT=y
CONFIG_MEMCG=y
CONFIG_MEMCG_SWAP=y
-CONFIG_MEMCG_KMEM=y
+CONFIG_BLK_CGROUP=y
+CONFIG_CGROUP_PIDS=y
+CONFIG_CGROUP_FREEZER=y
CONFIG_CGROUP_HUGETLB=y
+CONFIG_CPUSETS=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CGROUP_CPUACCT=y
CONFIG_CGROUP_PERF=y
-CONFIG_BLK_CGROUP=y
+CONFIG_CHECKPOINT_RESTORE=y
CONFIG_NAMESPACES=y
CONFIG_USER_NS=y
CONFIG_SCHED_AUTOGROUP=y
@@ -53,7 +52,6 @@ CONFIG_SOLARIS_X86_PARTITION=y
CONFIG_UNIXWARE_DISKLABEL=y
CONFIG_CFQ_GROUP_IOSCHED=y
CONFIG_DEFAULT_DEADLINE=y
-CONFIG_MARCH_Z196=y
CONFIG_TUNE_ZEC12=y
CONFIG_NR_CPUS=256
CONFIG_NUMA=y
@@ -62,6 +60,14 @@ CONFIG_MEMORY_HOTPLUG=y
CONFIG_MEMORY_HOTREMOVE=y
CONFIG_KSM=y
CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_CLEANCACHE=y
+CONFIG_FRONTSWAP=y
+CONFIG_CMA=y
+CONFIG_ZSWAP=y
+CONFIG_ZBUD=m
+CONFIG_ZSMALLOC=m
+CONFIG_ZSMALLOC_STAT=y
+CONFIG_IDLE_PAGE_TRACKING=y
CONFIG_PCI=y
CONFIG_HOTPLUG_PCI=y
CONFIG_HOTPLUG_PCI_S390=y
@@ -530,6 +536,8 @@ CONFIG_NLS_UTF8=m
CONFIG_DLM=m
CONFIG_PRINTK_TIME=y
CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF4=y
+CONFIG_GDB_SCRIPTS=y
# CONFIG_ENABLE_MUST_CHECK is not set
CONFIG_FRAME_WARN=1024
CONFIG_UNUSED_SYMBOLS=y
@@ -547,13 +555,13 @@ CONFIG_LATENCYTOP=y
CONFIG_DEBUG_STRICT_USER_COPY_CHECKS=y
CONFIG_BLK_DEV_IO_TRACE=y
# CONFIG_KPROBE_EVENT is not set
+CONFIG_TRACE_ENUM_MAP_FILE=y
CONFIG_LKDTM=m
CONFIG_RBTREE_TEST=m
CONFIG_INTERVAL_TREE_TEST=m
CONFIG_PERCPU_TEST=m
CONFIG_ATOMIC64_SELFTEST=y
CONFIG_TEST_BPF=m
-# CONFIG_STRICT_DEVMEM is not set
CONFIG_S390_PTDUMP=y
CONFIG_ENCRYPTED_KEYS=m
CONFIG_SECURITY=y
@@ -597,8 +605,6 @@ CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
-CONFIG_CRYPTO_ZLIB=y
-CONFIG_CRYPTO_LZO=m
CONFIG_CRYPTO_LZ4=m
CONFIG_CRYPTO_LZ4HC=m
CONFIG_CRYPTO_USER_API_HASH=m
@@ -610,7 +616,7 @@ CONFIG_CRYPTO_SHA512_S390=m
CONFIG_CRYPTO_DES_S390=m
CONFIG_CRYPTO_AES_S390=m
CONFIG_CRYPTO_GHASH_S390=m
-CONFIG_ASYMMETRIC_KEY_TYPE=m
+CONFIG_ASYMMETRIC_KEY_TYPE=y
CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m
CONFIG_X509_CERTIFICATE_PARSER=m
CONFIG_CRC7=m
diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig
index 7b73bf353345..ba0f2a58b8cd 100644
--- a/arch/s390/configs/performance_defconfig
+++ b/arch/s390/configs/performance_defconfig
@@ -1,8 +1,7 @@
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
-CONFIG_FHANDLE=y
CONFIG_AUDIT=y
-CONFIG_NO_HZ=y
+CONFIG_NO_HZ_IDLE=y
CONFIG_HIGH_RES_TIMERS=y
CONFIG_BSD_PROCESS_ACCT=y
CONFIG_BSD_PROCESS_ACCT_V3=y
@@ -14,17 +13,17 @@ CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_NUMA_BALANCING=y
# CONFIG_NUMA_BALANCING_DEFAULT_ENABLED is not set
-CONFIG_CGROUP_FREEZER=y
-CONFIG_CGROUP_PIDS=y
-CONFIG_CGROUP_DEVICE=y
-CONFIG_CPUSETS=y
-CONFIG_CGROUP_CPUACCT=y
CONFIG_MEMCG=y
CONFIG_MEMCG_SWAP=y
-CONFIG_MEMCG_KMEM=y
+CONFIG_BLK_CGROUP=y
+CONFIG_CGROUP_PIDS=y
+CONFIG_CGROUP_FREEZER=y
CONFIG_CGROUP_HUGETLB=y
+CONFIG_CPUSETS=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CGROUP_CPUACCT=y
CONFIG_CGROUP_PERF=y
-CONFIG_BLK_CGROUP=y
+CONFIG_CHECKPOINT_RESTORE=y
CONFIG_NAMESPACES=y
CONFIG_USER_NS=y
CONFIG_SCHED_AUTOGROUP=y
@@ -53,7 +52,6 @@ CONFIG_UNIXWARE_DISKLABEL=y
CONFIG_CFQ_GROUP_IOSCHED=y
CONFIG_DEFAULT_DEADLINE=y
CONFIG_LIVEPATCH=y
-CONFIG_MARCH_Z196=y
CONFIG_TUNE_ZEC12=y
CONFIG_NR_CPUS=512
CONFIG_NUMA=y
@@ -62,6 +60,14 @@ CONFIG_MEMORY_HOTPLUG=y
CONFIG_MEMORY_HOTREMOVE=y
CONFIG_KSM=y
CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_CLEANCACHE=y
+CONFIG_FRONTSWAP=y
+CONFIG_CMA=y
+CONFIG_ZSWAP=y
+CONFIG_ZBUD=m
+CONFIG_ZSMALLOC=m
+CONFIG_ZSMALLOC_STAT=y
+CONFIG_IDLE_PAGE_TRACKING=y
CONFIG_PCI=y
CONFIG_HOTPLUG_PCI=y
CONFIG_HOTPLUG_PCI_S390=y
@@ -447,6 +453,7 @@ CONFIG_HW_RANDOM_VIRTIO=m
CONFIG_RAW_DRIVER=m
CONFIG_HANGCHECK_TIMER=m
CONFIG_TN3270_FS=y
+# CONFIG_HWMON is not set
CONFIG_WATCHDOG=y
CONFIG_WATCHDOG_NOWAYOUT=y
CONFIG_SOFT_WATCHDOG=m
@@ -530,6 +537,8 @@ CONFIG_NLS_UTF8=m
CONFIG_DLM=m
CONFIG_PRINTK_TIME=y
CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF4=y
+CONFIG_GDB_SCRIPTS=y
# CONFIG_ENABLE_MUST_CHECK is not set
CONFIG_FRAME_WARN=1024
CONFIG_UNUSED_SYMBOLS=y
@@ -546,11 +555,12 @@ CONFIG_FTRACE_SYSCALLS=y
CONFIG_STACK_TRACER=y
CONFIG_BLK_DEV_IO_TRACE=y
CONFIG_UPROBE_EVENT=y
+CONFIG_FUNCTION_PROFILER=y
+CONFIG_TRACE_ENUM_MAP_FILE=y
CONFIG_LKDTM=m
CONFIG_PERCPU_TEST=m
CONFIG_ATOMIC64_SELFTEST=y
CONFIG_TEST_BPF=m
-# CONFIG_STRICT_DEVMEM is not set
CONFIG_S390_PTDUMP=y
CONFIG_ENCRYPTED_KEYS=m
CONFIG_SECURITY=y
@@ -594,8 +604,6 @@ CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
-CONFIG_CRYPTO_ZLIB=y
-CONFIG_CRYPTO_LZO=m
CONFIG_CRYPTO_LZ4=m
CONFIG_CRYPTO_LZ4HC=m
CONFIG_CRYPTO_USER_API_HASH=m
@@ -607,7 +615,7 @@ CONFIG_CRYPTO_SHA512_S390=m
CONFIG_CRYPTO_DES_S390=m
CONFIG_CRYPTO_AES_S390=m
CONFIG_CRYPTO_GHASH_S390=m
-CONFIG_ASYMMETRIC_KEY_TYPE=m
+CONFIG_ASYMMETRIC_KEY_TYPE=y
CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m
CONFIG_X509_CERTIFICATE_PARSER=m
CONFIG_CRC7=m
diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig
index 1719843a55a2..4366a3e3e754 100644
--- a/arch/s390/configs/zfcpdump_defconfig
+++ b/arch/s390/configs/zfcpdump_defconfig
@@ -1,5 +1,5 @@
# CONFIG_SWAP is not set
-CONFIG_NO_HZ=y
+CONFIG_NO_HZ_IDLE=y
CONFIG_HIGH_RES_TIMERS=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
@@ -7,7 +7,6 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_IBM_PARTITION=y
CONFIG_DEFAULT_DEADLINE=y
-CONFIG_MARCH_Z196=y
CONFIG_TUNE_ZEC12=y
# CONFIG_COMPAT is not set
CONFIG_NR_CPUS=2
@@ -64,7 +63,6 @@ CONFIG_PANIC_ON_OOPS=y
# CONFIG_SCHED_DEBUG is not set
CONFIG_RCU_CPU_STALL_TIMEOUT=60
# CONFIG_FTRACE is not set
-# CONFIG_STRICT_DEVMEM is not set
# CONFIG_PFAULT is not set
# CONFIG_S390_HYPFS_FS is not set
# CONFIG_VIRTUALIZATION is not set
diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index 0b9b95f3c703..7554a8bb2adc 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -27,7 +27,8 @@
#include <linux/cpufeature.h>
#include <linux/init.h>
#include <linux/spinlock.h>
-#include "crypt_s390.h"
+#include <crypto/xts.h>
+#include <asm/cpacf.h>
#define AES_KEYLEN_128 1
#define AES_KEYLEN_192 2
@@ -144,16 +145,16 @@ static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
switch (sctx->key_len) {
case 16:
- crypt_s390_km(KM_AES_128_ENCRYPT, &sctx->key, out, in,
- AES_BLOCK_SIZE);
+ cpacf_km(CPACF_KM_AES_128_ENC, &sctx->key, out, in,
+ AES_BLOCK_SIZE);
break;
case 24:
- crypt_s390_km(KM_AES_192_ENCRYPT, &sctx->key, out, in,
- AES_BLOCK_SIZE);
+ cpacf_km(CPACF_KM_AES_192_ENC, &sctx->key, out, in,
+ AES_BLOCK_SIZE);
break;
case 32:
- crypt_s390_km(KM_AES_256_ENCRYPT, &sctx->key, out, in,
- AES_BLOCK_SIZE);
+ cpacf_km(CPACF_KM_AES_256_ENC, &sctx->key, out, in,
+ AES_BLOCK_SIZE);
break;
}
}
@@ -169,16 +170,16 @@ static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
switch (sctx->key_len) {
case 16:
- crypt_s390_km(KM_AES_128_DECRYPT, &sctx->key, out, in,
- AES_BLOCK_SIZE);
+ cpacf_km(CPACF_KM_AES_128_DEC, &sctx->key, out, in,
+ AES_BLOCK_SIZE);
break;
case 24:
- crypt_s390_km(KM_AES_192_DECRYPT, &sctx->key, out, in,
- AES_BLOCK_SIZE);
+ cpacf_km(CPACF_KM_AES_192_DEC, &sctx->key, out, in,
+ AES_BLOCK_SIZE);
break;
case 32:
- crypt_s390_km(KM_AES_256_DECRYPT, &sctx->key, out, in,
- AES_BLOCK_SIZE);
+ cpacf_km(CPACF_KM_AES_256_DEC, &sctx->key, out, in,
+ AES_BLOCK_SIZE);
break;
}
}
@@ -211,7 +212,7 @@ static void fallback_exit_cip(struct crypto_tfm *tfm)
static struct crypto_alg aes_alg = {
.cra_name = "aes",
.cra_driver_name = "aes-s390",
- .cra_priority = CRYPT_S390_PRIORITY,
+ .cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_CIPHER |
CRYPTO_ALG_NEED_FALLBACK,
.cra_blocksize = AES_BLOCK_SIZE,
@@ -297,16 +298,16 @@ static int ecb_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
switch (key_len) {
case 16:
- sctx->enc = KM_AES_128_ENCRYPT;
- sctx->dec = KM_AES_128_DECRYPT;
+ sctx->enc = CPACF_KM_AES_128_ENC;
+ sctx->dec = CPACF_KM_AES_128_DEC;
break;
case 24:
- sctx->enc = KM_AES_192_ENCRYPT;
- sctx->dec = KM_AES_192_DECRYPT;
+ sctx->enc = CPACF_KM_AES_192_ENC;
+ sctx->dec = CPACF_KM_AES_192_DEC;
break;
case 32:
- sctx->enc = KM_AES_256_ENCRYPT;
- sctx->dec = KM_AES_256_DECRYPT;
+ sctx->enc = CPACF_KM_AES_256_ENC;
+ sctx->dec = CPACF_KM_AES_256_DEC;
break;
}
@@ -325,7 +326,7 @@ static int ecb_aes_crypt(struct blkcipher_desc *desc, long func, void *param,
u8 *out = walk->dst.virt.addr;
u8 *in = walk->src.virt.addr;
- ret = crypt_s390_km(func, param, out, in, n);
+ ret = cpacf_km(func, param, out, in, n);
if (ret < 0 || ret != n)
return -EIO;
@@ -392,7 +393,7 @@ static void fallback_exit_blk(struct crypto_tfm *tfm)
static struct crypto_alg ecb_aes_alg = {
.cra_name = "ecb(aes)",
.cra_driver_name = "ecb-aes-s390",
- .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY,
+ .cra_priority = 400, /* combo: aes + ecb */
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_NEED_FALLBACK,
.cra_blocksize = AES_BLOCK_SIZE,
@@ -426,16 +427,16 @@ static int cbc_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
switch (key_len) {
case 16:
- sctx->enc = KMC_AES_128_ENCRYPT;
- sctx->dec = KMC_AES_128_DECRYPT;
+ sctx->enc = CPACF_KMC_AES_128_ENC;
+ sctx->dec = CPACF_KMC_AES_128_DEC;
break;
case 24:
- sctx->enc = KMC_AES_192_ENCRYPT;
- sctx->dec = KMC_AES_192_DECRYPT;
+ sctx->enc = CPACF_KMC_AES_192_ENC;
+ sctx->dec = CPACF_KMC_AES_192_DEC;
break;
case 32:
- sctx->enc = KMC_AES_256_ENCRYPT;
- sctx->dec = KMC_AES_256_DECRYPT;
+ sctx->enc = CPACF_KMC_AES_256_ENC;
+ sctx->dec = CPACF_KMC_AES_256_DEC;
break;
}
@@ -464,7 +465,7 @@ static int cbc_aes_crypt(struct blkcipher_desc *desc, long func,
u8 *out = walk->dst.virt.addr;
u8 *in = walk->src.virt.addr;
- ret = crypt_s390_kmc(func, &param, out, in, n);
+ ret = cpacf_kmc(func, &param, out, in, n);
if (ret < 0 || ret != n)
return -EIO;
@@ -508,7 +509,7 @@ static int cbc_aes_decrypt(struct blkcipher_desc *desc,
static struct crypto_alg cbc_aes_alg = {
.cra_name = "cbc(aes)",
.cra_driver_name = "cbc-aes-s390",
- .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY,
+ .cra_priority = 400, /* combo: aes + cbc */
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_NEED_FALLBACK,
.cra_blocksize = AES_BLOCK_SIZE,
@@ -587,11 +588,16 @@ static int xts_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
{
struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
u32 *flags = &tfm->crt_flags;
+ int err;
+
+ err = xts_check_key(tfm, in_key, key_len);
+ if (err)
+ return err;
switch (key_len) {
case 32:
- xts_ctx->enc = KM_XTS_128_ENCRYPT;
- xts_ctx->dec = KM_XTS_128_DECRYPT;
+ xts_ctx->enc = CPACF_KM_XTS_128_ENC;
+ xts_ctx->dec = CPACF_KM_XTS_128_DEC;
memcpy(xts_ctx->key + 16, in_key, 16);
memcpy(xts_ctx->pcc_key + 16, in_key + 16, 16);
break;
@@ -601,8 +607,8 @@ static int xts_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
xts_fallback_setkey(tfm, in_key, key_len);
break;
case 64:
- xts_ctx->enc = KM_XTS_256_ENCRYPT;
- xts_ctx->dec = KM_XTS_256_DECRYPT;
+ xts_ctx->enc = CPACF_KM_XTS_256_ENC;
+ xts_ctx->dec = CPACF_KM_XTS_256_DEC;
memcpy(xts_ctx->key, in_key, 32);
memcpy(xts_ctx->pcc_key, in_key + 32, 32);
break;
@@ -637,7 +643,8 @@ static int xts_aes_crypt(struct blkcipher_desc *desc, long func,
memset(pcc_param.xts, 0, sizeof(pcc_param.xts));
memcpy(pcc_param.tweak, walk->iv, sizeof(pcc_param.tweak));
memcpy(pcc_param.key, xts_ctx->pcc_key, 32);
- ret = crypt_s390_pcc(func, &pcc_param.key[offset]);
+ /* remove decipher modifier bit from 'func' and call PCC */
+ ret = cpacf_pcc(func & 0x7f, &pcc_param.key[offset]);
if (ret < 0)
return -EIO;
@@ -649,7 +656,7 @@ static int xts_aes_crypt(struct blkcipher_desc *desc, long func,
out = walk->dst.virt.addr;
in = walk->src.virt.addr;
- ret = crypt_s390_km(func, &xts_param.key[offset], out, in, n);
+ ret = cpacf_km(func, &xts_param.key[offset], out, in, n);
if (ret < 0 || ret != n)
return -EIO;
@@ -715,7 +722,7 @@ static void xts_fallback_exit(struct crypto_tfm *tfm)
static struct crypto_alg xts_aes_alg = {
.cra_name = "xts(aes)",
.cra_driver_name = "xts-aes-s390",
- .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY,
+ .cra_priority = 400, /* combo: aes + xts */
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_NEED_FALLBACK,
.cra_blocksize = AES_BLOCK_SIZE,
@@ -745,16 +752,16 @@ static int ctr_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
switch (key_len) {
case 16:
- sctx->enc = KMCTR_AES_128_ENCRYPT;
- sctx->dec = KMCTR_AES_128_DECRYPT;
+ sctx->enc = CPACF_KMCTR_AES_128_ENC;
+ sctx->dec = CPACF_KMCTR_AES_128_DEC;
break;
case 24:
- sctx->enc = KMCTR_AES_192_ENCRYPT;
- sctx->dec = KMCTR_AES_192_DECRYPT;
+ sctx->enc = CPACF_KMCTR_AES_192_ENC;
+ sctx->dec = CPACF_KMCTR_AES_192_DEC;
break;
case 32:
- sctx->enc = KMCTR_AES_256_ENCRYPT;
- sctx->dec = KMCTR_AES_256_DECRYPT;
+ sctx->enc = CPACF_KMCTR_AES_256_ENC;
+ sctx->dec = CPACF_KMCTR_AES_256_DEC;
break;
}
@@ -798,8 +805,7 @@ static int ctr_aes_crypt(struct blkcipher_desc *desc, long func,
n = __ctrblk_init(ctrptr, nbytes);
else
n = AES_BLOCK_SIZE;
- ret = crypt_s390_kmctr(func, sctx->key, out, in,
- n, ctrptr);
+ ret = cpacf_kmctr(func, sctx->key, out, in, n, ctrptr);
if (ret < 0 || ret != n) {
if (ctrptr == ctrblk)
spin_unlock(&ctrblk_lock);
@@ -831,8 +837,8 @@ static int ctr_aes_crypt(struct blkcipher_desc *desc, long func,
if (nbytes) {
out = walk->dst.virt.addr;
in = walk->src.virt.addr;
- ret = crypt_s390_kmctr(func, sctx->key, buf, in,
- AES_BLOCK_SIZE, ctrbuf);
+ ret = cpacf_kmctr(func, sctx->key, buf, in,
+ AES_BLOCK_SIZE, ctrbuf);
if (ret < 0 || ret != AES_BLOCK_SIZE)
return -EIO;
memcpy(out, buf, nbytes);
@@ -869,7 +875,7 @@ static int ctr_aes_decrypt(struct blkcipher_desc *desc,
static struct crypto_alg ctr_aes_alg = {
.cra_name = "ctr(aes)",
.cra_driver_name = "ctr-aes-s390",
- .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY,
+ .cra_priority = 400, /* combo: aes + ctr */
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct s390_aes_ctx),
@@ -893,11 +899,11 @@ static int __init aes_s390_init(void)
{
int ret;
- if (crypt_s390_func_available(KM_AES_128_ENCRYPT, CRYPT_S390_MSA))
+ if (cpacf_query(CPACF_KM, CPACF_KM_AES_128_ENC))
keylen_flag |= AES_KEYLEN_128;
- if (crypt_s390_func_available(KM_AES_192_ENCRYPT, CRYPT_S390_MSA))
+ if (cpacf_query(CPACF_KM, CPACF_KM_AES_192_ENC))
keylen_flag |= AES_KEYLEN_192;
- if (crypt_s390_func_available(KM_AES_256_ENCRYPT, CRYPT_S390_MSA))
+ if (cpacf_query(CPACF_KM, CPACF_KM_AES_256_ENC))
keylen_flag |= AES_KEYLEN_256;
if (!keylen_flag)
@@ -920,22 +926,17 @@ static int __init aes_s390_init(void)
if (ret)
goto cbc_aes_err;
- if (crypt_s390_func_available(KM_XTS_128_ENCRYPT,
- CRYPT_S390_MSA | CRYPT_S390_MSA4) &&
- crypt_s390_func_available(KM_XTS_256_ENCRYPT,
- CRYPT_S390_MSA | CRYPT_S390_MSA4)) {
+ if (cpacf_query(CPACF_KM, CPACF_KM_XTS_128_ENC) &&
+ cpacf_query(CPACF_KM, CPACF_KM_XTS_256_ENC)) {
ret = crypto_register_alg(&xts_aes_alg);
if (ret)
goto xts_aes_err;
xts_aes_alg_reg = 1;
}
- if (crypt_s390_func_available(KMCTR_AES_128_ENCRYPT,
- CRYPT_S390_MSA | CRYPT_S390_MSA4) &&
- crypt_s390_func_available(KMCTR_AES_192_ENCRYPT,
- CRYPT_S390_MSA | CRYPT_S390_MSA4) &&
- crypt_s390_func_available(KMCTR_AES_256_ENCRYPT,
- CRYPT_S390_MSA | CRYPT_S390_MSA4)) {
+ if (cpacf_query(CPACF_KMCTR, CPACF_KMCTR_AES_128_ENC) &&
+ cpacf_query(CPACF_KMCTR, CPACF_KMCTR_AES_192_ENC) &&
+ cpacf_query(CPACF_KMCTR, CPACF_KMCTR_AES_256_ENC)) {
ctrblk = (u8 *) __get_free_page(GFP_KERNEL);
if (!ctrblk) {
ret = -ENOMEM;
diff --git a/arch/s390/crypto/crypt_s390.h b/arch/s390/crypto/crypt_s390.h
deleted file mode 100644
index d9c4c313fbc6..000000000000
--- a/arch/s390/crypto/crypt_s390.h
+++ /dev/null
@@ -1,493 +0,0 @@
-/*
- * Cryptographic API.
- *
- * Support for s390 cryptographic instructions.
- *
- * Copyright IBM Corp. 2003, 2015
- * Author(s): Thomas Spatzier
- * Jan Glauber (jan.glauber@de.ibm.com)
- * Harald Freudenberger (freude@de.ibm.com)
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- */
-#ifndef _CRYPTO_ARCH_S390_CRYPT_S390_H
-#define _CRYPTO_ARCH_S390_CRYPT_S390_H
-
-#include <asm/errno.h>
-#include <asm/facility.h>
-
-#define CRYPT_S390_OP_MASK 0xFF00
-#define CRYPT_S390_FUNC_MASK 0x00FF
-
-#define CRYPT_S390_PRIORITY 300
-#define CRYPT_S390_COMPOSITE_PRIORITY 400
-
-#define CRYPT_S390_MSA 0x1
-#define CRYPT_S390_MSA3 0x2
-#define CRYPT_S390_MSA4 0x4
-#define CRYPT_S390_MSA5 0x8
-
-/* s390 cryptographic operations */
-enum crypt_s390_operations {
- CRYPT_S390_KM = 0x0100,
- CRYPT_S390_KMC = 0x0200,
- CRYPT_S390_KIMD = 0x0300,
- CRYPT_S390_KLMD = 0x0400,
- CRYPT_S390_KMAC = 0x0500,
- CRYPT_S390_KMCTR = 0x0600,
- CRYPT_S390_PPNO = 0x0700
-};
-
-/*
- * function codes for KM (CIPHER MESSAGE) instruction
- * 0x80 is the decipher modifier bit
- */
-enum crypt_s390_km_func {
- KM_QUERY = CRYPT_S390_KM | 0x0,
- KM_DEA_ENCRYPT = CRYPT_S390_KM | 0x1,
- KM_DEA_DECRYPT = CRYPT_S390_KM | 0x1 | 0x80,
- KM_TDEA_128_ENCRYPT = CRYPT_S390_KM | 0x2,
- KM_TDEA_128_DECRYPT = CRYPT_S390_KM | 0x2 | 0x80,
- KM_TDEA_192_ENCRYPT = CRYPT_S390_KM | 0x3,
- KM_TDEA_192_DECRYPT = CRYPT_S390_KM | 0x3 | 0x80,
- KM_AES_128_ENCRYPT = CRYPT_S390_KM | 0x12,
- KM_AES_128_DECRYPT = CRYPT_S390_KM | 0x12 | 0x80,
- KM_AES_192_ENCRYPT = CRYPT_S390_KM | 0x13,
- KM_AES_192_DECRYPT = CRYPT_S390_KM | 0x13 | 0x80,
- KM_AES_256_ENCRYPT = CRYPT_S390_KM | 0x14,
- KM_AES_256_DECRYPT = CRYPT_S390_KM | 0x14 | 0x80,
- KM_XTS_128_ENCRYPT = CRYPT_S390_KM | 0x32,
- KM_XTS_128_DECRYPT = CRYPT_S390_KM | 0x32 | 0x80,
- KM_XTS_256_ENCRYPT = CRYPT_S390_KM | 0x34,
- KM_XTS_256_DECRYPT = CRYPT_S390_KM | 0x34 | 0x80,
-};
-
-/*
- * function codes for KMC (CIPHER MESSAGE WITH CHAINING)
- * instruction
- */
-enum crypt_s390_kmc_func {
- KMC_QUERY = CRYPT_S390_KMC | 0x0,
- KMC_DEA_ENCRYPT = CRYPT_S390_KMC | 0x1,
- KMC_DEA_DECRYPT = CRYPT_S390_KMC | 0x1 | 0x80,
- KMC_TDEA_128_ENCRYPT = CRYPT_S390_KMC | 0x2,
- KMC_TDEA_128_DECRYPT = CRYPT_S390_KMC | 0x2 | 0x80,
- KMC_TDEA_192_ENCRYPT = CRYPT_S390_KMC | 0x3,
- KMC_TDEA_192_DECRYPT = CRYPT_S390_KMC | 0x3 | 0x80,
- KMC_AES_128_ENCRYPT = CRYPT_S390_KMC | 0x12,
- KMC_AES_128_DECRYPT = CRYPT_S390_KMC | 0x12 | 0x80,
- KMC_AES_192_ENCRYPT = CRYPT_S390_KMC | 0x13,
- KMC_AES_192_DECRYPT = CRYPT_S390_KMC | 0x13 | 0x80,
- KMC_AES_256_ENCRYPT = CRYPT_S390_KMC | 0x14,
- KMC_AES_256_DECRYPT = CRYPT_S390_KMC | 0x14 | 0x80,
- KMC_PRNG = CRYPT_S390_KMC | 0x43,
-};
-
-/*
- * function codes for KMCTR (CIPHER MESSAGE WITH COUNTER)
- * instruction
- */
-enum crypt_s390_kmctr_func {
- KMCTR_QUERY = CRYPT_S390_KMCTR | 0x0,
- KMCTR_DEA_ENCRYPT = CRYPT_S390_KMCTR | 0x1,
- KMCTR_DEA_DECRYPT = CRYPT_S390_KMCTR | 0x1 | 0x80,
- KMCTR_TDEA_128_ENCRYPT = CRYPT_S390_KMCTR | 0x2,
- KMCTR_TDEA_128_DECRYPT = CRYPT_S390_KMCTR | 0x2 | 0x80,
- KMCTR_TDEA_192_ENCRYPT = CRYPT_S390_KMCTR | 0x3,
- KMCTR_TDEA_192_DECRYPT = CRYPT_S390_KMCTR | 0x3 | 0x80,
- KMCTR_AES_128_ENCRYPT = CRYPT_S390_KMCTR | 0x12,
- KMCTR_AES_128_DECRYPT = CRYPT_S390_KMCTR | 0x12 | 0x80,
- KMCTR_AES_192_ENCRYPT = CRYPT_S390_KMCTR | 0x13,
- KMCTR_AES_192_DECRYPT = CRYPT_S390_KMCTR | 0x13 | 0x80,
- KMCTR_AES_256_ENCRYPT = CRYPT_S390_KMCTR | 0x14,
- KMCTR_AES_256_DECRYPT = CRYPT_S390_KMCTR | 0x14 | 0x80,
-};
-
-/*
- * function codes for KIMD (COMPUTE INTERMEDIATE MESSAGE DIGEST)
- * instruction
- */
-enum crypt_s390_kimd_func {
- KIMD_QUERY = CRYPT_S390_KIMD | 0,
- KIMD_SHA_1 = CRYPT_S390_KIMD | 1,
- KIMD_SHA_256 = CRYPT_S390_KIMD | 2,
- KIMD_SHA_512 = CRYPT_S390_KIMD | 3,
- KIMD_GHASH = CRYPT_S390_KIMD | 65,
-};
-
-/*
- * function codes for KLMD (COMPUTE LAST MESSAGE DIGEST)
- * instruction
- */
-enum crypt_s390_klmd_func {
- KLMD_QUERY = CRYPT_S390_KLMD | 0,
- KLMD_SHA_1 = CRYPT_S390_KLMD | 1,
- KLMD_SHA_256 = CRYPT_S390_KLMD | 2,
- KLMD_SHA_512 = CRYPT_S390_KLMD | 3,
-};
-
-/*
- * function codes for KMAC (COMPUTE MESSAGE AUTHENTICATION CODE)
- * instruction
- */
-enum crypt_s390_kmac_func {
- KMAC_QUERY = CRYPT_S390_KMAC | 0,
- KMAC_DEA = CRYPT_S390_KMAC | 1,
- KMAC_TDEA_128 = CRYPT_S390_KMAC | 2,
- KMAC_TDEA_192 = CRYPT_S390_KMAC | 3
-};
-
-/*
- * function codes for PPNO (PERFORM PSEUDORANDOM NUMBER
- * OPERATION) instruction
- */
-enum crypt_s390_ppno_func {
- PPNO_QUERY = CRYPT_S390_PPNO | 0,
- PPNO_SHA512_DRNG_GEN = CRYPT_S390_PPNO | 3,
- PPNO_SHA512_DRNG_SEED = CRYPT_S390_PPNO | 0x83
-};
-
-/**
- * crypt_s390_km:
- * @func: the function code passed to KM; see crypt_s390_km_func
- * @param: address of parameter block; see POP for details on each func
- * @dest: address of destination memory area
- * @src: address of source memory area
- * @src_len: length of src operand in bytes
- *
- * Executes the KM (CIPHER MESSAGE) operation of the CPU.
- *
- * Returns -1 for failure, 0 for the query func, number of processed
- * bytes for encryption/decryption funcs
- */
-static inline int crypt_s390_km(long func, void *param,
- u8 *dest, const u8 *src, long src_len)
-{
- register long __func asm("0") = func & CRYPT_S390_FUNC_MASK;
- register void *__param asm("1") = param;
- register const u8 *__src asm("2") = src;
- register long __src_len asm("3") = src_len;
- register u8 *__dest asm("4") = dest;
- int ret;
-
- asm volatile(
- "0: .insn rre,0xb92e0000,%3,%1\n" /* KM opcode */
- "1: brc 1,0b\n" /* handle partial completion */
- " la %0,0\n"
- "2:\n"
- EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
- : "=d" (ret), "+a" (__src), "+d" (__src_len), "+a" (__dest)
- : "d" (__func), "a" (__param), "0" (-1) : "cc", "memory");
- if (ret < 0)
- return ret;
- return (func & CRYPT_S390_FUNC_MASK) ? src_len - __src_len : __src_len;
-}
-
-/**
- * crypt_s390_kmc:
- * @func: the function code passed to KM; see crypt_s390_kmc_func
- * @param: address of parameter block; see POP for details on each func
- * @dest: address of destination memory area
- * @src: address of source memory area
- * @src_len: length of src operand in bytes
- *
- * Executes the KMC (CIPHER MESSAGE WITH CHAINING) operation of the CPU.
- *
- * Returns -1 for failure, 0 for the query func, number of processed
- * bytes for encryption/decryption funcs
- */
-static inline int crypt_s390_kmc(long func, void *param,
- u8 *dest, const u8 *src, long src_len)
-{
- register long __func asm("0") = func & CRYPT_S390_FUNC_MASK;
- register void *__param asm("1") = param;
- register const u8 *__src asm("2") = src;
- register long __src_len asm("3") = src_len;
- register u8 *__dest asm("4") = dest;
- int ret;
-
- asm volatile(
- "0: .insn rre,0xb92f0000,%3,%1\n" /* KMC opcode */
- "1: brc 1,0b\n" /* handle partial completion */
- " la %0,0\n"
- "2:\n"
- EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
- : "=d" (ret), "+a" (__src), "+d" (__src_len), "+a" (__dest)
- : "d" (__func), "a" (__param), "0" (-1) : "cc", "memory");
- if (ret < 0)
- return ret;
- return (func & CRYPT_S390_FUNC_MASK) ? src_len - __src_len : __src_len;
-}
-
-/**
- * crypt_s390_kimd:
- * @func: the function code passed to KM; see crypt_s390_kimd_func
- * @param: address of parameter block; see POP for details on each func
- * @src: address of source memory area
- * @src_len: length of src operand in bytes
- *
- * Executes the KIMD (COMPUTE INTERMEDIATE MESSAGE DIGEST) operation
- * of the CPU.
- *
- * Returns -1 for failure, 0 for the query func, number of processed
- * bytes for digest funcs
- */
-static inline int crypt_s390_kimd(long func, void *param,
- const u8 *src, long src_len)
-{
- register long __func asm("0") = func & CRYPT_S390_FUNC_MASK;
- register void *__param asm("1") = param;
- register const u8 *__src asm("2") = src;
- register long __src_len asm("3") = src_len;
- int ret;
-
- asm volatile(
- "0: .insn rre,0xb93e0000,%1,%1\n" /* KIMD opcode */
- "1: brc 1,0b\n" /* handle partial completion */
- " la %0,0\n"
- "2:\n"
- EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
- : "=d" (ret), "+a" (__src), "+d" (__src_len)
- : "d" (__func), "a" (__param), "0" (-1) : "cc", "memory");
- if (ret < 0)
- return ret;
- return (func & CRYPT_S390_FUNC_MASK) ? src_len - __src_len : __src_len;
-}
-
-/**
- * crypt_s390_klmd:
- * @func: the function code passed to KM; see crypt_s390_klmd_func
- * @param: address of parameter block; see POP for details on each func
- * @src: address of source memory area
- * @src_len: length of src operand in bytes
- *
- * Executes the KLMD (COMPUTE LAST MESSAGE DIGEST) operation of the CPU.
- *
- * Returns -1 for failure, 0 for the query func, number of processed
- * bytes for digest funcs
- */
-static inline int crypt_s390_klmd(long func, void *param,
- const u8 *src, long src_len)
-{
- register long __func asm("0") = func & CRYPT_S390_FUNC_MASK;
- register void *__param asm("1") = param;
- register const u8 *__src asm("2") = src;
- register long __src_len asm("3") = src_len;
- int ret;
-
- asm volatile(
- "0: .insn rre,0xb93f0000,%1,%1\n" /* KLMD opcode */
- "1: brc 1,0b\n" /* handle partial completion */
- " la %0,0\n"
- "2:\n"
- EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
- : "=d" (ret), "+a" (__src), "+d" (__src_len)
- : "d" (__func), "a" (__param), "0" (-1) : "cc", "memory");
- if (ret < 0)
- return ret;
- return (func & CRYPT_S390_FUNC_MASK) ? src_len - __src_len : __src_len;
-}
-
-/**
- * crypt_s390_kmac:
- * @func: the function code passed to KM; see crypt_s390_klmd_func
- * @param: address of parameter block; see POP for details on each func
- * @src: address of source memory area
- * @src_len: length of src operand in bytes
- *
- * Executes the KMAC (COMPUTE MESSAGE AUTHENTICATION CODE) operation
- * of the CPU.
- *
- * Returns -1 for failure, 0 for the query func, number of processed
- * bytes for digest funcs
- */
-static inline int crypt_s390_kmac(long func, void *param,
- const u8 *src, long src_len)
-{
- register long __func asm("0") = func & CRYPT_S390_FUNC_MASK;
- register void *__param asm("1") = param;
- register const u8 *__src asm("2") = src;
- register long __src_len asm("3") = src_len;
- int ret;
-
- asm volatile(
- "0: .insn rre,0xb91e0000,%1,%1\n" /* KLAC opcode */
- "1: brc 1,0b\n" /* handle partial completion */
- " la %0,0\n"
- "2:\n"
- EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
- : "=d" (ret), "+a" (__src), "+d" (__src_len)
- : "d" (__func), "a" (__param), "0" (-1) : "cc", "memory");
- if (ret < 0)
- return ret;
- return (func & CRYPT_S390_FUNC_MASK) ? src_len - __src_len : __src_len;
-}
-
-/**
- * crypt_s390_kmctr:
- * @func: the function code passed to KMCTR; see crypt_s390_kmctr_func
- * @param: address of parameter block; see POP for details on each func
- * @dest: address of destination memory area
- * @src: address of source memory area
- * @src_len: length of src operand in bytes
- * @counter: address of counter value
- *
- * Executes the KMCTR (CIPHER MESSAGE WITH COUNTER) operation of the CPU.
- *
- * Returns -1 for failure, 0 for the query func, number of processed
- * bytes for encryption/decryption funcs
- */
-static inline int crypt_s390_kmctr(long func, void *param, u8 *dest,
- const u8 *src, long src_len, u8 *counter)
-{
- register long __func asm("0") = func & CRYPT_S390_FUNC_MASK;
- register void *__param asm("1") = param;
- register const u8 *__src asm("2") = src;
- register long __src_len asm("3") = src_len;
- register u8 *__dest asm("4") = dest;
- register u8 *__ctr asm("6") = counter;
- int ret = -1;
-
- asm volatile(
- "0: .insn rrf,0xb92d0000,%3,%1,%4,0\n" /* KMCTR opcode */
- "1: brc 1,0b\n" /* handle partial completion */
- " la %0,0\n"
- "2:\n"
- EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
- : "+d" (ret), "+a" (__src), "+d" (__src_len), "+a" (__dest),
- "+a" (__ctr)
- : "d" (__func), "a" (__param) : "cc", "memory");
- if (ret < 0)
- return ret;
- return (func & CRYPT_S390_FUNC_MASK) ? src_len - __src_len : __src_len;
-}
-
-/**
- * crypt_s390_ppno:
- * @func: the function code passed to PPNO; see crypt_s390_ppno_func
- * @param: address of parameter block; see POP for details on each func
- * @dest: address of destination memory area
- * @dest_len: size of destination memory area in bytes
- * @seed: address of seed data
- * @seed_len: size of seed data in bytes
- *
- * Executes the PPNO (PERFORM PSEUDORANDOM NUMBER OPERATION)
- * operation of the CPU.
- *
- * Returns -1 for failure, 0 for the query func, number of random
- * bytes stored in dest buffer for generate function
- */
-static inline int crypt_s390_ppno(long func, void *param,
- u8 *dest, long dest_len,
- const u8 *seed, long seed_len)
-{
- register long __func asm("0") = func & CRYPT_S390_FUNC_MASK;
- register void *__param asm("1") = param; /* param block (240 bytes) */
- register u8 *__dest asm("2") = dest; /* buf for recv random bytes */
- register long __dest_len asm("3") = dest_len; /* requested random bytes */
- register const u8 *__seed asm("4") = seed; /* buf with seed data */
- register long __seed_len asm("5") = seed_len; /* bytes in seed buf */
- int ret = -1;
-
- asm volatile (
- "0: .insn rre,0xb93c0000,%1,%5\n" /* PPNO opcode */
- "1: brc 1,0b\n" /* handle partial completion */
- " la %0,0\n"
- "2:\n"
- EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
- : "+d" (ret), "+a"(__dest), "+d"(__dest_len)
- : "d"(__func), "a"(__param), "a"(__seed), "d"(__seed_len)
- : "cc", "memory");
- if (ret < 0)
- return ret;
- return (func & CRYPT_S390_FUNC_MASK) ? dest_len - __dest_len : 0;
-}
-
-/**
- * crypt_s390_func_available:
- * @func: the function code of the specific function; 0 if op in general
- *
- * Tests if a specific crypto function is implemented on the machine.
- *
- * Returns 1 if func available; 0 if func or op in general not available
- */
-static inline int crypt_s390_func_available(int func,
- unsigned int facility_mask)
-{
- unsigned char status[16];
- int ret;
-
- if (facility_mask & CRYPT_S390_MSA && !test_facility(17))
- return 0;
- if (facility_mask & CRYPT_S390_MSA3 && !test_facility(76))
- return 0;
- if (facility_mask & CRYPT_S390_MSA4 && !test_facility(77))
- return 0;
- if (facility_mask & CRYPT_S390_MSA5 && !test_facility(57))
- return 0;
-
- switch (func & CRYPT_S390_OP_MASK) {
- case CRYPT_S390_KM:
- ret = crypt_s390_km(KM_QUERY, &status, NULL, NULL, 0);
- break;
- case CRYPT_S390_KMC:
- ret = crypt_s390_kmc(KMC_QUERY, &status, NULL, NULL, 0);
- break;
- case CRYPT_S390_KIMD:
- ret = crypt_s390_kimd(KIMD_QUERY, &status, NULL, 0);
- break;
- case CRYPT_S390_KLMD:
- ret = crypt_s390_klmd(KLMD_QUERY, &status, NULL, 0);
- break;
- case CRYPT_S390_KMAC:
- ret = crypt_s390_kmac(KMAC_QUERY, &status, NULL, 0);
- break;
- case CRYPT_S390_KMCTR:
- ret = crypt_s390_kmctr(KMCTR_QUERY, &status,
- NULL, NULL, 0, NULL);
- break;
- case CRYPT_S390_PPNO:
- ret = crypt_s390_ppno(PPNO_QUERY, &status,
- NULL, 0, NULL, 0);
- break;
- default:
- return 0;
- }
- if (ret < 0)
- return 0;
- func &= CRYPT_S390_FUNC_MASK;
- func &= 0x7f; /* mask modifier bit */
- return (status[func >> 3] & (0x80 >> (func & 7))) != 0;
-}
-
-/**
- * crypt_s390_pcc:
- * @func: the function code passed to KM; see crypt_s390_km_func
- * @param: address of parameter block; see POP for details on each func
- *
- * Executes the PCC (PERFORM CRYPTOGRAPHIC COMPUTATION) operation of the CPU.
- *
- * Returns -1 for failure, 0 for success.
- */
-static inline int crypt_s390_pcc(long func, void *param)
-{
- register long __func asm("0") = func & 0x7f; /* encrypt or decrypt */
- register void *__param asm("1") = param;
- int ret = -1;
-
- asm volatile(
- "0: .insn rre,0xb92c0000,0,0\n" /* PCC opcode */
- "1: brc 1,0b\n" /* handle partial completion */
- " la %0,0\n"
- "2:\n"
- EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
- : "+d" (ret)
- : "d" (__func), "a" (__param) : "cc", "memory");
- return ret;
-}
-
-#endif /* _CRYPTO_ARCH_S390_CRYPT_S390_H */
diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c
index fba1c10a2dd0..697e71a75fc2 100644
--- a/arch/s390/crypto/des_s390.c
+++ b/arch/s390/crypto/des_s390.c
@@ -20,8 +20,7 @@
#include <linux/crypto.h>
#include <crypto/algapi.h>
#include <crypto/des.h>
-
-#include "crypt_s390.h"
+#include <asm/cpacf.h>
#define DES3_KEY_SIZE (3 * DES_KEY_SIZE)
@@ -54,20 +53,20 @@ static void des_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
- crypt_s390_km(KM_DEA_ENCRYPT, ctx->key, out, in, DES_BLOCK_SIZE);
+ cpacf_km(CPACF_KM_DEA_ENC, ctx->key, out, in, DES_BLOCK_SIZE);
}
static void des_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
- crypt_s390_km(KM_DEA_DECRYPT, ctx->key, out, in, DES_BLOCK_SIZE);
+ cpacf_km(CPACF_KM_DEA_DEC, ctx->key, out, in, DES_BLOCK_SIZE);
}
static struct crypto_alg des_alg = {
.cra_name = "des",
.cra_driver_name = "des-s390",
- .cra_priority = CRYPT_S390_PRIORITY,
+ .cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
.cra_blocksize = DES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct s390_des_ctx),
@@ -95,7 +94,7 @@ static int ecb_desall_crypt(struct blkcipher_desc *desc, long func,
u8 *out = walk->dst.virt.addr;
u8 *in = walk->src.virt.addr;
- ret = crypt_s390_km(func, key, out, in, n);
+ ret = cpacf_km(func, key, out, in, n);
if (ret < 0 || ret != n)
return -EIO;
@@ -128,7 +127,7 @@ static int cbc_desall_crypt(struct blkcipher_desc *desc, long func,
u8 *out = walk->dst.virt.addr;
u8 *in = walk->src.virt.addr;
- ret = crypt_s390_kmc(func, &param, out, in, n);
+ ret = cpacf_kmc(func, &param, out, in, n);
if (ret < 0 || ret != n)
return -EIO;
@@ -149,7 +148,7 @@ static int ecb_des_encrypt(struct blkcipher_desc *desc,
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_desall_crypt(desc, KM_DEA_ENCRYPT, ctx->key, &walk);
+ return ecb_desall_crypt(desc, CPACF_KM_DEA_ENC, ctx->key, &walk);
}
static int ecb_des_decrypt(struct blkcipher_desc *desc,
@@ -160,13 +159,13 @@ static int ecb_des_decrypt(struct blkcipher_desc *desc,
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_desall_crypt(desc, KM_DEA_DECRYPT, ctx->key, &walk);
+ return ecb_desall_crypt(desc, CPACF_KM_DEA_DEC, ctx->key, &walk);
}
static struct crypto_alg ecb_des_alg = {
.cra_name = "ecb(des)",
.cra_driver_name = "ecb-des-s390",
- .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY,
+ .cra_priority = 400, /* combo: des + ecb */
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = DES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct s390_des_ctx),
@@ -190,7 +189,7 @@ static int cbc_des_encrypt(struct blkcipher_desc *desc,
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
- return cbc_desall_crypt(desc, KMC_DEA_ENCRYPT, &walk);
+ return cbc_desall_crypt(desc, CPACF_KMC_DEA_ENC, &walk);
}
static int cbc_des_decrypt(struct blkcipher_desc *desc,
@@ -200,13 +199,13 @@ static int cbc_des_decrypt(struct blkcipher_desc *desc,
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
- return cbc_desall_crypt(desc, KMC_DEA_DECRYPT, &walk);
+ return cbc_desall_crypt(desc, CPACF_KMC_DEA_DEC, &walk);
}
static struct crypto_alg cbc_des_alg = {
.cra_name = "cbc(des)",
.cra_driver_name = "cbc-des-s390",
- .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY,
+ .cra_priority = 400, /* combo: des + cbc */
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = DES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct s390_des_ctx),
@@ -258,20 +257,20 @@ static void des3_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
- crypt_s390_km(KM_TDEA_192_ENCRYPT, ctx->key, dst, src, DES_BLOCK_SIZE);
+ cpacf_km(CPACF_KM_TDEA_192_ENC, ctx->key, dst, src, DES_BLOCK_SIZE);
}
static void des3_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
- crypt_s390_km(KM_TDEA_192_DECRYPT, ctx->key, dst, src, DES_BLOCK_SIZE);
+ cpacf_km(CPACF_KM_TDEA_192_DEC, ctx->key, dst, src, DES_BLOCK_SIZE);
}
static struct crypto_alg des3_alg = {
.cra_name = "des3_ede",
.cra_driver_name = "des3_ede-s390",
- .cra_priority = CRYPT_S390_PRIORITY,
+ .cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
.cra_blocksize = DES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct s390_des_ctx),
@@ -295,7 +294,7 @@ static int ecb_des3_encrypt(struct blkcipher_desc *desc,
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_desall_crypt(desc, KM_TDEA_192_ENCRYPT, ctx->key, &walk);
+ return ecb_desall_crypt(desc, CPACF_KM_TDEA_192_ENC, ctx->key, &walk);
}
static int ecb_des3_decrypt(struct blkcipher_desc *desc,
@@ -306,13 +305,13 @@ static int ecb_des3_decrypt(struct blkcipher_desc *desc,
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_desall_crypt(desc, KM_TDEA_192_DECRYPT, ctx->key, &walk);
+ return ecb_desall_crypt(desc, CPACF_KM_TDEA_192_DEC, ctx->key, &walk);
}
static struct crypto_alg ecb_des3_alg = {
.cra_name = "ecb(des3_ede)",
.cra_driver_name = "ecb-des3_ede-s390",
- .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY,
+ .cra_priority = 400, /* combo: des3 + ecb */
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = DES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct s390_des_ctx),
@@ -336,7 +335,7 @@ static int cbc_des3_encrypt(struct blkcipher_desc *desc,
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
- return cbc_desall_crypt(desc, KMC_TDEA_192_ENCRYPT, &walk);
+ return cbc_desall_crypt(desc, CPACF_KMC_TDEA_192_ENC, &walk);
}
static int cbc_des3_decrypt(struct blkcipher_desc *desc,
@@ -346,13 +345,13 @@ static int cbc_des3_decrypt(struct blkcipher_desc *desc,
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
- return cbc_desall_crypt(desc, KMC_TDEA_192_DECRYPT, &walk);
+ return cbc_desall_crypt(desc, CPACF_KMC_TDEA_192_DEC, &walk);
}
static struct crypto_alg cbc_des3_alg = {
.cra_name = "cbc(des3_ede)",
.cra_driver_name = "cbc-des3_ede-s390",
- .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY,
+ .cra_priority = 400, /* combo: des3 + cbc */
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = DES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct s390_des_ctx),
@@ -407,8 +406,7 @@ static int ctr_desall_crypt(struct blkcipher_desc *desc, long func,
n = __ctrblk_init(ctrptr, nbytes);
else
n = DES_BLOCK_SIZE;
- ret = crypt_s390_kmctr(func, ctx->key, out, in,
- n, ctrptr);
+ ret = cpacf_kmctr(func, ctx->key, out, in, n, ctrptr);
if (ret < 0 || ret != n) {
if (ctrptr == ctrblk)
spin_unlock(&ctrblk_lock);
@@ -438,8 +436,8 @@ static int ctr_desall_crypt(struct blkcipher_desc *desc, long func,
if (nbytes) {
out = walk->dst.virt.addr;
in = walk->src.virt.addr;
- ret = crypt_s390_kmctr(func, ctx->key, buf, in,
- DES_BLOCK_SIZE, ctrbuf);
+ ret = cpacf_kmctr(func, ctx->key, buf, in,
+ DES_BLOCK_SIZE, ctrbuf);
if (ret < 0 || ret != DES_BLOCK_SIZE)
return -EIO;
memcpy(out, buf, nbytes);
@@ -458,7 +456,7 @@ static int ctr_des_encrypt(struct blkcipher_desc *desc,
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
- return ctr_desall_crypt(desc, KMCTR_DEA_ENCRYPT, ctx, &walk);
+ return ctr_desall_crypt(desc, CPACF_KMCTR_DEA_ENC, ctx, &walk);
}
static int ctr_des_decrypt(struct blkcipher_desc *desc,
@@ -469,13 +467,13 @@ static int ctr_des_decrypt(struct blkcipher_desc *desc,
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
- return ctr_desall_crypt(desc, KMCTR_DEA_DECRYPT, ctx, &walk);
+ return ctr_desall_crypt(desc, CPACF_KMCTR_DEA_DEC, ctx, &walk);
}
static struct crypto_alg ctr_des_alg = {
.cra_name = "ctr(des)",
.cra_driver_name = "ctr-des-s390",
- .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY,
+ .cra_priority = 400, /* combo: des + ctr */
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct s390_des_ctx),
@@ -501,7 +499,7 @@ static int ctr_des3_encrypt(struct blkcipher_desc *desc,
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
- return ctr_desall_crypt(desc, KMCTR_TDEA_192_ENCRYPT, ctx, &walk);
+ return ctr_desall_crypt(desc, CPACF_KMCTR_TDEA_192_ENC, ctx, &walk);
}
static int ctr_des3_decrypt(struct blkcipher_desc *desc,
@@ -512,13 +510,13 @@ static int ctr_des3_decrypt(struct blkcipher_desc *desc,
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
- return ctr_desall_crypt(desc, KMCTR_TDEA_192_DECRYPT, ctx, &walk);
+ return ctr_desall_crypt(desc, CPACF_KMCTR_TDEA_192_DEC, ctx, &walk);
}
static struct crypto_alg ctr_des3_alg = {
.cra_name = "ctr(des3_ede)",
.cra_driver_name = "ctr-des3_ede-s390",
- .cra_priority = CRYPT_S390_COMPOSITE_PRIORITY,
+ .cra_priority = 400, /* combo: des3 + ede */
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct s390_des_ctx),
@@ -540,8 +538,8 @@ static int __init des_s390_init(void)
{
int ret;
- if (!crypt_s390_func_available(KM_DEA_ENCRYPT, CRYPT_S390_MSA) ||
- !crypt_s390_func_available(KM_TDEA_192_ENCRYPT, CRYPT_S390_MSA))
+ if (!cpacf_query(CPACF_KM, CPACF_KM_DEA_ENC) ||
+ !cpacf_query(CPACF_KM, CPACF_KM_TDEA_192_ENC))
return -EOPNOTSUPP;
ret = crypto_register_alg(&des_alg);
@@ -563,10 +561,8 @@ static int __init des_s390_init(void)
if (ret)
goto cbc_des3_err;
- if (crypt_s390_func_available(KMCTR_DEA_ENCRYPT,
- CRYPT_S390_MSA | CRYPT_S390_MSA4) &&
- crypt_s390_func_available(KMCTR_TDEA_192_ENCRYPT,
- CRYPT_S390_MSA | CRYPT_S390_MSA4)) {
+ if (cpacf_query(CPACF_KMCTR, CPACF_KMCTR_DEA_ENC) &&
+ cpacf_query(CPACF_KMCTR, CPACF_KMCTR_TDEA_192_ENC)) {
ret = crypto_register_alg(&ctr_des_alg);
if (ret)
goto ctr_des_err;
diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c
index 26e14efd30a7..ab68de72e795 100644
--- a/arch/s390/crypto/ghash_s390.c
+++ b/arch/s390/crypto/ghash_s390.c
@@ -10,8 +10,7 @@
#include <crypto/internal/hash.h>
#include <linux/module.h>
#include <linux/cpufeature.h>
-
-#include "crypt_s390.h"
+#include <asm/cpacf.h>
#define GHASH_BLOCK_SIZE 16
#define GHASH_DIGEST_SIZE 16
@@ -72,8 +71,8 @@ static int ghash_update(struct shash_desc *desc,
src += n;
if (!dctx->bytes) {
- ret = crypt_s390_kimd(KIMD_GHASH, dctx, buf,
- GHASH_BLOCK_SIZE);
+ ret = cpacf_kimd(CPACF_KIMD_GHASH, dctx, buf,
+ GHASH_BLOCK_SIZE);
if (ret != GHASH_BLOCK_SIZE)
return -EIO;
}
@@ -81,7 +80,7 @@ static int ghash_update(struct shash_desc *desc,
n = srclen & ~(GHASH_BLOCK_SIZE - 1);
if (n) {
- ret = crypt_s390_kimd(KIMD_GHASH, dctx, src, n);
+ ret = cpacf_kimd(CPACF_KIMD_GHASH, dctx, src, n);
if (ret != n)
return -EIO;
src += n;
@@ -106,7 +105,7 @@ static int ghash_flush(struct ghash_desc_ctx *dctx)
memset(pos, 0, dctx->bytes);
- ret = crypt_s390_kimd(KIMD_GHASH, dctx, buf, GHASH_BLOCK_SIZE);
+ ret = cpacf_kimd(CPACF_KIMD_GHASH, dctx, buf, GHASH_BLOCK_SIZE);
if (ret != GHASH_BLOCK_SIZE)
return -EIO;
@@ -137,7 +136,7 @@ static struct shash_alg ghash_alg = {
.base = {
.cra_name = "ghash",
.cra_driver_name = "ghash-s390",
- .cra_priority = CRYPT_S390_PRIORITY,
+ .cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = GHASH_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct ghash_ctx),
@@ -147,8 +146,7 @@ static struct shash_alg ghash_alg = {
static int __init ghash_mod_init(void)
{
- if (!crypt_s390_func_available(KIMD_GHASH,
- CRYPT_S390_MSA | CRYPT_S390_MSA4))
+ if (!cpacf_query(CPACF_KIMD, CPACF_KIMD_GHASH))
return -EOPNOTSUPP;
return crypto_register_shash(&ghash_alg);
diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c
index b8045b97f4fb..41527b113f5a 100644
--- a/arch/s390/crypto/prng.c
+++ b/arch/s390/crypto/prng.c
@@ -23,8 +23,7 @@
#include <asm/debug.h>
#include <asm/uaccess.h>
#include <asm/timex.h>
-
-#include "crypt_s390.h"
+#include <asm/cpacf.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("IBM Corporation");
@@ -136,8 +135,8 @@ static int generate_entropy(u8 *ebuf, size_t nbytes)
else
h = ebuf;
/* generate sha256 from this page */
- if (crypt_s390_kimd(KIMD_SHA_256, h,
- pg, PAGE_SIZE) != PAGE_SIZE) {
+ if (cpacf_kimd(CPACF_KIMD_SHA_256, h,
+ pg, PAGE_SIZE) != PAGE_SIZE) {
prng_errorflag = PRNG_GEN_ENTROPY_FAILED;
ret = -EIO;
goto out;
@@ -164,9 +163,9 @@ static void prng_tdes_add_entropy(void)
int ret;
for (i = 0; i < 16; i++) {
- ret = crypt_s390_kmc(KMC_PRNG, prng_data->prngws.parm_block,
- (char *)entropy, (char *)entropy,
- sizeof(entropy));
+ ret = cpacf_kmc(CPACF_KMC_PRNG, prng_data->prngws.parm_block,
+ (char *)entropy, (char *)entropy,
+ sizeof(entropy));
BUG_ON(ret < 0 || ret != sizeof(entropy));
memcpy(prng_data->prngws.parm_block, entropy, sizeof(entropy));
}
@@ -311,9 +310,8 @@ static int __init prng_sha512_selftest(void)
memset(&ws, 0, sizeof(ws));
/* initial seed */
- ret = crypt_s390_ppno(PPNO_SHA512_DRNG_SEED,
- &ws, NULL, 0,
- seed, sizeof(seed));
+ ret = cpacf_ppno(CPACF_PPNO_SHA512_DRNG_SEED, &ws, NULL, 0,
+ seed, sizeof(seed));
if (ret < 0) {
pr_err("The prng self test seed operation for the "
"SHA-512 mode failed with rc=%d\n", ret);
@@ -331,18 +329,16 @@ static int __init prng_sha512_selftest(void)
}
/* generate random bytes */
- ret = crypt_s390_ppno(PPNO_SHA512_DRNG_GEN,
- &ws, buf, sizeof(buf),
- NULL, 0);
+ ret = cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN,
+ &ws, buf, sizeof(buf), NULL, 0);
if (ret < 0) {
pr_err("The prng self test generate operation for "
"the SHA-512 mode failed with rc=%d\n", ret);
prng_errorflag = PRNG_SELFTEST_FAILED;
return -EIO;
}
- ret = crypt_s390_ppno(PPNO_SHA512_DRNG_GEN,
- &ws, buf, sizeof(buf),
- NULL, 0);
+ ret = cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN,
+ &ws, buf, sizeof(buf), NULL, 0);
if (ret < 0) {
pr_err("The prng self test generate operation for "
"the SHA-512 mode failed with rc=%d\n", ret);
@@ -396,9 +392,8 @@ static int __init prng_sha512_instantiate(void)
get_tod_clock_ext(seed + 48);
/* initial seed of the ppno drng */
- ret = crypt_s390_ppno(PPNO_SHA512_DRNG_SEED,
- &prng_data->ppnows, NULL, 0,
- seed, sizeof(seed));
+ ret = cpacf_ppno(CPACF_PPNO_SHA512_DRNG_SEED,
+ &prng_data->ppnows, NULL, 0, seed, sizeof(seed));
if (ret < 0) {
prng_errorflag = PRNG_SEED_FAILED;
ret = -EIO;
@@ -409,11 +404,9 @@ static int __init prng_sha512_instantiate(void)
bytes for the FIPS 140-2 Conditional Self Test */
if (fips_enabled) {
prng_data->prev = prng_data->buf + prng_chunk_size;
- ret = crypt_s390_ppno(PPNO_SHA512_DRNG_GEN,
- &prng_data->ppnows,
- prng_data->prev,
- prng_chunk_size,
- NULL, 0);
+ ret = cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN,
+ &prng_data->ppnows,
+ prng_data->prev, prng_chunk_size, NULL, 0);
if (ret < 0 || ret != prng_chunk_size) {
prng_errorflag = PRNG_GEN_FAILED;
ret = -EIO;
@@ -447,9 +440,8 @@ static int prng_sha512_reseed(void)
return ret;
/* do a reseed of the ppno drng with this bytestring */
- ret = crypt_s390_ppno(PPNO_SHA512_DRNG_SEED,
- &prng_data->ppnows, NULL, 0,
- seed, sizeof(seed));
+ ret = cpacf_ppno(CPACF_PPNO_SHA512_DRNG_SEED,
+ &prng_data->ppnows, NULL, 0, seed, sizeof(seed));
if (ret) {
prng_errorflag = PRNG_RESEED_FAILED;
return -EIO;
@@ -471,9 +463,8 @@ static int prng_sha512_generate(u8 *buf, size_t nbytes)
}
/* PPNO generate */
- ret = crypt_s390_ppno(PPNO_SHA512_DRNG_GEN,
- &prng_data->ppnows, buf, nbytes,
- NULL, 0);
+ ret = cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN,
+ &prng_data->ppnows, buf, nbytes, NULL, 0);
if (ret < 0 || ret != nbytes) {
prng_errorflag = PRNG_GEN_FAILED;
return -EIO;
@@ -555,8 +546,8 @@ static ssize_t prng_tdes_read(struct file *file, char __user *ubuf,
* Note: you can still get strict X9.17 conformity by setting
* prng_chunk_size to 8 bytes.
*/
- tmp = crypt_s390_kmc(KMC_PRNG, prng_data->prngws.parm_block,
- prng_data->buf, prng_data->buf, n);
+ tmp = cpacf_kmc(CPACF_KMC_PRNG, prng_data->prngws.parm_block,
+ prng_data->buf, prng_data->buf, n);
if (tmp < 0 || tmp != n) {
ret = -EIO;
break;
@@ -669,11 +660,13 @@ static const struct file_operations prng_tdes_fops = {
static struct miscdevice prng_sha512_dev = {
.name = "prandom",
.minor = MISC_DYNAMIC_MINOR,
+ .mode = 0644,
.fops = &prng_sha512_fops,
};
static struct miscdevice prng_tdes_dev = {
.name = "prandom",
.minor = MISC_DYNAMIC_MINOR,
+ .mode = 0644,
.fops = &prng_tdes_fops,
};
@@ -813,14 +806,13 @@ static int __init prng_init(void)
int ret;
/* check if the CPU has a PRNG */
- if (!crypt_s390_func_available(KMC_PRNG, CRYPT_S390_MSA))
+ if (!cpacf_query(CPACF_KMC, CPACF_KMC_PRNG))
return -EOPNOTSUPP;
/* choose prng mode */
if (prng_mode != PRNG_MODE_TDES) {
/* check for MSA5 support for PPNO operations */
- if (!crypt_s390_func_available(PPNO_SHA512_DRNG_GEN,
- CRYPT_S390_MSA5)) {
+ if (!cpacf_query(CPACF_PPNO, CPACF_PPNO_SHA512_DRNG_GEN)) {
if (prng_mode == PRNG_MODE_SHA512) {
pr_err("The prng module cannot "
"start in SHA-512 mode\n");
diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c
index 9208eadae9f0..5fbf91bbb478 100644
--- a/arch/s390/crypto/sha1_s390.c
+++ b/arch/s390/crypto/sha1_s390.c
@@ -28,8 +28,8 @@
#include <linux/module.h>
#include <linux/cpufeature.h>
#include <crypto/sha.h>
+#include <asm/cpacf.h>
-#include "crypt_s390.h"
#include "sha.h"
static int sha1_init(struct shash_desc *desc)
@@ -42,7 +42,7 @@ static int sha1_init(struct shash_desc *desc)
sctx->state[3] = SHA1_H3;
sctx->state[4] = SHA1_H4;
sctx->count = 0;
- sctx->func = KIMD_SHA_1;
+ sctx->func = CPACF_KIMD_SHA_1;
return 0;
}
@@ -66,7 +66,7 @@ static int sha1_import(struct shash_desc *desc, const void *in)
sctx->count = ictx->count;
memcpy(sctx->state, ictx->state, sizeof(ictx->state));
memcpy(sctx->buf, ictx->buffer, sizeof(ictx->buffer));
- sctx->func = KIMD_SHA_1;
+ sctx->func = CPACF_KIMD_SHA_1;
return 0;
}
@@ -82,7 +82,7 @@ static struct shash_alg alg = {
.base = {
.cra_name = "sha1",
.cra_driver_name= "sha1-s390",
- .cra_priority = CRYPT_S390_PRIORITY,
+ .cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA1_BLOCK_SIZE,
.cra_module = THIS_MODULE,
@@ -91,7 +91,7 @@ static struct shash_alg alg = {
static int __init sha1_s390_init(void)
{
- if (!crypt_s390_func_available(KIMD_SHA_1, CRYPT_S390_MSA))
+ if (!cpacf_query(CPACF_KIMD, CPACF_KIMD_SHA_1))
return -EOPNOTSUPP;
return crypto_register_shash(&alg);
}
diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c
index 667888f5c964..10aac0b11988 100644
--- a/arch/s390/crypto/sha256_s390.c
+++ b/arch/s390/crypto/sha256_s390.c
@@ -18,8 +18,8 @@
#include <linux/module.h>
#include <linux/cpufeature.h>
#include <crypto/sha.h>
+#include <asm/cpacf.h>
-#include "crypt_s390.h"
#include "sha.h"
static int sha256_init(struct shash_desc *desc)
@@ -35,7 +35,7 @@ static int sha256_init(struct shash_desc *desc)
sctx->state[6] = SHA256_H6;
sctx->state[7] = SHA256_H7;
sctx->count = 0;
- sctx->func = KIMD_SHA_256;
+ sctx->func = CPACF_KIMD_SHA_256;
return 0;
}
@@ -59,7 +59,7 @@ static int sha256_import(struct shash_desc *desc, const void *in)
sctx->count = ictx->count;
memcpy(sctx->state, ictx->state, sizeof(ictx->state));
memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf));
- sctx->func = KIMD_SHA_256;
+ sctx->func = CPACF_KIMD_SHA_256;
return 0;
}
@@ -75,7 +75,7 @@ static struct shash_alg sha256_alg = {
.base = {
.cra_name = "sha256",
.cra_driver_name= "sha256-s390",
- .cra_priority = CRYPT_S390_PRIORITY,
+ .cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA256_BLOCK_SIZE,
.cra_module = THIS_MODULE,
@@ -95,7 +95,7 @@ static int sha224_init(struct shash_desc *desc)
sctx->state[6] = SHA224_H6;
sctx->state[7] = SHA224_H7;
sctx->count = 0;
- sctx->func = KIMD_SHA_256;
+ sctx->func = CPACF_KIMD_SHA_256;
return 0;
}
@@ -112,7 +112,7 @@ static struct shash_alg sha224_alg = {
.base = {
.cra_name = "sha224",
.cra_driver_name= "sha224-s390",
- .cra_priority = CRYPT_S390_PRIORITY,
+ .cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA224_BLOCK_SIZE,
.cra_module = THIS_MODULE,
@@ -123,7 +123,7 @@ static int __init sha256_s390_init(void)
{
int ret;
- if (!crypt_s390_func_available(KIMD_SHA_256, CRYPT_S390_MSA))
+ if (!cpacf_query(CPACF_KIMD, CPACF_KIMD_SHA_256))
return -EOPNOTSUPP;
ret = crypto_register_shash(&sha256_alg);
if (ret < 0)
diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c
index 2ba66b1518f0..ea85757be407 100644
--- a/arch/s390/crypto/sha512_s390.c
+++ b/arch/s390/crypto/sha512_s390.c
@@ -19,9 +19,9 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/cpufeature.h>
+#include <asm/cpacf.h>
#include "sha.h"
-#include "crypt_s390.h"
static int sha512_init(struct shash_desc *desc)
{
@@ -36,7 +36,7 @@ static int sha512_init(struct shash_desc *desc)
*(__u64 *)&ctx->state[12] = 0x1f83d9abfb41bd6bULL;
*(__u64 *)&ctx->state[14] = 0x5be0cd19137e2179ULL;
ctx->count = 0;
- ctx->func = KIMD_SHA_512;
+ ctx->func = CPACF_KIMD_SHA_512;
return 0;
}
@@ -64,7 +64,7 @@ static int sha512_import(struct shash_desc *desc, const void *in)
memcpy(sctx->state, ictx->state, sizeof(ictx->state));
memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf));
- sctx->func = KIMD_SHA_512;
+ sctx->func = CPACF_KIMD_SHA_512;
return 0;
}
@@ -80,7 +80,7 @@ static struct shash_alg sha512_alg = {
.base = {
.cra_name = "sha512",
.cra_driver_name= "sha512-s390",
- .cra_priority = CRYPT_S390_PRIORITY,
+ .cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA512_BLOCK_SIZE,
.cra_module = THIS_MODULE,
@@ -102,7 +102,7 @@ static int sha384_init(struct shash_desc *desc)
*(__u64 *)&ctx->state[12] = 0xdb0c2e0d64f98fa7ULL;
*(__u64 *)&ctx->state[14] = 0x47b5481dbefa4fa4ULL;
ctx->count = 0;
- ctx->func = KIMD_SHA_512;
+ ctx->func = CPACF_KIMD_SHA_512;
return 0;
}
@@ -119,7 +119,7 @@ static struct shash_alg sha384_alg = {
.base = {
.cra_name = "sha384",
.cra_driver_name= "sha384-s390",
- .cra_priority = CRYPT_S390_PRIORITY,
+ .cra_priority = 300,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA384_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct s390_sha_ctx),
@@ -133,7 +133,7 @@ static int __init init(void)
{
int ret;
- if (!crypt_s390_func_available(KIMD_SHA_512, CRYPT_S390_MSA))
+ if (!cpacf_query(CPACF_KIMD, CPACF_KIMD_SHA_512))
return -EOPNOTSUPP;
if ((ret = crypto_register_shash(&sha512_alg)) < 0)
goto out;
diff --git a/arch/s390/crypto/sha_common.c b/arch/s390/crypto/sha_common.c
index 8620b0ec9c42..8e908166c3ee 100644
--- a/arch/s390/crypto/sha_common.c
+++ b/arch/s390/crypto/sha_common.c
@@ -15,8 +15,8 @@
#include <crypto/internal/hash.h>
#include <linux/module.h>
+#include <asm/cpacf.h>
#include "sha.h"
-#include "crypt_s390.h"
int s390_sha_update(struct shash_desc *desc, const u8 *data, unsigned int len)
{
@@ -35,7 +35,7 @@ int s390_sha_update(struct shash_desc *desc, const u8 *data, unsigned int len)
/* process one stored block */
if (index) {
memcpy(ctx->buf + index, data, bsize - index);
- ret = crypt_s390_kimd(ctx->func, ctx->state, ctx->buf, bsize);
+ ret = cpacf_kimd(ctx->func, ctx->state, ctx->buf, bsize);
if (ret != bsize)
return -EIO;
data += bsize - index;
@@ -45,8 +45,8 @@ int s390_sha_update(struct shash_desc *desc, const u8 *data, unsigned int len)
/* process as many blocks as possible */
if (len >= bsize) {
- ret = crypt_s390_kimd(ctx->func, ctx->state, data,
- len & ~(bsize - 1));
+ ret = cpacf_kimd(ctx->func, ctx->state, data,
+ len & ~(bsize - 1));
if (ret != (len & ~(bsize - 1)))
return -EIO;
data += ret;
@@ -89,7 +89,7 @@ int s390_sha_final(struct shash_desc *desc, u8 *out)
bits = ctx->count * 8;
memcpy(ctx->buf + end - 8, &bits, sizeof(bits));
- ret = crypt_s390_kimd(ctx->func, ctx->state, ctx->buf, end);
+ ret = cpacf_kimd(ctx->func, ctx->state, ctx->buf, end);
if (ret != end)
return -EIO;
diff --git a/arch/s390/defconfig b/arch/s390/defconfig
index e24f2af4c73b..3f571ea89509 100644
--- a/arch/s390/defconfig
+++ b/arch/s390/defconfig
@@ -1,8 +1,8 @@
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
-CONFIG_FHANDLE=y
+CONFIG_USELIB=y
CONFIG_AUDIT=y
-CONFIG_NO_HZ=y
+CONFIG_NO_HZ_IDLE=y
CONFIG_HIGH_RES_TIMERS=y
CONFIG_TASKSTATS=y
CONFIG_TASK_DELAY_ACCT=y
@@ -11,19 +11,19 @@ CONFIG_TASK_IO_ACCOUNTING=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_CGROUPS=y
-CONFIG_CGROUP_FREEZER=y
-CONFIG_CGROUP_PIDS=y
-CONFIG_CGROUP_DEVICE=y
-CONFIG_CPUSETS=y
-CONFIG_CGROUP_CPUACCT=y
CONFIG_MEMCG=y
CONFIG_MEMCG_SWAP=y
-CONFIG_MEMCG_KMEM=y
-CONFIG_CGROUP_HUGETLB=y
-CONFIG_CGROUP_PERF=y
+CONFIG_BLK_CGROUP=y
CONFIG_CGROUP_SCHED=y
CONFIG_RT_GROUP_SCHED=y
-CONFIG_BLK_CGROUP=y
+CONFIG_CGROUP_PIDS=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_HUGETLB=y
+CONFIG_CPUSETS=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_PERF=y
+CONFIG_CHECKPOINT_RESTORE=y
CONFIG_NAMESPACES=y
CONFIG_USER_NS=y
CONFIG_BLK_DEV_INITRD=y
@@ -44,7 +44,6 @@ CONFIG_PARTITION_ADVANCED=y
CONFIG_IBM_PARTITION=y
CONFIG_DEFAULT_DEADLINE=y
CONFIG_LIVEPATCH=y
-CONFIG_MARCH_Z196=y
CONFIG_NR_CPUS=256
CONFIG_NUMA=y
CONFIG_HZ_100=y
@@ -52,6 +51,14 @@ CONFIG_MEMORY_HOTPLUG=y
CONFIG_MEMORY_HOTREMOVE=y
CONFIG_KSM=y
CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_CLEANCACHE=y
+CONFIG_FRONTSWAP=y
+CONFIG_CMA=y
+CONFIG_ZSWAP=y
+CONFIG_ZBUD=m
+CONFIG_ZSMALLOC=m
+CONFIG_ZSMALLOC_STAT=y
+CONFIG_IDLE_PAGE_TRACKING=y
CONFIG_CRASH_DUMP=y
CONFIG_BINFMT_MISC=m
CONFIG_HIBERNATION=y
@@ -61,7 +68,6 @@ CONFIG_UNIX=y
CONFIG_NET_KEY=y
CONFIG_INET=y
CONFIG_IP_MULTICAST=y
-# CONFIG_INET_LRO is not set
CONFIG_L2TP=m
CONFIG_L2TP_DEBUGFS=m
CONFIG_VLAN_8021Q=y
@@ -144,6 +150,9 @@ CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
CONFIG_HUGETLBFS=y
# CONFIG_NETWORK_FILESYSTEMS is not set
+CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF4=y
+CONFIG_GDB_SCRIPTS=y
CONFIG_UNUSED_SYMBOLS=y
CONFIG_DEBUG_SECTION_MISMATCH=y
CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y
@@ -158,20 +167,21 @@ CONFIG_LOCK_STAT=y
CONFIG_DEBUG_LOCKDEP=y
CONFIG_DEBUG_ATOMIC_SLEEP=y
CONFIG_DEBUG_LIST=y
-CONFIG_DEBUG_PI_LIST=y
CONFIG_DEBUG_SG=y
CONFIG_DEBUG_NOTIFIERS=y
CONFIG_RCU_CPU_STALL_TIMEOUT=60
CONFIG_RCU_TRACE=y
CONFIG_LATENCYTOP=y
CONFIG_DEBUG_STRICT_USER_COPY_CHECKS=y
-CONFIG_TRACER_SNAPSHOT=y
+CONFIG_SCHED_TRACER=y
+CONFIG_FTRACE_SYSCALLS=y
CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y
CONFIG_STACK_TRACER=y
CONFIG_BLK_DEV_IO_TRACE=y
CONFIG_UPROBE_EVENT=y
+CONFIG_FUNCTION_PROFILER=y
+CONFIG_TRACE_ENUM_MAP_FILE=y
CONFIG_KPROBES_SANITY_TEST=y
-# CONFIG_STRICT_DEVMEM is not set
CONFIG_S390_PTDUMP=y
CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_AUTHENC=m
@@ -212,8 +222,6 @@ CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_DEFLATE=m
-CONFIG_CRYPTO_ZLIB=m
-CONFIG_CRYPTO_LZO=m
CONFIG_CRYPTO_LZ4=m
CONFIG_CRYPTO_LZ4HC=m
CONFIG_CRYPTO_ANSI_CPRNG=m
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
index 0f3da2cb2bd6..255c7eec4481 100644
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@@ -278,8 +278,8 @@ static int hypfs_fill_super(struct super_block *sb, void *data, int silent)
sbi->uid = current_uid();
sbi->gid = current_gid();
sb->s_fs_info = sbi;
- sb->s_blocksize = PAGE_CACHE_SIZE;
- sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+ sb->s_blocksize = PAGE_SIZE;
+ sb->s_blocksize_bits = PAGE_SHIFT;
sb->s_magic = HYPFS_MAGIC;
sb->s_op = &hypfs_s_ops;
if (hypfs_parse_options(data, sb))
diff --git a/arch/s390/include/asm/cache.h b/arch/s390/include/asm/cache.h
index 4d7ccac5fd1d..22da3b34c655 100644
--- a/arch/s390/include/asm/cache.h
+++ b/arch/s390/include/asm/cache.h
@@ -15,4 +15,7 @@
#define __read_mostly __attribute__((__section__(".data..read_mostly")))
+/* Read-only memory is marked before mark_rodata_ro() is called. */
+#define __ro_after_init __read_mostly
+
#endif
diff --git a/arch/s390/include/asm/checksum.h b/arch/s390/include/asm/checksum.h
index 740364856355..d7f100c53f07 100644
--- a/arch/s390/include/asm/checksum.h
+++ b/arch/s390/include/asm/checksum.h
@@ -91,8 +91,7 @@ static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
* returns a 32-bit checksum
*/
static inline __wsum
-csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
- unsigned short len, unsigned short proto,
+csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto,
__wsum sum)
{
__u32 csum = (__force __u32)sum;
@@ -118,8 +117,7 @@ csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
*/
static inline __sum16
-csum_tcpudp_magic(__be32 saddr, __be32 daddr,
- unsigned short len, unsigned short proto,
+csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, __u8 proto,
__wsum sum)
{
return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum));
diff --git a/arch/s390/include/asm/clp.h b/arch/s390/include/asm/clp.h
index a0e71a501f7c..5687d62fb0cb 100644
--- a/arch/s390/include/asm/clp.h
+++ b/arch/s390/include/asm/clp.h
@@ -4,14 +4,23 @@
/* CLP common request & response block size */
#define CLP_BLK_SIZE PAGE_SIZE
+#define CLP_LPS_BASE 0
+#define CLP_LPS_PCI 2
+
struct clp_req_hdr {
u16 len;
u16 cmd;
+ u32 fmt : 4;
+ u32 reserved1 : 28;
+ u64 reserved2;
} __packed;
struct clp_rsp_hdr {
u16 len;
u16 rsp;
+ u32 fmt : 4;
+ u32 reserved1 : 28;
+ u64 reserved2;
} __packed;
/* CLP Response Codes */
@@ -25,4 +34,22 @@ struct clp_rsp_hdr {
#define CLP_RC_NODATA 0x0080 /* No data available */
#define CLP_RC_FC_UNKNOWN 0x0100 /* Function code not recognized */
+/* Store logical-processor characteristics request */
+struct clp_req_slpc {
+ struct clp_req_hdr hdr;
+} __packed;
+
+struct clp_rsp_slpc {
+ struct clp_rsp_hdr hdr;
+ u32 reserved2[4];
+ u32 lpif[8];
+ u32 reserved3[8];
+ u32 lpic[8];
+} __packed;
+
+struct clp_req_rsp_slpc {
+ struct clp_req_slpc request;
+ struct clp_rsp_slpc response;
+} __packed;
+
#endif
diff --git a/arch/s390/include/asm/cpacf.h b/arch/s390/include/asm/cpacf.h
new file mode 100644
index 000000000000..1a82cf26ee11
--- /dev/null
+++ b/arch/s390/include/asm/cpacf.h
@@ -0,0 +1,410 @@
+/*
+ * CP Assist for Cryptographic Functions (CPACF)
+ *
+ * Copyright IBM Corp. 2003, 2016
+ * Author(s): Thomas Spatzier
+ * Jan Glauber
+ * Harald Freudenberger (freude@de.ibm.com)
+ * Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+#ifndef _ASM_S390_CPACF_H
+#define _ASM_S390_CPACF_H
+
+#include <asm/facility.h>
+
+/*
+ * Instruction opcodes for the CPACF instructions
+ */
+#define CPACF_KMAC 0xb91e /* MSA */
+#define CPACF_KM 0xb92e /* MSA */
+#define CPACF_KMC 0xb92f /* MSA */
+#define CPACF_KIMD 0xb93e /* MSA */
+#define CPACF_KLMD 0xb93f /* MSA */
+#define CPACF_PCC 0xb92c /* MSA4 */
+#define CPACF_KMCTR 0xb92d /* MSA4 */
+#define CPACF_PPNO 0xb93c /* MSA5 */
+
+/*
+ * Function codes for the KM (CIPHER MESSAGE)
+ * instruction (0x80 is the decipher modifier bit)
+ */
+#define CPACF_KM_QUERY 0x00
+#define CPACF_KM_DEA_ENC 0x01
+#define CPACF_KM_DEA_DEC 0x81
+#define CPACF_KM_TDEA_128_ENC 0x02
+#define CPACF_KM_TDEA_128_DEC 0x82
+#define CPACF_KM_TDEA_192_ENC 0x03
+#define CPACF_KM_TDEA_192_DEC 0x83
+#define CPACF_KM_AES_128_ENC 0x12
+#define CPACF_KM_AES_128_DEC 0x92
+#define CPACF_KM_AES_192_ENC 0x13
+#define CPACF_KM_AES_192_DEC 0x93
+#define CPACF_KM_AES_256_ENC 0x14
+#define CPACF_KM_AES_256_DEC 0x94
+#define CPACF_KM_XTS_128_ENC 0x32
+#define CPACF_KM_XTS_128_DEC 0xb2
+#define CPACF_KM_XTS_256_ENC 0x34
+#define CPACF_KM_XTS_256_DEC 0xb4
+
+/*
+ * Function codes for the KMC (CIPHER MESSAGE WITH CHAINING)
+ * instruction (0x80 is the decipher modifier bit)
+ */
+#define CPACF_KMC_QUERY 0x00
+#define CPACF_KMC_DEA_ENC 0x01
+#define CPACF_KMC_DEA_DEC 0x81
+#define CPACF_KMC_TDEA_128_ENC 0x02
+#define CPACF_KMC_TDEA_128_DEC 0x82
+#define CPACF_KMC_TDEA_192_ENC 0x03
+#define CPACF_KMC_TDEA_192_DEC 0x83
+#define CPACF_KMC_AES_128_ENC 0x12
+#define CPACF_KMC_AES_128_DEC 0x92
+#define CPACF_KMC_AES_192_ENC 0x13
+#define CPACF_KMC_AES_192_DEC 0x93
+#define CPACF_KMC_AES_256_ENC 0x14
+#define CPACF_KMC_AES_256_DEC 0x94
+#define CPACF_KMC_PRNG 0x43
+
+/*
+ * Function codes for the KMCTR (CIPHER MESSAGE WITH COUNTER)
+ * instruction (0x80 is the decipher modifier bit)
+ */
+#define CPACF_KMCTR_QUERY 0x00
+#define CPACF_KMCTR_DEA_ENC 0x01
+#define CPACF_KMCTR_DEA_DEC 0x81
+#define CPACF_KMCTR_TDEA_128_ENC 0x02
+#define CPACF_KMCTR_TDEA_128_DEC 0x82
+#define CPACF_KMCTR_TDEA_192_ENC 0x03
+#define CPACF_KMCTR_TDEA_192_DEC 0x83
+#define CPACF_KMCTR_AES_128_ENC 0x12
+#define CPACF_KMCTR_AES_128_DEC 0x92
+#define CPACF_KMCTR_AES_192_ENC 0x13
+#define CPACF_KMCTR_AES_192_DEC 0x93
+#define CPACF_KMCTR_AES_256_ENC 0x14
+#define CPACF_KMCTR_AES_256_DEC 0x94
+
+/*
+ * Function codes for the KIMD (COMPUTE INTERMEDIATE MESSAGE DIGEST)
+ * instruction (0x80 is the decipher modifier bit)
+ */
+#define CPACF_KIMD_QUERY 0x00
+#define CPACF_KIMD_SHA_1 0x01
+#define CPACF_KIMD_SHA_256 0x02
+#define CPACF_KIMD_SHA_512 0x03
+#define CPACF_KIMD_GHASH 0x41
+
+/*
+ * Function codes for the KLMD (COMPUTE LAST MESSAGE DIGEST)
+ * instruction (0x80 is the decipher modifier bit)
+ */
+#define CPACF_KLMD_QUERY 0x00
+#define CPACF_KLMD_SHA_1 0x01
+#define CPACF_KLMD_SHA_256 0x02
+#define CPACF_KLMD_SHA_512 0x03
+
+/*
+ * function codes for the KMAC (COMPUTE MESSAGE AUTHENTICATION CODE)
+ * instruction (0x80 is the decipher modifier bit)
+ */
+#define CPACF_KMAC_QUERY 0x00
+#define CPACF_KMAC_DEA 0x01
+#define CPACF_KMAC_TDEA_128 0x02
+#define CPACF_KMAC_TDEA_192 0x03
+
+/*
+ * Function codes for the PPNO (PERFORM PSEUDORANDOM NUMBER OPERATION)
+ * instruction (0x80 is the decipher modifier bit)
+ */
+#define CPACF_PPNO_QUERY 0x00
+#define CPACF_PPNO_SHA512_DRNG_GEN 0x03
+#define CPACF_PPNO_SHA512_DRNG_SEED 0x83
+
+/**
+ * cpacf_query() - check if a specific CPACF function is available
+ * @opcode: the opcode of the crypto instruction
+ * @func: the function code to test for
+ *
+ * Executes the query function for the given crypto instruction @opcode
+ * and checks if @func is available
+ *
+ * Returns 1 if @func is available for @opcode, 0 otherwise
+ */
+static inline void __cpacf_query(unsigned int opcode, unsigned char *status)
+{
+ typedef struct { unsigned char _[16]; } status_type;
+ register unsigned long r0 asm("0") = 0; /* query function */
+ register unsigned long r1 asm("1") = (unsigned long) status;
+
+ asm volatile(
+ /* Parameter registers are ignored, but may not be 0 */
+ "0: .insn rrf,%[opc] << 16,2,2,2,0\n"
+ " brc 1,0b\n" /* handle partial completion */
+ : "=m" (*(status_type *) status)
+ : [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (opcode)
+ : "cc");
+}
+
+static inline int cpacf_query(unsigned int opcode, unsigned int func)
+{
+ unsigned char status[16];
+
+ switch (opcode) {
+ case CPACF_KMAC:
+ case CPACF_KM:
+ case CPACF_KMC:
+ case CPACF_KIMD:
+ case CPACF_KLMD:
+ if (!test_facility(17)) /* check for MSA */
+ return 0;
+ break;
+ case CPACF_PCC:
+ case CPACF_KMCTR:
+ if (!test_facility(77)) /* check for MSA4 */
+ return 0;
+ break;
+ case CPACF_PPNO:
+ if (!test_facility(57)) /* check for MSA5 */
+ return 0;
+ break;
+ default:
+ BUG();
+ }
+ __cpacf_query(opcode, status);
+ return (status[func >> 3] & (0x80 >> (func & 7))) != 0;
+}
+
+/**
+ * cpacf_km() - executes the KM (CIPHER MESSAGE) instruction
+ * @func: the function code passed to KM; see CPACF_KM_xxx defines
+ * @param: address of parameter block; see POP for details on each func
+ * @dest: address of destination memory area
+ * @src: address of source memory area
+ * @src_len: length of src operand in bytes
+ *
+ * Returns 0 for the query func, number of processed bytes for
+ * encryption/decryption funcs
+ */
+static inline int cpacf_km(long func, void *param,
+ u8 *dest, const u8 *src, long src_len)
+{
+ register unsigned long r0 asm("0") = (unsigned long) func;
+ register unsigned long r1 asm("1") = (unsigned long) param;
+ register unsigned long r2 asm("2") = (unsigned long) src;
+ register unsigned long r3 asm("3") = (unsigned long) src_len;
+ register unsigned long r4 asm("4") = (unsigned long) dest;
+
+ asm volatile(
+ "0: .insn rre,%[opc] << 16,%[dst],%[src]\n"
+ " brc 1,0b\n" /* handle partial completion */
+ : [src] "+a" (r2), [len] "+d" (r3), [dst] "+a" (r4)
+ : [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (CPACF_KM)
+ : "cc", "memory");
+
+ return src_len - r3;
+}
+
+/**
+ * cpacf_kmc() - executes the KMC (CIPHER MESSAGE WITH CHAINING) instruction
+ * @func: the function code passed to KM; see CPACF_KMC_xxx defines
+ * @param: address of parameter block; see POP for details on each func
+ * @dest: address of destination memory area
+ * @src: address of source memory area
+ * @src_len: length of src operand in bytes
+ *
+ * Returns 0 for the query func, number of processed bytes for
+ * encryption/decryption funcs
+ */
+static inline int cpacf_kmc(long func, void *param,
+ u8 *dest, const u8 *src, long src_len)
+{
+ register unsigned long r0 asm("0") = (unsigned long) func;
+ register unsigned long r1 asm("1") = (unsigned long) param;
+ register unsigned long r2 asm("2") = (unsigned long) src;
+ register unsigned long r3 asm("3") = (unsigned long) src_len;
+ register unsigned long r4 asm("4") = (unsigned long) dest;
+
+ asm volatile(
+ "0: .insn rre,%[opc] << 16,%[dst],%[src]\n"
+ " brc 1,0b\n" /* handle partial completion */
+ : [src] "+a" (r2), [len] "+d" (r3), [dst] "+a" (r4)
+ : [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (CPACF_KMC)
+ : "cc", "memory");
+
+ return src_len - r3;
+}
+
+/**
+ * cpacf_kimd() - executes the KIMD (COMPUTE INTERMEDIATE MESSAGE DIGEST)
+ * instruction
+ * @func: the function code passed to KM; see CPACF_KIMD_xxx defines
+ * @param: address of parameter block; see POP for details on each func
+ * @src: address of source memory area
+ * @src_len: length of src operand in bytes
+ *
+ * Returns 0 for the query func, number of processed bytes for digest funcs
+ */
+static inline int cpacf_kimd(long func, void *param,
+ const u8 *src, long src_len)
+{
+ register unsigned long r0 asm("0") = (unsigned long) func;
+ register unsigned long r1 asm("1") = (unsigned long) param;
+ register unsigned long r2 asm("2") = (unsigned long) src;
+ register unsigned long r3 asm("3") = (unsigned long) src_len;
+
+ asm volatile(
+ "0: .insn rre,%[opc] << 16,0,%[src]\n"
+ " brc 1,0b\n" /* handle partial completion */
+ : [src] "+a" (r2), [len] "+d" (r3)
+ : [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (CPACF_KIMD)
+ : "cc", "memory");
+
+ return src_len - r3;
+}
+
+/**
+ * cpacf_klmd() - executes the KLMD (COMPUTE LAST MESSAGE DIGEST) instruction
+ * @func: the function code passed to KM; see CPACF_KLMD_xxx defines
+ * @param: address of parameter block; see POP for details on each func
+ * @src: address of source memory area
+ * @src_len: length of src operand in bytes
+ *
+ * Returns 0 for the query func, number of processed bytes for digest funcs
+ */
+static inline int cpacf_klmd(long func, void *param,
+ const u8 *src, long src_len)
+{
+ register unsigned long r0 asm("0") = (unsigned long) func;
+ register unsigned long r1 asm("1") = (unsigned long) param;
+ register unsigned long r2 asm("2") = (unsigned long) src;
+ register unsigned long r3 asm("3") = (unsigned long) src_len;
+
+ asm volatile(
+ "0: .insn rre,%[opc] << 16,0,%[src]\n"
+ " brc 1,0b\n" /* handle partial completion */
+ : [src] "+a" (r2), [len] "+d" (r3)
+ : [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (CPACF_KLMD)
+ : "cc", "memory");
+
+ return src_len - r3;
+}
+
+/**
+ * cpacf_kmac() - executes the KMAC (COMPUTE MESSAGE AUTHENTICATION CODE)
+ * instruction
+ * @func: the function code passed to KM; see CPACF_KMAC_xxx defines
+ * @param: address of parameter block; see POP for details on each func
+ * @src: address of source memory area
+ * @src_len: length of src operand in bytes
+ *
+ * Returns 0 for the query func, number of processed bytes for digest funcs
+ */
+static inline int cpacf_kmac(long func, void *param,
+ const u8 *src, long src_len)
+{
+ register unsigned long r0 asm("0") = (unsigned long) func;
+ register unsigned long r1 asm("1") = (unsigned long) param;
+ register unsigned long r2 asm("2") = (unsigned long) src;
+ register unsigned long r3 asm("3") = (unsigned long) src_len;
+
+ asm volatile(
+ "0: .insn rre,%[opc] << 16,0,%[src]\n"
+ " brc 1,0b\n" /* handle partial completion */
+ : [src] "+a" (r2), [len] "+d" (r3)
+ : [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (CPACF_KMAC)
+ : "cc", "memory");
+
+ return src_len - r3;
+}
+
+/**
+ * cpacf_kmctr() - executes the KMCTR (CIPHER MESSAGE WITH COUNTER) instruction
+ * @func: the function code passed to KMCTR; see CPACF_KMCTR_xxx defines
+ * @param: address of parameter block; see POP for details on each func
+ * @dest: address of destination memory area
+ * @src: address of source memory area
+ * @src_len: length of src operand in bytes
+ * @counter: address of counter value
+ *
+ * Returns 0 for the query func, number of processed bytes for
+ * encryption/decryption funcs
+ */
+static inline int cpacf_kmctr(long func, void *param, u8 *dest,
+ const u8 *src, long src_len, u8 *counter)
+{
+ register unsigned long r0 asm("0") = (unsigned long) func;
+ register unsigned long r1 asm("1") = (unsigned long) param;
+ register unsigned long r2 asm("2") = (unsigned long) src;
+ register unsigned long r3 asm("3") = (unsigned long) src_len;
+ register unsigned long r4 asm("4") = (unsigned long) dest;
+ register unsigned long r6 asm("6") = (unsigned long) counter;
+
+ asm volatile(
+ "0: .insn rrf,%[opc] << 16,%[dst],%[src],%[ctr],0\n"
+ " brc 1,0b\n" /* handle partial completion */
+ : [src] "+a" (r2), [len] "+d" (r3),
+ [dst] "+a" (r4), [ctr] "+a" (r6)
+ : [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (CPACF_KMCTR)
+ : "cc", "memory");
+
+ return src_len - r3;
+}
+
+/**
+ * cpacf_ppno() - executes the PPNO (PERFORM PSEUDORANDOM NUMBER OPERATION)
+ * instruction
+ * @func: the function code passed to PPNO; see CPACF_PPNO_xxx defines
+ * @param: address of parameter block; see POP for details on each func
+ * @dest: address of destination memory area
+ * @dest_len: size of destination memory area in bytes
+ * @seed: address of seed data
+ * @seed_len: size of seed data in bytes
+ *
+ * Returns 0 for the query func, number of random bytes stored in
+ * dest buffer for generate function
+ */
+static inline int cpacf_ppno(long func, void *param,
+ u8 *dest, long dest_len,
+ const u8 *seed, long seed_len)
+{
+ register unsigned long r0 asm("0") = (unsigned long) func;
+ register unsigned long r1 asm("1") = (unsigned long) param;
+ register unsigned long r2 asm("2") = (unsigned long) dest;
+ register unsigned long r3 asm("3") = (unsigned long) dest_len;
+ register unsigned long r4 asm("4") = (unsigned long) seed;
+ register unsigned long r5 asm("5") = (unsigned long) seed_len;
+
+ asm volatile (
+ "0: .insn rre,%[opc] << 16,%[dst],%[seed]\n"
+ " brc 1,0b\n" /* handle partial completion */
+ : [dst] "+a" (r2), [dlen] "+d" (r3)
+ : [fc] "d" (r0), [pba] "a" (r1),
+ [seed] "a" (r4), [slen] "d" (r5), [opc] "i" (CPACF_PPNO)
+ : "cc", "memory");
+
+ return dest_len - r3;
+}
+
+/**
+ * cpacf_pcc() - executes the PCC (PERFORM CRYPTOGRAPHIC COMPUTATION)
+ * instruction
+ * @func: the function code passed to PCC; see CPACF_KM_xxx defines
+ * @param: address of parameter block; see POP for details on each func
+ *
+ * Returns 0.
+ */
+static inline int cpacf_pcc(long func, void *param)
+{
+ register unsigned long r0 asm("0") = (unsigned long) func;
+ register unsigned long r1 asm("1") = (unsigned long) param;
+
+ asm volatile(
+ "0: .insn rre,%[opc] << 16,0,0\n" /* PCC opcode */
+ " brc 1,0b\n" /* handle partial completion */
+ :
+ : [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (CPACF_PCC)
+ : "cc", "memory");
+
+ return 0;
+}
+
+#endif /* _ASM_S390_CPACF_H */
diff --git a/arch/s390/include/asm/device.h b/arch/s390/include/asm/device.h
index d8f9872b0e2d..4a9f35e0973f 100644
--- a/arch/s390/include/asm/device.h
+++ b/arch/s390/include/asm/device.h
@@ -3,5 +3,9 @@
*
* This file is released under the GPLv2
*/
-#include <asm-generic/device.h>
+struct dev_archdata {
+ struct dma_map_ops *dma_ops;
+};
+struct pdev_archdata {
+};
diff --git a/arch/s390/include/asm/dma-mapping.h b/arch/s390/include/asm/dma-mapping.h
index e64bfcb9702f..3249b7464889 100644
--- a/arch/s390/include/asm/dma-mapping.h
+++ b/arch/s390/include/asm/dma-mapping.h
@@ -11,11 +11,13 @@
#define DMA_ERROR_CODE (~(dma_addr_t) 0x0)
-extern struct dma_map_ops s390_dma_ops;
+extern struct dma_map_ops s390_pci_dma_ops;
static inline struct dma_map_ops *get_dma_ops(struct device *dev)
{
- return &s390_dma_ops;
+ if (dev && dev->archdata.dma_ops)
+ return dev->archdata.dma_ops;
+ return &dma_noop_ops;
}
static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
diff --git a/arch/s390/include/asm/fpu/api.h b/arch/s390/include/asm/fpu/api.h
index 5e04f3cbd320..8ae236b0f80b 100644
--- a/arch/s390/include/asm/fpu/api.h
+++ b/arch/s390/include/asm/fpu/api.h
@@ -22,7 +22,7 @@ static inline int test_fp_ctl(u32 fpc)
" la %0,0\n"
"1:\n"
EX_TABLE(0b,1b)
- : "=d" (rc), "=d" (orig_fpc)
+ : "=d" (rc), "=&d" (orig_fpc)
: "d" (fpc), "0" (-EINVAL));
return rc;
}
diff --git a/arch/s390/include/asm/fpu/types.h b/arch/s390/include/asm/fpu/types.h
index 14a8b0c14f87..fe937c9b6471 100644
--- a/arch/s390/include/asm/fpu/types.h
+++ b/arch/s390/include/asm/fpu/types.h
@@ -11,11 +11,13 @@
#include <asm/sigcontext.h>
struct fpu {
- __u32 fpc; /* Floating-point control */
+ __u32 fpc; /* Floating-point control */
+ void *regs; /* Pointer to the current save area */
union {
- void *regs;
- freg_t *fprs; /* Floating-point register save area */
- __vector128 *vxrs; /* Vector register save area */
+ /* Floating-point register save area */
+ freg_t fprs[__NUM_FPRS];
+ /* Vector register save area */
+ __vector128 vxrs[__NUM_VXRS];
};
};
diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h
index 836c56290499..64053d9ac3f2 100644
--- a/arch/s390/include/asm/ftrace.h
+++ b/arch/s390/include/asm/ftrace.h
@@ -12,7 +12,9 @@
#ifndef __ASSEMBLY__
-#define ftrace_return_address(n) __builtin_return_address(n)
+unsigned long return_address(int depth);
+
+#define ftrace_return_address(n) return_address(n)
void _mcount(void);
void ftrace_caller(void);
diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h
new file mode 100644
index 000000000000..d054c1b07a3c
--- /dev/null
+++ b/arch/s390/include/asm/gmap.h
@@ -0,0 +1,64 @@
+/*
+ * KVM guest address space mapping code
+ *
+ * Copyright IBM Corp. 2007, 2016
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef _ASM_S390_GMAP_H
+#define _ASM_S390_GMAP_H
+
+/**
+ * struct gmap_struct - guest address space
+ * @crst_list: list of all crst tables used in the guest address space
+ * @mm: pointer to the parent mm_struct
+ * @guest_to_host: radix tree with guest to host address translation
+ * @host_to_guest: radix tree with pointer to segment table entries
+ * @guest_table_lock: spinlock to protect all entries in the guest page table
+ * @table: pointer to the page directory
+ * @asce: address space control element for gmap page table
+ * @pfault_enabled: defines if pfaults are applicable for the guest
+ */
+struct gmap {
+ struct list_head list;
+ struct list_head crst_list;
+ struct mm_struct *mm;
+ struct radix_tree_root guest_to_host;
+ struct radix_tree_root host_to_guest;
+ spinlock_t guest_table_lock;
+ unsigned long *table;
+ unsigned long asce;
+ unsigned long asce_end;
+ void *private;
+ bool pfault_enabled;
+};
+
+/**
+ * struct gmap_notifier - notify function block for page invalidation
+ * @notifier_call: address of callback function
+ */
+struct gmap_notifier {
+ struct list_head list;
+ void (*notifier_call)(struct gmap *gmap, unsigned long gaddr);
+};
+
+struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit);
+void gmap_free(struct gmap *gmap);
+void gmap_enable(struct gmap *gmap);
+void gmap_disable(struct gmap *gmap);
+int gmap_map_segment(struct gmap *gmap, unsigned long from,
+ unsigned long to, unsigned long len);
+int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len);
+unsigned long __gmap_translate(struct gmap *, unsigned long gaddr);
+unsigned long gmap_translate(struct gmap *, unsigned long gaddr);
+int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr);
+int gmap_fault(struct gmap *, unsigned long gaddr, unsigned int fault_flags);
+void gmap_discard(struct gmap *, unsigned long from, unsigned long to);
+void __gmap_zap(struct gmap *, unsigned long gaddr);
+void gmap_unlink(struct mm_struct *, unsigned long *table, unsigned long vmaddr);
+
+void gmap_register_ipte_notifier(struct gmap_notifier *);
+void gmap_unregister_ipte_notifier(struct gmap_notifier *);
+int gmap_ipte_notify(struct gmap *, unsigned long start, unsigned long len);
+
+#endif /* _ASM_S390_GMAP_H */
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 8959ebb6d2c9..ac82e8eb936d 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -20,6 +20,7 @@
#include <linux/kvm_types.h>
#include <linux/kvm_host.h>
#include <linux/kvm.h>
+#include <linux/seqlock.h>
#include <asm/debug.h>
#include <asm/cpu.h>
#include <asm/fpu/api.h>
@@ -37,7 +38,7 @@
*/
#define KVM_NR_IRQCHIPS 1
#define KVM_IRQCHIP_NUM_PINS 4096
-#define KVM_HALT_POLL_NS_DEFAULT 0
+#define KVM_HALT_POLL_NS_DEFAULT 80000
/* s390-specific vcpu->requests bit members */
#define KVM_REQ_ENABLE_IBS 8
@@ -229,17 +230,11 @@ struct kvm_s390_itdb {
__u8 data[256];
} __packed;
-struct kvm_s390_vregs {
- __vector128 vrs[32];
- __u8 reserved200[512]; /* for future vector expansion */
-} __packed;
-
struct sie_page {
struct kvm_s390_sie_block sie_block;
__u8 reserved200[1024]; /* 0x0200 */
struct kvm_s390_itdb itdb; /* 0x0600 */
- __u8 reserved700[1280]; /* 0x0700 */
- struct kvm_s390_vregs vregs; /* 0x0c00 */
+ __u8 reserved700[2304]; /* 0x0700 */
} __packed;
struct kvm_vcpu_stat {
@@ -250,8 +245,10 @@ struct kvm_vcpu_stat {
u32 exit_stop_request;
u32 exit_validity;
u32 exit_instruction;
+ u32 exit_pei;
u32 halt_successful_poll;
u32 halt_attempted_poll;
+ u32 halt_poll_invalid;
u32 halt_wakeup;
u32 instruction_lctl;
u32 instruction_lctlg;
@@ -467,7 +464,7 @@ struct kvm_s390_irq_payload {
struct kvm_s390_local_interrupt {
spinlock_t lock;
struct kvm_s390_float_interrupt *float_int;
- wait_queue_head_t *wq;
+ struct swait_queue_head *wq;
atomic_t *cpuflags;
DECLARE_BITMAP(sigp_emerg_pending, KVM_MAX_VCPUS);
struct kvm_s390_irq_payload irq;
@@ -549,15 +546,20 @@ struct kvm_vcpu_arch {
struct kvm_s390_local_interrupt local_int;
struct hrtimer ckc_timer;
struct kvm_s390_pgm_info pgm;
- union {
- struct cpuid cpu_id;
- u64 stidp_data;
- };
struct gmap *gmap;
struct kvm_guestdbg_info_arch guestdbg;
unsigned long pfault_token;
unsigned long pfault_select;
unsigned long pfault_compare;
+ bool cputm_enabled;
+ /*
+ * The seqcount protects updates to cputm_start and sie_block.cputm,
+ * this way we can have non-blocking reads with consistent values.
+ * Only the owning VCPU thread (vcpu->cpu) is allowed to change these
+ * values and to start/stop/enable/disable cpu timer accounting.
+ */
+ seqcount_t cputm_seqcount;
+ __u64 cputm_start;
};
struct kvm_vm_stat {
@@ -596,16 +598,12 @@ struct s390_io_adapter {
#define S390_ARCH_FAC_MASK_SIZE_U64 \
(S390_ARCH_FAC_MASK_SIZE_BYTE / sizeof(u64))
-struct kvm_s390_fac {
- /* facility list requested by guest */
- __u64 list[S390_ARCH_FAC_LIST_SIZE_U64];
- /* facility mask supported by kvm & hosting machine */
- __u64 mask[S390_ARCH_FAC_LIST_SIZE_U64];
-};
-
struct kvm_s390_cpu_model {
- struct kvm_s390_fac *fac;
- struct cpuid cpu_id;
+ /* facility mask supported by kvm & hosting machine */
+ __u64 fac_mask[S390_ARCH_FAC_LIST_SIZE_U64];
+ /* facility list requested by guest (in dma page) */
+ __u64 *fac_list;
+ u64 cpuid;
unsigned short ibc;
};
@@ -623,6 +621,16 @@ struct kvm_s390_crypto_cb {
__u8 reserved80[128]; /* 0x0080 */
};
+/*
+ * sie_page2 has to be allocated as DMA because fac_list and crycb need
+ * 31bit addresses in the sie control block.
+ */
+struct sie_page2 {
+ __u64 fac_list[S390_ARCH_FAC_LIST_SIZE_U64]; /* 0x0000 */
+ struct kvm_s390_crypto_cb crycb; /* 0x0800 */
+ u8 reserved900[0x1000 - 0x900]; /* 0x0900 */
+} __packed;
+
struct kvm_arch{
void *sca;
int use_esca;
@@ -643,6 +651,7 @@ struct kvm_arch{
int ipte_lock_count;
struct mutex ipte_mutex;
spinlock_t start_stop_lock;
+ struct sie_page2 *sie_page2;
struct kvm_s390_cpu_model model;
struct kvm_s390_crypto crypto;
u64 epoch;
@@ -689,4 +698,6 @@ static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
+void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu);
+
#endif
diff --git a/arch/s390/include/asm/livepatch.h b/arch/s390/include/asm/livepatch.h
index a52b6cca873d..2c1213785892 100644
--- a/arch/s390/include/asm/livepatch.h
+++ b/arch/s390/include/asm/livepatch.h
@@ -19,25 +19,14 @@
#include <linux/module.h>
-#ifdef CONFIG_LIVEPATCH
static inline int klp_check_compiler_support(void)
{
return 0;
}
-static inline int klp_write_module_reloc(struct module *mod, unsigned long
- type, unsigned long loc, unsigned long value)
-{
- /* not supported yet */
- return -ENOSYS;
-}
-
static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip)
{
regs->psw.addr = ip;
}
-#else
-#error Include linux/livepatch.h, not asm/livepatch.h
-#endif
#endif
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
index d29ad9545b41..081b2ad99d73 100644
--- a/arch/s390/include/asm/mmu.h
+++ b/arch/s390/include/asm/mmu.h
@@ -11,7 +11,7 @@ typedef struct {
spinlock_t list_lock;
struct list_head pgtable_list;
struct list_head gmap_list;
- unsigned long asce_bits;
+ unsigned long asce;
unsigned long asce_limit;
unsigned long vdso_base;
/* The mmu context allocates 4K page tables. */
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index e485817f7b1a..c837b79b455d 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -26,12 +26,28 @@ static inline int init_new_context(struct task_struct *tsk,
mm->context.has_pgste = 0;
mm->context.use_skey = 0;
#endif
- if (mm->context.asce_limit == 0) {
+ switch (mm->context.asce_limit) {
+ case 1UL << 42:
+ /*
+ * forked 3-level task, fall through to set new asce with new
+ * mm->pgd
+ */
+ case 0:
/* context created by exec, set asce limit to 4TB */
- mm->context.asce_bits = _ASCE_TABLE_LENGTH |
- _ASCE_USER_BITS | _ASCE_TYPE_REGION3;
mm->context.asce_limit = STACK_TOP_MAX;
- } else if (mm->context.asce_limit == (1UL << 31)) {
+ mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS | _ASCE_TYPE_REGION3;
+ break;
+ case 1UL << 53:
+ /* forked 4-level task, set new asce with new mm->pgd */
+ mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS | _ASCE_TYPE_REGION2;
+ break;
+ case 1UL << 31:
+ /* forked 2-level compat task, set new asce with new mm->pgd */
+ mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT;
+ /* pgd_alloc() did not increase mm->nr_pmds */
mm_inc_nr_pmds(mm);
}
crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
@@ -42,7 +58,7 @@ static inline int init_new_context(struct task_struct *tsk,
static inline void set_user_asce(struct mm_struct *mm)
{
- S390_lowcore.user_asce = mm->context.asce_bits | __pa(mm->pgd);
+ S390_lowcore.user_asce = mm->context.asce;
if (current->thread.mm_segment.ar4)
__ctl_load(S390_lowcore.user_asce, 7, 7);
set_cpu_flag(CIF_ASCE);
@@ -71,7 +87,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
{
int cpu = smp_processor_id();
- S390_lowcore.user_asce = next->context.asce_bits | __pa(next->pgd);
+ S390_lowcore.user_asce = next->context.asce;
if (prev == next)
return;
if (MACHINE_HAS_TLB_LC)
@@ -136,4 +152,16 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm,
{
}
+static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
+ bool write, bool execute, bool foreign)
+{
+ /* by default, allow everything */
+ return true;
+}
+
+static inline bool arch_pte_access_permitted(pte_t pte, bool write)
+{
+ /* by default, allow everything */
+ return true;
+}
#endif /* __S390_MMU_CONTEXT_H */
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index c873e682b67f..0da91c4d30fd 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -9,7 +9,6 @@
#include <linux/pci.h>
#include <linux/mutex.h>
#include <asm-generic/pci.h>
-#include <asm-generic/pci-dma-compat.h>
#include <asm/pci_clp.h>
#include <asm/pci_debug.h>
@@ -32,20 +31,42 @@ int pci_proc_domain(struct pci_bus *);
#define ZPCI_FC_BLOCKED 0x20
#define ZPCI_FC_DMA_ENABLED 0x10
+#define ZPCI_FMB_DMA_COUNTER_VALID (1 << 23)
+
+struct zpci_fmb_fmt0 {
+ u64 dma_rbytes;
+ u64 dma_wbytes;
+};
+
+struct zpci_fmb_fmt1 {
+ u64 rx_bytes;
+ u64 rx_packets;
+ u64 tx_bytes;
+ u64 tx_packets;
+};
+
+struct zpci_fmb_fmt2 {
+ u64 consumed_work_units;
+ u64 max_work_units;
+};
+
struct zpci_fmb {
- u32 format : 8;
- u32 dma_valid : 1;
- u32 : 23;
+ u32 format : 8;
+ u32 fmt_ind : 24;
u32 samples;
u64 last_update;
- /* hardware counters */
+ /* common counters */
u64 ld_ops;
u64 st_ops;
u64 stb_ops;
u64 rpcit_ops;
- u64 dma_rbytes;
- u64 dma_wbytes;
-} __packed __aligned(16);
+ /* format specific counters */
+ union {
+ struct zpci_fmb_fmt0 fmt0;
+ struct zpci_fmb_fmt1 fmt1;
+ struct zpci_fmb_fmt2 fmt2;
+ };
+} __packed __aligned(128);
enum zpci_state {
ZPCI_FN_STATE_RESERVED,
@@ -66,7 +87,6 @@ struct s390_domain;
/* Private data per function */
struct zpci_dev {
- struct pci_dev *pdev;
struct pci_bus *bus;
struct list_head entry; /* list of all zpci_devices, needed for hotplug, etc. */
@@ -192,7 +212,7 @@ int zpci_fmb_disable_device(struct zpci_dev *);
/* Debug */
int zpci_debug_init(void);
void zpci_debug_exit(void);
-void zpci_debug_init_device(struct zpci_dev *);
+void zpci_debug_init_device(struct zpci_dev *, const char *);
void zpci_debug_exit_device(struct zpci_dev *);
void zpci_debug_info(struct zpci_dev *, struct seq_file *);
diff --git a/arch/s390/include/asm/pci_clp.h b/arch/s390/include/asm/pci_clp.h
index dd78f92f1cce..e75c64cbcf08 100644
--- a/arch/s390/include/asm/pci_clp.h
+++ b/arch/s390/include/asm/pci_clp.h
@@ -49,9 +49,6 @@ struct clp_fh_list_entry {
/* List PCI functions request */
struct clp_req_list_pci {
struct clp_req_hdr hdr;
- u32 fmt : 4; /* cmd request block format */
- u32 : 28;
- u64 reserved1;
u64 resume_token;
u64 reserved2;
} __packed;
@@ -59,9 +56,6 @@ struct clp_req_list_pci {
/* List PCI functions response */
struct clp_rsp_list_pci {
struct clp_rsp_hdr hdr;
- u32 fmt : 4; /* cmd request block format */
- u32 : 28;
- u64 reserved1;
u64 resume_token;
u32 reserved2;
u16 max_fn;
@@ -73,9 +67,6 @@ struct clp_rsp_list_pci {
/* Query PCI function request */
struct clp_req_query_pci {
struct clp_req_hdr hdr;
- u32 fmt : 4; /* cmd request block format */
- u32 : 28;
- u64 reserved1;
u32 fh; /* function handle */
u32 reserved2;
u64 reserved3;
@@ -84,9 +75,6 @@ struct clp_req_query_pci {
/* Query PCI function response */
struct clp_rsp_query_pci {
struct clp_rsp_hdr hdr;
- u32 fmt : 4; /* cmd request block format */
- u32 : 28;
- u64 : 64;
u16 vfn; /* virtual fn number */
u16 : 7;
u16 util_str_avail : 1; /* utility string available? */
@@ -108,21 +96,15 @@ struct clp_rsp_query_pci {
/* Query PCI function group request */
struct clp_req_query_pci_grp {
struct clp_req_hdr hdr;
- u32 fmt : 4; /* cmd request block format */
- u32 : 28;
- u64 reserved1;
- u32 : 24;
+ u32 reserved2 : 24;
u32 pfgid : 8; /* function group id */
- u32 reserved2;
- u64 reserved3;
+ u32 reserved3;
+ u64 reserved4;
} __packed;
/* Query PCI function group response */
struct clp_rsp_query_pci_grp {
struct clp_rsp_hdr hdr;
- u32 fmt : 4; /* cmd request block format */
- u32 : 28;
- u64 reserved1;
u16 : 4;
u16 noi : 12; /* number of interrupts */
u8 version;
@@ -141,9 +123,6 @@ struct clp_rsp_query_pci_grp {
/* Set PCI function request */
struct clp_req_set_pci {
struct clp_req_hdr hdr;
- u32 fmt : 4; /* cmd request block format */
- u32 : 28;
- u64 reserved1;
u32 fh; /* function handle */
u16 reserved2;
u8 oc; /* operation controls */
@@ -154,9 +133,6 @@ struct clp_req_set_pci {
/* Set PCI function response */
struct clp_rsp_set_pci {
struct clp_rsp_hdr hdr;
- u32 fmt : 4; /* cmd request block format */
- u32 : 28;
- u64 reserved1;
u32 fh; /* function handle */
u32 reserved3;
u64 reserved4;
diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h
index 6d6556ca24aa..90240dfef76a 100644
--- a/arch/s390/include/asm/percpu.h
+++ b/arch/s390/include/asm/percpu.h
@@ -178,7 +178,6 @@
ret__; \
})
-#define this_cpu_cmpxchg_double_4 arch_this_cpu_cmpxchg_double
#define this_cpu_cmpxchg_double_8 arch_this_cpu_cmpxchg_double
#include <asm-generic/percpu.h>
diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h
index f897ec73dc8c..1f7ff85c5e4c 100644
--- a/arch/s390/include/asm/perf_event.h
+++ b/arch/s390/include/asm/perf_event.h
@@ -21,7 +21,7 @@
#define PMU_F_ERR_LSDA 0x0200
#define PMU_F_ERR_MASK (PMU_F_ERR_IBE|PMU_F_ERR_LSDA)
-/* Perf defintions for PMU event attributes in sysfs */
+/* Perf definitions for PMU event attributes in sysfs */
extern __init const struct attribute_group **cpumf_cf_event_group(void);
extern ssize_t cpumf_events_sysfs_show(struct device *dev,
struct device_attribute *attr,
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index d7cc79fb6191..da34cb6b1f3b 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -23,10 +23,6 @@ void page_table_free(struct mm_struct *, unsigned long *);
void page_table_free_rcu(struct mmu_gather *, unsigned long *, unsigned long);
extern int page_table_allocate_pgste;
-int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
- unsigned long key, bool nq);
-unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr);
-
static inline void clear_table(unsigned long *s, unsigned long val, size_t n)
{
typedef struct { char _[n]; } addrtype;
@@ -56,8 +52,8 @@ static inline unsigned long pgd_entry_type(struct mm_struct *mm)
return _REGION2_ENTRY_EMPTY;
}
-int crst_table_upgrade(struct mm_struct *, unsigned long limit);
-void crst_table_downgrade(struct mm_struct *, unsigned long limit);
+int crst_table_upgrade(struct mm_struct *);
+void crst_table_downgrade(struct mm_struct *);
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
{
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 64ead8091248..18d2beb89340 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -298,15 +298,15 @@ static inline int is_module_addr(void *addr)
/*
* Segment table entry encoding (R = read-only, I = invalid, y = young bit):
- * dy..R...I...wr
+ * dy..R...I...rw
* prot-none, clean, old 00..1...1...00
* prot-none, clean, young 01..1...1...00
* prot-none, dirty, old 10..1...1...00
* prot-none, dirty, young 11..1...1...00
- * read-only, clean, old 00..1...1...01
- * read-only, clean, young 01..1...0...01
- * read-only, dirty, old 10..1...1...01
- * read-only, dirty, young 11..1...0...01
+ * read-only, clean, old 00..1...1...10
+ * read-only, clean, young 01..1...0...10
+ * read-only, dirty, old 10..1...1...10
+ * read-only, dirty, young 11..1...0...10
* read-write, clean, old 00..1...1...11
* read-write, clean, young 01..1...0...11
* read-write, dirty, old 10..0...1...11
@@ -520,15 +520,6 @@ static inline int pmd_bad(pmd_t pmd)
return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS) != 0;
}
-#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
-extern int pmdp_set_access_flags(struct vm_area_struct *vma,
- unsigned long address, pmd_t *pmdp,
- pmd_t entry, int dirty);
-
-#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
-extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
- unsigned long address, pmd_t *pmdp);
-
#define __HAVE_ARCH_PMD_WRITE
static inline int pmd_write(pmd_t pmd)
{
@@ -631,208 +622,6 @@ static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd)
return pmd;
}
-static inline pgste_t pgste_get_lock(pte_t *ptep)
-{
- unsigned long new = 0;
-#ifdef CONFIG_PGSTE
- unsigned long old;
-
- preempt_disable();
- asm(
- " lg %0,%2\n"
- "0: lgr %1,%0\n"
- " nihh %0,0xff7f\n" /* clear PCL bit in old */
- " oihh %1,0x0080\n" /* set PCL bit in new */
- " csg %0,%1,%2\n"
- " jl 0b\n"
- : "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE])
- : "Q" (ptep[PTRS_PER_PTE]) : "cc", "memory");
-#endif
- return __pgste(new);
-}
-
-static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
-{
-#ifdef CONFIG_PGSTE
- asm(
- " nihh %1,0xff7f\n" /* clear PCL bit */
- " stg %1,%0\n"
- : "=Q" (ptep[PTRS_PER_PTE])
- : "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE])
- : "cc", "memory");
- preempt_enable();
-#endif
-}
-
-static inline pgste_t pgste_get(pte_t *ptep)
-{
- unsigned long pgste = 0;
-#ifdef CONFIG_PGSTE
- pgste = *(unsigned long *)(ptep + PTRS_PER_PTE);
-#endif
- return __pgste(pgste);
-}
-
-static inline void pgste_set(pte_t *ptep, pgste_t pgste)
-{
-#ifdef CONFIG_PGSTE
- *(pgste_t *)(ptep + PTRS_PER_PTE) = pgste;
-#endif
-}
-
-static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste,
- struct mm_struct *mm)
-{
-#ifdef CONFIG_PGSTE
- unsigned long address, bits, skey;
-
- if (!mm_use_skey(mm) || pte_val(*ptep) & _PAGE_INVALID)
- return pgste;
- address = pte_val(*ptep) & PAGE_MASK;
- skey = (unsigned long) page_get_storage_key(address);
- bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
- /* Transfer page changed & referenced bit to guest bits in pgste */
- pgste_val(pgste) |= bits << 48; /* GR bit & GC bit */
- /* Copy page access key and fetch protection bit to pgste */
- pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
- pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
-#endif
- return pgste;
-
-}
-
-static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry,
- struct mm_struct *mm)
-{
-#ifdef CONFIG_PGSTE
- unsigned long address;
- unsigned long nkey;
-
- if (!mm_use_skey(mm) || pte_val(entry) & _PAGE_INVALID)
- return;
- VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID));
- address = pte_val(entry) & PAGE_MASK;
- /*
- * Set page access key and fetch protection bit from pgste.
- * The guest C/R information is still in the PGSTE, set real
- * key C/R to 0.
- */
- nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
- nkey |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
- page_set_storage_key(address, nkey, 0);
-#endif
-}
-
-static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
-{
- if ((pte_val(entry) & _PAGE_PRESENT) &&
- (pte_val(entry) & _PAGE_WRITE) &&
- !(pte_val(entry) & _PAGE_INVALID)) {
- if (!MACHINE_HAS_ESOP) {
- /*
- * Without enhanced suppression-on-protection force
- * the dirty bit on for all writable ptes.
- */
- pte_val(entry) |= _PAGE_DIRTY;
- pte_val(entry) &= ~_PAGE_PROTECT;
- }
- if (!(pte_val(entry) & _PAGE_PROTECT))
- /* This pte allows write access, set user-dirty */
- pgste_val(pgste) |= PGSTE_UC_BIT;
- }
- *ptep = entry;
- return pgste;
-}
-
-/**
- * struct gmap_struct - guest address space
- * @crst_list: list of all crst tables used in the guest address space
- * @mm: pointer to the parent mm_struct
- * @guest_to_host: radix tree with guest to host address translation
- * @host_to_guest: radix tree with pointer to segment table entries
- * @guest_table_lock: spinlock to protect all entries in the guest page table
- * @table: pointer to the page directory
- * @asce: address space control element for gmap page table
- * @pfault_enabled: defines if pfaults are applicable for the guest
- */
-struct gmap {
- struct list_head list;
- struct list_head crst_list;
- struct mm_struct *mm;
- struct radix_tree_root guest_to_host;
- struct radix_tree_root host_to_guest;
- spinlock_t guest_table_lock;
- unsigned long *table;
- unsigned long asce;
- unsigned long asce_end;
- void *private;
- bool pfault_enabled;
-};
-
-/**
- * struct gmap_notifier - notify function block for page invalidation
- * @notifier_call: address of callback function
- */
-struct gmap_notifier {
- struct list_head list;
- void (*notifier_call)(struct gmap *gmap, unsigned long gaddr);
-};
-
-struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit);
-void gmap_free(struct gmap *gmap);
-void gmap_enable(struct gmap *gmap);
-void gmap_disable(struct gmap *gmap);
-int gmap_map_segment(struct gmap *gmap, unsigned long from,
- unsigned long to, unsigned long len);
-int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len);
-unsigned long __gmap_translate(struct gmap *, unsigned long gaddr);
-unsigned long gmap_translate(struct gmap *, unsigned long gaddr);
-int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr);
-int gmap_fault(struct gmap *, unsigned long gaddr, unsigned int fault_flags);
-void gmap_discard(struct gmap *, unsigned long from, unsigned long to);
-void __gmap_zap(struct gmap *, unsigned long gaddr);
-bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *);
-
-
-void gmap_register_ipte_notifier(struct gmap_notifier *);
-void gmap_unregister_ipte_notifier(struct gmap_notifier *);
-int gmap_ipte_notify(struct gmap *, unsigned long start, unsigned long len);
-void gmap_do_ipte_notify(struct mm_struct *, unsigned long addr, pte_t *);
-
-static inline pgste_t pgste_ipte_notify(struct mm_struct *mm,
- unsigned long addr,
- pte_t *ptep, pgste_t pgste)
-{
-#ifdef CONFIG_PGSTE
- if (pgste_val(pgste) & PGSTE_IN_BIT) {
- pgste_val(pgste) &= ~PGSTE_IN_BIT;
- gmap_do_ipte_notify(mm, addr, ptep);
- }
-#endif
- return pgste;
-}
-
-/*
- * Certain architectures need to do special things when PTEs
- * within a page table are directly modified. Thus, the following
- * hook is made available.
- */
-static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, pte_t entry)
-{
- pgste_t pgste;
-
- if (mm_has_pgste(mm)) {
- pgste = pgste_get_lock(ptep);
- pgste_val(pgste) &= ~_PGSTE_GPS_ZERO;
- pgste_set_key(ptep, pgste, entry, mm);
- pgste = pgste_set_pte(ptep, pgste, entry);
- pgste_set_unlock(ptep, pgste);
- } else {
- *ptep = entry;
- }
-}
-
/*
* query functions pte_write/pte_dirty/pte_young only work if
* pte_present() is true. Undefined behaviour if not..
@@ -998,96 +787,30 @@ static inline void __ptep_ipte_range(unsigned long address, int nr, pte_t *ptep)
} while (nr != 255);
}
-static inline void ptep_flush_direct(struct mm_struct *mm,
- unsigned long address, pte_t *ptep)
-{
- int active, count;
-
- if (pte_val(*ptep) & _PAGE_INVALID)
- return;
- active = (mm == current->active_mm) ? 1 : 0;
- count = atomic_add_return(0x10000, &mm->context.attach_count);
- if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active &&
- cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
- __ptep_ipte_local(address, ptep);
- else
- __ptep_ipte(address, ptep);
- atomic_sub(0x10000, &mm->context.attach_count);
-}
-
-static inline void ptep_flush_lazy(struct mm_struct *mm,
- unsigned long address, pte_t *ptep)
-{
- int active, count;
-
- if (pte_val(*ptep) & _PAGE_INVALID)
- return;
- active = (mm == current->active_mm) ? 1 : 0;
- count = atomic_add_return(0x10000, &mm->context.attach_count);
- if ((count & 0xffff) <= active) {
- pte_val(*ptep) |= _PAGE_INVALID;
- mm->context.flush_mm = 1;
- } else
- __ptep_ipte(address, ptep);
- atomic_sub(0x10000, &mm->context.attach_count);
-}
-
/*
- * Get (and clear) the user dirty bit for a pte.
+ * This is hard to understand. ptep_get_and_clear and ptep_clear_flush
+ * both clear the TLB for the unmapped pte. The reason is that
+ * ptep_get_and_clear is used in common code (e.g. change_pte_range)
+ * to modify an active pte. The sequence is
+ * 1) ptep_get_and_clear
+ * 2) set_pte_at
+ * 3) flush_tlb_range
+ * On s390 the tlb needs to get flushed with the modification of the pte
+ * if the pte is active. The only way how this can be implemented is to
+ * have ptep_get_and_clear do the tlb flush. In exchange flush_tlb_range
+ * is a nop.
*/
-static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm,
- unsigned long addr,
- pte_t *ptep)
-{
- pgste_t pgste;
- pte_t pte;
- int dirty;
-
- if (!mm_has_pgste(mm))
- return 0;
- pgste = pgste_get_lock(ptep);
- dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
- pgste_val(pgste) &= ~PGSTE_UC_BIT;
- pte = *ptep;
- if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
- pgste = pgste_ipte_notify(mm, addr, ptep, pgste);
- __ptep_ipte(addr, ptep);
- if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
- pte_val(pte) |= _PAGE_PROTECT;
- else
- pte_val(pte) |= _PAGE_INVALID;
- *ptep = pte;
- }
- pgste_set_unlock(ptep, pgste);
- return dirty;
-}
+pte_t ptep_xchg_direct(struct mm_struct *, unsigned long, pte_t *, pte_t);
+pte_t ptep_xchg_lazy(struct mm_struct *, unsigned long, pte_t *, pte_t);
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
{
- pgste_t pgste;
- pte_t pte, oldpte;
- int young;
-
- if (mm_has_pgste(vma->vm_mm)) {
- pgste = pgste_get_lock(ptep);
- pgste = pgste_ipte_notify(vma->vm_mm, addr, ptep, pgste);
- }
-
- oldpte = pte = *ptep;
- ptep_flush_direct(vma->vm_mm, addr, ptep);
- young = pte_young(pte);
- pte = pte_mkold(pte);
-
- if (mm_has_pgste(vma->vm_mm)) {
- pgste = pgste_update_all(&oldpte, pgste, vma->vm_mm);
- pgste = pgste_set_pte(ptep, pgste, pte);
- pgste_set_unlock(ptep, pgste);
- } else
- *ptep = pte;
+ pte_t pte = *ptep;
- return young;
+ pte = ptep_xchg_direct(vma->vm_mm, addr, ptep, pte_mkold(pte));
+ return pte_young(pte);
}
#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
@@ -1097,104 +820,22 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
return ptep_test_and_clear_young(vma, address, ptep);
}
-/*
- * This is hard to understand. ptep_get_and_clear and ptep_clear_flush
- * both clear the TLB for the unmapped pte. The reason is that
- * ptep_get_and_clear is used in common code (e.g. change_pte_range)
- * to modify an active pte. The sequence is
- * 1) ptep_get_and_clear
- * 2) set_pte_at
- * 3) flush_tlb_range
- * On s390 the tlb needs to get flushed with the modification of the pte
- * if the pte is active. The only way how this can be implemented is to
- * have ptep_get_and_clear do the tlb flush. In exchange flush_tlb_range
- * is a nop.
- */
#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
- unsigned long address, pte_t *ptep)
+ unsigned long addr, pte_t *ptep)
{
- pgste_t pgste;
- pte_t pte;
-
- if (mm_has_pgste(mm)) {
- pgste = pgste_get_lock(ptep);
- pgste = pgste_ipte_notify(mm, address, ptep, pgste);
- }
-
- pte = *ptep;
- ptep_flush_lazy(mm, address, ptep);
- pte_val(*ptep) = _PAGE_INVALID;
-
- if (mm_has_pgste(mm)) {
- pgste = pgste_update_all(&pte, pgste, mm);
- pgste_set_unlock(ptep, pgste);
- }
- return pte;
+ return ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
}
#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
-static inline pte_t ptep_modify_prot_start(struct mm_struct *mm,
- unsigned long address,
- pte_t *ptep)
-{
- pgste_t pgste;
- pte_t pte;
-
- if (mm_has_pgste(mm)) {
- pgste = pgste_get_lock(ptep);
- pgste_ipte_notify(mm, address, ptep, pgste);
- }
-
- pte = *ptep;
- ptep_flush_lazy(mm, address, ptep);
-
- if (mm_has_pgste(mm)) {
- pgste = pgste_update_all(&pte, pgste, mm);
- pgste_set(ptep, pgste);
- }
- return pte;
-}
-
-static inline void ptep_modify_prot_commit(struct mm_struct *mm,
- unsigned long address,
- pte_t *ptep, pte_t pte)
-{
- pgste_t pgste;
-
- if (mm_has_pgste(mm)) {
- pgste = pgste_get(ptep);
- pgste_set_key(ptep, pgste, pte, mm);
- pgste = pgste_set_pte(ptep, pgste, pte);
- pgste_set_unlock(ptep, pgste);
- } else
- *ptep = pte;
-}
+pte_t ptep_modify_prot_start(struct mm_struct *, unsigned long, pte_t *);
+void ptep_modify_prot_commit(struct mm_struct *, unsigned long, pte_t *, pte_t);
#define __HAVE_ARCH_PTEP_CLEAR_FLUSH
static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
- unsigned long address, pte_t *ptep)
+ unsigned long addr, pte_t *ptep)
{
- pgste_t pgste;
- pte_t pte;
-
- if (mm_has_pgste(vma->vm_mm)) {
- pgste = pgste_get_lock(ptep);
- pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste);
- }
-
- pte = *ptep;
- ptep_flush_direct(vma->vm_mm, address, ptep);
- pte_val(*ptep) = _PAGE_INVALID;
-
- if (mm_has_pgste(vma->vm_mm)) {
- if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) ==
- _PGSTE_GPS_USAGE_UNUSED)
- pte_val(pte) |= _PAGE_UNUSED;
- pgste = pgste_update_all(&pte, pgste, vma->vm_mm);
- pgste_set_unlock(ptep, pgste);
- }
- return pte;
+ return ptep_xchg_direct(vma->vm_mm, addr, ptep, __pte(_PAGE_INVALID));
}
/*
@@ -1206,80 +847,66 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
*/
#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
- unsigned long address,
+ unsigned long addr,
pte_t *ptep, int full)
{
- pgste_t pgste;
- pte_t pte;
-
- if (!full && mm_has_pgste(mm)) {
- pgste = pgste_get_lock(ptep);
- pgste = pgste_ipte_notify(mm, address, ptep, pgste);
- }
-
- pte = *ptep;
- if (!full)
- ptep_flush_lazy(mm, address, ptep);
- pte_val(*ptep) = _PAGE_INVALID;
-
- if (!full && mm_has_pgste(mm)) {
- pgste = pgste_update_all(&pte, pgste, mm);
- pgste_set_unlock(ptep, pgste);
+ if (full) {
+ pte_t pte = *ptep;
+ *ptep = __pte(_PAGE_INVALID);
+ return pte;
}
- return pte;
+ return ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
}
#define __HAVE_ARCH_PTEP_SET_WRPROTECT
-static inline pte_t ptep_set_wrprotect(struct mm_struct *mm,
- unsigned long address, pte_t *ptep)
+static inline void ptep_set_wrprotect(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
{
- pgste_t pgste;
pte_t pte = *ptep;
- if (pte_write(pte)) {
- if (mm_has_pgste(mm)) {
- pgste = pgste_get_lock(ptep);
- pgste = pgste_ipte_notify(mm, address, ptep, pgste);
- }
-
- ptep_flush_lazy(mm, address, ptep);
- pte = pte_wrprotect(pte);
-
- if (mm_has_pgste(mm)) {
- pgste = pgste_set_pte(ptep, pgste, pte);
- pgste_set_unlock(ptep, pgste);
- } else
- *ptep = pte;
- }
- return pte;
+ if (pte_write(pte))
+ ptep_xchg_lazy(mm, addr, ptep, pte_wrprotect(pte));
}
#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
static inline int ptep_set_access_flags(struct vm_area_struct *vma,
- unsigned long address, pte_t *ptep,
+ unsigned long addr, pte_t *ptep,
pte_t entry, int dirty)
{
- pgste_t pgste;
- pte_t oldpte;
-
- oldpte = *ptep;
- if (pte_same(oldpte, entry))
+ if (pte_same(*ptep, entry))
return 0;
- if (mm_has_pgste(vma->vm_mm)) {
- pgste = pgste_get_lock(ptep);
- pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste);
- }
+ ptep_xchg_direct(vma->vm_mm, addr, ptep, entry);
+ return 1;
+}
- ptep_flush_direct(vma->vm_mm, address, ptep);
+/*
+ * Additional functions to handle KVM guest page tables
+ */
+void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t entry);
+void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
+void ptep_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
+void ptep_zap_unused(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep , int reset);
+void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
+
+bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long address);
+int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
+ unsigned char key, bool nq);
+unsigned char get_guest_storage_key(struct mm_struct *mm, unsigned long addr);
- if (mm_has_pgste(vma->vm_mm)) {
- if (pte_val(oldpte) & _PAGE_INVALID)
- pgste_set_key(ptep, pgste, entry, vma->vm_mm);
- pgste = pgste_set_pte(ptep, pgste, entry);
- pgste_set_unlock(ptep, pgste);
- } else
+/*
+ * Certain architectures need to do special things when PTEs
+ * within a page table are directly modified. Thus, the following
+ * hook is made available.
+ */
+static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t entry)
+{
+ if (mm_has_pgste(mm))
+ ptep_set_pte_at(mm, addr, ptep, entry);
+ else
*ptep = entry;
- return 1;
}
/*
@@ -1476,54 +1103,51 @@ static inline void __pmdp_idte_local(unsigned long address, pmd_t *pmdp)
: "cc" );
}
-static inline void pmdp_flush_direct(struct mm_struct *mm,
- unsigned long address, pmd_t *pmdp)
-{
- int active, count;
+pmd_t pmdp_xchg_direct(struct mm_struct *, unsigned long, pmd_t *, pmd_t);
+pmd_t pmdp_xchg_lazy(struct mm_struct *, unsigned long, pmd_t *, pmd_t);
- if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
- return;
- if (!MACHINE_HAS_IDTE) {
- __pmdp_csp(pmdp);
- return;
- }
- active = (mm == current->active_mm) ? 1 : 0;
- count = atomic_add_return(0x10000, &mm->context.attach_count);
- if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active &&
- cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
- __pmdp_idte_local(address, pmdp);
- else
- __pmdp_idte(address, pmdp);
- atomic_sub(0x10000, &mm->context.attach_count);
-}
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-static inline void pmdp_flush_lazy(struct mm_struct *mm,
- unsigned long address, pmd_t *pmdp)
+#define __HAVE_ARCH_PGTABLE_DEPOSIT
+void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+ pgtable_t pgtable);
+
+#define __HAVE_ARCH_PGTABLE_WITHDRAW
+pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
+
+#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
+static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
+ unsigned long addr, pmd_t *pmdp,
+ pmd_t entry, int dirty)
{
- int active, count;
+ VM_BUG_ON(addr & ~HPAGE_MASK);
- if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
- return;
- active = (mm == current->active_mm) ? 1 : 0;
- count = atomic_add_return(0x10000, &mm->context.attach_count);
- if ((count & 0xffff) <= active) {
- pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID;
- mm->context.flush_mm = 1;
- } else if (MACHINE_HAS_IDTE)
- __pmdp_idte(address, pmdp);
- else
- __pmdp_csp(pmdp);
- atomic_sub(0x10000, &mm->context.attach_count);
+ entry = pmd_mkyoung(entry);
+ if (dirty)
+ entry = pmd_mkdirty(entry);
+ if (pmd_val(*pmdp) == pmd_val(entry))
+ return 0;
+ pmdp_xchg_direct(vma->vm_mm, addr, pmdp, entry);
+ return 1;
}
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
+static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long addr, pmd_t *pmdp)
+{
+ pmd_t pmd = *pmdp;
-#define __HAVE_ARCH_PGTABLE_DEPOSIT
-extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
- pgtable_t pgtable);
+ pmd = pmdp_xchg_direct(vma->vm_mm, addr, pmdp, pmd_mkold(pmd));
+ return pmd_young(pmd);
+}
-#define __HAVE_ARCH_PGTABLE_WITHDRAW
-extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
+#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
+static inline int pmdp_clear_flush_young(struct vm_area_struct *vma,
+ unsigned long addr, pmd_t *pmdp)
+{
+ VM_BUG_ON(addr & ~HPAGE_MASK);
+ return pmdp_test_and_clear_young(vma, addr, pmdp);
+}
static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t entry)
@@ -1539,66 +1163,48 @@ static inline pmd_t pmd_mkhuge(pmd_t pmd)
return pmd;
}
-#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
-static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
- unsigned long address, pmd_t *pmdp)
-{
- pmd_t pmd;
-
- pmd = *pmdp;
- pmdp_flush_direct(vma->vm_mm, address, pmdp);
- *pmdp = pmd_mkold(pmd);
- return pmd_young(pmd);
-}
-
#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
- unsigned long address, pmd_t *pmdp)
+ unsigned long addr, pmd_t *pmdp)
{
- pmd_t pmd = *pmdp;
-
- pmdp_flush_direct(mm, address, pmdp);
- pmd_clear(pmdp);
- return pmd;
+ return pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_INVALID));
}
#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL
static inline pmd_t pmdp_huge_get_and_clear_full(struct mm_struct *mm,
- unsigned long address,
+ unsigned long addr,
pmd_t *pmdp, int full)
{
- pmd_t pmd = *pmdp;
-
- if (!full)
- pmdp_flush_lazy(mm, address, pmdp);
- pmd_clear(pmdp);
- return pmd;
+ if (full) {
+ pmd_t pmd = *pmdp;
+ *pmdp = __pmd(_SEGMENT_ENTRY_INVALID);
+ return pmd;
+ }
+ return pmdp_xchg_lazy(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_INVALID));
}
#define __HAVE_ARCH_PMDP_HUGE_CLEAR_FLUSH
static inline pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma,
- unsigned long address, pmd_t *pmdp)
+ unsigned long addr, pmd_t *pmdp)
{
- return pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp);
+ return pmdp_huge_get_and_clear(vma->vm_mm, addr, pmdp);
}
#define __HAVE_ARCH_PMDP_INVALIDATE
static inline void pmdp_invalidate(struct vm_area_struct *vma,
- unsigned long address, pmd_t *pmdp)
+ unsigned long addr, pmd_t *pmdp)
{
- pmdp_flush_direct(vma->vm_mm, address, pmdp);
+ pmdp_xchg_direct(vma->vm_mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_INVALID));
}
#define __HAVE_ARCH_PMDP_SET_WRPROTECT
static inline void pmdp_set_wrprotect(struct mm_struct *mm,
- unsigned long address, pmd_t *pmdp)
+ unsigned long addr, pmd_t *pmdp)
{
pmd_t pmd = *pmdp;
- if (pmd_write(pmd)) {
- pmdp_flush_direct(mm, address, pmdp);
- set_pmd_at(mm, address, pmdp, pmd_wrprotect(pmd));
- }
+ if (pmd_write(pmd))
+ pmd = pmdp_xchg_lazy(mm, addr, pmdp, pmd_wrprotect(pmd));
}
static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
@@ -1617,6 +1223,7 @@ static inline int pmd_trans_huge(pmd_t pmd)
return pmd_val(pmd) & _SEGMENT_ENTRY_LARGE;
}
+#define has_transparent_hugepage has_transparent_hugepage
static inline int has_transparent_hugepage(void)
{
return MACHINE_HAS_HPAGE ? 1 : 0;
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 1c4fe129486d..9d4d311d7e52 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -105,7 +105,6 @@ typedef struct {
* Thread structure
*/
struct thread_struct {
- struct fpu fpu; /* FP and VX register save area */
unsigned int acrs[NUM_ACRS];
unsigned long ksp; /* kernel stack pointer */
mm_segment_t mm_segment;
@@ -120,6 +119,11 @@ struct thread_struct {
/* cpu runtime instrumentation */
struct runtime_instr_cb *ri_cb;
unsigned char trap_tdb[256]; /* Transaction abort diagnose block */
+ /*
+ * Warning: 'fpu' is dynamically-sized. It *MUST* be at
+ * the end.
+ */
+ struct fpu fpu; /* FP and VX register save area */
};
/* Flag to disable transactions. */
@@ -155,10 +159,9 @@ struct stack_frame {
#define ARCH_MIN_TASKALIGN 8
-extern __vector128 init_task_fpu_regs[__NUM_VXRS];
#define INIT_THREAD { \
.ksp = sizeof(init_stack) + (unsigned long) &init_stack, \
- .fpu.regs = (void *)&init_task_fpu_regs, \
+ .fpu.regs = (void *) init_task.thread.fpu.fprs, \
}
/*
@@ -175,7 +178,7 @@ extern __vector128 init_task_fpu_regs[__NUM_VXRS];
regs->psw.mask = PSW_USER_BITS | PSW_MASK_BA; \
regs->psw.addr = new_psw; \
regs->gprs[15] = new_stackp; \
- crst_table_downgrade(current->mm, 1UL << 31); \
+ crst_table_downgrade(current->mm); \
execve_tail(); \
} while (0)
@@ -184,6 +187,10 @@ struct task_struct;
struct mm_struct;
struct seq_file;
+typedef int (*dump_trace_func_t)(void *data, unsigned long address);
+void dump_trace(dump_trace_func_t func, void *data,
+ struct task_struct *task, unsigned long sp);
+
void show_cacheinfo(struct seq_file *m);
/* Free all resources held by a thread. */
@@ -203,6 +210,14 @@ unsigned long get_wchan(struct task_struct *p);
/* Has task runtime instrumentation enabled ? */
#define is_ri_task(tsk) (!!(tsk)->thread.ri_cb)
+static inline unsigned long current_stack_pointer(void)
+{
+ unsigned long sp;
+
+ asm volatile("la %0,0(15)" : "=a" (sp));
+ return sp;
+}
+
static inline unsigned short stap(void)
{
unsigned short cpu_address;
diff --git a/arch/s390/include/asm/rwsem.h b/arch/s390/include/asm/rwsem.h
index 4b43ee7e6776..c75e4471e618 100644
--- a/arch/s390/include/asm/rwsem.h
+++ b/arch/s390/include/asm/rwsem.h
@@ -31,7 +31,7 @@
* This should be totally fair - if anything is waiting, a process that wants a
* lock will go to the back of the queue. When the currently active lock is
* released, if there's a writer at the front of the queue, then that and only
- * that will be woken up; if there's a bunch of consequtive readers at the
+ * that will be woken up; if there's a bunch of consecutive readers at the
* front, then they'll all be woken up, but no other readers will be.
*/
@@ -90,7 +90,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem)
/*
* lock for writing
*/
-static inline void __down_write_nested(struct rw_semaphore *sem, int subclass)
+static inline long ___down_write(struct rw_semaphore *sem)
{
signed long old, new, tmp;
@@ -104,13 +104,23 @@ static inline void __down_write_nested(struct rw_semaphore *sem, int subclass)
: "=&d" (old), "=&d" (new), "=Q" (sem->count)
: "Q" (sem->count), "m" (tmp)
: "cc", "memory");
- if (old != 0)
- rwsem_down_write_failed(sem);
+
+ return old;
}
static inline void __down_write(struct rw_semaphore *sem)
{
- __down_write_nested(sem, 0);
+ if (___down_write(sem))
+ rwsem_down_write_failed(sem);
+}
+
+static inline int __down_write_killable(struct rw_semaphore *sem)
+{
+ if (___down_write(sem))
+ if (IS_ERR(rwsem_down_write_failed_killable(sem)))
+ return -EINTR;
+
+ return 0;
}
/*
diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index bab456be9a4f..e4f6f73afe2f 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -69,9 +69,22 @@ struct sclp_info {
unsigned int max_cores;
unsigned long hsa_size;
unsigned long facilities;
+ unsigned int hmfai;
};
extern struct sclp_info sclp;
+struct zpci_report_error_header {
+ u8 version; /* Interface version byte */
+ u8 action; /* Action qualifier byte
+ * 1: Deconfigure and repair action requested
+ * (OpenCrypto Problem Call Home)
+ * 2: Informational Report
+ * (OpenCrypto Successful Diagnostics Execution)
+ */
+ u16 length; /* Length of Subsequent Data (up to 4K – SCLP header */
+ u8 data[0]; /* Subsequent Data passed verbatim to SCLP ET 24 */
+} __packed;
+
int sclp_get_core_info(struct sclp_core_info *info);
int sclp_core_configure(u8 core);
int sclp_core_deconfigure(u8 core);
@@ -83,6 +96,7 @@ int sclp_chp_read_info(struct sclp_chp_info *info);
void sclp_get_ipl_info(struct sclp_ipl_info *info);
int sclp_pci_configure(u32 fid);
int sclp_pci_deconfigure(u32 fid);
+int sclp_pci_report(struct zpci_report_error_header *report, u32 fh, u32 fid);
int memcpy_hsa_kernel(void *dest, unsigned long src, size_t count);
int memcpy_hsa_user(void __user *dest, unsigned long src, size_t count);
void sclp_early_detect(void);
diff --git a/arch/s390/include/asm/seccomp.h b/arch/s390/include/asm/seccomp.h
index 781a9cf9b002..e10f8337367b 100644
--- a/arch/s390/include/asm/seccomp.h
+++ b/arch/s390/include/asm/seccomp.h
@@ -13,4 +13,6 @@
#define __NR_seccomp_exit_32 __NR_exit
#define __NR_seccomp_sigreturn_32 __NR_sigreturn
+#include <asm-generic/seccomp.h>
+
#endif /* _ASM_S390_SECCOMP_H */
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index 69837225119e..c0f0efbb6ab5 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -101,6 +101,8 @@ extern void pfault_fini(void);
#define pfault_fini() do { } while (0)
#endif /* CONFIG_PFAULT */
+void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault);
+
extern void cmma_init(void);
extern void (*_machine_restart)(char *command);
diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h
index ec60cf7fa0a2..1c8f33fca356 100644
--- a/arch/s390/include/asm/sigp.h
+++ b/arch/s390/include/asm/sigp.h
@@ -27,6 +27,7 @@
/* SIGP cpu status bits */
+#define SIGP_STATUS_INVALID_ORDER 0x00000002UL
#define SIGP_STATUS_CHECK_STOP 0x00000010UL
#define SIGP_STATUS_STOPPED 0x00000040UL
#define SIGP_STATUS_EXT_CALL_PENDING 0x00000080UL
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index 2fffc2c27581..f15c0398c363 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -62,6 +62,7 @@ static inline struct thread_info *current_thread_info(void)
}
void arch_release_task_struct(struct task_struct *tsk);
+int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
#define THREAD_SIZE_ORDER THREAD_ORDER
diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h
index ca148f7c3eaa..a2e6ef32e054 100644
--- a/arch/s390/include/asm/tlbflush.h
+++ b/arch/s390/include/asm/tlbflush.h
@@ -110,8 +110,7 @@ static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce)
static inline void __tlb_flush_kernel(void)
{
if (MACHINE_HAS_IDTE)
- __tlb_flush_idte((unsigned long) init_mm.pgd |
- init_mm.context.asce_bits);
+ __tlb_flush_idte(init_mm.context.asce);
else
__tlb_flush_global();
}
@@ -133,8 +132,7 @@ static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce)
static inline void __tlb_flush_kernel(void)
{
if (MACHINE_HAS_TLB_LC)
- __tlb_flush_idte_local((unsigned long) init_mm.pgd |
- init_mm.context.asce_bits);
+ __tlb_flush_idte_local(init_mm.context.asce);
else
__tlb_flush_local();
}
@@ -148,8 +146,7 @@ static inline void __tlb_flush_mm(struct mm_struct * mm)
* only ran on the local cpu.
*/
if (MACHINE_HAS_IDTE && list_empty(&mm->context.gmap_list))
- __tlb_flush_asce(mm, (unsigned long) mm->pgd |
- mm->context.asce_bits);
+ __tlb_flush_asce(mm, mm->context.asce);
else
__tlb_flush_full(mm);
}
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index 9dd4cc47ddc7..e0900ddf91dd 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -79,18 +79,12 @@ struct exception_table_entry
int insn, fixup;
};
-static inline unsigned long extable_insn(const struct exception_table_entry *x)
-{
- return (unsigned long)&x->insn + x->insn;
-}
-
static inline unsigned long extable_fixup(const struct exception_table_entry *x)
{
return (unsigned long)&x->fixup + x->fixup;
}
-#define ARCH_HAS_SORT_EXTABLE
-#define ARCH_HAS_SEARCH_EXTABLE
+#define ARCH_HAS_RELATIVE_EXTABLE
/**
* __copy_from_user: - Copy a block of data from user space, with less checking.
diff --git a/arch/s390/include/asm/xor.h b/arch/s390/include/asm/xor.h
index c82eb12a5b18..c988df744a70 100644
--- a/arch/s390/include/asm/xor.h
+++ b/arch/s390/include/asm/xor.h
@@ -1 +1,20 @@
-#include <asm-generic/xor.h>
+/*
+ * Optimited xor routines
+ *
+ * Copyright IBM Corp. 2016
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+#ifndef _ASM_S390_XOR_H
+#define _ASM_S390_XOR_H
+
+extern struct xor_block_template xor_block_xc;
+
+#undef XOR_TRY_TEMPLATES
+#define XOR_TRY_TEMPLATES \
+do { \
+ xor_speed(&xor_block_xc); \
+} while (0)
+
+#define XOR_SELECT_TEMPLATE(FASTEST) (&xor_block_xc)
+
+#endif /* _ASM_S390_XOR_H */
diff --git a/arch/s390/include/uapi/asm/clp.h b/arch/s390/include/uapi/asm/clp.h
new file mode 100644
index 000000000000..ab72d9d24373
--- /dev/null
+++ b/arch/s390/include/uapi/asm/clp.h
@@ -0,0 +1,28 @@
+/*
+ * ioctl interface for /dev/clp
+ *
+ * Copyright IBM Corp. 2016
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef _ASM_CLP_H
+#define _ASM_CLP_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+struct clp_req {
+ unsigned int c : 1;
+ unsigned int r : 1;
+ unsigned int lps : 6;
+ unsigned int cmd : 8;
+ unsigned int : 16;
+ unsigned int reserved;
+ __u64 data_p;
+};
+
+#define CLP_IOCTL_MAGIC 'c'
+
+#define CLP_SYNC _IOWR(CLP_IOCTL_MAGIC, 0xC1, struct clp_req)
+
+#endif
diff --git a/arch/s390/include/uapi/asm/dasd.h b/arch/s390/include/uapi/asm/dasd.h
index 5812a3b2df9e..1340311dab77 100644
--- a/arch/s390/include/uapi/asm/dasd.h
+++ b/arch/s390/include/uapi/asm/dasd.h
@@ -187,6 +187,36 @@ typedef struct format_data_t {
#define DASD_FMT_INT_INVAL 4 /* invalidate tracks */
#define DASD_FMT_INT_COMPAT 8 /* use OS/390 compatible disk layout */
+/*
+ * struct format_check_t
+ * represents all data necessary to evaluate the format of
+ * different tracks of a dasd
+ */
+typedef struct format_check_t {
+ /* Input */
+ struct format_data_t expect;
+
+ /* Output */
+ unsigned int result; /* Error indication (DASD_FMT_ERR_*) */
+ unsigned int unit; /* Track that is in error */
+ unsigned int rec; /* Record that is in error */
+ unsigned int num_records; /* Records in the track in error */
+ unsigned int blksize; /* Blocksize of first record in error */
+ unsigned int key_length; /* Key length of first record in error */
+} format_check_t;
+
+/* Values returned in format_check_t when a format error is detected: */
+/* Too few records were found on a single track */
+#define DASD_FMT_ERR_TOO_FEW_RECORDS 1
+/* Too many records were found on a single track */
+#define DASD_FMT_ERR_TOO_MANY_RECORDS 2
+/* Blocksize/data-length of a record was wrong */
+#define DASD_FMT_ERR_BLKSIZE 3
+/* A record ID is defined by cylinder, head, and record number (CHR). */
+/* On mismatch, this error is set */
+#define DASD_FMT_ERR_RECORD_ID 4
+/* If key-length was != 0 */
+#define DASD_FMT_ERR_KEY_LENGTH 5
/*
* struct attrib_data_t
@@ -288,6 +318,8 @@ struct dasd_snid_ioctl_data {
/* Get Sense Path Group ID (SNID) data */
#define BIODASDSNID _IOWR(DASD_IOCTL_LETTER, 1, struct dasd_snid_ioctl_data)
+/* Check device format according to format_check_t */
+#define BIODASDCHECKFMT _IOWR(DASD_IOCTL_LETTER, 2, format_check_t)
#define BIODASDSYMMIO _IOWR(DASD_IOCTL_LETTER, 240, dasd_symmio_parms_t)
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index fe84bd5fe7ce..3b8e99ef9d58 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -25,6 +25,7 @@
#define KVM_DEV_FLIC_APF_DISABLE_WAIT 5
#define KVM_DEV_FLIC_ADAPTER_REGISTER 6
#define KVM_DEV_FLIC_ADAPTER_MODIFY 7
+#define KVM_DEV_FLIC_CLEAR_IO_IRQ 8
/*
* We can have up to 4*64k pending subchannels + 8 adapter interrupts,
* as well as up to ASYNC_PF_PER_VCPU*KVM_MAX_VCPUS pfault done interrupts.
@@ -154,6 +155,7 @@ struct kvm_guest_debug_arch {
#define KVM_SYNC_PFAULT (1UL << 5)
#define KVM_SYNC_VRS (1UL << 6)
#define KVM_SYNC_RICCB (1UL << 7)
+#define KVM_SYNC_FPRS (1UL << 8)
/* definition of registers in kvm_run */
struct kvm_sync_regs {
__u64 prefix; /* prefix register */
@@ -168,9 +170,12 @@ struct kvm_sync_regs {
__u64 pft; /* pfault token [PFAULT] */
__u64 pfs; /* pfault select [PFAULT] */
__u64 pfc; /* pfault compare [PFAULT] */
- __u64 vrs[32][2]; /* vector registers */
+ union {
+ __u64 vrs[32][2]; /* vector registers (KVM_SYNC_VRS) */
+ __u64 fprs[16]; /* fp registers (KVM_SYNC_FPRS) */
+ };
__u8 reserved[512]; /* for future vector expansion */
- __u32 fpc; /* only valid with vector registers */
+ __u32 fpc; /* valid on KVM_SYNC_VRS or KVM_SYNC_FPRS */
__u8 padding[52]; /* riccb needs to be 64byte aligned */
__u8 riccb[64]; /* runtime instrumentation controls block */
};
diff --git a/arch/s390/include/uapi/asm/sie.h b/arch/s390/include/uapi/asm/sie.h
index ee69c0854c88..8fb5d4a6dd25 100644
--- a/arch/s390/include/uapi/asm/sie.h
+++ b/arch/s390/include/uapi/asm/sie.h
@@ -7,6 +7,7 @@
{ 0x9c, "DIAG (0x9c) time slice end directed" }, \
{ 0x204, "DIAG (0x204) logical-cpu utilization" }, \
{ 0x258, "DIAG (0x258) page-reference services" }, \
+ { 0x288, "DIAG (0x288) watchdog functions" }, \
{ 0x308, "DIAG (0x308) ipl functions" }, \
{ 0x500, "DIAG (0x500) KVM virtio functions" }, \
{ 0x501, "DIAG (0x501) KVM breakpoint" }
@@ -15,14 +16,19 @@
{ 0x01, "SIGP sense" }, \
{ 0x02, "SIGP external call" }, \
{ 0x03, "SIGP emergency signal" }, \
+ { 0x04, "SIGP start" }, \
{ 0x05, "SIGP stop" }, \
{ 0x06, "SIGP restart" }, \
{ 0x09, "SIGP stop and store status" }, \
{ 0x0b, "SIGP initial cpu reset" }, \
+ { 0x0c, "SIGP cpu reset" }, \
{ 0x0d, "SIGP set prefix" }, \
{ 0x0e, "SIGP store status at address" }, \
{ 0x12, "SIGP set architecture" }, \
- { 0x15, "SIGP sense running" }
+ { 0x13, "SIGP conditional emergency signal" }, \
+ { 0x15, "SIGP sense running" }, \
+ { 0x16, "SIGP set multithreading"}, \
+ { 0x17, "SIGP store additional status ait address"}
#define icpt_prog_codes \
{ 0x0001, "Prog Operation" }, \
diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h
index d02e89d14fef..41b51c2f4f1b 100644
--- a/arch/s390/include/uapi/asm/socket.h
+++ b/arch/s390/include/uapi/asm/socket.h
@@ -94,4 +94,6 @@
#define SO_ATTACH_REUSEPORT_CBPF 51
#define SO_ATTACH_REUSEPORT_EBPF 52
+#define SO_CNX_ADVICE 53
+
#endif /* _ASM_SOCKET_H */
diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h
index ab3aa6875a59..4384bc797a54 100644
--- a/arch/s390/include/uapi/asm/unistd.h
+++ b/arch/s390/include/uapi/asm/unistd.h
@@ -311,7 +311,9 @@
#define __NR_shutdown 373
#define __NR_mlock2 374
#define __NR_copy_file_range 375
-#define NR_syscalls 376
+#define __NR_preadv2 376
+#define __NR_pwritev2 377
+#define NR_syscalls 378
/*
* There are some system calls that are not present on 64 bit, some
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index 53bbc9e8b281..1f95cc1faeb7 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -12,6 +12,7 @@
#include <asm/idle.h>
#include <asm/vdso.h>
#include <asm/pgtable.h>
+#include <asm/gmap.h>
/*
* Make sure that the compiler is new enough. We want a compiler that
diff --git a/arch/s390/kernel/cache.c b/arch/s390/kernel/cache.c
index 8ba32436effe..77a84bd78be2 100644
--- a/arch/s390/kernel/cache.c
+++ b/arch/s390/kernel/cache.c
@@ -72,7 +72,6 @@ void show_cacheinfo(struct seq_file *m)
if (!test_facility(34))
return;
- get_online_cpus();
this_cpu_ci = get_cpu_cacheinfo(cpumask_any(cpu_online_mask));
for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) {
cache = this_cpu_ci->info_list + idx;
@@ -86,7 +85,6 @@ void show_cacheinfo(struct seq_file *m)
seq_printf(m, "associativity=%d", cache->ways_of_associativity);
seq_puts(m, "\n");
}
- put_online_cpus();
}
static inline enum cache_type get_cache_type(struct cache_info *ci, int level)
diff --git a/arch/s390/kernel/cpcmd.c b/arch/s390/kernel/cpcmd.c
index 7f768914fb4f..7f48e568ac64 100644
--- a/arch/s390/kernel/cpcmd.c
+++ b/arch/s390/kernel/cpcmd.c
@@ -96,8 +96,7 @@ int cpcmd(const char *cmd, char *response, int rlen, int *response_code)
(((unsigned long)response + rlen) >> 31)) {
lowbuf = kmalloc(rlen, GFP_KERNEL | GFP_DMA);
if (!lowbuf) {
- pr_warning("The cpcmd kernel function failed to "
- "allocate a response buffer\n");
+ pr_warn("The cpcmd kernel function failed to allocate a response buffer\n");
return -ENOMEM;
}
spin_lock_irqsave(&cpcmd_lock, flags);
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index 3986c9f62191..29df8484282b 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -173,7 +173,7 @@ int copy_oldmem_kernel(void *dst, void *src, size_t count)
/*
* Copy memory of the old, dumped system to a user space virtual address
*/
-int copy_oldmem_user(void __user *dst, void *src, size_t count)
+static int copy_oldmem_user(void __user *dst, void *src, size_t count)
{
unsigned long from, len;
int rc;
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index c890a5589e59..aa12de72fd47 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -699,8 +699,7 @@ debug_info_t *debug_register_mode(const char *name, int pages_per_area,
/* Since debugfs currently does not support uid/gid other than root, */
/* we do not allow gid/uid != 0 until we get support for that. */
if ((uid != 0) || (gid != 0))
- pr_warning("Root becomes the owner of all s390dbf files "
- "in sysfs\n");
+ pr_warn("Root becomes the owner of all s390dbf files in sysfs\n");
BUG_ON(!initialized);
mutex_lock(&debug_mutex);
@@ -1307,8 +1306,7 @@ debug_input_level_fn(debug_info_t * id, struct debug_view *view,
new_level = debug_get_uint(str);
}
if(new_level < 0) {
- pr_warning("%s is not a valid level for a debug "
- "feature\n", str);
+ pr_warn("%s is not a valid level for a debug feature\n", str);
rc = -EINVAL;
} else {
debug_set_level(id, new_level);
diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c
index 62973efd214a..8cb9bfdd3ea8 100644
--- a/arch/s390/kernel/dis.c
+++ b/arch/s390/kernel/dis.c
@@ -1920,23 +1920,16 @@ static int print_insn(char *buffer, unsigned char *code, unsigned long addr)
}
if (separator)
ptr += sprintf(ptr, "%c", separator);
- /*
- * Use four '%' characters below because of the
- * following two conversions:
- *
- * 1) sprintf: %%%%r -> %%r
- * 2) printk : %%r -> %r
- */
if (operand->flags & OPERAND_GPR)
- ptr += sprintf(ptr, "%%%%r%i", value);
+ ptr += sprintf(ptr, "%%r%i", value);
else if (operand->flags & OPERAND_FPR)
- ptr += sprintf(ptr, "%%%%f%i", value);
+ ptr += sprintf(ptr, "%%f%i", value);
else if (operand->flags & OPERAND_AR)
- ptr += sprintf(ptr, "%%%%a%i", value);
+ ptr += sprintf(ptr, "%%a%i", value);
else if (operand->flags & OPERAND_CR)
- ptr += sprintf(ptr, "%%%%c%i", value);
+ ptr += sprintf(ptr, "%%c%i", value);
else if (operand->flags & OPERAND_VR)
- ptr += sprintf(ptr, "%%%%v%i", value);
+ ptr += sprintf(ptr, "%%v%i", value);
else if (operand->flags & OPERAND_PCREL)
ptr += sprintf(ptr, "%lx", (signed int) value
+ addr);
diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c
index 02bd02ff648b..69f9908ac44c 100644
--- a/arch/s390/kernel/dumpstack.c
+++ b/arch/s390/kernel/dumpstack.c
@@ -11,6 +11,7 @@
#include <linux/export.h>
#include <linux/kdebug.h>
#include <linux/ptrace.h>
+#include <linux/mm.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <asm/processor.h>
@@ -19,28 +20,28 @@
#include <asm/ipl.h>
/*
- * For show_trace we have tree different stack to consider:
+ * For dump_trace we have tree different stack to consider:
* - the panic stack which is used if the kernel stack has overflown
* - the asynchronous interrupt stack (cpu related)
* - the synchronous kernel stack (process related)
- * The stack trace can start at any of the three stack and can potentially
+ * The stack trace can start at any of the three stacks and can potentially
* touch all of them. The order is: panic stack, async stack, sync stack.
*/
static unsigned long
-__show_trace(unsigned long sp, unsigned long low, unsigned long high)
+__dump_trace(dump_trace_func_t func, void *data, unsigned long sp,
+ unsigned long low, unsigned long high)
{
struct stack_frame *sf;
struct pt_regs *regs;
- unsigned long addr;
while (1) {
if (sp < low || sp > high - sizeof(*sf))
return sp;
sf = (struct stack_frame *) sp;
- addr = sf->gprs[8];
- printk("([<%016lx>] %pSR)\n", addr, (void *)addr);
/* Follow the backchain. */
while (1) {
+ if (func(data, sf->gprs[8]))
+ return sp;
low = sp;
sp = sf->back_chain;
if (!sp)
@@ -48,46 +49,82 @@ __show_trace(unsigned long sp, unsigned long low, unsigned long high)
if (sp <= low || sp > high - sizeof(*sf))
return sp;
sf = (struct stack_frame *) sp;
- addr = sf->gprs[8];
- printk(" [<%016lx>] %pSR\n", addr, (void *)addr);
}
/* Zero backchain detected, check for interrupt frame. */
sp = (unsigned long) (sf + 1);
if (sp <= low || sp > high - sizeof(*regs))
return sp;
regs = (struct pt_regs *) sp;
- addr = regs->psw.addr;
- printk(" [<%016lx>] %pSR\n", addr, (void *)addr);
+ if (!user_mode(regs)) {
+ if (func(data, regs->psw.addr))
+ return sp;
+ }
low = sp;
sp = regs->gprs[15];
}
}
-static void show_trace(struct task_struct *task, unsigned long *stack)
+void dump_trace(dump_trace_func_t func, void *data, struct task_struct *task,
+ unsigned long sp)
{
- const unsigned long frame_size =
- STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
- register unsigned long __r15 asm ("15");
- unsigned long sp;
+ unsigned long frame_size;
- sp = (unsigned long) stack;
- if (!sp)
- sp = task ? task->thread.ksp : __r15;
- printk("Call Trace:\n");
+ frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
#ifdef CONFIG_CHECK_STACK
- sp = __show_trace(sp,
+ sp = __dump_trace(func, data, sp,
S390_lowcore.panic_stack + frame_size - 4096,
S390_lowcore.panic_stack + frame_size);
#endif
- sp = __show_trace(sp,
+ sp = __dump_trace(func, data, sp,
S390_lowcore.async_stack + frame_size - ASYNC_SIZE,
S390_lowcore.async_stack + frame_size);
if (task)
- __show_trace(sp, (unsigned long) task_stack_page(task),
- (unsigned long) task_stack_page(task) + THREAD_SIZE);
+ __dump_trace(func, data, sp,
+ (unsigned long)task_stack_page(task),
+ (unsigned long)task_stack_page(task) + THREAD_SIZE);
else
- __show_trace(sp, S390_lowcore.thread_info,
+ __dump_trace(func, data, sp,
+ S390_lowcore.thread_info,
S390_lowcore.thread_info + THREAD_SIZE);
+}
+EXPORT_SYMBOL_GPL(dump_trace);
+
+struct return_address_data {
+ unsigned long address;
+ int depth;
+};
+
+static int __return_address(void *data, unsigned long address)
+{
+ struct return_address_data *rd = data;
+
+ if (rd->depth--)
+ return 0;
+ rd->address = address;
+ return 1;
+}
+
+unsigned long return_address(int depth)
+{
+ struct return_address_data rd = { .depth = depth + 2 };
+
+ dump_trace(__return_address, &rd, NULL, current_stack_pointer());
+ return rd.address;
+}
+EXPORT_SYMBOL_GPL(return_address);
+
+static int show_address(void *data, unsigned long address)
+{
+ printk("([<%016lx>] %pSR)\n", address, (void *)address);
+ return 0;
+}
+
+static void show_trace(struct task_struct *task, unsigned long sp)
+{
+ if (!sp)
+ sp = task ? task->thread.ksp : current_stack_pointer();
+ printk("Call Trace:\n");
+ dump_trace(show_address, NULL, task, sp);
if (!task)
task = current;
debug_show_held_locks(task);
@@ -95,15 +132,16 @@ static void show_trace(struct task_struct *task, unsigned long *stack)
void show_stack(struct task_struct *task, unsigned long *sp)
{
- register unsigned long *__r15 asm ("15");
unsigned long *stack;
int i;
- if (!sp)
- stack = task ? (unsigned long *) task->thread.ksp : __r15;
- else
- stack = sp;
-
+ stack = sp;
+ if (!stack) {
+ if (!task)
+ stack = (unsigned long *)current_stack_pointer();
+ else
+ stack = (unsigned long *)task->thread.ksp;
+ }
for (i = 0; i < 20; i++) {
if (((addr_t) stack & (THREAD_SIZE-1)) == 0)
break;
@@ -112,7 +150,7 @@ void show_stack(struct task_struct *task, unsigned long *sp)
printk("%016lx ", *stack++);
}
printk("\n");
- show_trace(task, sp);
+ show_trace(task, (unsigned long)sp);
}
static void show_last_breaking_event(struct pt_regs *regs)
@@ -121,13 +159,9 @@ static void show_last_breaking_event(struct pt_regs *regs)
printk(" [<%016lx>] %pSR\n", regs->args[0], (void *)regs->args[0]);
}
-static inline int mask_bits(struct pt_regs *regs, unsigned long bits)
-{
- return (regs->psw.mask & bits) / ((~bits + 1) & bits);
-}
-
void show_registers(struct pt_regs *regs)
{
+ struct psw_bits *psw = &psw_bits(regs->psw);
char *mode;
mode = user_mode(regs) ? "User" : "Krnl";
@@ -136,13 +170,9 @@ void show_registers(struct pt_regs *regs)
printk(" (%pSR)", (void *)regs->psw.addr);
printk("\n");
printk(" R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x "
- "P:%x AS:%x CC:%x PM:%x", mask_bits(regs, PSW_MASK_PER),
- mask_bits(regs, PSW_MASK_DAT), mask_bits(regs, PSW_MASK_IO),
- mask_bits(regs, PSW_MASK_EXT), mask_bits(regs, PSW_MASK_KEY),
- mask_bits(regs, PSW_MASK_MCHECK), mask_bits(regs, PSW_MASK_WAIT),
- mask_bits(regs, PSW_MASK_PSTATE), mask_bits(regs, PSW_MASK_ASC),
- mask_bits(regs, PSW_MASK_CC), mask_bits(regs, PSW_MASK_PM));
- printk(" EA:%x", mask_bits(regs, PSW_MASK_EA | PSW_MASK_BA));
+ "P:%x AS:%x CC:%x PM:%x", psw->r, psw->t, psw->i, psw->e,
+ psw->key, psw->m, psw->w, psw->p, psw->as, psw->cc, psw->pm);
+ printk(" RI:%x EA:%x", psw->ri, psw->eaba);
printk("\n%s GPRS: %016lx %016lx %016lx %016lx\n", mode,
regs->gprs[0], regs->gprs[1], regs->gprs[2], regs->gprs[3]);
printk(" %016lx %016lx %016lx %016lx\n",
@@ -160,7 +190,7 @@ void show_regs(struct pt_regs *regs)
show_registers(regs);
/* Show stack backtrace if pt_regs is from kernel mode */
if (!user_mode(regs))
- show_trace(NULL, (unsigned long *) regs->gprs[15]);
+ show_trace(NULL, regs->gprs[15]);
show_last_breaking_event(regs);
}
@@ -184,9 +214,8 @@ void die(struct pt_regs *regs, const char *str)
#ifdef CONFIG_SMP
printk("SMP ");
#endif
-#ifdef CONFIG_DEBUG_PAGEALLOC
- printk("DEBUG_PAGEALLOC");
-#endif
+ if (debug_pagealloc_enabled())
+ printk("DEBUG_PAGEALLOC");
printk("\n");
notify_die(DIE_OOPS, str, regs, 0, regs->int_code & 0xffff, SIGSEGV);
print_modules();
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index c55576bbaa1f..a0684de5a93b 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -448,7 +448,6 @@ void __init startup_init(void)
rescue_initrd();
clear_bss_section();
init_kernel_storage_key();
- lockdep_init();
lockdep_off();
setup_lowcore_early();
setup_facility_list();
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index cd5a191381b9..2d47f9cfcb36 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -186,6 +186,7 @@ ENTRY(__switch_to)
stg %r5,__LC_THREAD_INFO # store thread info of next
stg %r15,__LC_KERNEL_STACK # store end of kernel stack
lg %r15,__THREAD_ksp(%r1) # load kernel stack of next
+ /* c4 is used in guest detection: arch/s390/kernel/perf_cpum_sf.c */
lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 4
mvc __LC_CURRENT_PID(4,%r0),__TASK_pid(%r3) # store pid of next
lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task
@@ -1199,114 +1200,12 @@ cleanup_critical:
.quad .Lpsw_idle_lpsw
.Lcleanup_save_fpu_regs:
- TSTMSK __LC_CPU_FLAGS,_CIF_FPU
- bor %r14
- clg %r9,BASED(.Lcleanup_save_fpu_regs_done)
- jhe 5f
- clg %r9,BASED(.Lcleanup_save_fpu_regs_fp)
- jhe 4f
- clg %r9,BASED(.Lcleanup_save_fpu_regs_vx_high)
- jhe 3f
- clg %r9,BASED(.Lcleanup_save_fpu_regs_vx_low)
- jhe 2f
- clg %r9,BASED(.Lcleanup_save_fpu_fpc_end)
- jhe 1f
- lg %r2,__LC_CURRENT
- aghi %r2,__TASK_thread
-0: # Store floating-point controls
- stfpc __THREAD_FPU_fpc(%r2)
-1: # Load register save area and check if VX is active
- lg %r3,__THREAD_FPU_regs(%r2)
- TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_VX
- jz 4f # no VX -> store FP regs
-2: # Store vector registers (V0-V15)
- VSTM %v0,%v15,0,%r3 # vstm 0,15,0(3)
-3: # Store vector registers (V16-V31)
- VSTM %v16,%v31,256,%r3 # vstm 16,31,256(3)
- j 5f # -> done, set CIF_FPU flag
-4: # Store floating-point registers
- std 0,0(%r3)
- std 1,8(%r3)
- std 2,16(%r3)
- std 3,24(%r3)
- std 4,32(%r3)
- std 5,40(%r3)
- std 6,48(%r3)
- std 7,56(%r3)
- std 8,64(%r3)
- std 9,72(%r3)
- std 10,80(%r3)
- std 11,88(%r3)
- std 12,96(%r3)
- std 13,104(%r3)
- std 14,112(%r3)
- std 15,120(%r3)
-5: # Set CIF_FPU flag
- oi __LC_CPU_FLAGS+7,_CIF_FPU
- lg %r9,48(%r11) # return from save_fpu_regs
+ larl %r9,save_fpu_regs
br %r14
-.Lcleanup_save_fpu_fpc_end:
- .quad .Lsave_fpu_regs_fpc_end
-.Lcleanup_save_fpu_regs_vx_low:
- .quad .Lsave_fpu_regs_vx_low
-.Lcleanup_save_fpu_regs_vx_high:
- .quad .Lsave_fpu_regs_vx_high
-.Lcleanup_save_fpu_regs_fp:
- .quad .Lsave_fpu_regs_fp
-.Lcleanup_save_fpu_regs_done:
- .quad .Lsave_fpu_regs_done
.Lcleanup_load_fpu_regs:
- TSTMSK __LC_CPU_FLAGS,_CIF_FPU
- bnor %r14
- clg %r9,BASED(.Lcleanup_load_fpu_regs_done)
- jhe 1f
- clg %r9,BASED(.Lcleanup_load_fpu_regs_fp)
- jhe 2f
- clg %r9,BASED(.Lcleanup_load_fpu_regs_vx_high)
- jhe 3f
- clg %r9,BASED(.Lcleanup_load_fpu_regs_vx)
- jhe 4f
- lg %r4,__LC_CURRENT
- aghi %r4,__TASK_thread
- lfpc __THREAD_FPU_fpc(%r4)
- TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_VX
- lg %r4,__THREAD_FPU_regs(%r4) # %r4 <- reg save area
- jz 2f # -> no VX, load FP regs
-4: # Load V0 ..V15 registers
- VLM %v0,%v15,0,%r4
-3: # Load V16..V31 registers
- VLM %v16,%v31,256,%r4
- j 1f
-2: # Load floating-point registers
- ld 0,0(%r4)
- ld 1,8(%r4)
- ld 2,16(%r4)
- ld 3,24(%r4)
- ld 4,32(%r4)
- ld 5,40(%r4)
- ld 6,48(%r4)
- ld 7,56(%r4)
- ld 8,64(%r4)
- ld 9,72(%r4)
- ld 10,80(%r4)
- ld 11,88(%r4)
- ld 12,96(%r4)
- ld 13,104(%r4)
- ld 14,112(%r4)
- ld 15,120(%r4)
-1: # Clear CIF_FPU bit
- ni __LC_CPU_FLAGS+7,255-_CIF_FPU
- lg %r9,48(%r11) # return from load_fpu_regs
+ larl %r9,load_fpu_regs
br %r14
-.Lcleanup_load_fpu_regs_vx:
- .quad .Lload_fpu_regs_vx
-.Lcleanup_load_fpu_regs_vx_high:
- .quad .Lload_fpu_regs_vx_high
-.Lcleanup_load_fpu_regs_fp:
- .quad .Lload_fpu_regs_fp
-.Lcleanup_load_fpu_regs_done:
- .quad .Lload_fpu_regs_done
/*
* Integer constants
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index b7019ab74070..bedd2f55d860 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -1,6 +1,7 @@
#ifndef _ENTRY_H
#define _ENTRY_H
+#include <linux/percpu.h>
#include <linux/types.h>
#include <linux/signal.h>
#include <asm/ptrace.h>
@@ -75,4 +76,7 @@ long sys_s390_personality(unsigned int personality);
long sys_s390_runtime_instr(int command, int signum);
long sys_s390_pci_mmio_write(unsigned long, const void __user *, size_t);
long sys_s390_pci_mmio_read(unsigned long, void __user *, size_t);
+
+DECLARE_PER_CPU(u64, mt_cycles[8]);
+
#endif /* _ENTRY_H */
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index f20abdb5630a..d14069d4b88d 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -2064,12 +2064,5 @@ void s390_reset_system(void)
S390_lowcore.program_new_psw.addr =
(unsigned long) s390_base_pgm_handler;
- /*
- * Clear subchannel ID and number to signal new kernel that no CCW or
- * SCSI IPL has been done (for kexec and kdump)
- */
- S390_lowcore.subchannel_id = 0;
- S390_lowcore.subchannel_nr = 0;
-
do_reset_calls();
}
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index f41d5208aaf7..c373a1d41d10 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -164,8 +164,7 @@ void do_softirq_own_stack(void)
{
unsigned long old, new;
- /* Get current stack pointer. */
- asm volatile("la %0,0(15)" : "=a" (old));
+ old = current_stack_pointer();
/* Check against async. stack address range. */
new = S390_lowcore.async_stack;
if (((new - old) >> (PAGE_SHIFT + THREAD_ORDER)) != 0) {
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index 2f1b7217c25c..0e64f08d3d69 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -43,13 +43,13 @@ static int machine_kdump_pm_cb(struct notifier_block *nb, unsigned long action,
switch (action) {
case PM_SUSPEND_PREPARE:
case PM_HIBERNATION_PREPARE:
- if (crashk_res.start)
- crash_map_reserved_pages();
+ if (kexec_crash_image)
+ arch_kexec_unprotect_crashkres();
break;
case PM_POST_SUSPEND:
case PM_POST_HIBERNATION:
- if (crashk_res.start)
- crash_unmap_reserved_pages();
+ if (kexec_crash_image)
+ arch_kexec_protect_crashkres();
break;
default:
return NOTIFY_DONE;
@@ -60,6 +60,8 @@ static int machine_kdump_pm_cb(struct notifier_block *nb, unsigned long action,
static int __init machine_kdump_pm_init(void)
{
pm_notifier(machine_kdump_pm_cb, 0);
+ /* Create initial mapping for crashkernel memory */
+ arch_kexec_unprotect_crashkres();
return 0;
}
arch_initcall(machine_kdump_pm_init);
@@ -146,6 +148,8 @@ static int kdump_csum_valid(struct kimage *image)
#endif
}
+#ifdef CONFIG_CRASH_DUMP
+
/*
* Map or unmap crashkernel memory
*/
@@ -167,21 +171,25 @@ static void crash_map_pages(int enable)
}
/*
- * Map crashkernel memory
+ * Unmap crashkernel memory
*/
-void crash_map_reserved_pages(void)
+void arch_kexec_protect_crashkres(void)
{
- crash_map_pages(1);
+ if (crashk_res.end)
+ crash_map_pages(0);
}
/*
- * Unmap crashkernel memory
+ * Map crashkernel memory
*/
-void crash_unmap_reserved_pages(void)
+void arch_kexec_unprotect_crashkres(void)
{
- crash_map_pages(0);
+ if (crashk_res.end)
+ crash_map_pages(1);
}
+#endif
+
/*
* Give back memory to hypervisor before new kdump is loaded
*/
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
index 7873e171457c..fbc07891f9e7 100644
--- a/arch/s390/kernel/module.c
+++ b/arch/s390/kernel/module.c
@@ -51,6 +51,10 @@ void *module_alloc(unsigned long size)
void module_arch_freeing_init(struct module *mod)
{
+ if (is_livepatch_module(mod) &&
+ mod->state == MODULE_STATE_LIVE)
+ return;
+
vfree(mod->arch.syminfo);
mod->arch.syminfo = NULL;
}
@@ -425,7 +429,5 @@ int module_finalize(const Elf_Ehdr *hdr,
struct module *me)
{
jump_label_apply_nops(me);
- vfree(me->arch.syminfo);
- me->arch.syminfo = NULL;
return 0;
}
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index 929c147e07b4..7ec63b1d920d 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -383,7 +383,7 @@ static int __hw_perf_event_init(struct perf_event *event)
/* Validate the counter that is assigned to this event.
* Because the counter facility can use numerous counters at the
- * same time without constraints, it is not necessary to explicity
+ * same time without constraints, it is not necessary to explicitly
* validate event groups (event->group_leader != event).
*/
err = validate_event(hwc);
@@ -649,6 +649,8 @@ static int cpumf_pmu_commit_txn(struct pmu *pmu)
/* Performance monitoring unit for s390x */
static struct pmu cpumf_pmu = {
+ .task_ctx_nr = perf_sw_context,
+ .capabilities = PERF_PMU_CAP_NO_INTERRUPT,
.pmu_enable = cpumf_pmu_enable,
.pmu_disable = cpumf_pmu_disable,
.event_init = cpumf_pmu_event_init,
@@ -665,17 +667,21 @@ static struct pmu cpumf_pmu = {
static int cpumf_pmu_notifier(struct notifier_block *self, unsigned long action,
void *hcpu)
{
- unsigned int cpu = (long) hcpu;
int flags;
switch (action & ~CPU_TASKS_FROZEN) {
case CPU_ONLINE:
+ case CPU_DOWN_FAILED:
flags = PMC_INIT;
- smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1);
+ local_irq_disable();
+ setup_pmc_cpu(&flags);
+ local_irq_enable();
break;
case CPU_DOWN_PREPARE:
flags = PMC_RELEASE;
- smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1);
+ local_irq_disable();
+ setup_pmc_cpu(&flags);
+ local_irq_enable();
break;
default:
break;
@@ -704,12 +710,6 @@ static int __init cpumf_pmu_init(void)
goto out;
}
- /* The CPU measurement counter facility does not have overflow
- * interrupts to do sampling. Sampling must be provided by
- * external means, for example, by timers.
- */
- cpumf_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
-
cpumf_pmu.attr_groups = cpumf_cf_event_group();
rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", PERF_TYPE_RAW);
if (rc) {
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 3d8da1e742c2..a8e832166417 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -1022,10 +1022,13 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr)
/*
* A non-zero guest program parameter indicates a guest
* sample.
- * Note that some early samples might be misaccounted to
- * the host.
+ * Note that some early samples or samples from guests without
+ * lpp usage would be misaccounted to the host. We use the asn
+ * value as a heuristic to detect most of these guest samples.
+ * If the value differs from the host hpp value, we assume
+ * it to be a KVM guest.
*/
- if (sfr->basic.gpp)
+ if (sfr->basic.gpp || sfr->basic.prim_asn != (u16) sfr->basic.hpp)
sde_regs->in_guest = 1;
overflow = 0;
@@ -1507,7 +1510,6 @@ static void cpumf_measurement_alert(struct ext_code ext_code,
static int cpumf_pmu_notifier(struct notifier_block *self,
unsigned long action, void *hcpu)
{
- unsigned int cpu = (long) hcpu;
int flags;
/* Ignore the notification if no events are scheduled on the PMU.
@@ -1518,13 +1520,17 @@ static int cpumf_pmu_notifier(struct notifier_block *self,
switch (action & ~CPU_TASKS_FROZEN) {
case CPU_ONLINE:
- case CPU_ONLINE_FROZEN:
+ case CPU_DOWN_FAILED:
flags = PMC_INIT;
- smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1);
+ local_irq_disable();
+ setup_pmc_cpu(&flags);
+ local_irq_enable();
break;
case CPU_DOWN_PREPARE:
flags = PMC_RELEASE;
- smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1);
+ local_irq_disable();
+ setup_pmc_cpu(&flags);
+ local_irq_enable();
break;
default:
break;
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
index 0943b11a2f6e..87035fa58bbe 100644
--- a/arch/s390/kernel/perf_event.c
+++ b/arch/s390/kernel/perf_event.c
@@ -222,67 +222,23 @@ static int __init service_level_perf_register(void)
}
arch_initcall(service_level_perf_register);
-/* See also arch/s390/kernel/traps.c */
-static unsigned long __store_trace(struct perf_callchain_entry *entry,
- unsigned long sp,
- unsigned long low, unsigned long high)
+static int __perf_callchain_kernel(void *data, unsigned long address)
{
- struct stack_frame *sf;
- struct pt_regs *regs;
-
- while (1) {
- if (sp < low || sp > high - sizeof(*sf))
- return sp;
- sf = (struct stack_frame *) sp;
- perf_callchain_store(entry, sf->gprs[8]);
- /* Follow the backchain. */
- while (1) {
- low = sp;
- sp = sf->back_chain;
- if (!sp)
- break;
- if (sp <= low || sp > high - sizeof(*sf))
- return sp;
- sf = (struct stack_frame *) sp;
- perf_callchain_store(entry, sf->gprs[8]);
- }
- /* Zero backchain detected, check for interrupt frame. */
- sp = (unsigned long) (sf + 1);
- if (sp <= low || sp > high - sizeof(*regs))
- return sp;
- regs = (struct pt_regs *) sp;
- perf_callchain_store(entry, sf->gprs[8]);
- low = sp;
- sp = regs->gprs[15];
- }
+ struct perf_callchain_entry_ctx *entry = data;
+
+ perf_callchain_store(entry, address);
+ return 0;
}
-void perf_callchain_kernel(struct perf_callchain_entry *entry,
+void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
struct pt_regs *regs)
{
- unsigned long head, frame_size;
- struct stack_frame *head_sf;
-
if (user_mode(regs))
return;
-
- frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
- head = regs->gprs[15];
- head_sf = (struct stack_frame *) head;
-
- if (!head_sf || !head_sf->back_chain)
- return;
-
- head = head_sf->back_chain;
- head = __store_trace(entry, head,
- S390_lowcore.async_stack + frame_size - ASYNC_SIZE,
- S390_lowcore.async_stack + frame_size);
-
- __store_trace(entry, head, S390_lowcore.thread_info,
- S390_lowcore.thread_info + THREAD_SIZE);
+ dump_trace(__perf_callchain_kernel, entry, NULL, regs->gprs[15]);
}
-/* Perf defintions for PMU event attributes in sysfs */
+/* Perf definitions for PMU event attributes in sysfs */
ssize_t cpumf_events_sysfs_show(struct device *dev,
struct device_attribute *attr, char *page)
{
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 2bba7df4ac51..bba4fa74b321 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -7,6 +7,7 @@
* Denis Joseph Barrow,
*/
+#include <linux/elf-randomize.h>
#include <linux/compiler.h>
#include <linux/cpu.h>
#include <linux/sched.h>
@@ -37,9 +38,6 @@
asmlinkage void ret_from_fork(void) asm ("ret_from_fork");
-/* FPU save area for the init task */
-__vector128 init_task_fpu_regs[__NUM_VXRS] __init_task_data;
-
/*
* Return saved PC of a blocked thread. used in kernel/sched.
* resume in entry.S does not create a new stack frame, it
@@ -70,9 +68,10 @@ extern void kernel_thread_starter(void);
/*
* Free current thread data structures etc..
*/
-void exit_thread(void)
+void exit_thread(struct task_struct *tsk)
{
- exit_thread_runtime_instr();
+ if (tsk == current)
+ exit_thread_runtime_instr();
}
void flush_thread(void)
@@ -85,35 +84,19 @@ void release_thread(struct task_struct *dead_task)
void arch_release_task_struct(struct task_struct *tsk)
{
- /* Free either the floating-point or the vector register save area */
- kfree(tsk->thread.fpu.regs);
}
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
- size_t fpu_regs_size;
-
- *dst = *src;
-
- /*
- * If the vector extension is available, it is enabled for all tasks,
- * and, thus, the FPU register save area must be allocated accordingly.
- */
- fpu_regs_size = MACHINE_HAS_VX ? sizeof(__vector128) * __NUM_VXRS
- : sizeof(freg_t) * __NUM_FPRS;
- dst->thread.fpu.regs = kzalloc(fpu_regs_size, GFP_KERNEL|__GFP_REPEAT);
- if (!dst->thread.fpu.regs)
- return -ENOMEM;
-
/*
* Save the floating-point or vector register state of the current
* task and set the CIF_FPU flag to lazy restore the FPU register
* state when returning to user space.
*/
save_fpu_regs();
- dst->thread.fpu.fpc = current->thread.fpu.fpc;
- memcpy(dst->thread.fpu.regs, current->thread.fpu.regs, fpu_regs_size);
+ memcpy(dst, src, arch_task_struct_size);
+ dst->thread.fpu.regs = dst->thread.fpu.fprs;
return 0;
}
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
index 647128d5b983..de7451065c34 100644
--- a/arch/s390/kernel/processor.c
+++ b/arch/s390/kernel/processor.c
@@ -6,6 +6,7 @@
#define KMSG_COMPONENT "cpu"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#include <linux/cpufeature.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/seq_file.h>
@@ -84,7 +85,6 @@ static int show_cpuinfo(struct seq_file *m, void *v)
seq_puts(m, "\n");
show_cacheinfo(m);
}
- get_online_cpus();
if (cpu_online(n)) {
struct cpuid *id = &per_cpu(cpu_id, n);
seq_printf(m, "processor %li: "
@@ -93,23 +93,31 @@ static int show_cpuinfo(struct seq_file *m, void *v)
"machine = %04X\n",
n, id->version, id->ident, id->machine);
}
- put_online_cpus();
return 0;
}
+static inline void *c_update(loff_t *pos)
+{
+ if (*pos)
+ *pos = cpumask_next(*pos - 1, cpu_online_mask);
+ return *pos < nr_cpu_ids ? (void *)*pos + 1 : NULL;
+}
+
static void *c_start(struct seq_file *m, loff_t *pos)
{
- return *pos < nr_cpu_ids ? (void *)((unsigned long) *pos + 1) : NULL;
+ get_online_cpus();
+ return c_update(pos);
}
static void *c_next(struct seq_file *m, void *v, loff_t *pos)
{
++*pos;
- return c_start(m, pos);
+ return c_update(pos);
}
static void c_stop(struct seq_file *m, void *v)
{
+ put_online_cpus();
}
const struct seq_operations cpuinfo_op = {
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 9220db5c996a..f31939147ccd 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -327,6 +327,7 @@ static void __init setup_lowcore(void)
+ PAGE_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
lc->current_task = (unsigned long) init_thread_union.thread_info.task;
lc->thread_info = (unsigned long) &init_thread_union;
+ lc->lpp = LPP_MAGIC;
lc->machine_flags = S390_lowcore.machine_flags;
lc->stfl_fac_list = S390_lowcore.stfl_fac_list;
memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list,
@@ -374,17 +375,17 @@ static void __init setup_lowcore(void)
static struct resource code_resource = {
.name = "Kernel code",
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM,
+ .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};
static struct resource data_resource = {
.name = "Kernel data",
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM,
+ .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};
static struct resource bss_resource = {
.name = "Kernel bss",
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM,
+ .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};
static struct resource __initdata *standard_resources[] = {
@@ -408,7 +409,7 @@ static void __init setup_resources(void)
for_each_memblock(memory, reg) {
res = alloc_bootmem_low(sizeof(*res));
- res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
+ res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;
res->name = "System RAM";
res->start = reg->base;
@@ -779,6 +780,7 @@ static int __init setup_hwcaps(void)
strcpy(elf_platform, "zEC12");
break;
case 0x2964:
+ case 0x2965:
strcpy(elf_platform, "z13");
break;
}
@@ -807,6 +809,22 @@ static void __init setup_randomness(void)
}
/*
+ * Find the correct size for the task_struct. This depends on
+ * the size of the struct fpu at the end of the thread_struct
+ * which is embedded in the task_struct.
+ */
+static void __init setup_task_size(void)
+{
+ int task_size = sizeof(struct task_struct);
+
+ if (!MACHINE_HAS_VX) {
+ task_size -= sizeof(__vector128) * __NUM_VXRS;
+ task_size += sizeof(freg_t) * __NUM_FPRS;
+ }
+ arch_task_struct_size = task_size;
+}
+
+/*
* Setup function called from init/main.c just after the banner
* was printed.
*/
@@ -844,6 +862,7 @@ void __init setup_arch(char **cmdline_p)
os_info_init();
setup_ipl();
+ setup_task_size();
/* Do some memory reservations *before* memory is added to memblock */
reserve_memory_end();
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 3c65a8eae34d..7b89a7572100 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -798,7 +798,7 @@ static void smp_start_secondary(void *cpuvoid)
set_cpu_online(smp_processor_id(), true);
inc_irq_stat(CPU_RST);
local_irq_enable();
- cpu_startup_entry(CPUHP_ONLINE);
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
/* Upping and downing of CPUs */
@@ -832,7 +832,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
pcpu_attach_task(pcpu, tidle);
pcpu_start_fn(pcpu, smp_start_secondary, NULL);
/* Wait until cpu puts itself in the online & active maps */
- while (!cpu_online(cpu) || !cpu_active(cpu))
+ while (!cpu_online(cpu))
cpu_relax();
return 0;
}
diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c
index 8f64ebd63767..44f84b23d4e5 100644
--- a/arch/s390/kernel/stacktrace.c
+++ b/arch/s390/kernel/stacktrace.c
@@ -10,78 +10,39 @@
#include <linux/kallsyms.h>
#include <linux/module.h>
-static unsigned long save_context_stack(struct stack_trace *trace,
- unsigned long sp,
- unsigned long low,
- unsigned long high,
- int savesched)
+static int __save_address(void *data, unsigned long address, int nosched)
{
- struct stack_frame *sf;
- struct pt_regs *regs;
- unsigned long addr;
+ struct stack_trace *trace = data;
- while(1) {
- if (sp < low || sp > high)
- return sp;
- sf = (struct stack_frame *)sp;
- while(1) {
- addr = sf->gprs[8];
- if (!trace->skip)
- trace->entries[trace->nr_entries++] = addr;
- else
- trace->skip--;
- if (trace->nr_entries >= trace->max_entries)
- return sp;
- low = sp;
- sp = sf->back_chain;
- if (!sp)
- break;
- if (sp <= low || sp > high - sizeof(*sf))
- return sp;
- sf = (struct stack_frame *)sp;
- }
- /* Zero backchain detected, check for interrupt frame. */
- sp = (unsigned long)(sf + 1);
- if (sp <= low || sp > high - sizeof(*regs))
- return sp;
- regs = (struct pt_regs *)sp;
- addr = regs->psw.addr;
- if (savesched || !in_sched_functions(addr)) {
- if (!trace->skip)
- trace->entries[trace->nr_entries++] = addr;
- else
- trace->skip--;
- }
- if (trace->nr_entries >= trace->max_entries)
- return sp;
- low = sp;
- sp = regs->gprs[15];
+ if (nosched && in_sched_functions(address))
+ return 0;
+ if (trace->skip > 0) {
+ trace->skip--;
+ return 0;
}
+ if (trace->nr_entries < trace->max_entries) {
+ trace->entries[trace->nr_entries++] = address;
+ return 0;
+ }
+ return 1;
}
-static void __save_stack_trace(struct stack_trace *trace, unsigned long sp)
+static int save_address(void *data, unsigned long address)
{
- unsigned long new_sp, frame_size;
+ return __save_address(data, address, 0);
+}
- frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
- new_sp = save_context_stack(trace, sp,
- S390_lowcore.panic_stack + frame_size - PAGE_SIZE,
- S390_lowcore.panic_stack + frame_size, 1);
- new_sp = save_context_stack(trace, new_sp,
- S390_lowcore.async_stack + frame_size - ASYNC_SIZE,
- S390_lowcore.async_stack + frame_size, 1);
- save_context_stack(trace, new_sp,
- S390_lowcore.thread_info,
- S390_lowcore.thread_info + THREAD_SIZE, 1);
+static int save_address_nosched(void *data, unsigned long address)
+{
+ return __save_address(data, address, 1);
}
void save_stack_trace(struct stack_trace *trace)
{
- register unsigned long r15 asm ("15");
unsigned long sp;
- sp = r15;
- __save_stack_trace(trace, sp);
+ sp = current_stack_pointer();
+ dump_trace(save_address, trace, NULL, sp);
if (trace->nr_entries < trace->max_entries)
trace->entries[trace->nr_entries++] = ULONG_MAX;
}
@@ -89,16 +50,12 @@ EXPORT_SYMBOL_GPL(save_stack_trace);
void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
{
- unsigned long sp, low, high;
+ unsigned long sp;
sp = tsk->thread.ksp;
- if (tsk == current) {
- /* Get current stack pointer. */
- asm volatile("la %0,0(15)" : "=a" (sp));
- }
- low = (unsigned long) task_stack_page(tsk);
- high = (unsigned long) task_pt_regs(tsk);
- save_context_stack(trace, sp, low, high, 0);
+ if (tsk == current)
+ sp = current_stack_pointer();
+ dump_trace(save_address_nosched, trace, tsk, sp);
if (trace->nr_entries < trace->max_entries)
trace->entries[trace->nr_entries++] = ULONG_MAX;
}
@@ -109,7 +66,7 @@ void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
unsigned long sp;
sp = kernel_stack_pointer(regs);
- __save_stack_trace(trace, sp);
+ dump_trace(save_address, trace, NULL, sp);
if (trace->nr_entries < trace->max_entries)
trace->entries[trace->nr_entries++] = ULONG_MAX;
}
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 293d8b98fd52..9b59e6212d8f 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -384,3 +384,5 @@ SYSCALL(sys_recvmsg,compat_sys_recvmsg)
SYSCALL(sys_shutdown,sys_shutdown)
SYSCALL(sys_mlock2,compat_sys_mlock2)
SYSCALL(sys_copy_file_range,compat_sys_copy_file_range) /* 375 */
+SYSCALL(sys_preadv2,compat_sys_preadv2)
+SYSCALL(sys_pwritev2,compat_sys_pwritev2)
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 99f84ac31307..9409d32f285e 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -499,8 +499,7 @@ static void etr_reset(void)
if (etr_port0_online && etr_port1_online)
set_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
} else if (etr_port0_online || etr_port1_online) {
- pr_warning("The real or virtual hardware system does "
- "not provide an ETR interface\n");
+ pr_warn("The real or virtual hardware system does not provide an ETR interface\n");
etr_port0_online = etr_port1_online = 0;
}
}
@@ -1433,7 +1432,7 @@ device_initcall(etr_init_sysfs);
/*
* Server Time Protocol (STP) code.
*/
-static int stp_online;
+static bool stp_online;
static struct stp_sstpi stp_info;
static void *stp_page;
@@ -1444,11 +1443,7 @@ static struct timer_list stp_timer;
static int __init early_parse_stp(char *p)
{
- if (strncmp(p, "off", 3) == 0)
- stp_online = 0;
- else if (strncmp(p, "on", 2) == 0)
- stp_online = 1;
- return 0;
+ return kstrtobool(p, &stp_online);
}
early_param("stp", early_parse_stp);
@@ -1464,8 +1459,7 @@ static void __init stp_reset(void)
if (rc == 0)
set_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags);
else if (stp_online) {
- pr_warning("The real or virtual hardware system does "
- "not provide an STP interface\n");
+ pr_warn("The real or virtual hardware system does not provide an STP interface\n");
free_page((unsigned long) stp_page);
stp_page = NULL;
stp_online = 0;
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 40b8102fdadb..64298a867589 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -37,7 +37,7 @@ static void set_topology_timer(void);
static void topology_work_fn(struct work_struct *work);
static struct sysinfo_15_1_x *tl_info;
-static int topology_enabled = 1;
+static bool topology_enabled = true;
static DECLARE_WORK(topology_work, topology_work_fn);
/*
@@ -444,10 +444,7 @@ static const struct cpumask *cpu_book_mask(int cpu)
static int __init early_parse_topology(char *p)
{
- if (strncmp(p, "off", 3))
- return 0;
- topology_enabled = 0;
- return 0;
+ return kstrtobool(p, &topology_enabled);
}
early_param("topology", early_parse_topology);
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index 017eb03daee2..dd97a3e8a34a 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -22,8 +22,6 @@
#include <asm/fpu/api.h>
#include "entry.h"
-int show_unhandled_signals = 1;
-
static inline void __user *get_trap_ip(struct pt_regs *regs)
{
unsigned long address;
@@ -35,21 +33,6 @@ static inline void __user *get_trap_ip(struct pt_regs *regs)
return (void __user *) (address - (regs->int_code >> 16));
}
-static inline void report_user_fault(struct pt_regs *regs, int signr)
-{
- if ((task_pid_nr(current) > 1) && !show_unhandled_signals)
- return;
- if (!unhandled_signal(current, signr))
- return;
- if (!printk_ratelimit())
- return;
- printk("User process fault: interruption code %04x ilc:%d ",
- regs->int_code & 0xffff, regs->int_code >> 17);
- print_vma_addr("in ", regs->psw.addr);
- printk("\n");
- show_regs(regs);
-}
-
int is_valid_bugaddr(unsigned long addr)
{
return 1;
@@ -65,7 +48,7 @@ void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str)
info.si_code = si_code;
info.si_addr = get_trap_ip(regs);
force_sig_info(si_signo, &info, current);
- report_user_fault(regs, si_signo);
+ report_user_fault(regs, si_signo, 0);
} else {
const struct exception_table_entry *fixup;
fixup = search_exception_tables(regs->psw.addr);
@@ -111,7 +94,7 @@ NOKPROBE_SYMBOL(do_per_trap);
void default_trap_handler(struct pt_regs *regs)
{
if (user_mode(regs)) {
- report_user_fault(regs, SIGSEGV);
+ report_user_fault(regs, SIGSEGV, 0);
do_exit(SIGSEGV);
} else
die(regs, "Unknown program exception");
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index 94495cac8be3..5904abf6b1ae 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -216,7 +216,8 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
* it at vdso_base which is the "natural" base for it, but we might
* fail and end up putting it elsewhere.
*/
- down_write(&mm->mmap_sem);
+ if (down_write_killable(&mm->mmap_sem))
+ return -EINTR;
vdso_base = get_unmapped_area(NULL, 0, vdso_pages << PAGE_SHIFT, 0, 0);
if (IS_ERR_VALUE(vdso_base)) {
rc = vdso_base;
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index 445657fe658c..0f41a8286378 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -28,6 +28,7 @@ SECTIONS
LOCK_TEXT
KPROBES_TEXT
IRQENTRY_TEXT
+ SOFTIRQENTRY_TEXT
*(.fixup)
*(.gnu.warning)
} :text = 0x0700
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index dafc44f519c3..856e30d8463f 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -18,6 +18,8 @@
#include <asm/cpu_mf.h>
#include <asm/smp.h>
+#include "entry.h"
+
static void virt_timer_expire(void);
static LIST_HEAD(virt_timer_list);
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index 5ea5af3c7db7..b1900239b0ab 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -28,6 +28,7 @@ config KVM
select HAVE_KVM_IRQCHIP
select HAVE_KVM_IRQFD
select HAVE_KVM_IRQ_ROUTING
+ select HAVE_KVM_INVALID_WAKEUPS
select SRCU
select KVM_VFIO
---help---
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 05f7de9869a9..1ea4095b67d7 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -14,6 +14,7 @@
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <asm/pgalloc.h>
+#include <asm/gmap.h>
#include <asm/virtio-ccw.h>
#include "kvm-s390.h"
#include "trace.h"
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index d30db40437dc..66938d283b77 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -373,7 +373,7 @@ void ipte_unlock(struct kvm_vcpu *vcpu)
}
static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, ar_t ar,
- int write)
+ enum gacc_mode mode)
{
union alet alet;
struct ale ale;
@@ -454,7 +454,7 @@ static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, ar_t ar,
}
}
- if (ale.fo == 1 && write)
+ if (ale.fo == 1 && mode == GACC_STORE)
return PGM_PROTECTION;
asce->val = aste.asce;
@@ -477,25 +477,28 @@ enum {
};
static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce,
- ar_t ar, int write)
+ ar_t ar, enum gacc_mode mode)
{
int rc;
- psw_t *psw = &vcpu->arch.sie_block->gpsw;
+ struct psw_bits psw = psw_bits(vcpu->arch.sie_block->gpsw);
struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
struct trans_exc_code_bits *tec_bits;
memset(pgm, 0, sizeof(*pgm));
tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
- tec_bits->fsi = write ? FSI_STORE : FSI_FETCH;
- tec_bits->as = psw_bits(*psw).as;
+ tec_bits->fsi = mode == GACC_STORE ? FSI_STORE : FSI_FETCH;
+ tec_bits->as = psw.as;
- if (!psw_bits(*psw).t) {
+ if (!psw.t) {
asce->val = 0;
asce->r = 1;
return 0;
}
- switch (psw_bits(vcpu->arch.sie_block->gpsw).as) {
+ if (mode == GACC_IFETCH)
+ psw.as = psw.as == PSW_AS_HOME ? PSW_AS_HOME : PSW_AS_PRIMARY;
+
+ switch (psw.as) {
case PSW_AS_PRIMARY:
asce->val = vcpu->arch.sie_block->gcr[1];
return 0;
@@ -506,7 +509,7 @@ static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce,
asce->val = vcpu->arch.sie_block->gcr[13];
return 0;
case PSW_AS_ACCREG:
- rc = ar_translation(vcpu, asce, ar, write);
+ rc = ar_translation(vcpu, asce, ar, mode);
switch (rc) {
case PGM_ALEN_TRANSLATION:
case PGM_ALE_SEQUENCE:
@@ -538,7 +541,7 @@ static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val)
* @gva: guest virtual address
* @gpa: points to where guest physical (absolute) address should be stored
* @asce: effective asce
- * @write: indicates if access is a write access
+ * @mode: indicates the access mode to be used
*
* Translate a guest virtual address into a guest absolute address by means
* of dynamic address translation as specified by the architecture.
@@ -554,7 +557,7 @@ static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val)
*/
static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
unsigned long *gpa, const union asce asce,
- int write)
+ enum gacc_mode mode)
{
union vaddress vaddr = {.addr = gva};
union raddress raddr = {.addr = gva};
@@ -699,7 +702,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
real_address:
raddr.addr = kvm_s390_real_to_abs(vcpu, raddr.addr);
absolute_address:
- if (write && dat_protection)
+ if (mode == GACC_STORE && dat_protection)
return PGM_PROTECTION;
if (kvm_is_error_gpa(vcpu->kvm, raddr.addr))
return PGM_ADDRESSING;
@@ -728,7 +731,7 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu,
static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga,
unsigned long *pages, unsigned long nr_pages,
- const union asce asce, int write)
+ const union asce asce, enum gacc_mode mode)
{
struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
psw_t *psw = &vcpu->arch.sie_block->gpsw;
@@ -740,13 +743,13 @@ static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga,
while (nr_pages) {
ga = kvm_s390_logical_to_effective(vcpu, ga);
tec_bits->addr = ga >> PAGE_SHIFT;
- if (write && lap_enabled && is_low_address(ga)) {
+ if (mode == GACC_STORE && lap_enabled && is_low_address(ga)) {
pgm->code = PGM_PROTECTION;
return pgm->code;
}
ga &= PAGE_MASK;
if (psw_bits(*psw).t) {
- rc = guest_translate(vcpu, ga, pages, asce, write);
+ rc = guest_translate(vcpu, ga, pages, asce, mode);
if (rc < 0)
return rc;
if (rc == PGM_PROTECTION)
@@ -768,7 +771,7 @@ static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga,
}
int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
- unsigned long len, int write)
+ unsigned long len, enum gacc_mode mode)
{
psw_t *psw = &vcpu->arch.sie_block->gpsw;
unsigned long _len, nr_pages, gpa, idx;
@@ -780,7 +783,7 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
if (!len)
return 0;
- rc = get_vcpu_asce(vcpu, &asce, ar, write);
+ rc = get_vcpu_asce(vcpu, &asce, ar, mode);
if (rc)
return rc;
nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1;
@@ -792,11 +795,11 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
need_ipte_lock = psw_bits(*psw).t && !asce.r;
if (need_ipte_lock)
ipte_lock(vcpu);
- rc = guest_page_range(vcpu, ga, pages, nr_pages, asce, write);
+ rc = guest_page_range(vcpu, ga, pages, nr_pages, asce, mode);
for (idx = 0; idx < nr_pages && !rc; idx++) {
gpa = *(pages + idx) + (ga & ~PAGE_MASK);
_len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len);
- if (write)
+ if (mode == GACC_STORE)
rc = kvm_write_guest(vcpu->kvm, gpa, data, _len);
else
rc = kvm_read_guest(vcpu->kvm, gpa, data, _len);
@@ -812,7 +815,7 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
}
int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
- void *data, unsigned long len, int write)
+ void *data, unsigned long len, enum gacc_mode mode)
{
unsigned long _len, gpa;
int rc = 0;
@@ -820,7 +823,7 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
while (len && !rc) {
gpa = kvm_s390_real_to_abs(vcpu, gra);
_len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len);
- if (write)
+ if (mode)
rc = write_guest_abs(vcpu, gpa, data, _len);
else
rc = read_guest_abs(vcpu, gpa, data, _len);
@@ -841,7 +844,7 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
* has to take care of this.
*/
int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
- unsigned long *gpa, int write)
+ unsigned long *gpa, enum gacc_mode mode)
{
struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
psw_t *psw = &vcpu->arch.sie_block->gpsw;
@@ -851,19 +854,19 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
gva = kvm_s390_logical_to_effective(vcpu, gva);
tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
- rc = get_vcpu_asce(vcpu, &asce, ar, write);
+ rc = get_vcpu_asce(vcpu, &asce, ar, mode);
tec->addr = gva >> PAGE_SHIFT;
if (rc)
return rc;
if (is_low_address(gva) && low_address_protection_enabled(vcpu, asce)) {
- if (write) {
+ if (mode == GACC_STORE) {
rc = pgm->code = PGM_PROTECTION;
return rc;
}
}
if (psw_bits(*psw).t && !asce.r) { /* Use DAT? */
- rc = guest_translate(vcpu, gva, gpa, asce, write);
+ rc = guest_translate(vcpu, gva, gpa, asce, mode);
if (rc > 0) {
if (rc == PGM_PROTECTION)
tec->b61 = 1;
@@ -883,7 +886,7 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
* check_gva_range - test a range of guest virtual addresses for accessibility
*/
int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
- unsigned long length, int is_write)
+ unsigned long length, enum gacc_mode mode)
{
unsigned long gpa;
unsigned long currlen;
@@ -892,7 +895,7 @@ int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
ipte_lock(vcpu);
while (length > 0 && !rc) {
currlen = min(length, PAGE_SIZE - (gva % PAGE_SIZE));
- rc = guest_translate_address(vcpu, gva, ar, &gpa, is_write);
+ rc = guest_translate_address(vcpu, gva, ar, &gpa, mode);
gva += currlen;
length -= currlen;
}
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
index ef03726cc661..df0a79dd8159 100644
--- a/arch/s390/kvm/gaccess.h
+++ b/arch/s390/kvm/gaccess.h
@@ -155,16 +155,22 @@ int read_guest_lc(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
return kvm_read_guest(vcpu->kvm, gpa, data, len);
}
+enum gacc_mode {
+ GACC_FETCH,
+ GACC_STORE,
+ GACC_IFETCH,
+};
+
int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva,
- ar_t ar, unsigned long *gpa, int write);
+ ar_t ar, unsigned long *gpa, enum gacc_mode mode);
int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
- unsigned long length, int is_write);
+ unsigned long length, enum gacc_mode mode);
int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
- unsigned long len, int write);
+ unsigned long len, enum gacc_mode mode);
int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
- void *data, unsigned long len, int write);
+ void *data, unsigned long len, enum gacc_mode mode);
/**
* write_guest - copy data from kernel space to guest space
@@ -215,7 +221,7 @@ static inline __must_check
int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
unsigned long len)
{
- return access_guest(vcpu, ga, ar, data, len, 1);
+ return access_guest(vcpu, ga, ar, data, len, GACC_STORE);
}
/**
@@ -235,7 +241,27 @@ static inline __must_check
int read_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
unsigned long len)
{
- return access_guest(vcpu, ga, ar, data, len, 0);
+ return access_guest(vcpu, ga, ar, data, len, GACC_FETCH);
+}
+
+/**
+ * read_guest_instr - copy instruction data from guest space to kernel space
+ * @vcpu: virtual cpu
+ * @data: destination address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy @len bytes from the current psw address (guest space) to @data (kernel
+ * space).
+ *
+ * The behaviour of read_guest_instr is identical to read_guest, except that
+ * instruction data will be read from primary space when in home-space or
+ * address-space mode.
+ */
+static inline __must_check
+int read_guest_instr(struct kvm_vcpu *vcpu, void *data, unsigned long len)
+{
+ return access_guest(vcpu, vcpu->arch.sie_block->gpsw.addr, 0, data, len,
+ GACC_IFETCH);
}
/**
diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c
index d697312ce9ee..e8c6843b9600 100644
--- a/arch/s390/kvm/guestdbg.c
+++ b/arch/s390/kvm/guestdbg.c
@@ -17,7 +17,7 @@
/*
* Extends the address range given by *start and *stop to include the address
* range starting with estart and the length len. Takes care of overflowing
- * intervals and tries to minimize the overall intervall size.
+ * intervals and tries to minimize the overall interval size.
*/
static void extend_address_range(u64 *start, u64 *stop, u64 estart, int len)
{
@@ -72,7 +72,7 @@ static void enable_all_hw_bp(struct kvm_vcpu *vcpu)
return;
/*
- * If the guest is not interrested in branching events, we can savely
+ * If the guest is not interested in branching events, we can safely
* limit them to the PER address range.
*/
if (!(*cr9 & PER_EVENT_BRANCH))
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index d53c10753c46..252157181302 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -38,17 +38,32 @@ static const intercept_handler_t instruction_handlers[256] = {
[0xeb] = kvm_s390_handle_eb,
};
-void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilc)
+u8 kvm_s390_get_ilen(struct kvm_vcpu *vcpu)
{
struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block;
+ u8 ilen = 0;
- /* Use the length of the EXECUTE instruction if necessary */
- if (sie_block->icptstatus & 1) {
- ilc = (sie_block->icptstatus >> 4) & 0x6;
- if (!ilc)
- ilc = 4;
+ switch (vcpu->arch.sie_block->icptcode) {
+ case ICPT_INST:
+ case ICPT_INSTPROGI:
+ case ICPT_OPEREXC:
+ case ICPT_PARTEXEC:
+ case ICPT_IOINST:
+ /* instruction only stored for these icptcodes */
+ ilen = insn_length(vcpu->arch.sie_block->ipa >> 8);
+ /* Use the length of the EXECUTE instruction if necessary */
+ if (sie_block->icptstatus & 1) {
+ ilen = (sie_block->icptstatus >> 4) & 0x6;
+ if (!ilen)
+ ilen = 4;
+ }
+ break;
+ case ICPT_PROGI:
+ /* bit 1+2 of pgmilc are the ilc, so we directly get ilen */
+ ilen = vcpu->arch.sie_block->pgmilc & 0x6;
+ break;
}
- sie_block->gpsw.addr = __rewind_psw(sie_block->gpsw, ilc);
+ return ilen;
}
static int handle_noop(struct kvm_vcpu *vcpu)
@@ -121,11 +136,13 @@ static int handle_instruction(struct kvm_vcpu *vcpu)
return -EOPNOTSUPP;
}
-static void __extract_prog_irq(struct kvm_vcpu *vcpu,
- struct kvm_s390_pgm_info *pgm_info)
+static int inject_prog_on_prog_intercept(struct kvm_vcpu *vcpu)
{
- memset(pgm_info, 0, sizeof(struct kvm_s390_pgm_info));
- pgm_info->code = vcpu->arch.sie_block->iprcc;
+ struct kvm_s390_pgm_info pgm_info = {
+ .code = vcpu->arch.sie_block->iprcc,
+ /* the PSW has already been rewound */
+ .flags = KVM_S390_PGM_FLAGS_NO_REWIND,
+ };
switch (vcpu->arch.sie_block->iprcc & ~PGM_PER) {
case PGM_AFX_TRANSLATION:
@@ -138,7 +155,7 @@ static void __extract_prog_irq(struct kvm_vcpu *vcpu,
case PGM_PRIMARY_AUTHORITY:
case PGM_SECONDARY_AUTHORITY:
case PGM_SPACE_SWITCH:
- pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc;
+ pgm_info.trans_exc_code = vcpu->arch.sie_block->tecmc;
break;
case PGM_ALEN_TRANSLATION:
case PGM_ALE_SEQUENCE:
@@ -146,7 +163,7 @@ static void __extract_prog_irq(struct kvm_vcpu *vcpu,
case PGM_ASTE_SEQUENCE:
case PGM_ASTE_VALIDITY:
case PGM_EXTENDED_AUTHORITY:
- pgm_info->exc_access_id = vcpu->arch.sie_block->eai;
+ pgm_info.exc_access_id = vcpu->arch.sie_block->eai;
break;
case PGM_ASCE_TYPE:
case PGM_PAGE_TRANSLATION:
@@ -154,32 +171,33 @@ static void __extract_prog_irq(struct kvm_vcpu *vcpu,
case PGM_REGION_SECOND_TRANS:
case PGM_REGION_THIRD_TRANS:
case PGM_SEGMENT_TRANSLATION:
- pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc;
- pgm_info->exc_access_id = vcpu->arch.sie_block->eai;
- pgm_info->op_access_id = vcpu->arch.sie_block->oai;
+ pgm_info.trans_exc_code = vcpu->arch.sie_block->tecmc;
+ pgm_info.exc_access_id = vcpu->arch.sie_block->eai;
+ pgm_info.op_access_id = vcpu->arch.sie_block->oai;
break;
case PGM_MONITOR:
- pgm_info->mon_class_nr = vcpu->arch.sie_block->mcn;
- pgm_info->mon_code = vcpu->arch.sie_block->tecmc;
+ pgm_info.mon_class_nr = vcpu->arch.sie_block->mcn;
+ pgm_info.mon_code = vcpu->arch.sie_block->tecmc;
break;
case PGM_VECTOR_PROCESSING:
case PGM_DATA:
- pgm_info->data_exc_code = vcpu->arch.sie_block->dxc;
+ pgm_info.data_exc_code = vcpu->arch.sie_block->dxc;
break;
case PGM_PROTECTION:
- pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc;
- pgm_info->exc_access_id = vcpu->arch.sie_block->eai;
+ pgm_info.trans_exc_code = vcpu->arch.sie_block->tecmc;
+ pgm_info.exc_access_id = vcpu->arch.sie_block->eai;
break;
default:
break;
}
if (vcpu->arch.sie_block->iprcc & PGM_PER) {
- pgm_info->per_code = vcpu->arch.sie_block->perc;
- pgm_info->per_atmid = vcpu->arch.sie_block->peratmid;
- pgm_info->per_address = vcpu->arch.sie_block->peraddr;
- pgm_info->per_access_id = vcpu->arch.sie_block->peraid;
+ pgm_info.per_code = vcpu->arch.sie_block->perc;
+ pgm_info.per_atmid = vcpu->arch.sie_block->peratmid;
+ pgm_info.per_address = vcpu->arch.sie_block->peraddr;
+ pgm_info.per_access_id = vcpu->arch.sie_block->peraid;
}
+ return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}
/*
@@ -208,7 +226,6 @@ static int handle_itdb(struct kvm_vcpu *vcpu)
static int handle_prog(struct kvm_vcpu *vcpu)
{
- struct kvm_s390_pgm_info pgm_info;
psw_t psw;
int rc;
@@ -234,8 +251,7 @@ static int handle_prog(struct kvm_vcpu *vcpu)
if (rc)
return rc;
- __extract_prog_irq(vcpu, &pgm_info);
- return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
+ return inject_prog_on_prog_intercept(vcpu);
}
/**
@@ -302,7 +318,7 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
/* Make sure that the source is paged-in */
rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg2],
- reg2, &srcaddr, 0);
+ reg2, &srcaddr, GACC_FETCH);
if (rc)
return kvm_s390_inject_prog_cond(vcpu, rc);
rc = kvm_arch_fault_in_page(vcpu, srcaddr, 0);
@@ -311,20 +327,22 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
/* Make sure that the destination is paged-in */
rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg1],
- reg1, &dstaddr, 1);
+ reg1, &dstaddr, GACC_STORE);
if (rc)
return kvm_s390_inject_prog_cond(vcpu, rc);
rc = kvm_arch_fault_in_page(vcpu, dstaddr, 1);
if (rc != 0)
return rc;
- kvm_s390_rewind_psw(vcpu, 4);
+ kvm_s390_retry_instr(vcpu);
return 0;
}
static int handle_partial_execution(struct kvm_vcpu *vcpu)
{
+ vcpu->stat.exit_pei++;
+
if (vcpu->arch.sie_block->ipa == 0xb254) /* MVPG */
return handle_mvpg_pei(vcpu);
if (vcpu->arch.sie_block->ipa >> 8 == 0xae) /* SIGP */
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index f88ca72c3a77..5a80af740d3e 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -23,6 +23,7 @@
#include <asm/uaccess.h>
#include <asm/sclp.h>
#include <asm/isc.h>
+#include <asm/gmap.h>
#include "kvm-s390.h"
#include "gaccess.h"
#include "trace-s390.h"
@@ -182,8 +183,9 @@ static int cpu_timer_interrupts_enabled(struct kvm_vcpu *vcpu)
static int cpu_timer_irq_pending(struct kvm_vcpu *vcpu)
{
- return (vcpu->arch.sie_block->cputm >> 63) &&
- cpu_timer_interrupts_enabled(vcpu);
+ if (!cpu_timer_interrupts_enabled(vcpu))
+ return 0;
+ return kvm_s390_get_cpu_timer(vcpu) >> 63;
}
static inline int is_ioirq(unsigned long irq_type)
@@ -335,23 +337,6 @@ static void set_intercept_indicators(struct kvm_vcpu *vcpu)
set_intercept_indicators_stop(vcpu);
}
-static u16 get_ilc(struct kvm_vcpu *vcpu)
-{
- switch (vcpu->arch.sie_block->icptcode) {
- case ICPT_INST:
- case ICPT_INSTPROGI:
- case ICPT_OPEREXC:
- case ICPT_PARTEXEC:
- case ICPT_IOINST:
- /* last instruction only stored for these icptcodes */
- return insn_length(vcpu->arch.sie_block->ipa >> 8);
- case ICPT_PROGI:
- return vcpu->arch.sie_block->pgmilc;
- default:
- return 0;
- }
-}
-
static int __must_check __deliver_cpu_timer(struct kvm_vcpu *vcpu)
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
@@ -588,7 +573,7 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
struct kvm_s390_pgm_info pgm_info;
int rc = 0, nullifying = false;
- u16 ilc = get_ilc(vcpu);
+ u16 ilen;
spin_lock(&li->lock);
pgm_info = li->irq.pgm;
@@ -596,8 +581,9 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
memset(&li->irq.pgm, 0, sizeof(pgm_info));
spin_unlock(&li->lock);
- VCPU_EVENT(vcpu, 3, "deliver: program irq code 0x%x, ilc:%d",
- pgm_info.code, ilc);
+ ilen = pgm_info.flags & KVM_S390_PGM_FLAGS_ILC_MASK;
+ VCPU_EVENT(vcpu, 3, "deliver: program irq code 0x%x, ilen:%d",
+ pgm_info.code, ilen);
vcpu->stat.deliver_program_int++;
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
pgm_info.code, 0);
@@ -681,10 +667,11 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
(u8 *) __LC_PER_ACCESS_ID);
}
- if (nullifying && vcpu->arch.sie_block->icptcode == ICPT_INST)
- kvm_s390_rewind_psw(vcpu, ilc);
+ if (nullifying && !(pgm_info.flags & KVM_S390_PGM_FLAGS_NO_REWIND))
+ kvm_s390_rewind_psw(vcpu, ilen);
- rc |= put_guest_lc(vcpu, ilc, (u16 *) __LC_PGM_ILC);
+ /* bit 1+2 of the target are the ilc, so we can directly use ilen */
+ rc |= put_guest_lc(vcpu, ilen, (u16 *) __LC_PGM_ILC);
rc |= put_guest_lc(vcpu, vcpu->arch.sie_block->gbea,
(u64 *) __LC_LAST_BREAK);
rc |= put_guest_lc(vcpu, pgm_info.code,
@@ -923,9 +910,35 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
return ckc_irq_pending(vcpu) || cpu_timer_irq_pending(vcpu);
}
+static u64 __calculate_sltime(struct kvm_vcpu *vcpu)
+{
+ u64 now, cputm, sltime = 0;
+
+ if (ckc_interrupts_enabled(vcpu)) {
+ now = kvm_s390_get_tod_clock_fast(vcpu->kvm);
+ sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now);
+ /* already expired or overflow? */
+ if (!sltime || vcpu->arch.sie_block->ckc <= now)
+ return 0;
+ if (cpu_timer_interrupts_enabled(vcpu)) {
+ cputm = kvm_s390_get_cpu_timer(vcpu);
+ /* already expired? */
+ if (cputm >> 63)
+ return 0;
+ return min(sltime, tod_to_ns(cputm));
+ }
+ } else if (cpu_timer_interrupts_enabled(vcpu)) {
+ sltime = kvm_s390_get_cpu_timer(vcpu);
+ /* already expired? */
+ if (sltime >> 63)
+ return 0;
+ }
+ return sltime;
+}
+
int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
{
- u64 now, sltime;
+ u64 sltime;
vcpu->stat.exit_wait_state++;
@@ -938,22 +951,20 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
return -EOPNOTSUPP; /* disabled wait */
}
- if (!ckc_interrupts_enabled(vcpu)) {
+ if (!ckc_interrupts_enabled(vcpu) &&
+ !cpu_timer_interrupts_enabled(vcpu)) {
VCPU_EVENT(vcpu, 3, "%s", "enabled wait w/o timer");
__set_cpu_idle(vcpu);
goto no_timer;
}
- now = kvm_s390_get_tod_clock_fast(vcpu->kvm);
- sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now);
-
- /* underflow */
- if (vcpu->arch.sie_block->ckc < now)
+ sltime = __calculate_sltime(vcpu);
+ if (!sltime)
return 0;
__set_cpu_idle(vcpu);
hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL);
- VCPU_EVENT(vcpu, 4, "enabled wait via clock comparator: %llu ns", sltime);
+ VCPU_EVENT(vcpu, 4, "enabled wait: %llu ns", sltime);
no_timer:
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
kvm_vcpu_block(vcpu);
@@ -966,13 +977,18 @@ no_timer:
void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu)
{
- if (waitqueue_active(&vcpu->wq)) {
+ /*
+ * We cannot move this into the if, as the CPU might be already
+ * in kvm_vcpu_block without having the waitqueue set (polling)
+ */
+ vcpu->valid_wakeup = true;
+ if (swait_active(&vcpu->wq)) {
/*
* The vcpu gave up the cpu voluntarily, mark it as a good
* yield-candidate.
*/
vcpu->preempted = true;
- wake_up_interruptible(&vcpu->wq);
+ swake_up(&vcpu->wq);
vcpu->stat.halt_wakeup++;
}
}
@@ -980,18 +996,16 @@ void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu)
enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer)
{
struct kvm_vcpu *vcpu;
- u64 now, sltime;
+ u64 sltime;
vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer);
- now = kvm_s390_get_tod_clock_fast(vcpu->kvm);
- sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now);
+ sltime = __calculate_sltime(vcpu);
/*
* If the monotonic clock runs faster than the tod clock we might be
* woken up too early and have to go back to sleep to avoid deadlocks.
*/
- if (vcpu->arch.sie_block->ckc > now &&
- hrtimer_forward_now(timer, ns_to_ktime(sltime)))
+ if (sltime && hrtimer_forward_now(timer, ns_to_ktime(sltime)))
return HRTIMER_RESTART;
kvm_s390_vcpu_wakeup(vcpu);
return HRTIMER_NORESTART;
@@ -1059,8 +1073,16 @@ static int __inject_prog(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
irq->u.pgm.code, 0);
+ if (!(irq->u.pgm.flags & KVM_S390_PGM_FLAGS_ILC_VALID)) {
+ /* auto detection if no valid ILC was given */
+ irq->u.pgm.flags &= ~KVM_S390_PGM_FLAGS_ILC_MASK;
+ irq->u.pgm.flags |= kvm_s390_get_ilen(vcpu);
+ irq->u.pgm.flags |= KVM_S390_PGM_FLAGS_ILC_VALID;
+ }
+
if (irq->u.pgm.code == PGM_PER) {
li->irq.pgm.code |= PGM_PER;
+ li->irq.pgm.flags = irq->u.pgm.flags;
/* only modify PER related information */
li->irq.pgm.per_address = irq->u.pgm.per_address;
li->irq.pgm.per_code = irq->u.pgm.per_code;
@@ -1069,6 +1091,7 @@ static int __inject_prog(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
} else if (!(irq->u.pgm.code & PGM_PER)) {
li->irq.pgm.code = (li->irq.pgm.code & PGM_PER) |
irq->u.pgm.code;
+ li->irq.pgm.flags = irq->u.pgm.flags;
/* only modify non-PER information */
li->irq.pgm.trans_exc_code = irq->u.pgm.trans_exc_code;
li->irq.pgm.mon_code = irq->u.pgm.mon_code;
@@ -2016,6 +2039,27 @@ static int modify_io_adapter(struct kvm_device *dev,
return ret;
}
+static int clear_io_irq(struct kvm *kvm, struct kvm_device_attr *attr)
+
+{
+ const u64 isc_mask = 0xffUL << 24; /* all iscs set */
+ u32 schid;
+
+ if (attr->flags)
+ return -EINVAL;
+ if (attr->attr != sizeof(schid))
+ return -EINVAL;
+ if (copy_from_user(&schid, (void __user *) attr->addr, sizeof(schid)))
+ return -EFAULT;
+ kfree(kvm_s390_get_io_int(kvm, isc_mask, schid));
+ /*
+ * If userspace is conforming to the architecture, we can have at most
+ * one pending I/O interrupt per subchannel, so this is effectively a
+ * clear all.
+ */
+ return 0;
+}
+
static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
{
int r = 0;
@@ -2049,6 +2093,9 @@ static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
case KVM_DEV_FLIC_ADAPTER_MODIFY:
r = modify_io_adapter(dev, attr);
break;
+ case KVM_DEV_FLIC_CLEAR_IO_IRQ:
+ r = clear_io_irq(dev->kvm, attr);
+ break;
default:
r = -EINVAL;
}
@@ -2056,6 +2103,23 @@ static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
return r;
}
+static int flic_has_attr(struct kvm_device *dev,
+ struct kvm_device_attr *attr)
+{
+ switch (attr->group) {
+ case KVM_DEV_FLIC_GET_ALL_IRQS:
+ case KVM_DEV_FLIC_ENQUEUE:
+ case KVM_DEV_FLIC_CLEAR_IRQS:
+ case KVM_DEV_FLIC_APF_ENABLE:
+ case KVM_DEV_FLIC_APF_DISABLE_WAIT:
+ case KVM_DEV_FLIC_ADAPTER_REGISTER:
+ case KVM_DEV_FLIC_ADAPTER_MODIFY:
+ case KVM_DEV_FLIC_CLEAR_IO_IRQ:
+ return 0;
+ }
+ return -ENXIO;
+}
+
static int flic_create(struct kvm_device *dev, u32 type)
{
if (!dev)
@@ -2077,6 +2141,7 @@ struct kvm_device_ops kvm_flic_ops = {
.name = "kvm-flic",
.get_attr = flic_get_attr,
.set_attr = flic_set_attr,
+ .has_attr = flic_has_attr,
.create = flic_create,
.destroy = flic_destroy,
};
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 03dfe9c667f4..43f2a2b80490 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -30,6 +30,7 @@
#include <asm/lowcore.h>
#include <asm/etr.h>
#include <asm/pgtable.h>
+#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
@@ -60,10 +61,12 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "exit_external_request", VCPU_STAT(exit_external_request) },
{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
{ "exit_instruction", VCPU_STAT(exit_instruction) },
+ { "exit_pei", VCPU_STAT(exit_pei) },
{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
+ { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
@@ -117,9 +120,9 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
};
/* upper facilities limit for kvm */
-unsigned long kvm_s390_fac_list_mask[] = {
- 0xffe6fffbfcfdfc40UL,
- 0x005e800000000000UL,
+unsigned long kvm_s390_fac_list_mask[16] = {
+ 0xffe6000000000000UL,
+ 0x005e000000000000UL,
};
unsigned long kvm_s390_fac_list_mask_size(void)
@@ -158,6 +161,8 @@ static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
kvm->arch.epoch -= *delta;
kvm_for_each_vcpu(i, vcpu, kvm) {
vcpu->arch.sie_block->epoch -= *delta;
+ if (vcpu->arch.cputm_enabled)
+ vcpu->arch.cputm_start += *delta;
}
}
return NOTIFY_OK;
@@ -274,16 +279,17 @@ static void kvm_s390_sync_dirty_log(struct kvm *kvm,
unsigned long address;
struct gmap *gmap = kvm->arch.gmap;
- down_read(&gmap->mm->mmap_sem);
/* Loop over all guest pages */
last_gfn = memslot->base_gfn + memslot->npages;
for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
address = gfn_to_hva_memslot(memslot, cur_gfn);
- if (gmap_test_and_clear_dirty(address, gmap))
+ if (test_and_clear_guest_dirty(gmap->mm, address))
mark_page_dirty(kvm, cur_gfn);
+ if (fatal_signal_pending(current))
+ return;
+ cond_resched();
}
- up_read(&gmap->mm->mmap_sem);
}
/* Section: vm related */
@@ -352,8 +358,8 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
if (atomic_read(&kvm->online_vcpus)) {
r = -EBUSY;
} else if (MACHINE_HAS_VX) {
- set_kvm_facility(kvm->arch.model.fac->mask, 129);
- set_kvm_facility(kvm->arch.model.fac->list, 129);
+ set_kvm_facility(kvm->arch.model.fac_mask, 129);
+ set_kvm_facility(kvm->arch.model.fac_list, 129);
r = 0;
} else
r = -EINVAL;
@@ -367,8 +373,8 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
if (atomic_read(&kvm->online_vcpus)) {
r = -EBUSY;
} else if (test_facility(64)) {
- set_kvm_facility(kvm->arch.model.fac->mask, 64);
- set_kvm_facility(kvm->arch.model.fac->list, 64);
+ set_kvm_facility(kvm->arch.model.fac_mask, 64);
+ set_kvm_facility(kvm->arch.model.fac_list, 64);
r = 0;
}
mutex_unlock(&kvm->lock);
@@ -634,6 +640,7 @@ static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
struct kvm_s390_vm_cpu_processor *proc;
+ u16 lowest_ibc, unblocked_ibc;
int ret = 0;
mutex_lock(&kvm->lock);
@@ -648,10 +655,18 @@ static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
}
if (!copy_from_user(proc, (void __user *)attr->addr,
sizeof(*proc))) {
- memcpy(&kvm->arch.model.cpu_id, &proc->cpuid,
- sizeof(struct cpuid));
- kvm->arch.model.ibc = proc->ibc;
- memcpy(kvm->arch.model.fac->list, proc->fac_list,
+ kvm->arch.model.cpuid = proc->cpuid;
+ lowest_ibc = sclp.ibc >> 16 & 0xfff;
+ unblocked_ibc = sclp.ibc & 0xfff;
+ if (lowest_ibc && proc->ibc) {
+ if (proc->ibc > unblocked_ibc)
+ kvm->arch.model.ibc = unblocked_ibc;
+ else if (proc->ibc < lowest_ibc)
+ kvm->arch.model.ibc = lowest_ibc;
+ else
+ kvm->arch.model.ibc = proc->ibc;
+ }
+ memcpy(kvm->arch.model.fac_list, proc->fac_list,
S390_ARCH_FAC_LIST_SIZE_BYTE);
} else
ret = -EFAULT;
@@ -683,9 +698,10 @@ static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
ret = -ENOMEM;
goto out;
}
- memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid));
+ proc->cpuid = kvm->arch.model.cpuid;
proc->ibc = kvm->arch.model.ibc;
- memcpy(&proc->fac_list, kvm->arch.model.fac->list, S390_ARCH_FAC_LIST_SIZE_BYTE);
+ memcpy(&proc->fac_list, kvm->arch.model.fac_list,
+ S390_ARCH_FAC_LIST_SIZE_BYTE);
if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
ret = -EFAULT;
kfree(proc);
@@ -705,7 +721,7 @@ static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
}
get_cpu_id((struct cpuid *) &mach->cpuid);
mach->ibc = sclp.ibc;
- memcpy(&mach->fac_mask, kvm->arch.model.fac->mask,
+ memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
S390_ARCH_FAC_LIST_SIZE_BYTE);
memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
S390_ARCH_FAC_LIST_SIZE_BYTE);
@@ -1076,22 +1092,21 @@ static void kvm_s390_set_crycb_format(struct kvm *kvm)
kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
}
-static void kvm_s390_get_cpu_id(struct cpuid *cpu_id)
+static u64 kvm_s390_get_initial_cpuid(void)
{
- get_cpu_id(cpu_id);
- cpu_id->version = 0xff;
+ struct cpuid cpuid;
+
+ get_cpu_id(&cpuid);
+ cpuid.version = 0xff;
+ return *((u64 *) &cpuid);
}
-static int kvm_s390_crypto_init(struct kvm *kvm)
+static void kvm_s390_crypto_init(struct kvm *kvm)
{
if (!test_kvm_facility(kvm, 76))
- return 0;
-
- kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb),
- GFP_KERNEL | GFP_DMA);
- if (!kvm->arch.crypto.crycb)
- return -ENOMEM;
+ return;
+ kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
kvm_s390_set_crycb_format(kvm);
/* Enable AES/DEA protected key functions by default */
@@ -1101,8 +1116,6 @@ static int kvm_s390_crypto_init(struct kvm *kvm)
sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
-
- return 0;
}
static void sca_dispose(struct kvm *kvm)
@@ -1156,37 +1169,30 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
if (!kvm->arch.dbf)
goto out_err;
- /*
- * The architectural maximum amount of facilities is 16 kbit. To store
- * this amount, 2 kbyte of memory is required. Thus we need a full
- * page to hold the guest facility list (arch.model.fac->list) and the
- * facility mask (arch.model.fac->mask). Its address size has to be
- * 31 bits and word aligned.
- */
- kvm->arch.model.fac =
- (struct kvm_s390_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
- if (!kvm->arch.model.fac)
+ kvm->arch.sie_page2 =
+ (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
+ if (!kvm->arch.sie_page2)
goto out_err;
/* Populate the facility mask initially. */
- memcpy(kvm->arch.model.fac->mask, S390_lowcore.stfle_fac_list,
+ memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
S390_ARCH_FAC_LIST_SIZE_BYTE);
for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
if (i < kvm_s390_fac_list_mask_size())
- kvm->arch.model.fac->mask[i] &= kvm_s390_fac_list_mask[i];
+ kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
else
- kvm->arch.model.fac->mask[i] = 0UL;
+ kvm->arch.model.fac_mask[i] = 0UL;
}
/* Populate the facility list initially. */
- memcpy(kvm->arch.model.fac->list, kvm->arch.model.fac->mask,
+ kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
+ memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
S390_ARCH_FAC_LIST_SIZE_BYTE);
- kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id);
+ kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
kvm->arch.model.ibc = sclp.ibc & 0x0fff;
- if (kvm_s390_crypto_init(kvm) < 0)
- goto out_err;
+ kvm_s390_crypto_init(kvm);
spin_lock_init(&kvm->arch.float_int.lock);
for (i = 0; i < FIRQ_LIST_COUNT; i++)
@@ -1222,8 +1228,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
return 0;
out_err:
- kfree(kvm->arch.crypto.crycb);
- free_page((unsigned long)kvm->arch.model.fac);
+ free_page((unsigned long)kvm->arch.sie_page2);
debug_unregister(kvm->arch.dbf);
sca_dispose(kvm);
KVM_EVENT(3, "creation of vm failed: %d", rc);
@@ -1269,10 +1274,9 @@ static void kvm_free_vcpus(struct kvm *kvm)
void kvm_arch_destroy_vm(struct kvm *kvm)
{
kvm_free_vcpus(kvm);
- free_page((unsigned long)kvm->arch.model.fac);
sca_dispose(kvm);
debug_unregister(kvm->arch.dbf);
- kfree(kvm->arch.crypto.crycb);
+ free_page((unsigned long)kvm->arch.sie_page2);
if (!kvm_is_ucontrol(kvm))
gmap_free(kvm->arch.gmap);
kvm_s390_destroy_adapters(kvm);
@@ -1414,8 +1418,13 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
KVM_SYNC_PFAULT;
if (test_kvm_facility(vcpu->kvm, 64))
vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
- if (test_kvm_facility(vcpu->kvm, 129))
+ /* fprs can be synchronized via vrs, even if the guest has no vx. With
+ * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
+ */
+ if (MACHINE_HAS_VX)
vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
+ else
+ vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
if (kvm_is_ucontrol(vcpu->kvm))
return __kvm_ucontrol_vcpu_init(vcpu);
@@ -1423,6 +1432,93 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
return 0;
}
+/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
+static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
+{
+ WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
+ raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
+ vcpu->arch.cputm_start = get_tod_clock_fast();
+ raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
+}
+
+/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
+static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
+{
+ WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
+ raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
+ vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
+ vcpu->arch.cputm_start = 0;
+ raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
+}
+
+/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
+static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
+{
+ WARN_ON_ONCE(vcpu->arch.cputm_enabled);
+ vcpu->arch.cputm_enabled = true;
+ __start_cpu_timer_accounting(vcpu);
+}
+
+/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
+static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
+{
+ WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
+ __stop_cpu_timer_accounting(vcpu);
+ vcpu->arch.cputm_enabled = false;
+}
+
+static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
+{
+ preempt_disable(); /* protect from TOD sync and vcpu_load/put */
+ __enable_cpu_timer_accounting(vcpu);
+ preempt_enable();
+}
+
+static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
+{
+ preempt_disable(); /* protect from TOD sync and vcpu_load/put */
+ __disable_cpu_timer_accounting(vcpu);
+ preempt_enable();
+}
+
+/* set the cpu timer - may only be called from the VCPU thread itself */
+void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
+{
+ preempt_disable(); /* protect from TOD sync and vcpu_load/put */
+ raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
+ if (vcpu->arch.cputm_enabled)
+ vcpu->arch.cputm_start = get_tod_clock_fast();
+ vcpu->arch.sie_block->cputm = cputm;
+ raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
+ preempt_enable();
+}
+
+/* update and get the cpu timer - can also be called from other VCPU threads */
+__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
+{
+ unsigned int seq;
+ __u64 value;
+
+ if (unlikely(!vcpu->arch.cputm_enabled))
+ return vcpu->arch.sie_block->cputm;
+
+ preempt_disable(); /* protect from TOD sync and vcpu_load/put */
+ do {
+ seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
+ /*
+ * If the writer would ever execute a read in the critical
+ * section, e.g. in irq context, we have a deadlock.
+ */
+ WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
+ value = vcpu->arch.sie_block->cputm;
+ /* if cputm_start is 0, accounting is being started/stopped */
+ if (likely(vcpu->arch.cputm_start))
+ value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
+ } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
+ preempt_enable();
+ return value;
+}
+
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
/* Save host register state */
@@ -1430,10 +1526,10 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
- /* Depending on MACHINE_HAS_VX, data stored to vrs either
- * has vector register or floating point register format.
- */
- current->thread.fpu.regs = vcpu->run->s.regs.vrs;
+ if (MACHINE_HAS_VX)
+ current->thread.fpu.regs = vcpu->run->s.regs.vrs;
+ else
+ current->thread.fpu.regs = vcpu->run->s.regs.fprs;
current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
if (test_fp_ctl(current->thread.fpu.fpc))
/* User space provided an invalid FPC, let's clear it */
@@ -1443,10 +1539,16 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
restore_access_regs(vcpu->run->s.regs.acrs);
gmap_enable(vcpu->arch.gmap);
atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
+ if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
+ __start_cpu_timer_accounting(vcpu);
+ vcpu->cpu = cpu;
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
+ vcpu->cpu = -1;
+ if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
+ __stop_cpu_timer_accounting(vcpu);
atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
gmap_disable(vcpu->arch.gmap);
@@ -1468,7 +1570,7 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
vcpu->arch.sie_block->gpsw.mask = 0UL;
vcpu->arch.sie_block->gpsw.addr = 0UL;
kvm_s390_set_prefix(vcpu, 0);
- vcpu->arch.sie_block->cputm = 0UL;
+ kvm_s390_set_cpu_timer(vcpu, 0);
vcpu->arch.sie_block->ckc = 0UL;
vcpu->arch.sie_block->todpr = 0;
memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
@@ -1536,9 +1638,9 @@ static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
{
struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
- vcpu->arch.cpu_id = model->cpu_id;
vcpu->arch.sie_block->ibc = model->ibc;
- vcpu->arch.sie_block->fac = (int) (long) model->fac->list;
+ if (test_kvm_facility(vcpu->kvm, 7))
+ vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
}
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
@@ -1556,11 +1658,14 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
kvm_s390_vcpu_setup_model(vcpu);
- vcpu->arch.sie_block->ecb = 6;
+ vcpu->arch.sie_block->ecb = 0x02;
+ if (test_kvm_facility(vcpu->kvm, 9))
+ vcpu->arch.sie_block->ecb |= 0x04;
if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
vcpu->arch.sie_block->ecb |= 0x10;
- vcpu->arch.sie_block->ecb2 = 8;
+ if (test_kvm_facility(vcpu->kvm, 8))
+ vcpu->arch.sie_block->ecb2 |= 0x08;
vcpu->arch.sie_block->eca = 0xC1002000U;
if (sclp.has_siif)
vcpu->arch.sie_block->eca |= 1;
@@ -1616,6 +1721,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
vcpu->arch.local_int.float_int = &kvm->arch.float_int;
vcpu->arch.local_int.wq = &vcpu->wq;
vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
+ seqcount_init(&vcpu->arch.cputm_seqcount);
rc = kvm_vcpu_init(vcpu, kvm, id);
if (rc)
@@ -1715,7 +1821,7 @@ static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
(u64 __user *)reg->addr);
break;
case KVM_REG_S390_CPU_TIMER:
- r = put_user(vcpu->arch.sie_block->cputm,
+ r = put_user(kvm_s390_get_cpu_timer(vcpu),
(u64 __user *)reg->addr);
break;
case KVM_REG_S390_CLOCK_COMP:
@@ -1753,6 +1859,7 @@ static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
struct kvm_one_reg *reg)
{
int r = -EINVAL;
+ __u64 val;
switch (reg->id) {
case KVM_REG_S390_TODPR:
@@ -1764,8 +1871,9 @@ static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
(u64 __user *)reg->addr);
break;
case KVM_REG_S390_CPU_TIMER:
- r = get_user(vcpu->arch.sie_block->cputm,
- (u64 __user *)reg->addr);
+ r = get_user(val, (u64 __user *)reg->addr);
+ if (!r)
+ kvm_s390_set_cpu_timer(vcpu, val);
break;
case KVM_REG_S390_CLOCK_COMP:
r = get_user(vcpu->arch.sie_block->ckc,
@@ -2158,8 +2266,10 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
{
- psw_t *psw = &vcpu->arch.sie_block->gpsw;
- u8 opcode;
+ struct kvm_s390_pgm_info pgm_info = {
+ .code = PGM_ADDRESSING,
+ };
+ u8 opcode, ilen;
int rc;
VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
@@ -2173,12 +2283,21 @@ static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
* to look up the current opcode to get the length of the instruction
* to be able to forward the PSW.
*/
- rc = read_guest(vcpu, psw->addr, 0, &opcode, 1);
- if (rc)
- return kvm_s390_inject_prog_cond(vcpu, rc);
- psw->addr = __rewind_psw(*psw, -insn_length(opcode));
-
- return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ rc = read_guest_instr(vcpu, &opcode, 1);
+ ilen = insn_length(opcode);
+ if (rc < 0) {
+ return rc;
+ } else if (rc) {
+ /* Instruction-Fetching Exceptions - we can't detect the ilen.
+ * Forward by arbitrary ilc, injection will take care of
+ * nullification if necessary.
+ */
+ pgm_info = vcpu->arch.pgm;
+ ilen = 4;
+ }
+ pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
+ kvm_s390_forward_psw(vcpu, ilen);
+ return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
@@ -2244,10 +2363,12 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
*/
local_irq_disable();
__kvm_guest_enter();
+ __disable_cpu_timer_accounting(vcpu);
local_irq_enable();
exit_reason = sie64a(vcpu->arch.sie_block,
vcpu->run->s.regs.gprs);
local_irq_disable();
+ __enable_cpu_timer_accounting(vcpu);
__kvm_guest_exit();
local_irq_enable();
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
@@ -2271,7 +2392,7 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
}
if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
- vcpu->arch.sie_block->cputm = kvm_run->s.regs.cputm;
+ kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
@@ -2293,7 +2414,7 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
- kvm_run->s.regs.cputm = vcpu->arch.sie_block->cputm;
+ kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
@@ -2325,6 +2446,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
}
sync_regs(vcpu, kvm_run);
+ enable_cpu_timer_accounting(vcpu);
might_fault();
rc = __vcpu_run(vcpu);
@@ -2344,6 +2466,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
rc = 0;
}
+ disable_cpu_timer_accounting(vcpu);
store_regs(vcpu, kvm_run);
if (vcpu->sigset_active)
@@ -2364,7 +2487,7 @@ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
unsigned char archmode = 1;
freg_t fprs[NUM_FPRS];
unsigned int px;
- u64 clkcomp;
+ u64 clkcomp, cputm;
int rc;
px = kvm_s390_get_prefix(vcpu);
@@ -2386,7 +2509,7 @@ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
fprs, 128);
} else {
rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
- vcpu->run->s.regs.vrs, 128);
+ vcpu->run->s.regs.fprs, 128);
}
rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
vcpu->run->s.regs.gprs, 128);
@@ -2398,8 +2521,9 @@ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
&vcpu->run->s.regs.fpc, 4);
rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
&vcpu->arch.sie_block->todpr, 4);
+ cputm = kvm_s390_get_cpu_timer(vcpu);
rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
- &vcpu->arch.sie_block->cputm, 8);
+ &cputm, 8);
clkcomp = vcpu->arch.sie_block->ckc >> 8;
rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
&clkcomp, 8);
@@ -2605,7 +2729,8 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
switch (mop->op) {
case KVM_S390_MEMOP_LOGICAL_READ:
if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
- r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, false);
+ r = check_gva_range(vcpu, mop->gaddr, mop->ar,
+ mop->size, GACC_FETCH);
break;
}
r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
@@ -2616,7 +2741,8 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
break;
case KVM_S390_MEMOP_LOGICAL_WRITE:
if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
- r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, true);
+ r = check_gva_range(vcpu, mop->gaddr, mop->ar,
+ mop->size, GACC_STORE);
break;
}
if (copy_from_user(tmpbuf, uaddr, mop->size)) {
@@ -2861,13 +2987,31 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
return;
}
+static inline unsigned long nonhyp_mask(int i)
+{
+ unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
+
+ return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
+}
+
+void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
+{
+ vcpu->valid_wakeup = false;
+}
+
static int __init kvm_s390_init(void)
{
+ int i;
+
if (!sclp.has_sief2) {
pr_info("SIE not available\n");
return -ENODEV;
}
+ for (i = 0; i < 16; i++)
+ kvm_s390_fac_list_mask[i] |=
+ S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
+
return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index df1abada1f36..8621ab00ec8e 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -19,6 +19,7 @@
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <asm/facility.h>
+#include <asm/processor.h>
typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
@@ -53,6 +54,11 @@ static inline int is_vcpu_stopped(struct kvm_vcpu *vcpu)
return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOPPED;
}
+static inline int is_vcpu_idle(struct kvm_vcpu *vcpu)
+{
+ return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_WAIT;
+}
+
static inline int kvm_is_ucontrol(struct kvm *kvm)
{
#ifdef CONFIG_KVM_S390_UCONTROL
@@ -154,8 +160,8 @@ static inline void kvm_s390_set_psw_cc(struct kvm_vcpu *vcpu, unsigned long cc)
/* test availability of facility in a kvm instance */
static inline int test_kvm_facility(struct kvm *kvm, unsigned long nr)
{
- return __test_facility(nr, kvm->arch.model.fac->mask) &&
- __test_facility(nr, kvm->arch.model.fac->list);
+ return __test_facility(nr, kvm->arch.model.fac_mask) &&
+ __test_facility(nr, kvm->arch.model.fac_list);
}
static inline int set_kvm_facility(u64 *fac_list, unsigned long nr)
@@ -212,8 +218,22 @@ int kvm_s390_reinject_io_int(struct kvm *kvm,
int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked);
/* implemented in intercept.c */
-void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilc);
+u8 kvm_s390_get_ilen(struct kvm_vcpu *vcpu);
int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu);
+static inline void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilen)
+{
+ struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block;
+
+ sie_block->gpsw.addr = __rewind_psw(sie_block->gpsw, ilen);
+}
+static inline void kvm_s390_forward_psw(struct kvm_vcpu *vcpu, int ilen)
+{
+ kvm_s390_rewind_psw(vcpu, -ilen);
+}
+static inline void kvm_s390_retry_instr(struct kvm_vcpu *vcpu)
+{
+ kvm_s390_rewind_psw(vcpu, kvm_s390_get_ilen(vcpu));
+}
/* implemented in priv.c */
int is_valid_psw(psw_t *psw);
@@ -248,6 +268,8 @@ int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu);
void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu);
unsigned long kvm_s390_fac_list_mask_size(void);
extern unsigned long kvm_s390_fac_list_mask[];
+void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm);
+__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu);
/* implemented in diag.c */
int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index ed74e86d9b9e..95916fa7c670 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -23,6 +23,7 @@
#include <asm/sysinfo.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
+#include <asm/gmap.h>
#include <asm/io.h>
#include <asm/ptrace.h>
#include <asm/compat.h>
@@ -173,7 +174,7 @@ static int handle_skey(struct kvm_vcpu *vcpu)
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
- kvm_s390_rewind_psw(vcpu, 4);
+ kvm_s390_retry_instr(vcpu);
VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation");
return 0;
}
@@ -184,7 +185,7 @@ static int handle_ipte_interlock(struct kvm_vcpu *vcpu)
if (psw_bits(vcpu->arch.sie_block->gpsw).p)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
wait_event(vcpu->kvm->arch.ipte_wq, !ipte_lock_held(vcpu));
- kvm_s390_rewind_psw(vcpu, 4);
+ kvm_s390_retry_instr(vcpu);
VCPU_EVENT(vcpu, 4, "%s", "retrying ipte interlock operation");
return 0;
}
@@ -354,7 +355,7 @@ static int handle_stfl(struct kvm_vcpu *vcpu)
* We need to shift the lower 32 facility bits (bit 0-31) from a u64
* into a u32 memory representation. They will remain bits 0-31.
*/
- fac = *vcpu->kvm->arch.model.fac->list >> 32;
+ fac = *vcpu->kvm->arch.model.fac_list >> 32;
rc = write_guest_lc(vcpu, offsetof(struct lowcore, stfl_fac_list),
&fac, sizeof(fac));
if (rc)
@@ -438,7 +439,7 @@ static int handle_lpswe(struct kvm_vcpu *vcpu)
static int handle_stidp(struct kvm_vcpu *vcpu)
{
- u64 stidp_data = vcpu->arch.stidp_data;
+ u64 stidp_data = vcpu->kvm->arch.model.cpuid;
u64 operand2;
int rc;
ar_t ar;
@@ -669,8 +670,9 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
if (vcpu->run->s.regs.gprs[reg1] & PFMF_RESERVED)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- /* Only provide non-quiescing support if the host supports it */
- if (vcpu->run->s.regs.gprs[reg1] & PFMF_NQ && !test_facility(14))
+ /* Only provide non-quiescing support if enabled for the guest */
+ if (vcpu->run->s.regs.gprs[reg1] & PFMF_NQ &&
+ !test_kvm_facility(vcpu->kvm, 14))
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
/* No support for conditional-SSKE */
@@ -743,7 +745,7 @@ static int handle_essa(struct kvm_vcpu *vcpu)
{
/* entries expected to be 1FF */
int entries = (vcpu->arch.sie_block->cbrlo & ~PAGE_MASK) >> 3;
- unsigned long *cbrlo, cbrle;
+ unsigned long *cbrlo;
struct gmap *gmap;
int i;
@@ -759,22 +761,14 @@ static int handle_essa(struct kvm_vcpu *vcpu)
if (((vcpu->arch.sie_block->ipb & 0xf0000000) >> 28) > 6)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- /* Rewind PSW to repeat the ESSA instruction */
- kvm_s390_rewind_psw(vcpu, 4);
+ /* Retry the ESSA instruction */
+ kvm_s390_retry_instr(vcpu);
vcpu->arch.sie_block->cbrlo &= PAGE_MASK; /* reset nceo */
cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo);
down_read(&gmap->mm->mmap_sem);
- for (i = 0; i < entries; ++i) {
- cbrle = cbrlo[i];
- if (unlikely(cbrle & ~PAGE_MASK || cbrle < 2 * PAGE_SIZE))
- /* invalid entry */
- break;
- /* try to free backing */
- __gmap_zap(gmap, cbrle);
- }
+ for (i = 0; i < entries; ++i)
+ __gmap_zap(gmap, cbrlo[i]);
up_read(&gmap->mm->mmap_sem);
- if (i < entries)
- return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
return 0;
}
@@ -981,11 +975,12 @@ static int handle_tprot(struct kvm_vcpu *vcpu)
return -EOPNOTSUPP;
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT)
ipte_lock(vcpu);
- ret = guest_translate_address(vcpu, address1, ar, &gpa, 1);
+ ret = guest_translate_address(vcpu, address1, ar, &gpa, GACC_STORE);
if (ret == PGM_PROTECTION) {
/* Write protected? Try again with read-only... */
cc = 1;
- ret = guest_translate_address(vcpu, address1, ar, &gpa, 0);
+ ret = guest_translate_address(vcpu, address1, ar, &gpa,
+ GACC_FETCH);
}
if (ret) {
if (ret == PGM_ADDRESSING || ret == PGM_TRANSLATION_SPEC) {
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index 77c22d685c7a..28ea0cab1f1b 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -240,6 +240,12 @@ static int __sigp_sense_running(struct kvm_vcpu *vcpu,
struct kvm_s390_local_interrupt *li;
int rc;
+ if (!test_kvm_facility(vcpu->kvm, 9)) {
+ *reg &= 0xffffffff00000000UL;
+ *reg |= SIGP_STATUS_INVALID_ORDER;
+ return SIGP_CC_STATUS_STORED;
+ }
+
li = &dst_vcpu->arch.local_int;
if (atomic_read(li->cpuflags) & CPUSTAT_RUNNING) {
/* running */
diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile
index 0e8fefe5b0ce..1d1af31e8354 100644
--- a/arch/s390/lib/Makefile
+++ b/arch/s390/lib/Makefile
@@ -3,7 +3,7 @@
#
lib-y += delay.o string.o uaccess.o find.o
-obj-y += mem.o
+obj-y += mem.o xor.o
lib-$(CONFIG_SMP) += spinlock.o
lib-$(CONFIG_KPROBES) += probes.o
lib-$(CONFIG_UPROBES) += probes.o
diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c
index d4549c964589..e5f50a7d2f4e 100644
--- a/arch/s390/lib/spinlock.c
+++ b/arch/s390/lib/spinlock.c
@@ -105,6 +105,7 @@ void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags)
if (_raw_compare_and_swap(&lp->lock, 0, cpu))
return;
local_irq_restore(flags);
+ continue;
}
/* Check if the lock owner is running. */
if (first_diag && cpu_is_preempted(~owner)) {
diff --git a/arch/s390/lib/xor.c b/arch/s390/lib/xor.c
new file mode 100644
index 000000000000..7d94e3ec34a9
--- /dev/null
+++ b/arch/s390/lib/xor.c
@@ -0,0 +1,134 @@
+/*
+ * Optimized xor_block operation for RAID4/5
+ *
+ * Copyright IBM Corp. 2016
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/raid/xor.h>
+
+static void xor_xc_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+{
+ asm volatile(
+ " larl 1,2f\n"
+ " aghi %0,-1\n"
+ " jm 3f\n"
+ " srlg 0,%0,8\n"
+ " ltgr 0,0\n"
+ " jz 1f\n"
+ "0: xc 0(256,%1),0(%2)\n"
+ " la %1,256(%1)\n"
+ " la %2,256(%2)\n"
+ " brctg 0,0b\n"
+ "1: ex %0,0(1)\n"
+ " j 3f\n"
+ "2: xc 0(1,%1),0(%2)\n"
+ "3:\n"
+ : : "d" (bytes), "a" (p1), "a" (p2)
+ : "0", "1", "cc", "memory");
+}
+
+static void xor_xc_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+ unsigned long *p3)
+{
+ asm volatile(
+ " larl 1,2f\n"
+ " aghi %0,-1\n"
+ " jm 3f\n"
+ " srlg 0,%0,8\n"
+ " ltgr 0,0\n"
+ " jz 1f\n"
+ "0: xc 0(256,%1),0(%2)\n"
+ " xc 0(256,%1),0(%3)\n"
+ " la %1,256(%1)\n"
+ " la %2,256(%2)\n"
+ " la %3,256(%3)\n"
+ " brctg 0,0b\n"
+ "1: ex %0,0(1)\n"
+ " ex %0,6(1)\n"
+ " j 3f\n"
+ "2: xc 0(1,%1),0(%2)\n"
+ " xc 0(1,%1),0(%3)\n"
+ "3:\n"
+ : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3)
+ : : "0", "1", "cc", "memory");
+}
+
+static void xor_xc_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+ unsigned long *p3, unsigned long *p4)
+{
+ asm volatile(
+ " larl 1,2f\n"
+ " aghi %0,-1\n"
+ " jm 3f\n"
+ " srlg 0,%0,8\n"
+ " ltgr 0,0\n"
+ " jz 1f\n"
+ "0: xc 0(256,%1),0(%2)\n"
+ " xc 0(256,%1),0(%3)\n"
+ " xc 0(256,%1),0(%4)\n"
+ " la %1,256(%1)\n"
+ " la %2,256(%2)\n"
+ " la %3,256(%3)\n"
+ " la %4,256(%4)\n"
+ " brctg 0,0b\n"
+ "1: ex %0,0(1)\n"
+ " ex %0,6(1)\n"
+ " ex %0,12(1)\n"
+ " j 3f\n"
+ "2: xc 0(1,%1),0(%2)\n"
+ " xc 0(1,%1),0(%3)\n"
+ " xc 0(1,%1),0(%4)\n"
+ "3:\n"
+ : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4)
+ : : "0", "1", "cc", "memory");
+}
+
+static void xor_xc_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+ unsigned long *p3, unsigned long *p4, unsigned long *p5)
+{
+ /* Get around a gcc oddity */
+ register unsigned long *reg7 asm ("7") = p5;
+
+ asm volatile(
+ " larl 1,2f\n"
+ " aghi %0,-1\n"
+ " jm 3f\n"
+ " srlg 0,%0,8\n"
+ " ltgr 0,0\n"
+ " jz 1f\n"
+ "0: xc 0(256,%1),0(%2)\n"
+ " xc 0(256,%1),0(%3)\n"
+ " xc 0(256,%1),0(%4)\n"
+ " xc 0(256,%1),0(%5)\n"
+ " la %1,256(%1)\n"
+ " la %2,256(%2)\n"
+ " la %3,256(%3)\n"
+ " la %4,256(%4)\n"
+ " la %5,256(%5)\n"
+ " brctg 0,0b\n"
+ "1: ex %0,0(1)\n"
+ " ex %0,6(1)\n"
+ " ex %0,12(1)\n"
+ " ex %0,18(1)\n"
+ " j 3f\n"
+ "2: xc 0(1,%1),0(%2)\n"
+ " xc 0(1,%1),0(%3)\n"
+ " xc 0(1,%1),0(%4)\n"
+ " xc 0(1,%1),0(%5)\n"
+ "3:\n"
+ : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4),
+ "+a" (reg7)
+ : : "0", "1", "cc", "memory");
+}
+
+struct xor_block_template xor_block_xc = {
+ .name = "xc",
+ .do_2 = xor_xc_2,
+ .do_3 = xor_xc_3,
+ .do_4 = xor_xc_4,
+ .do_5 = xor_xc_5,
+};
+EXPORT_SYMBOL(xor_block_xc);
diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile
index 839592ca265c..0aa0ad165d8b 100644
--- a/arch/s390/mm/Makefile
+++ b/arch/s390/mm/Makefile
@@ -2,9 +2,11 @@
# Makefile for the linux s390-specific parts of the memory manager.
#
-obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o maccess.o
-obj-y += page-states.o gup.o extable.o pageattr.o mem_detect.o
+obj-y := init.o fault.o extmem.o mmap.o vmem.o maccess.o
+obj-y += page-states.o gup.o pageattr.o mem_detect.o
+obj-y += pgtable.o pgalloc.o
obj-$(CONFIG_CMM) += cmm.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_S390_PTDUMP) += dump_pagetables.o
+obj-$(CONFIG_PGSTE) += gmap.o
diff --git a/arch/s390/mm/extable.c b/arch/s390/mm/extable.c
deleted file mode 100644
index 18c8b819b0aa..000000000000
--- a/arch/s390/mm/extable.c
+++ /dev/null
@@ -1,85 +0,0 @@
-#include <linux/module.h>
-#include <linux/sort.h>
-#include <asm/uaccess.h>
-
-/*
- * Search one exception table for an entry corresponding to the
- * given instruction address, and return the address of the entry,
- * or NULL if none is found.
- * We use a binary search, and thus we assume that the table is
- * already sorted.
- */
-const struct exception_table_entry *
-search_extable(const struct exception_table_entry *first,
- const struct exception_table_entry *last,
- unsigned long value)
-{
- const struct exception_table_entry *mid;
- unsigned long addr;
-
- while (first <= last) {
- mid = ((last - first) >> 1) + first;
- addr = extable_insn(mid);
- if (addr < value)
- first = mid + 1;
- else if (addr > value)
- last = mid - 1;
- else
- return mid;
- }
- return NULL;
-}
-
-/*
- * The exception table needs to be sorted so that the binary
- * search that we use to find entries in it works properly.
- * This is used both for the kernel exception table and for
- * the exception tables of modules that get loaded.
- *
- */
-static int cmp_ex(const void *a, const void *b)
-{
- const struct exception_table_entry *x = a, *y = b;
-
- /* This compare is only valid after normalization. */
- return x->insn - y->insn;
-}
-
-void sort_extable(struct exception_table_entry *start,
- struct exception_table_entry *finish)
-{
- struct exception_table_entry *p;
- int i;
-
- /* Normalize entries to being relative to the start of the section */
- for (p = start, i = 0; p < finish; p++, i += 8) {
- p->insn += i;
- p->fixup += i + 4;
- }
- sort(start, finish - start, sizeof(*start), cmp_ex, NULL);
- /* Denormalize all entries */
- for (p = start, i = 0; p < finish; p++, i += 8) {
- p->insn -= i;
- p->fixup -= i + 4;
- }
-}
-
-#ifdef CONFIG_MODULES
-/*
- * If the exception table is sorted, any referring to the module init
- * will be at the beginning or the end.
- */
-void trim_init_extable(struct module *m)
-{
- /* Trim the beginning */
- while (m->num_exentries &&
- within_module_init(extable_insn(&m->extable[0]), m)) {
- m->extable++;
- m->num_exentries--;
- }
- /* Trim the end */
- while (m->num_exentries &&
- within_module_init(extable_insn(&m->extable[m->num_exentries-1]), m))
- m->num_exentries--;
-}
-#endif /* CONFIG_MODULES */
diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c
index a1bf4ad8925d..02042b6b66bf 100644
--- a/arch/s390/mm/extmem.c
+++ b/arch/s390/mm/extmem.c
@@ -265,7 +265,7 @@ query_segment_type (struct dcss_segment *seg)
goto out_free;
}
if (diag_cc > 1) {
- pr_warning("Querying a DCSS type failed with rc=%ld\n", vmrc);
+ pr_warn("Querying a DCSS type failed with rc=%ld\n", vmrc);
rc = dcss_diag_translate_rc (vmrc);
goto out_free;
}
@@ -457,8 +457,7 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long
goto out_resource;
}
if (diag_cc > 1) {
- pr_warning("Loading DCSS %s failed with rc=%ld\n", name,
- end_addr);
+ pr_warn("Loading DCSS %s failed with rc=%ld\n", name, end_addr);
rc = dcss_diag_translate_rc(end_addr);
dcss_diag(&purgeseg_scode, seg->dcss_name,
&dummy, &dummy);
@@ -574,8 +573,7 @@ segment_modify_shared (char *name, int do_nonshared)
goto out_unlock;
}
if (atomic_read (&seg->ref_count) != 1) {
- pr_warning("DCSS %s is in use and cannot be reloaded\n",
- name);
+ pr_warn("DCSS %s is in use and cannot be reloaded\n", name);
rc = -EAGAIN;
goto out_unlock;
}
@@ -588,8 +586,8 @@ segment_modify_shared (char *name, int do_nonshared)
seg->res->flags |= IORESOURCE_READONLY;
if (request_resource(&iomem_resource, seg->res)) {
- pr_warning("DCSS %s overlaps with used memory resources "
- "and cannot be reloaded\n", name);
+ pr_warn("DCSS %s overlaps with used memory resources and cannot be reloaded\n",
+ name);
rc = -EBUSY;
kfree(seg->res);
goto out_del_mem;
@@ -607,8 +605,8 @@ segment_modify_shared (char *name, int do_nonshared)
goto out_del_res;
}
if (diag_cc > 1) {
- pr_warning("Reloading DCSS %s failed with rc=%ld\n", name,
- end_addr);
+ pr_warn("Reloading DCSS %s failed with rc=%ld\n",
+ name, end_addr);
rc = dcss_diag_translate_rc(end_addr);
goto out_del_res;
}
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 791a4146052c..19288c1b36d3 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -32,6 +32,7 @@
#include <asm/asm-offsets.h>
#include <asm/diag.h>
#include <asm/pgtable.h>
+#include <asm/gmap.h>
#include <asm/irq.h>
#include <asm/mmu_context.h>
#include <asm/facility.h>
@@ -183,6 +184,8 @@ static void dump_fault_info(struct pt_regs *regs)
{
unsigned long asce;
+ pr_alert("Failing address: %016lx TEID: %016lx\n",
+ regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long);
pr_alert("Fault in ");
switch (regs->int_parm_long & 3) {
case 3:
@@ -218,7 +221,9 @@ static void dump_fault_info(struct pt_regs *regs)
dump_pagetable(asce, regs->int_parm_long & __FAIL_ADDR_MASK);
}
-static inline void report_user_fault(struct pt_regs *regs, long signr)
+int show_unhandled_signals = 1;
+
+void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault)
{
if ((task_pid_nr(current) > 1) && !show_unhandled_signals)
return;
@@ -230,9 +235,8 @@ static inline void report_user_fault(struct pt_regs *regs, long signr)
regs->int_code & 0xffff, regs->int_code >> 17);
print_vma_addr(KERN_CONT "in ", regs->psw.addr);
printk(KERN_CONT "\n");
- printk(KERN_ALERT "failing address: %016lx TEID: %016lx\n",
- regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long);
- dump_fault_info(regs);
+ if (is_mm_fault)
+ dump_fault_info(regs);
show_regs(regs);
}
@@ -244,8 +248,9 @@ static noinline void do_sigsegv(struct pt_regs *regs, int si_code)
{
struct siginfo si;
- report_user_fault(regs, SIGSEGV);
+ report_user_fault(regs, SIGSEGV, 1);
si.si_signo = SIGSEGV;
+ si.si_errno = 0;
si.si_code = si_code;
si.si_addr = (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK);
force_sig_info(SIGSEGV, &si, current);
@@ -272,8 +277,6 @@ static noinline void do_no_context(struct pt_regs *regs)
else
printk(KERN_ALERT "Unable to handle kernel paging request"
" in virtual user address space\n");
- printk(KERN_ALERT "failing address: %016lx TEID: %016lx\n",
- regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long);
dump_fault_info(regs);
die(regs, "Oops");
do_exit(SIGKILL);
@@ -629,6 +632,29 @@ void pfault_fini(void)
static DEFINE_SPINLOCK(pfault_lock);
static LIST_HEAD(pfault_list);
+#define PF_COMPLETE 0x0080
+
+/*
+ * The mechanism of our pfault code: if Linux is running as guest, runs a user
+ * space process and the user space process accesses a page that the host has
+ * paged out we get a pfault interrupt.
+ *
+ * This allows us, within the guest, to schedule a different process. Without
+ * this mechanism the host would have to suspend the whole virtual cpu until
+ * the page has been paged in.
+ *
+ * So when we get such an interrupt then we set the state of the current task
+ * to uninterruptible and also set the need_resched flag. Both happens within
+ * interrupt context(!). If we later on want to return to user space we
+ * recognize the need_resched flag and then call schedule(). It's not very
+ * obvious how this works...
+ *
+ * Of course we have a lot of additional fun with the completion interrupt (->
+ * host signals that a page of a process has been paged in and the process can
+ * continue to run). This interrupt can arrive on any cpu and, since we have
+ * virtual cpus, actually appear before the interrupt that signals that a page
+ * is missing.
+ */
static void pfault_interrupt(struct ext_code ext_code,
unsigned int param32, unsigned long param64)
{
@@ -637,10 +663,9 @@ static void pfault_interrupt(struct ext_code ext_code,
pid_t pid;
/*
- * Get the external interruption subcode & pfault
- * initial/completion signal bit. VM stores this
- * in the 'cpu address' field associated with the
- * external interrupt.
+ * Get the external interruption subcode & pfault initial/completion
+ * signal bit. VM stores this in the 'cpu address' field associated
+ * with the external interrupt.
*/
subcode = ext_code.subcode;
if ((subcode & 0xff00) != __SUBCODE_MASK)
@@ -656,7 +681,7 @@ static void pfault_interrupt(struct ext_code ext_code,
if (!tsk)
return;
spin_lock(&pfault_lock);
- if (subcode & 0x0080) {
+ if (subcode & PF_COMPLETE) {
/* signal bit is set -> a page has been swapped in by VM */
if (tsk->thread.pfault_wait == 1) {
/* Initial interrupt was faster than the completion
@@ -685,8 +710,7 @@ static void pfault_interrupt(struct ext_code ext_code,
goto out;
if (tsk->thread.pfault_wait == 1) {
/* Already on the list with a reference: put to sleep */
- __set_task_state(tsk, TASK_UNINTERRUPTIBLE);
- set_tsk_need_resched(tsk);
+ goto block;
} else if (tsk->thread.pfault_wait == -1) {
/* Completion interrupt was faster than the initial
* interrupt (pfault_wait == -1). Set pfault_wait
@@ -701,7 +725,11 @@ static void pfault_interrupt(struct ext_code ext_code,
get_task_struct(tsk);
tsk->thread.pfault_wait = 1;
list_add(&tsk->thread.list, &pfault_list);
- __set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+block:
+ /* Since this must be a userspace fault, there
+ * is no kernel task state to trample. Rely on the
+ * return to userspace schedule() to block. */
+ __set_current_state(TASK_UNINTERRUPTIBLE);
set_tsk_need_resched(tsk);
}
}
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
new file mode 100644
index 000000000000..cace818d86eb
--- /dev/null
+++ b/arch/s390/mm/gmap.c
@@ -0,0 +1,774 @@
+/*
+ * KVM guest address space mapping code
+ *
+ * Copyright IBM Corp. 2007, 2016
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/smp.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/swapops.h>
+#include <linux/ksm.h>
+#include <linux/mman.h>
+
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/gmap.h>
+#include <asm/tlb.h>
+
+/**
+ * gmap_alloc - allocate a guest address space
+ * @mm: pointer to the parent mm_struct
+ * @limit: maximum address of the gmap address space
+ *
+ * Returns a guest address space structure.
+ */
+struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit)
+{
+ struct gmap *gmap;
+ struct page *page;
+ unsigned long *table;
+ unsigned long etype, atype;
+
+ if (limit < (1UL << 31)) {
+ limit = (1UL << 31) - 1;
+ atype = _ASCE_TYPE_SEGMENT;
+ etype = _SEGMENT_ENTRY_EMPTY;
+ } else if (limit < (1UL << 42)) {
+ limit = (1UL << 42) - 1;
+ atype = _ASCE_TYPE_REGION3;
+ etype = _REGION3_ENTRY_EMPTY;
+ } else if (limit < (1UL << 53)) {
+ limit = (1UL << 53) - 1;
+ atype = _ASCE_TYPE_REGION2;
+ etype = _REGION2_ENTRY_EMPTY;
+ } else {
+ limit = -1UL;
+ atype = _ASCE_TYPE_REGION1;
+ etype = _REGION1_ENTRY_EMPTY;
+ }
+ gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL);
+ if (!gmap)
+ goto out;
+ INIT_LIST_HEAD(&gmap->crst_list);
+ INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL);
+ INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC);
+ spin_lock_init(&gmap->guest_table_lock);
+ gmap->mm = mm;
+ page = alloc_pages(GFP_KERNEL, 2);
+ if (!page)
+ goto out_free;
+ page->index = 0;
+ list_add(&page->lru, &gmap->crst_list);
+ table = (unsigned long *) page_to_phys(page);
+ crst_table_init(table, etype);
+ gmap->table = table;
+ gmap->asce = atype | _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS | __pa(table);
+ gmap->asce_end = limit;
+ down_write(&mm->mmap_sem);
+ list_add(&gmap->list, &mm->context.gmap_list);
+ up_write(&mm->mmap_sem);
+ return gmap;
+
+out_free:
+ kfree(gmap);
+out:
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(gmap_alloc);
+
+static void gmap_flush_tlb(struct gmap *gmap)
+{
+ if (MACHINE_HAS_IDTE)
+ __tlb_flush_asce(gmap->mm, gmap->asce);
+ else
+ __tlb_flush_global();
+}
+
+static void gmap_radix_tree_free(struct radix_tree_root *root)
+{
+ struct radix_tree_iter iter;
+ unsigned long indices[16];
+ unsigned long index;
+ void **slot;
+ int i, nr;
+
+ /* A radix tree is freed by deleting all of its entries */
+ index = 0;
+ do {
+ nr = 0;
+ radix_tree_for_each_slot(slot, root, &iter, index) {
+ indices[nr] = iter.index;
+ if (++nr == 16)
+ break;
+ }
+ for (i = 0; i < nr; i++) {
+ index = indices[i];
+ radix_tree_delete(root, index);
+ }
+ } while (nr > 0);
+}
+
+/**
+ * gmap_free - free a guest address space
+ * @gmap: pointer to the guest address space structure
+ */
+void gmap_free(struct gmap *gmap)
+{
+ struct page *page, *next;
+
+ /* Flush tlb. */
+ if (MACHINE_HAS_IDTE)
+ __tlb_flush_asce(gmap->mm, gmap->asce);
+ else
+ __tlb_flush_global();
+
+ /* Free all segment & region tables. */
+ list_for_each_entry_safe(page, next, &gmap->crst_list, lru)
+ __free_pages(page, 2);
+ gmap_radix_tree_free(&gmap->guest_to_host);
+ gmap_radix_tree_free(&gmap->host_to_guest);
+ down_write(&gmap->mm->mmap_sem);
+ list_del(&gmap->list);
+ up_write(&gmap->mm->mmap_sem);
+ kfree(gmap);
+}
+EXPORT_SYMBOL_GPL(gmap_free);
+
+/**
+ * gmap_enable - switch primary space to the guest address space
+ * @gmap: pointer to the guest address space structure
+ */
+void gmap_enable(struct gmap *gmap)
+{
+ S390_lowcore.gmap = (unsigned long) gmap;
+}
+EXPORT_SYMBOL_GPL(gmap_enable);
+
+/**
+ * gmap_disable - switch back to the standard primary address space
+ * @gmap: pointer to the guest address space structure
+ */
+void gmap_disable(struct gmap *gmap)
+{
+ S390_lowcore.gmap = 0UL;
+}
+EXPORT_SYMBOL_GPL(gmap_disable);
+
+/*
+ * gmap_alloc_table is assumed to be called with mmap_sem held
+ */
+static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
+ unsigned long init, unsigned long gaddr)
+{
+ struct page *page;
+ unsigned long *new;
+
+ /* since we dont free the gmap table until gmap_free we can unlock */
+ page = alloc_pages(GFP_KERNEL, 2);
+ if (!page)
+ return -ENOMEM;
+ new = (unsigned long *) page_to_phys(page);
+ crst_table_init(new, init);
+ spin_lock(&gmap->mm->page_table_lock);
+ if (*table & _REGION_ENTRY_INVALID) {
+ list_add(&page->lru, &gmap->crst_list);
+ *table = (unsigned long) new | _REGION_ENTRY_LENGTH |
+ (*table & _REGION_ENTRY_TYPE_MASK);
+ page->index = gaddr;
+ page = NULL;
+ }
+ spin_unlock(&gmap->mm->page_table_lock);
+ if (page)
+ __free_pages(page, 2);
+ return 0;
+}
+
+/**
+ * __gmap_segment_gaddr - find virtual address from segment pointer
+ * @entry: pointer to a segment table entry in the guest address space
+ *
+ * Returns the virtual address in the guest address space for the segment
+ */
+static unsigned long __gmap_segment_gaddr(unsigned long *entry)
+{
+ struct page *page;
+ unsigned long offset, mask;
+
+ offset = (unsigned long) entry / sizeof(unsigned long);
+ offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE;
+ mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1);
+ page = virt_to_page((void *)((unsigned long) entry & mask));
+ return page->index + offset;
+}
+
+/**
+ * __gmap_unlink_by_vmaddr - unlink a single segment via a host address
+ * @gmap: pointer to the guest address space structure
+ * @vmaddr: address in the host process address space
+ *
+ * Returns 1 if a TLB flush is required
+ */
+static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr)
+{
+ unsigned long *entry;
+ int flush = 0;
+
+ spin_lock(&gmap->guest_table_lock);
+ entry = radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT);
+ if (entry) {
+ flush = (*entry != _SEGMENT_ENTRY_INVALID);
+ *entry = _SEGMENT_ENTRY_INVALID;
+ }
+ spin_unlock(&gmap->guest_table_lock);
+ return flush;
+}
+
+/**
+ * __gmap_unmap_by_gaddr - unmap a single segment via a guest address
+ * @gmap: pointer to the guest address space structure
+ * @gaddr: address in the guest address space
+ *
+ * Returns 1 if a TLB flush is required
+ */
+static int __gmap_unmap_by_gaddr(struct gmap *gmap, unsigned long gaddr)
+{
+ unsigned long vmaddr;
+
+ vmaddr = (unsigned long) radix_tree_delete(&gmap->guest_to_host,
+ gaddr >> PMD_SHIFT);
+ return vmaddr ? __gmap_unlink_by_vmaddr(gmap, vmaddr) : 0;
+}
+
+/**
+ * gmap_unmap_segment - unmap segment from the guest address space
+ * @gmap: pointer to the guest address space structure
+ * @to: address in the guest address space
+ * @len: length of the memory area to unmap
+ *
+ * Returns 0 if the unmap succeeded, -EINVAL if not.
+ */
+int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
+{
+ unsigned long off;
+ int flush;
+
+ if ((to | len) & (PMD_SIZE - 1))
+ return -EINVAL;
+ if (len == 0 || to + len < to)
+ return -EINVAL;
+
+ flush = 0;
+ down_write(&gmap->mm->mmap_sem);
+ for (off = 0; off < len; off += PMD_SIZE)
+ flush |= __gmap_unmap_by_gaddr(gmap, to + off);
+ up_write(&gmap->mm->mmap_sem);
+ if (flush)
+ gmap_flush_tlb(gmap);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(gmap_unmap_segment);
+
+/**
+ * gmap_map_segment - map a segment to the guest address space
+ * @gmap: pointer to the guest address space structure
+ * @from: source address in the parent address space
+ * @to: target address in the guest address space
+ * @len: length of the memory area to map
+ *
+ * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not.
+ */
+int gmap_map_segment(struct gmap *gmap, unsigned long from,
+ unsigned long to, unsigned long len)
+{
+ unsigned long off;
+ int flush;
+
+ if ((from | to | len) & (PMD_SIZE - 1))
+ return -EINVAL;
+ if (len == 0 || from + len < from || to + len < to ||
+ from + len - 1 > TASK_MAX_SIZE || to + len - 1 > gmap->asce_end)
+ return -EINVAL;
+
+ flush = 0;
+ down_write(&gmap->mm->mmap_sem);
+ for (off = 0; off < len; off += PMD_SIZE) {
+ /* Remove old translation */
+ flush |= __gmap_unmap_by_gaddr(gmap, to + off);
+ /* Store new translation */
+ if (radix_tree_insert(&gmap->guest_to_host,
+ (to + off) >> PMD_SHIFT,
+ (void *) from + off))
+ break;
+ }
+ up_write(&gmap->mm->mmap_sem);
+ if (flush)
+ gmap_flush_tlb(gmap);
+ if (off >= len)
+ return 0;
+ gmap_unmap_segment(gmap, to, len);
+ return -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(gmap_map_segment);
+
+/**
+ * __gmap_translate - translate a guest address to a user space address
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: guest address
+ *
+ * Returns user space address which corresponds to the guest address or
+ * -EFAULT if no such mapping exists.
+ * This function does not establish potentially missing page table entries.
+ * The mmap_sem of the mm that belongs to the address space must be held
+ * when this function gets called.
+ */
+unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr)
+{
+ unsigned long vmaddr;
+
+ vmaddr = (unsigned long)
+ radix_tree_lookup(&gmap->guest_to_host, gaddr >> PMD_SHIFT);
+ return vmaddr ? (vmaddr | (gaddr & ~PMD_MASK)) : -EFAULT;
+}
+EXPORT_SYMBOL_GPL(__gmap_translate);
+
+/**
+ * gmap_translate - translate a guest address to a user space address
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: guest address
+ *
+ * Returns user space address which corresponds to the guest address or
+ * -EFAULT if no such mapping exists.
+ * This function does not establish potentially missing page table entries.
+ */
+unsigned long gmap_translate(struct gmap *gmap, unsigned long gaddr)
+{
+ unsigned long rc;
+
+ down_read(&gmap->mm->mmap_sem);
+ rc = __gmap_translate(gmap, gaddr);
+ up_read(&gmap->mm->mmap_sem);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_translate);
+
+/**
+ * gmap_unlink - disconnect a page table from the gmap shadow tables
+ * @gmap: pointer to guest mapping meta data structure
+ * @table: pointer to the host page table
+ * @vmaddr: vm address associated with the host page table
+ */
+void gmap_unlink(struct mm_struct *mm, unsigned long *table,
+ unsigned long vmaddr)
+{
+ struct gmap *gmap;
+ int flush;
+
+ list_for_each_entry(gmap, &mm->context.gmap_list, list) {
+ flush = __gmap_unlink_by_vmaddr(gmap, vmaddr);
+ if (flush)
+ gmap_flush_tlb(gmap);
+ }
+}
+
+/**
+ * gmap_link - set up shadow page tables to connect a host to a guest address
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: guest address
+ * @vmaddr: vm address
+ *
+ * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
+ * if the vm address is already mapped to a different guest segment.
+ * The mmap_sem of the mm that belongs to the address space must be held
+ * when this function gets called.
+ */
+int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
+{
+ struct mm_struct *mm;
+ unsigned long *table;
+ spinlock_t *ptl;
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ int rc;
+
+ /* Create higher level tables in the gmap page table */
+ table = gmap->table;
+ if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) {
+ table += (gaddr >> 53) & 0x7ff;
+ if ((*table & _REGION_ENTRY_INVALID) &&
+ gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY,
+ gaddr & 0xffe0000000000000UL))
+ return -ENOMEM;
+ table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ }
+ if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) {
+ table += (gaddr >> 42) & 0x7ff;
+ if ((*table & _REGION_ENTRY_INVALID) &&
+ gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY,
+ gaddr & 0xfffffc0000000000UL))
+ return -ENOMEM;
+ table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ }
+ if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) {
+ table += (gaddr >> 31) & 0x7ff;
+ if ((*table & _REGION_ENTRY_INVALID) &&
+ gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY,
+ gaddr & 0xffffffff80000000UL))
+ return -ENOMEM;
+ table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ }
+ table += (gaddr >> 20) & 0x7ff;
+ /* Walk the parent mm page table */
+ mm = gmap->mm;
+ pgd = pgd_offset(mm, vmaddr);
+ VM_BUG_ON(pgd_none(*pgd));
+ pud = pud_offset(pgd, vmaddr);
+ VM_BUG_ON(pud_none(*pud));
+ pmd = pmd_offset(pud, vmaddr);
+ VM_BUG_ON(pmd_none(*pmd));
+ /* large pmds cannot yet be handled */
+ if (pmd_large(*pmd))
+ return -EFAULT;
+ /* Link gmap segment table entry location to page table. */
+ rc = radix_tree_preload(GFP_KERNEL);
+ if (rc)
+ return rc;
+ ptl = pmd_lock(mm, pmd);
+ spin_lock(&gmap->guest_table_lock);
+ if (*table == _SEGMENT_ENTRY_INVALID) {
+ rc = radix_tree_insert(&gmap->host_to_guest,
+ vmaddr >> PMD_SHIFT, table);
+ if (!rc)
+ *table = pmd_val(*pmd);
+ } else
+ rc = 0;
+ spin_unlock(&gmap->guest_table_lock);
+ spin_unlock(ptl);
+ radix_tree_preload_end();
+ return rc;
+}
+
+/**
+ * gmap_fault - resolve a fault on a guest address
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: guest address
+ * @fault_flags: flags to pass down to handle_mm_fault()
+ *
+ * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
+ * if the vm address is already mapped to a different guest segment.
+ */
+int gmap_fault(struct gmap *gmap, unsigned long gaddr,
+ unsigned int fault_flags)
+{
+ unsigned long vmaddr;
+ int rc;
+ bool unlocked;
+
+ down_read(&gmap->mm->mmap_sem);
+
+retry:
+ unlocked = false;
+ vmaddr = __gmap_translate(gmap, gaddr);
+ if (IS_ERR_VALUE(vmaddr)) {
+ rc = vmaddr;
+ goto out_up;
+ }
+ if (fixup_user_fault(current, gmap->mm, vmaddr, fault_flags,
+ &unlocked)) {
+ rc = -EFAULT;
+ goto out_up;
+ }
+ /*
+ * In the case that fixup_user_fault unlocked the mmap_sem during
+ * faultin redo __gmap_translate to not race with a map/unmap_segment.
+ */
+ if (unlocked)
+ goto retry;
+
+ rc = __gmap_link(gmap, gaddr, vmaddr);
+out_up:
+ up_read(&gmap->mm->mmap_sem);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_fault);
+
+/*
+ * this function is assumed to be called with mmap_sem held
+ */
+void __gmap_zap(struct gmap *gmap, unsigned long gaddr)
+{
+ unsigned long vmaddr;
+ spinlock_t *ptl;
+ pte_t *ptep;
+
+ /* Find the vm address for the guest address */
+ vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host,
+ gaddr >> PMD_SHIFT);
+ if (vmaddr) {
+ vmaddr |= gaddr & ~PMD_MASK;
+ /* Get pointer to the page table entry */
+ ptep = get_locked_pte(gmap->mm, vmaddr, &ptl);
+ if (likely(ptep))
+ ptep_zap_unused(gmap->mm, vmaddr, ptep, 0);
+ pte_unmap_unlock(ptep, ptl);
+ }
+}
+EXPORT_SYMBOL_GPL(__gmap_zap);
+
+void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to)
+{
+ unsigned long gaddr, vmaddr, size;
+ struct vm_area_struct *vma;
+
+ down_read(&gmap->mm->mmap_sem);
+ for (gaddr = from; gaddr < to;
+ gaddr = (gaddr + PMD_SIZE) & PMD_MASK) {
+ /* Find the vm address for the guest address */
+ vmaddr = (unsigned long)
+ radix_tree_lookup(&gmap->guest_to_host,
+ gaddr >> PMD_SHIFT);
+ if (!vmaddr)
+ continue;
+ vmaddr |= gaddr & ~PMD_MASK;
+ /* Find vma in the parent mm */
+ vma = find_vma(gmap->mm, vmaddr);
+ size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
+ zap_page_range(vma, vmaddr, size, NULL);
+ }
+ up_read(&gmap->mm->mmap_sem);
+}
+EXPORT_SYMBOL_GPL(gmap_discard);
+
+static LIST_HEAD(gmap_notifier_list);
+static DEFINE_SPINLOCK(gmap_notifier_lock);
+
+/**
+ * gmap_register_ipte_notifier - register a pte invalidation callback
+ * @nb: pointer to the gmap notifier block
+ */
+void gmap_register_ipte_notifier(struct gmap_notifier *nb)
+{
+ spin_lock(&gmap_notifier_lock);
+ list_add(&nb->list, &gmap_notifier_list);
+ spin_unlock(&gmap_notifier_lock);
+}
+EXPORT_SYMBOL_GPL(gmap_register_ipte_notifier);
+
+/**
+ * gmap_unregister_ipte_notifier - remove a pte invalidation callback
+ * @nb: pointer to the gmap notifier block
+ */
+void gmap_unregister_ipte_notifier(struct gmap_notifier *nb)
+{
+ spin_lock(&gmap_notifier_lock);
+ list_del_init(&nb->list);
+ spin_unlock(&gmap_notifier_lock);
+}
+EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier);
+
+/**
+ * gmap_ipte_notify - mark a range of ptes for invalidation notification
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: virtual address in the guest address space
+ * @len: size of area
+ *
+ * Returns 0 if for each page in the given range a gmap mapping exists and
+ * the invalidation notification could be set. If the gmap mapping is missing
+ * for one or more pages -EFAULT is returned. If no memory could be allocated
+ * -ENOMEM is returned. This function establishes missing page table entries.
+ */
+int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len)
+{
+ unsigned long addr;
+ spinlock_t *ptl;
+ pte_t *ptep;
+ bool unlocked;
+ int rc = 0;
+
+ if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK))
+ return -EINVAL;
+ down_read(&gmap->mm->mmap_sem);
+ while (len) {
+ unlocked = false;
+ /* Convert gmap address and connect the page tables */
+ addr = __gmap_translate(gmap, gaddr);
+ if (IS_ERR_VALUE(addr)) {
+ rc = addr;
+ break;
+ }
+ /* Get the page mapped */
+ if (fixup_user_fault(current, gmap->mm, addr, FAULT_FLAG_WRITE,
+ &unlocked)) {
+ rc = -EFAULT;
+ break;
+ }
+ /* While trying to map mmap_sem got unlocked. Let us retry */
+ if (unlocked)
+ continue;
+ rc = __gmap_link(gmap, gaddr, addr);
+ if (rc)
+ break;
+ /* Walk the process page table, lock and get pte pointer */
+ ptep = get_locked_pte(gmap->mm, addr, &ptl);
+ VM_BUG_ON(!ptep);
+ /* Set notification bit in the pgste of the pte */
+ if ((pte_val(*ptep) & (_PAGE_INVALID | _PAGE_PROTECT)) == 0) {
+ ptep_set_notify(gmap->mm, addr, ptep);
+ gaddr += PAGE_SIZE;
+ len -= PAGE_SIZE;
+ }
+ pte_unmap_unlock(ptep, ptl);
+ }
+ up_read(&gmap->mm->mmap_sem);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_ipte_notify);
+
+/**
+ * ptep_notify - call all invalidation callbacks for a specific pte.
+ * @mm: pointer to the process mm_struct
+ * @addr: virtual address in the process address space
+ * @pte: pointer to the page table entry
+ *
+ * This function is assumed to be called with the page table lock held
+ * for the pte to notify.
+ */
+void ptep_notify(struct mm_struct *mm, unsigned long vmaddr, pte_t *pte)
+{
+ unsigned long offset, gaddr;
+ unsigned long *table;
+ struct gmap_notifier *nb;
+ struct gmap *gmap;
+
+ offset = ((unsigned long) pte) & (255 * sizeof(pte_t));
+ offset = offset * (4096 / sizeof(pte_t));
+ spin_lock(&gmap_notifier_lock);
+ list_for_each_entry(gmap, &mm->context.gmap_list, list) {
+ table = radix_tree_lookup(&gmap->host_to_guest,
+ vmaddr >> PMD_SHIFT);
+ if (!table)
+ continue;
+ gaddr = __gmap_segment_gaddr(table) + offset;
+ list_for_each_entry(nb, &gmap_notifier_list, list)
+ nb->notifier_call(gmap, gaddr);
+ }
+ spin_unlock(&gmap_notifier_lock);
+}
+EXPORT_SYMBOL_GPL(ptep_notify);
+
+static inline void thp_split_mm(struct mm_struct *mm)
+{
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ struct vm_area_struct *vma;
+ unsigned long addr;
+
+ for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
+ for (addr = vma->vm_start;
+ addr < vma->vm_end;
+ addr += PAGE_SIZE)
+ follow_page(vma, addr, FOLL_SPLIT);
+ vma->vm_flags &= ~VM_HUGEPAGE;
+ vma->vm_flags |= VM_NOHUGEPAGE;
+ }
+ mm->def_flags |= VM_NOHUGEPAGE;
+#endif
+}
+
+/*
+ * switch on pgstes for its userspace process (for kvm)
+ */
+int s390_enable_sie(void)
+{
+ struct mm_struct *mm = current->mm;
+
+ /* Do we have pgstes? if yes, we are done */
+ if (mm_has_pgste(mm))
+ return 0;
+ /* Fail if the page tables are 2K */
+ if (!mm_alloc_pgste(mm))
+ return -EINVAL;
+ down_write(&mm->mmap_sem);
+ mm->context.has_pgste = 1;
+ /* split thp mappings and disable thp for future mappings */
+ thp_split_mm(mm);
+ up_write(&mm->mmap_sem);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(s390_enable_sie);
+
+/*
+ * Enable storage key handling from now on and initialize the storage
+ * keys with the default key.
+ */
+static int __s390_enable_skey(pte_t *pte, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ /*
+ * Remove all zero page mappings,
+ * after establishing a policy to forbid zero page mappings
+ * following faults for that page will get fresh anonymous pages
+ */
+ if (is_zero_pfn(pte_pfn(*pte)))
+ ptep_xchg_direct(walk->mm, addr, pte, __pte(_PAGE_INVALID));
+ /* Clear storage key */
+ ptep_zap_key(walk->mm, addr, pte);
+ return 0;
+}
+
+int s390_enable_skey(void)
+{
+ struct mm_walk walk = { .pte_entry = __s390_enable_skey };
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+ int rc = 0;
+
+ down_write(&mm->mmap_sem);
+ if (mm_use_skey(mm))
+ goto out_up;
+
+ mm->context.use_skey = 1;
+ for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ if (ksm_madvise(vma, vma->vm_start, vma->vm_end,
+ MADV_UNMERGEABLE, &vma->vm_flags)) {
+ mm->context.use_skey = 0;
+ rc = -ENOMEM;
+ goto out_up;
+ }
+ }
+ mm->def_flags &= ~VM_MERGEABLE;
+
+ walk.mm = mm;
+ walk_page_range(0, TASK_SIZE, &walk);
+
+out_up:
+ up_write(&mm->mmap_sem);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(s390_enable_skey);
+
+/*
+ * Reset CMMA state, make all pages stable again.
+ */
+static int __s390_reset_cmma(pte_t *pte, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ ptep_zap_unused(walk->mm, addr, pte, 1);
+ return 0;
+}
+
+void s390_reset_cmma(struct mm_struct *mm)
+{
+ struct mm_walk walk = { .pte_entry = __s390_reset_cmma };
+
+ down_write(&mm->mmap_sem);
+ walk.mm = mm;
+ walk_page_range(0, TASK_SIZE, &walk);
+ up_write(&mm->mmap_sem);
+}
+EXPORT_SYMBOL_GPL(s390_reset_cmma);
diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
index 13dab0c1645c..a8a6765f1a51 100644
--- a/arch/s390/mm/gup.c
+++ b/arch/s390/mm/gup.c
@@ -20,9 +20,9 @@
static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
unsigned long end, int write, struct page **pages, int *nr)
{
+ struct page *head, *page;
unsigned long mask;
pte_t *ptep, pte;
- struct page *page;
mask = (write ? _PAGE_PROTECT : 0) | _PAGE_INVALID | _PAGE_SPECIAL;
@@ -37,12 +37,14 @@ static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
return 0;
VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
page = pte_page(pte);
- if (!page_cache_get_speculative(page))
+ head = compound_head(page);
+ if (!page_cache_get_speculative(head))
return 0;
if (unlikely(pte_val(pte) != pte_val(*ptep))) {
- put_page(page);
+ put_page(head);
return 0;
}
+ VM_BUG_ON_PAGE(compound_head(page) != head, page);
pages[*nr] = page;
(*nr)++;
@@ -210,7 +212,6 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
int get_user_pages_fast(unsigned long start, int nr_pages, int write,
struct page **pages)
{
- struct mm_struct *mm = current->mm;
int nr, ret;
might_sleep();
@@ -222,8 +223,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
/* Try to get the remaining pages with get_user_pages */
start += nr << PAGE_SHIFT;
pages += nr;
- ret = get_user_pages_unlocked(current, mm, start,
- nr_pages - nr, write, 0, pages);
+ ret = get_user_pages_unlocked(start, nr_pages - nr, write, 0, pages);
/* Have to be a bit careful with return values */
if (nr > 0)
ret = (ret < 0) ? nr : ret + nr;
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index f81096b6940d..1b5e8983f4f3 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -105,11 +105,10 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
pmd_t *pmdp = (pmd_t *) ptep;
- pte_t pte = huge_ptep_get(ptep);
+ pmd_t old;
- pmdp_flush_direct(mm, addr, pmdp);
- pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY;
- return pte;
+ old = pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
+ return __pmd_to_pte(old);
}
pte_t *huge_pte_alloc(struct mm_struct *mm,
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 73e290337092..2489b2e917c8 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -89,7 +89,8 @@ void __init paging_init(void)
asce_bits = _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
pgd_type = _REGION3_ENTRY_EMPTY;
}
- S390_lowcore.kernel_asce = (__pa(init_mm.pgd) & PAGE_MASK) | asce_bits;
+ init_mm.context.asce = (__pa(init_mm.pgd) & PAGE_MASK) | asce_bits;
+ S390_lowcore.kernel_asce = init_mm.context.asce;
clear_table((unsigned long *) init_mm.pgd, pgd_type,
sizeof(unsigned long)*2048);
vmem_map_init();
@@ -108,6 +109,13 @@ void __init paging_init(void)
free_area_init_nodes(max_zone_pfns);
}
+void mark_rodata_ro(void)
+{
+ /* Text and rodata are already protected. Nothing to do here. */
+ pr_info("Write protecting the kernel read-only data: %luk\n",
+ ((unsigned long)&_eshared - (unsigned long)&_stext) >> 10);
+}
+
void __init mem_init(void)
{
if (MACHINE_HAS_TLB_LC)
@@ -126,9 +134,6 @@ void __init mem_init(void)
setup_zero_pages(); /* Setup zeroed pages. */
mem_init_print_info(NULL);
- printk("Write protected kernel read-only data: %#lx - %#lx\n",
- (unsigned long)&_stext,
- PFN_ALIGN((unsigned long)&_eshared) - 1);
}
void free_initmem(void)
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index 45c4daa49930..eb9df2822da1 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -22,6 +22,7 @@
* Started by Ingo Molnar <mingo@elte.hu>
*/
+#include <linux/elf-randomize.h>
#include <linux/personality.h>
#include <linux/mm.h>
#include <linux/mman.h>
@@ -174,7 +175,7 @@ int s390_mmap_check(unsigned long addr, unsigned long len, unsigned long flags)
if (!(flags & MAP_FIXED))
addr = 0;
if ((addr + len) >= TASK_SIZE)
- return crst_table_upgrade(current->mm, TASK_MAX_SIZE);
+ return crst_table_upgrade(current->mm);
return 0;
}
@@ -191,7 +192,7 @@ s390_get_unmapped_area(struct file *filp, unsigned long addr,
return area;
if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < TASK_MAX_SIZE) {
/* Upgrade the page table to 4 levels and retry. */
- rc = crst_table_upgrade(mm, TASK_MAX_SIZE);
+ rc = crst_table_upgrade(mm);
if (rc)
return (unsigned long) rc;
area = arch_get_unmapped_area(filp, addr, len, pgoff, flags);
@@ -213,7 +214,7 @@ s390_get_unmapped_area_topdown(struct file *filp, const unsigned long addr,
return area;
if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < TASK_MAX_SIZE) {
/* Upgrade the page table to 4 levels and retry. */
- rc = crst_table_upgrade(mm, TASK_MAX_SIZE);
+ rc = crst_table_upgrade(mm);
if (rc)
return (unsigned long) rc;
area = arch_get_unmapped_area_topdown(filp, addr, len,
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index 749c98407b41..f2a5c29a97e9 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -65,19 +65,17 @@ static pte_t *walk_page_table(unsigned long addr)
static void change_page_attr(unsigned long addr, int numpages,
pte_t (*set) (pte_t))
{
- pte_t *ptep, pte;
+ pte_t *ptep;
int i;
for (i = 0; i < numpages; i++) {
ptep = walk_page_table(addr);
if (WARN_ON_ONCE(!ptep))
break;
- pte = *ptep;
- pte = set(pte);
- __ptep_ipte(addr, ptep);
- *ptep = pte;
+ *ptep = set(*ptep);
addr += PAGE_SIZE;
}
+ __tlb_flush_kernel();
}
int set_memory_ro(unsigned long addr, int numpages)
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
new file mode 100644
index 000000000000..e2565d2d0c32
--- /dev/null
+++ b/arch/s390/mm/pgalloc.c
@@ -0,0 +1,331 @@
+/*
+ * Page table allocation functions
+ *
+ * Copyright IBM Corp. 2016
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/mm.h>
+#include <linux/sysctl.h>
+#include <asm/mmu_context.h>
+#include <asm/pgalloc.h>
+#include <asm/gmap.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+
+#ifdef CONFIG_PGSTE
+
+static int page_table_allocate_pgste_min = 0;
+static int page_table_allocate_pgste_max = 1;
+int page_table_allocate_pgste = 0;
+EXPORT_SYMBOL(page_table_allocate_pgste);
+
+static struct ctl_table page_table_sysctl[] = {
+ {
+ .procname = "allocate_pgste",
+ .data = &page_table_allocate_pgste,
+ .maxlen = sizeof(int),
+ .mode = S_IRUGO | S_IWUSR,
+ .proc_handler = proc_dointvec,
+ .extra1 = &page_table_allocate_pgste_min,
+ .extra2 = &page_table_allocate_pgste_max,
+ },
+ { }
+};
+
+static struct ctl_table page_table_sysctl_dir[] = {
+ {
+ .procname = "vm",
+ .maxlen = 0,
+ .mode = 0555,
+ .child = page_table_sysctl,
+ },
+ { }
+};
+
+static int __init page_table_register_sysctl(void)
+{
+ return register_sysctl_table(page_table_sysctl_dir) ? 0 : -ENOMEM;
+}
+__initcall(page_table_register_sysctl);
+
+#endif /* CONFIG_PGSTE */
+
+unsigned long *crst_table_alloc(struct mm_struct *mm)
+{
+ struct page *page = alloc_pages(GFP_KERNEL, 2);
+
+ if (!page)
+ return NULL;
+ return (unsigned long *) page_to_phys(page);
+}
+
+void crst_table_free(struct mm_struct *mm, unsigned long *table)
+{
+ free_pages((unsigned long) table, 2);
+}
+
+static void __crst_table_upgrade(void *arg)
+{
+ struct mm_struct *mm = arg;
+
+ if (current->active_mm == mm) {
+ clear_user_asce();
+ set_user_asce(mm);
+ }
+ __tlb_flush_local();
+}
+
+int crst_table_upgrade(struct mm_struct *mm)
+{
+ unsigned long *table, *pgd;
+
+ /* upgrade should only happen from 3 to 4 levels */
+ BUG_ON(mm->context.asce_limit != (1UL << 42));
+
+ table = crst_table_alloc(mm);
+ if (!table)
+ return -ENOMEM;
+
+ spin_lock_bh(&mm->page_table_lock);
+ pgd = (unsigned long *) mm->pgd;
+ crst_table_init(table, _REGION2_ENTRY_EMPTY);
+ pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
+ mm->pgd = (pgd_t *) table;
+ mm->context.asce_limit = 1UL << 53;
+ mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS | _ASCE_TYPE_REGION2;
+ mm->task_size = mm->context.asce_limit;
+ spin_unlock_bh(&mm->page_table_lock);
+
+ on_each_cpu(__crst_table_upgrade, mm, 0);
+ return 0;
+}
+
+void crst_table_downgrade(struct mm_struct *mm)
+{
+ pgd_t *pgd;
+
+ /* downgrade should only happen from 3 to 2 levels (compat only) */
+ BUG_ON(mm->context.asce_limit != (1UL << 42));
+
+ if (current->active_mm == mm) {
+ clear_user_asce();
+ __tlb_flush_mm(mm);
+ }
+
+ pgd = mm->pgd;
+ mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
+ mm->context.asce_limit = 1UL << 31;
+ mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT;
+ mm->task_size = mm->context.asce_limit;
+ crst_table_free(mm, (unsigned long *) pgd);
+
+ if (current->active_mm == mm)
+ set_user_asce(mm);
+}
+
+static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
+{
+ unsigned int old, new;
+
+ do {
+ old = atomic_read(v);
+ new = old ^ bits;
+ } while (atomic_cmpxchg(v, old, new) != old);
+ return new;
+}
+
+/*
+ * page table entry allocation/free routines.
+ */
+unsigned long *page_table_alloc(struct mm_struct *mm)
+{
+ unsigned long *table;
+ struct page *page;
+ unsigned int mask, bit;
+
+ /* Try to get a fragment of a 4K page as a 2K page table */
+ if (!mm_alloc_pgste(mm)) {
+ table = NULL;
+ spin_lock_bh(&mm->context.list_lock);
+ if (!list_empty(&mm->context.pgtable_list)) {
+ page = list_first_entry(&mm->context.pgtable_list,
+ struct page, lru);
+ mask = atomic_read(&page->_mapcount);
+ mask = (mask | (mask >> 4)) & 3;
+ if (mask != 3) {
+ table = (unsigned long *) page_to_phys(page);
+ bit = mask & 1; /* =1 -> second 2K */
+ if (bit)
+ table += PTRS_PER_PTE;
+ atomic_xor_bits(&page->_mapcount, 1U << bit);
+ list_del(&page->lru);
+ }
+ }
+ spin_unlock_bh(&mm->context.list_lock);
+ if (table)
+ return table;
+ }
+ /* Allocate a fresh page */
+ page = alloc_page(GFP_KERNEL);
+ if (!page)
+ return NULL;
+ if (!pgtable_page_ctor(page)) {
+ __free_page(page);
+ return NULL;
+ }
+ /* Initialize page table */
+ table = (unsigned long *) page_to_phys(page);
+ if (mm_alloc_pgste(mm)) {
+ /* Return 4K page table with PGSTEs */
+ atomic_set(&page->_mapcount, 3);
+ clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
+ clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
+ } else {
+ /* Return the first 2K fragment of the page */
+ atomic_set(&page->_mapcount, 1);
+ clear_table(table, _PAGE_INVALID, PAGE_SIZE);
+ spin_lock_bh(&mm->context.list_lock);
+ list_add(&page->lru, &mm->context.pgtable_list);
+ spin_unlock_bh(&mm->context.list_lock);
+ }
+ return table;
+}
+
+void page_table_free(struct mm_struct *mm, unsigned long *table)
+{
+ struct page *page;
+ unsigned int bit, mask;
+
+ page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+ if (!mm_alloc_pgste(mm)) {
+ /* Free 2K page table fragment of a 4K page */
+ bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t));
+ spin_lock_bh(&mm->context.list_lock);
+ mask = atomic_xor_bits(&page->_mapcount, 1U << bit);
+ if (mask & 3)
+ list_add(&page->lru, &mm->context.pgtable_list);
+ else
+ list_del(&page->lru);
+ spin_unlock_bh(&mm->context.list_lock);
+ if (mask != 0)
+ return;
+ }
+
+ pgtable_page_dtor(page);
+ atomic_set(&page->_mapcount, -1);
+ __free_page(page);
+}
+
+void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
+ unsigned long vmaddr)
+{
+ struct mm_struct *mm;
+ struct page *page;
+ unsigned int bit, mask;
+
+ mm = tlb->mm;
+ page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+ if (mm_alloc_pgste(mm)) {
+ gmap_unlink(mm, table, vmaddr);
+ table = (unsigned long *) (__pa(table) | 3);
+ tlb_remove_table(tlb, table);
+ return;
+ }
+ bit = (__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t));
+ spin_lock_bh(&mm->context.list_lock);
+ mask = atomic_xor_bits(&page->_mapcount, 0x11U << bit);
+ if (mask & 3)
+ list_add_tail(&page->lru, &mm->context.pgtable_list);
+ else
+ list_del(&page->lru);
+ spin_unlock_bh(&mm->context.list_lock);
+ table = (unsigned long *) (__pa(table) | (1U << bit));
+ tlb_remove_table(tlb, table);
+}
+
+static void __tlb_remove_table(void *_table)
+{
+ unsigned int mask = (unsigned long) _table & 3;
+ void *table = (void *)((unsigned long) _table ^ mask);
+ struct page *page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+
+ switch (mask) {
+ case 0: /* pmd or pud */
+ free_pages((unsigned long) table, 2);
+ break;
+ case 1: /* lower 2K of a 4K page table */
+ case 2: /* higher 2K of a 4K page table */
+ if (atomic_xor_bits(&page->_mapcount, mask << 4) != 0)
+ break;
+ /* fallthrough */
+ case 3: /* 4K page table with pgstes */
+ pgtable_page_dtor(page);
+ atomic_set(&page->_mapcount, -1);
+ __free_page(page);
+ break;
+ }
+}
+
+static void tlb_remove_table_smp_sync(void *arg)
+{
+ /* Simply deliver the interrupt */
+}
+
+static void tlb_remove_table_one(void *table)
+{
+ /*
+ * This isn't an RCU grace period and hence the page-tables cannot be
+ * assumed to be actually RCU-freed.
+ *
+ * It is however sufficient for software page-table walkers that rely
+ * on IRQ disabling. See the comment near struct mmu_table_batch.
+ */
+ smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
+ __tlb_remove_table(table);
+}
+
+static void tlb_remove_table_rcu(struct rcu_head *head)
+{
+ struct mmu_table_batch *batch;
+ int i;
+
+ batch = container_of(head, struct mmu_table_batch, rcu);
+
+ for (i = 0; i < batch->nr; i++)
+ __tlb_remove_table(batch->tables[i]);
+
+ free_page((unsigned long)batch);
+}
+
+void tlb_table_flush(struct mmu_gather *tlb)
+{
+ struct mmu_table_batch **batch = &tlb->batch;
+
+ if (*batch) {
+ call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu);
+ *batch = NULL;
+ }
+}
+
+void tlb_remove_table(struct mmu_gather *tlb, void *table)
+{
+ struct mmu_table_batch **batch = &tlb->batch;
+
+ tlb->mm->context.flush_mm = 1;
+ if (*batch == NULL) {
+ *batch = (struct mmu_table_batch *)
+ __get_free_page(GFP_NOWAIT | __GFP_NOWARN);
+ if (*batch == NULL) {
+ __tlb_flush_mm_lazy(tlb->mm);
+ tlb_remove_table_one(table);
+ return;
+ }
+ (*batch)->nr = 0;
+ }
+ (*batch)->tables[(*batch)->nr++] = table;
+ if ((*batch)->nr == MAX_TABLE_BATCH)
+ tlb_flush_mmu(tlb);
+}
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 5109827883ac..9f0ce0e6eeb4 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -24,591 +24,397 @@
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
-unsigned long *crst_table_alloc(struct mm_struct *mm)
-{
- struct page *page = alloc_pages(GFP_KERNEL, 2);
-
- if (!page)
- return NULL;
- return (unsigned long *) page_to_phys(page);
+static inline pte_t ptep_flush_direct(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
+{
+ int active, count;
+ pte_t old;
+
+ old = *ptep;
+ if (unlikely(pte_val(old) & _PAGE_INVALID))
+ return old;
+ active = (mm == current->active_mm) ? 1 : 0;
+ count = atomic_add_return(0x10000, &mm->context.attach_count);
+ if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active &&
+ cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
+ __ptep_ipte_local(addr, ptep);
+ else
+ __ptep_ipte(addr, ptep);
+ atomic_sub(0x10000, &mm->context.attach_count);
+ return old;
}
-void crst_table_free(struct mm_struct *mm, unsigned long *table)
+static inline pte_t ptep_flush_lazy(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
{
- free_pages((unsigned long) table, 2);
+ int active, count;
+ pte_t old;
+
+ old = *ptep;
+ if (unlikely(pte_val(old) & _PAGE_INVALID))
+ return old;
+ active = (mm == current->active_mm) ? 1 : 0;
+ count = atomic_add_return(0x10000, &mm->context.attach_count);
+ if ((count & 0xffff) <= active) {
+ pte_val(*ptep) |= _PAGE_INVALID;
+ mm->context.flush_mm = 1;
+ } else
+ __ptep_ipte(addr, ptep);
+ atomic_sub(0x10000, &mm->context.attach_count);
+ return old;
}
-static void __crst_table_upgrade(void *arg)
+static inline pgste_t pgste_get_lock(pte_t *ptep)
{
- struct mm_struct *mm = arg;
+ unsigned long new = 0;
+#ifdef CONFIG_PGSTE
+ unsigned long old;
- if (current->active_mm == mm) {
- clear_user_asce();
- set_user_asce(mm);
- }
- __tlb_flush_local();
+ preempt_disable();
+ asm(
+ " lg %0,%2\n"
+ "0: lgr %1,%0\n"
+ " nihh %0,0xff7f\n" /* clear PCL bit in old */
+ " oihh %1,0x0080\n" /* set PCL bit in new */
+ " csg %0,%1,%2\n"
+ " jl 0b\n"
+ : "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE])
+ : "Q" (ptep[PTRS_PER_PTE]) : "cc", "memory");
+#endif
+ return __pgste(new);
}
-int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
+static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
{
- unsigned long *table, *pgd;
- unsigned long entry;
- int flush;
-
- BUG_ON(limit > TASK_MAX_SIZE);
- flush = 0;
-repeat:
- table = crst_table_alloc(mm);
- if (!table)
- return -ENOMEM;
- spin_lock_bh(&mm->page_table_lock);
- if (mm->context.asce_limit < limit) {
- pgd = (unsigned long *) mm->pgd;
- if (mm->context.asce_limit <= (1UL << 31)) {
- entry = _REGION3_ENTRY_EMPTY;
- mm->context.asce_limit = 1UL << 42;
- mm->context.asce_bits = _ASCE_TABLE_LENGTH |
- _ASCE_USER_BITS |
- _ASCE_TYPE_REGION3;
- } else {
- entry = _REGION2_ENTRY_EMPTY;
- mm->context.asce_limit = 1UL << 53;
- mm->context.asce_bits = _ASCE_TABLE_LENGTH |
- _ASCE_USER_BITS |
- _ASCE_TYPE_REGION2;
- }
- crst_table_init(table, entry);
- pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
- mm->pgd = (pgd_t *) table;
- mm->task_size = mm->context.asce_limit;
- table = NULL;
- flush = 1;
- }
- spin_unlock_bh(&mm->page_table_lock);
- if (table)
- crst_table_free(mm, table);
- if (mm->context.asce_limit < limit)
- goto repeat;
- if (flush)
- on_each_cpu(__crst_table_upgrade, mm, 0);
- return 0;
+#ifdef CONFIG_PGSTE
+ asm(
+ " nihh %1,0xff7f\n" /* clear PCL bit */
+ " stg %1,%0\n"
+ : "=Q" (ptep[PTRS_PER_PTE])
+ : "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE])
+ : "cc", "memory");
+ preempt_enable();
+#endif
}
-void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
+static inline pgste_t pgste_get(pte_t *ptep)
{
- pgd_t *pgd;
-
- if (current->active_mm == mm) {
- clear_user_asce();
- __tlb_flush_mm(mm);
- }
- while (mm->context.asce_limit > limit) {
- pgd = mm->pgd;
- switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) {
- case _REGION_ENTRY_TYPE_R2:
- mm->context.asce_limit = 1UL << 42;
- mm->context.asce_bits = _ASCE_TABLE_LENGTH |
- _ASCE_USER_BITS |
- _ASCE_TYPE_REGION3;
- break;
- case _REGION_ENTRY_TYPE_R3:
- mm->context.asce_limit = 1UL << 31;
- mm->context.asce_bits = _ASCE_TABLE_LENGTH |
- _ASCE_USER_BITS |
- _ASCE_TYPE_SEGMENT;
- break;
- default:
- BUG();
- }
- mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
- mm->task_size = mm->context.asce_limit;
- crst_table_free(mm, (unsigned long *) pgd);
- }
- if (current->active_mm == mm)
- set_user_asce(mm);
+ unsigned long pgste = 0;
+#ifdef CONFIG_PGSTE
+ pgste = *(unsigned long *)(ptep + PTRS_PER_PTE);
+#endif
+ return __pgste(pgste);
}
+static inline void pgste_set(pte_t *ptep, pgste_t pgste)
+{
#ifdef CONFIG_PGSTE
+ *(pgste_t *)(ptep + PTRS_PER_PTE) = pgste;
+#endif
+}
-/**
- * gmap_alloc - allocate a guest address space
- * @mm: pointer to the parent mm_struct
- * @limit: maximum address of the gmap address space
- *
- * Returns a guest address space structure.
- */
-struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit)
+static inline pgste_t pgste_update_all(pte_t pte, pgste_t pgste,
+ struct mm_struct *mm)
{
- struct gmap *gmap;
- struct page *page;
- unsigned long *table;
- unsigned long etype, atype;
-
- if (limit < (1UL << 31)) {
- limit = (1UL << 31) - 1;
- atype = _ASCE_TYPE_SEGMENT;
- etype = _SEGMENT_ENTRY_EMPTY;
- } else if (limit < (1UL << 42)) {
- limit = (1UL << 42) - 1;
- atype = _ASCE_TYPE_REGION3;
- etype = _REGION3_ENTRY_EMPTY;
- } else if (limit < (1UL << 53)) {
- limit = (1UL << 53) - 1;
- atype = _ASCE_TYPE_REGION2;
- etype = _REGION2_ENTRY_EMPTY;
- } else {
- limit = -1UL;
- atype = _ASCE_TYPE_REGION1;
- etype = _REGION1_ENTRY_EMPTY;
- }
- gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL);
- if (!gmap)
- goto out;
- INIT_LIST_HEAD(&gmap->crst_list);
- INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL);
- INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC);
- spin_lock_init(&gmap->guest_table_lock);
- gmap->mm = mm;
- page = alloc_pages(GFP_KERNEL, 2);
- if (!page)
- goto out_free;
- page->index = 0;
- list_add(&page->lru, &gmap->crst_list);
- table = (unsigned long *) page_to_phys(page);
- crst_table_init(table, etype);
- gmap->table = table;
- gmap->asce = atype | _ASCE_TABLE_LENGTH |
- _ASCE_USER_BITS | __pa(table);
- gmap->asce_end = limit;
- down_write(&mm->mmap_sem);
- list_add(&gmap->list, &mm->context.gmap_list);
- up_write(&mm->mmap_sem);
- return gmap;
-
-out_free:
- kfree(gmap);
-out:
- return NULL;
+#ifdef CONFIG_PGSTE
+ unsigned long address, bits, skey;
+
+ if (!mm_use_skey(mm) || pte_val(pte) & _PAGE_INVALID)
+ return pgste;
+ address = pte_val(pte) & PAGE_MASK;
+ skey = (unsigned long) page_get_storage_key(address);
+ bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
+ /* Transfer page changed & referenced bit to guest bits in pgste */
+ pgste_val(pgste) |= bits << 48; /* GR bit & GC bit */
+ /* Copy page access key and fetch protection bit to pgste */
+ pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
+ pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
+#endif
+ return pgste;
+
}
-EXPORT_SYMBOL_GPL(gmap_alloc);
-static void gmap_flush_tlb(struct gmap *gmap)
+static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry,
+ struct mm_struct *mm)
{
- if (MACHINE_HAS_IDTE)
- __tlb_flush_asce(gmap->mm, gmap->asce);
- else
- __tlb_flush_global();
+#ifdef CONFIG_PGSTE
+ unsigned long address;
+ unsigned long nkey;
+
+ if (!mm_use_skey(mm) || pte_val(entry) & _PAGE_INVALID)
+ return;
+ VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID));
+ address = pte_val(entry) & PAGE_MASK;
+ /*
+ * Set page access key and fetch protection bit from pgste.
+ * The guest C/R information is still in the PGSTE, set real
+ * key C/R to 0.
+ */
+ nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
+ nkey |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
+ page_set_storage_key(address, nkey, 0);
+#endif
}
-static void gmap_radix_tree_free(struct radix_tree_root *root)
+static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
{
- struct radix_tree_iter iter;
- unsigned long indices[16];
- unsigned long index;
- void **slot;
- int i, nr;
-
- /* A radix tree is freed by deleting all of its entries */
- index = 0;
- do {
- nr = 0;
- radix_tree_for_each_slot(slot, root, &iter, index) {
- indices[nr] = iter.index;
- if (++nr == 16)
- break;
- }
- for (i = 0; i < nr; i++) {
- index = indices[i];
- radix_tree_delete(root, index);
+#ifdef CONFIG_PGSTE
+ if ((pte_val(entry) & _PAGE_PRESENT) &&
+ (pte_val(entry) & _PAGE_WRITE) &&
+ !(pte_val(entry) & _PAGE_INVALID)) {
+ if (!MACHINE_HAS_ESOP) {
+ /*
+ * Without enhanced suppression-on-protection force
+ * the dirty bit on for all writable ptes.
+ */
+ pte_val(entry) |= _PAGE_DIRTY;
+ pte_val(entry) &= ~_PAGE_PROTECT;
}
- } while (nr > 0);
+ if (!(pte_val(entry) & _PAGE_PROTECT))
+ /* This pte allows write access, set user-dirty */
+ pgste_val(pgste) |= PGSTE_UC_BIT;
+ }
+#endif
+ *ptep = entry;
+ return pgste;
}
-/**
- * gmap_free - free a guest address space
- * @gmap: pointer to the guest address space structure
- */
-void gmap_free(struct gmap *gmap)
+static inline pgste_t pgste_ipte_notify(struct mm_struct *mm,
+ unsigned long addr,
+ pte_t *ptep, pgste_t pgste)
{
- struct page *page, *next;
-
- /* Flush tlb. */
- if (MACHINE_HAS_IDTE)
- __tlb_flush_asce(gmap->mm, gmap->asce);
- else
- __tlb_flush_global();
-
- /* Free all segment & region tables. */
- list_for_each_entry_safe(page, next, &gmap->crst_list, lru)
- __free_pages(page, 2);
- gmap_radix_tree_free(&gmap->guest_to_host);
- gmap_radix_tree_free(&gmap->host_to_guest);
- down_write(&gmap->mm->mmap_sem);
- list_del(&gmap->list);
- up_write(&gmap->mm->mmap_sem);
- kfree(gmap);
+#ifdef CONFIG_PGSTE
+ if (pgste_val(pgste) & PGSTE_IN_BIT) {
+ pgste_val(pgste) &= ~PGSTE_IN_BIT;
+ ptep_notify(mm, addr, ptep);
+ }
+#endif
+ return pgste;
}
-EXPORT_SYMBOL_GPL(gmap_free);
-/**
- * gmap_enable - switch primary space to the guest address space
- * @gmap: pointer to the guest address space structure
- */
-void gmap_enable(struct gmap *gmap)
+static inline pgste_t ptep_xchg_start(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
{
- S390_lowcore.gmap = (unsigned long) gmap;
+ pgste_t pgste = __pgste(0);
+
+ if (mm_has_pgste(mm)) {
+ pgste = pgste_get_lock(ptep);
+ pgste = pgste_ipte_notify(mm, addr, ptep, pgste);
+ }
+ return pgste;
}
-EXPORT_SYMBOL_GPL(gmap_enable);
-/**
- * gmap_disable - switch back to the standard primary address space
- * @gmap: pointer to the guest address space structure
- */
-void gmap_disable(struct gmap *gmap)
+static inline void ptep_xchg_commit(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep,
+ pgste_t pgste, pte_t old, pte_t new)
{
- S390_lowcore.gmap = 0UL;
+ if (mm_has_pgste(mm)) {
+ if (pte_val(old) & _PAGE_INVALID)
+ pgste_set_key(ptep, pgste, new, mm);
+ if (pte_val(new) & _PAGE_INVALID) {
+ pgste = pgste_update_all(old, pgste, mm);
+ if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) ==
+ _PGSTE_GPS_USAGE_UNUSED)
+ pte_val(old) |= _PAGE_UNUSED;
+ }
+ pgste = pgste_set_pte(ptep, pgste, new);
+ pgste_set_unlock(ptep, pgste);
+ } else {
+ *ptep = new;
+ }
}
-EXPORT_SYMBOL_GPL(gmap_disable);
-/*
- * gmap_alloc_table is assumed to be called with mmap_sem held
- */
-static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
- unsigned long init, unsigned long gaddr)
+pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t new)
{
- struct page *page;
- unsigned long *new;
-
- /* since we dont free the gmap table until gmap_free we can unlock */
- page = alloc_pages(GFP_KERNEL, 2);
- if (!page)
- return -ENOMEM;
- new = (unsigned long *) page_to_phys(page);
- crst_table_init(new, init);
- spin_lock(&gmap->mm->page_table_lock);
- if (*table & _REGION_ENTRY_INVALID) {
- list_add(&page->lru, &gmap->crst_list);
- *table = (unsigned long) new | _REGION_ENTRY_LENGTH |
- (*table & _REGION_ENTRY_TYPE_MASK);
- page->index = gaddr;
- page = NULL;
- }
- spin_unlock(&gmap->mm->page_table_lock);
- if (page)
- __free_pages(page, 2);
- return 0;
+ pgste_t pgste;
+ pte_t old;
+
+ pgste = ptep_xchg_start(mm, addr, ptep);
+ old = ptep_flush_direct(mm, addr, ptep);
+ ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
+ return old;
}
+EXPORT_SYMBOL(ptep_xchg_direct);
-/**
- * __gmap_segment_gaddr - find virtual address from segment pointer
- * @entry: pointer to a segment table entry in the guest address space
- *
- * Returns the virtual address in the guest address space for the segment
- */
-static unsigned long __gmap_segment_gaddr(unsigned long *entry)
+pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t new)
{
- struct page *page;
- unsigned long offset, mask;
-
- offset = (unsigned long) entry / sizeof(unsigned long);
- offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE;
- mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1);
- page = virt_to_page((void *)((unsigned long) entry & mask));
- return page->index + offset;
+ pgste_t pgste;
+ pte_t old;
+
+ pgste = ptep_xchg_start(mm, addr, ptep);
+ old = ptep_flush_lazy(mm, addr, ptep);
+ ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
+ return old;
}
+EXPORT_SYMBOL(ptep_xchg_lazy);
-/**
- * __gmap_unlink_by_vmaddr - unlink a single segment via a host address
- * @gmap: pointer to the guest address space structure
- * @vmaddr: address in the host process address space
- *
- * Returns 1 if a TLB flush is required
- */
-static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr)
+pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep)
{
- unsigned long *entry;
- int flush = 0;
-
- spin_lock(&gmap->guest_table_lock);
- entry = radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT);
- if (entry) {
- flush = (*entry != _SEGMENT_ENTRY_INVALID);
- *entry = _SEGMENT_ENTRY_INVALID;
+ pgste_t pgste;
+ pte_t old;
+
+ pgste = ptep_xchg_start(mm, addr, ptep);
+ old = ptep_flush_lazy(mm, addr, ptep);
+ if (mm_has_pgste(mm)) {
+ pgste = pgste_update_all(old, pgste, mm);
+ pgste_set(ptep, pgste);
}
- spin_unlock(&gmap->guest_table_lock);
- return flush;
+ return old;
}
+EXPORT_SYMBOL(ptep_modify_prot_start);
-/**
- * __gmap_unmap_by_gaddr - unmap a single segment via a guest address
- * @gmap: pointer to the guest address space structure
- * @gaddr: address in the guest address space
- *
- * Returns 1 if a TLB flush is required
- */
-static int __gmap_unmap_by_gaddr(struct gmap *gmap, unsigned long gaddr)
+void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte)
{
- unsigned long vmaddr;
+ pgste_t pgste;
- vmaddr = (unsigned long) radix_tree_delete(&gmap->guest_to_host,
- gaddr >> PMD_SHIFT);
- return vmaddr ? __gmap_unlink_by_vmaddr(gmap, vmaddr) : 0;
+ if (mm_has_pgste(mm)) {
+ pgste = pgste_get(ptep);
+ pgste_set_key(ptep, pgste, pte, mm);
+ pgste = pgste_set_pte(ptep, pgste, pte);
+ pgste_set_unlock(ptep, pgste);
+ } else {
+ *ptep = pte;
+ }
}
+EXPORT_SYMBOL(ptep_modify_prot_commit);
-/**
- * gmap_unmap_segment - unmap segment from the guest address space
- * @gmap: pointer to the guest address space structure
- * @to: address in the guest address space
- * @len: length of the memory area to unmap
- *
- * Returns 0 if the unmap succeeded, -EINVAL if not.
- */
-int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
+static inline pmd_t pmdp_flush_direct(struct mm_struct *mm,
+ unsigned long addr, pmd_t *pmdp)
{
- unsigned long off;
- int flush;
-
- if ((to | len) & (PMD_SIZE - 1))
- return -EINVAL;
- if (len == 0 || to + len < to)
- return -EINVAL;
-
- flush = 0;
- down_write(&gmap->mm->mmap_sem);
- for (off = 0; off < len; off += PMD_SIZE)
- flush |= __gmap_unmap_by_gaddr(gmap, to + off);
- up_write(&gmap->mm->mmap_sem);
- if (flush)
- gmap_flush_tlb(gmap);
- return 0;
+ int active, count;
+ pmd_t old;
+
+ old = *pmdp;
+ if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
+ return old;
+ if (!MACHINE_HAS_IDTE) {
+ __pmdp_csp(pmdp);
+ return old;
+ }
+ active = (mm == current->active_mm) ? 1 : 0;
+ count = atomic_add_return(0x10000, &mm->context.attach_count);
+ if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active &&
+ cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
+ __pmdp_idte_local(addr, pmdp);
+ else
+ __pmdp_idte(addr, pmdp);
+ atomic_sub(0x10000, &mm->context.attach_count);
+ return old;
+}
+
+static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm,
+ unsigned long addr, pmd_t *pmdp)
+{
+ int active, count;
+ pmd_t old;
+
+ old = *pmdp;
+ if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
+ return old;
+ active = (mm == current->active_mm) ? 1 : 0;
+ count = atomic_add_return(0x10000, &mm->context.attach_count);
+ if ((count & 0xffff) <= active) {
+ pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID;
+ mm->context.flush_mm = 1;
+ } else if (MACHINE_HAS_IDTE)
+ __pmdp_idte(addr, pmdp);
+ else
+ __pmdp_csp(pmdp);
+ atomic_sub(0x10000, &mm->context.attach_count);
+ return old;
}
-EXPORT_SYMBOL_GPL(gmap_unmap_segment);
-
-/**
- * gmap_mmap_segment - map a segment to the guest address space
- * @gmap: pointer to the guest address space structure
- * @from: source address in the parent address space
- * @to: target address in the guest address space
- * @len: length of the memory area to map
- *
- * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not.
- */
-int gmap_map_segment(struct gmap *gmap, unsigned long from,
- unsigned long to, unsigned long len)
+
+pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp, pmd_t new)
{
- unsigned long off;
- int flush;
-
- if ((from | to | len) & (PMD_SIZE - 1))
- return -EINVAL;
- if (len == 0 || from + len < from || to + len < to ||
- from + len - 1 > TASK_MAX_SIZE || to + len - 1 > gmap->asce_end)
- return -EINVAL;
-
- flush = 0;
- down_write(&gmap->mm->mmap_sem);
- for (off = 0; off < len; off += PMD_SIZE) {
- /* Remove old translation */
- flush |= __gmap_unmap_by_gaddr(gmap, to + off);
- /* Store new translation */
- if (radix_tree_insert(&gmap->guest_to_host,
- (to + off) >> PMD_SHIFT,
- (void *) from + off))
- break;
- }
- up_write(&gmap->mm->mmap_sem);
- if (flush)
- gmap_flush_tlb(gmap);
- if (off >= len)
- return 0;
- gmap_unmap_segment(gmap, to, len);
- return -ENOMEM;
+ pmd_t old;
+
+ old = pmdp_flush_direct(mm, addr, pmdp);
+ *pmdp = new;
+ return old;
}
-EXPORT_SYMBOL_GPL(gmap_map_segment);
-
-/**
- * __gmap_translate - translate a guest address to a user space address
- * @gmap: pointer to guest mapping meta data structure
- * @gaddr: guest address
- *
- * Returns user space address which corresponds to the guest address or
- * -EFAULT if no such mapping exists.
- * This function does not establish potentially missing page table entries.
- * The mmap_sem of the mm that belongs to the address space must be held
- * when this function gets called.
- */
-unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr)
+EXPORT_SYMBOL(pmdp_xchg_direct);
+
+pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp, pmd_t new)
{
- unsigned long vmaddr;
+ pmd_t old;
- vmaddr = (unsigned long)
- radix_tree_lookup(&gmap->guest_to_host, gaddr >> PMD_SHIFT);
- return vmaddr ? (vmaddr | (gaddr & ~PMD_MASK)) : -EFAULT;
+ old = pmdp_flush_lazy(mm, addr, pmdp);
+ *pmdp = new;
+ return old;
}
-EXPORT_SYMBOL_GPL(__gmap_translate);
-
-/**
- * gmap_translate - translate a guest address to a user space address
- * @gmap: pointer to guest mapping meta data structure
- * @gaddr: guest address
- *
- * Returns user space address which corresponds to the guest address or
- * -EFAULT if no such mapping exists.
- * This function does not establish potentially missing page table entries.
- */
-unsigned long gmap_translate(struct gmap *gmap, unsigned long gaddr)
+EXPORT_SYMBOL(pmdp_xchg_lazy);
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+ pgtable_t pgtable)
{
- unsigned long rc;
+ struct list_head *lh = (struct list_head *) pgtable;
- down_read(&gmap->mm->mmap_sem);
- rc = __gmap_translate(gmap, gaddr);
- up_read(&gmap->mm->mmap_sem);
- return rc;
+ assert_spin_locked(pmd_lockptr(mm, pmdp));
+
+ /* FIFO */
+ if (!pmd_huge_pte(mm, pmdp))
+ INIT_LIST_HEAD(lh);
+ else
+ list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
+ pmd_huge_pte(mm, pmdp) = pgtable;
}
-EXPORT_SYMBOL_GPL(gmap_translate);
-/**
- * gmap_unlink - disconnect a page table from the gmap shadow tables
- * @gmap: pointer to guest mapping meta data structure
- * @table: pointer to the host page table
- * @vmaddr: vm address associated with the host page table
- */
-static void gmap_unlink(struct mm_struct *mm, unsigned long *table,
- unsigned long vmaddr)
+pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
{
- struct gmap *gmap;
- int flush;
+ struct list_head *lh;
+ pgtable_t pgtable;
+ pte_t *ptep;
+
+ assert_spin_locked(pmd_lockptr(mm, pmdp));
- list_for_each_entry(gmap, &mm->context.gmap_list, list) {
- flush = __gmap_unlink_by_vmaddr(gmap, vmaddr);
- if (flush)
- gmap_flush_tlb(gmap);
+ /* FIFO */
+ pgtable = pmd_huge_pte(mm, pmdp);
+ lh = (struct list_head *) pgtable;
+ if (list_empty(lh))
+ pmd_huge_pte(mm, pmdp) = NULL;
+ else {
+ pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
+ list_del(lh);
}
+ ptep = (pte_t *) pgtable;
+ pte_val(*ptep) = _PAGE_INVALID;
+ ptep++;
+ pte_val(*ptep) = _PAGE_INVALID;
+ return pgtable;
}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-/**
- * gmap_link - set up shadow page tables to connect a host to a guest address
- * @gmap: pointer to guest mapping meta data structure
- * @gaddr: guest address
- * @vmaddr: vm address
- *
- * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
- * if the vm address is already mapped to a different guest segment.
- * The mmap_sem of the mm that belongs to the address space must be held
- * when this function gets called.
- */
-int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
+#ifdef CONFIG_PGSTE
+void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t entry)
{
- struct mm_struct *mm;
- unsigned long *table;
- spinlock_t *ptl;
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- int rc;
-
- /* Create higher level tables in the gmap page table */
- table = gmap->table;
- if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) {
- table += (gaddr >> 53) & 0x7ff;
- if ((*table & _REGION_ENTRY_INVALID) &&
- gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY,
- gaddr & 0xffe0000000000000UL))
- return -ENOMEM;
- table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- }
- if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) {
- table += (gaddr >> 42) & 0x7ff;
- if ((*table & _REGION_ENTRY_INVALID) &&
- gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY,
- gaddr & 0xfffffc0000000000UL))
- return -ENOMEM;
- table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- }
- if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) {
- table += (gaddr >> 31) & 0x7ff;
- if ((*table & _REGION_ENTRY_INVALID) &&
- gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY,
- gaddr & 0xffffffff80000000UL))
- return -ENOMEM;
- table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- }
- table += (gaddr >> 20) & 0x7ff;
- /* Walk the parent mm page table */
- mm = gmap->mm;
- pgd = pgd_offset(mm, vmaddr);
- VM_BUG_ON(pgd_none(*pgd));
- pud = pud_offset(pgd, vmaddr);
- VM_BUG_ON(pud_none(*pud));
- pmd = pmd_offset(pud, vmaddr);
- VM_BUG_ON(pmd_none(*pmd));
- /* large pmds cannot yet be handled */
- if (pmd_large(*pmd))
- return -EFAULT;
- /* Link gmap segment table entry location to page table. */
- rc = radix_tree_preload(GFP_KERNEL);
- if (rc)
- return rc;
- ptl = pmd_lock(mm, pmd);
- spin_lock(&gmap->guest_table_lock);
- if (*table == _SEGMENT_ENTRY_INVALID) {
- rc = radix_tree_insert(&gmap->host_to_guest,
- vmaddr >> PMD_SHIFT, table);
- if (!rc)
- *table = pmd_val(*pmd);
- } else
- rc = 0;
- spin_unlock(&gmap->guest_table_lock);
- spin_unlock(ptl);
- radix_tree_preload_end();
- return rc;
+ pgste_t pgste;
+
+ /* the mm_has_pgste() check is done in set_pte_at() */
+ pgste = pgste_get_lock(ptep);
+ pgste_val(pgste) &= ~_PGSTE_GPS_ZERO;
+ pgste_set_key(ptep, pgste, entry, mm);
+ pgste = pgste_set_pte(ptep, pgste, entry);
+ pgste_set_unlock(ptep, pgste);
}
-/**
- * gmap_fault - resolve a fault on a guest address
- * @gmap: pointer to guest mapping meta data structure
- * @gaddr: guest address
- * @fault_flags: flags to pass down to handle_mm_fault()
- *
- * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
- * if the vm address is already mapped to a different guest segment.
- */
-int gmap_fault(struct gmap *gmap, unsigned long gaddr,
- unsigned int fault_flags)
+void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
- unsigned long vmaddr;
- int rc;
- bool unlocked;
-
- down_read(&gmap->mm->mmap_sem);
-
-retry:
- unlocked = false;
- vmaddr = __gmap_translate(gmap, gaddr);
- if (IS_ERR_VALUE(vmaddr)) {
- rc = vmaddr;
- goto out_up;
- }
- if (fixup_user_fault(current, gmap->mm, vmaddr, fault_flags,
- &unlocked)) {
- rc = -EFAULT;
- goto out_up;
- }
- /*
- * In the case that fixup_user_fault unlocked the mmap_sem during
- * faultin redo __gmap_translate to not race with a map/unmap_segment.
- */
- if (unlocked)
- goto retry;
+ pgste_t pgste;
- rc = __gmap_link(gmap, gaddr, vmaddr);
-out_up:
- up_read(&gmap->mm->mmap_sem);
- return rc;
+ pgste = pgste_get_lock(ptep);
+ pgste_val(pgste) |= PGSTE_IN_BIT;
+ pgste_set_unlock(ptep, pgste);
}
-EXPORT_SYMBOL_GPL(gmap_fault);
-static void gmap_zap_swap_entry(swp_entry_t entry, struct mm_struct *mm)
+static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
{
if (!non_swap_entry(entry))
dec_mm_counter(mm, MM_SWAPENTS);
@@ -620,225 +426,99 @@ static void gmap_zap_swap_entry(swp_entry_t entry, struct mm_struct *mm)
free_swap_and_cache(entry);
}
-/*
- * this function is assumed to be called with mmap_sem held
- */
-void __gmap_zap(struct gmap *gmap, unsigned long gaddr)
+void ptep_zap_unused(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, int reset)
{
- unsigned long vmaddr, ptev, pgstev;
- pte_t *ptep, pte;
- spinlock_t *ptl;
+ unsigned long pgstev;
pgste_t pgste;
+ pte_t pte;
- /* Find the vm address for the guest address */
- vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host,
- gaddr >> PMD_SHIFT);
- if (!vmaddr)
- return;
- vmaddr |= gaddr & ~PMD_MASK;
- /* Get pointer to the page table entry */
- ptep = get_locked_pte(gmap->mm, vmaddr, &ptl);
- if (unlikely(!ptep))
- return;
- pte = *ptep;
- if (!pte_swap(pte))
- goto out_pte;
/* Zap unused and logically-zero pages */
pgste = pgste_get_lock(ptep);
pgstev = pgste_val(pgste);
- ptev = pte_val(pte);
- if (((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED) ||
- ((pgstev & _PGSTE_GPS_ZERO) && (ptev & _PAGE_INVALID))) {
- gmap_zap_swap_entry(pte_to_swp_entry(pte), gmap->mm);
- pte_clear(gmap->mm, vmaddr, ptep);
- }
+ pte = *ptep;
+ if (!reset && pte_swap(pte) &&
+ ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED ||
+ (pgstev & _PGSTE_GPS_ZERO))) {
+ ptep_zap_swap_entry(mm, pte_to_swp_entry(pte));
+ pte_clear(mm, addr, ptep);
+ }
+ if (reset)
+ pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK;
pgste_set_unlock(ptep, pgste);
-out_pte:
- pte_unmap_unlock(ptep, ptl);
}
-EXPORT_SYMBOL_GPL(__gmap_zap);
-void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to)
+void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
- unsigned long gaddr, vmaddr, size;
- struct vm_area_struct *vma;
-
- down_read(&gmap->mm->mmap_sem);
- for (gaddr = from; gaddr < to;
- gaddr = (gaddr + PMD_SIZE) & PMD_MASK) {
- /* Find the vm address for the guest address */
- vmaddr = (unsigned long)
- radix_tree_lookup(&gmap->guest_to_host,
- gaddr >> PMD_SHIFT);
- if (!vmaddr)
- continue;
- vmaddr |= gaddr & ~PMD_MASK;
- /* Find vma in the parent mm */
- vma = find_vma(gmap->mm, vmaddr);
- size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
- zap_page_range(vma, vmaddr, size, NULL);
- }
- up_read(&gmap->mm->mmap_sem);
-}
-EXPORT_SYMBOL_GPL(gmap_discard);
-
-static LIST_HEAD(gmap_notifier_list);
-static DEFINE_SPINLOCK(gmap_notifier_lock);
+ unsigned long ptev;
+ pgste_t pgste;
-/**
- * gmap_register_ipte_notifier - register a pte invalidation callback
- * @nb: pointer to the gmap notifier block
- */
-void gmap_register_ipte_notifier(struct gmap_notifier *nb)
-{
- spin_lock(&gmap_notifier_lock);
- list_add(&nb->list, &gmap_notifier_list);
- spin_unlock(&gmap_notifier_lock);
+ /* Clear storage key */
+ pgste = pgste_get_lock(ptep);
+ pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT |
+ PGSTE_GR_BIT | PGSTE_GC_BIT);
+ ptev = pte_val(*ptep);
+ if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE))
+ page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 1);
+ pgste_set_unlock(ptep, pgste);
}
-EXPORT_SYMBOL_GPL(gmap_register_ipte_notifier);
-/**
- * gmap_unregister_ipte_notifier - remove a pte invalidation callback
- * @nb: pointer to the gmap notifier block
- */
-void gmap_unregister_ipte_notifier(struct gmap_notifier *nb)
-{
- spin_lock(&gmap_notifier_lock);
- list_del_init(&nb->list);
- spin_unlock(&gmap_notifier_lock);
-}
-EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier);
-
-/**
- * gmap_ipte_notify - mark a range of ptes for invalidation notification
- * @gmap: pointer to guest mapping meta data structure
- * @gaddr: virtual address in the guest address space
- * @len: size of area
- *
- * Returns 0 if for each page in the given range a gmap mapping exists and
- * the invalidation notification could be set. If the gmap mapping is missing
- * for one or more pages -EFAULT is returned. If no memory could be allocated
- * -ENOMEM is returned. This function establishes missing page table entries.
+/*
+ * Test and reset if a guest page is dirty
*/
-int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len)
+bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
{
- unsigned long addr;
spinlock_t *ptl;
- pte_t *ptep, entry;
pgste_t pgste;
- bool unlocked;
- int rc = 0;
-
- if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK))
- return -EINVAL;
- down_read(&gmap->mm->mmap_sem);
- while (len) {
- unlocked = false;
- /* Convert gmap address and connect the page tables */
- addr = __gmap_translate(gmap, gaddr);
- if (IS_ERR_VALUE(addr)) {
- rc = addr;
- break;
- }
- /* Get the page mapped */
- if (fixup_user_fault(current, gmap->mm, addr, FAULT_FLAG_WRITE,
- &unlocked)) {
- rc = -EFAULT;
- break;
- }
- /* While trying to map mmap_sem got unlocked. Let us retry */
- if (unlocked)
- continue;
- rc = __gmap_link(gmap, gaddr, addr);
- if (rc)
- break;
- /* Walk the process page table, lock and get pte pointer */
- ptep = get_locked_pte(gmap->mm, addr, &ptl);
- VM_BUG_ON(!ptep);
- /* Set notification bit in the pgste of the pte */
- entry = *ptep;
- if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_PROTECT)) == 0) {
- pgste = pgste_get_lock(ptep);
- pgste_val(pgste) |= PGSTE_IN_BIT;
- pgste_set_unlock(ptep, pgste);
- gaddr += PAGE_SIZE;
- len -= PAGE_SIZE;
- }
- pte_unmap_unlock(ptep, ptl);
- }
- up_read(&gmap->mm->mmap_sem);
- return rc;
-}
-EXPORT_SYMBOL_GPL(gmap_ipte_notify);
-
-/**
- * gmap_do_ipte_notify - call all invalidation callbacks for a specific pte.
- * @mm: pointer to the process mm_struct
- * @addr: virtual address in the process address space
- * @pte: pointer to the page table entry
- *
- * This function is assumed to be called with the page table lock held
- * for the pte to notify.
- */
-void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long vmaddr, pte_t *pte)
-{
- unsigned long offset, gaddr;
- unsigned long *table;
- struct gmap_notifier *nb;
- struct gmap *gmap;
-
- offset = ((unsigned long) pte) & (255 * sizeof(pte_t));
- offset = offset * (4096 / sizeof(pte_t));
- spin_lock(&gmap_notifier_lock);
- list_for_each_entry(gmap, &mm->context.gmap_list, list) {
- table = radix_tree_lookup(&gmap->host_to_guest,
- vmaddr >> PMD_SHIFT);
- if (!table)
- continue;
- gaddr = __gmap_segment_gaddr(table) + offset;
- list_for_each_entry(nb, &gmap_notifier_list, list)
- nb->notifier_call(gmap, gaddr);
+ pte_t *ptep;
+ pte_t pte;
+ bool dirty;
+
+ ptep = get_locked_pte(mm, addr, &ptl);
+ if (unlikely(!ptep))
+ return false;
+
+ pgste = pgste_get_lock(ptep);
+ dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
+ pgste_val(pgste) &= ~PGSTE_UC_BIT;
+ pte = *ptep;
+ if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
+ pgste = pgste_ipte_notify(mm, addr, ptep, pgste);
+ __ptep_ipte(addr, ptep);
+ if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
+ pte_val(pte) |= _PAGE_PROTECT;
+ else
+ pte_val(pte) |= _PAGE_INVALID;
+ *ptep = pte;
}
- spin_unlock(&gmap_notifier_lock);
+ pgste_set_unlock(ptep, pgste);
+
+ spin_unlock(ptl);
+ return dirty;
}
-EXPORT_SYMBOL_GPL(gmap_do_ipte_notify);
+EXPORT_SYMBOL_GPL(test_and_clear_guest_dirty);
int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
- unsigned long key, bool nq)
+ unsigned char key, bool nq)
{
+ unsigned long keyul;
spinlock_t *ptl;
pgste_t old, new;
pte_t *ptep;
- bool unlocked;
down_read(&mm->mmap_sem);
-retry:
- unlocked = false;
ptep = get_locked_pte(mm, addr, &ptl);
if (unlikely(!ptep)) {
up_read(&mm->mmap_sem);
return -EFAULT;
}
- if (!(pte_val(*ptep) & _PAGE_INVALID) &&
- (pte_val(*ptep) & _PAGE_PROTECT)) {
- pte_unmap_unlock(ptep, ptl);
- /*
- * We do not really care about unlocked. We will retry either
- * way. But this allows fixup_user_fault to enable userfaultfd.
- */
- if (fixup_user_fault(current, mm, addr, FAULT_FLAG_WRITE,
- &unlocked)) {
- up_read(&mm->mmap_sem);
- return -EFAULT;
- }
- goto retry;
- }
new = old = pgste_get_lock(ptep);
pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT |
PGSTE_ACC_BITS | PGSTE_FP_BIT);
- pgste_val(new) |= (key & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
- pgste_val(new) |= (key & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
+ keyul = (unsigned long) key;
+ pgste_val(new) |= (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
+ pgste_val(new) |= (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
if (!(pte_val(*ptep) & _PAGE_INVALID)) {
unsigned long address, bits, skey;
@@ -863,13 +543,12 @@ retry:
}
EXPORT_SYMBOL(set_guest_storage_key);
-unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr)
+unsigned char get_guest_storage_key(struct mm_struct *mm, unsigned long addr)
{
+ unsigned char key;
spinlock_t *ptl;
pgste_t pgste;
pte_t *ptep;
- uint64_t physaddr;
- unsigned long key = 0;
down_read(&mm->mmap_sem);
ptep = get_locked_pte(mm, addr, &ptl);
@@ -880,13 +559,12 @@ unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr)
pgste = pgste_get_lock(ptep);
if (pte_val(*ptep) & _PAGE_INVALID) {
- key |= (pgste_val(pgste) & PGSTE_ACC_BITS) >> 56;
+ key = (pgste_val(pgste) & PGSTE_ACC_BITS) >> 56;
key |= (pgste_val(pgste) & PGSTE_FP_BIT) >> 56;
key |= (pgste_val(pgste) & PGSTE_GR_BIT) >> 48;
key |= (pgste_val(pgste) & PGSTE_GC_BIT) >> 48;
} else {
- physaddr = pte_val(*ptep) & PAGE_MASK;
- key = page_get_storage_key(physaddr);
+ key = page_get_storage_key(pte_val(*ptep) & PAGE_MASK);
/* Reflect guest's logical view, not physical */
if (pgste_val(pgste) & PGSTE_GR_BIT)
@@ -901,471 +579,4 @@ unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr)
return key;
}
EXPORT_SYMBOL(get_guest_storage_key);
-
-static int page_table_allocate_pgste_min = 0;
-static int page_table_allocate_pgste_max = 1;
-int page_table_allocate_pgste = 0;
-EXPORT_SYMBOL(page_table_allocate_pgste);
-
-static struct ctl_table page_table_sysctl[] = {
- {
- .procname = "allocate_pgste",
- .data = &page_table_allocate_pgste,
- .maxlen = sizeof(int),
- .mode = S_IRUGO | S_IWUSR,
- .proc_handler = proc_dointvec,
- .extra1 = &page_table_allocate_pgste_min,
- .extra2 = &page_table_allocate_pgste_max,
- },
- { }
-};
-
-static struct ctl_table page_table_sysctl_dir[] = {
- {
- .procname = "vm",
- .maxlen = 0,
- .mode = 0555,
- .child = page_table_sysctl,
- },
- { }
-};
-
-static int __init page_table_register_sysctl(void)
-{
- return register_sysctl_table(page_table_sysctl_dir) ? 0 : -ENOMEM;
-}
-__initcall(page_table_register_sysctl);
-
-#else /* CONFIG_PGSTE */
-
-static inline void gmap_unlink(struct mm_struct *mm, unsigned long *table,
- unsigned long vmaddr)
-{
-}
-
-#endif /* CONFIG_PGSTE */
-
-static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
-{
- unsigned int old, new;
-
- do {
- old = atomic_read(v);
- new = old ^ bits;
- } while (atomic_cmpxchg(v, old, new) != old);
- return new;
-}
-
-/*
- * page table entry allocation/free routines.
- */
-unsigned long *page_table_alloc(struct mm_struct *mm)
-{
- unsigned long *table;
- struct page *page;
- unsigned int mask, bit;
-
- /* Try to get a fragment of a 4K page as a 2K page table */
- if (!mm_alloc_pgste(mm)) {
- table = NULL;
- spin_lock_bh(&mm->context.list_lock);
- if (!list_empty(&mm->context.pgtable_list)) {
- page = list_first_entry(&mm->context.pgtable_list,
- struct page, lru);
- mask = atomic_read(&page->_mapcount);
- mask = (mask | (mask >> 4)) & 3;
- if (mask != 3) {
- table = (unsigned long *) page_to_phys(page);
- bit = mask & 1; /* =1 -> second 2K */
- if (bit)
- table += PTRS_PER_PTE;
- atomic_xor_bits(&page->_mapcount, 1U << bit);
- list_del(&page->lru);
- }
- }
- spin_unlock_bh(&mm->context.list_lock);
- if (table)
- return table;
- }
- /* Allocate a fresh page */
- page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
- if (!page)
- return NULL;
- if (!pgtable_page_ctor(page)) {
- __free_page(page);
- return NULL;
- }
- /* Initialize page table */
- table = (unsigned long *) page_to_phys(page);
- if (mm_alloc_pgste(mm)) {
- /* Return 4K page table with PGSTEs */
- atomic_set(&page->_mapcount, 3);
- clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
- clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
- } else {
- /* Return the first 2K fragment of the page */
- atomic_set(&page->_mapcount, 1);
- clear_table(table, _PAGE_INVALID, PAGE_SIZE);
- spin_lock_bh(&mm->context.list_lock);
- list_add(&page->lru, &mm->context.pgtable_list);
- spin_unlock_bh(&mm->context.list_lock);
- }
- return table;
-}
-
-void page_table_free(struct mm_struct *mm, unsigned long *table)
-{
- struct page *page;
- unsigned int bit, mask;
-
- page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
- if (!mm_alloc_pgste(mm)) {
- /* Free 2K page table fragment of a 4K page */
- bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t));
- spin_lock_bh(&mm->context.list_lock);
- mask = atomic_xor_bits(&page->_mapcount, 1U << bit);
- if (mask & 3)
- list_add(&page->lru, &mm->context.pgtable_list);
- else
- list_del(&page->lru);
- spin_unlock_bh(&mm->context.list_lock);
- if (mask != 0)
- return;
- }
-
- pgtable_page_dtor(page);
- atomic_set(&page->_mapcount, -1);
- __free_page(page);
-}
-
-void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
- unsigned long vmaddr)
-{
- struct mm_struct *mm;
- struct page *page;
- unsigned int bit, mask;
-
- mm = tlb->mm;
- page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
- if (mm_alloc_pgste(mm)) {
- gmap_unlink(mm, table, vmaddr);
- table = (unsigned long *) (__pa(table) | 3);
- tlb_remove_table(tlb, table);
- return;
- }
- bit = (__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t));
- spin_lock_bh(&mm->context.list_lock);
- mask = atomic_xor_bits(&page->_mapcount, 0x11U << bit);
- if (mask & 3)
- list_add_tail(&page->lru, &mm->context.pgtable_list);
- else
- list_del(&page->lru);
- spin_unlock_bh(&mm->context.list_lock);
- table = (unsigned long *) (__pa(table) | (1U << bit));
- tlb_remove_table(tlb, table);
-}
-
-static void __tlb_remove_table(void *_table)
-{
- unsigned int mask = (unsigned long) _table & 3;
- void *table = (void *)((unsigned long) _table ^ mask);
- struct page *page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
-
- switch (mask) {
- case 0: /* pmd or pud */
- free_pages((unsigned long) table, 2);
- break;
- case 1: /* lower 2K of a 4K page table */
- case 2: /* higher 2K of a 4K page table */
- if (atomic_xor_bits(&page->_mapcount, mask << 4) != 0)
- break;
- /* fallthrough */
- case 3: /* 4K page table with pgstes */
- pgtable_page_dtor(page);
- atomic_set(&page->_mapcount, -1);
- __free_page(page);
- break;
- }
-}
-
-static void tlb_remove_table_smp_sync(void *arg)
-{
- /* Simply deliver the interrupt */
-}
-
-static void tlb_remove_table_one(void *table)
-{
- /*
- * This isn't an RCU grace period and hence the page-tables cannot be
- * assumed to be actually RCU-freed.
- *
- * It is however sufficient for software page-table walkers that rely
- * on IRQ disabling. See the comment near struct mmu_table_batch.
- */
- smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
- __tlb_remove_table(table);
-}
-
-static void tlb_remove_table_rcu(struct rcu_head *head)
-{
- struct mmu_table_batch *batch;
- int i;
-
- batch = container_of(head, struct mmu_table_batch, rcu);
-
- for (i = 0; i < batch->nr; i++)
- __tlb_remove_table(batch->tables[i]);
-
- free_page((unsigned long)batch);
-}
-
-void tlb_table_flush(struct mmu_gather *tlb)
-{
- struct mmu_table_batch **batch = &tlb->batch;
-
- if (*batch) {
- call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu);
- *batch = NULL;
- }
-}
-
-void tlb_remove_table(struct mmu_gather *tlb, void *table)
-{
- struct mmu_table_batch **batch = &tlb->batch;
-
- tlb->mm->context.flush_mm = 1;
- if (*batch == NULL) {
- *batch = (struct mmu_table_batch *)
- __get_free_page(GFP_NOWAIT | __GFP_NOWARN);
- if (*batch == NULL) {
- __tlb_flush_mm_lazy(tlb->mm);
- tlb_remove_table_one(table);
- return;
- }
- (*batch)->nr = 0;
- }
- (*batch)->tables[(*batch)->nr++] = table;
- if ((*batch)->nr == MAX_TABLE_BATCH)
- tlb_flush_mmu(tlb);
-}
-
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-static inline void thp_split_vma(struct vm_area_struct *vma)
-{
- unsigned long addr;
-
- for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE)
- follow_page(vma, addr, FOLL_SPLIT);
-}
-
-static inline void thp_split_mm(struct mm_struct *mm)
-{
- struct vm_area_struct *vma;
-
- for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
- thp_split_vma(vma);
- vma->vm_flags &= ~VM_HUGEPAGE;
- vma->vm_flags |= VM_NOHUGEPAGE;
- }
- mm->def_flags |= VM_NOHUGEPAGE;
-}
-#else
-static inline void thp_split_mm(struct mm_struct *mm)
-{
-}
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-
-/*
- * switch on pgstes for its userspace process (for kvm)
- */
-int s390_enable_sie(void)
-{
- struct mm_struct *mm = current->mm;
-
- /* Do we have pgstes? if yes, we are done */
- if (mm_has_pgste(mm))
- return 0;
- /* Fail if the page tables are 2K */
- if (!mm_alloc_pgste(mm))
- return -EINVAL;
- down_write(&mm->mmap_sem);
- mm->context.has_pgste = 1;
- /* split thp mappings and disable thp for future mappings */
- thp_split_mm(mm);
- up_write(&mm->mmap_sem);
- return 0;
-}
-EXPORT_SYMBOL_GPL(s390_enable_sie);
-
-/*
- * Enable storage key handling from now on and initialize the storage
- * keys with the default key.
- */
-static int __s390_enable_skey(pte_t *pte, unsigned long addr,
- unsigned long next, struct mm_walk *walk)
-{
- unsigned long ptev;
- pgste_t pgste;
-
- pgste = pgste_get_lock(pte);
- /*
- * Remove all zero page mappings,
- * after establishing a policy to forbid zero page mappings
- * following faults for that page will get fresh anonymous pages
- */
- if (is_zero_pfn(pte_pfn(*pte))) {
- ptep_flush_direct(walk->mm, addr, pte);
- pte_val(*pte) = _PAGE_INVALID;
- }
- /* Clear storage key */
- pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT |
- PGSTE_GR_BIT | PGSTE_GC_BIT);
- ptev = pte_val(*pte);
- if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE))
- page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 1);
- pgste_set_unlock(pte, pgste);
- return 0;
-}
-
-int s390_enable_skey(void)
-{
- struct mm_walk walk = { .pte_entry = __s390_enable_skey };
- struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma;
- int rc = 0;
-
- down_write(&mm->mmap_sem);
- if (mm_use_skey(mm))
- goto out_up;
-
- mm->context.use_skey = 1;
- for (vma = mm->mmap; vma; vma = vma->vm_next) {
- if (ksm_madvise(vma, vma->vm_start, vma->vm_end,
- MADV_UNMERGEABLE, &vma->vm_flags)) {
- mm->context.use_skey = 0;
- rc = -ENOMEM;
- goto out_up;
- }
- }
- mm->def_flags &= ~VM_MERGEABLE;
-
- walk.mm = mm;
- walk_page_range(0, TASK_SIZE, &walk);
-
-out_up:
- up_write(&mm->mmap_sem);
- return rc;
-}
-EXPORT_SYMBOL_GPL(s390_enable_skey);
-
-/*
- * Reset CMMA state, make all pages stable again.
- */
-static int __s390_reset_cmma(pte_t *pte, unsigned long addr,
- unsigned long next, struct mm_walk *walk)
-{
- pgste_t pgste;
-
- pgste = pgste_get_lock(pte);
- pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK;
- pgste_set_unlock(pte, pgste);
- return 0;
-}
-
-void s390_reset_cmma(struct mm_struct *mm)
-{
- struct mm_walk walk = { .pte_entry = __s390_reset_cmma };
-
- down_write(&mm->mmap_sem);
- walk.mm = mm;
- walk_page_range(0, TASK_SIZE, &walk);
- up_write(&mm->mmap_sem);
-}
-EXPORT_SYMBOL_GPL(s390_reset_cmma);
-
-/*
- * Test and reset if a guest page is dirty
- */
-bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *gmap)
-{
- pte_t *pte;
- spinlock_t *ptl;
- bool dirty = false;
-
- pte = get_locked_pte(gmap->mm, address, &ptl);
- if (unlikely(!pte))
- return false;
-
- if (ptep_test_and_clear_user_dirty(gmap->mm, address, pte))
- dirty = true;
-
- spin_unlock(ptl);
- return dirty;
-}
-EXPORT_SYMBOL_GPL(gmap_test_and_clear_dirty);
-
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address,
- pmd_t *pmdp)
-{
- VM_BUG_ON(address & ~HPAGE_PMD_MASK);
- /* No need to flush TLB
- * On s390 reference bits are in storage key and never in TLB */
- return pmdp_test_and_clear_young(vma, address, pmdp);
-}
-
-int pmdp_set_access_flags(struct vm_area_struct *vma,
- unsigned long address, pmd_t *pmdp,
- pmd_t entry, int dirty)
-{
- VM_BUG_ON(address & ~HPAGE_PMD_MASK);
-
- entry = pmd_mkyoung(entry);
- if (dirty)
- entry = pmd_mkdirty(entry);
- if (pmd_same(*pmdp, entry))
- return 0;
- pmdp_invalidate(vma, address, pmdp);
- set_pmd_at(vma->vm_mm, address, pmdp, entry);
- return 1;
-}
-
-void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
- pgtable_t pgtable)
-{
- struct list_head *lh = (struct list_head *) pgtable;
-
- assert_spin_locked(pmd_lockptr(mm, pmdp));
-
- /* FIFO */
- if (!pmd_huge_pte(mm, pmdp))
- INIT_LIST_HEAD(lh);
- else
- list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
- pmd_huge_pte(mm, pmdp) = pgtable;
-}
-
-pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
-{
- struct list_head *lh;
- pgtable_t pgtable;
- pte_t *ptep;
-
- assert_spin_locked(pmd_lockptr(mm, pmdp));
-
- /* FIFO */
- pgtable = pmd_huge_pte(mm, pmdp);
- lh = (struct list_head *) pgtable;
- if (list_empty(lh))
- pmd_huge_pte(mm, pmdp) = NULL;
- else {
- pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
- list_del(lh);
- }
- ptep = (pte_t *) pgtable;
- pte_val(*ptep) = _PAGE_INVALID;
- ptep++;
- pte_val(*ptep) = _PAGE_INVALID;
- return pgtable;
-}
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+#endif
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index ef7d6c8fea66..d48cf25cfe99 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -56,7 +56,7 @@ static inline pmd_t *vmem_pmd_alloc(void)
return pmd;
}
-static pte_t __ref *vmem_pte_alloc(unsigned long address)
+static pte_t __ref *vmem_pte_alloc(void)
{
pte_t *pte;
@@ -94,16 +94,15 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
pgd_populate(&init_mm, pg_dir, pu_dir);
}
pu_dir = pud_offset(pg_dir, address);
-#ifndef CONFIG_DEBUG_PAGEALLOC
if (MACHINE_HAS_EDAT2 && pud_none(*pu_dir) && address &&
- !(address & ~PUD_MASK) && (address + PUD_SIZE <= end)) {
+ !(address & ~PUD_MASK) && (address + PUD_SIZE <= end) &&
+ !debug_pagealloc_enabled()) {
pud_val(*pu_dir) = __pa(address) |
_REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE |
(ro ? _REGION_ENTRY_PROTECT : 0);
address += PUD_SIZE;
continue;
}
-#endif
if (pud_none(*pu_dir)) {
pm_dir = vmem_pmd_alloc();
if (!pm_dir)
@@ -111,9 +110,9 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
pud_populate(&init_mm, pu_dir, pm_dir);
}
pm_dir = pmd_offset(pu_dir, address);
-#ifndef CONFIG_DEBUG_PAGEALLOC
if (MACHINE_HAS_EDAT1 && pmd_none(*pm_dir) && address &&
- !(address & ~PMD_MASK) && (address + PMD_SIZE <= end)) {
+ !(address & ~PMD_MASK) && (address + PMD_SIZE <= end) &&
+ !debug_pagealloc_enabled()) {
pmd_val(*pm_dir) = __pa(address) |
_SEGMENT_ENTRY | _SEGMENT_ENTRY_LARGE |
_SEGMENT_ENTRY_YOUNG |
@@ -121,9 +120,8 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
address += PMD_SIZE;
continue;
}
-#endif
if (pmd_none(*pm_dir)) {
- pt_dir = vmem_pte_alloc(address);
+ pt_dir = vmem_pte_alloc();
if (!pt_dir)
goto out;
pmd_populate(&init_mm, pm_dir, pt_dir);
@@ -235,7 +233,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
address = (address + PMD_SIZE) & PMD_MASK;
continue;
}
- pt_dir = vmem_pte_alloc(address);
+ pt_dir = vmem_pte_alloc();
if (!pt_dir)
goto out;
pmd_populate(&init_mm, pm_dir, pt_dir);
@@ -372,7 +370,7 @@ void __init vmem_map_init(void)
ro_end = (unsigned long)&_eshared & PAGE_MASK;
for_each_memblock(memory, reg) {
start = reg->base;
- end = reg->base + reg->size - 1;
+ end = reg->base + reg->size;
if (start >= ro_end || end <= ro_start)
vmem_add_mem(start, end - start, 0);
else if (start >= ro_start && end <= ro_end)
diff --git a/arch/s390/net/bpf_jit.h b/arch/s390/net/bpf_jit.h
index f010c93a88b1..fda605dbc1b4 100644
--- a/arch/s390/net/bpf_jit.h
+++ b/arch/s390/net/bpf_jit.h
@@ -37,7 +37,7 @@ extern u8 sk_load_word[], sk_load_half[], sk_load_byte[];
* | | |
* +---------------+ |
* | 8 byte skbp | |
- * R15+170 -> +---------------+ |
+ * R15+176 -> +---------------+ |
* | 8 byte hlen | |
* R15+168 -> +---------------+ |
* | 4 byte align | |
@@ -58,7 +58,7 @@ extern u8 sk_load_word[], sk_load_half[], sk_load_byte[];
#define STK_OFF (STK_SPACE - STK_160_UNUSED)
#define STK_OFF_TMP 160 /* Offset of tmp buffer on stack */
#define STK_OFF_HLEN 168 /* Offset of SKB header length on stack */
-#define STK_OFF_SKBP 170 /* Offset of SKB pointer on stack */
+#define STK_OFF_SKBP 176 /* Offset of SKB pointer on stack */
#define STK_OFF_R6 (160 - 11 * 8) /* Offset of r6 on stack */
#define STK_OFF_TCCNT (160 - 12 * 8) /* Offset of tail_call_cnt on stack */
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 3c0bfc1f2694..bee281f3163d 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -45,7 +45,7 @@ struct bpf_jit {
int labels[1]; /* Labels for local jumps */
};
-#define BPF_SIZE_MAX 0x7ffff /* Max size for program (20 bit signed displ) */
+#define BPF_SIZE_MAX 0xffff /* Max size for program (16 bit branches) */
#define SEEN_SKB 1 /* skb access */
#define SEEN_MEM 2 /* use mem[] for temporary storage */
@@ -54,16 +54,17 @@ struct bpf_jit {
#define SEEN_FUNC 16 /* calls C functions */
#define SEEN_TAIL_CALL 32 /* code uses tail calls */
#define SEEN_SKB_CHANGE 64 /* code changes skb data */
+#define SEEN_REG_AX 128 /* code uses constant blinding */
#define SEEN_STACK (SEEN_FUNC | SEEN_MEM | SEEN_SKB)
/*
* s390 registers
*/
-#define REG_W0 (__MAX_BPF_REG+0) /* Work register 1 (even) */
-#define REG_W1 (__MAX_BPF_REG+1) /* Work register 2 (odd) */
-#define REG_SKB_DATA (__MAX_BPF_REG+2) /* SKB data register */
-#define REG_L (__MAX_BPF_REG+3) /* Literal pool register */
-#define REG_15 (__MAX_BPF_REG+4) /* Register 15 */
+#define REG_W0 (MAX_BPF_JIT_REG + 0) /* Work register 1 (even) */
+#define REG_W1 (MAX_BPF_JIT_REG + 1) /* Work register 2 (odd) */
+#define REG_SKB_DATA (MAX_BPF_JIT_REG + 2) /* SKB data register */
+#define REG_L (MAX_BPF_JIT_REG + 3) /* Literal pool register */
+#define REG_15 (MAX_BPF_JIT_REG + 4) /* Register 15 */
#define REG_0 REG_W0 /* Register 0 */
#define REG_1 REG_W1 /* Register 1 */
#define REG_2 BPF_REG_1 /* Register 2 */
@@ -88,6 +89,8 @@ static const int reg2hex[] = {
[BPF_REG_9] = 10,
/* BPF stack pointer */
[BPF_REG_FP] = 13,
+ /* Register for blinding (shared with REG_SKB_DATA) */
+ [BPF_REG_AX] = 12,
/* SKB data pointer */
[REG_SKB_DATA] = 12,
/* Work registers for s390x backend */
@@ -385,7 +388,7 @@ static void save_restore_regs(struct bpf_jit *jit, int op)
/*
* For SKB access %b1 contains the SKB pointer. For "bpf_jit.S"
* we store the SKB header length on the stack and the SKB data
- * pointer in REG_SKB_DATA.
+ * pointer in REG_SKB_DATA if BPF_REG_AX is not used.
*/
static void emit_load_skb_data_hlen(struct bpf_jit *jit)
{
@@ -397,9 +400,10 @@ static void emit_load_skb_data_hlen(struct bpf_jit *jit)
offsetof(struct sk_buff, data_len));
/* stg %w1,ST_OFF_HLEN(%r0,%r15) */
EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, REG_15, STK_OFF_HLEN);
- /* lg %skb_data,data_off(%b1) */
- EMIT6_DISP_LH(0xe3000000, 0x0004, REG_SKB_DATA, REG_0,
- BPF_REG_1, offsetof(struct sk_buff, data));
+ if (!(jit->seen & SEEN_REG_AX))
+ /* lg %skb_data,data_off(%b1) */
+ EMIT6_DISP_LH(0xe3000000, 0x0004, REG_SKB_DATA, REG_0,
+ BPF_REG_1, offsetof(struct sk_buff, data));
}
/*
@@ -446,7 +450,7 @@ static void bpf_jit_prologue(struct bpf_jit *jit)
emit_load_skb_data_hlen(jit);
if (jit->seen & SEEN_SKB_CHANGE)
/* stg %b1,ST_OFF_SKBP(%r0,%r15) */
- EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, REG_15,
+ EMIT6_DISP_LH(0xe3000000, 0x0024, BPF_REG_1, REG_0, REG_15,
STK_OFF_SKBP);
}
@@ -487,6 +491,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
s32 imm = insn->imm;
s16 off = insn->off;
+ if (dst_reg == BPF_REG_AX || src_reg == BPF_REG_AX)
+ jit->seen |= SEEN_REG_AX;
switch (insn->code) {
/*
* BPF_MOV
@@ -1188,7 +1194,7 @@ call_fn:
/*
* Implicit input:
* BPF_REG_6 (R7) : skb pointer
- * REG_SKB_DATA (R12): skb data pointer
+ * REG_SKB_DATA (R12): skb data pointer (if no BPF_REG_AX)
*
* Calculated input:
* BPF_REG_2 (R3) : offset of byte(s) to fetch in skb
@@ -1209,6 +1215,11 @@ call_fn:
/* agfr %b2,%src (%src is s32 here) */
EMIT4(0xb9180000, BPF_REG_2, src_reg);
+ /* Reload REG_SKB_DATA if BPF_REG_AX is used */
+ if (jit->seen & SEEN_REG_AX)
+ /* lg %skb_data,data_off(%b6) */
+ EMIT6_DISP_LH(0xe3000000, 0x0004, REG_SKB_DATA, REG_0,
+ BPF_REG_6, offsetof(struct sk_buff, data));
/* basr %b5,%w1 (%b5 is call saved) */
EMIT2(0x0d00, BPF_REG_5, REG_W1);
@@ -1262,37 +1273,62 @@ void bpf_jit_compile(struct bpf_prog *fp)
/*
* Compile eBPF program "fp"
*/
-void bpf_int_jit_compile(struct bpf_prog *fp)
+struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
{
+ struct bpf_prog *tmp, *orig_fp = fp;
struct bpf_binary_header *header;
+ bool tmp_blinded = false;
struct bpf_jit jit;
int pass;
if (!bpf_jit_enable)
- return;
+ return orig_fp;
+
+ tmp = bpf_jit_blind_constants(fp);
+ /*
+ * If blinding was requested and we failed during blinding,
+ * we must fall back to the interpreter.
+ */
+ if (IS_ERR(tmp))
+ return orig_fp;
+ if (tmp != fp) {
+ tmp_blinded = true;
+ fp = tmp;
+ }
+
memset(&jit, 0, sizeof(jit));
jit.addrs = kcalloc(fp->len + 1, sizeof(*jit.addrs), GFP_KERNEL);
- if (jit.addrs == NULL)
- return;
+ if (jit.addrs == NULL) {
+ fp = orig_fp;
+ goto out;
+ }
/*
* Three initial passes:
* - 1/2: Determine clobbered registers
* - 3: Calculate program size and addrs arrray
*/
for (pass = 1; pass <= 3; pass++) {
- if (bpf_jit_prog(&jit, fp))
+ if (bpf_jit_prog(&jit, fp)) {
+ fp = orig_fp;
goto free_addrs;
+ }
}
/*
* Final pass: Allocate and generate program
*/
- if (jit.size >= BPF_SIZE_MAX)
+ if (jit.size >= BPF_SIZE_MAX) {
+ fp = orig_fp;
goto free_addrs;
+ }
header = bpf_jit_binary_alloc(jit.size, &jit.prg_buf, 2, jit_fill_hole);
- if (!header)
+ if (!header) {
+ fp = orig_fp;
goto free_addrs;
- if (bpf_jit_prog(&jit, fp))
+ }
+ if (bpf_jit_prog(&jit, fp)) {
+ fp = orig_fp;
goto free_addrs;
+ }
if (bpf_jit_enable > 1) {
bpf_jit_dump(fp->len, jit.size, pass, jit.prg_buf);
if (jit.prg_buf)
@@ -1305,6 +1341,11 @@ void bpf_int_jit_compile(struct bpf_prog *fp)
}
free_addrs:
kfree(jit.addrs);
+out:
+ if (tmp_blinded)
+ bpf_jit_prog_release_other(fp, fp == orig_fp ?
+ tmp : orig_fp);
+ return fp;
}
/*
diff --git a/arch/s390/oprofile/Makefile b/arch/s390/oprofile/Makefile
index 1bd23017191e..496e4a7ee00e 100644
--- a/arch/s390/oprofile/Makefile
+++ b/arch/s390/oprofile/Makefile
@@ -6,5 +6,5 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
oprofilefs.o oprofile_stats.o \
timer_int.o )
-oprofile-y := $(DRIVER_OBJS) init.o backtrace.o
+oprofile-y := $(DRIVER_OBJS) init.o
oprofile-y += hwsampler.o
diff --git a/arch/s390/oprofile/backtrace.c b/arch/s390/oprofile/backtrace.c
deleted file mode 100644
index 1884e1759529..000000000000
--- a/arch/s390/oprofile/backtrace.c
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * S390 Version
- * Copyright IBM Corp. 2005
- * Author(s): Andreas Krebbel <Andreas.Krebbel@de.ibm.com>
- */
-
-#include <linux/oprofile.h>
-
-#include <asm/processor.h> /* for struct stack_frame */
-
-static unsigned long
-__show_trace(unsigned int *depth, unsigned long sp,
- unsigned long low, unsigned long high)
-{
- struct stack_frame *sf;
- struct pt_regs *regs;
-
- while (*depth) {
- if (sp < low || sp > high - sizeof(*sf))
- return sp;
- sf = (struct stack_frame *) sp;
- (*depth)--;
- oprofile_add_trace(sf->gprs[8]);
-
- /* Follow the backchain. */
- while (*depth) {
- low = sp;
- sp = sf->back_chain;
- if (!sp)
- break;
- if (sp <= low || sp > high - sizeof(*sf))
- return sp;
- sf = (struct stack_frame *) sp;
- (*depth)--;
- oprofile_add_trace(sf->gprs[8]);
-
- }
-
- if (*depth == 0)
- break;
-
- /* Zero backchain detected, check for interrupt frame. */
- sp = (unsigned long) (sf + 1);
- if (sp <= low || sp > high - sizeof(*regs))
- return sp;
- regs = (struct pt_regs *) sp;
- (*depth)--;
- oprofile_add_trace(sf->gprs[8]);
- low = sp;
- sp = regs->gprs[15];
- }
- return sp;
-}
-
-void s390_backtrace(struct pt_regs * const regs, unsigned int depth)
-{
- unsigned long head, frame_size;
- struct stack_frame* head_sf;
-
- if (user_mode(regs))
- return;
-
- frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
- head = regs->gprs[15];
- head_sf = (struct stack_frame*)head;
-
- if (!head_sf->back_chain)
- return;
-
- head = head_sf->back_chain;
-
- head = __show_trace(&depth, head,
- S390_lowcore.async_stack + frame_size - ASYNC_SIZE,
- S390_lowcore.async_stack + frame_size);
-
- __show_trace(&depth, head, S390_lowcore.thread_info,
- S390_lowcore.thread_info + THREAD_SIZE);
-}
diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c
index 9cfa2ffaa9d6..791935a65800 100644
--- a/arch/s390/oprofile/init.c
+++ b/arch/s390/oprofile/init.c
@@ -20,8 +20,6 @@
#include "../../../drivers/oprofile/oprof.h"
-extern void s390_backtrace(struct pt_regs * const regs, unsigned int depth);
-
#include "hwsampler.h"
#include "op_counter.h"
@@ -456,6 +454,7 @@ static int oprofile_hwsampler_init(struct oprofile_operations *ops)
case 0x2097: case 0x2098: ops->cpu_type = "s390/z10"; break;
case 0x2817: case 0x2818: ops->cpu_type = "s390/z196"; break;
case 0x2827: case 0x2828: ops->cpu_type = "s390/zEC12"; break;
+ case 0x2964: case 0x2965: ops->cpu_type = "s390/z13"; break;
default: return -ENODEV;
}
}
@@ -494,6 +493,24 @@ static void oprofile_hwsampler_exit(void)
hwsampler_shutdown();
}
+static int __s390_backtrace(void *data, unsigned long address)
+{
+ unsigned int *depth = data;
+
+ if (*depth == 0)
+ return 1;
+ (*depth)--;
+ oprofile_add_trace(address);
+ return 0;
+}
+
+static void s390_backtrace(struct pt_regs *regs, unsigned int depth)
+{
+ if (user_mode(regs))
+ return;
+ dump_trace(__s390_backtrace, &depth, NULL, regs->gprs[15]);
+}
+
int __init oprofile_arch_init(struct oprofile_operations *ops)
{
ops->backtrace = s390_backtrace;
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 8f19c8f9d660..871af75c69c2 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -637,12 +637,11 @@ static void zpci_cleanup_bus_resources(struct zpci_dev *zdev)
int pcibios_add_device(struct pci_dev *pdev)
{
- struct zpci_dev *zdev = to_zpci(pdev);
struct resource *res;
int i;
- zdev->pdev = pdev;
pdev->dev.groups = zpci_attr_groups;
+ pdev->dev.archdata.dma_ops = &s390_pci_dma_ops;
zpci_map_resources(pdev);
for (i = 0; i < PCI_BAR_COUNT; i++) {
@@ -664,8 +663,7 @@ int pcibios_enable_device(struct pci_dev *pdev, int mask)
{
struct zpci_dev *zdev = to_zpci(pdev);
- zdev->pdev = pdev;
- zpci_debug_init_device(zdev);
+ zpci_debug_init_device(zdev, dev_name(&pdev->dev));
zpci_fmb_enable_device(zdev);
return pci_enable_resources(pdev, mask);
@@ -677,7 +675,6 @@ void pcibios_disable_device(struct pci_dev *pdev)
zpci_fmb_disable_device(zdev);
zpci_debug_exit_device(zdev);
- zdev->pdev = NULL;
}
#ifdef CONFIG_HIBERNATE_CALLBACKS
@@ -864,8 +861,11 @@ static inline int barsize(u8 size)
static int zpci_mem_init(void)
{
+ BUILD_BUG_ON(!is_power_of_2(__alignof__(struct zpci_fmb)) ||
+ __alignof__(struct zpci_fmb) < sizeof(struct zpci_fmb));
+
zdev_fmb_cache = kmem_cache_create("PCI_FMB_cache", sizeof(struct zpci_fmb),
- 16, 0, NULL);
+ __alignof__(struct zpci_fmb), 0, NULL);
if (!zdev_fmb_cache)
goto error_fmb;
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index d6e411ed8b1f..1a4512c8544a 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -8,13 +8,19 @@
#define KMSG_COMPONENT "zpci"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#include <linux/compat.h>
#include <linux/kernel.h>
+#include <linux/miscdevice.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/delay.h>
#include <linux/pci.h>
+#include <linux/uaccess.h>
#include <asm/pci_debug.h>
#include <asm/pci_clp.h>
+#include <asm/compat.h>
+#include <asm/clp.h>
+#include <uapi/asm/clp.h>
static inline void zpci_err_clp(unsigned int rsp, int rc)
{
@@ -27,21 +33,43 @@ static inline void zpci_err_clp(unsigned int rsp, int rc)
}
/*
- * Call Logical Processor
- * Retry logic is handled by the caller.
+ * Call Logical Processor with c=1, lps=0 and command 1
+ * to get the bit mask of installed logical processors
*/
-static inline u8 clp_instr(void *data)
+static inline int clp_get_ilp(unsigned long *ilp)
+{
+ unsigned long mask;
+ int cc = 3;
+
+ asm volatile (
+ " .insn rrf,0xb9a00000,%[mask],%[cmd],8,0\n"
+ "0: ipm %[cc]\n"
+ " srl %[cc],28\n"
+ "1:\n"
+ EX_TABLE(0b, 1b)
+ : [cc] "+d" (cc), [mask] "=d" (mask) : [cmd] "a" (1)
+ : "cc");
+ *ilp = mask;
+ return cc;
+}
+
+/*
+ * Call Logical Processor with c=0, the give constant lps and an lpcb request.
+ */
+static inline int clp_req(void *data, unsigned int lps)
{
struct { u8 _[CLP_BLK_SIZE]; } *req = data;
u64 ignored;
- u8 cc;
+ int cc = 3;
asm volatile (
- " .insn rrf,0xb9a00000,%[ign],%[req],0x0,0x2\n"
- " ipm %[cc]\n"
+ " .insn rrf,0xb9a00000,%[ign],%[req],0,%[lps]\n"
+ "0: ipm %[cc]\n"
" srl %[cc],28\n"
- : [cc] "=d" (cc), [ign] "=d" (ignored), "+m" (*req)
- : [req] "a" (req)
+ "1:\n"
+ EX_TABLE(0b, 1b)
+ : [cc] "+d" (cc), [ign] "=d" (ignored), "+m" (*req)
+ : [req] "a" (req), [lps] "i" (lps)
: "cc");
return cc;
}
@@ -90,7 +118,7 @@ static int clp_query_pci_fngrp(struct zpci_dev *zdev, u8 pfgid)
rrb->response.hdr.len = sizeof(rrb->response);
rrb->request.pfgid = pfgid;
- rc = clp_instr(rrb);
+ rc = clp_req(rrb, CLP_LPS_PCI);
if (!rc && rrb->response.hdr.rsp == CLP_RC_OK)
clp_store_query_pci_fngrp(zdev, &rrb->response);
else {
@@ -143,13 +171,12 @@ static int clp_query_pci_fn(struct zpci_dev *zdev, u32 fh)
rrb->response.hdr.len = sizeof(rrb->response);
rrb->request.fh = fh;
- rc = clp_instr(rrb);
+ rc = clp_req(rrb, CLP_LPS_PCI);
if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) {
rc = clp_store_query_pci_fn(zdev, &rrb->response);
if (rc)
goto out;
- if (rrb->response.pfgid)
- rc = clp_query_pci_fngrp(zdev, rrb->response.pfgid);
+ rc = clp_query_pci_fngrp(zdev, rrb->response.pfgid);
} else {
zpci_err("Q PCI FN:\n");
zpci_err_clp(rrb->response.hdr.rsp, rc);
@@ -214,7 +241,7 @@ static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command)
rrb->request.oc = command;
rrb->request.ndas = nr_dma_as;
- rc = clp_instr(rrb);
+ rc = clp_req(rrb, CLP_LPS_PCI);
if (rrb->response.hdr.rsp == CLP_RC_SETPCIFN_BUSY) {
retries--;
if (retries < 0)
@@ -280,7 +307,7 @@ static int clp_list_pci(struct clp_req_rsp_list_pci *rrb,
rrb->request.resume_token = resume_token;
/* Get PCI function handle list */
- rc = clp_instr(rrb);
+ rc = clp_req(rrb, CLP_LPS_PCI);
if (rc || rrb->response.hdr.rsp != CLP_RC_OK) {
zpci_err("List PCI FN:\n");
zpci_err_clp(rrb->response.hdr.rsp, rc);
@@ -391,3 +418,198 @@ int clp_rescan_pci_devices_simple(void)
clp_free_block(rrb);
return rc;
}
+
+static int clp_base_slpc(struct clp_req *req, struct clp_req_rsp_slpc *lpcb)
+{
+ unsigned long limit = PAGE_SIZE - sizeof(lpcb->request);
+
+ if (lpcb->request.hdr.len != sizeof(lpcb->request) ||
+ lpcb->response.hdr.len > limit)
+ return -EINVAL;
+ return clp_req(lpcb, CLP_LPS_BASE) ? -EOPNOTSUPP : 0;
+}
+
+static int clp_base_command(struct clp_req *req, struct clp_req_hdr *lpcb)
+{
+ switch (lpcb->cmd) {
+ case 0x0001: /* store logical-processor characteristics */
+ return clp_base_slpc(req, (void *) lpcb);
+ default:
+ return -EINVAL;
+ }
+}
+
+static int clp_pci_slpc(struct clp_req *req, struct clp_req_rsp_slpc *lpcb)
+{
+ unsigned long limit = PAGE_SIZE - sizeof(lpcb->request);
+
+ if (lpcb->request.hdr.len != sizeof(lpcb->request) ||
+ lpcb->response.hdr.len > limit)
+ return -EINVAL;
+ return clp_req(lpcb, CLP_LPS_PCI) ? -EOPNOTSUPP : 0;
+}
+
+static int clp_pci_list(struct clp_req *req, struct clp_req_rsp_list_pci *lpcb)
+{
+ unsigned long limit = PAGE_SIZE - sizeof(lpcb->request);
+
+ if (lpcb->request.hdr.len != sizeof(lpcb->request) ||
+ lpcb->response.hdr.len > limit)
+ return -EINVAL;
+ if (lpcb->request.reserved2 != 0)
+ return -EINVAL;
+ return clp_req(lpcb, CLP_LPS_PCI) ? -EOPNOTSUPP : 0;
+}
+
+static int clp_pci_query(struct clp_req *req,
+ struct clp_req_rsp_query_pci *lpcb)
+{
+ unsigned long limit = PAGE_SIZE - sizeof(lpcb->request);
+
+ if (lpcb->request.hdr.len != sizeof(lpcb->request) ||
+ lpcb->response.hdr.len > limit)
+ return -EINVAL;
+ if (lpcb->request.reserved2 != 0 || lpcb->request.reserved3 != 0)
+ return -EINVAL;
+ return clp_req(lpcb, CLP_LPS_PCI) ? -EOPNOTSUPP : 0;
+}
+
+static int clp_pci_query_grp(struct clp_req *req,
+ struct clp_req_rsp_query_pci_grp *lpcb)
+{
+ unsigned long limit = PAGE_SIZE - sizeof(lpcb->request);
+
+ if (lpcb->request.hdr.len != sizeof(lpcb->request) ||
+ lpcb->response.hdr.len > limit)
+ return -EINVAL;
+ if (lpcb->request.reserved2 != 0 || lpcb->request.reserved3 != 0 ||
+ lpcb->request.reserved4 != 0)
+ return -EINVAL;
+ return clp_req(lpcb, CLP_LPS_PCI) ? -EOPNOTSUPP : 0;
+}
+
+static int clp_pci_command(struct clp_req *req, struct clp_req_hdr *lpcb)
+{
+ switch (lpcb->cmd) {
+ case 0x0001: /* store logical-processor characteristics */
+ return clp_pci_slpc(req, (void *) lpcb);
+ case 0x0002: /* list PCI functions */
+ return clp_pci_list(req, (void *) lpcb);
+ case 0x0003: /* query PCI function */
+ return clp_pci_query(req, (void *) lpcb);
+ case 0x0004: /* query PCI function group */
+ return clp_pci_query_grp(req, (void *) lpcb);
+ default:
+ return -EINVAL;
+ }
+}
+
+static int clp_normal_command(struct clp_req *req)
+{
+ struct clp_req_hdr *lpcb;
+ void __user *uptr;
+ int rc;
+
+ rc = -EINVAL;
+ if (req->lps != 0 && req->lps != 2)
+ goto out;
+
+ rc = -ENOMEM;
+ lpcb = clp_alloc_block(GFP_KERNEL);
+ if (!lpcb)
+ goto out;
+
+ rc = -EFAULT;
+ uptr = (void __force __user *)(unsigned long) req->data_p;
+ if (copy_from_user(lpcb, uptr, PAGE_SIZE) != 0)
+ goto out_free;
+
+ rc = -EINVAL;
+ if (lpcb->fmt != 0 || lpcb->reserved1 != 0 || lpcb->reserved2 != 0)
+ goto out_free;
+
+ switch (req->lps) {
+ case 0:
+ rc = clp_base_command(req, lpcb);
+ break;
+ case 2:
+ rc = clp_pci_command(req, lpcb);
+ break;
+ }
+ if (rc)
+ goto out_free;
+
+ rc = -EFAULT;
+ if (copy_to_user(uptr, lpcb, PAGE_SIZE) != 0)
+ goto out_free;
+
+ rc = 0;
+
+out_free:
+ clp_free_block(lpcb);
+out:
+ return rc;
+}
+
+static int clp_immediate_command(struct clp_req *req)
+{
+ void __user *uptr;
+ unsigned long ilp;
+ int exists;
+
+ if (req->cmd > 1 || clp_get_ilp(&ilp) != 0)
+ return -EINVAL;
+
+ uptr = (void __force __user *)(unsigned long) req->data_p;
+ if (req->cmd == 0) {
+ /* Command code 0: test for a specific processor */
+ exists = test_bit_inv(req->lps, &ilp);
+ return put_user(exists, (int __user *) uptr);
+ }
+ /* Command code 1: return bit mask of installed processors */
+ return put_user(ilp, (unsigned long __user *) uptr);
+}
+
+static long clp_misc_ioctl(struct file *filp, unsigned int cmd,
+ unsigned long arg)
+{
+ struct clp_req req;
+ void __user *argp;
+
+ if (cmd != CLP_SYNC)
+ return -EINVAL;
+
+ argp = is_compat_task() ? compat_ptr(arg) : (void __user *) arg;
+ if (copy_from_user(&req, argp, sizeof(req)))
+ return -EFAULT;
+ if (req.r != 0)
+ return -EINVAL;
+ return req.c ? clp_immediate_command(&req) : clp_normal_command(&req);
+}
+
+static int clp_misc_release(struct inode *inode, struct file *filp)
+{
+ return 0;
+}
+
+static const struct file_operations clp_misc_fops = {
+ .owner = THIS_MODULE,
+ .open = nonseekable_open,
+ .release = clp_misc_release,
+ .unlocked_ioctl = clp_misc_ioctl,
+ .compat_ioctl = clp_misc_ioctl,
+ .llseek = no_llseek,
+};
+
+static struct miscdevice clp_misc_device = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "clp",
+ .fops = &clp_misc_fops,
+};
+
+static int __init clp_misc_init(void)
+{
+ return misc_register(&clp_misc_device);
+}
+
+device_initcall(clp_misc_init);
diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c
index 4129b0a5fd78..38993b156924 100644
--- a/arch/s390/pci/pci_debug.c
+++ b/arch/s390/pci/pci_debug.c
@@ -1,5 +1,5 @@
/*
- * Copyright IBM Corp. 2012
+ * Copyright IBM Corp. 2012,2015
*
* Author(s):
* Jan Glauber <jang@linux.vnet.ibm.com>
@@ -23,22 +23,45 @@ EXPORT_SYMBOL_GPL(pci_debug_msg_id);
debug_info_t *pci_debug_err_id;
EXPORT_SYMBOL_GPL(pci_debug_err_id);
-static char *pci_perf_names[] = {
- /* hardware counters */
+static char *pci_common_names[] = {
"Load operations",
"Store operations",
"Store block operations",
"Refresh operations",
+};
+
+static char *pci_fmt0_names[] = {
"DMA read bytes",
"DMA write bytes",
};
+static char *pci_fmt1_names[] = {
+ "Received bytes",
+ "Received packets",
+ "Transmitted bytes",
+ "Transmitted packets",
+};
+
+static char *pci_fmt2_names[] = {
+ "Consumed work units",
+ "Maximum work units",
+};
+
static char *pci_sw_names[] = {
"Allocated pages",
"Mapped pages",
"Unmapped pages",
};
+static void pci_fmb_show(struct seq_file *m, char *name[], int length,
+ u64 *data)
+{
+ int i;
+
+ for (i = 0; i < length; i++, data++)
+ seq_printf(m, "%26s:\t%llu\n", name[i], *data);
+}
+
static void pci_sw_counter_show(struct seq_file *m)
{
struct zpci_dev *zdev = m->private;
@@ -53,8 +76,6 @@ static void pci_sw_counter_show(struct seq_file *m)
static int pci_perf_show(struct seq_file *m, void *v)
{
struct zpci_dev *zdev = m->private;
- u64 *stat;
- int i;
if (!zdev)
return 0;
@@ -72,15 +93,27 @@ static int pci_perf_show(struct seq_file *m, void *v)
seq_printf(m, "Samples: %u\n", zdev->fmb->samples);
seq_printf(m, "Last update TOD: %Lx\n", zdev->fmb->last_update);
- /* hardware counters */
- stat = (u64 *) &zdev->fmb->ld_ops;
- for (i = 0; i < 4; i++)
- seq_printf(m, "%26s:\t%llu\n",
- pci_perf_names[i], *(stat + i));
- if (zdev->fmb->dma_valid)
- for (i = 4; i < 6; i++)
- seq_printf(m, "%26s:\t%llu\n",
- pci_perf_names[i], *(stat + i));
+ pci_fmb_show(m, pci_common_names, ARRAY_SIZE(pci_common_names),
+ &zdev->fmb->ld_ops);
+
+ switch (zdev->fmb->format) {
+ case 0:
+ if (!(zdev->fmb->fmt_ind & ZPCI_FMB_DMA_COUNTER_VALID))
+ break;
+ pci_fmb_show(m, pci_fmt0_names, ARRAY_SIZE(pci_fmt0_names),
+ &zdev->fmb->fmt0.dma_rbytes);
+ break;
+ case 1:
+ pci_fmb_show(m, pci_fmt1_names, ARRAY_SIZE(pci_fmt1_names),
+ &zdev->fmb->fmt1.rx_bytes);
+ break;
+ case 2:
+ pci_fmb_show(m, pci_fmt2_names, ARRAY_SIZE(pci_fmt2_names),
+ &zdev->fmb->fmt2.consumed_work_units);
+ break;
+ default:
+ seq_puts(m, "Unknown format\n");
+ }
pci_sw_counter_show(m);
mutex_unlock(&zdev->lock);
@@ -128,10 +161,9 @@ static const struct file_operations debugfs_pci_perf_fops = {
.release = single_release,
};
-void zpci_debug_init_device(struct zpci_dev *zdev)
+void zpci_debug_init_device(struct zpci_dev *zdev, const char *name)
{
- zdev->debugfs_dev = debugfs_create_dir(dev_name(&zdev->pdev->dev),
- debugfs_root);
+ zdev->debugfs_dev = debugfs_create_dir(name, debugfs_root);
if (IS_ERR(zdev->debugfs_dev))
zdev->debugfs_dev = NULL;
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index 4638b93c7632..1ea8c07eab84 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -217,27 +217,29 @@ void dma_cleanup_tables(unsigned long *table)
dma_free_cpu_table(table);
}
-static unsigned long __dma_alloc_iommu(struct zpci_dev *zdev,
+static unsigned long __dma_alloc_iommu(struct device *dev,
unsigned long start, int size)
{
+ struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
unsigned long boundary_size;
- boundary_size = ALIGN(dma_get_seg_boundary(&zdev->pdev->dev) + 1,
+ boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
PAGE_SIZE) >> PAGE_SHIFT;
return iommu_area_alloc(zdev->iommu_bitmap, zdev->iommu_pages,
start, size, 0, boundary_size, 0);
}
-static unsigned long dma_alloc_iommu(struct zpci_dev *zdev, int size)
+static unsigned long dma_alloc_iommu(struct device *dev, int size)
{
+ struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
unsigned long offset, flags;
int wrap = 0;
spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
- offset = __dma_alloc_iommu(zdev, zdev->next_bit, size);
+ offset = __dma_alloc_iommu(dev, zdev->next_bit, size);
if (offset == -1) {
/* wrap-around */
- offset = __dma_alloc_iommu(zdev, 0, size);
+ offset = __dma_alloc_iommu(dev, 0, size);
wrap = 1;
}
@@ -251,8 +253,9 @@ static unsigned long dma_alloc_iommu(struct zpci_dev *zdev, int size)
return offset;
}
-static void dma_free_iommu(struct zpci_dev *zdev, unsigned long offset, int size)
+static void dma_free_iommu(struct device *dev, unsigned long offset, int size)
{
+ struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
unsigned long flags;
spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
@@ -293,7 +296,7 @@ static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page,
/* This rounds up number of pages based on size and offset */
nr_pages = iommu_num_pages(pa, size, PAGE_SIZE);
- iommu_page_index = dma_alloc_iommu(zdev, nr_pages);
+ iommu_page_index = dma_alloc_iommu(dev, nr_pages);
if (iommu_page_index == -1) {
ret = -ENOSPC;
goto out_err;
@@ -319,7 +322,7 @@ static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page,
return dma_addr + (offset & ~PAGE_MASK);
out_free:
- dma_free_iommu(zdev, iommu_page_index, nr_pages);
+ dma_free_iommu(dev, iommu_page_index, nr_pages);
out_err:
zpci_err("map error:\n");
zpci_err_dma(ret, pa);
@@ -346,7 +349,7 @@ static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr,
atomic64_add(npages, &zdev->unmapped_pages);
iommu_page_index = (dma_addr - zdev->start_dma) >> PAGE_SHIFT;
- dma_free_iommu(zdev, iommu_page_index, npages);
+ dma_free_iommu(dev, iommu_page_index, npages);
}
static void *s390_dma_alloc(struct device *dev, size_t size,
@@ -454,7 +457,7 @@ int zpci_dma_init_device(struct zpci_dev *zdev)
zdev->dma_table = dma_alloc_cpu_table();
if (!zdev->dma_table) {
rc = -ENOMEM;
- goto out_clean;
+ goto out;
}
/*
@@ -474,18 +477,22 @@ int zpci_dma_init_device(struct zpci_dev *zdev)
zdev->iommu_bitmap = vzalloc(zdev->iommu_pages / 8);
if (!zdev->iommu_bitmap) {
rc = -ENOMEM;
- goto out_reg;
+ goto free_dma_table;
}
rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
(u64) zdev->dma_table);
if (rc)
- goto out_reg;
- return 0;
+ goto free_bitmap;
-out_reg:
+ return 0;
+free_bitmap:
+ vfree(zdev->iommu_bitmap);
+ zdev->iommu_bitmap = NULL;
+free_dma_table:
dma_free_cpu_table(zdev->dma_table);
-out_clean:
+ zdev->dma_table = NULL;
+out:
return rc;
}
@@ -544,7 +551,7 @@ static int __init dma_debug_do_init(void)
}
fs_initcall(dma_debug_do_init);
-struct dma_map_ops s390_dma_ops = {
+struct dma_map_ops s390_pci_dma_ops = {
.alloc = s390_dma_alloc,
.free = s390_dma_free,
.map_sg = s390_dma_map_sg,
@@ -555,7 +562,7 @@ struct dma_map_ops s390_dma_ops = {
.is_phys = 0,
/* dma_supported is unconditionally true without a callback */
};
-EXPORT_SYMBOL_GPL(s390_dma_ops);
+EXPORT_SYMBOL_GPL(s390_pci_dma_ops);
static int __init s390_iommu_setup(char *str)
{
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index b0e04751c5d5..fb2a9a560fdc 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -46,11 +46,14 @@ struct zpci_ccdf_avail {
static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
{
struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
- struct pci_dev *pdev = zdev ? zdev->pdev : NULL;
+ struct pci_dev *pdev = NULL;
zpci_err("error CCDF:\n");
zpci_err_hex(ccdf, sizeof(*ccdf));
+ if (zdev)
+ pdev = pci_get_slot(zdev->bus, ZPCI_DEVFN);
+
pr_err("%s: Event 0x%x reports an error for PCI function 0x%x\n",
pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid);
@@ -58,6 +61,7 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
return;
pdev->error_state = pci_channel_io_perm_failure;
+ pci_dev_put(pdev);
}
void zpci_event_error(void *data)
@@ -69,9 +73,12 @@ void zpci_event_error(void *data)
static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
{
struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
- struct pci_dev *pdev = zdev ? zdev->pdev : NULL;
+ struct pci_dev *pdev = NULL;
int ret;
+ if (zdev)
+ pdev = pci_get_slot(zdev->bus, ZPCI_DEVFN);
+
pr_info("%s: Event 0x%x reconfigured PCI function 0x%x\n",
pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid);
zpci_err("avail CCDF:\n");
@@ -138,6 +145,8 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
default:
break;
}
+ if (pdev)
+ pci_dev_put(pdev);
}
void zpci_event_availability(void *data)
diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c
index f37a5808883d..ed484dc84d14 100644
--- a/arch/s390/pci/pci_sysfs.c
+++ b/arch/s390/pci/pci_sysfs.c
@@ -12,6 +12,8 @@
#include <linux/stat.h>
#include <linux/pci.h>
+#include <asm/sclp.h>
+
#define zpci_attr(name, fmt, member) \
static ssize_t name##_show(struct device *dev, \
struct device_attribute *attr, char *buf) \
@@ -77,8 +79,29 @@ static ssize_t util_string_read(struct file *filp, struct kobject *kobj,
sizeof(zdev->util_str));
}
static BIN_ATTR_RO(util_string, CLP_UTIL_STR_LEN);
+
+static ssize_t report_error_write(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *attr, char *buf,
+ loff_t off, size_t count)
+{
+ struct zpci_report_error_header *report = (void *) buf;
+ struct device *dev = kobj_to_dev(kobj);
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct zpci_dev *zdev = to_zpci(pdev);
+ int ret;
+
+ if (off || (count < sizeof(*report)))
+ return -EINVAL;
+
+ ret = sclp_pci_report(report, zdev->fh, zdev->fid);
+
+ return ret ? ret : count;
+}
+static BIN_ATTR(report_error, S_IWUSR, NULL, report_error_write, PAGE_SIZE);
+
static struct bin_attribute *zpci_bin_attrs[] = {
&bin_attr_util_string,
+ &bin_attr_report_error,
NULL,
};