summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/arc/boot/dts/Makefile2
-rw-r--r--arch/arm/crypto/Makefile2
-rw-r--r--arch/arm/mach-npcm/npcm7xx.c2
-rw-r--r--arch/arm64/crypto/Makefile2
-rw-r--r--arch/arm64/include/asm/assembler.h136
-rw-r--r--arch/arm64/include/asm/cpucaps.h13
-rw-r--r--arch/arm64/include/asm/kvm_asm.h2
-rw-r--r--arch/arm64/kernel/Makefile2
-rw-r--r--arch/arm64/kernel/asm-offsets.c3
-rw-r--r--arch/arm64/kernel/bpi.S102
-rw-r--r--arch/arm64/kernel/cpu_errata.c97
-rw-r--r--arch/arm64/kvm/hyp/entry.S12
-rw-r--r--arch/arm64/kvm/hyp/hyp-entry.S64
-rw-r--r--arch/arm64/kvm/hyp/switch.c10
-rw-r--r--arch/microblaze/include/asm/pci.h7
-rw-r--r--arch/microblaze/include/asm/pgtable.h2
-rw-r--r--arch/microblaze/pci/pci-common.c99
-rw-r--r--arch/mips/mm/gup.c2
-rw-r--r--arch/nios2/kernel/time.c4
-rw-r--r--arch/openrisc/include/uapi/asm/unistd.h2
-rw-r--r--arch/parisc/Kconfig1
-rw-r--r--arch/parisc/include/asm/compat.h6
-rw-r--r--arch/parisc/include/asm/elf.h69
-rw-r--r--arch/parisc/include/uapi/asm/siginfo.h7
-rw-r--r--arch/parisc/kernel/binfmt_elf32.c98
-rw-r--r--arch/parisc/kernel/cache.c2
-rw-r--r--arch/parisc/kernel/pacache.S9
-rw-r--r--arch/parisc/kernel/process.c13
-rw-r--r--arch/parisc/kernel/traps.c7
-rw-r--r--arch/powerpc/Kconfig3
-rw-r--r--arch/powerpc/include/asm/cputable.h23
-rw-r--r--arch/powerpc/include/asm/kexec.h2
-rw-r--r--arch/powerpc/include/asm/module.h12
-rw-r--r--arch/powerpc/include/asm/opal.h3
-rw-r--r--arch/powerpc/kernel/dt_cpu_ftrs.c14
-rw-r--r--arch/powerpc/kernel/kexec_elf_64.c11
-rw-r--r--arch/powerpc/kernel/machine_kexec_file_64.c39
-rw-r--r--arch/powerpc/kernel/setup_64.c2
-rw-r--r--arch/powerpc/kernel/traps.c32
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_mmu.c4
-rw-r--r--arch/powerpc/mm/slice.c1
-rw-r--r--arch/powerpc/mm/tlb-radix.c5
-rw-r--r--arch/powerpc/platforms/powernv/opal-nvram.c7
-rw-r--r--arch/s390/boot/compressed/misc.c23
-rw-r--r--arch/s390/crypto/aes_s390.c8
-rw-r--r--arch/s390/crypto/paes_s390.c8
-rw-r--r--arch/s390/include/asm/ap.h6
-rw-r--r--arch/s390/include/asm/cio.h10
-rw-r--r--arch/s390/include/asm/ipl.h25
-rw-r--r--arch/s390/include/asm/nospec-branch.h1
-rw-r--r--arch/s390/include/asm/reset.h20
-rw-r--r--arch/s390/include/uapi/asm/zcrypt.h163
-rw-r--r--arch/s390/kernel/compat_signal.c2
-rw-r--r--arch/s390/kernel/early.c14
-rw-r--r--arch/s390/kernel/ipl.c376
-rw-r--r--arch/s390/kernel/machine_kexec.c2
-rw-r--r--arch/s390/kernel/nospec-branch.c8
-rw-r--r--arch/s390/kernel/reipl.S87
-rw-r--r--arch/s390/kernel/relocate_kernel.S54
-rw-r--r--arch/s390/kernel/setup.c3
-rw-r--r--arch/s390/mm/gup.c2
-rw-r--r--arch/sh/boards/of-generic.c6
-rw-r--r--arch/sh/drivers/pci/pci.c5
-rw-r--r--arch/sh/drivers/pci/pcie-sh7786.c78
-rw-r--r--arch/sh/include/asm/futex.h5
-rw-r--r--arch/sh/include/cpu-sh4/cpu/sh7786.h7
-rw-r--r--arch/sh/kernel/dma-nommu.c7
-rw-r--r--arch/sh/kernel/entry-common.S2
-rw-r--r--arch/sh/kernel/setup.c8
-rw-r--r--arch/sh/mm/consistent.c4
-rw-r--r--arch/sh/mm/gup.c2
-rw-r--r--arch/sparc/mm/gup.c4
-rw-r--r--arch/sparc/vdso/Makefile4
-rw-r--r--arch/um/Kconfig.net11
-rw-r--r--arch/um/drivers/Makefile4
-rw-r--r--arch/um/drivers/chan_kern.c53
-rw-r--r--arch/um/drivers/line.c2
-rw-r--r--arch/um/drivers/net_kern.c4
-rw-r--r--arch/um/drivers/random.c11
-rw-r--r--arch/um/drivers/ubd_kern.c4
-rw-r--r--arch/um/drivers/vector_kern.c1633
-rw-r--r--arch/um/drivers/vector_kern.h130
-rw-r--r--arch/um/drivers/vector_transports.c458
-rw-r--r--arch/um/drivers/vector_user.c590
-rw-r--r--arch/um/drivers/vector_user.h100
-rw-r--r--arch/um/include/asm/asm-prototypes.h1
-rw-r--r--arch/um/include/asm/irq.h12
-rw-r--r--arch/um/include/shared/irq_user.h12
-rw-r--r--arch/um/include/shared/net_kern.h2
-rw-r--r--arch/um/include/shared/os.h17
-rw-r--r--arch/um/kernel/irq.c461
-rw-r--r--arch/um/kernel/time.c6
-rw-r--r--arch/um/os-Linux/file.c1
-rw-r--r--arch/um/os-Linux/irq.c202
-rw-r--r--arch/um/os-Linux/signal.c3
-rw-r--r--arch/x86/Kconfig8
-rw-r--r--arch/x86/boot/compressed/kaslr.c3
-rw-r--r--arch/x86/entry/calling.h2
-rw-r--r--arch/x86/entry/common.c20
-rw-r--r--arch/x86/entry/entry_64.S7
-rw-r--r--arch/x86/entry/entry_64_compat.S6
-rw-r--r--arch/x86/entry/syscall_32.c15
-rw-r--r--arch/x86/entry/syscall_64.c6
-rw-r--r--arch/x86/entry/syscalls/syscall_32.tbl723
-rw-r--r--arch/x86/entry/syscalls/syscall_64.tbl712
-rw-r--r--arch/x86/entry/syscalls/syscalltbl.sh14
-rw-r--r--arch/x86/entry/vdso/Makefile4
-rw-r--r--arch/x86/entry/vsyscall/vsyscall_64.c18
-rw-r--r--arch/x86/events/intel/ds.c34
-rw-r--r--arch/x86/include/asm/apic.h4
-rw-r--r--arch/x86/include/asm/kexec-bzimage64.h2
-rw-r--r--arch/x86/include/asm/pgtable.h27
-rw-r--r--arch/x86/include/asm/pgtable_types.h29
-rw-r--r--arch/x86/include/asm/pti.h2
-rw-r--r--arch/x86/include/asm/syscall.h4
-rw-r--r--arch/x86/include/asm/syscall_wrapper.h209
-rw-r--r--arch/x86/include/asm/syscalls.h17
-rw-r--r--arch/x86/include/asm/tlbflush.h7
-rw-r--r--arch/x86/include/uapi/asm/bootparam.h18
-rw-r--r--arch/x86/kernel/acpi/boot.c13
-rw-r--r--arch/x86/kernel/apic/apic_common.c2
-rw-r--r--arch/x86/kernel/apic/apic_numachip.c2
-rw-r--r--arch/x86/kernel/apic/x2apic.h2
-rw-r--r--arch/x86/kernel/apic/x2apic_phys.c2
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c2
-rw-r--r--arch/x86/kernel/cpu/common.c32
-rw-r--r--arch/x86/kernel/cpu/cpuid-deps.c2
-rw-r--r--arch/x86/kernel/crash.c334
-rw-r--r--arch/x86/kernel/espfix_64.c4
-rw-r--r--arch/x86/kernel/head64.c2
-rw-r--r--arch/x86/kernel/head_64.S11
-rw-r--r--arch/x86/kernel/kexec-bzimage64.c10
-rw-r--r--arch/x86/kernel/ldt.c6
-rw-r--r--arch/x86/kernel/machine_kexec_64.c111
-rw-r--r--arch/x86/kernel/signal_compat.c2
-rw-r--r--arch/x86/mm/cpu_entry_area.c14
-rw-r--r--arch/x86/mm/ident_map.c3
-rw-r--r--arch/x86/mm/init.c14
-rw-r--r--arch/x86/mm/init_32.c8
-rw-r--r--arch/x86/mm/init_64.c11
-rw-r--r--arch/x86/mm/iomap_32.c6
-rw-r--r--arch/x86/mm/ioremap.c3
-rw-r--r--arch/x86/mm/kasan_init_64.c14
-rw-r--r--arch/x86/mm/pageattr.c97
-rw-r--r--arch/x86/mm/pgtable.c12
-rw-r--r--arch/x86/mm/pti.c126
-rw-r--r--arch/x86/power/hibernate_64.c20
-rw-r--r--arch/x86/purgatory/Makefile3
-rw-r--r--arch/x86/purgatory/purgatory.c2
-rw-r--r--arch/x86/purgatory/sha256.c283
-rw-r--r--arch/x86/purgatory/sha256.h21
-rw-r--r--arch/x86/purgatory/string.c12
-rw-r--r--arch/x86/um/stub_segv.c3
-rw-r--r--arch/x86/xen/apic.c2
-rw-r--r--arch/x86/xen/enlighten_pv.c8
-rw-r--r--arch/x86/xen/smp_pv.c1
-rw-r--r--arch/x86/xen/xen-head.S4
157 files changed, 5584 insertions, 3107 deletions
diff --git a/arch/arc/boot/dts/Makefile b/arch/arc/boot/dts/Makefile
index 22a4c5d4702f..a83c4f5e928b 100644
--- a/arch/arc/boot/dts/Makefile
+++ b/arch/arc/boot/dts/Makefile
@@ -9,8 +9,6 @@ endif
obj-y += $(builtindtb-y).dtb.o
dtb-y := $(builtindtb-y).dtb
-.SECONDARY: $(obj)/$(builtindtb-y).dtb.S
-
# for CONFIG_OF_ALL_DTBS test
dtstree := $(srctree)/$(src)
dtb- := $(patsubst $(dtstree)/%.dts,%.dtb, $(wildcard $(dtstree)/*.dts))
diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
index 3304e671918d..8de542c48ade 100644
--- a/arch/arm/crypto/Makefile
+++ b/arch/arm/crypto/Makefile
@@ -67,4 +67,4 @@ $(src)/sha512-core.S_shipped: $(src)/sha512-armv4.pl
$(call cmd,perl)
endif
-.PRECIOUS: $(obj)/sha256-core.S $(obj)/sha512-core.S
+targets += sha256-core.S sha512-core.S
diff --git a/arch/arm/mach-npcm/npcm7xx.c b/arch/arm/mach-npcm/npcm7xx.c
index 5f7cd88103ef..c5f77d854c4f 100644
--- a/arch/arm/mach-npcm/npcm7xx.c
+++ b/arch/arm/mach-npcm/npcm7xx.c
@@ -17,4 +17,6 @@ static const char *const npcm7xx_dt_match[] = {
DT_MACHINE_START(NPCM7XX_DT, "NPCM7XX Chip family")
.atag_offset = 0x100,
.dt_compat = npcm7xx_dt_match,
+ .l2c_aux_val = 0x0,
+ .l2c_aux_mask = ~0x0,
MACHINE_END
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index 8df9f326f449..f35ac684b1c0 100644
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -78,4 +78,4 @@ $(src)/sha512-core.S_shipped: $(src)/sha512-armv8.pl
$(call cmd,perlasm)
endif
-.PRECIOUS: $(obj)/sha256-core.S $(obj)/sha512-core.S
+targets += sha256-core.S sha512-core.S
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 053d83e8db6f..0bcc98dbba56 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -565,4 +565,140 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU
#endif
.endm
+ /*
+ * frame_push - Push @regcount callee saved registers to the stack,
+ * starting at x19, as well as x29/x30, and set x29 to
+ * the new value of sp. Add @extra bytes of stack space
+ * for locals.
+ */
+ .macro frame_push, regcount:req, extra
+ __frame st, \regcount, \extra
+ .endm
+
+ /*
+ * frame_pop - Pop the callee saved registers from the stack that were
+ * pushed in the most recent call to frame_push, as well
+ * as x29/x30 and any extra stack space that may have been
+ * allocated.
+ */
+ .macro frame_pop
+ __frame ld
+ .endm
+
+ .macro __frame_regs, reg1, reg2, op, num
+ .if .Lframe_regcount == \num
+ \op\()r \reg1, [sp, #(\num + 1) * 8]
+ .elseif .Lframe_regcount > \num
+ \op\()p \reg1, \reg2, [sp, #(\num + 1) * 8]
+ .endif
+ .endm
+
+ .macro __frame, op, regcount, extra=0
+ .ifc \op, st
+ .if (\regcount) < 0 || (\regcount) > 10
+ .error "regcount should be in the range [0 ... 10]"
+ .endif
+ .if ((\extra) % 16) != 0
+ .error "extra should be a multiple of 16 bytes"
+ .endif
+ .ifdef .Lframe_regcount
+ .if .Lframe_regcount != -1
+ .error "frame_push/frame_pop may not be nested"
+ .endif
+ .endif
+ .set .Lframe_regcount, \regcount
+ .set .Lframe_extra, \extra
+ .set .Lframe_local_offset, ((\regcount + 3) / 2) * 16
+ stp x29, x30, [sp, #-.Lframe_local_offset - .Lframe_extra]!
+ mov x29, sp
+ .endif
+
+ __frame_regs x19, x20, \op, 1
+ __frame_regs x21, x22, \op, 3
+ __frame_regs x23, x24, \op, 5
+ __frame_regs x25, x26, \op, 7
+ __frame_regs x27, x28, \op, 9
+
+ .ifc \op, ld
+ .if .Lframe_regcount == -1
+ .error "frame_push/frame_pop may not be nested"
+ .endif
+ ldp x29, x30, [sp], #.Lframe_local_offset + .Lframe_extra
+ .set .Lframe_regcount, -1
+ .endif
+ .endm
+
+/*
+ * Check whether to yield to another runnable task from kernel mode NEON code
+ * (which runs with preemption disabled).
+ *
+ * if_will_cond_yield_neon
+ * // pre-yield patchup code
+ * do_cond_yield_neon
+ * // post-yield patchup code
+ * endif_yield_neon <label>
+ *
+ * where <label> is optional, and marks the point where execution will resume
+ * after a yield has been performed. If omitted, execution resumes right after
+ * the endif_yield_neon invocation. Note that the entire sequence, including
+ * the provided patchup code, will be omitted from the image if CONFIG_PREEMPT
+ * is not defined.
+ *
+ * As a convenience, in the case where no patchup code is required, the above
+ * sequence may be abbreviated to
+ *
+ * cond_yield_neon <label>
+ *
+ * Note that the patchup code does not support assembler directives that change
+ * the output section, any use of such directives is undefined.
+ *
+ * The yield itself consists of the following:
+ * - Check whether the preempt count is exactly 1, in which case disabling
+ * preemption once will make the task preemptible. If this is not the case,
+ * yielding is pointless.
+ * - Check whether TIF_NEED_RESCHED is set, and if so, disable and re-enable
+ * kernel mode NEON (which will trigger a reschedule), and branch to the
+ * yield fixup code.
+ *
+ * This macro sequence may clobber all CPU state that is not guaranteed by the
+ * AAPCS to be preserved across an ordinary function call.
+ */
+
+ .macro cond_yield_neon, lbl
+ if_will_cond_yield_neon
+ do_cond_yield_neon
+ endif_yield_neon \lbl
+ .endm
+
+ .macro if_will_cond_yield_neon
+#ifdef CONFIG_PREEMPT
+ get_thread_info x0
+ ldr w1, [x0, #TSK_TI_PREEMPT]
+ ldr x0, [x0, #TSK_TI_FLAGS]
+ cmp w1, #PREEMPT_DISABLE_OFFSET
+ csel x0, x0, xzr, eq
+ tbnz x0, #TIF_NEED_RESCHED, .Lyield_\@ // needs rescheduling?
+ /* fall through to endif_yield_neon */
+ .subsection 1
+.Lyield_\@ :
+#else
+ .section ".discard.cond_yield_neon", "ax"
+#endif
+ .endm
+
+ .macro do_cond_yield_neon
+ bl kernel_neon_end
+ bl kernel_neon_begin
+ .endm
+
+ .macro endif_yield_neon, lbl
+ .ifnb \lbl
+ b \lbl
+ .else
+ b .Lyield_out_\@
+ .endif
+ .previous
+.Lyield_out_\@ :
+ .endm
+
#endif /* __ASM_ASSEMBLER_H */
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index a311880feb0f..bc51b72fafd4 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -43,13 +43,12 @@
#define ARM64_SVE 22
#define ARM64_UNMAP_KERNEL_AT_EL0 23
#define ARM64_HARDEN_BRANCH_PREDICTOR 24
-#define ARM64_HARDEN_BP_POST_GUEST_EXIT 25
-#define ARM64_HAS_RAS_EXTN 26
-#define ARM64_WORKAROUND_843419 27
-#define ARM64_HAS_CACHE_IDC 28
-#define ARM64_HAS_CACHE_DIC 29
-#define ARM64_HW_DBM 30
+#define ARM64_HAS_RAS_EXTN 25
+#define ARM64_WORKAROUND_843419 26
+#define ARM64_HAS_CACHE_IDC 27
+#define ARM64_HAS_CACHE_DIC 28
+#define ARM64_HW_DBM 29
-#define ARM64_NCAPS 31
+#define ARM64_NCAPS 30
#endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index d53d40704416..f6648a3e4152 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -71,8 +71,6 @@ extern u32 __kvm_get_mdcr_el2(void);
extern u32 __init_stage2_translation(void);
-extern void __qcom_hyp_sanitize_btac_predictors(void);
-
#else /* __ASSEMBLY__ */
.macro get_host_ctxt reg, tmp
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 9b55a3f24be7..bf825f38d206 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -55,8 +55,6 @@ arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o
arm64-obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
arm64-obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o
-arm64-obj-$(CONFIG_KVM_INDIRECT_VECTORS)+= bpi.o
-
obj-y += $(arm64-obj-y) vdso/ probes/
obj-m += $(arm64-obj-m)
head-y := head.o
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 78e1b0a70aaf..5bdda651bd05 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -23,6 +23,7 @@
#include <linux/mm.h>
#include <linux/dma-mapping.h>
#include <linux/kvm_host.h>
+#include <linux/preempt.h>
#include <linux/suspend.h>
#include <asm/cpufeature.h>
#include <asm/fixmap.h>
@@ -93,6 +94,8 @@ int main(void)
DEFINE(DMA_TO_DEVICE, DMA_TO_DEVICE);
DEFINE(DMA_FROM_DEVICE, DMA_FROM_DEVICE);
BLANK();
+ DEFINE(PREEMPT_DISABLE_OFFSET, PREEMPT_DISABLE_OFFSET);
+ BLANK();
DEFINE(CLOCK_REALTIME, CLOCK_REALTIME);
DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC);
DEFINE(CLOCK_MONOTONIC_RAW, CLOCK_MONOTONIC_RAW);
diff --git a/arch/arm64/kernel/bpi.S b/arch/arm64/kernel/bpi.S
deleted file mode 100644
index bb0b67722e86..000000000000
--- a/arch/arm64/kernel/bpi.S
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Contains CPU specific branch predictor invalidation sequences
- *
- * Copyright (C) 2018 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <linux/linkage.h>
-#include <linux/arm-smccc.h>
-
-#include <asm/alternative.h>
-#include <asm/mmu.h>
-
-.macro hyp_ventry
- .align 7
-1: .rept 27
- nop
- .endr
-/*
- * The default sequence is to directly branch to the KVM vectors,
- * using the computed offset. This applies for VHE as well as
- * !ARM64_HARDEN_EL2_VECTORS.
- *
- * For ARM64_HARDEN_EL2_VECTORS configurations, this gets replaced
- * with:
- *
- * stp x0, x1, [sp, #-16]!
- * movz x0, #(addr & 0xffff)
- * movk x0, #((addr >> 16) & 0xffff), lsl #16
- * movk x0, #((addr >> 32) & 0xffff), lsl #32
- * br x0
- *
- * Where addr = kern_hyp_va(__kvm_hyp_vector) + vector-offset + 4.
- * See kvm_patch_vector_branch for details.
- */
-alternative_cb kvm_patch_vector_branch
- b __kvm_hyp_vector + (1b - 0b)
- nop
- nop
- nop
- nop
-alternative_cb_end
-.endm
-
-.macro generate_vectors
-0:
- .rept 16
- hyp_ventry
- .endr
- .org 0b + SZ_2K // Safety measure
-.endm
-
-
- .text
- .pushsection .hyp.text, "ax"
-
- .align 11
-ENTRY(__bp_harden_hyp_vecs_start)
- .rept BP_HARDEN_EL2_SLOTS
- generate_vectors
- .endr
-ENTRY(__bp_harden_hyp_vecs_end)
-
- .popsection
-
-ENTRY(__qcom_hyp_sanitize_link_stack_start)
- stp x29, x30, [sp, #-16]!
- .rept 16
- bl . + 4
- .endr
- ldp x29, x30, [sp], #16
-ENTRY(__qcom_hyp_sanitize_link_stack_end)
-
-.macro smccc_workaround_1 inst
- sub sp, sp, #(8 * 4)
- stp x2, x3, [sp, #(8 * 0)]
- stp x0, x1, [sp, #(8 * 2)]
- mov w0, #ARM_SMCCC_ARCH_WORKAROUND_1
- \inst #0
- ldp x2, x3, [sp, #(8 * 0)]
- ldp x0, x1, [sp, #(8 * 2)]
- add sp, sp, #(8 * 4)
-.endm
-
-ENTRY(__smccc_workaround_1_smc_start)
- smccc_workaround_1 smc
-ENTRY(__smccc_workaround_1_smc_end)
-
-ENTRY(__smccc_workaround_1_hvc_start)
- smccc_workaround_1 hvc
-ENTRY(__smccc_workaround_1_hvc_end)
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 9262ec57f5ab..a900befadfe8 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -86,13 +86,9 @@ atomic_t arm64_el2_vector_last_slot = ATOMIC_INIT(-1);
DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data);
-#ifdef CONFIG_KVM
-extern char __qcom_hyp_sanitize_link_stack_start[];
-extern char __qcom_hyp_sanitize_link_stack_end[];
+#ifdef CONFIG_KVM_INDIRECT_VECTORS
extern char __smccc_workaround_1_smc_start[];
extern char __smccc_workaround_1_smc_end[];
-extern char __smccc_workaround_1_hvc_start[];
-extern char __smccc_workaround_1_hvc_end[];
static void __copy_hyp_vect_bpi(int slot, const char *hyp_vecs_start,
const char *hyp_vecs_end)
@@ -132,12 +128,8 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn,
spin_unlock(&bp_lock);
}
#else
-#define __qcom_hyp_sanitize_link_stack_start NULL
-#define __qcom_hyp_sanitize_link_stack_end NULL
#define __smccc_workaround_1_smc_start NULL
#define __smccc_workaround_1_smc_end NULL
-#define __smccc_workaround_1_hvc_start NULL
-#define __smccc_workaround_1_hvc_end NULL
static void __install_bp_hardening_cb(bp_hardening_cb_t fn,
const char *hyp_vecs_start,
@@ -145,7 +137,7 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn,
{
__this_cpu_write(bp_hardening_data.fn, fn);
}
-#endif /* CONFIG_KVM */
+#endif /* CONFIG_KVM_INDIRECT_VECTORS */
static void install_bp_hardening_cb(const struct arm64_cpu_capabilities *entry,
bp_hardening_cb_t fn,
@@ -178,12 +170,25 @@ static void call_hvc_arch_workaround_1(void)
arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL);
}
+static void qcom_link_stack_sanitization(void)
+{
+ u64 tmp;
+
+ asm volatile("mov %0, x30 \n"
+ ".rept 16 \n"
+ "bl . + 4 \n"
+ ".endr \n"
+ "mov x30, %0 \n"
+ : "=&r" (tmp));
+}
+
static void
enable_smccc_arch_workaround_1(const struct arm64_cpu_capabilities *entry)
{
bp_hardening_cb_t cb;
void *smccc_start, *smccc_end;
struct arm_smccc_res res;
+ u32 midr = read_cpuid_id();
if (!entry->matches(entry, SCOPE_LOCAL_CPU))
return;
@@ -198,8 +203,9 @@ enable_smccc_arch_workaround_1(const struct arm64_cpu_capabilities *entry)
if ((int)res.a0 < 0)
return;
cb = call_hvc_arch_workaround_1;
- smccc_start = __smccc_workaround_1_hvc_start;
- smccc_end = __smccc_workaround_1_hvc_end;
+ /* This is a guest, no need to patch KVM vectors */
+ smccc_start = NULL;
+ smccc_end = NULL;
break;
case PSCI_CONDUIT_SMC:
@@ -216,30 +222,14 @@ enable_smccc_arch_workaround_1(const struct arm64_cpu_capabilities *entry)
return;
}
+ if (((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR) ||
+ ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1))
+ cb = qcom_link_stack_sanitization;
+
install_bp_hardening_cb(entry, cb, smccc_start, smccc_end);
return;
}
-
-static void qcom_link_stack_sanitization(void)
-{
- u64 tmp;
-
- asm volatile("mov %0, x30 \n"
- ".rept 16 \n"
- "bl . + 4 \n"
- ".endr \n"
- "mov x30, %0 \n"
- : "=&r" (tmp));
-}
-
-static void
-qcom_enable_link_stack_sanitization(const struct arm64_cpu_capabilities *entry)
-{
- install_bp_hardening_cb(entry, qcom_link_stack_sanitization,
- __qcom_hyp_sanitize_link_stack_start,
- __qcom_hyp_sanitize_link_stack_end);
-}
#endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */
#define CAP_MIDR_RANGE(model, v_min, r_min, v_max, r_max) \
@@ -324,33 +314,23 @@ static const struct midr_range arm64_bp_harden_smccc_cpus[] = {
MIDR_ALL_VERSIONS(MIDR_CORTEX_A75),
MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN),
MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2),
- {},
-};
-
-static const struct midr_range qcom_bp_harden_cpus[] = {
MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR_V1),
MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR),
{},
};
-static const struct arm64_cpu_capabilities arm64_bp_harden_list[] = {
- {
- CAP_MIDR_RANGE_LIST(arm64_bp_harden_smccc_cpus),
- .cpu_enable = enable_smccc_arch_workaround_1,
- },
- {
- CAP_MIDR_RANGE_LIST(qcom_bp_harden_cpus),
- .cpu_enable = qcom_enable_link_stack_sanitization,
- },
+#endif
+
+#ifdef CONFIG_HARDEN_EL2_VECTORS
+
+static const struct midr_range arm64_harden_el2_vectors[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A57),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
{},
};
#endif
-#ifndef ERRATA_MIDR_ALL_VERSIONS
-#define ERRATA_MIDR_ALL_VERSIONS(x) MIDR_ALL_VERSIONS(x)
-#endif
-
const struct arm64_cpu_capabilities arm64_errata[] = {
#if defined(CONFIG_ARM64_ERRATUM_826319) || \
defined(CONFIG_ARM64_ERRATUM_827319) || \
@@ -495,25 +475,16 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
{
.capability = ARM64_HARDEN_BRANCH_PREDICTOR,
.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
- .matches = multi_entry_cap_matches,
- .cpu_enable = multi_entry_cap_cpu_enable,
- .match_list = arm64_bp_harden_list,
- },
- {
- .capability = ARM64_HARDEN_BP_POST_GUEST_EXIT,
- ERRATA_MIDR_RANGE_LIST(qcom_bp_harden_cpus),
+ .cpu_enable = enable_smccc_arch_workaround_1,
+ ERRATA_MIDR_RANGE_LIST(arm64_bp_harden_smccc_cpus),
},
#endif
#ifdef CONFIG_HARDEN_EL2_VECTORS
{
- .desc = "Cortex-A57 EL2 vector hardening",
- .capability = ARM64_HARDEN_EL2_VECTORS,
- ERRATA_MIDR_ALL_VERSIONS(MIDR_CORTEX_A57),
- },
- {
- .desc = "Cortex-A72 EL2 vector hardening",
+ .desc = "EL2 vector hardening",
.capability = ARM64_HARDEN_EL2_VECTORS,
- ERRATA_MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
+ .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
+ ERRATA_MIDR_RANGE_LIST(arm64_harden_el2_vectors),
},
#endif
{
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index 1f458f7c3b44..e41a161d313a 100644
--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -209,15 +209,3 @@ alternative_endif
eret
ENDPROC(__fpsimd_guest_restore)
-
-ENTRY(__qcom_hyp_sanitize_btac_predictors)
- /**
- * Call SMC64 with Silicon provider serviceID 23<<8 (0xc2001700)
- * 0xC2000000-0xC200FFFF: assigned to SiP Service Calls
- * b15-b0: contains SiP functionID
- */
- movz x0, #0x1700
- movk x0, #0xc200, lsl #16
- smc #0
- ret
-ENDPROC(__qcom_hyp_sanitize_btac_predictors)
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index 87dfecce82b1..bffece27b5c1 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2015 - ARM Ltd
+ * Copyright (C) 2015-2018 - ARM Ltd
* Author: Marc Zyngier <marc.zyngier@arm.com>
*
* This program is free software; you can redistribute it and/or modify
@@ -24,6 +24,7 @@
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_mmu.h>
+#include <asm/mmu.h>
.text
.pushsection .hyp.text, "ax"
@@ -237,3 +238,64 @@ ENTRY(__kvm_hyp_vector)
invalid_vect el1_fiq_invalid // FIQ 32-bit EL1
valid_vect el1_error // Error 32-bit EL1
ENDPROC(__kvm_hyp_vector)
+
+#ifdef CONFIG_KVM_INDIRECT_VECTORS
+.macro hyp_ventry
+ .align 7
+1: .rept 27
+ nop
+ .endr
+/*
+ * The default sequence is to directly branch to the KVM vectors,
+ * using the computed offset. This applies for VHE as well as
+ * !ARM64_HARDEN_EL2_VECTORS.
+ *
+ * For ARM64_HARDEN_EL2_VECTORS configurations, this gets replaced
+ * with:
+ *
+ * stp x0, x1, [sp, #-16]!
+ * movz x0, #(addr & 0xffff)
+ * movk x0, #((addr >> 16) & 0xffff), lsl #16
+ * movk x0, #((addr >> 32) & 0xffff), lsl #32
+ * br x0
+ *
+ * Where addr = kern_hyp_va(__kvm_hyp_vector) + vector-offset + 4.
+ * See kvm_patch_vector_branch for details.
+ */
+alternative_cb kvm_patch_vector_branch
+ b __kvm_hyp_vector + (1b - 0b)
+ nop
+ nop
+ nop
+ nop
+alternative_cb_end
+.endm
+
+.macro generate_vectors
+0:
+ .rept 16
+ hyp_ventry
+ .endr
+ .org 0b + SZ_2K // Safety measure
+.endm
+
+ .align 11
+ENTRY(__bp_harden_hyp_vecs_start)
+ .rept BP_HARDEN_EL2_SLOTS
+ generate_vectors
+ .endr
+ENTRY(__bp_harden_hyp_vecs_end)
+
+ .popsection
+
+ENTRY(__smccc_workaround_1_smc_start)
+ sub sp, sp, #(8 * 4)
+ stp x2, x3, [sp, #(8 * 0)]
+ stp x0, x1, [sp, #(8 * 2)]
+ mov w0, #ARM_SMCCC_ARCH_WORKAROUND_1
+ smc #0
+ ldp x2, x3, [sp, #(8 * 0)]
+ ldp x0, x1, [sp, #(8 * 2)]
+ add sp, sp, #(8 * 4)
+ENTRY(__smccc_workaround_1_smc_end)
+#endif
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 07b572173265..d9645236e474 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -472,16 +472,6 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
/* And we're baaack! */
} while (fixup_guest_exit(vcpu, &exit_code));
- if (cpus_have_const_cap(ARM64_HARDEN_BP_POST_GUEST_EXIT)) {
- u32 midr = read_cpuid_id();
-
- /* Apply BTAC predictors mitigation to all Falkor chips */
- if (((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR) ||
- ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1)) {
- __qcom_hyp_sanitize_btac_predictors();
- }
- }
-
fp_enabled = __fpsimd_enabled_nvhe();
__sysreg_save_state_nvhe(guest_ctxt);
diff --git a/arch/microblaze/include/asm/pci.h b/arch/microblaze/include/asm/pci.h
index 114b93488193..5de871eb4a59 100644
--- a/arch/microblaze/include/asm/pci.h
+++ b/arch/microblaze/include/asm/pci.h
@@ -47,9 +47,10 @@ extern int pci_proc_domain(struct pci_bus *bus);
struct vm_area_struct;
-/* Tell drivers/pci/proc.c that we have pci_mmap_page_range() */
-#define HAVE_PCI_MMAP 1
-#define arch_can_pci_mmap_io() 1
+/* Tell PCI code what kind of PCI resource mappings we support */
+#define HAVE_PCI_MMAP 1
+#define ARCH_GENERIC_PCI_MMAP_RESOURCE 1
+#define arch_can_pci_mmap_io() 1
extern int pci_legacy_read(struct pci_bus *bus, loff_t port, u32 *val,
size_t count);
diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h
index e53b8532353c..db8b1fa83452 100644
--- a/arch/microblaze/include/asm/pgtable.h
+++ b/arch/microblaze/include/asm/pgtable.h
@@ -33,6 +33,8 @@ extern int mem_init_done;
#define PAGE_KERNEL __pgprot(0) /* these mean nothing to non MMU */
#define pgprot_noncached(x) (x)
+#define pgprot_writecombine pgprot_noncached
+#define pgprot_device pgprot_noncached
#define __swp_type(x) (0)
#define __swp_offset(x) (0)
diff --git a/arch/microblaze/pci/pci-common.c b/arch/microblaze/pci/pci-common.c
index ae79e8638d50..161f9758c631 100644
--- a/arch/microblaze/pci/pci-common.c
+++ b/arch/microblaze/pci/pci-common.c
@@ -151,72 +151,22 @@ void pcibios_set_master(struct pci_dev *dev)
}
/*
- * Platform support for /proc/bus/pci/X/Y mmap()s,
- * modelled on the sparc64 implementation by Dave Miller.
- * -- paulus.
+ * Platform support for /proc/bus/pci/X/Y mmap()s.
*/
-/*
- * Adjust vm_pgoff of VMA such that it is the physical page offset
- * corresponding to the 32-bit pci bus offset for DEV requested by the user.
- *
- * Basically, the user finds the base address for his device which he wishes
- * to mmap. They read the 32-bit value from the config space base register,
- * add whatever PAGE_SIZE multiple offset they wish, and feed this into the
- * offset parameter of mmap on /proc/bus/pci/XXX for that device.
- *
- * Returns negative error code on failure, zero on success.
- */
-static struct resource *__pci_mmap_make_offset(struct pci_dev *dev,
- resource_size_t *offset,
- enum pci_mmap_state mmap_state)
+int pci_iobar_pfn(struct pci_dev *pdev, int bar, struct vm_area_struct *vma)
{
- struct pci_controller *hose = pci_bus_to_host(dev->bus);
- unsigned long io_offset = 0;
- int i, res_bit;
+ struct pci_controller *hose = pci_bus_to_host(pdev->bus);
+ resource_size_t ioaddr = pci_resource_start(pdev, bar);
if (!hose)
- return NULL; /* should never happen */
-
- /* If memory, add on the PCI bridge address offset */
- if (mmap_state == pci_mmap_mem) {
-#if 0 /* See comment in pci_resource_to_user() for why this is disabled */
- *offset += hose->pci_mem_offset;
-#endif
- res_bit = IORESOURCE_MEM;
- } else {
- io_offset = (unsigned long)hose->io_base_virt - _IO_BASE;
- *offset += io_offset;
- res_bit = IORESOURCE_IO;
- }
-
- /*
- * Check that the offset requested corresponds to one of the
- * resources of the device.
- */
- for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
- struct resource *rp = &dev->resource[i];
- int flags = rp->flags;
+ return -EINVAL; /* should never happen */
- /* treat ROM as memory (should be already) */
- if (i == PCI_ROM_RESOURCE)
- flags |= IORESOURCE_MEM;
-
- /* Active and same type? */
- if ((flags & res_bit) == 0)
- continue;
-
- /* In the range of this resource? */
- if (*offset < (rp->start & PAGE_MASK) || *offset > rp->end)
- continue;
-
- /* found it! construct the final physical address */
- if (mmap_state == pci_mmap_io)
- *offset += hose->io_base_phys - io_offset;
- return rp;
- }
+ /* Convert to an offset within this PCI controller */
+ ioaddr -= (unsigned long)hose->io_base_virt - _IO_BASE;
- return NULL;
+ vma->vm_pgoff += (ioaddr + hose->io_base_phys) >> PAGE_SHIFT;
+ return 0;
}
/*
@@ -268,37 +218,6 @@ pgprot_t pci_phys_mem_access_prot(struct file *file,
return prot;
}
-/*
- * Perform the actual remap of the pages for a PCI device mapping, as
- * appropriate for this architecture. The region in the process to map
- * is described by vm_start and vm_end members of VMA, the base physical
- * address is found in vm_pgoff.
- * The pci device structure is provided so that architectures may make mapping
- * decisions on a per-device or per-bus basis.
- *
- * Returns a negative error code on failure, zero on success.
- */
-int pci_mmap_page_range(struct pci_dev *dev, int bar, struct vm_area_struct *vma,
- enum pci_mmap_state mmap_state, int write_combine)
-{
- resource_size_t offset =
- ((resource_size_t)vma->vm_pgoff) << PAGE_SHIFT;
- struct resource *rp;
- int ret;
-
- rp = __pci_mmap_make_offset(dev, &offset, mmap_state);
- if (rp == NULL)
- return -EINVAL;
-
- vma->vm_pgoff = offset >> PAGE_SHIFT;
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-
- ret = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
- vma->vm_end - vma->vm_start, vma->vm_page_prot);
-
- return ret;
-}
-
/* This provides legacy IO read access on a bus */
int pci_legacy_read(struct pci_bus *bus, loff_t port, u32 *val, size_t size)
{
diff --git a/arch/mips/mm/gup.c b/arch/mips/mm/gup.c
index 1e4658eee13f..5a4875cac1ec 100644
--- a/arch/mips/mm/gup.c
+++ b/arch/mips/mm/gup.c
@@ -178,6 +178,8 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
/*
* Like get_user_pages_fast() except its IRQ-safe in that it won't fall
* back to the regular GUP.
+ * Note a difference with get_user_pages_fast: this always returns the
+ * number of pages pinned, 0 if no pages were pinned.
*/
int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
struct page **pages)
diff --git a/arch/nios2/kernel/time.c b/arch/nios2/kernel/time.c
index 20e86209ef2e..ab88b6dd4679 100644
--- a/arch/nios2/kernel/time.c
+++ b/arch/nios2/kernel/time.c
@@ -336,9 +336,9 @@ static int __init nios2_time_init(struct device_node *timer)
return ret;
}
-void read_persistent_clock(struct timespec *ts)
+void read_persistent_clock64(struct timespec64 *ts)
{
- ts->tv_sec = mktime(2007, 1, 1, 0, 0, 0);
+ ts->tv_sec = mktime64(2007, 1, 1, 0, 0, 0);
ts->tv_nsec = 0;
}
diff --git a/arch/openrisc/include/uapi/asm/unistd.h b/arch/openrisc/include/uapi/asm/unistd.h
index 9a3ee389631e..11c5a58ab333 100644
--- a/arch/openrisc/include/uapi/asm/unistd.h
+++ b/arch/openrisc/include/uapi/asm/unistd.h
@@ -17,8 +17,6 @@
* (at your option) any later version.
*/
-#define __ARCH_HAVE_MMU
-
#define sys_mmap2 sys_mmap_pgoff
#define __ARCH_WANT_RENAMEAT
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 7e0bb9836b58..fc5a574c3482 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -338,6 +338,7 @@ source "mm/Kconfig"
config COMPAT
def_bool y
depends on 64BIT
+ select COMPAT_BINFMT_ELF if BINFMT_ELF
config SYSVIPC_COMPAT
def_bool y
diff --git a/arch/parisc/include/asm/compat.h b/arch/parisc/include/asm/compat.h
index c22db5323244..57b8b2a2fd4e 100644
--- a/arch/parisc/include/asm/compat.h
+++ b/arch/parisc/include/asm/compat.h
@@ -193,6 +193,12 @@ struct compat_shmid64_ds {
};
/*
+ * The type of struct elf_prstatus.pr_reg in compatible core dumps.
+ */
+#define COMPAT_ELF_NGREG 80
+typedef compat_ulong_t compat_elf_gregset_t[COMPAT_ELF_NGREG];
+
+/*
* A pointer passed in from user mode. This should not
* be used for syscall parameters, just declare them
* as pointers because the syscall entry code will have
diff --git a/arch/parisc/include/asm/elf.h b/arch/parisc/include/asm/elf.h
index 382d75a2ee4f..f019d3ec0c1c 100644
--- a/arch/parisc/include/asm/elf.h
+++ b/arch/parisc/include/asm/elf.h
@@ -6,7 +6,7 @@
* ELF register definitions..
*/
-#include <asm/ptrace.h>
+#include <linux/types.h>
#define EM_PARISC 15
@@ -169,16 +169,12 @@ typedef struct elf64_fdesc {
__u64 gp;
} Elf64_Fdesc;
-#ifdef __KERNEL__
-
#ifdef CONFIG_64BIT
#define Elf_Fdesc Elf64_Fdesc
#else
#define Elf_Fdesc Elf32_Fdesc
#endif /*CONFIG_64BIT*/
-#endif /*__KERNEL__*/
-
/* Legal values for p_type field of Elf32_Phdr/Elf64_Phdr. */
#define PT_HP_TLS (PT_LOOS + 0x0)
@@ -213,44 +209,44 @@ typedef struct elf64_fdesc {
#define PF_HP_SBP 0x08000000
/*
+ * This yields a string that ld.so will use to load implementation
+ * specific libraries for optimization. This is more specific in
+ * intent than poking at uname or /proc/cpuinfo.
+ */
+
+#define ELF_PLATFORM ("PARISC")
+
+/*
* The following definitions are those for 32-bit ELF binaries on a 32-bit
* kernel and for 64-bit binaries on a 64-bit kernel. To run 32-bit binaries
- * on a 64-bit kernel, arch/parisc/kernel/binfmt_elf32.c defines these
- * macros appropriately and then #includes binfmt_elf.c, which then includes
- * this file.
+ * on a 64-bit kernel, fs/compat_binfmt_elf.c defines ELF_CLASS and then
+ * #includes binfmt_elf.c, which then includes this file.
*/
#ifndef ELF_CLASS
-/*
- * This is used to ensure we don't load something for the wrong architecture.
- *
- * Note that this header file is used by default in fs/binfmt_elf.c. So
- * the following macros are for the default case. However, for the 64
- * bit kernel we also support 32 bit parisc binaries. To do that
- * arch/parisc/kernel/binfmt_elf32.c defines its own set of these
- * macros, and then it includes fs/binfmt_elf.c to provide an alternate
- * elf binary handler for 32 bit binaries (on the 64 bit kernel).
- */
#ifdef CONFIG_64BIT
-#define ELF_CLASS ELFCLASS64
+#define ELF_CLASS ELFCLASS64
#else
#define ELF_CLASS ELFCLASS32
#endif
typedef unsigned long elf_greg_t;
-/*
- * This yields a string that ld.so will use to load implementation
- * specific libraries for optimization. This is more specific in
- * intent than poking at uname or /proc/cpuinfo.
- */
-
-#define ELF_PLATFORM ("PARISC\0")
-
#define SET_PERSONALITY(ex) \
+({ \
set_personality((current->personality & ~PER_MASK) | PER_LINUX); \
current->thread.map_base = DEFAULT_MAP_BASE; \
- current->thread.task_size = DEFAULT_TASK_SIZE \
+ current->thread.task_size = DEFAULT_TASK_SIZE; \
+ })
+
+#endif /* ! ELF_CLASS */
+
+#define COMPAT_SET_PERSONALITY(ex) \
+({ \
+ set_thread_flag(TIF_32BIT); \
+ current->thread.map_base = DEFAULT_MAP_BASE32; \
+ current->thread.task_size = DEFAULT_TASK_SIZE32; \
+ })
/*
* Fill in general registers in a core dump. This saves pretty
@@ -277,10 +273,12 @@ typedef unsigned long elf_greg_t;
#define ELF_CORE_COPY_REGS(dst, pt) \
memset(dst, 0, sizeof(dst)); /* don't leak any "random" bits */ \
- memcpy(dst + 0, pt->gr, 32 * sizeof(elf_greg_t)); \
- memcpy(dst + 32, pt->sr, 8 * sizeof(elf_greg_t)); \
- memcpy(dst + 40, pt->iaoq, 2 * sizeof(elf_greg_t)); \
- memcpy(dst + 42, pt->iasq, 2 * sizeof(elf_greg_t)); \
+ { int i; \
+ for (i = 0; i < 32; i++) dst[i] = pt->gr[i]; \
+ for (i = 0; i < 8; i++) dst[32 + i] = pt->sr[i]; \
+ } \
+ dst[40] = pt->iaoq[0]; dst[41] = pt->iaoq[1]; \
+ dst[42] = pt->iasq[0]; dst[43] = pt->iasq[1]; \
dst[44] = pt->sar; dst[45] = pt->iir; \
dst[46] = pt->isr; dst[47] = pt->ior; \
dst[48] = mfctl(22); dst[49] = mfctl(0); \
@@ -292,7 +290,7 @@ typedef unsigned long elf_greg_t;
dst[60] = mfctl(12); dst[61] = mfctl(13); \
dst[62] = mfctl(10); dst[63] = mfctl(15);
-#endif /* ! ELF_CLASS */
+#define CORE_DUMP_USE_REGSET
#define ELF_NGREG 80 /* We only need 64 at present, but leave space
for expansion. */
@@ -310,7 +308,10 @@ extern int dump_task_fpu (struct task_struct *, elf_fpregset_t *);
struct pt_regs; /* forward declaration... */
-#define elf_check_arch(x) ((x)->e_machine == EM_PARISC && (x)->e_ident[EI_CLASS] == ELF_CLASS)
+#define elf_check_arch(x) \
+ ((x)->e_machine == EM_PARISC && (x)->e_ident[EI_CLASS] == ELF_CLASS)
+#define compat_elf_check_arch(x) \
+ ((x)->e_machine == EM_PARISC && (x)->e_ident[EI_CLASS] == ELFCLASS32)
/*
* These are used to set parameters in the core dumps.
diff --git a/arch/parisc/include/uapi/asm/siginfo.h b/arch/parisc/include/uapi/asm/siginfo.h
index be40331f757d..4a1062e05aaf 100644
--- a/arch/parisc/include/uapi/asm/siginfo.h
+++ b/arch/parisc/include/uapi/asm/siginfo.h
@@ -8,11 +8,4 @@
#include <asm-generic/siginfo.h>
-/*
- * SIGFPE si_codes
- */
-#ifdef __KERNEL__
-#define FPE_FIXME 0 /* Broken dup of SI_USER */
-#endif /* __KERNEL__ */
-
#endif
diff --git a/arch/parisc/kernel/binfmt_elf32.c b/arch/parisc/kernel/binfmt_elf32.c
deleted file mode 100644
index 20dfa081ed0b..000000000000
--- a/arch/parisc/kernel/binfmt_elf32.c
+++ /dev/null
@@ -1,98 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Support for 32-bit Linux/Parisc ELF binaries on 64 bit kernels
- *
- * Copyright (C) 2000 John Marvin
- * Copyright (C) 2000 Hewlett Packard Co.
- *
- * Heavily inspired from various other efforts to do the same thing
- * (ia64,sparc64/mips64)
- */
-
-/* Make sure include/asm-parisc/elf.h does the right thing */
-
-#define ELF_CLASS ELFCLASS32
-
-#define ELF_CORE_COPY_REGS(dst, pt) \
- memset(dst, 0, sizeof(dst)); /* don't leak any "random" bits */ \
- { int i; \
- for (i = 0; i < 32; i++) dst[i] = (elf_greg_t) pt->gr[i]; \
- for (i = 0; i < 8; i++) dst[32 + i] = (elf_greg_t) pt->sr[i]; \
- } \
- dst[40] = (elf_greg_t) pt->iaoq[0]; dst[41] = (elf_greg_t) pt->iaoq[1]; \
- dst[42] = (elf_greg_t) pt->iasq[0]; dst[43] = (elf_greg_t) pt->iasq[1]; \
- dst[44] = (elf_greg_t) pt->sar; dst[45] = (elf_greg_t) pt->iir; \
- dst[46] = (elf_greg_t) pt->isr; dst[47] = (elf_greg_t) pt->ior; \
- dst[48] = (elf_greg_t) mfctl(22); dst[49] = (elf_greg_t) mfctl(0); \
- dst[50] = (elf_greg_t) mfctl(24); dst[51] = (elf_greg_t) mfctl(25); \
- dst[52] = (elf_greg_t) mfctl(26); dst[53] = (elf_greg_t) mfctl(27); \
- dst[54] = (elf_greg_t) mfctl(28); dst[55] = (elf_greg_t) mfctl(29); \
- dst[56] = (elf_greg_t) mfctl(30); dst[57] = (elf_greg_t) mfctl(31); \
- dst[58] = (elf_greg_t) mfctl( 8); dst[59] = (elf_greg_t) mfctl( 9); \
- dst[60] = (elf_greg_t) mfctl(12); dst[61] = (elf_greg_t) mfctl(13); \
- dst[62] = (elf_greg_t) mfctl(10); dst[63] = (elf_greg_t) mfctl(15);
-
-
-typedef unsigned int elf_greg_t;
-
-#include <linux/spinlock.h>
-#include <asm/processor.h>
-#include <linux/module.h>
-#include <linux/elfcore.h>
-#include <linux/compat.h> /* struct compat_timeval */
-
-#define elf_prstatus elf_prstatus32
-struct elf_prstatus32
-{
- struct elf_siginfo pr_info; /* Info associated with signal */
- short pr_cursig; /* Current signal */
- unsigned int pr_sigpend; /* Set of pending signals */
- unsigned int pr_sighold; /* Set of held signals */
- pid_t pr_pid;
- pid_t pr_ppid;
- pid_t pr_pgrp;
- pid_t pr_sid;
- struct compat_timeval pr_utime; /* User time */
- struct compat_timeval pr_stime; /* System time */
- struct compat_timeval pr_cutime; /* Cumulative user time */
- struct compat_timeval pr_cstime; /* Cumulative system time */
- elf_gregset_t pr_reg; /* GP registers */
- int pr_fpvalid; /* True if math co-processor being used. */
-};
-
-#define elf_prpsinfo elf_prpsinfo32
-struct elf_prpsinfo32
-{
- char pr_state; /* numeric process state */
- char pr_sname; /* char for pr_state */
- char pr_zomb; /* zombie */
- char pr_nice; /* nice val */
- unsigned int pr_flag; /* flags */
- u16 pr_uid;
- u16 pr_gid;
- pid_t pr_pid, pr_ppid, pr_pgrp, pr_sid;
- /* Lots missing */
- char pr_fname[16]; /* filename of executable */
- char pr_psargs[ELF_PRARGSZ]; /* initial part of arg list */
-};
-
-#define init_elf_binfmt init_elf32_binfmt
-
-#define ELF_PLATFORM ("PARISC32\0")
-
-/*
- * We should probably use this macro to set a flag somewhere to indicate
- * this is a 32 on 64 process. We could use PER_LINUX_32BIT, or we
- * could set a processor dependent flag in the thread_struct.
- */
-
-#undef SET_PERSONALITY
-#define SET_PERSONALITY(ex) \
- set_thread_flag(TIF_32BIT); \
- current->thread.map_base = DEFAULT_MAP_BASE32; \
- current->thread.task_size = DEFAULT_TASK_SIZE32 \
-
-#undef ns_to_timeval
-#define ns_to_timeval ns_to_compat_timeval
-
-#include "../../../fs/binfmt_elf.c"
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index a99da95fc9fd..bddd2acebdcc 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -254,7 +254,7 @@ parisc_cache_init(void)
}
}
-void disable_sr_hashing(void)
+void __init disable_sr_hashing(void)
{
int srhash_type, retval;
unsigned long space_bits;
diff --git a/arch/parisc/kernel/pacache.S b/arch/parisc/kernel/pacache.S
index 67b0f7532e83..22e6374ece44 100644
--- a/arch/parisc/kernel/pacache.S
+++ b/arch/parisc/kernel/pacache.S
@@ -38,9 +38,10 @@
#include <asm/cache.h>
#include <asm/ldcw.h>
#include <linux/linkage.h>
+#include <linux/init.h>
- .text
- .align 128
+ .section .text.hot
+ .align 16
ENTRY_CFI(flush_tlb_all_local)
.proc
@@ -328,8 +329,6 @@ fdsync:
.procend
ENDPROC_CFI(flush_data_cache_local)
- .align 16
-
/* Macros to serialize TLB purge operations on SMP. */
.macro tlb_lock la,flags,tmp
@@ -1216,6 +1215,8 @@ ENTRY_CFI(flush_kernel_icache_range_asm)
.procend
ENDPROC_CFI(flush_kernel_icache_range_asm)
+ __INIT
+
/* align should cover use of rfi in disable_sr_hashing_asm and
* srdis_done.
*/
diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
index bbe46571ff96..b931745815e0 100644
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -112,14 +112,6 @@ void machine_restart(char *cmd)
}
-void machine_halt(void)
-{
- /*
- ** The LED/ChassisCodes are updated by the led_halt()
- ** function, called by the reboot notifier chain.
- */
-}
-
void (*chassis_power_off)(void);
/*
@@ -158,6 +150,11 @@ void machine_power_off(void)
void (*pm_power_off)(void);
EXPORT_SYMBOL(pm_power_off);
+void machine_halt(void)
+{
+ machine_power_off();
+}
+
void flush_thread(void)
{
/* Only needs to handle fpu stuff or perf monitors.
diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c
index c919e6c0a687..68e671a11987 100644
--- a/arch/parisc/kernel/traps.c
+++ b/arch/parisc/kernel/traps.c
@@ -627,9 +627,10 @@ void notrace handle_interruption(int code, struct pt_regs *regs)
on condition */
if(user_mode(regs)){
si.si_signo = SIGFPE;
- /* Set to zero, and let the userspace app figure it out from
- the insn pointed to by si_addr */
- si.si_code = FPE_FIXME;
+ /* Let userspace app figure it out from the insn pointed
+ * to by si_addr.
+ */
+ si.si_code = FPE_CONDTRAP;
si.si_addr = (void __user *) regs->iaoq[0];
force_sig_info(SIGFPE, &si, current);
return;
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 73ce5dd07642..c32a181a7cbb 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -552,6 +552,9 @@ config KEXEC_FILE
for kernel and initramfs as opposed to a list of segments as is the
case for the older kexec call.
+config ARCH_HAS_KEXEC_PURGATORY
+ def_bool KEXEC_FILE
+
config RELOCATABLE
bool "Build a relocatable kernel"
depends on PPC64 || (FLATMEM && (44x || FSL_BOOKE))
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index 931dda8be87c..66fcab13c8b4 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -545,18 +545,37 @@ enum {
#ifdef CONFIG_PPC_BOOK3E
#define CPU_FTRS_ALWAYS (CPU_FTRS_E6500 & CPU_FTRS_E5500)
#else
+
+#ifdef CONFIG_PPC_DT_CPU_FTRS
+#define CPU_FTRS_DT_CPU_BASE \
+ (CPU_FTR_LWSYNC | \
+ CPU_FTR_FPU_UNAVAILABLE | \
+ CPU_FTR_NODSISRALIGN | \
+ CPU_FTR_NOEXECUTE | \
+ CPU_FTR_COHERENT_ICACHE | \
+ CPU_FTR_STCX_CHECKS_ADDRESS | \
+ CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
+ CPU_FTR_DAWR | \
+ CPU_FTR_ARCH_206 | \
+ CPU_FTR_ARCH_207S)
+#else
+#define CPU_FTRS_DT_CPU_BASE (~0ul)
+#endif
+
#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define CPU_FTRS_ALWAYS \
(CPU_FTRS_POSSIBLE & ~CPU_FTR_HVMODE & CPU_FTRS_POWER7 & \
CPU_FTRS_POWER8E & CPU_FTRS_POWER8 & CPU_FTRS_POWER8_DD1 & \
- CPU_FTRS_POWER9 & CPU_FTRS_POWER9_DD1 & CPU_FTRS_POWER9_DD2_1)
+ CPU_FTRS_POWER9 & CPU_FTRS_POWER9_DD1 & CPU_FTRS_POWER9_DD2_1 & \
+ CPU_FTRS_DT_CPU_BASE)
#else
#define CPU_FTRS_ALWAYS \
(CPU_FTRS_PPC970 & CPU_FTRS_POWER5 & \
CPU_FTRS_POWER6 & CPU_FTRS_POWER7 & CPU_FTRS_CELL & \
CPU_FTRS_PA6T & CPU_FTRS_POWER8 & CPU_FTRS_POWER8E & \
CPU_FTRS_POWER8_DD1 & ~CPU_FTR_HVMODE & CPU_FTRS_POSSIBLE & \
- CPU_FTRS_POWER9 & CPU_FTRS_POWER9_DD1 & CPU_FTRS_POWER9_DD2_1)
+ CPU_FTRS_POWER9 & CPU_FTRS_POWER9_DD1 & CPU_FTRS_POWER9_DD2_1 & \
+ CPU_FTRS_DT_CPU_BASE)
#endif /* CONFIG_CPU_LITTLE_ENDIAN */
#endif
#else
diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index d8b1e8e7e035..4a585cba1787 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -95,7 +95,7 @@ static inline bool kdump_in_progress(void)
}
#ifdef CONFIG_KEXEC_FILE
-extern struct kexec_file_ops kexec_elf64_ops;
+extern const struct kexec_file_ops kexec_elf64_ops;
#ifdef CONFIG_IMA_KEXEC
#define ARCH_HAS_KIMAGE_ARCH
diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h
index 7e28442827f1..4f6573934792 100644
--- a/arch/powerpc/include/asm/module.h
+++ b/arch/powerpc/include/asm/module.h
@@ -15,9 +15,19 @@
#ifdef CC_USING_MPROFILE_KERNEL
-#define MODULE_ARCH_VERMAGIC "mprofile-kernel"
+#define MODULE_ARCH_VERMAGIC_FTRACE "mprofile-kernel "
+#else
+#define MODULE_ARCH_VERMAGIC_FTRACE ""
#endif
+#ifdef CONFIG_RELOCATABLE
+#define MODULE_ARCH_VERMAGIC_RELOCATABLE "relocatable "
+#else
+#define MODULE_ARCH_VERMAGIC_RELOCATABLE ""
+#endif
+
+#define MODULE_ARCH_VERMAGIC MODULE_ARCH_VERMAGIC_FTRACE MODULE_ARCH_VERMAGIC_RELOCATABLE
+
#ifndef __powerpc64__
/*
* Thanks to Paul M for explaining this.
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 7159e1a6a61a..03e1a920491e 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -21,6 +21,9 @@
/* We calculate number of sg entries based on PAGE_SIZE */
#define SG_ENTRIES_PER_NODE ((PAGE_SIZE - 16) / sizeof(struct opal_sg_entry))
+/* Default time to sleep or delay between OPAL_BUSY/OPAL_BUSY_EVENT loops */
+#define OPAL_BUSY_DELAY_MS 10
+
/* /sys/firmware/opal */
extern struct kobject *opal_kobj;
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index e88fbb1fdb8f..8ab51f6ca03a 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -53,18 +53,6 @@ struct dt_cpu_feature {
int disabled;
};
-#define CPU_FTRS_BASE \
- (CPU_FTR_LWSYNC | \
- CPU_FTR_FPU_UNAVAILABLE |\
- CPU_FTR_NODSISRALIGN |\
- CPU_FTR_NOEXECUTE |\
- CPU_FTR_COHERENT_ICACHE | \
- CPU_FTR_STCX_CHECKS_ADDRESS |\
- CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
- CPU_FTR_DAWR | \
- CPU_FTR_ARCH_206 |\
- CPU_FTR_ARCH_207S)
-
#define MMU_FTRS_HASH_BASE (MMU_FTRS_POWER8)
#define COMMON_USER_BASE (PPC_FEATURE_32 | PPC_FEATURE_64 | \
@@ -124,7 +112,7 @@ static char dt_cpu_name[64];
static struct cpu_spec __initdata base_cpu_spec = {
.cpu_name = NULL,
- .cpu_features = CPU_FTRS_BASE,
+ .cpu_features = CPU_FTRS_DT_CPU_BASE,
.cpu_user_features = COMMON_USER_BASE,
.cpu_user_features2 = COMMON_USER2_BASE,
.mmu_features = 0,
diff --git a/arch/powerpc/kernel/kexec_elf_64.c b/arch/powerpc/kernel/kexec_elf_64.c
index 9a42309b091a..ba4f18a43ee8 100644
--- a/arch/powerpc/kernel/kexec_elf_64.c
+++ b/arch/powerpc/kernel/kexec_elf_64.c
@@ -572,7 +572,7 @@ static void *elf64_load(struct kimage *image, char *kernel_buf,
{
int ret;
unsigned int fdt_size;
- unsigned long kernel_load_addr, purgatory_load_addr;
+ unsigned long kernel_load_addr;
unsigned long initrd_load_addr = 0, fdt_load_addr;
void *fdt;
const void *slave_code;
@@ -580,6 +580,8 @@ static void *elf64_load(struct kimage *image, char *kernel_buf,
struct elf_info elf_info;
struct kexec_buf kbuf = { .image = image, .buf_min = 0,
.buf_max = ppc64_rma_size };
+ struct kexec_buf pbuf = { .image = image, .buf_min = 0,
+ .buf_max = ppc64_rma_size, .top_down = true };
ret = build_elf_exec_info(kernel_buf, kernel_len, &ehdr, &elf_info);
if (ret)
@@ -591,14 +593,13 @@ static void *elf64_load(struct kimage *image, char *kernel_buf,
pr_debug("Loaded the kernel at 0x%lx\n", kernel_load_addr);
- ret = kexec_load_purgatory(image, 0, ppc64_rma_size, true,
- &purgatory_load_addr);
+ ret = kexec_load_purgatory(image, &pbuf);
if (ret) {
pr_err("Loading purgatory failed.\n");
goto out;
}
- pr_debug("Loaded purgatory at 0x%lx\n", purgatory_load_addr);
+ pr_debug("Loaded purgatory at 0x%lx\n", pbuf.mem);
if (initrd != NULL) {
kbuf.buffer = initrd;
@@ -657,7 +658,7 @@ out:
return ret ? ERR_PTR(ret) : fdt;
}
-struct kexec_file_ops kexec_elf64_ops = {
+const struct kexec_file_ops kexec_elf64_ops = {
.probe = elf64_probe,
.load = elf64_load,
};
diff --git a/arch/powerpc/kernel/machine_kexec_file_64.c b/arch/powerpc/kernel/machine_kexec_file_64.c
index 45e0b7d5f200..0bd23dc789a4 100644
--- a/arch/powerpc/kernel/machine_kexec_file_64.c
+++ b/arch/powerpc/kernel/machine_kexec_file_64.c
@@ -31,52 +31,19 @@
#define SLAVE_CODE_SIZE 256
-static struct kexec_file_ops *kexec_file_loaders[] = {
+const struct kexec_file_ops * const kexec_file_loaders[] = {
&kexec_elf64_ops,
+ NULL
};
int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
unsigned long buf_len)
{
- int i, ret = -ENOEXEC;
- struct kexec_file_ops *fops;
-
/* We don't support crash kernels yet. */
if (image->type == KEXEC_TYPE_CRASH)
return -EOPNOTSUPP;
- for (i = 0; i < ARRAY_SIZE(kexec_file_loaders); i++) {
- fops = kexec_file_loaders[i];
- if (!fops || !fops->probe)
- continue;
-
- ret = fops->probe(buf, buf_len);
- if (!ret) {
- image->fops = fops;
- return ret;
- }
- }
-
- return ret;
-}
-
-void *arch_kexec_kernel_image_load(struct kimage *image)
-{
- if (!image->fops || !image->fops->load)
- return ERR_PTR(-ENOEXEC);
-
- return image->fops->load(image, image->kernel_buf,
- image->kernel_buf_len, image->initrd_buf,
- image->initrd_buf_len, image->cmdline_buf,
- image->cmdline_buf_len);
-}
-
-int arch_kimage_file_post_load_cleanup(struct kimage *image)
-{
- if (!image->fops || !image->fops->cleanup)
- return 0;
-
- return image->fops->cleanup(image->image_loader_data);
+ return kexec_image_probe_default(image, buf, buf_len);
}
/**
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 66f2b6299c40..44c30dd38067 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -880,7 +880,7 @@ void rfi_flush_enable(bool enable)
rfi_flush = enable;
}
-static void init_fallback_flush(void)
+static void __ref init_fallback_flush(void)
{
u64 l1d_size, limit;
int cpu;
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index a2ef0c0e6c31..0904492e7032 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -1613,6 +1613,22 @@ void facility_unavailable_exception(struct pt_regs *regs)
value = mfspr(SPRN_FSCR);
status = value >> 56;
+ if ((hv || status >= 2) &&
+ (status < ARRAY_SIZE(facility_strings)) &&
+ facility_strings[status])
+ facility = facility_strings[status];
+
+ /* We should not have taken this interrupt in kernel */
+ if (!user_mode(regs)) {
+ pr_emerg("Facility '%s' unavailable (%d) exception in kernel mode at %lx\n",
+ facility, status, regs->nip);
+ die("Unexpected facility unavailable exception", regs, SIGABRT);
+ }
+
+ /* We restore the interrupt state now */
+ if (!arch_irq_disabled_regs(regs))
+ local_irq_enable();
+
if (status == FSCR_DSCR_LG) {
/*
* User is accessing the DSCR register using the problem
@@ -1679,25 +1695,11 @@ void facility_unavailable_exception(struct pt_regs *regs)
return;
}
- if ((hv || status >= 2) &&
- (status < ARRAY_SIZE(facility_strings)) &&
- facility_strings[status])
- facility = facility_strings[status];
-
- /* We restore the interrupt state now */
- if (!arch_irq_disabled_regs(regs))
- local_irq_enable();
-
pr_err_ratelimited("%sFacility '%s' unavailable (%d), exception at 0x%lx, MSR=%lx\n",
hv ? "Hypervisor " : "", facility, status, regs->nip, regs->msr);
out:
- if (user_mode(regs)) {
- _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
- return;
- }
-
- die("Unexpected facility unavailable exception", regs, SIGABRT);
+ _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
}
#endif
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index e1c083fbe434..78e6a392330f 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -470,8 +470,6 @@ static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
for (i = 0; i < npages; ++i) {
asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : :
"r" (rbvalues[i]), "r" (kvm->arch.lpid));
- trace_tlbie(kvm->arch.lpid, 0, rbvalues[i],
- kvm->arch.lpid, 0, 0, 0);
}
if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) {
@@ -492,8 +490,6 @@ static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
for (i = 0; i < npages; ++i) {
asm volatile(PPC_TLBIEL(%0,%1,0,0,0) : :
"r" (rbvalues[i]), "r" (0));
- trace_tlbie(kvm->arch.lpid, 1, rbvalues[i],
- 0, 0, 0, 0);
}
asm volatile("ptesync" : : : "memory");
}
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 9cd87d11fe4e..205fe557ca10 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -35,6 +35,7 @@
#include <asm/mmu.h>
#include <asm/copro.h>
#include <asm/hugetlb.h>
+#include <asm/mmu_context.h>
static DEFINE_SPINLOCK(slice_convert_lock);
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index 2fba6170ab3f..a5d7309c2d05 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -33,13 +33,12 @@ static inline void tlbiel_radix_set_isa300(unsigned int set, unsigned int is,
{
unsigned long rb;
unsigned long rs;
- unsigned int r = 1; /* radix format */
rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
rs = ((unsigned long)pid << PPC_BITLSHIFT(31));
- asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4)
- : : "r"(rb), "r"(rs), "i"(ric), "i"(prs), "r"(r)
+ asm volatile(PPC_TLBIEL(%0, %1, %2, %3, 1)
+ : : "r"(rb), "r"(rs), "i"(ric), "i"(prs)
: "memory");
}
diff --git a/arch/powerpc/platforms/powernv/opal-nvram.c b/arch/powerpc/platforms/powernv/opal-nvram.c
index ba2ff06a2c98..1bceb95f422d 100644
--- a/arch/powerpc/platforms/powernv/opal-nvram.c
+++ b/arch/powerpc/platforms/powernv/opal-nvram.c
@@ -11,6 +11,7 @@
#define DEBUG
+#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/of.h>
@@ -56,8 +57,12 @@ static ssize_t opal_nvram_write(char *buf, size_t count, loff_t *index)
while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
rc = opal_write_nvram(__pa(buf), count, off);
- if (rc == OPAL_BUSY_EVENT)
+ if (rc == OPAL_BUSY_EVENT) {
+ msleep(OPAL_BUSY_DELAY_MS);
opal_poll_events(NULL);
+ } else if (rc == OPAL_BUSY) {
+ msleep(OPAL_BUSY_DELAY_MS);
+ }
}
if (rc)
diff --git a/arch/s390/boot/compressed/misc.c b/arch/s390/boot/compressed/misc.c
index 63838a17e56a..511b2cc9b91a 100644
--- a/arch/s390/boot/compressed/misc.c
+++ b/arch/s390/boot/compressed/misc.c
@@ -119,34 +119,12 @@ static void error(char *x)
asm volatile("lpsw %0" : : "Q" (psw));
}
-/*
- * Safe guard the ipl parameter block against a memory area that will be
- * overwritten. The validity check for the ipl parameter block is complex
- * (see cio_get_iplinfo and ipl_save_parameters) but if the pointer to
- * the ipl parameter block intersects with the passed memory area we can
- * safely assume that we can read from that memory. In that case just copy
- * the memory to IPL_PARMBLOCK_ORIGIN even if there is no ipl parameter
- * block.
- */
-static void check_ipl_parmblock(void *start, unsigned long size)
-{
- void *src, *dst;
-
- src = (void *)(unsigned long) S390_lowcore.ipl_parmblock_ptr;
- if (src + PAGE_SIZE <= start || src >= start + size)
- return;
- dst = (void *) IPL_PARMBLOCK_ORIGIN;
- memmove(dst, src, PAGE_SIZE);
- S390_lowcore.ipl_parmblock_ptr = IPL_PARMBLOCK_ORIGIN;
-}
-
unsigned long decompress_kernel(void)
{
void *output, *kernel_end;
output = (void *) ALIGN((unsigned long) _end + HEAP_SIZE, PAGE_SIZE);
kernel_end = output + SZ__bss_start;
- check_ipl_parmblock((void *) 0, (unsigned long) kernel_end);
#ifdef CONFIG_BLK_DEV_INITRD
/*
@@ -156,7 +134,6 @@ unsigned long decompress_kernel(void)
* current bss section..
*/
if (INITRD_START && INITRD_SIZE && kernel_end > (void *) INITRD_START) {
- check_ipl_parmblock(kernel_end, INITRD_SIZE);
memmove(kernel_end, (void *) INITRD_START, INITRD_SIZE);
INITRD_START = (unsigned long) kernel_end;
}
diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index fa9b7dd1a513..ad47abd08630 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -329,7 +329,7 @@ static void fallback_exit_blk(struct crypto_tfm *tfm)
static struct crypto_alg ecb_aes_alg = {
.cra_name = "ecb(aes)",
.cra_driver_name = "ecb-aes-s390",
- .cra_priority = 400, /* combo: aes + ecb */
+ .cra_priority = 401, /* combo: aes + ecb + 1 */
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_NEED_FALLBACK,
.cra_blocksize = AES_BLOCK_SIZE,
@@ -426,7 +426,7 @@ static int cbc_aes_decrypt(struct blkcipher_desc *desc,
static struct crypto_alg cbc_aes_alg = {
.cra_name = "cbc(aes)",
.cra_driver_name = "cbc-aes-s390",
- .cra_priority = 400, /* combo: aes + cbc */
+ .cra_priority = 402, /* ecb-aes-s390 + 1 */
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_NEED_FALLBACK,
.cra_blocksize = AES_BLOCK_SIZE,
@@ -633,7 +633,7 @@ static void xts_fallback_exit(struct crypto_tfm *tfm)
static struct crypto_alg xts_aes_alg = {
.cra_name = "xts(aes)",
.cra_driver_name = "xts-aes-s390",
- .cra_priority = 400, /* combo: aes + xts */
+ .cra_priority = 402, /* ecb-aes-s390 + 1 */
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_NEED_FALLBACK,
.cra_blocksize = AES_BLOCK_SIZE,
@@ -763,7 +763,7 @@ static int ctr_aes_decrypt(struct blkcipher_desc *desc,
static struct crypto_alg ctr_aes_alg = {
.cra_name = "ctr(aes)",
.cra_driver_name = "ctr-aes-s390",
- .cra_priority = 400, /* combo: aes + ctr */
+ .cra_priority = 402, /* ecb-aes-s390 + 1 */
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
CRYPTO_ALG_NEED_FALLBACK,
.cra_blocksize = 1,
diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c
index 003932db8d12..80b27294c1de 100644
--- a/arch/s390/crypto/paes_s390.c
+++ b/arch/s390/crypto/paes_s390.c
@@ -138,7 +138,7 @@ static int ecb_paes_decrypt(struct blkcipher_desc *desc,
static struct crypto_alg ecb_paes_alg = {
.cra_name = "ecb(paes)",
.cra_driver_name = "ecb-paes-s390",
- .cra_priority = 400, /* combo: aes + ecb */
+ .cra_priority = 401, /* combo: aes + ecb + 1 */
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct s390_paes_ctx),
@@ -241,7 +241,7 @@ static int cbc_paes_decrypt(struct blkcipher_desc *desc,
static struct crypto_alg cbc_paes_alg = {
.cra_name = "cbc(paes)",
.cra_driver_name = "cbc-paes-s390",
- .cra_priority = 400, /* combo: aes + cbc */
+ .cra_priority = 402, /* ecb-paes-s390 + 1 */
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct s390_paes_ctx),
@@ -377,7 +377,7 @@ static int xts_paes_decrypt(struct blkcipher_desc *desc,
static struct crypto_alg xts_paes_alg = {
.cra_name = "xts(paes)",
.cra_driver_name = "xts-paes-s390",
- .cra_priority = 400, /* combo: aes + xts */
+ .cra_priority = 402, /* ecb-paes-s390 + 1 */
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct s390_pxts_ctx),
@@ -523,7 +523,7 @@ static int ctr_paes_decrypt(struct blkcipher_desc *desc,
static struct crypto_alg ctr_paes_alg = {
.cra_name = "ctr(paes)",
.cra_driver_name = "ctr-paes-s390",
- .cra_priority = 400, /* combo: aes + ctr */
+ .cra_priority = 402, /* ecb-paes-s390 + 1 */
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct s390_paes_ctx),
diff --git a/arch/s390/include/asm/ap.h b/arch/s390/include/asm/ap.h
index cfce6835b109..c1bedb4c8de0 100644
--- a/arch/s390/include/asm/ap.h
+++ b/arch/s390/include/asm/ap.h
@@ -20,9 +20,9 @@
*/
typedef unsigned int ap_qid_t;
-#define AP_MKQID(_card, _queue) (((_card) & 63) << 8 | ((_queue) & 255))
-#define AP_QID_CARD(_qid) (((_qid) >> 8) & 63)
-#define AP_QID_QUEUE(_qid) ((_qid) & 255)
+#define AP_MKQID(_card, _queue) (((_card) & 0xff) << 8 | ((_queue) & 0xff))
+#define AP_QID_CARD(_qid) (((_qid) >> 8) & 0xff)
+#define AP_QID_QUEUE(_qid) ((_qid) & 0xff)
/**
* struct ap_queue_status - Holds the AP queue status.
diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h
index 847a04262b9c..225667652069 100644
--- a/arch/s390/include/asm/cio.h
+++ b/arch/s390/include/asm/cio.h
@@ -328,16 +328,6 @@ static inline u8 pathmask_to_pos(u8 mask)
void channel_subsystem_reinit(void);
extern void css_schedule_reprobe(void);
-extern void reipl_ccw_dev(struct ccw_dev_id *id);
-
-struct cio_iplinfo {
- u8 ssid;
- u16 devno;
- int is_qdio;
-};
-
-extern int cio_get_iplinfo(struct cio_iplinfo *iplinfo);
-
/* Function from drivers/s390/cio/chsc.c */
int chsc_sstpc(void *page, unsigned int op, u16 ctrl, u64 *clock_delta);
int chsc_sstpi(void *page, void *result, size_t size);
diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h
index 186c7b5f5511..ae5135704616 100644
--- a/arch/s390/include/asm/ipl.h
+++ b/arch/s390/include/asm/ipl.h
@@ -15,8 +15,6 @@
#define NSS_NAME_SIZE 8
-#define IPL_PARMBLOCK_ORIGIN 0x2000
-
#define IPL_PARM_BLK_FCP_LEN (sizeof(struct ipl_list_hdr) + \
sizeof(struct ipl_block_fcp))
@@ -29,10 +27,6 @@
#define IPL_MAX_SUPPORTED_VERSION (0)
-#define IPL_PARMBLOCK_START ((struct ipl_parameter_block *) \
- IPL_PARMBLOCK_ORIGIN)
-#define IPL_PARMBLOCK_SIZE (IPL_PARMBLOCK_START->hdr.len)
-
struct ipl_list_hdr {
u32 len;
u8 reserved1[3];
@@ -83,33 +77,21 @@ struct ipl_parameter_block {
union {
struct ipl_block_fcp fcp;
struct ipl_block_ccw ccw;
+ char raw[PAGE_SIZE - sizeof(struct ipl_list_hdr)];
} ipl_info;
} __packed __aligned(PAGE_SIZE);
-/*
- * IPL validity flags
- */
-extern u32 ipl_flags;
-
struct save_area;
struct save_area * __init save_area_alloc(bool is_boot_cpu);
struct save_area * __init save_area_boot_cpu(void);
void __init save_area_add_regs(struct save_area *, void *regs);
void __init save_area_add_vxrs(struct save_area *, __vector128 *vxrs);
-extern void do_reipl(void);
-extern void do_halt(void);
-extern void do_poff(void);
-extern void ipl_verify_parameters(void);
-extern void ipl_update_parameters(void);
+extern void s390_reset_system(void);
+extern void ipl_store_parameters(void);
extern size_t append_ipl_vmparm(char *, size_t);
extern size_t append_ipl_scpdata(char *, size_t);
-enum {
- IPL_DEVNO_VALID = 1,
- IPL_PARMBLOCK_VALID = 2,
-};
-
enum ipl_type {
IPL_TYPE_UNKNOWN = 1,
IPL_TYPE_CCW = 2,
@@ -138,6 +120,7 @@ struct ipl_info
extern struct ipl_info ipl_info;
extern void setup_ipl(void);
+extern void set_os_info_reipl_block(void);
/*
* DIAG 308 support
diff --git a/arch/s390/include/asm/nospec-branch.h b/arch/s390/include/asm/nospec-branch.h
index 35bf28fe4c64..b4bd8c41e9d3 100644
--- a/arch/s390/include/asm/nospec-branch.h
+++ b/arch/s390/include/asm/nospec-branch.h
@@ -9,6 +9,7 @@
extern int nospec_disable;
void nospec_init_branches(void);
+void nospec_auto_detect(void);
void nospec_revert(s32 *start, s32 *end);
#endif /* __ASSEMBLY__ */
diff --git a/arch/s390/include/asm/reset.h b/arch/s390/include/asm/reset.h
deleted file mode 100644
index 6450b31ade03..000000000000
--- a/arch/s390/include/asm/reset.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright IBM Corp. 2006
- * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
- */
-
-#ifndef _ASM_S390_RESET_H
-#define _ASM_S390_RESET_H
-
-#include <linux/list.h>
-
-struct reset_call {
- struct list_head list;
- void (*fn)(void);
-};
-
-extern void register_reset_call(struct reset_call *reset);
-extern void unregister_reset_call(struct reset_call *reset);
-extern void s390_reset_system(void);
-#endif /* _ASM_S390_RESET_H */
diff --git a/arch/s390/include/uapi/asm/zcrypt.h b/arch/s390/include/uapi/asm/zcrypt.h
index d568307321fc..b62e0614e440 100644
--- a/arch/s390/include/uapi/asm/zcrypt.h
+++ b/arch/s390/include/uapi/asm/zcrypt.h
@@ -203,9 +203,9 @@ struct ep11_urb {
} __attribute__((packed));
/**
- * struct zcrypt_device_status
+ * struct zcrypt_device_status_ext
* @hwtype: raw hardware type
- * @qid: 6 bit device index, 8 bit domain
+ * @qid: 8 bit device index, 8 bit domain
* @functions: AP device function bit field 'abcdef'
* a, b, c = reserved
* d = CCA coprocessor
@@ -214,28 +214,23 @@ struct ep11_urb {
* @online online status
* @reserved reserved
*/
-struct zcrypt_device_status {
+struct zcrypt_device_status_ext {
unsigned int hwtype:8;
- unsigned int qid:14;
+ unsigned int qid:16;
unsigned int online:1;
unsigned int functions:6;
- unsigned int reserved:3;
+ unsigned int reserved:1;
};
-#define MAX_ZDEV_CARDIDS 64
-#define MAX_ZDEV_DOMAINS 256
+#define MAX_ZDEV_CARDIDS_EXT 256
+#define MAX_ZDEV_DOMAINS_EXT 256
-/**
- * Maximum number of zcrypt devices
- */
-#define MAX_ZDEV_ENTRIES (MAX_ZDEV_CARDIDS * MAX_ZDEV_DOMAINS)
+/* Maximum number of zcrypt devices */
+#define MAX_ZDEV_ENTRIES_EXT (MAX_ZDEV_CARDIDS_EXT * MAX_ZDEV_DOMAINS_EXT)
-/**
- * zcrypt_device_matrix
- * Device matrix of all zcrypt devices
- */
-struct zcrypt_device_matrix {
- struct zcrypt_device_status device[MAX_ZDEV_ENTRIES];
+/* Device matrix of all zcrypt devices */
+struct zcrypt_device_matrix_ext {
+ struct zcrypt_device_status_ext device[MAX_ZDEV_ENTRIES_EXT];
};
#define AUTOSELECT ((unsigned int)0xFFFFFFFF)
@@ -270,71 +265,35 @@ struct zcrypt_device_matrix {
* ZSENDEP11CPRB
* Send an arbitrary EP11 CPRB to an EP11 coprocessor crypto card.
*
- * Z90STAT_STATUS_MASK
- * Return an 64 element array of unsigned chars for the status of
- * all devices.
+ * ZCRYPT_DEVICE_STATUS
+ * The given struct zcrypt_device_matrix_ext is updated with
+ * status information for each currently known apqn.
+ *
+ * ZCRYPT_STATUS_MASK
+ * Return an MAX_ZDEV_CARDIDS_EXT element array of unsigned chars for the
+ * status of all devices.
* 0x01: PCICA
* 0x02: PCICC
* 0x03: PCIXCC_MCL2
* 0x04: PCIXCC_MCL3
* 0x05: CEX2C
* 0x06: CEX2A
- * 0x0d: device is disabled via the proc filesystem
- *
- * Z90STAT_QDEPTH_MASK
- * Return an 64 element array of unsigned chars for the queue
- * depth of all devices.
- *
- * Z90STAT_PERDEV_REQCNT
- * Return an 64 element array of unsigned integers for the number
- * of successfully completed requests per device since the device
- * was detected and made available.
- *
- * Z90STAT_REQUESTQ_COUNT
- * Return an integer count of the number of entries waiting to be
- * sent to a device.
- *
- * Z90STAT_PENDINGQ_COUNT
- * Return an integer count of the number of entries sent to all
- * devices awaiting the reply.
- *
- * Z90STAT_TOTALOPEN_COUNT
- * Return an integer count of the number of open file handles.
- *
- * Z90STAT_DOMAIN_INDEX
- * Return the integer value of the Cryptographic Domain.
- *
- * The following ioctls are deprecated and should be no longer used:
- *
- * Z90STAT_TOTALCOUNT
- * Return an integer count of all device types together.
- *
- * Z90STAT_PCICACOUNT
- * Return an integer count of all PCICAs.
- *
- * Z90STAT_PCICCCOUNT
- * Return an integer count of all PCICCs.
- *
- * Z90STAT_PCIXCCMCL2COUNT
- * Return an integer count of all MCL2 PCIXCCs.
- *
- * Z90STAT_PCIXCCMCL3COUNT
- * Return an integer count of all MCL3 PCIXCCs.
- *
- * Z90STAT_CEX2CCOUNT
- * Return an integer count of all CEX2Cs.
+ * 0x07: CEX3C
+ * 0x08: CEX3A
+ * 0x0a: CEX4
+ * 0x0b: CEX5
+ * 0x0c: CEX6
+ * 0x0d: device is disabled
*
- * Z90STAT_CEX2ACOUNT
- * Return an integer count of all CEX2As.
+ * ZCRYPT_QDEPTH_MASK
+ * Return an MAX_ZDEV_CARDIDS_EXT element array of unsigned chars for the
+ * queue depth of all devices.
*
- * ICAZ90STATUS
- * Return some device driver status in a ica_z90_status struct
- * This takes an ica_z90_status struct as its arg.
+ * ZCRYPT_PERDEV_REQCNT
+ * Return an MAX_ZDEV_CARDIDS_EXT element array of unsigned integers for
+ * the number of successfully completed requests per device since the
+ * device was detected and made available.
*
- * Z90STAT_PCIXCCCOUNT
- * Return an integer count of all PCIXCCs (MCL2 + MCL3).
- * This is DEPRECATED now that MCL3 PCIXCCs are treated differently from
- * MCL2 PCIXCCs.
*/
/**
@@ -344,22 +303,56 @@ struct zcrypt_device_matrix {
#define ICARSACRT _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x06, 0)
#define ZSECSENDCPRB _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x81, 0)
#define ZSENDEP11CPRB _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x04, 0)
-#define ZDEVICESTATUS _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x4f, 0)
-/* New status calls */
-#define Z90STAT_TOTALCOUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x40, int)
-#define Z90STAT_PCICACOUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x41, int)
-#define Z90STAT_PCICCCOUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x42, int)
-#define Z90STAT_PCIXCCMCL2COUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x4b, int)
-#define Z90STAT_PCIXCCMCL3COUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x4c, int)
-#define Z90STAT_CEX2CCOUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x4d, int)
-#define Z90STAT_CEX2ACOUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x4e, int)
+#define ZCRYPT_DEVICE_STATUS _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x5f, 0)
+#define ZCRYPT_STATUS_MASK _IOR(ZCRYPT_IOCTL_MAGIC, 0x58, char[MAX_ZDEV_CARDIDS_EXT])
+#define ZCRYPT_QDEPTH_MASK _IOR(ZCRYPT_IOCTL_MAGIC, 0x59, char[MAX_ZDEV_CARDIDS_EXT])
+#define ZCRYPT_PERDEV_REQCNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x5a, int[MAX_ZDEV_CARDIDS_EXT])
+
+/*
+ * Only deprecated defines, structs and ioctls below this line.
+ */
+
+/* Deprecated: use MAX_ZDEV_CARDIDS_EXT */
+#define MAX_ZDEV_CARDIDS 64
+/* Deprecated: use MAX_ZDEV_DOMAINS_EXT */
+#define MAX_ZDEV_DOMAINS 256
+
+/* Deprecated: use MAX_ZDEV_ENTRIES_EXT */
+#define MAX_ZDEV_ENTRIES (MAX_ZDEV_CARDIDS * MAX_ZDEV_DOMAINS)
+
+/* Deprecated: use struct zcrypt_device_status_ext */
+struct zcrypt_device_status {
+ unsigned int hwtype:8;
+ unsigned int qid:14;
+ unsigned int online:1;
+ unsigned int functions:6;
+ unsigned int reserved:3;
+};
+
+/* Deprecated: use struct zcrypt_device_matrix_ext */
+struct zcrypt_device_matrix {
+ struct zcrypt_device_status device[MAX_ZDEV_ENTRIES];
+};
+
+/* Deprecated: use ZCRYPT_DEVICE_STATUS */
+#define ZDEVICESTATUS _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x4f, 0)
+/* Deprecated: use ZCRYPT_STATUS_MASK */
+#define Z90STAT_STATUS_MASK _IOR(ZCRYPT_IOCTL_MAGIC, 0x48, char[64])
+/* Deprecated: use ZCRYPT_QDEPTH_MASK */
+#define Z90STAT_QDEPTH_MASK _IOR(ZCRYPT_IOCTL_MAGIC, 0x49, char[64])
+/* Deprecated: use ZCRYPT_PERDEV_REQCNT */
+#define Z90STAT_PERDEV_REQCNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x4a, int[64])
+
+/* Deprecated: use sysfs to query these values */
#define Z90STAT_REQUESTQ_COUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x44, int)
#define Z90STAT_PENDINGQ_COUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x45, int)
#define Z90STAT_TOTALOPEN_COUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x46, int)
#define Z90STAT_DOMAIN_INDEX _IOR(ZCRYPT_IOCTL_MAGIC, 0x47, int)
-#define Z90STAT_STATUS_MASK _IOR(ZCRYPT_IOCTL_MAGIC, 0x48, char[64])
-#define Z90STAT_QDEPTH_MASK _IOR(ZCRYPT_IOCTL_MAGIC, 0x49, char[64])
-#define Z90STAT_PERDEV_REQCNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x4a, int[64])
+
+/*
+ * The ioctl number ranges 0x40 - 0x42 and 0x4b - 0x4e had been used in the
+ * past, don't assign new ioctls for these.
+ */
#endif /* __ASM_S390_ZCRYPT_H */
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index 18c1eeb847b2..6f2a193ccccc 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -279,7 +279,7 @@ static int setup_frame32(struct ksignal *ksig, sigset_t *set,
if (put_compat_sigset((compat_sigset_t __user *)frame->sc.oldmask,
set, sizeof(compat_sigset_t)))
return -EFAULT;
- if (__put_user(ptr_to_compat(&frame->sc), &frame->sc.sregs))
+ if (__put_user(ptr_to_compat(&frame->sregs), &frame->sc.sregs))
return -EFAULT;
/* Store registers needed to create the signal frame */
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index b00b515baa53..32daa0f84325 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -342,16 +342,6 @@ static __init void memmove_early(void *dst, const void *src, size_t n)
S390_lowcore.program_new_psw = old;
}
-static __init noinline void ipl_save_parameters(void)
-{
- void *src, *dst;
-
- src = (void *)(unsigned long) S390_lowcore.ipl_parmblock_ptr;
- dst = (void *) IPL_PARMBLOCK_ORIGIN;
- memmove_early(dst, src, PAGE_SIZE);
- S390_lowcore.ipl_parmblock_ptr = IPL_PARMBLOCK_ORIGIN;
-}
-
static __init noinline void rescue_initrd(void)
{
#ifdef CONFIG_BLK_DEV_INITRD
@@ -421,10 +411,8 @@ static void __init setup_boot_command_line(void)
void __init startup_init(void)
{
reset_tod_clock();
- ipl_save_parameters();
rescue_initrd();
clear_bss_section();
- ipl_verify_parameters();
time_early_init();
init_kernel_storage_key();
lockdep_off();
@@ -432,7 +420,7 @@ void __init startup_init(void)
setup_facility_list();
detect_machine_type();
setup_arch_string();
- ipl_update_parameters();
+ ipl_store_parameters();
setup_boot_command_line();
detect_diag9c();
detect_diag44();
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 34477c1aee6d..4296d7e61fb6 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -24,9 +24,7 @@
#include <asm/smp.h>
#include <asm/setup.h>
#include <asm/cpcmd.h>
-#include <asm/cio.h>
#include <asm/ebcdic.h>
-#include <asm/reset.h>
#include <asm/sclp.h>
#include <asm/checksum.h>
#include <asm/debug.h>
@@ -119,39 +117,12 @@ static char *dump_type_str(enum dump_type type)
}
}
-static u8 ipl_ssid;
-static u16 ipl_devno;
-u32 ipl_flags;
-
-enum ipl_method {
- REIPL_METHOD_CCW_CIO,
- REIPL_METHOD_CCW_DIAG,
- REIPL_METHOD_CCW_VM,
- REIPL_METHOD_FCP_RO_DIAG,
- REIPL_METHOD_FCP_RW_DIAG,
- REIPL_METHOD_FCP_RO_VM,
- REIPL_METHOD_FCP_DUMP,
- REIPL_METHOD_NSS,
- REIPL_METHOD_NSS_DIAG,
- REIPL_METHOD_DEFAULT,
-};
-
-enum dump_method {
- DUMP_METHOD_NONE,
- DUMP_METHOD_CCW_CIO,
- DUMP_METHOD_CCW_DIAG,
- DUMP_METHOD_CCW_VM,
- DUMP_METHOD_FCP_DIAG,
-};
-
-static int diag308_set_works;
-
+static int ipl_block_valid;
static struct ipl_parameter_block ipl_block;
static int reipl_capabilities = IPL_TYPE_UNKNOWN;
static enum ipl_type reipl_type = IPL_TYPE_UNKNOWN;
-static enum ipl_method reipl_method = REIPL_METHOD_DEFAULT;
static struct ipl_parameter_block *reipl_block_fcp;
static struct ipl_parameter_block *reipl_block_ccw;
static struct ipl_parameter_block *reipl_block_nss;
@@ -159,7 +130,6 @@ static struct ipl_parameter_block *reipl_block_actual;
static int dump_capabilities = DUMP_TYPE_NONE;
static enum dump_type dump_type = DUMP_TYPE_NONE;
-static enum dump_method dump_method = DUMP_METHOD_NONE;
static struct ipl_parameter_block *dump_block_fcp;
static struct ipl_parameter_block *dump_block_ccw;
@@ -260,33 +230,25 @@ static struct kobj_attribute sys_##_prefix##_##_name##_attr = \
sys_##_prefix##_##_name##_show, \
sys_##_prefix##_##_name##_store)
-static void make_attrs_ro(struct attribute **attrs)
-{
- while (*attrs) {
- (*attrs)->mode = S_IRUGO;
- attrs++;
- }
-}
-
/*
* ipl section
*/
static __init enum ipl_type get_ipl_type(void)
{
- struct ipl_parameter_block *ipl = IPL_PARMBLOCK_START;
-
- if (!(ipl_flags & IPL_DEVNO_VALID))
+ if (!ipl_block_valid)
return IPL_TYPE_UNKNOWN;
- if (!(ipl_flags & IPL_PARMBLOCK_VALID))
+
+ switch (ipl_block.hdr.pbt) {
+ case DIAG308_IPL_TYPE_CCW:
return IPL_TYPE_CCW;
- if (ipl->hdr.version > IPL_MAX_SUPPORTED_VERSION)
- return IPL_TYPE_UNKNOWN;
- if (ipl->hdr.pbt != DIAG308_IPL_TYPE_FCP)
- return IPL_TYPE_UNKNOWN;
- if (ipl->ipl_info.fcp.opt == DIAG308_IPL_OPT_DUMP)
- return IPL_TYPE_FCP_DUMP;
- return IPL_TYPE_FCP;
+ case DIAG308_IPL_TYPE_FCP:
+ if (ipl_block.ipl_info.fcp.opt == DIAG308_IPL_OPT_DUMP)
+ return IPL_TYPE_FCP_DUMP;
+ else
+ return IPL_TYPE_FCP;
+ }
+ return IPL_TYPE_UNKNOWN;
}
struct ipl_info ipl_info;
@@ -338,7 +300,7 @@ size_t append_ipl_vmparm(char *dest, size_t size)
size_t rc;
rc = 0;
- if (diag308_set_works && (ipl_block.hdr.pbt == DIAG308_IPL_TYPE_CCW))
+ if (ipl_block_valid && ipl_block.hdr.pbt == DIAG308_IPL_TYPE_CCW)
rc = reipl_get_ascii_vmparm(dest, size, &ipl_block);
else
dest[0] = 0;
@@ -401,7 +363,7 @@ size_t append_ipl_scpdata(char *dest, size_t len)
size_t rc;
rc = 0;
- if (ipl_block.hdr.pbt == DIAG308_IPL_TYPE_FCP)
+ if (ipl_block_valid && ipl_block.hdr.pbt == DIAG308_IPL_TYPE_FCP)
rc = reipl_append_ascii_scpdata(dest, len, &ipl_block);
else
dest[0] = 0;
@@ -415,14 +377,14 @@ static struct kobj_attribute sys_ipl_vm_parm_attr =
static ssize_t sys_ipl_device_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
- struct ipl_parameter_block *ipl = IPL_PARMBLOCK_START;
-
switch (ipl_info.type) {
case IPL_TYPE_CCW:
- return sprintf(page, "0.%x.%04x\n", ipl_ssid, ipl_devno);
+ return sprintf(page, "0.%x.%04x\n", ipl_block.ipl_info.ccw.ssid,
+ ipl_block.ipl_info.ccw.devno);
case IPL_TYPE_FCP:
case IPL_TYPE_FCP_DUMP:
- return sprintf(page, "0.0.%04x\n", ipl->ipl_info.fcp.devno);
+ return sprintf(page, "0.0.%04x\n",
+ ipl_block.ipl_info.fcp.devno);
default:
return 0;
}
@@ -435,8 +397,8 @@ static ssize_t ipl_parameter_read(struct file *filp, struct kobject *kobj,
struct bin_attribute *attr, char *buf,
loff_t off, size_t count)
{
- return memory_read_from_buffer(buf, count, &off, IPL_PARMBLOCK_START,
- IPL_PARMBLOCK_SIZE);
+ return memory_read_from_buffer(buf, count, &off, &ipl_block,
+ ipl_block.hdr.len);
}
static struct bin_attribute ipl_parameter_attr =
__BIN_ATTR(binary_parameter, S_IRUGO, ipl_parameter_read, NULL,
@@ -446,8 +408,8 @@ static ssize_t ipl_scp_data_read(struct file *filp, struct kobject *kobj,
struct bin_attribute *attr, char *buf,
loff_t off, size_t count)
{
- unsigned int size = IPL_PARMBLOCK_START->ipl_info.fcp.scp_data_len;
- void *scp_data = &IPL_PARMBLOCK_START->ipl_info.fcp.scp_data;
+ unsigned int size = ipl_block.ipl_info.fcp.scp_data_len;
+ void *scp_data = &ipl_block.ipl_info.fcp.scp_data;
return memory_read_from_buffer(buf, count, &off, scp_data, size);
}
@@ -462,14 +424,14 @@ static struct bin_attribute *ipl_fcp_bin_attrs[] = {
/* FCP ipl device attributes */
-DEFINE_IPL_ATTR_RO(ipl_fcp, wwpn, "0x%016llx\n", (unsigned long long)
- IPL_PARMBLOCK_START->ipl_info.fcp.wwpn);
-DEFINE_IPL_ATTR_RO(ipl_fcp, lun, "0x%016llx\n", (unsigned long long)
- IPL_PARMBLOCK_START->ipl_info.fcp.lun);
-DEFINE_IPL_ATTR_RO(ipl_fcp, bootprog, "%lld\n", (unsigned long long)
- IPL_PARMBLOCK_START->ipl_info.fcp.bootprog);
-DEFINE_IPL_ATTR_RO(ipl_fcp, br_lba, "%lld\n", (unsigned long long)
- IPL_PARMBLOCK_START->ipl_info.fcp.br_lba);
+DEFINE_IPL_ATTR_RO(ipl_fcp, wwpn, "0x%016llx\n",
+ (unsigned long long)ipl_block.ipl_info.fcp.wwpn);
+DEFINE_IPL_ATTR_RO(ipl_fcp, lun, "0x%016llx\n",
+ (unsigned long long)ipl_block.ipl_info.fcp.lun);
+DEFINE_IPL_ATTR_RO(ipl_fcp, bootprog, "%lld\n",
+ (unsigned long long)ipl_block.ipl_info.fcp.bootprog);
+DEFINE_IPL_ATTR_RO(ipl_fcp, br_lba, "%lld\n",
+ (unsigned long long)ipl_block.ipl_info.fcp.br_lba);
static ssize_t ipl_ccw_loadparm_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
@@ -545,10 +507,6 @@ static void __ipl_run(void *unused)
{
__bpon();
diag308(DIAG308_LOAD_CLEAR, NULL);
- if (MACHINE_IS_VM)
- __cpcmd("IPL", NULL, 0, NULL);
- else if (ipl_info.type == IPL_TYPE_CCW)
- reipl_ccw_dev(&ipl_info.data.ccw.dev_id);
}
static void ipl_run(struct shutdown_trigger *trigger)
@@ -776,6 +734,7 @@ static ssize_t reipl_generic_loadparm_store(struct ipl_parameter_block *ipb,
/* copy and convert to ebcdic */
memcpy(ipb->hdr.loadparm, buf, lp_len);
ASCEBC(ipb->hdr.loadparm, LOADPARM_LEN);
+ ipb->hdr.flags |= DIAG308_FLAGS_LP_VALID;
return len;
}
@@ -938,11 +897,10 @@ static struct attribute_group reipl_nss_attr_group = {
.attrs = reipl_nss_attrs,
};
-static void set_reipl_block_actual(struct ipl_parameter_block *reipl_block)
+void set_os_info_reipl_block(void)
{
- reipl_block_actual = reipl_block;
os_info_entry_add(OS_INFO_REIPL_BLOCK, reipl_block_actual,
- reipl_block->hdr.len);
+ reipl_block_actual->hdr.len);
}
/* reipl type */
@@ -954,38 +912,16 @@ static int reipl_set_type(enum ipl_type type)
switch(type) {
case IPL_TYPE_CCW:
- if (diag308_set_works)
- reipl_method = REIPL_METHOD_CCW_DIAG;
- else if (MACHINE_IS_VM)
- reipl_method = REIPL_METHOD_CCW_VM;
- else
- reipl_method = REIPL_METHOD_CCW_CIO;
- set_reipl_block_actual(reipl_block_ccw);
+ reipl_block_actual = reipl_block_ccw;
break;
case IPL_TYPE_FCP:
- if (diag308_set_works)
- reipl_method = REIPL_METHOD_FCP_RW_DIAG;
- else if (MACHINE_IS_VM)
- reipl_method = REIPL_METHOD_FCP_RO_VM;
- else
- reipl_method = REIPL_METHOD_FCP_RO_DIAG;
- set_reipl_block_actual(reipl_block_fcp);
- break;
- case IPL_TYPE_FCP_DUMP:
- reipl_method = REIPL_METHOD_FCP_DUMP;
+ reipl_block_actual = reipl_block_fcp;
break;
case IPL_TYPE_NSS:
- if (diag308_set_works)
- reipl_method = REIPL_METHOD_NSS_DIAG;
- else
- reipl_method = REIPL_METHOD_NSS;
- set_reipl_block_actual(reipl_block_nss);
- break;
- case IPL_TYPE_UNKNOWN:
- reipl_method = REIPL_METHOD_DEFAULT;
+ reipl_block_actual = reipl_block_nss;
break;
default:
- BUG();
+ break;
}
reipl_type = type;
return 0;
@@ -1018,77 +954,25 @@ static struct kobj_attribute reipl_type_attr =
static struct kset *reipl_kset;
static struct kset *reipl_fcp_kset;
-static void get_ipl_string(char *dst, struct ipl_parameter_block *ipb,
- const enum ipl_method m)
-{
- char loadparm[LOADPARM_LEN + 1] = {};
- char vmparm[DIAG308_VMPARM_SIZE + 1] = {};
- char nss_name[NSS_NAME_SIZE + 1] = {};
- size_t pos = 0;
-
- reipl_get_ascii_loadparm(loadparm, ipb);
- reipl_get_ascii_nss_name(nss_name, ipb);
- reipl_get_ascii_vmparm(vmparm, sizeof(vmparm), ipb);
-
- switch (m) {
- case REIPL_METHOD_CCW_VM:
- pos = sprintf(dst, "IPL %X CLEAR", ipb->ipl_info.ccw.devno);
- break;
- case REIPL_METHOD_NSS:
- pos = sprintf(dst, "IPL %s", nss_name);
- break;
- default:
- break;
- }
- if (strlen(loadparm) > 0)
- pos += sprintf(dst + pos, " LOADPARM '%s'", loadparm);
- if (strlen(vmparm) > 0)
- sprintf(dst + pos, " PARM %s", vmparm);
-}
-
static void __reipl_run(void *unused)
{
- struct ccw_dev_id devid;
- static char buf[128];
-
- switch (reipl_method) {
- case REIPL_METHOD_CCW_CIO:
- devid.ssid = reipl_block_ccw->ipl_info.ccw.ssid;
- devid.devno = reipl_block_ccw->ipl_info.ccw.devno;
- reipl_ccw_dev(&devid);
- break;
- case REIPL_METHOD_CCW_VM:
- get_ipl_string(buf, reipl_block_ccw, REIPL_METHOD_CCW_VM);
- __cpcmd(buf, NULL, 0, NULL);
- break;
- case REIPL_METHOD_CCW_DIAG:
+ switch (reipl_type) {
+ case IPL_TYPE_CCW:
diag308(DIAG308_SET, reipl_block_ccw);
diag308(DIAG308_LOAD_CLEAR, NULL);
break;
- case REIPL_METHOD_FCP_RW_DIAG:
+ case IPL_TYPE_FCP:
diag308(DIAG308_SET, reipl_block_fcp);
diag308(DIAG308_LOAD_CLEAR, NULL);
break;
- case REIPL_METHOD_FCP_RO_DIAG:
- diag308(DIAG308_LOAD_CLEAR, NULL);
- break;
- case REIPL_METHOD_FCP_RO_VM:
- __cpcmd("IPL", NULL, 0, NULL);
- break;
- case REIPL_METHOD_NSS_DIAG:
+ case IPL_TYPE_NSS:
diag308(DIAG308_SET, reipl_block_nss);
diag308(DIAG308_LOAD_CLEAR, NULL);
break;
- case REIPL_METHOD_NSS:
- get_ipl_string(buf, reipl_block_nss, REIPL_METHOD_NSS);
- __cpcmd(buf, NULL, 0, NULL);
- break;
- case REIPL_METHOD_DEFAULT:
- if (MACHINE_IS_VM)
- __cpcmd("IPL", NULL, 0, NULL);
+ case IPL_TYPE_UNKNOWN:
diag308(DIAG308_LOAD_CLEAR, NULL);
break;
- case REIPL_METHOD_FCP_DUMP:
+ case IPL_TYPE_FCP_DUMP:
break;
}
disabled_wait((unsigned long) __builtin_return_address(0));
@@ -1119,7 +1003,7 @@ static void reipl_block_ccw_fill_parms(struct ipl_parameter_block *ipb)
ipb->hdr.flags = DIAG308_FLAGS_LP_VALID;
/* VM PARM */
- if (MACHINE_IS_VM && diag308_set_works &&
+ if (MACHINE_IS_VM && ipl_block_valid &&
(ipl_block.ipl_info.ccw.vm_flags & DIAG308_VM_FLAGS_VP_VALID)) {
ipb->ipl_info.ccw.vm_flags |= DIAG308_VM_FLAGS_VP_VALID;
@@ -1141,9 +1025,6 @@ static int __init reipl_nss_init(void)
if (!reipl_block_nss)
return -ENOMEM;
- if (!diag308_set_works)
- sys_reipl_nss_vmparm_attr.attr.mode = S_IRUGO;
-
rc = sysfs_create_group(&reipl_kset->kobj, &reipl_nss_attr_group);
if (rc)
return rc;
@@ -1161,24 +1042,16 @@ static int __init reipl_ccw_init(void)
if (!reipl_block_ccw)
return -ENOMEM;
- if (MACHINE_IS_VM) {
- if (!diag308_set_works)
- sys_reipl_ccw_vmparm_attr.attr.mode = S_IRUGO;
- rc = sysfs_create_group(&reipl_kset->kobj,
- &reipl_ccw_attr_group_vm);
- } else {
- if(!diag308_set_works)
- sys_reipl_ccw_loadparm_attr.attr.mode = S_IRUGO;
- rc = sysfs_create_group(&reipl_kset->kobj,
- &reipl_ccw_attr_group_lpar);
- }
+ rc = sysfs_create_group(&reipl_kset->kobj,
+ MACHINE_IS_VM ? &reipl_ccw_attr_group_vm
+ : &reipl_ccw_attr_group_lpar);
if (rc)
return rc;
reipl_block_ccw_init(reipl_block_ccw);
if (ipl_info.type == IPL_TYPE_CCW) {
- reipl_block_ccw->ipl_info.ccw.ssid = ipl_ssid;
- reipl_block_ccw->ipl_info.ccw.devno = ipl_devno;
+ reipl_block_ccw->ipl_info.ccw.ssid = ipl_block.ipl_info.ccw.ssid;
+ reipl_block_ccw->ipl_info.ccw.devno = ipl_block.ipl_info.ccw.devno;
reipl_block_ccw_fill_parms(reipl_block_ccw);
}
@@ -1190,14 +1063,6 @@ static int __init reipl_fcp_init(void)
{
int rc;
- if (!diag308_set_works) {
- if (ipl_info.type == IPL_TYPE_FCP) {
- make_attrs_ro(reipl_fcp_attrs);
- sys_reipl_fcp_scp_data_attr.attr.mode = S_IRUGO;
- } else
- return 0;
- }
-
reipl_block_fcp = (void *) get_zeroed_page(GFP_KERNEL);
if (!reipl_block_fcp)
return -ENOMEM;
@@ -1218,7 +1083,7 @@ static int __init reipl_fcp_init(void)
}
if (ipl_info.type == IPL_TYPE_FCP) {
- memcpy(reipl_block_fcp, IPL_PARMBLOCK_START, PAGE_SIZE);
+ memcpy(reipl_block_fcp, &ipl_block, sizeof(ipl_block));
/*
* Fix loadparm: There are systems where the (SCSI) LOADPARM
* is invalid in the SCSI IPL parameter block, so take it
@@ -1340,21 +1205,6 @@ static int dump_set_type(enum dump_type type)
{
if (!(dump_capabilities & type))
return -EINVAL;
- switch (type) {
- case DUMP_TYPE_CCW:
- if (diag308_set_works)
- dump_method = DUMP_METHOD_CCW_DIAG;
- else if (MACHINE_IS_VM)
- dump_method = DUMP_METHOD_CCW_VM;
- else
- dump_method = DUMP_METHOD_CCW_CIO;
- break;
- case DUMP_TYPE_FCP:
- dump_method = DUMP_METHOD_FCP_DIAG;
- break;
- default:
- dump_method = DUMP_METHOD_NONE;
- }
dump_type = type;
return 0;
}
@@ -1397,25 +1247,11 @@ static void diag308_dump(void *dump_block)
static void __dump_run(void *unused)
{
- struct ccw_dev_id devid;
- static char buf[100];
-
- switch (dump_method) {
- case DUMP_METHOD_CCW_CIO:
- devid.ssid = dump_block_ccw->ipl_info.ccw.ssid;
- devid.devno = dump_block_ccw->ipl_info.ccw.devno;
- reipl_ccw_dev(&devid);
- break;
- case DUMP_METHOD_CCW_VM:
- sprintf(buf, "STORE STATUS");
- __cpcmd(buf, NULL, 0, NULL);
- sprintf(buf, "IPL %X", dump_block_ccw->ipl_info.ccw.devno);
- __cpcmd(buf, NULL, 0, NULL);
- break;
- case DUMP_METHOD_CCW_DIAG:
+ switch (dump_type) {
+ case DUMP_TYPE_CCW:
diag308_dump(dump_block_ccw);
break;
- case DUMP_METHOD_FCP_DIAG:
+ case DUMP_TYPE_FCP:
diag308_dump(dump_block_fcp);
break;
default:
@@ -1425,7 +1261,7 @@ static void __dump_run(void *unused)
static void dump_run(struct shutdown_trigger *trigger)
{
- if (dump_method == DUMP_METHOD_NONE)
+ if (dump_type == DUMP_TYPE_NONE)
return;
smp_send_stop();
smp_call_ipl_cpu(__dump_run, NULL);
@@ -1457,8 +1293,6 @@ static int __init dump_fcp_init(void)
if (!sclp_ipl_info.has_dump)
return 0; /* LDIPL DUMP is not installed */
- if (!diag308_set_works)
- return 0;
dump_block_fcp = (void *) get_zeroed_page(GFP_KERNEL);
if (!dump_block_fcp)
return -ENOMEM;
@@ -1516,18 +1350,9 @@ static void dump_reipl_run(struct shutdown_trigger *trigger)
dump_run(trigger);
}
-static int __init dump_reipl_init(void)
-{
- if (!diag308_set_works)
- return -EOPNOTSUPP;
- else
- return 0;
-}
-
static struct shutdown_action __refdata dump_reipl_action = {
.name = SHUTDOWN_ACTION_DUMP_REIPL_STR,
.fn = dump_reipl_run,
- .init = dump_reipl_init,
};
/*
@@ -1838,10 +1663,8 @@ static int __init s390_ipl_init(void)
* case the system is booted from HMC. Fortunately in this case
* READ SCP info provides the correct value.
*/
- if (memcmp(sclp_ipl_info.loadparm, str, sizeof(str)) == 0 &&
- diag308_set_works)
- memcpy(sclp_ipl_info.loadparm, ipl_block.hdr.loadparm,
- LOADPARM_LEN);
+ if (memcmp(sclp_ipl_info.loadparm, str, sizeof(str)) == 0 && ipl_block_valid)
+ memcpy(sclp_ipl_info.loadparm, ipl_block.hdr.loadparm, LOADPARM_LEN);
shutdown_actions_init();
shutdown_triggers_init();
return 0;
@@ -1921,19 +1744,20 @@ static struct notifier_block on_panic_nb = {
void __init setup_ipl(void)
{
+ BUILD_BUG_ON(sizeof(struct ipl_parameter_block) != PAGE_SIZE);
+
ipl_info.type = get_ipl_type();
switch (ipl_info.type) {
case IPL_TYPE_CCW:
- ipl_info.data.ccw.dev_id.ssid = ipl_ssid;
- ipl_info.data.ccw.dev_id.devno = ipl_devno;
+ ipl_info.data.ccw.dev_id.ssid = ipl_block.ipl_info.ccw.ssid;
+ ipl_info.data.ccw.dev_id.devno = ipl_block.ipl_info.ccw.devno;
break;
case IPL_TYPE_FCP:
case IPL_TYPE_FCP_DUMP:
ipl_info.data.fcp.dev_id.ssid = 0;
- ipl_info.data.fcp.dev_id.devno =
- IPL_PARMBLOCK_START->ipl_info.fcp.devno;
- ipl_info.data.fcp.wwpn = IPL_PARMBLOCK_START->ipl_info.fcp.wwpn;
- ipl_info.data.fcp.lun = IPL_PARMBLOCK_START->ipl_info.fcp.lun;
+ ipl_info.data.fcp.dev_id.devno = ipl_block.ipl_info.fcp.devno;
+ ipl_info.data.fcp.wwpn = ipl_block.ipl_info.fcp.wwpn;
+ ipl_info.data.fcp.lun = ipl_block.ipl_info.fcp.lun;
break;
case IPL_TYPE_NSS:
case IPL_TYPE_UNKNOWN:
@@ -1943,85 +1767,21 @@ void __init setup_ipl(void)
atomic_notifier_chain_register(&panic_notifier_list, &on_panic_nb);
}
-void __init ipl_update_parameters(void)
+void __init ipl_store_parameters(void)
{
int rc;
rc = diag308(DIAG308_STORE, &ipl_block);
- if ((rc == DIAG308_RC_OK) || (rc == DIAG308_RC_NOCONFIG))
- diag308_set_works = 1;
-}
-
-void __init ipl_verify_parameters(void)
-{
- struct cio_iplinfo iplinfo;
-
- if (cio_get_iplinfo(&iplinfo))
- return;
-
- ipl_ssid = iplinfo.ssid;
- ipl_devno = iplinfo.devno;
- ipl_flags |= IPL_DEVNO_VALID;
- if (!iplinfo.is_qdio)
- return;
- ipl_flags |= IPL_PARMBLOCK_VALID;
-}
-
-static LIST_HEAD(rcall);
-static DEFINE_MUTEX(rcall_mutex);
-
-void register_reset_call(struct reset_call *reset)
-{
- mutex_lock(&rcall_mutex);
- list_add(&reset->list, &rcall);
- mutex_unlock(&rcall_mutex);
-}
-EXPORT_SYMBOL_GPL(register_reset_call);
-
-void unregister_reset_call(struct reset_call *reset)
-{
- mutex_lock(&rcall_mutex);
- list_del(&reset->list);
- mutex_unlock(&rcall_mutex);
-}
-EXPORT_SYMBOL_GPL(unregister_reset_call);
-
-static void do_reset_calls(void)
-{
- struct reset_call *reset;
-
- if (diag308_set_works) {
- diag308_reset();
- return;
- }
- list_for_each_entry(reset, &rcall, list)
- reset->fn();
+ if (rc == DIAG308_RC_OK && ipl_block.hdr.version <= IPL_MAX_SUPPORTED_VERSION)
+ ipl_block_valid = 1;
}
void s390_reset_system(void)
{
- struct lowcore *lc;
-
- lc = (struct lowcore *)(unsigned long) store_prefix();
-
- /* Stack for interrupt/machine check handler */
- lc->panic_stack = S390_lowcore.panic_stack;
-
/* Disable prefixing */
set_prefix(0);
/* Disable lowcore protection */
- __ctl_clear_bit(0,28);
-
- /* Set new machine check handler */
- S390_lowcore.mcck_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT;
- S390_lowcore.mcck_new_psw.addr =
- (unsigned long) s390_base_mcck_handler;
-
- /* Set new program check handler */
- S390_lowcore.program_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT;
- S390_lowcore.program_new_psw.addr =
- (unsigned long) s390_base_pgm_handler;
-
- do_reset_calls();
+ __ctl_clear_bit(0, 28);
+ diag308_reset();
}
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index a80050bbe2e4..b7020e721ae3 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -20,7 +20,6 @@
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/smp.h>
-#include <asm/reset.h>
#include <asm/ipl.h>
#include <asm/diag.h>
#include <asm/elf.h>
@@ -253,6 +252,7 @@ void machine_shutdown(void)
void machine_crash_shutdown(struct pt_regs *regs)
{
+ set_os_info_reipl_block();
}
/*
diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c
index 14867ec5f726..f236ce8757e8 100644
--- a/arch/s390/kernel/nospec-branch.c
+++ b/arch/s390/kernel/nospec-branch.c
@@ -72,7 +72,7 @@ static int __init nospectre_v2_setup_early(char *str)
}
early_param("nospectre_v2", nospectre_v2_setup_early);
-static int __init spectre_v2_auto_early(void)
+void __init nospec_auto_detect(void)
{
if (IS_ENABLED(CC_USING_EXPOLINE)) {
/*
@@ -87,11 +87,7 @@ static int __init spectre_v2_auto_early(void)
* nobp setting decides what is done, this depends on the
* CONFIG_KERNEL_NP option and the nobp/nospec parameters.
*/
- return 0;
}
-#ifdef CONFIG_EXPOLINE_AUTO
-early_initcall(spectre_v2_auto_early);
-#endif
static int __init spectre_v2_setup_early(char *str)
{
@@ -102,7 +98,7 @@ static int __init spectre_v2_setup_early(char *str)
if (str && !strncmp(str, "off", 3))
nospec_disable = 1;
if (str && !strncmp(str, "auto", 4))
- spectre_v2_auto_early();
+ nospec_auto_detect();
return 0;
}
early_param("spectre_v2", spectre_v2_setup_early);
diff --git a/arch/s390/kernel/reipl.S b/arch/s390/kernel/reipl.S
index a40ebd1d29d0..73cc3750f0d3 100644
--- a/arch/s390/kernel/reipl.S
+++ b/arch/s390/kernel/reipl.S
@@ -75,90 +75,3 @@ ENTRY(store_status)
.align 8
.Lclkcmp: .quad 0x0000000000000000
.previous
-
-#
-# do_reipl_asm
-# Parameter: r2 = schid of reipl device
-#
-
-ENTRY(do_reipl_asm)
- basr %r13,0
-.Lpg0: lpswe .Lnewpsw-.Lpg0(%r13)
-.Lpg1: lgr %r3,%r2
- larl %r2,.Lstatus
- brasl %r14,store_status
-
-.Lstatus: lctlg %c6,%c6,.Lall-.Lpg0(%r13)
- lgr %r1,%r2
- mvc __LC_PGM_NEW_PSW(16),.Lpcnew-.Lpg0(%r13)
- stsch .Lschib-.Lpg0(%r13)
- oi .Lschib+5-.Lpg0(%r13),0x84
-.Lecs: xi .Lschib+27-.Lpg0(%r13),0x01
- msch .Lschib-.Lpg0(%r13)
- lghi %r0,5
-.Lssch: ssch .Liplorb-.Lpg0(%r13)
- jz .L001
- brct %r0,.Lssch
- bas %r14,.Ldisab-.Lpg0(%r13)
-.L001: mvc __LC_IO_NEW_PSW(16),.Lionew-.Lpg0(%r13)
-.Ltpi: lpswe .Lwaitpsw-.Lpg0(%r13)
-.Lcont: c %r1,__LC_SUBCHANNEL_ID
- jnz .Ltpi
- clc __LC_IO_INT_PARM(4),.Liplorb-.Lpg0(%r13)
- jnz .Ltpi
- tsch .Liplirb-.Lpg0(%r13)
- tm .Liplirb+9-.Lpg0(%r13),0xbf
- jz .L002
- bas %r14,.Ldisab-.Lpg0(%r13)
-.L002: tm .Liplirb+8-.Lpg0(%r13),0xf3
- jz .L003
- bas %r14,.Ldisab-.Lpg0(%r13)
-.L003: st %r1,__LC_SUBCHANNEL_ID
- lhi %r1,0 # mode 0 = esa
- slr %r0,%r0 # set cpuid to zero
- sigp %r1,%r0,SIGP_SET_ARCHITECTURE # switch to esa mode
- lpsw 0
-.Ldisab: sll %r14,1
- srl %r14,1 # need to kill hi bit to avoid specification exceptions.
- st %r14,.Ldispsw+12-.Lpg0(%r13)
- lpswe .Ldispsw-.Lpg0(%r13)
- .align 8
-.Lall: .quad 0x00000000ff000000
- .align 16
-/*
- * These addresses have to be 31 bit otherwise
- * the sigp will throw a specifcation exception
- * when switching to ESA mode as bit 31 be set
- * in the ESA psw.
- * Bit 31 of the addresses has to be 0 for the
- * 31bit lpswe instruction a fact they appear to have
- * omitted from the pop.
- */
-.Lnewpsw: .quad 0x0000000080000000
- .quad .Lpg1
-.Lpcnew: .quad 0x0000000080000000
- .quad .Lecs
-.Lionew: .quad 0x0000000080000000
- .quad .Lcont
-.Lwaitpsw: .quad 0x0202000080000000
- .quad .Ltpi
-.Ldispsw: .quad 0x0002000080000000
- .quad 0x0000000000000000
-.Liplccws: .long 0x02000000,0x60000018
- .long 0x08000008,0x20000001
-.Liplorb: .long 0x0049504c,0x0040ff80
- .long 0x00000000+.Liplccws
-.Lschib: .long 0x00000000,0x00000000
- .long 0x00000000,0x00000000
- .long 0x00000000,0x00000000
- .long 0x00000000,0x00000000
- .long 0x00000000,0x00000000
- .long 0x00000000,0x00000000
-.Liplirb: .long 0x00000000,0x00000000
- .long 0x00000000,0x00000000
- .long 0x00000000,0x00000000
- .long 0x00000000,0x00000000
- .long 0x00000000,0x00000000
- .long 0x00000000,0x00000000
- .long 0x00000000,0x00000000
- .long 0x00000000,0x00000000
diff --git a/arch/s390/kernel/relocate_kernel.S b/arch/s390/kernel/relocate_kernel.S
index 9c2c96da23d0..c97c2d40fe15 100644
--- a/arch/s390/kernel/relocate_kernel.S
+++ b/arch/s390/kernel/relocate_kernel.S
@@ -29,33 +29,6 @@
ENTRY(relocate_kernel)
basr %r13,0 # base address
.base:
- stctg %c0,%c15,ctlregs-.base(%r13)
- stmg %r0,%r15,gprregs-.base(%r13)
- lghi %r0,3
- sllg %r0,%r0,31
- stg %r0,0x1d0(%r0)
- la %r0,.back_pgm-.base(%r13)
- stg %r0,0x1d8(%r0)
- la %r1,load_psw-.base(%r13)
- mvc 0(8,%r0),0(%r1)
- la %r0,.back-.base(%r13)
- st %r0,4(%r0)
- oi 4(%r0),0x80
- lghi %r0,0
- diag %r0,%r0,0x308
- .back:
- lhi %r1,1 # mode 1 = esame
- sigp %r1,%r0,SIGP_SET_ARCHITECTURE # switch to esame mode
- sam64 # switch to 64 bit addressing mode
- basr %r13,0
- .back_base:
- oi have_diag308-.back_base(%r13),0x01
- lctlg %c0,%c15,ctlregs-.back_base(%r13)
- lmg %r0,%r15,gprregs-.back_base(%r13)
- j .top
- .back_pgm:
- lmg %r0,%r15,gprregs-.base(%r13)
- .top:
lghi %r7,PAGE_SIZE # load PAGE_SIZE in r7
lghi %r9,PAGE_SIZE # load PAGE_SIZE in r9
lg %r5,0(%r2) # read another word for indirection page
@@ -64,55 +37,36 @@ ENTRY(relocate_kernel)
je .indir_check # NO, goto "indir_check"
lgr %r6,%r5 # r6 = r5
nill %r6,0xf000 # mask it out and...
- j .top # ...next iteration
+ j .base # ...next iteration
.indir_check:
tml %r5,0x2 # is it a indirection page?
je .done_test # NO, goto "done_test"
nill %r5,0xf000 # YES, mask out,
lgr %r2,%r5 # move it into the right register,
- j .top # and read next...
+ j .base # and read next...
.done_test:
tml %r5,0x4 # is it the done indicator?
je .source_test # NO! Well, then it should be the source indicator...
j .done # ok, lets finish it here...
.source_test:
tml %r5,0x8 # it should be a source indicator...
- je .top # NO, ignore it...
+ je .base # NO, ignore it...
lgr %r8,%r5 # r8 = r5
nill %r8,0xf000 # masking
0: mvcle %r6,%r8,0x0 # copy PAGE_SIZE bytes from r8 to r6 - pad with 0
jo 0b
- j .top
+ j .base
.done:
sgr %r0,%r0 # clear register r0
la %r4,load_psw-.base(%r13) # load psw-address into the register
o %r3,4(%r4) # or load address into psw
st %r3,4(%r4)
mvc 0(8,%r0),0(%r4) # copy psw to absolute address 0
- tm have_diag308-.base(%r13),0x01
- jno .no_diag308
diag %r0,%r0,0x308
- .no_diag308:
- sam31 # 31 bit mode
- sr %r1,%r1 # erase register r1
- sr %r2,%r2 # erase register r2
- sigp %r1,%r2,SIGP_SET_ARCHITECTURE # set cpuid to zero
- lpsw 0 # hopefully start new kernel...
.align 8
load_psw:
.long 0x00080000,0x80000000
- ctlregs:
- .rept 16
- .quad 0
- .endr
- gprregs:
- .rept 16
- .quad 0
- .endr
- have_diag308:
- .byte 0
- .align 8
relocate_kernel_end:
.align 8
.globl relocate_kernel_len
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 7b58a712f818..fc3b4aa185cc 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -894,6 +894,9 @@ void __init setup_arch(char **cmdline_p)
init_mm.end_data = (unsigned long) _edata;
init_mm.brk = (unsigned long) _end;
+ if (IS_ENABLED(CONFIG_EXPOLINE_AUTO))
+ nospec_auto_detect();
+
parse_early_param();
#ifdef CONFIG_CRASH_DUMP
/* Deactivate elfcorehdr= kernel parameter */
diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
index 05c8abd864f1..2809d11c7a28 100644
--- a/arch/s390/mm/gup.c
+++ b/arch/s390/mm/gup.c
@@ -220,6 +220,8 @@ static inline int gup_p4d_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr,
/*
* Like get_user_pages_fast() except its IRQ-safe in that it won't fall
* back to the regular GUP.
+ * Note a difference with get_user_pages_fast: this always returns the
+ * number of pages pinned, 0 if no pages were pinned.
*/
int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
struct page **pages)
diff --git a/arch/sh/boards/of-generic.c b/arch/sh/boards/of-generic.c
index 4feb7c86f4ac..46b2481eec90 100644
--- a/arch/sh/boards/of-generic.c
+++ b/arch/sh/boards/of-generic.c
@@ -126,12 +126,6 @@ static void __init sh_of_setup(char **cmdline_p)
{
struct device_node *root;
-#ifdef CONFIG_USE_BUILTIN_DTB
- unflatten_and_copy_device_tree();
-#else
- unflatten_device_tree();
-#endif
-
board_time_init = sh_of_time_init;
sh_mv.mv_name = "Unknown SH model";
diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c
index 5976a2c8a3e3..e5b7437ab4af 100644
--- a/arch/sh/drivers/pci/pci.c
+++ b/arch/sh/drivers/pci/pci.c
@@ -49,6 +49,8 @@ static void pcibios_scanbus(struct pci_channel *hose)
for (i = 0; i < hose->nr_resources; i++) {
res = hose->resources + i;
offset = 0;
+ if (res->flags & IORESOURCE_DISABLED)
+ continue;
if (res->flags & IORESOURCE_IO)
offset = hose->io_offset;
else if (res->flags & IORESOURCE_MEM)
@@ -102,6 +104,9 @@ int register_pci_controller(struct pci_channel *hose)
for (i = 0; i < hose->nr_resources; i++) {
struct resource *res = hose->resources + i;
+ if (res->flags & IORESOURCE_DISABLED)
+ continue;
+
if (res->flags & IORESOURCE_IO) {
if (request_resource(&ioport_resource, res) < 0)
goto out;
diff --git a/arch/sh/drivers/pci/pcie-sh7786.c b/arch/sh/drivers/pci/pcie-sh7786.c
index 0167a7352719..382e7ecf4c82 100644
--- a/arch/sh/drivers/pci/pcie-sh7786.c
+++ b/arch/sh/drivers/pci/pcie-sh7786.c
@@ -19,6 +19,7 @@
#include <linux/clk.h>
#include <linux/sh_clk.h>
#include <linux/sh_intc.h>
+#include <cpu/sh7786.h>
#include "pcie-sh7786.h"
#include <asm/sizes.h>
@@ -32,6 +33,7 @@ struct sh7786_pcie_port {
static struct sh7786_pcie_port *sh7786_pcie_ports;
static unsigned int nr_ports;
+static unsigned long dma_pfn_offset;
static struct sh7786_pcie_hwops {
int (*core_init)(void);
@@ -40,73 +42,73 @@ static struct sh7786_pcie_hwops {
static struct resource sh7786_pci0_resources[] = {
{
- .name = "PCIe0 IO",
+ .name = "PCIe0 MEM 0",
.start = 0xfd000000,
.end = 0xfd000000 + SZ_8M - 1,
- .flags = IORESOURCE_IO,
+ .flags = IORESOURCE_MEM,
}, {
- .name = "PCIe0 MEM 0",
+ .name = "PCIe0 MEM 1",
.start = 0xc0000000,
.end = 0xc0000000 + SZ_512M - 1,
.flags = IORESOURCE_MEM | IORESOURCE_MEM_32BIT,
}, {
- .name = "PCIe0 MEM 1",
+ .name = "PCIe0 MEM 2",
.start = 0x10000000,
.end = 0x10000000 + SZ_64M - 1,
.flags = IORESOURCE_MEM,
}, {
- .name = "PCIe0 MEM 2",
+ .name = "PCIe0 IO",
.start = 0xfe100000,
.end = 0xfe100000 + SZ_1M - 1,
- .flags = IORESOURCE_MEM,
+ .flags = IORESOURCE_IO,
},
};
static struct resource sh7786_pci1_resources[] = {
{
- .name = "PCIe1 IO",
+ .name = "PCIe1 MEM 0",
.start = 0xfd800000,
.end = 0xfd800000 + SZ_8M - 1,
- .flags = IORESOURCE_IO,
+ .flags = IORESOURCE_MEM,
}, {
- .name = "PCIe1 MEM 0",
+ .name = "PCIe1 MEM 1",
.start = 0xa0000000,
.end = 0xa0000000 + SZ_512M - 1,
.flags = IORESOURCE_MEM | IORESOURCE_MEM_32BIT,
}, {
- .name = "PCIe1 MEM 1",
+ .name = "PCIe1 MEM 2",
.start = 0x30000000,
.end = 0x30000000 + SZ_256M - 1,
.flags = IORESOURCE_MEM | IORESOURCE_MEM_32BIT,
}, {
- .name = "PCIe1 MEM 2",
+ .name = "PCIe1 IO",
.start = 0xfe300000,
.end = 0xfe300000 + SZ_1M - 1,
- .flags = IORESOURCE_MEM,
+ .flags = IORESOURCE_IO,
},
};
static struct resource sh7786_pci2_resources[] = {
{
- .name = "PCIe2 IO",
+ .name = "PCIe2 MEM 0",
.start = 0xfc800000,
.end = 0xfc800000 + SZ_4M - 1,
- .flags = IORESOURCE_IO,
+ .flags = IORESOURCE_MEM,
}, {
- .name = "PCIe2 MEM 0",
+ .name = "PCIe2 MEM 1",
.start = 0x80000000,
.end = 0x80000000 + SZ_512M - 1,
.flags = IORESOURCE_MEM | IORESOURCE_MEM_32BIT,
}, {
- .name = "PCIe2 MEM 1",
+ .name = "PCIe2 MEM 2",
.start = 0x20000000,
.end = 0x20000000 + SZ_256M - 1,
.flags = IORESOURCE_MEM | IORESOURCE_MEM_32BIT,
}, {
- .name = "PCIe2 MEM 2",
+ .name = "PCIe2 IO",
.start = 0xfcd00000,
.end = 0xfcd00000 + SZ_1M - 1,
- .flags = IORESOURCE_MEM,
+ .flags = IORESOURCE_IO,
},
};
@@ -301,7 +303,7 @@ static int __init pcie_init(struct sh7786_pcie_port *port)
{
struct pci_channel *chan = port->hose;
unsigned int data;
- phys_addr_t memphys;
+ phys_addr_t memstart, memend;
size_t memsize;
int ret, i, win;
@@ -357,15 +359,26 @@ static int __init pcie_init(struct sh7786_pcie_port *port)
data |= (0xff << 16);
pci_write_reg(chan, data, SH4A_PCIEMACCTLR);
- memphys = __pa(memory_start);
- memsize = roundup_pow_of_two(memory_end - memory_start);
+ memstart = __pa(memory_start);
+ memend = __pa(memory_end);
+ memsize = roundup_pow_of_two(memend - memstart);
+
+ /*
+ * The start address must be aligned on its size. So we round
+ * it down, and then recalculate the size so that it covers
+ * the entire memory.
+ */
+ memstart = ALIGN_DOWN(memstart, memsize);
+ memsize = roundup_pow_of_two(memend - memstart);
+
+ dma_pfn_offset = memstart >> PAGE_SHIFT;
/*
* If there's more than 512MB of memory, we need to roll over to
* LAR1/LAMR1.
*/
if (memsize > SZ_512M) {
- pci_write_reg(chan, memphys + SZ_512M, SH4A_PCIELAR1);
+ pci_write_reg(chan, memstart + SZ_512M, SH4A_PCIELAR1);
pci_write_reg(chan, ((memsize - SZ_512M) - SZ_256) | 1,
SH4A_PCIELAMR1);
memsize = SZ_512M;
@@ -381,7 +394,7 @@ static int __init pcie_init(struct sh7786_pcie_port *port)
* LAR0/LAMR0 covers up to the first 512MB, which is enough to
* cover all of lowmem on most platforms.
*/
- pci_write_reg(chan, memphys, SH4A_PCIELAR0);
+ pci_write_reg(chan, memstart, SH4A_PCIELAR0);
pci_write_reg(chan, (memsize - SZ_256) | 1, SH4A_PCIELAMR0);
/* Finish initialization */
@@ -438,6 +451,9 @@ static int __init pcie_init(struct sh7786_pcie_port *port)
* mode, so just skip them entirely.
*/
if ((res->flags & IORESOURCE_MEM_32BIT) && __in_29bit_mode())
+ res->flags |= IORESOURCE_DISABLED;
+
+ if (res->flags & IORESOURCE_DISABLED)
continue;
pci_write_reg(chan, 0x00000000, SH4A_PCIEPTCTLR(win));
@@ -472,6 +488,11 @@ int pcibios_map_platform_irq(const struct pci_dev *pdev, u8 slot, u8 pin)
return evt2irq(0xae0);
}
+void pcibios_bus_add_device(struct pci_dev *pdev)
+{
+ pdev->dev.dma_pfn_offset = dma_pfn_offset;
+}
+
static int __init sh7786_pcie_core_init(void)
{
/* Return the number of ports */
@@ -527,6 +548,7 @@ static struct sh7786_pcie_hwops sh7786_65nm_pcie_hwops __initdata = {
static int __init sh7786_pcie_init(void)
{
struct clk *platclk;
+ u32 mm_sel;
int i;
printk(KERN_NOTICE "PCI: Starting initialization.\n");
@@ -560,6 +582,16 @@ static int __init sh7786_pcie_init(void)
clk_enable(platclk);
+ mm_sel = sh7786_mm_sel();
+
+ /*
+ * Depending on the MMSELR register value, the PCIe0 MEM 1
+ * area may not be available. See Table 13.11 of the SH7786
+ * datasheet.
+ */
+ if (mm_sel != 1 && mm_sel != 2 && mm_sel != 5 && mm_sel != 6)
+ sh7786_pci0_resources[2].flags |= IORESOURCE_DISABLED;
+
printk(KERN_NOTICE "PCI: probing %d ports.\n", nr_ports);
for (i = 0; i < nr_ports; i++) {
diff --git a/arch/sh/include/asm/futex.h b/arch/sh/include/asm/futex.h
index 15bf07bfa96b..6d192f4908a7 100644
--- a/arch/sh/include/asm/futex.h
+++ b/arch/sh/include/asm/futex.h
@@ -37,10 +37,7 @@ static inline int arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval,
pagefault_disable();
do {
- if (op == FUTEX_OP_SET)
- ret = oldval = 0;
- else
- ret = get_user(oldval, uaddr);
+ ret = get_user(oldval, uaddr);
if (ret) break;
diff --git a/arch/sh/include/cpu-sh4/cpu/sh7786.h b/arch/sh/include/cpu-sh4/cpu/sh7786.h
index 0df09e638f09..96b8cb1f754a 100644
--- a/arch/sh/include/cpu-sh4/cpu/sh7786.h
+++ b/arch/sh/include/cpu-sh4/cpu/sh7786.h
@@ -14,6 +14,8 @@
#ifndef __CPU_SH7786_H__
#define __CPU_SH7786_H__
+#include <linux/io.h>
+
enum {
/* PA */
GPIO_PA7, GPIO_PA6, GPIO_PA5, GPIO_PA4,
@@ -131,4 +133,9 @@ enum {
GPIO_FN_IRL7, GPIO_FN_IRL6, GPIO_FN_IRL5, GPIO_FN_IRL4,
};
+static inline u32 sh7786_mm_sel(void)
+{
+ return __raw_readl(0xFC400020) & 0x7;
+}
+
#endif /* __CPU_SH7786_H__ */
diff --git a/arch/sh/kernel/dma-nommu.c b/arch/sh/kernel/dma-nommu.c
index 62b485107eae..178457d7620c 100644
--- a/arch/sh/kernel/dma-nommu.c
+++ b/arch/sh/kernel/dma-nommu.c
@@ -16,7 +16,8 @@ static dma_addr_t nommu_map_page(struct device *dev, struct page *page,
enum dma_data_direction dir,
unsigned long attrs)
{
- dma_addr_t addr = page_to_phys(page) + offset;
+ dma_addr_t addr = page_to_phys(page) + offset
+ - PFN_PHYS(dev->dma_pfn_offset);
WARN_ON(size == 0);
@@ -36,12 +37,14 @@ static int nommu_map_sg(struct device *dev, struct scatterlist *sg,
WARN_ON(nents == 0 || sg[0].length == 0);
for_each_sg(sg, s, nents, i) {
+ dma_addr_t offset = PFN_PHYS(dev->dma_pfn_offset);
+
BUG_ON(!sg_page(s));
if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
sh_sync_dma_for_device(sg_virt(s), s->length, dir);
- s->dma_address = sg_phys(s);
+ s->dma_address = sg_phys(s) - offset;
s->dma_length = s->length;
}
diff --git a/arch/sh/kernel/entry-common.S b/arch/sh/kernel/entry-common.S
index c001f782c5f1..28cc61216b64 100644
--- a/arch/sh/kernel/entry-common.S
+++ b/arch/sh/kernel/entry-common.S
@@ -255,7 +255,7 @@ debug_trap:
mov.l @r8, r8
jsr @r8
nop
- bra __restore_all
+ bra ret_from_exception
nop
CFI_ENDPROC
diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c
index b95c411d0333..d34e998b809f 100644
--- a/arch/sh/kernel/setup.c
+++ b/arch/sh/kernel/setup.c
@@ -330,6 +330,14 @@ void __init setup_arch(char **cmdline_p)
/* Let earlyprintk output early console messages */
early_platform_driver_probe("earlyprintk", 1, 1);
+#ifdef CONFIG_OF_FLATTREE
+#ifdef CONFIG_USE_BUILTIN_DTB
+ unflatten_and_copy_device_tree();
+#else
+ unflatten_device_tree();
+#endif
+#endif
+
paging_init();
#ifdef CONFIG_DUMMY_CONSOLE
diff --git a/arch/sh/mm/consistent.c b/arch/sh/mm/consistent.c
index 6ea3aab508f2..8ce98691d822 100644
--- a/arch/sh/mm/consistent.c
+++ b/arch/sh/mm/consistent.c
@@ -59,7 +59,7 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size,
split_page(pfn_to_page(virt_to_phys(ret) >> PAGE_SHIFT), order);
- *dma_handle = virt_to_phys(ret);
+ *dma_handle = virt_to_phys(ret) - PFN_PHYS(dev->dma_pfn_offset);
return ret_nocache;
}
@@ -69,7 +69,7 @@ void dma_generic_free_coherent(struct device *dev, size_t size,
unsigned long attrs)
{
int order = get_order(size);
- unsigned long pfn = dma_handle >> PAGE_SHIFT;
+ unsigned long pfn = (dma_handle >> PAGE_SHIFT) + dev->dma_pfn_offset;
int k;
for (k = 0; k < (1 << order); k++)
diff --git a/arch/sh/mm/gup.c b/arch/sh/mm/gup.c
index 8045b5bb7075..56c86ca98ecf 100644
--- a/arch/sh/mm/gup.c
+++ b/arch/sh/mm/gup.c
@@ -160,6 +160,8 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
/*
* Like get_user_pages_fast() except its IRQ-safe in that it won't fall
* back to the regular GUP.
+ * Note a difference with get_user_pages_fast: this always returns the
+ * number of pages pinned, 0 if no pages were pinned.
*/
int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
struct page **pages)
diff --git a/arch/sparc/mm/gup.c b/arch/sparc/mm/gup.c
index 357b6047653a..aee6dba83d0e 100644
--- a/arch/sparc/mm/gup.c
+++ b/arch/sparc/mm/gup.c
@@ -193,6 +193,10 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
return 1;
}
+/*
+ * Note a difference with get_user_pages_fast: this always returns the
+ * number of pages pinned, 0 if no pages were pinned.
+ */
int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
struct page **pages)
{
diff --git a/arch/sparc/vdso/Makefile b/arch/sparc/vdso/Makefile
index a6615d8864f7..dd0b5a92ffd0 100644
--- a/arch/sparc/vdso/Makefile
+++ b/arch/sparc/vdso/Makefile
@@ -29,9 +29,7 @@ vdso_img_cfiles := $(vdso_img-y:%=vdso-image-%.c)
vdso_img_sodbg := $(vdso_img-y:%=vdso%.so.dbg)
obj-y += $(vdso_img_objs)
targets += $(vdso_img_cfiles)
-targets += $(vdso_img_sodbg)
-.SECONDARY: $(vdso_img-y:%=$(obj)/vdso-image-%.c) \
- $(vdso_img-y:%=$(obj)/vdso%.so)
+targets += $(vdso_img_sodbg) $(vdso_img-y:%=vdso%.so)
export CPPFLAGS_vdso.lds += -P -C
diff --git a/arch/um/Kconfig.net b/arch/um/Kconfig.net
index e871af24d9cd..c390f3deb0dc 100644
--- a/arch/um/Kconfig.net
+++ b/arch/um/Kconfig.net
@@ -109,6 +109,17 @@ config UML_NET_DAEMON
more than one without conflict. If you don't need UML networking,
say N.
+config UML_NET_VECTOR
+ bool "Vector I/O high performance network devices"
+ depends on UML_NET
+ help
+ This User-Mode Linux network driver uses multi-message send
+ and receive functions. The host running the UML guest must have
+ a linux kernel version above 3.0 and a libc version > 2.13.
+ This driver provides tap, raw, gre and l2tpv3 network transports
+ with up to 4 times higher network throughput than the UML network
+ drivers.
+
config UML_NET_VDE
bool "VDE transport"
depends on UML_NET
diff --git a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile
index e7582e1d248c..16b3cebddafb 100644
--- a/arch/um/drivers/Makefile
+++ b/arch/um/drivers/Makefile
@@ -9,6 +9,7 @@
slip-objs := slip_kern.o slip_user.o
slirp-objs := slirp_kern.o slirp_user.o
daemon-objs := daemon_kern.o daemon_user.o
+vector-objs := vector_kern.o vector_user.o vector_transports.o
umcast-objs := umcast_kern.o umcast_user.o
net-objs := net_kern.o net_user.o
mconsole-objs := mconsole_kern.o mconsole_user.o
@@ -43,6 +44,7 @@ obj-$(CONFIG_STDERR_CONSOLE) += stderr_console.o
obj-$(CONFIG_UML_NET_SLIP) += slip.o slip_common.o
obj-$(CONFIG_UML_NET_SLIRP) += slirp.o slip_common.o
obj-$(CONFIG_UML_NET_DAEMON) += daemon.o
+obj-$(CONFIG_UML_NET_VECTOR) += vector.o
obj-$(CONFIG_UML_NET_VDE) += vde.o
obj-$(CONFIG_UML_NET_MCAST) += umcast.o
obj-$(CONFIG_UML_NET_PCAP) += pcap.o
@@ -61,7 +63,7 @@ obj-$(CONFIG_BLK_DEV_COW_COMMON) += cow_user.o
obj-$(CONFIG_UML_RANDOM) += random.o
# pcap_user.o must be added explicitly.
-USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o pcap_user.o vde_user.o
+USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o pcap_user.o vde_user.o vector_user.o
CFLAGS_null.o = -DDEV_NULL=$(DEV_NULL_PATH)
include arch/um/scripts/Makefile.rules
diff --git a/arch/um/drivers/chan_kern.c b/arch/um/drivers/chan_kern.c
index acbe6c67afba..05588f9466c7 100644
--- a/arch/um/drivers/chan_kern.c
+++ b/arch/um/drivers/chan_kern.c
@@ -171,56 +171,19 @@ int enable_chan(struct line *line)
return err;
}
-/* Items are added in IRQ context, when free_irq can't be called, and
- * removed in process context, when it can.
- * This handles interrupt sources which disappear, and which need to
- * be permanently disabled. This is discovered in IRQ context, but
- * the freeing of the IRQ must be done later.
- */
-static DEFINE_SPINLOCK(irqs_to_free_lock);
-static LIST_HEAD(irqs_to_free);
-
-void free_irqs(void)
-{
- struct chan *chan;
- LIST_HEAD(list);
- struct list_head *ele;
- unsigned long flags;
-
- spin_lock_irqsave(&irqs_to_free_lock, flags);
- list_splice_init(&irqs_to_free, &list);
- spin_unlock_irqrestore(&irqs_to_free_lock, flags);
-
- list_for_each(ele, &list) {
- chan = list_entry(ele, struct chan, free_list);
-
- if (chan->input && chan->enabled)
- um_free_irq(chan->line->driver->read_irq, chan);
- if (chan->output && chan->enabled)
- um_free_irq(chan->line->driver->write_irq, chan);
- chan->enabled = 0;
- }
-}
-
static void close_one_chan(struct chan *chan, int delay_free_irq)
{
- unsigned long flags;
-
if (!chan->opened)
return;
- if (delay_free_irq) {
- spin_lock_irqsave(&irqs_to_free_lock, flags);
- list_add(&chan->free_list, &irqs_to_free);
- spin_unlock_irqrestore(&irqs_to_free_lock, flags);
- }
- else {
- if (chan->input && chan->enabled)
- um_free_irq(chan->line->driver->read_irq, chan);
- if (chan->output && chan->enabled)
- um_free_irq(chan->line->driver->write_irq, chan);
- chan->enabled = 0;
- }
+ /* we can safely call free now - it will be marked
+ * as free and freed once the IRQ stopped processing
+ */
+ if (chan->input && chan->enabled)
+ um_free_irq(chan->line->driver->read_irq, chan);
+ if (chan->output && chan->enabled)
+ um_free_irq(chan->line->driver->write_irq, chan);
+ chan->enabled = 0;
if (chan->ops->close != NULL)
(*chan->ops->close)(chan->fd, chan->data);
diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c
index 366e57f5e8d6..8d80b27502e6 100644
--- a/arch/um/drivers/line.c
+++ b/arch/um/drivers/line.c
@@ -284,7 +284,7 @@ int line_setup_irq(int fd, int input, int output, struct line *line, void *data)
if (err)
return err;
if (output)
- err = um_request_irq(driver->write_irq, fd, IRQ_WRITE,
+ err = um_request_irq(driver->write_irq, fd, IRQ_NONE,
line_write_interrupt, IRQF_SHARED,
driver->write_irq_name, data);
return err;
diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c
index b305f8247909..3ef1b48e064a 100644
--- a/arch/um/drivers/net_kern.c
+++ b/arch/um/drivers/net_kern.c
@@ -288,7 +288,7 @@ static void uml_net_user_timer_expire(struct timer_list *t)
#endif
}
-static void setup_etheraddr(struct net_device *dev, char *str)
+void uml_net_setup_etheraddr(struct net_device *dev, char *str)
{
unsigned char *addr = dev->dev_addr;
char *end;
@@ -412,7 +412,7 @@ static void eth_configure(int n, void *init, char *mac,
*/
snprintf(dev->name, sizeof(dev->name), "eth%d", n);
- setup_etheraddr(dev, mac);
+ uml_net_setup_etheraddr(dev, mac);
printk(KERN_INFO "Netdevice %d (%pM) : ", n, dev->dev_addr);
diff --git a/arch/um/drivers/random.c b/arch/um/drivers/random.c
index 37c51a6be690..778a0e52d5a5 100644
--- a/arch/um/drivers/random.c
+++ b/arch/um/drivers/random.c
@@ -13,6 +13,7 @@
#include <linux/miscdevice.h>
#include <linux/delay.h>
#include <linux/uaccess.h>
+#include <init.h>
#include <irq_kern.h>
#include <os.h>
@@ -154,7 +155,14 @@ err_out_cleanup_hw:
/*
* rng_cleanup - shutdown RNG module
*/
-static void __exit rng_cleanup (void)
+
+static void cleanup(void)
+{
+ free_irq_by_fd(random_fd);
+ os_close_file(random_fd);
+}
+
+static void __exit rng_cleanup(void)
{
os_close_file(random_fd);
misc_deregister (&rng_miscdev);
@@ -162,6 +170,7 @@ static void __exit rng_cleanup (void)
module_init (rng_init);
module_exit (rng_cleanup);
+__uml_exitcall(cleanup);
MODULE_DESCRIPTION("UML Host Random Number Generator (RNG) driver");
MODULE_LICENSE("GPL");
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index b55fe9bf5d3e..d4e8c497ae86 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -1587,11 +1587,11 @@ int io_thread(void *arg)
do {
res = os_write_file(kernel_fd, ((char *) io_req_buffer) + written, n);
- if (res > 0) {
+ if (res >= 0) {
written += res;
} else {
if (res != -EAGAIN) {
- printk("io_thread - read failed, fd = %d, "
+ printk("io_thread - write failed, fd = %d, "
"err = %d\n", kernel_fd, -n);
}
}
diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c
new file mode 100644
index 000000000000..02168fe25105
--- /dev/null
+++ b/arch/um/drivers/vector_kern.c
@@ -0,0 +1,1633 @@
+/*
+ * Copyright (C) 2017 - Cambridge Greys Limited
+ * Copyright (C) 2011 - 2014 Cisco Systems Inc
+ * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
+ * James Leu (jleu@mindspring.net).
+ * Copyright (C) 2001 by various other people who didn't put their name here.
+ * Licensed under the GPL.
+ */
+
+#include <linux/version.h>
+#include <linux/bootmem.h>
+#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
+#include <linux/inetdevice.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <linux/platform_device.h>
+#include <linux/rtnetlink.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <init.h>
+#include <irq_kern.h>
+#include <irq_user.h>
+#include <net_kern.h>
+#include <os.h>
+#include "mconsole_kern.h"
+#include "vector_user.h"
+#include "vector_kern.h"
+
+/*
+ * Adapted from network devices with the following major changes:
+ * All transports are static - simplifies the code significantly
+ * Multiple FDs/IRQs per device
+ * Vector IO optionally used for read/write, falling back to legacy
+ * based on configuration and/or availability
+ * Configuration is no longer positional - L2TPv3 and GRE require up to
+ * 10 parameters, passing this as positional is not fit for purpose.
+ * Only socket transports are supported
+ */
+
+
+#define DRIVER_NAME "uml-vector"
+#define DRIVER_VERSION "01"
+struct vector_cmd_line_arg {
+ struct list_head list;
+ int unit;
+ char *arguments;
+};
+
+struct vector_device {
+ struct list_head list;
+ struct net_device *dev;
+ struct platform_device pdev;
+ int unit;
+ int opened;
+};
+
+static LIST_HEAD(vec_cmd_line);
+
+static DEFINE_SPINLOCK(vector_devices_lock);
+static LIST_HEAD(vector_devices);
+
+static int driver_registered;
+
+static void vector_eth_configure(int n, struct arglist *def);
+
+/* Argument accessors to set variables (and/or set default values)
+ * mtu, buffer sizing, default headroom, etc
+ */
+
+#define DEFAULT_HEADROOM 2
+#define SAFETY_MARGIN 32
+#define DEFAULT_VECTOR_SIZE 64
+#define TX_SMALL_PACKET 128
+#define MAX_IOV_SIZE (MAX_SKB_FRAGS + 1)
+
+static const struct {
+ const char string[ETH_GSTRING_LEN];
+} ethtool_stats_keys[] = {
+ { "rx_queue_max" },
+ { "rx_queue_running_average" },
+ { "tx_queue_max" },
+ { "tx_queue_running_average" },
+ { "rx_encaps_errors" },
+ { "tx_timeout_count" },
+ { "tx_restart_queue" },
+ { "tx_kicks" },
+ { "tx_flow_control_xon" },
+ { "tx_flow_control_xoff" },
+ { "rx_csum_offload_good" },
+ { "rx_csum_offload_errors"},
+ { "sg_ok"},
+ { "sg_linearized"},
+};
+
+#define VECTOR_NUM_STATS ARRAY_SIZE(ethtool_stats_keys)
+
+static void vector_reset_stats(struct vector_private *vp)
+{
+ vp->estats.rx_queue_max = 0;
+ vp->estats.rx_queue_running_average = 0;
+ vp->estats.tx_queue_max = 0;
+ vp->estats.tx_queue_running_average = 0;
+ vp->estats.rx_encaps_errors = 0;
+ vp->estats.tx_timeout_count = 0;
+ vp->estats.tx_restart_queue = 0;
+ vp->estats.tx_kicks = 0;
+ vp->estats.tx_flow_control_xon = 0;
+ vp->estats.tx_flow_control_xoff = 0;
+ vp->estats.sg_ok = 0;
+ vp->estats.sg_linearized = 0;
+}
+
+static int get_mtu(struct arglist *def)
+{
+ char *mtu = uml_vector_fetch_arg(def, "mtu");
+ long result;
+
+ if (mtu != NULL) {
+ if (kstrtoul(mtu, 10, &result) == 0)
+ return result;
+ }
+ return ETH_MAX_PACKET;
+}
+
+static int get_depth(struct arglist *def)
+{
+ char *mtu = uml_vector_fetch_arg(def, "depth");
+ long result;
+
+ if (mtu != NULL) {
+ if (kstrtoul(mtu, 10, &result) == 0)
+ return result;
+ }
+ return DEFAULT_VECTOR_SIZE;
+}
+
+static int get_headroom(struct arglist *def)
+{
+ char *mtu = uml_vector_fetch_arg(def, "headroom");
+ long result;
+
+ if (mtu != NULL) {
+ if (kstrtoul(mtu, 10, &result) == 0)
+ return result;
+ }
+ return DEFAULT_HEADROOM;
+}
+
+static int get_req_size(struct arglist *def)
+{
+ char *gro = uml_vector_fetch_arg(def, "gro");
+ long result;
+
+ if (gro != NULL) {
+ if (kstrtoul(gro, 10, &result) == 0) {
+ if (result > 0)
+ return 65536;
+ }
+ }
+ return get_mtu(def) + ETH_HEADER_OTHER +
+ get_headroom(def) + SAFETY_MARGIN;
+}
+
+
+static int get_transport_options(struct arglist *def)
+{
+ char *transport = uml_vector_fetch_arg(def, "transport");
+ char *vector = uml_vector_fetch_arg(def, "vec");
+
+ int vec_rx = VECTOR_RX;
+ int vec_tx = VECTOR_TX;
+ long parsed;
+
+ if (vector != NULL) {
+ if (kstrtoul(vector, 10, &parsed) == 0) {
+ if (parsed == 0) {
+ vec_rx = 0;
+ vec_tx = 0;
+ }
+ }
+ }
+
+
+ if (strncmp(transport, TRANS_TAP, TRANS_TAP_LEN) == 0)
+ return (vec_rx | VECTOR_BPF);
+ if (strncmp(transport, TRANS_RAW, TRANS_RAW_LEN) == 0)
+ return (vec_rx | vec_tx);
+ return (vec_rx | vec_tx);
+}
+
+
+/* A mini-buffer for packet drop read
+ * All of our supported transports are datagram oriented and we always
+ * read using recvmsg or recvmmsg. If we pass a buffer which is smaller
+ * than the packet size it still counts as full packet read and will
+ * clean the incoming stream to keep sigio/epoll happy
+ */
+
+#define DROP_BUFFER_SIZE 32
+
+static char *drop_buffer;
+
+/* Array backed queues optimized for bulk enqueue/dequeue and
+ * 1:N (small values of N) or 1:1 enqueuer/dequeuer ratios.
+ * For more details and full design rationale see
+ * http://foswiki.cambridgegreys.com/Main/EatYourTailAndEnjoyIt
+ */
+
+
+/*
+ * Advance the mmsg queue head by n = advance. Resets the queue to
+ * maximum enqueue/dequeue-at-once capacity if possible. Called by
+ * dequeuers. Caller must hold the head_lock!
+ */
+
+static int vector_advancehead(struct vector_queue *qi, int advance)
+{
+ int queue_depth;
+
+ qi->head =
+ (qi->head + advance)
+ % qi->max_depth;
+
+
+ spin_lock(&qi->tail_lock);
+ qi->queue_depth -= advance;
+
+ /* we are at 0, use this to
+ * reset head and tail so we can use max size vectors
+ */
+
+ if (qi->queue_depth == 0) {
+ qi->head = 0;
+ qi->tail = 0;
+ }
+ queue_depth = qi->queue_depth;
+ spin_unlock(&qi->tail_lock);
+ return queue_depth;
+}
+
+/* Advance the queue tail by n = advance.
+ * This is called by enqueuers which should hold the
+ * head lock already
+ */
+
+static int vector_advancetail(struct vector_queue *qi, int advance)
+{
+ int queue_depth;
+
+ qi->tail =
+ (qi->tail + advance)
+ % qi->max_depth;
+ spin_lock(&qi->head_lock);
+ qi->queue_depth += advance;
+ queue_depth = qi->queue_depth;
+ spin_unlock(&qi->head_lock);
+ return queue_depth;
+}
+
+static int prep_msg(struct vector_private *vp,
+ struct sk_buff *skb,
+ struct iovec *iov)
+{
+ int iov_index = 0;
+ int nr_frags, frag;
+ skb_frag_t *skb_frag;
+
+ nr_frags = skb_shinfo(skb)->nr_frags;
+ if (nr_frags > MAX_IOV_SIZE) {
+ if (skb_linearize(skb) != 0)
+ goto drop;
+ }
+ if (vp->header_size > 0) {
+ iov[iov_index].iov_len = vp->header_size;
+ vp->form_header(iov[iov_index].iov_base, skb, vp);
+ iov_index++;
+ }
+ iov[iov_index].iov_base = skb->data;
+ if (nr_frags > 0) {
+ iov[iov_index].iov_len = skb->len - skb->data_len;
+ vp->estats.sg_ok++;
+ } else
+ iov[iov_index].iov_len = skb->len;
+ iov_index++;
+ for (frag = 0; frag < nr_frags; frag++) {
+ skb_frag = &skb_shinfo(skb)->frags[frag];
+ iov[iov_index].iov_base = skb_frag_address_safe(skb_frag);
+ iov[iov_index].iov_len = skb_frag_size(skb_frag);
+ iov_index++;
+ }
+ return iov_index;
+drop:
+ return -1;
+}
+/*
+ * Generic vector enqueue with support for forming headers using transport
+ * specific callback. Allows GRE, L2TPv3, RAW and other transports
+ * to use a common enqueue procedure in vector mode
+ */
+
+static int vector_enqueue(struct vector_queue *qi, struct sk_buff *skb)
+{
+ struct vector_private *vp = netdev_priv(qi->dev);
+ int queue_depth;
+ int packet_len;
+ struct mmsghdr *mmsg_vector = qi->mmsg_vector;
+ int iov_count;
+
+ spin_lock(&qi->tail_lock);
+ spin_lock(&qi->head_lock);
+ queue_depth = qi->queue_depth;
+ spin_unlock(&qi->head_lock);
+
+ if (skb)
+ packet_len = skb->len;
+
+ if (queue_depth < qi->max_depth) {
+
+ *(qi->skbuff_vector + qi->tail) = skb;
+ mmsg_vector += qi->tail;
+ iov_count = prep_msg(
+ vp,
+ skb,
+ mmsg_vector->msg_hdr.msg_iov
+ );
+ if (iov_count < 1)
+ goto drop;
+ mmsg_vector->msg_hdr.msg_iovlen = iov_count;
+ mmsg_vector->msg_hdr.msg_name = vp->fds->remote_addr;
+ mmsg_vector->msg_hdr.msg_namelen = vp->fds->remote_addr_size;
+ queue_depth = vector_advancetail(qi, 1);
+ } else
+ goto drop;
+ spin_unlock(&qi->tail_lock);
+ return queue_depth;
+drop:
+ qi->dev->stats.tx_dropped++;
+ if (skb != NULL) {
+ packet_len = skb->len;
+ dev_consume_skb_any(skb);
+ netdev_completed_queue(qi->dev, 1, packet_len);
+ }
+ spin_unlock(&qi->tail_lock);
+ return queue_depth;
+}
+
+static int consume_vector_skbs(struct vector_queue *qi, int count)
+{
+ struct sk_buff *skb;
+ int skb_index;
+ int bytes_compl = 0;
+
+ for (skb_index = qi->head; skb_index < qi->head + count; skb_index++) {
+ skb = *(qi->skbuff_vector + skb_index);
+ /* mark as empty to ensure correct destruction if
+ * needed
+ */
+ bytes_compl += skb->len;
+ *(qi->skbuff_vector + skb_index) = NULL;
+ dev_consume_skb_any(skb);
+ }
+ qi->dev->stats.tx_bytes += bytes_compl;
+ qi->dev->stats.tx_packets += count;
+ netdev_completed_queue(qi->dev, count, bytes_compl);
+ return vector_advancehead(qi, count);
+}
+
+/*
+ * Generic vector deque via sendmmsg with support for forming headers
+ * using transport specific callback. Allows GRE, L2TPv3, RAW and
+ * other transports to use a common dequeue procedure in vector mode
+ */
+
+
+static int vector_send(struct vector_queue *qi)
+{
+ struct vector_private *vp = netdev_priv(qi->dev);
+ struct mmsghdr *send_from;
+ int result = 0, send_len, queue_depth = qi->max_depth;
+
+ if (spin_trylock(&qi->head_lock)) {
+ if (spin_trylock(&qi->tail_lock)) {
+ /* update queue_depth to current value */
+ queue_depth = qi->queue_depth;
+ spin_unlock(&qi->tail_lock);
+ while (queue_depth > 0) {
+ /* Calculate the start of the vector */
+ send_len = queue_depth;
+ send_from = qi->mmsg_vector;
+ send_from += qi->head;
+ /* Adjust vector size if wraparound */
+ if (send_len + qi->head > qi->max_depth)
+ send_len = qi->max_depth - qi->head;
+ /* Try to TX as many packets as possible */
+ if (send_len > 0) {
+ result = uml_vector_sendmmsg(
+ vp->fds->tx_fd,
+ send_from,
+ send_len,
+ 0
+ );
+ vp->in_write_poll =
+ (result != send_len);
+ }
+ /* For some of the sendmmsg error scenarios
+ * we may end being unsure in the TX success
+ * for all packets. It is safer to declare
+ * them all TX-ed and blame the network.
+ */
+ if (result < 0) {
+ if (net_ratelimit())
+ netdev_err(vp->dev, "sendmmsg err=%i\n",
+ result);
+ result = send_len;
+ }
+ if (result > 0) {
+ queue_depth =
+ consume_vector_skbs(qi, result);
+ /* This is equivalent to an TX IRQ.
+ * Restart the upper layers to feed us
+ * more packets.
+ */
+ if (result > vp->estats.tx_queue_max)
+ vp->estats.tx_queue_max = result;
+ vp->estats.tx_queue_running_average =
+ (vp->estats.tx_queue_running_average + result) >> 1;
+ }
+ netif_trans_update(qi->dev);
+ netif_wake_queue(qi->dev);
+ /* if TX is busy, break out of the send loop,
+ * poll write IRQ will reschedule xmit for us
+ */
+ if (result != send_len) {
+ vp->estats.tx_restart_queue++;
+ break;
+ }
+ }
+ }
+ spin_unlock(&qi->head_lock);
+ } else {
+ tasklet_schedule(&vp->tx_poll);
+ }
+ return queue_depth;
+}
+
+/* Queue destructor. Deliberately stateless so we can use
+ * it in queue cleanup if initialization fails.
+ */
+
+static void destroy_queue(struct vector_queue *qi)
+{
+ int i;
+ struct iovec *iov;
+ struct vector_private *vp = netdev_priv(qi->dev);
+ struct mmsghdr *mmsg_vector;
+
+ if (qi == NULL)
+ return;
+ /* deallocate any skbuffs - we rely on any unused to be
+ * set to NULL.
+ */
+ if (qi->skbuff_vector != NULL) {
+ for (i = 0; i < qi->max_depth; i++) {
+ if (*(qi->skbuff_vector + i) != NULL)
+ dev_kfree_skb_any(*(qi->skbuff_vector + i));
+ }
+ kfree(qi->skbuff_vector);
+ }
+ /* deallocate matching IOV structures including header buffs */
+ if (qi->mmsg_vector != NULL) {
+ mmsg_vector = qi->mmsg_vector;
+ for (i = 0; i < qi->max_depth; i++) {
+ iov = mmsg_vector->msg_hdr.msg_iov;
+ if (iov != NULL) {
+ if ((vp->header_size > 0) &&
+ (iov->iov_base != NULL))
+ kfree(iov->iov_base);
+ kfree(iov);
+ }
+ mmsg_vector++;
+ }
+ kfree(qi->mmsg_vector);
+ }
+ kfree(qi);
+}
+
+/*
+ * Queue constructor. Create a queue with a given side.
+ */
+static struct vector_queue *create_queue(
+ struct vector_private *vp,
+ int max_size,
+ int header_size,
+ int num_extra_frags)
+{
+ struct vector_queue *result;
+ int i;
+ struct iovec *iov;
+ struct mmsghdr *mmsg_vector;
+
+ result = kmalloc(sizeof(struct vector_queue), GFP_KERNEL);
+ if (result == NULL)
+ goto out_fail;
+ result->max_depth = max_size;
+ result->dev = vp->dev;
+ result->mmsg_vector = kmalloc(
+ (sizeof(struct mmsghdr) * max_size), GFP_KERNEL);
+ result->skbuff_vector = kmalloc(
+ (sizeof(void *) * max_size), GFP_KERNEL);
+ if (result->mmsg_vector == NULL || result->skbuff_vector == NULL)
+ goto out_fail;
+
+ mmsg_vector = result->mmsg_vector;
+ for (i = 0; i < max_size; i++) {
+ /* Clear all pointers - we use non-NULL as marking on
+ * what to free on destruction
+ */
+ *(result->skbuff_vector + i) = NULL;
+ mmsg_vector->msg_hdr.msg_iov = NULL;
+ mmsg_vector++;
+ }
+ mmsg_vector = result->mmsg_vector;
+ result->max_iov_frags = num_extra_frags;
+ for (i = 0; i < max_size; i++) {
+ if (vp->header_size > 0)
+ iov = kmalloc(
+ sizeof(struct iovec) * (3 + num_extra_frags),
+ GFP_KERNEL
+ );
+ else
+ iov = kmalloc(
+ sizeof(struct iovec) * (2 + num_extra_frags),
+ GFP_KERNEL
+ );
+ if (iov == NULL)
+ goto out_fail;
+ mmsg_vector->msg_hdr.msg_iov = iov;
+ mmsg_vector->msg_hdr.msg_iovlen = 1;
+ mmsg_vector->msg_hdr.msg_control = NULL;
+ mmsg_vector->msg_hdr.msg_controllen = 0;
+ mmsg_vector->msg_hdr.msg_flags = MSG_DONTWAIT;
+ mmsg_vector->msg_hdr.msg_name = NULL;
+ mmsg_vector->msg_hdr.msg_namelen = 0;
+ if (vp->header_size > 0) {
+ iov->iov_base = kmalloc(header_size, GFP_KERNEL);
+ if (iov->iov_base == NULL)
+ goto out_fail;
+ iov->iov_len = header_size;
+ mmsg_vector->msg_hdr.msg_iovlen = 2;
+ iov++;
+ }
+ iov->iov_base = NULL;
+ iov->iov_len = 0;
+ mmsg_vector++;
+ }
+ spin_lock_init(&result->head_lock);
+ spin_lock_init(&result->tail_lock);
+ result->queue_depth = 0;
+ result->head = 0;
+ result->tail = 0;
+ return result;
+out_fail:
+ destroy_queue(result);
+ return NULL;
+}
+
+/*
+ * We do not use the RX queue as a proper wraparound queue for now
+ * This is not necessary because the consumption via netif_rx()
+ * happens in-line. While we can try using the return code of
+ * netif_rx() for flow control there are no drivers doing this today.
+ * For this RX specific use we ignore the tail/head locks and
+ * just read into a prepared queue filled with skbuffs.
+ */
+
+static struct sk_buff *prep_skb(
+ struct vector_private *vp,
+ struct user_msghdr *msg)
+{
+ int linear = vp->max_packet + vp->headroom + SAFETY_MARGIN;
+ struct sk_buff *result;
+ int iov_index = 0, len;
+ struct iovec *iov = msg->msg_iov;
+ int err, nr_frags, frag;
+ skb_frag_t *skb_frag;
+
+ if (vp->req_size <= linear)
+ len = linear;
+ else
+ len = vp->req_size;
+ result = alloc_skb_with_frags(
+ linear,
+ len - vp->max_packet,
+ 3,
+ &err,
+ GFP_ATOMIC
+ );
+ if (vp->header_size > 0)
+ iov_index++;
+ if (result == NULL) {
+ iov[iov_index].iov_base = NULL;
+ iov[iov_index].iov_len = 0;
+ goto done;
+ }
+ skb_reserve(result, vp->headroom);
+ result->dev = vp->dev;
+ skb_put(result, vp->max_packet);
+ result->data_len = len - vp->max_packet;
+ result->len += len - vp->max_packet;
+ skb_reset_mac_header(result);
+ result->ip_summed = CHECKSUM_NONE;
+ iov[iov_index].iov_base = result->data;
+ iov[iov_index].iov_len = vp->max_packet;
+ iov_index++;
+
+ nr_frags = skb_shinfo(result)->nr_frags;
+ for (frag = 0; frag < nr_frags; frag++) {
+ skb_frag = &skb_shinfo(result)->frags[frag];
+ iov[iov_index].iov_base = skb_frag_address_safe(skb_frag);
+ if (iov[iov_index].iov_base != NULL)
+ iov[iov_index].iov_len = skb_frag_size(skb_frag);
+ else
+ iov[iov_index].iov_len = 0;
+ iov_index++;
+ }
+done:
+ msg->msg_iovlen = iov_index;
+ return result;
+}
+
+
+/* Prepare queue for recvmmsg one-shot rx - fill with fresh sk_buffs*/
+
+static void prep_queue_for_rx(struct vector_queue *qi)
+{
+ struct vector_private *vp = netdev_priv(qi->dev);
+ struct mmsghdr *mmsg_vector = qi->mmsg_vector;
+ void **skbuff_vector = qi->skbuff_vector;
+ int i;
+
+ if (qi->queue_depth == 0)
+ return;
+ for (i = 0; i < qi->queue_depth; i++) {
+ /* it is OK if allocation fails - recvmmsg with NULL data in
+ * iov argument still performs an RX, just drops the packet
+ * This allows us stop faffing around with a "drop buffer"
+ */
+
+ *skbuff_vector = prep_skb(vp, &mmsg_vector->msg_hdr);
+ skbuff_vector++;
+ mmsg_vector++;
+ }
+ qi->queue_depth = 0;
+}
+
+static struct vector_device *find_device(int n)
+{
+ struct vector_device *device;
+ struct list_head *ele;
+
+ spin_lock(&vector_devices_lock);
+ list_for_each(ele, &vector_devices) {
+ device = list_entry(ele, struct vector_device, list);
+ if (device->unit == n)
+ goto out;
+ }
+ device = NULL;
+ out:
+ spin_unlock(&vector_devices_lock);
+ return device;
+}
+
+static int vector_parse(char *str, int *index_out, char **str_out,
+ char **error_out)
+{
+ int n, len, err;
+ char *start = str;
+
+ len = strlen(str);
+
+ while ((*str != ':') && (strlen(str) > 1))
+ str++;
+ if (*str != ':') {
+ *error_out = "Expected ':' after device number";
+ return -EINVAL;
+ }
+ *str = '\0';
+
+ err = kstrtouint(start, 0, &n);
+ if (err < 0) {
+ *error_out = "Bad device number";
+ return err;
+ }
+
+ str++;
+ if (find_device(n)) {
+ *error_out = "Device already configured";
+ return -EINVAL;
+ }
+
+ *index_out = n;
+ *str_out = str;
+ return 0;
+}
+
+static int vector_config(char *str, char **error_out)
+{
+ int err, n;
+ char *params;
+ struct arglist *parsed;
+
+ err = vector_parse(str, &n, &params, error_out);
+ if (err != 0)
+ return err;
+
+ /* This string is broken up and the pieces used by the underlying
+ * driver. We should copy it to make sure things do not go wrong
+ * later.
+ */
+
+ params = kstrdup(params, GFP_KERNEL);
+ if (params == NULL) {
+ *error_out = "vector_config failed to strdup string";
+ return -ENOMEM;
+ }
+
+ parsed = uml_parse_vector_ifspec(params);
+
+ if (parsed == NULL) {
+ *error_out = "vector_config failed to parse parameters";
+ return -EINVAL;
+ }
+
+ vector_eth_configure(n, parsed);
+ return 0;
+}
+
+static int vector_id(char **str, int *start_out, int *end_out)
+{
+ char *end;
+ int n;
+
+ n = simple_strtoul(*str, &end, 0);
+ if ((*end != '\0') || (end == *str))
+ return -1;
+
+ *start_out = n;
+ *end_out = n;
+ *str = end;
+ return n;
+}
+
+static int vector_remove(int n, char **error_out)
+{
+ struct vector_device *vec_d;
+ struct net_device *dev;
+ struct vector_private *vp;
+
+ vec_d = find_device(n);
+ if (vec_d == NULL)
+ return -ENODEV;
+ dev = vec_d->dev;
+ vp = netdev_priv(dev);
+ if (vp->fds != NULL)
+ return -EBUSY;
+ unregister_netdev(dev);
+ platform_device_unregister(&vec_d->pdev);
+ return 0;
+}
+
+/*
+ * There is no shared per-transport initialization code, so
+ * we will just initialize each interface one by one and
+ * add them to a list
+ */
+
+static struct platform_driver uml_net_driver = {
+ .driver = {
+ .name = DRIVER_NAME,
+ },
+};
+
+
+static void vector_device_release(struct device *dev)
+{
+ struct vector_device *device = dev_get_drvdata(dev);
+ struct net_device *netdev = device->dev;
+
+ list_del(&device->list);
+ kfree(device);
+ free_netdev(netdev);
+}
+
+/* Bog standard recv using recvmsg - not used normally unless the user
+ * explicitly specifies not to use recvmmsg vector RX.
+ */
+
+static int vector_legacy_rx(struct vector_private *vp)
+{
+ int pkt_len;
+ struct user_msghdr hdr;
+ struct iovec iov[2 + MAX_IOV_SIZE]; /* header + data use case only */
+ int iovpos = 0;
+ struct sk_buff *skb;
+ int header_check;
+
+ hdr.msg_name = NULL;
+ hdr.msg_namelen = 0;
+ hdr.msg_iov = (struct iovec *) &iov;
+ hdr.msg_control = NULL;
+ hdr.msg_controllen = 0;
+ hdr.msg_flags = 0;
+
+ if (vp->header_size > 0) {
+ iov[0].iov_base = vp->header_rxbuffer;
+ iov[0].iov_len = vp->header_size;
+ }
+
+ skb = prep_skb(vp, &hdr);
+
+ if (skb == NULL) {
+ /* Read a packet into drop_buffer and don't do
+ * anything with it.
+ */
+ iov[iovpos].iov_base = drop_buffer;
+ iov[iovpos].iov_len = DROP_BUFFER_SIZE;
+ hdr.msg_iovlen = 1;
+ vp->dev->stats.rx_dropped++;
+ }
+
+ pkt_len = uml_vector_recvmsg(vp->fds->rx_fd, &hdr, 0);
+
+ if (skb != NULL) {
+ if (pkt_len > vp->header_size) {
+ if (vp->header_size > 0) {
+ header_check = vp->verify_header(
+ vp->header_rxbuffer, skb, vp);
+ if (header_check < 0) {
+ dev_kfree_skb_irq(skb);
+ vp->dev->stats.rx_dropped++;
+ vp->estats.rx_encaps_errors++;
+ return 0;
+ }
+ if (header_check > 0) {
+ vp->estats.rx_csum_offload_good++;
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ }
+ }
+ pskb_trim(skb, pkt_len - vp->rx_header_size);
+ skb->protocol = eth_type_trans(skb, skb->dev);
+ vp->dev->stats.rx_bytes += skb->len;
+ vp->dev->stats.rx_packets++;
+ netif_rx(skb);
+ } else {
+ dev_kfree_skb_irq(skb);
+ }
+ }
+ return pkt_len;
+}
+
+/*
+ * Packet at a time TX which falls back to vector TX if the
+ * underlying transport is busy.
+ */
+
+
+
+static int writev_tx(struct vector_private *vp, struct sk_buff *skb)
+{
+ struct iovec iov[3 + MAX_IOV_SIZE];
+ int iov_count, pkt_len = 0;
+
+ iov[0].iov_base = vp->header_txbuffer;
+ iov_count = prep_msg(vp, skb, (struct iovec *) &iov);
+
+ if (iov_count < 1)
+ goto drop;
+ pkt_len = uml_vector_writev(
+ vp->fds->tx_fd,
+ (struct iovec *) &iov,
+ iov_count
+ );
+
+ netif_trans_update(vp->dev);
+ netif_wake_queue(vp->dev);
+
+ if (pkt_len > 0) {
+ vp->dev->stats.tx_bytes += skb->len;
+ vp->dev->stats.tx_packets++;
+ } else {
+ vp->dev->stats.tx_dropped++;
+ }
+ consume_skb(skb);
+ return pkt_len;
+drop:
+ vp->dev->stats.tx_dropped++;
+ consume_skb(skb);
+ return pkt_len;
+}
+
+/*
+ * Receive as many messages as we can in one call using the special
+ * mmsg vector matched to an skb vector which we prepared earlier.
+ */
+
+static int vector_mmsg_rx(struct vector_private *vp)
+{
+ int packet_count, i;
+ struct vector_queue *qi = vp->rx_queue;
+ struct sk_buff *skb;
+ struct mmsghdr *mmsg_vector = qi->mmsg_vector;
+ void **skbuff_vector = qi->skbuff_vector;
+ int header_check;
+
+ /* Refresh the vector and make sure it is with new skbs and the
+ * iovs are updated to point to them.
+ */
+
+ prep_queue_for_rx(qi);
+
+ /* Fire the Lazy Gun - get as many packets as we can in one go. */
+
+ packet_count = uml_vector_recvmmsg(
+ vp->fds->rx_fd, qi->mmsg_vector, qi->max_depth, 0);
+
+ if (packet_count <= 0)
+ return packet_count;
+
+ /* We treat packet processing as enqueue, buffer refresh as dequeue
+ * The queue_depth tells us how many buffers have been used and how
+ * many do we need to prep the next time prep_queue_for_rx() is called.
+ */
+
+ qi->queue_depth = packet_count;
+
+ for (i = 0; i < packet_count; i++) {
+ skb = (*skbuff_vector);
+ if (mmsg_vector->msg_len > vp->header_size) {
+ if (vp->header_size > 0) {
+ header_check = vp->verify_header(
+ mmsg_vector->msg_hdr.msg_iov->iov_base,
+ skb,
+ vp
+ );
+ if (header_check < 0) {
+ /* Overlay header failed to verify - discard.
+ * We can actually keep this skb and reuse it,
+ * but that will make the prep logic too
+ * complex.
+ */
+ dev_kfree_skb_irq(skb);
+ vp->estats.rx_encaps_errors++;
+ continue;
+ }
+ if (header_check > 0) {
+ vp->estats.rx_csum_offload_good++;
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ }
+ }
+ pskb_trim(skb,
+ mmsg_vector->msg_len - vp->rx_header_size);
+ skb->protocol = eth_type_trans(skb, skb->dev);
+ /*
+ * We do not need to lock on updating stats here
+ * The interrupt loop is non-reentrant.
+ */
+ vp->dev->stats.rx_bytes += skb->len;
+ vp->dev->stats.rx_packets++;
+ netif_rx(skb);
+ } else {
+ /* Overlay header too short to do anything - discard.
+ * We can actually keep this skb and reuse it,
+ * but that will make the prep logic too complex.
+ */
+ if (skb != NULL)
+ dev_kfree_skb_irq(skb);
+ }
+ (*skbuff_vector) = NULL;
+ /* Move to the next buffer element */
+ mmsg_vector++;
+ skbuff_vector++;
+ }
+ if (packet_count > 0) {
+ if (vp->estats.rx_queue_max < packet_count)
+ vp->estats.rx_queue_max = packet_count;
+ vp->estats.rx_queue_running_average =
+ (vp->estats.rx_queue_running_average + packet_count) >> 1;
+ }
+ return packet_count;
+}
+
+static void vector_rx(struct vector_private *vp)
+{
+ int err;
+
+ if ((vp->options & VECTOR_RX) > 0)
+ while ((err = vector_mmsg_rx(vp)) > 0)
+ ;
+ else
+ while ((err = vector_legacy_rx(vp)) > 0)
+ ;
+ if ((err != 0) && net_ratelimit())
+ netdev_err(vp->dev, "vector_rx: error(%d)\n", err);
+}
+
+static int vector_net_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct vector_private *vp = netdev_priv(dev);
+ int queue_depth = 0;
+
+ if ((vp->options & VECTOR_TX) == 0) {
+ writev_tx(vp, skb);
+ return NETDEV_TX_OK;
+ }
+
+ /* We do BQL only in the vector path, no point doing it in
+ * packet at a time mode as there is no device queue
+ */
+
+ netdev_sent_queue(vp->dev, skb->len);
+ queue_depth = vector_enqueue(vp->tx_queue, skb);
+
+ /* if the device queue is full, stop the upper layers and
+ * flush it.
+ */
+
+ if (queue_depth >= vp->tx_queue->max_depth - 1) {
+ vp->estats.tx_kicks++;
+ netif_stop_queue(dev);
+ vector_send(vp->tx_queue);
+ return NETDEV_TX_OK;
+ }
+ if (skb->xmit_more) {
+ mod_timer(&vp->tl, vp->coalesce);
+ return NETDEV_TX_OK;
+ }
+ if (skb->len < TX_SMALL_PACKET) {
+ vp->estats.tx_kicks++;
+ vector_send(vp->tx_queue);
+ } else
+ tasklet_schedule(&vp->tx_poll);
+ return NETDEV_TX_OK;
+}
+
+static irqreturn_t vector_rx_interrupt(int irq, void *dev_id)
+{
+ struct net_device *dev = dev_id;
+ struct vector_private *vp = netdev_priv(dev);
+
+ if (!netif_running(dev))
+ return IRQ_NONE;
+ vector_rx(vp);
+ return IRQ_HANDLED;
+
+}
+
+static irqreturn_t vector_tx_interrupt(int irq, void *dev_id)
+{
+ struct net_device *dev = dev_id;
+ struct vector_private *vp = netdev_priv(dev);
+
+ if (!netif_running(dev))
+ return IRQ_NONE;
+ /* We need to pay attention to it only if we got
+ * -EAGAIN or -ENOBUFFS from sendmmsg. Otherwise
+ * we ignore it. In the future, it may be worth
+ * it to improve the IRQ controller a bit to make
+ * tweaking the IRQ mask less costly
+ */
+
+ if (vp->in_write_poll)
+ tasklet_schedule(&vp->tx_poll);
+ return IRQ_HANDLED;
+
+}
+
+static int irq_rr;
+
+static int vector_net_close(struct net_device *dev)
+{
+ struct vector_private *vp = netdev_priv(dev);
+ unsigned long flags;
+
+ netif_stop_queue(dev);
+ del_timer(&vp->tl);
+
+ if (vp->fds == NULL)
+ return 0;
+
+ /* Disable and free all IRQS */
+ if (vp->rx_irq > 0) {
+ um_free_irq(vp->rx_irq, dev);
+ vp->rx_irq = 0;
+ }
+ if (vp->tx_irq > 0) {
+ um_free_irq(vp->tx_irq, dev);
+ vp->tx_irq = 0;
+ }
+ tasklet_kill(&vp->tx_poll);
+ if (vp->fds->rx_fd > 0) {
+ os_close_file(vp->fds->rx_fd);
+ vp->fds->rx_fd = -1;
+ }
+ if (vp->fds->tx_fd > 0) {
+ os_close_file(vp->fds->tx_fd);
+ vp->fds->tx_fd = -1;
+ }
+ if (vp->bpf != NULL)
+ kfree(vp->bpf);
+ if (vp->fds->remote_addr != NULL)
+ kfree(vp->fds->remote_addr);
+ if (vp->transport_data != NULL)
+ kfree(vp->transport_data);
+ if (vp->header_rxbuffer != NULL)
+ kfree(vp->header_rxbuffer);
+ if (vp->header_txbuffer != NULL)
+ kfree(vp->header_txbuffer);
+ if (vp->rx_queue != NULL)
+ destroy_queue(vp->rx_queue);
+ if (vp->tx_queue != NULL)
+ destroy_queue(vp->tx_queue);
+ kfree(vp->fds);
+ vp->fds = NULL;
+ spin_lock_irqsave(&vp->lock, flags);
+ vp->opened = false;
+ spin_unlock_irqrestore(&vp->lock, flags);
+ return 0;
+}
+
+/* TX tasklet */
+
+static void vector_tx_poll(unsigned long data)
+{
+ struct vector_private *vp = (struct vector_private *)data;
+
+ vp->estats.tx_kicks++;
+ vector_send(vp->tx_queue);
+}
+static void vector_reset_tx(struct work_struct *work)
+{
+ struct vector_private *vp =
+ container_of(work, struct vector_private, reset_tx);
+ netdev_reset_queue(vp->dev);
+ netif_start_queue(vp->dev);
+ netif_wake_queue(vp->dev);
+}
+static int vector_net_open(struct net_device *dev)
+{
+ struct vector_private *vp = netdev_priv(dev);
+ unsigned long flags;
+ int err = -EINVAL;
+ struct vector_device *vdevice;
+
+ spin_lock_irqsave(&vp->lock, flags);
+ if (vp->opened) {
+ spin_unlock_irqrestore(&vp->lock, flags);
+ return -ENXIO;
+ }
+ vp->opened = true;
+ spin_unlock_irqrestore(&vp->lock, flags);
+
+ vp->fds = uml_vector_user_open(vp->unit, vp->parsed);
+
+ if (vp->fds == NULL)
+ goto out_close;
+
+ if (build_transport_data(vp) < 0)
+ goto out_close;
+
+ if ((vp->options & VECTOR_RX) > 0) {
+ vp->rx_queue = create_queue(
+ vp,
+ get_depth(vp->parsed),
+ vp->rx_header_size,
+ MAX_IOV_SIZE
+ );
+ vp->rx_queue->queue_depth = get_depth(vp->parsed);
+ } else {
+ vp->header_rxbuffer = kmalloc(
+ vp->rx_header_size,
+ GFP_KERNEL
+ );
+ if (vp->header_rxbuffer == NULL)
+ goto out_close;
+ }
+ if ((vp->options & VECTOR_TX) > 0) {
+ vp->tx_queue = create_queue(
+ vp,
+ get_depth(vp->parsed),
+ vp->header_size,
+ MAX_IOV_SIZE
+ );
+ } else {
+ vp->header_txbuffer = kmalloc(vp->header_size, GFP_KERNEL);
+ if (vp->header_txbuffer == NULL)
+ goto out_close;
+ }
+
+ /* READ IRQ */
+ err = um_request_irq(
+ irq_rr + VECTOR_BASE_IRQ, vp->fds->rx_fd,
+ IRQ_READ, vector_rx_interrupt,
+ IRQF_SHARED, dev->name, dev);
+ if (err != 0) {
+ netdev_err(dev, "vector_open: failed to get rx irq(%d)\n", err);
+ err = -ENETUNREACH;
+ goto out_close;
+ }
+ vp->rx_irq = irq_rr + VECTOR_BASE_IRQ;
+ dev->irq = irq_rr + VECTOR_BASE_IRQ;
+ irq_rr = (irq_rr + 1) % VECTOR_IRQ_SPACE;
+
+ /* WRITE IRQ - we need it only if we have vector TX */
+ if ((vp->options & VECTOR_TX) > 0) {
+ err = um_request_irq(
+ irq_rr + VECTOR_BASE_IRQ, vp->fds->tx_fd,
+ IRQ_WRITE, vector_tx_interrupt,
+ IRQF_SHARED, dev->name, dev);
+ if (err != 0) {
+ netdev_err(dev,
+ "vector_open: failed to get tx irq(%d)\n", err);
+ err = -ENETUNREACH;
+ goto out_close;
+ }
+ vp->tx_irq = irq_rr + VECTOR_BASE_IRQ;
+ irq_rr = (irq_rr + 1) % VECTOR_IRQ_SPACE;
+ }
+
+ if ((vp->options & VECTOR_QDISC_BYPASS) != 0) {
+ if (!uml_raw_enable_qdisc_bypass(vp->fds->rx_fd))
+ vp->options = vp->options | VECTOR_BPF;
+ }
+
+ if ((vp->options & VECTOR_BPF) != 0)
+ vp->bpf = uml_vector_default_bpf(vp->fds->rx_fd, dev->dev_addr);
+
+ netif_start_queue(dev);
+
+ /* clear buffer - it can happen that the host side of the interface
+ * is full when we get here. In this case, new data is never queued,
+ * SIGIOs never arrive, and the net never works.
+ */
+
+ vector_rx(vp);
+
+ vector_reset_stats(vp);
+ vdevice = find_device(vp->unit);
+ vdevice->opened = 1;
+
+ if ((vp->options & VECTOR_TX) != 0)
+ add_timer(&vp->tl);
+ return 0;
+out_close:
+ vector_net_close(dev);
+ return err;
+}
+
+
+static void vector_net_set_multicast_list(struct net_device *dev)
+{
+ /* TODO: - we can do some BPF games here */
+ return;
+}
+
+static void vector_net_tx_timeout(struct net_device *dev)
+{
+ struct vector_private *vp = netdev_priv(dev);
+
+ vp->estats.tx_timeout_count++;
+ netif_trans_update(dev);
+ schedule_work(&vp->reset_tx);
+}
+
+static netdev_features_t vector_fix_features(struct net_device *dev,
+ netdev_features_t features)
+{
+ features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
+ return features;
+}
+
+static int vector_set_features(struct net_device *dev,
+ netdev_features_t features)
+{
+ struct vector_private *vp = netdev_priv(dev);
+ /* Adjust buffer sizes for GSO/GRO. Unfortunately, there is
+ * no way to negotiate it on raw sockets, so we can change
+ * only our side.
+ */
+ if (features & NETIF_F_GRO)
+ /* All new frame buffers will be GRO-sized */
+ vp->req_size = 65536;
+ else
+ /* All new frame buffers will be normal sized */
+ vp->req_size = vp->max_packet + vp->headroom + SAFETY_MARGIN;
+ return 0;
+}
+
+#ifdef CONFIG_NET_POLL_CONTROLLER
+static void vector_net_poll_controller(struct net_device *dev)
+{
+ disable_irq(dev->irq);
+ vector_rx_interrupt(dev->irq, dev);
+ enable_irq(dev->irq);
+}
+#endif
+
+static void vector_net_get_drvinfo(struct net_device *dev,
+ struct ethtool_drvinfo *info)
+{
+ strlcpy(info->driver, DRIVER_NAME, sizeof(info->driver));
+ strlcpy(info->version, DRIVER_VERSION, sizeof(info->version));
+}
+
+static void vector_get_ringparam(struct net_device *netdev,
+ struct ethtool_ringparam *ring)
+{
+ struct vector_private *vp = netdev_priv(netdev);
+
+ ring->rx_max_pending = vp->rx_queue->max_depth;
+ ring->tx_max_pending = vp->tx_queue->max_depth;
+ ring->rx_pending = vp->rx_queue->max_depth;
+ ring->tx_pending = vp->tx_queue->max_depth;
+}
+
+static void vector_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
+{
+ switch (stringset) {
+ case ETH_SS_TEST:
+ *buf = '\0';
+ break;
+ case ETH_SS_STATS:
+ memcpy(buf, &ethtool_stats_keys, sizeof(ethtool_stats_keys));
+ break;
+ default:
+ WARN_ON(1);
+ break;
+ }
+}
+
+static int vector_get_sset_count(struct net_device *dev, int sset)
+{
+ switch (sset) {
+ case ETH_SS_TEST:
+ return 0;
+ case ETH_SS_STATS:
+ return VECTOR_NUM_STATS;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static void vector_get_ethtool_stats(struct net_device *dev,
+ struct ethtool_stats *estats,
+ u64 *tmp_stats)
+{
+ struct vector_private *vp = netdev_priv(dev);
+
+ memcpy(tmp_stats, &vp->estats, sizeof(struct vector_estats));
+}
+
+static int vector_get_coalesce(struct net_device *netdev,
+ struct ethtool_coalesce *ec)
+{
+ struct vector_private *vp = netdev_priv(netdev);
+
+ ec->tx_coalesce_usecs = (vp->coalesce * 1000000) / HZ;
+ return 0;
+}
+
+static int vector_set_coalesce(struct net_device *netdev,
+ struct ethtool_coalesce *ec)
+{
+ struct vector_private *vp = netdev_priv(netdev);
+
+ vp->coalesce = (ec->tx_coalesce_usecs * HZ) / 1000000;
+ if (vp->coalesce == 0)
+ vp->coalesce = 1;
+ return 0;
+}
+
+static const struct ethtool_ops vector_net_ethtool_ops = {
+ .get_drvinfo = vector_net_get_drvinfo,
+ .get_link = ethtool_op_get_link,
+ .get_ts_info = ethtool_op_get_ts_info,
+ .get_ringparam = vector_get_ringparam,
+ .get_strings = vector_get_strings,
+ .get_sset_count = vector_get_sset_count,
+ .get_ethtool_stats = vector_get_ethtool_stats,
+ .get_coalesce = vector_get_coalesce,
+ .set_coalesce = vector_set_coalesce,
+};
+
+
+static const struct net_device_ops vector_netdev_ops = {
+ .ndo_open = vector_net_open,
+ .ndo_stop = vector_net_close,
+ .ndo_start_xmit = vector_net_start_xmit,
+ .ndo_set_rx_mode = vector_net_set_multicast_list,
+ .ndo_tx_timeout = vector_net_tx_timeout,
+ .ndo_set_mac_address = eth_mac_addr,
+ .ndo_validate_addr = eth_validate_addr,
+ .ndo_fix_features = vector_fix_features,
+ .ndo_set_features = vector_set_features,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+ .ndo_poll_controller = vector_net_poll_controller,
+#endif
+};
+
+
+static void vector_timer_expire(struct timer_list *t)
+{
+ struct vector_private *vp = from_timer(vp, t, tl);
+
+ vp->estats.tx_kicks++;
+ vector_send(vp->tx_queue);
+}
+
+static void vector_eth_configure(
+ int n,
+ struct arglist *def
+ )
+{
+ struct vector_device *device;
+ struct net_device *dev;
+ struct vector_private *vp;
+ int err;
+
+ device = kzalloc(sizeof(*device), GFP_KERNEL);
+ if (device == NULL) {
+ printk(KERN_ERR "eth_configure failed to allocate struct "
+ "vector_device\n");
+ return;
+ }
+ dev = alloc_etherdev(sizeof(struct vector_private));
+ if (dev == NULL) {
+ printk(KERN_ERR "eth_configure: failed to allocate struct "
+ "net_device for vec%d\n", n);
+ goto out_free_device;
+ }
+
+ dev->mtu = get_mtu(def);
+
+ INIT_LIST_HEAD(&device->list);
+ device->unit = n;
+
+ /* If this name ends up conflicting with an existing registered
+ * netdevice, that is OK, register_netdev{,ice}() will notice this
+ * and fail.
+ */
+ snprintf(dev->name, sizeof(dev->name), "vec%d", n);
+ uml_net_setup_etheraddr(dev, uml_vector_fetch_arg(def, "mac"));
+ vp = netdev_priv(dev);
+
+ /* sysfs register */
+ if (!driver_registered) {
+ platform_driver_register(&uml_net_driver);
+ driver_registered = 1;
+ }
+ device->pdev.id = n;
+ device->pdev.name = DRIVER_NAME;
+ device->pdev.dev.release = vector_device_release;
+ dev_set_drvdata(&device->pdev.dev, device);
+ if (platform_device_register(&device->pdev))
+ goto out_free_netdev;
+ SET_NETDEV_DEV(dev, &device->pdev.dev);
+
+ device->dev = dev;
+
+ *vp = ((struct vector_private)
+ {
+ .list = LIST_HEAD_INIT(vp->list),
+ .dev = dev,
+ .unit = n,
+ .options = get_transport_options(def),
+ .rx_irq = 0,
+ .tx_irq = 0,
+ .parsed = def,
+ .max_packet = get_mtu(def) + ETH_HEADER_OTHER,
+ /* TODO - we need to calculate headroom so that ip header
+ * is 16 byte aligned all the time
+ */
+ .headroom = get_headroom(def),
+ .form_header = NULL,
+ .verify_header = NULL,
+ .header_rxbuffer = NULL,
+ .header_txbuffer = NULL,
+ .header_size = 0,
+ .rx_header_size = 0,
+ .rexmit_scheduled = false,
+ .opened = false,
+ .transport_data = NULL,
+ .in_write_poll = false,
+ .coalesce = 2,
+ .req_size = get_req_size(def)
+ });
+
+ dev->features = dev->hw_features = (NETIF_F_SG | NETIF_F_FRAGLIST);
+ tasklet_init(&vp->tx_poll, vector_tx_poll, (unsigned long)vp);
+ INIT_WORK(&vp->reset_tx, vector_reset_tx);
+
+ timer_setup(&vp->tl, vector_timer_expire, 0);
+ spin_lock_init(&vp->lock);
+
+ /* FIXME */
+ dev->netdev_ops = &vector_netdev_ops;
+ dev->ethtool_ops = &vector_net_ethtool_ops;
+ dev->watchdog_timeo = (HZ >> 1);
+ /* primary IRQ - fixme */
+ dev->irq = 0; /* we will adjust this once opened */
+
+ rtnl_lock();
+ err = register_netdevice(dev);
+ rtnl_unlock();
+ if (err)
+ goto out_undo_user_init;
+
+ spin_lock(&vector_devices_lock);
+ list_add(&device->list, &vector_devices);
+ spin_unlock(&vector_devices_lock);
+
+ return;
+
+out_undo_user_init:
+ return;
+out_free_netdev:
+ free_netdev(dev);
+out_free_device:
+ kfree(device);
+}
+
+
+
+
+/*
+ * Invoked late in the init
+ */
+
+static int __init vector_init(void)
+{
+ struct list_head *ele;
+ struct vector_cmd_line_arg *def;
+ struct arglist *parsed;
+
+ list_for_each(ele, &vec_cmd_line) {
+ def = list_entry(ele, struct vector_cmd_line_arg, list);
+ parsed = uml_parse_vector_ifspec(def->arguments);
+ if (parsed != NULL)
+ vector_eth_configure(def->unit, parsed);
+ }
+ return 0;
+}
+
+
+/* Invoked at initial argument parsing, only stores
+ * arguments until a proper vector_init is called
+ * later
+ */
+
+static int __init vector_setup(char *str)
+{
+ char *error;
+ int n, err;
+ struct vector_cmd_line_arg *new;
+
+ err = vector_parse(str, &n, &str, &error);
+ if (err) {
+ printk(KERN_ERR "vector_setup - Couldn't parse '%s' : %s\n",
+ str, error);
+ return 1;
+ }
+ new = alloc_bootmem(sizeof(*new));
+ INIT_LIST_HEAD(&new->list);
+ new->unit = n;
+ new->arguments = str;
+ list_add_tail(&new->list, &vec_cmd_line);
+ return 1;
+}
+
+__setup("vec", vector_setup);
+__uml_help(vector_setup,
+"vec[0-9]+:<option>=<value>,<option>=<value>\n"
+" Configure a vector io network device.\n\n"
+);
+
+late_initcall(vector_init);
+
+static struct mc_device vector_mc = {
+ .list = LIST_HEAD_INIT(vector_mc.list),
+ .name = "vec",
+ .config = vector_config,
+ .get_config = NULL,
+ .id = vector_id,
+ .remove = vector_remove,
+};
+
+#ifdef CONFIG_INET
+static int vector_inetaddr_event(
+ struct notifier_block *this,
+ unsigned long event,
+ void *ptr)
+{
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block vector_inetaddr_notifier = {
+ .notifier_call = vector_inetaddr_event,
+};
+
+static void inet_register(void)
+{
+ register_inetaddr_notifier(&vector_inetaddr_notifier);
+}
+#else
+static inline void inet_register(void)
+{
+}
+#endif
+
+static int vector_net_init(void)
+{
+ mconsole_register_dev(&vector_mc);
+ inet_register();
+ return 0;
+}
+
+__initcall(vector_net_init);
+
+
+
diff --git a/arch/um/drivers/vector_kern.h b/arch/um/drivers/vector_kern.h
new file mode 100644
index 000000000000..0b0a767b9076
--- /dev/null
+++ b/arch/um/drivers/vector_kern.h
@@ -0,0 +1,130 @@
+/*
+ * Copyright (C) 2002 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
+#ifndef __UM_VECTOR_KERN_H
+#define __UM_VECTOR_KERN_H
+
+#include <linux/netdevice.h>
+#include <linux/platform_device.h>
+#include <linux/skbuff.h>
+#include <linux/socket.h>
+#include <linux/list.h>
+#include <linux/ctype.h>
+#include <linux/workqueue.h>
+#include <linux/interrupt.h>
+#include "vector_user.h"
+
+/* Queue structure specially adapted for multiple enqueue/dequeue
+ * in a mmsgrecv/mmsgsend context
+ */
+
+/* Dequeue method */
+
+#define QUEUE_SENDMSG 0
+#define QUEUE_SENDMMSG 1
+
+#define VECTOR_RX 1
+#define VECTOR_TX (1 << 1)
+#define VECTOR_BPF (1 << 2)
+#define VECTOR_QDISC_BYPASS (1 << 3)
+
+#define ETH_MAX_PACKET 1500
+#define ETH_HEADER_OTHER 32 /* just in case someone decides to go mad on QnQ */
+
+struct vector_queue {
+ struct mmsghdr *mmsg_vector;
+ void **skbuff_vector;
+ /* backlink to device which owns us */
+ struct net_device *dev;
+ spinlock_t head_lock;
+ spinlock_t tail_lock;
+ int queue_depth, head, tail, max_depth, max_iov_frags;
+ short options;
+};
+
+struct vector_estats {
+ uint64_t rx_queue_max;
+ uint64_t rx_queue_running_average;
+ uint64_t tx_queue_max;
+ uint64_t tx_queue_running_average;
+ uint64_t rx_encaps_errors;
+ uint64_t tx_timeout_count;
+ uint64_t tx_restart_queue;
+ uint64_t tx_kicks;
+ uint64_t tx_flow_control_xon;
+ uint64_t tx_flow_control_xoff;
+ uint64_t rx_csum_offload_good;
+ uint64_t rx_csum_offload_errors;
+ uint64_t sg_ok;
+ uint64_t sg_linearized;
+};
+
+#define VERIFY_HEADER_NOK -1
+#define VERIFY_HEADER_OK 0
+#define VERIFY_CSUM_OK 1
+
+struct vector_private {
+ struct list_head list;
+ spinlock_t lock;
+ struct net_device *dev;
+
+ int unit;
+
+ /* Timeout timer in TX */
+
+ struct timer_list tl;
+
+ /* Scheduled "remove device" work */
+ struct work_struct reset_tx;
+ struct vector_fds *fds;
+
+ struct vector_queue *rx_queue;
+ struct vector_queue *tx_queue;
+
+ int rx_irq;
+ int tx_irq;
+
+ struct arglist *parsed;
+
+ void *transport_data; /* transport specific params if needed */
+
+ int max_packet;
+ int req_size; /* different from max packet - used for TSO */
+ int headroom;
+
+ int options;
+
+ /* remote address if any - some transports will leave this as null */
+
+ int header_size;
+ int rx_header_size;
+ int coalesce;
+
+ void *header_rxbuffer;
+ void *header_txbuffer;
+
+ int (*form_header)(uint8_t *header,
+ struct sk_buff *skb, struct vector_private *vp);
+ int (*verify_header)(uint8_t *header,
+ struct sk_buff *skb, struct vector_private *vp);
+
+ spinlock_t stats_lock;
+
+ struct tasklet_struct tx_poll;
+ bool rexmit_scheduled;
+ bool opened;
+ bool in_write_poll;
+
+ /* ethtool stats */
+
+ struct vector_estats estats;
+ void *bpf;
+
+ char user[0];
+};
+
+extern int build_transport_data(struct vector_private *vp);
+
+#endif
diff --git a/arch/um/drivers/vector_transports.c b/arch/um/drivers/vector_transports.c
new file mode 100644
index 000000000000..9065047f844b
--- /dev/null
+++ b/arch/um/drivers/vector_transports.c
@@ -0,0 +1,458 @@
+/*
+ * Copyright (C) 2017 - Cambridge Greys Limited
+ * Copyright (C) 2011 - 2014 Cisco Systems Inc
+ * Licensed under the GPL.
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <asm/byteorder.h>
+#include <uapi/linux/ip.h>
+#include <uapi/linux/virtio_net.h>
+#include <linux/virtio_net.h>
+#include <linux/virtio_byteorder.h>
+#include <linux/netdev_features.h>
+#include "vector_user.h"
+#include "vector_kern.h"
+
+#define GOOD_LINEAR 512
+#define GSO_ERROR "Incoming GSO frames and GRO disabled on the interface"
+
+struct gre_minimal_header {
+ uint16_t header;
+ uint16_t arptype;
+};
+
+
+struct uml_gre_data {
+ uint32_t rx_key;
+ uint32_t tx_key;
+ uint32_t sequence;
+
+ bool ipv6;
+ bool has_sequence;
+ bool pin_sequence;
+ bool checksum;
+ bool key;
+ struct gre_minimal_header expected_header;
+
+ uint32_t checksum_offset;
+ uint32_t key_offset;
+ uint32_t sequence_offset;
+
+};
+
+struct uml_l2tpv3_data {
+ uint64_t rx_cookie;
+ uint64_t tx_cookie;
+ uint64_t rx_session;
+ uint64_t tx_session;
+ uint32_t counter;
+
+ bool udp;
+ bool ipv6;
+ bool has_counter;
+ bool pin_counter;
+ bool cookie;
+ bool cookie_is_64;
+
+ uint32_t cookie_offset;
+ uint32_t session_offset;
+ uint32_t counter_offset;
+};
+
+static int l2tpv3_form_header(uint8_t *header,
+ struct sk_buff *skb, struct vector_private *vp)
+{
+ struct uml_l2tpv3_data *td = vp->transport_data;
+ uint32_t *counter;
+
+ if (td->udp)
+ *(uint32_t *) header = cpu_to_be32(L2TPV3_DATA_PACKET);
+ (*(uint32_t *) (header + td->session_offset)) = td->tx_session;
+
+ if (td->cookie) {
+ if (td->cookie_is_64)
+ (*(uint64_t *)(header + td->cookie_offset)) =
+ td->tx_cookie;
+ else
+ (*(uint32_t *)(header + td->cookie_offset)) =
+ td->tx_cookie;
+ }
+ if (td->has_counter) {
+ counter = (uint32_t *)(header + td->counter_offset);
+ if (td->pin_counter) {
+ *counter = 0;
+ } else {
+ td->counter++;
+ *counter = cpu_to_be32(td->counter);
+ }
+ }
+ return 0;
+}
+
+static int gre_form_header(uint8_t *header,
+ struct sk_buff *skb, struct vector_private *vp)
+{
+ struct uml_gre_data *td = vp->transport_data;
+ uint32_t *sequence;
+ *((uint32_t *) header) = *((uint32_t *) &td->expected_header);
+ if (td->key)
+ (*(uint32_t *) (header + td->key_offset)) = td->tx_key;
+ if (td->has_sequence) {
+ sequence = (uint32_t *)(header + td->sequence_offset);
+ if (td->pin_sequence)
+ *sequence = 0;
+ else
+ *sequence = cpu_to_be32(++td->sequence);
+ }
+ return 0;
+}
+
+static int raw_form_header(uint8_t *header,
+ struct sk_buff *skb, struct vector_private *vp)
+{
+ struct virtio_net_hdr *vheader = (struct virtio_net_hdr *) header;
+
+ virtio_net_hdr_from_skb(
+ skb,
+ vheader,
+ virtio_legacy_is_little_endian(),
+ false
+ );
+
+ return 0;
+}
+
+static int l2tpv3_verify_header(
+ uint8_t *header, struct sk_buff *skb, struct vector_private *vp)
+{
+ struct uml_l2tpv3_data *td = vp->transport_data;
+ uint32_t *session;
+ uint64_t cookie;
+
+ if ((!td->udp) && (!td->ipv6))
+ header += sizeof(struct iphdr) /* fix for ipv4 raw */;
+
+ /* we do not do a strict check for "data" packets as per
+ * the RFC spec because the pure IP spec does not have
+ * that anyway.
+ */
+
+ if (td->cookie) {
+ if (td->cookie_is_64)
+ cookie = *(uint64_t *)(header + td->cookie_offset);
+ else
+ cookie = *(uint32_t *)(header + td->cookie_offset);
+ if (cookie != td->rx_cookie) {
+ if (net_ratelimit())
+ netdev_err(vp->dev, "uml_l2tpv3: unknown cookie id");
+ return -1;
+ }
+ }
+ session = (uint32_t *) (header + td->session_offset);
+ if (*session != td->rx_session) {
+ if (net_ratelimit())
+ netdev_err(vp->dev, "uml_l2tpv3: session mismatch");
+ return -1;
+ }
+ return 0;
+}
+
+static int gre_verify_header(
+ uint8_t *header, struct sk_buff *skb, struct vector_private *vp)
+{
+
+ uint32_t key;
+ struct uml_gre_data *td = vp->transport_data;
+
+ if (!td->ipv6)
+ header += sizeof(struct iphdr) /* fix for ipv4 raw */;
+
+ if (*((uint32_t *) header) != *((uint32_t *) &td->expected_header)) {
+ if (net_ratelimit())
+ netdev_err(vp->dev, "header type disagreement, expecting %0x, got %0x",
+ *((uint32_t *) &td->expected_header),
+ *((uint32_t *) header)
+ );
+ return -1;
+ }
+
+ if (td->key) {
+ key = (*(uint32_t *)(header + td->key_offset));
+ if (key != td->rx_key) {
+ if (net_ratelimit())
+ netdev_err(vp->dev, "unknown key id %0x, expecting %0x",
+ key, td->rx_key);
+ return -1;
+ }
+ }
+ return 0;
+}
+
+static int raw_verify_header(
+ uint8_t *header, struct sk_buff *skb, struct vector_private *vp)
+{
+ struct virtio_net_hdr *vheader = (struct virtio_net_hdr *) header;
+
+ if ((vheader->gso_type != VIRTIO_NET_HDR_GSO_NONE) &&
+ (vp->req_size != 65536)) {
+ if (net_ratelimit())
+ netdev_err(
+ vp->dev,
+ GSO_ERROR
+ );
+ }
+ if ((vheader->flags & VIRTIO_NET_HDR_F_DATA_VALID) > 0)
+ return 1;
+
+ virtio_net_hdr_to_skb(skb, vheader, virtio_legacy_is_little_endian());
+ return 0;
+}
+
+static bool get_uint_param(
+ struct arglist *def, char *param, unsigned int *result)
+{
+ char *arg = uml_vector_fetch_arg(def, param);
+
+ if (arg != NULL) {
+ if (kstrtoint(arg, 0, result) == 0)
+ return true;
+ }
+ return false;
+}
+
+static bool get_ulong_param(
+ struct arglist *def, char *param, unsigned long *result)
+{
+ char *arg = uml_vector_fetch_arg(def, param);
+
+ if (arg != NULL) {
+ if (kstrtoul(arg, 0, result) == 0)
+ return true;
+ return true;
+ }
+ return false;
+}
+
+static int build_gre_transport_data(struct vector_private *vp)
+{
+ struct uml_gre_data *td;
+ int temp_int;
+ int temp_rx;
+ int temp_tx;
+
+ vp->transport_data = kmalloc(sizeof(struct uml_gre_data), GFP_KERNEL);
+ if (vp->transport_data == NULL)
+ return -ENOMEM;
+ td = vp->transport_data;
+ td->sequence = 0;
+
+ td->expected_header.arptype = GRE_IRB;
+ td->expected_header.header = 0;
+
+ vp->form_header = &gre_form_header;
+ vp->verify_header = &gre_verify_header;
+ vp->header_size = 4;
+ td->key_offset = 4;
+ td->sequence_offset = 4;
+ td->checksum_offset = 4;
+
+ td->ipv6 = false;
+ if (get_uint_param(vp->parsed, "v6", &temp_int)) {
+ if (temp_int > 0)
+ td->ipv6 = true;
+ }
+ td->key = false;
+ if (get_uint_param(vp->parsed, "rx_key", &temp_rx)) {
+ if (get_uint_param(vp->parsed, "tx_key", &temp_tx)) {
+ td->key = true;
+ td->expected_header.header |= GRE_MODE_KEY;
+ td->rx_key = cpu_to_be32(temp_rx);
+ td->tx_key = cpu_to_be32(temp_tx);
+ vp->header_size += 4;
+ td->sequence_offset += 4;
+ } else {
+ return -EINVAL;
+ }
+ }
+
+ td->sequence = false;
+ if (get_uint_param(vp->parsed, "sequence", &temp_int)) {
+ if (temp_int > 0) {
+ vp->header_size += 4;
+ td->has_sequence = true;
+ td->expected_header.header |= GRE_MODE_SEQUENCE;
+ if (get_uint_param(
+ vp->parsed, "pin_sequence", &temp_int)) {
+ if (temp_int > 0)
+ td->pin_sequence = true;
+ }
+ }
+ }
+ vp->rx_header_size = vp->header_size;
+ if (!td->ipv6)
+ vp->rx_header_size += sizeof(struct iphdr);
+ return 0;
+}
+
+static int build_l2tpv3_transport_data(struct vector_private *vp)
+{
+
+ struct uml_l2tpv3_data *td;
+ int temp_int, temp_rxs, temp_txs;
+ unsigned long temp_rx;
+ unsigned long temp_tx;
+
+ vp->transport_data = kmalloc(
+ sizeof(struct uml_l2tpv3_data), GFP_KERNEL);
+
+ if (vp->transport_data == NULL)
+ return -ENOMEM;
+
+ td = vp->transport_data;
+
+ vp->form_header = &l2tpv3_form_header;
+ vp->verify_header = &l2tpv3_verify_header;
+ td->counter = 0;
+
+ vp->header_size = 4;
+ td->session_offset = 0;
+ td->cookie_offset = 4;
+ td->counter_offset = 4;
+
+
+ td->ipv6 = false;
+ if (get_uint_param(vp->parsed, "v6", &temp_int)) {
+ if (temp_int > 0)
+ td->ipv6 = true;
+ }
+
+ if (get_uint_param(vp->parsed, "rx_session", &temp_rxs)) {
+ if (get_uint_param(vp->parsed, "tx_session", &temp_txs)) {
+ td->tx_session = cpu_to_be32(temp_txs);
+ td->rx_session = cpu_to_be32(temp_rxs);
+ } else {
+ return -EINVAL;
+ }
+ } else {
+ return -EINVAL;
+ }
+
+ td->cookie_is_64 = false;
+ if (get_uint_param(vp->parsed, "cookie64", &temp_int)) {
+ if (temp_int > 0)
+ td->cookie_is_64 = true;
+ }
+ td->cookie = false;
+ if (get_ulong_param(vp->parsed, "rx_cookie", &temp_rx)) {
+ if (get_ulong_param(vp->parsed, "tx_cookie", &temp_tx)) {
+ td->cookie = true;
+ if (td->cookie_is_64) {
+ td->rx_cookie = cpu_to_be64(temp_rx);
+ td->tx_cookie = cpu_to_be64(temp_tx);
+ vp->header_size += 8;
+ td->counter_offset += 8;
+ } else {
+ td->rx_cookie = cpu_to_be32(temp_rx);
+ td->tx_cookie = cpu_to_be32(temp_tx);
+ vp->header_size += 4;
+ td->counter_offset += 4;
+ }
+ } else {
+ return -EINVAL;
+ }
+ }
+
+ td->has_counter = false;
+ if (get_uint_param(vp->parsed, "counter", &temp_int)) {
+ if (temp_int > 0) {
+ td->has_counter = true;
+ vp->header_size += 4;
+ if (get_uint_param(
+ vp->parsed, "pin_counter", &temp_int)) {
+ if (temp_int > 0)
+ td->pin_counter = true;
+ }
+ }
+ }
+
+ if (get_uint_param(vp->parsed, "udp", &temp_int)) {
+ if (temp_int > 0) {
+ td->udp = true;
+ vp->header_size += 4;
+ td->counter_offset += 4;
+ td->session_offset += 4;
+ td->cookie_offset += 4;
+ }
+ }
+
+ vp->rx_header_size = vp->header_size;
+ if ((!td->ipv6) && (!td->udp))
+ vp->rx_header_size += sizeof(struct iphdr);
+
+ return 0;
+}
+
+static int build_raw_transport_data(struct vector_private *vp)
+{
+ if (uml_raw_enable_vnet_headers(vp->fds->rx_fd)) {
+ if (!uml_raw_enable_vnet_headers(vp->fds->tx_fd))
+ return -1;
+ vp->form_header = &raw_form_header;
+ vp->verify_header = &raw_verify_header;
+ vp->header_size = sizeof(struct virtio_net_hdr);
+ vp->rx_header_size = sizeof(struct virtio_net_hdr);
+ vp->dev->hw_features |= (NETIF_F_TSO | NETIF_F_GRO);
+ vp->dev->features |=
+ (NETIF_F_RXCSUM | NETIF_F_HW_CSUM |
+ NETIF_F_TSO | NETIF_F_GRO);
+ netdev_info(
+ vp->dev,
+ "raw: using vnet headers for tso and tx/rx checksum"
+ );
+ }
+ return 0;
+}
+
+static int build_tap_transport_data(struct vector_private *vp)
+{
+ if (uml_raw_enable_vnet_headers(vp->fds->rx_fd)) {
+ vp->form_header = &raw_form_header;
+ vp->verify_header = &raw_verify_header;
+ vp->header_size = sizeof(struct virtio_net_hdr);
+ vp->rx_header_size = sizeof(struct virtio_net_hdr);
+ vp->dev->hw_features |=
+ (NETIF_F_TSO | NETIF_F_GSO | NETIF_F_GRO);
+ vp->dev->features |=
+ (NETIF_F_RXCSUM | NETIF_F_HW_CSUM |
+ NETIF_F_TSO | NETIF_F_GSO | NETIF_F_GRO);
+ netdev_info(
+ vp->dev,
+ "tap/raw: using vnet headers for tso and tx/rx checksum"
+ );
+ } else {
+ return 0; /* do not try to enable tap too if raw failed */
+ }
+ if (uml_tap_enable_vnet_headers(vp->fds->tx_fd))
+ return 0;
+ return -1;
+}
+
+int build_transport_data(struct vector_private *vp)
+{
+ char *transport = uml_vector_fetch_arg(vp->parsed, "transport");
+
+ if (strncmp(transport, TRANS_GRE, TRANS_GRE_LEN) == 0)
+ return build_gre_transport_data(vp);
+ if (strncmp(transport, TRANS_L2TPV3, TRANS_L2TPV3_LEN) == 0)
+ return build_l2tpv3_transport_data(vp);
+ if (strncmp(transport, TRANS_RAW, TRANS_RAW_LEN) == 0)
+ return build_raw_transport_data(vp);
+ if (strncmp(transport, TRANS_TAP, TRANS_TAP_LEN) == 0)
+ return build_tap_transport_data(vp);
+ return 0;
+}
+
diff --git a/arch/um/drivers/vector_user.c b/arch/um/drivers/vector_user.c
new file mode 100644
index 000000000000..4d6a78e31089
--- /dev/null
+++ b/arch/um/drivers/vector_user.c
@@ -0,0 +1,590 @@
+/*
+ * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <stddef.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <net/if.h>
+#include <linux/if_tun.h>
+#include <arpa/inet.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <net/ethernet.h>
+#include <netinet/ip.h>
+#include <netinet/ether.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <sys/socket.h>
+#include <sys/wait.h>
+#include <linux/virtio_net.h>
+#include <netdb.h>
+#include <stdlib.h>
+#include <os.h>
+#include <um_malloc.h>
+#include "vector_user.h"
+
+#define ID_GRE 0
+#define ID_L2TPV3 1
+#define ID_MAX 1
+
+#define TOKEN_IFNAME "ifname"
+
+#define TRANS_RAW "raw"
+#define TRANS_RAW_LEN strlen(TRANS_RAW)
+
+#define VNET_HDR_FAIL "could not enable vnet headers on fd %d"
+#define TUN_GET_F_FAIL "tapraw: TUNGETFEATURES failed: %s"
+#define L2TPV3_BIND_FAIL "l2tpv3_open : could not bind socket err=%i"
+#define BPF_ATTACH_FAIL "Failed to attach filter size %d to %d, err %d\n"
+
+/* This is very ugly and brute force lookup, but it is done
+ * only once at initialization so not worth doing hashes or
+ * anything more intelligent
+ */
+
+char *uml_vector_fetch_arg(struct arglist *ifspec, char *token)
+{
+ int i;
+
+ for (i = 0; i < ifspec->numargs; i++) {
+ if (strcmp(ifspec->tokens[i], token) == 0)
+ return ifspec->values[i];
+ }
+ return NULL;
+
+}
+
+struct arglist *uml_parse_vector_ifspec(char *arg)
+{
+ struct arglist *result;
+ int pos, len;
+ bool parsing_token = true, next_starts = true;
+
+ if (arg == NULL)
+ return NULL;
+ result = uml_kmalloc(sizeof(struct arglist), UM_GFP_KERNEL);
+ if (result == NULL)
+ return NULL;
+ result->numargs = 0;
+ len = strlen(arg);
+ for (pos = 0; pos < len; pos++) {
+ if (next_starts) {
+ if (parsing_token) {
+ result->tokens[result->numargs] = arg + pos;
+ } else {
+ result->values[result->numargs] = arg + pos;
+ result->numargs++;
+ }
+ next_starts = false;
+ }
+ if (*(arg + pos) == '=') {
+ if (parsing_token)
+ parsing_token = false;
+ else
+ goto cleanup;
+ next_starts = true;
+ (*(arg + pos)) = '\0';
+ }
+ if (*(arg + pos) == ',') {
+ parsing_token = true;
+ next_starts = true;
+ (*(arg + pos)) = '\0';
+ }
+ }
+ return result;
+cleanup:
+ printk(UM_KERN_ERR "vector_setup - Couldn't parse '%s'\n", arg);
+ kfree(result);
+ return NULL;
+}
+
+/*
+ * Socket/FD configuration functions. These return an structure
+ * of rx and tx descriptors to cover cases where these are not
+ * the same (f.e. read via raw socket and write via tap).
+ */
+
+#define PATH_NET_TUN "/dev/net/tun"
+
+static struct vector_fds *user_init_tap_fds(struct arglist *ifspec)
+{
+ struct ifreq ifr;
+ int fd = -1;
+ struct sockaddr_ll sock;
+ int err = -ENOMEM, offload;
+ char *iface;
+ struct vector_fds *result = NULL;
+
+ iface = uml_vector_fetch_arg(ifspec, TOKEN_IFNAME);
+ if (iface == NULL) {
+ printk(UM_KERN_ERR "uml_tap: failed to parse interface spec\n");
+ goto tap_cleanup;
+ }
+
+ result = uml_kmalloc(sizeof(struct vector_fds), UM_GFP_KERNEL);
+ if (result == NULL) {
+ printk(UM_KERN_ERR "uml_tap: failed to allocate file descriptors\n");
+ goto tap_cleanup;
+ }
+ result->rx_fd = -1;
+ result->tx_fd = -1;
+ result->remote_addr = NULL;
+ result->remote_addr_size = 0;
+
+ /* TAP */
+
+ fd = open(PATH_NET_TUN, O_RDWR);
+ if (fd < 0) {
+ printk(UM_KERN_ERR "uml_tap: failed to open tun device\n");
+ goto tap_cleanup;
+ }
+ result->tx_fd = fd;
+ memset(&ifr, 0, sizeof(ifr));
+ ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
+ strncpy((char *)&ifr.ifr_name, iface, sizeof(ifr.ifr_name) - 1);
+
+ err = ioctl(fd, TUNSETIFF, (void *) &ifr);
+ if (err != 0) {
+ printk(UM_KERN_ERR "uml_tap: failed to select tap interface\n");
+ goto tap_cleanup;
+ }
+
+ offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6;
+ ioctl(fd, TUNSETOFFLOAD, offload);
+
+ /* RAW */
+
+ fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
+ if (fd == -1) {
+ printk(UM_KERN_ERR
+ "uml_tap: failed to create socket: %i\n", -errno);
+ goto tap_cleanup;
+ }
+ result->rx_fd = fd;
+ memset(&ifr, 0, sizeof(ifr));
+ strncpy((char *)&ifr.ifr_name, iface, sizeof(ifr.ifr_name) - 1);
+ if (ioctl(fd, SIOCGIFINDEX, (void *) &ifr) < 0) {
+ printk(UM_KERN_ERR
+ "uml_tap: failed to set interface: %i\n", -errno);
+ goto tap_cleanup;
+ }
+
+ sock.sll_family = AF_PACKET;
+ sock.sll_protocol = htons(ETH_P_ALL);
+ sock.sll_ifindex = ifr.ifr_ifindex;
+
+ if (bind(fd,
+ (struct sockaddr *) &sock, sizeof(struct sockaddr_ll)) < 0) {
+ printk(UM_KERN_ERR
+ "user_init_tap: failed to bind raw pair, err %d\n",
+ -errno);
+ goto tap_cleanup;
+ }
+ return result;
+tap_cleanup:
+ printk(UM_KERN_ERR "user_init_tap: init failed, error %d", err);
+ if (result != NULL) {
+ if (result->rx_fd >= 0)
+ os_close_file(result->rx_fd);
+ if (result->tx_fd >= 0)
+ os_close_file(result->tx_fd);
+ kfree(result);
+ }
+ return NULL;
+}
+
+
+static struct vector_fds *user_init_raw_fds(struct arglist *ifspec)
+{
+ struct ifreq ifr;
+ int rxfd = -1, txfd = -1;
+ struct sockaddr_ll sock;
+ int err = -ENOMEM;
+ char *iface;
+ struct vector_fds *result = NULL;
+
+ iface = uml_vector_fetch_arg(ifspec, TOKEN_IFNAME);
+ if (iface == NULL)
+ goto cleanup;
+
+ rxfd = socket(AF_PACKET, SOCK_RAW, ETH_P_ALL);
+ if (rxfd == -1) {
+ err = -errno;
+ goto cleanup;
+ }
+ txfd = socket(AF_PACKET, SOCK_RAW, 0); /* Turn off RX on this fd */
+ if (txfd == -1) {
+ err = -errno;
+ goto cleanup;
+ }
+ memset(&ifr, 0, sizeof(ifr));
+ strncpy((char *)&ifr.ifr_name, iface, sizeof(ifr.ifr_name) - 1);
+ if (ioctl(rxfd, SIOCGIFINDEX, (void *) &ifr) < 0) {
+ err = -errno;
+ goto cleanup;
+ }
+
+ sock.sll_family = AF_PACKET;
+ sock.sll_protocol = htons(ETH_P_ALL);
+ sock.sll_ifindex = ifr.ifr_ifindex;
+
+ if (bind(rxfd,
+ (struct sockaddr *) &sock, sizeof(struct sockaddr_ll)) < 0) {
+ err = -errno;
+ goto cleanup;
+ }
+
+ sock.sll_family = AF_PACKET;
+ sock.sll_protocol = htons(ETH_P_IP);
+ sock.sll_ifindex = ifr.ifr_ifindex;
+
+ if (bind(txfd,
+ (struct sockaddr *) &sock, sizeof(struct sockaddr_ll)) < 0) {
+ err = -errno;
+ goto cleanup;
+ }
+
+ result = uml_kmalloc(sizeof(struct vector_fds), UM_GFP_KERNEL);
+ if (result != NULL) {
+ result->rx_fd = rxfd;
+ result->tx_fd = txfd;
+ result->remote_addr = NULL;
+ result->remote_addr_size = 0;
+ }
+ return result;
+cleanup:
+ printk(UM_KERN_ERR "user_init_raw: init failed, error %d", err);
+ if (rxfd >= 0)
+ os_close_file(rxfd);
+ if (txfd >= 0)
+ os_close_file(txfd);
+ if (result != NULL)
+ kfree(result);
+ return NULL;
+}
+
+
+bool uml_raw_enable_qdisc_bypass(int fd)
+{
+ int optval = 1;
+
+ if (setsockopt(fd,
+ SOL_PACKET, PACKET_QDISC_BYPASS,
+ &optval, sizeof(optval)) != 0) {
+ return false;
+ }
+ return true;
+}
+
+bool uml_raw_enable_vnet_headers(int fd)
+{
+ int optval = 1;
+
+ if (setsockopt(fd,
+ SOL_PACKET, PACKET_VNET_HDR,
+ &optval, sizeof(optval)) != 0) {
+ printk(UM_KERN_INFO VNET_HDR_FAIL, fd);
+ return false;
+ }
+ return true;
+}
+bool uml_tap_enable_vnet_headers(int fd)
+{
+ unsigned int features;
+ int len = sizeof(struct virtio_net_hdr);
+
+ if (ioctl(fd, TUNGETFEATURES, &features) == -1) {
+ printk(UM_KERN_INFO TUN_GET_F_FAIL, strerror(errno));
+ return false;
+ }
+ if ((features & IFF_VNET_HDR) == 0) {
+ printk(UM_KERN_INFO "tapraw: No VNET HEADER support");
+ return false;
+ }
+ ioctl(fd, TUNSETVNETHDRSZ, &len);
+ return true;
+}
+
+static struct vector_fds *user_init_socket_fds(struct arglist *ifspec, int id)
+{
+ int err = -ENOMEM;
+ int fd = -1, gairet;
+ struct addrinfo srchints;
+ struct addrinfo dsthints;
+ bool v6, udp;
+ char *value;
+ char *src, *dst, *srcport, *dstport;
+ struct addrinfo *gairesult = NULL;
+ struct vector_fds *result = NULL;
+
+
+ value = uml_vector_fetch_arg(ifspec, "v6");
+ v6 = false;
+ udp = false;
+ if (value != NULL) {
+ if (strtol((const char *) value, NULL, 10) > 0)
+ v6 = true;
+ }
+
+ value = uml_vector_fetch_arg(ifspec, "udp");
+ if (value != NULL) {
+ if (strtol((const char *) value, NULL, 10) > 0)
+ udp = true;
+ }
+ src = uml_vector_fetch_arg(ifspec, "src");
+ dst = uml_vector_fetch_arg(ifspec, "dst");
+ srcport = uml_vector_fetch_arg(ifspec, "srcport");
+ dstport = uml_vector_fetch_arg(ifspec, "dstport");
+
+ memset(&dsthints, 0, sizeof(dsthints));
+
+ if (v6)
+ dsthints.ai_family = AF_INET6;
+ else
+ dsthints.ai_family = AF_INET;
+
+ switch (id) {
+ case ID_GRE:
+ dsthints.ai_socktype = SOCK_RAW;
+ dsthints.ai_protocol = IPPROTO_GRE;
+ break;
+ case ID_L2TPV3:
+ if (udp) {
+ dsthints.ai_socktype = SOCK_DGRAM;
+ dsthints.ai_protocol = 0;
+ } else {
+ dsthints.ai_socktype = SOCK_RAW;
+ dsthints.ai_protocol = IPPROTO_L2TP;
+ }
+ break;
+ default:
+ printk(KERN_ERR "Unsupported socket type\n");
+ return NULL;
+ }
+ memcpy(&srchints, &dsthints, sizeof(struct addrinfo));
+
+ gairet = getaddrinfo(src, srcport, &dsthints, &gairesult);
+ if ((gairet != 0) || (gairesult == NULL)) {
+ printk(UM_KERN_ERR
+ "socket_open : could not resolve src, error = %s",
+ gai_strerror(gairet)
+ );
+ return NULL;
+ }
+ fd = socket(gairesult->ai_family,
+ gairesult->ai_socktype, gairesult->ai_protocol);
+ if (fd == -1) {
+ printk(UM_KERN_ERR
+ "socket_open : could not open socket, error = %d",
+ -errno
+ );
+ goto cleanup;
+ }
+ if (bind(fd,
+ (struct sockaddr *) gairesult->ai_addr,
+ gairesult->ai_addrlen)) {
+ printk(UM_KERN_ERR L2TPV3_BIND_FAIL, errno);
+ goto cleanup;
+ }
+
+ if (gairesult != NULL)
+ freeaddrinfo(gairesult);
+
+ gairesult = NULL;
+
+ gairet = getaddrinfo(dst, dstport, &dsthints, &gairesult);
+ if ((gairet != 0) || (gairesult == NULL)) {
+ printk(UM_KERN_ERR
+ "socket_open : could not resolve dst, error = %s",
+ gai_strerror(gairet)
+ );
+ return NULL;
+ }
+
+ result = uml_kmalloc(sizeof(struct vector_fds), UM_GFP_KERNEL);
+ if (result != NULL) {
+ result->rx_fd = fd;
+ result->tx_fd = fd;
+ result->remote_addr = uml_kmalloc(
+ gairesult->ai_addrlen, UM_GFP_KERNEL);
+ if (result->remote_addr == NULL)
+ goto cleanup;
+ result->remote_addr_size = gairesult->ai_addrlen;
+ memcpy(
+ result->remote_addr,
+ gairesult->ai_addr,
+ gairesult->ai_addrlen
+ );
+ }
+ freeaddrinfo(gairesult);
+ return result;
+cleanup:
+ if (gairesult != NULL)
+ freeaddrinfo(gairesult);
+ printk(UM_KERN_ERR "user_init_socket: init failed, error %d", err);
+ if (fd >= 0)
+ os_close_file(fd);
+ if (result != NULL) {
+ if (result->remote_addr != NULL)
+ kfree(result->remote_addr);
+ kfree(result);
+ }
+ return NULL;
+}
+
+struct vector_fds *uml_vector_user_open(
+ int unit,
+ struct arglist *parsed
+)
+{
+ char *transport;
+
+ if (parsed == NULL) {
+ printk(UM_KERN_ERR "no parsed config for unit %d\n", unit);
+ return NULL;
+ }
+ transport = uml_vector_fetch_arg(parsed, "transport");
+ if (transport == NULL) {
+ printk(UM_KERN_ERR "missing transport for unit %d\n", unit);
+ return NULL;
+ }
+ if (strncmp(transport, TRANS_RAW, TRANS_RAW_LEN) == 0)
+ return user_init_raw_fds(parsed);
+ if (strncmp(transport, TRANS_TAP, TRANS_TAP_LEN) == 0)
+ return user_init_tap_fds(parsed);
+ if (strncmp(transport, TRANS_GRE, TRANS_GRE_LEN) == 0)
+ return user_init_socket_fds(parsed, ID_GRE);
+ if (strncmp(transport, TRANS_L2TPV3, TRANS_L2TPV3_LEN) == 0)
+ return user_init_socket_fds(parsed, ID_L2TPV3);
+ return NULL;
+}
+
+
+int uml_vector_sendmsg(int fd, void *hdr, int flags)
+{
+ int n;
+
+ CATCH_EINTR(n = sendmsg(fd, (struct msghdr *) hdr, flags));
+ if ((n < 0) && (errno == EAGAIN))
+ return 0;
+ if (n >= 0)
+ return n;
+ else
+ return -errno;
+}
+
+int uml_vector_recvmsg(int fd, void *hdr, int flags)
+{
+ int n;
+
+ CATCH_EINTR(n = recvmsg(fd, (struct msghdr *) hdr, flags));
+ if ((n < 0) && (errno == EAGAIN))
+ return 0;
+ if (n >= 0)
+ return n;
+ else
+ return -errno;
+}
+
+int uml_vector_writev(int fd, void *hdr, int iovcount)
+{
+ int n;
+
+ CATCH_EINTR(n = writev(fd, (struct iovec *) hdr, iovcount));
+ if ((n < 0) && (errno == EAGAIN))
+ return 0;
+ if (n >= 0)
+ return n;
+ else
+ return -errno;
+}
+
+int uml_vector_sendmmsg(
+ int fd,
+ void *msgvec,
+ unsigned int vlen,
+ unsigned int flags)
+{
+ int n;
+
+ CATCH_EINTR(n = sendmmsg(fd, (struct mmsghdr *) msgvec, vlen, flags));
+ if ((n < 0) && (errno == EAGAIN))
+ return 0;
+ if (n >= 0)
+ return n;
+ else
+ return -errno;
+}
+
+int uml_vector_recvmmsg(
+ int fd,
+ void *msgvec,
+ unsigned int vlen,
+ unsigned int flags)
+{
+ int n;
+
+ CATCH_EINTR(
+ n = recvmmsg(fd, (struct mmsghdr *) msgvec, vlen, flags, 0));
+ if ((n < 0) && (errno == EAGAIN))
+ return 0;
+ if (n >= 0)
+ return n;
+ else
+ return -errno;
+}
+int uml_vector_attach_bpf(int fd, void *bpf, int bpf_len)
+{
+ int err = setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, bpf, bpf_len);
+
+ if (err < 0)
+ printk(KERN_ERR BPF_ATTACH_FAIL, bpf_len, fd, -errno);
+ return err;
+}
+
+#define DEFAULT_BPF_LEN 6
+
+void *uml_vector_default_bpf(int fd, void *mac)
+{
+ struct sock_filter *bpf;
+ uint32_t *mac1 = (uint32_t *)(mac + 2);
+ uint16_t *mac2 = (uint16_t *) mac;
+ struct sock_fprog bpf_prog = {
+ .len = 6,
+ .filter = NULL,
+ };
+
+ bpf = uml_kmalloc(
+ sizeof(struct sock_filter) * DEFAULT_BPF_LEN, UM_GFP_KERNEL);
+ if (bpf != NULL) {
+ bpf_prog.filter = bpf;
+ /* ld [8] */
+ bpf[0] = (struct sock_filter){ 0x20, 0, 0, 0x00000008 };
+ /* jeq #0xMAC[2-6] jt 2 jf 5*/
+ bpf[1] = (struct sock_filter){ 0x15, 0, 3, ntohl(*mac1)};
+ /* ldh [6] */
+ bpf[2] = (struct sock_filter){ 0x28, 0, 0, 0x00000006 };
+ /* jeq #0xMAC[0-1] jt 4 jf 5 */
+ bpf[3] = (struct sock_filter){ 0x15, 0, 1, ntohs(*mac2)};
+ /* ret #0 */
+ bpf[4] = (struct sock_filter){ 0x6, 0, 0, 0x00000000 };
+ /* ret #0x40000 */
+ bpf[5] = (struct sock_filter){ 0x6, 0, 0, 0x00040000 };
+ if (uml_vector_attach_bpf(
+ fd, &bpf_prog, sizeof(struct sock_fprog)) < 0) {
+ kfree(bpf);
+ bpf = NULL;
+ }
+ }
+ return bpf;
+}
+
diff --git a/arch/um/drivers/vector_user.h b/arch/um/drivers/vector_user.h
new file mode 100644
index 000000000000..d7cbff73b7ff
--- /dev/null
+++ b/arch/um/drivers/vector_user.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (C) 2002 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
+#ifndef __UM_VECTOR_USER_H
+#define __UM_VECTOR_USER_H
+
+#define MAXVARGS 20
+
+#define TOKEN_IFNAME "ifname"
+
+#define TRANS_RAW "raw"
+#define TRANS_RAW_LEN strlen(TRANS_RAW)
+
+#define TRANS_TAP "tap"
+#define TRANS_TAP_LEN strlen(TRANS_TAP)
+
+
+#define TRANS_GRE "gre"
+#define TRANS_GRE_LEN strlen(TRANS_RAW)
+
+#define TRANS_L2TPV3 "l2tpv3"
+#define TRANS_L2TPV3_LEN strlen(TRANS_L2TPV3)
+
+#ifndef IPPROTO_GRE
+#define IPPROTO_GRE 0x2F
+#endif
+
+#define GRE_MODE_CHECKSUM cpu_to_be16(8 << 12) /* checksum */
+#define GRE_MODE_RESERVED cpu_to_be16(4 << 12) /* unused */
+#define GRE_MODE_KEY cpu_to_be16(2 << 12) /* KEY present */
+#define GRE_MODE_SEQUENCE cpu_to_be16(1 << 12) /* sequence */
+
+#define GRE_IRB cpu_to_be16(0x6558)
+
+#define L2TPV3_DATA_PACKET 0x30000
+
+/* IANA-assigned IP protocol ID for L2TPv3 */
+
+#ifndef IPPROTO_L2TP
+#define IPPROTO_L2TP 0x73
+#endif
+
+struct arglist {
+ int numargs;
+ char *tokens[MAXVARGS];
+ char *values[MAXVARGS];
+};
+
+/* Separating read and write FDs allows us to have different
+ * rx and tx method. Example - read tap via raw socket using
+ * recvmmsg, write using legacy tap write calls
+ */
+
+struct vector_fds {
+ int rx_fd;
+ int tx_fd;
+ void *remote_addr;
+ int remote_addr_size;
+};
+
+#define VECTOR_READ 1
+#define VECTOR_WRITE (1 < 1)
+#define VECTOR_HEADERS (1 < 2)
+
+extern struct arglist *uml_parse_vector_ifspec(char *arg);
+
+extern struct vector_fds *uml_vector_user_open(
+ int unit,
+ struct arglist *parsed
+);
+
+extern char *uml_vector_fetch_arg(
+ struct arglist *ifspec,
+ char *token
+);
+
+extern int uml_vector_recvmsg(int fd, void *hdr, int flags);
+extern int uml_vector_sendmsg(int fd, void *hdr, int flags);
+extern int uml_vector_writev(int fd, void *hdr, int iovcount);
+extern int uml_vector_sendmmsg(
+ int fd, void *msgvec,
+ unsigned int vlen,
+ unsigned int flags
+);
+extern int uml_vector_recvmmsg(
+ int fd,
+ void *msgvec,
+ unsigned int vlen,
+ unsigned int flags
+);
+extern void *uml_vector_default_bpf(int fd, void *mac);
+extern int uml_vector_attach_bpf(int fd, void *bpf, int bpf_len);
+extern bool uml_raw_enable_qdisc_bypass(int fd);
+extern bool uml_raw_enable_vnet_headers(int fd);
+extern bool uml_tap_enable_vnet_headers(int fd);
+
+
+#endif
diff --git a/arch/um/include/asm/asm-prototypes.h b/arch/um/include/asm/asm-prototypes.h
new file mode 100644
index 000000000000..5898a26daa0d
--- /dev/null
+++ b/arch/um/include/asm/asm-prototypes.h
@@ -0,0 +1 @@
+#include <asm-generic/asm-prototypes.h>
diff --git a/arch/um/include/asm/irq.h b/arch/um/include/asm/irq.h
index b5cdd3f91157..49ed3e35b35a 100644
--- a/arch/um/include/asm/irq.h
+++ b/arch/um/include/asm/irq.h
@@ -18,7 +18,19 @@
#define XTERM_IRQ 13
#define RANDOM_IRQ 14
+#ifdef CONFIG_UML_NET_VECTOR
+
+#define VECTOR_BASE_IRQ 15
+#define VECTOR_IRQ_SPACE 8
+
+#define LAST_IRQ (VECTOR_IRQ_SPACE + VECTOR_BASE_IRQ)
+
+#else
+
#define LAST_IRQ RANDOM_IRQ
+
+#endif
+
#define NR_IRQS (LAST_IRQ + 1)
#endif
diff --git a/arch/um/include/shared/irq_user.h b/arch/um/include/shared/irq_user.h
index df5633053957..a7a6120f19d5 100644
--- a/arch/um/include/shared/irq_user.h
+++ b/arch/um/include/shared/irq_user.h
@@ -7,6 +7,7 @@
#define __IRQ_USER_H__
#include <sysdep/ptrace.h>
+#include <stdbool.h>
struct irq_fd {
struct irq_fd *next;
@@ -15,10 +16,17 @@ struct irq_fd {
int type;
int irq;
int events;
- int current_events;
+ bool active;
+ bool pending;
+ bool purge;
};
-enum { IRQ_READ, IRQ_WRITE };
+#define IRQ_READ 0
+#define IRQ_WRITE 1
+#define IRQ_NONE 2
+#define MAX_IRQ_TYPE (IRQ_NONE + 1)
+
+
struct siginfo;
extern void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs);
diff --git a/arch/um/include/shared/net_kern.h b/arch/um/include/shared/net_kern.h
index 012ac87d4900..40442b98b173 100644
--- a/arch/um/include/shared/net_kern.h
+++ b/arch/um/include/shared/net_kern.h
@@ -65,5 +65,7 @@ extern int tap_setup_common(char *str, char *type, char **dev_name,
char **mac_out, char **gate_addr);
extern void register_transport(struct transport *new);
extern unsigned short eth_protocol(struct sk_buff *skb);
+extern void uml_net_setup_etheraddr(struct net_device *dev, char *str);
+
#endif
diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
index d8ddaf9790d2..048ae37eb5aa 100644
--- a/arch/um/include/shared/os.h
+++ b/arch/um/include/shared/os.h
@@ -290,15 +290,16 @@ extern void halt_skas(void);
extern void reboot_skas(void);
/* irq.c */
-extern int os_waiting_for_events(struct irq_fd *active_fds);
-extern int os_create_pollfd(int fd, int events, void *tmp_pfd, int size_tmpfds);
-extern void os_free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg,
- struct irq_fd *active_fds, struct irq_fd ***last_irq_ptr2);
-extern void os_free_irq_later(struct irq_fd *active_fds,
- int irq, void *dev_id);
-extern int os_get_pollfd(int i);
-extern void os_set_pollfd(int i, int fd);
+extern int os_waiting_for_events_epoll(void);
+extern void *os_epoll_get_data_pointer(int index);
+extern int os_epoll_triggered(int index, int events);
+extern int os_event_mask(int irq_type);
+extern int os_setup_epoll(void);
+extern int os_add_epoll_fd(int events, int fd, void *data);
+extern int os_mod_epoll_fd(int events, int fd, void *data);
+extern int os_del_epoll_fd(int fd);
extern void os_set_ioignore(void);
+extern void os_close_epoll_fd(void);
/* sigio.c */
extern int add_sigio_fd(int fd);
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index 23cb9350d47e..6b7f3827d6e4 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -1,4 +1,6 @@
/*
+ * Copyright (C) 2017 - Cambridge Greys Ltd
+ * Copyright (C) 2011 - 2014 Cisco Systems Inc
* Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
* Licensed under the GPL
* Derived (i.e. mostly copied) from arch/i386/kernel/irq.c:
@@ -16,243 +18,362 @@
#include <as-layout.h>
#include <kern_util.h>
#include <os.h>
+#include <irq_user.h>
-/*
- * This list is accessed under irq_lock, except in sigio_handler,
- * where it is safe from being modified. IRQ handlers won't change it -
- * if an IRQ source has vanished, it will be freed by free_irqs just
- * before returning from sigio_handler. That will process a separate
- * list of irqs to free, with its own locking, coming back here to
- * remove list elements, taking the irq_lock to do so.
+
+/* When epoll triggers we do not know why it did so
+ * we can also have different IRQs for read and write.
+ * This is why we keep a small irq_fd array for each fd -
+ * one entry per IRQ type
*/
-static struct irq_fd *active_fds = NULL;
-static struct irq_fd **last_irq_ptr = &active_fds;
-extern void free_irqs(void);
+struct irq_entry {
+ struct irq_entry *next;
+ int fd;
+ struct irq_fd *irq_array[MAX_IRQ_TYPE + 1];
+};
+
+static struct irq_entry *active_fds;
+
+static DEFINE_SPINLOCK(irq_lock);
+
+static void irq_io_loop(struct irq_fd *irq, struct uml_pt_regs *regs)
+{
+/*
+ * irq->active guards against reentry
+ * irq->pending accumulates pending requests
+ * if pending is raised the irq_handler is re-run
+ * until pending is cleared
+ */
+ if (irq->active) {
+ irq->active = false;
+ do {
+ irq->pending = false;
+ do_IRQ(irq->irq, regs);
+ } while (irq->pending && (!irq->purge));
+ if (!irq->purge)
+ irq->active = true;
+ } else {
+ irq->pending = true;
+ }
+}
void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
{
- struct irq_fd *irq_fd;
- int n;
+ struct irq_entry *irq_entry;
+ struct irq_fd *irq;
+
+ int n, i, j;
while (1) {
- n = os_waiting_for_events(active_fds);
+ /* This is now lockless - epoll keeps back-referencesto the irqs
+ * which have trigger it so there is no need to walk the irq
+ * list and lock it every time. We avoid locking by turning off
+ * IO for a specific fd by executing os_del_epoll_fd(fd) before
+ * we do any changes to the actual data structures
+ */
+ n = os_waiting_for_events_epoll();
+
if (n <= 0) {
if (n == -EINTR)
continue;
- else break;
+ else
+ break;
}
- for (irq_fd = active_fds; irq_fd != NULL;
- irq_fd = irq_fd->next) {
- if (irq_fd->current_events != 0) {
- irq_fd->current_events = 0;
- do_IRQ(irq_fd->irq, regs);
+ for (i = 0; i < n ; i++) {
+ /* Epoll back reference is the entry with 3 irq_fd
+ * leaves - one for each irq type.
+ */
+ irq_entry = (struct irq_entry *)
+ os_epoll_get_data_pointer(i);
+ for (j = 0; j < MAX_IRQ_TYPE ; j++) {
+ irq = irq_entry->irq_array[j];
+ if (irq == NULL)
+ continue;
+ if (os_epoll_triggered(i, irq->events) > 0)
+ irq_io_loop(irq, regs);
+ if (irq->purge) {
+ irq_entry->irq_array[j] = NULL;
+ kfree(irq);
+ }
}
}
}
+}
+
+static int assign_epoll_events_to_irq(struct irq_entry *irq_entry)
+{
+ int i;
+ int events = 0;
+ struct irq_fd *irq;
- free_irqs();
+ for (i = 0; i < MAX_IRQ_TYPE ; i++) {
+ irq = irq_entry->irq_array[i];
+ if (irq != NULL)
+ events = irq->events | events;
+ }
+ if (events > 0) {
+ /* os_add_epoll will call os_mod_epoll if this already exists */
+ return os_add_epoll_fd(events, irq_entry->fd, irq_entry);
+ }
+ /* No events - delete */
+ return os_del_epoll_fd(irq_entry->fd);
}
-static DEFINE_SPINLOCK(irq_lock);
+
static int activate_fd(int irq, int fd, int type, void *dev_id)
{
- struct pollfd *tmp_pfd;
- struct irq_fd *new_fd, *irq_fd;
+ struct irq_fd *new_fd;
+ struct irq_entry *irq_entry;
+ int i, err, events;
unsigned long flags;
- int events, err, n;
err = os_set_fd_async(fd);
if (err < 0)
goto out;
- err = -ENOMEM;
- new_fd = kmalloc(sizeof(struct irq_fd), GFP_KERNEL);
- if (new_fd == NULL)
- goto out;
+ spin_lock_irqsave(&irq_lock, flags);
- if (type == IRQ_READ)
- events = UM_POLLIN | UM_POLLPRI;
- else events = UM_POLLOUT;
- *new_fd = ((struct irq_fd) { .next = NULL,
- .id = dev_id,
- .fd = fd,
- .type = type,
- .irq = irq,
- .events = events,
- .current_events = 0 } );
+ /* Check if we have an entry for this fd */
err = -EBUSY;
- spin_lock_irqsave(&irq_lock, flags);
- for (irq_fd = active_fds; irq_fd != NULL; irq_fd = irq_fd->next) {
- if ((irq_fd->fd == fd) && (irq_fd->type == type)) {
- printk(KERN_ERR "Registering fd %d twice\n", fd);
- printk(KERN_ERR "Irqs : %d, %d\n", irq_fd->irq, irq);
- printk(KERN_ERR "Ids : 0x%p, 0x%p\n", irq_fd->id,
- dev_id);
+ for (irq_entry = active_fds;
+ irq_entry != NULL; irq_entry = irq_entry->next) {
+ if (irq_entry->fd == fd)
+ break;
+ }
+
+ if (irq_entry == NULL) {
+ /* This needs to be atomic as it may be called from an
+ * IRQ context.
+ */
+ irq_entry = kmalloc(sizeof(struct irq_entry), GFP_ATOMIC);
+ if (irq_entry == NULL) {
+ printk(KERN_ERR
+ "Failed to allocate new IRQ entry\n");
goto out_unlock;
}
+ irq_entry->fd = fd;
+ for (i = 0; i < MAX_IRQ_TYPE; i++)
+ irq_entry->irq_array[i] = NULL;
+ irq_entry->next = active_fds;
+ active_fds = irq_entry;
}
- if (type == IRQ_WRITE)
- fd = -1;
-
- tmp_pfd = NULL;
- n = 0;
+ /* Check if we are trying to re-register an interrupt for a
+ * particular fd
+ */
- while (1) {
- n = os_create_pollfd(fd, events, tmp_pfd, n);
- if (n == 0)
- break;
+ if (irq_entry->irq_array[type] != NULL) {
+ printk(KERN_ERR
+ "Trying to reregister IRQ %d FD %d TYPE %d ID %p\n",
+ irq, fd, type, dev_id
+ );
+ goto out_unlock;
+ } else {
+ /* New entry for this fd */
+
+ err = -ENOMEM;
+ new_fd = kmalloc(sizeof(struct irq_fd), GFP_ATOMIC);
+ if (new_fd == NULL)
+ goto out_unlock;
- /*
- * n > 0
- * It means we couldn't put new pollfd to current pollfds
- * and tmp_fds is NULL or too small for new pollfds array.
- * Needed size is equal to n as minimum.
- *
- * Here we have to drop the lock in order to call
- * kmalloc, which might sleep.
- * If something else came in and changed the pollfds array
- * so we will not be able to put new pollfd struct to pollfds
- * then we free the buffer tmp_fds and try again.
+ events = os_event_mask(type);
+
+ *new_fd = ((struct irq_fd) {
+ .id = dev_id,
+ .irq = irq,
+ .type = type,
+ .events = events,
+ .active = true,
+ .pending = false,
+ .purge = false
+ });
+ /* Turn off any IO on this fd - allows us to
+ * avoid locking the IRQ loop
*/
- spin_unlock_irqrestore(&irq_lock, flags);
- kfree(tmp_pfd);
-
- tmp_pfd = kmalloc(n, GFP_KERNEL);
- if (tmp_pfd == NULL)
- goto out_kfree;
-
- spin_lock_irqsave(&irq_lock, flags);
+ os_del_epoll_fd(irq_entry->fd);
+ irq_entry->irq_array[type] = new_fd;
}
- *last_irq_ptr = new_fd;
- last_irq_ptr = &new_fd->next;
-
+ /* Turn back IO on with the correct (new) IO event mask */
+ assign_epoll_events_to_irq(irq_entry);
spin_unlock_irqrestore(&irq_lock, flags);
-
- /*
- * This calls activate_fd, so it has to be outside the critical
- * section.
- */
- maybe_sigio_broken(fd, (type == IRQ_READ));
+ maybe_sigio_broken(fd, (type != IRQ_NONE));
return 0;
-
- out_unlock:
+out_unlock:
spin_unlock_irqrestore(&irq_lock, flags);
- out_kfree:
- kfree(new_fd);
- out:
+out:
return err;
}
-static void free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg)
+/*
+ * Walk the IRQ list and dispose of any unused entries.
+ * Should be done under irq_lock.
+ */
+
+static void garbage_collect_irq_entries(void)
{
- unsigned long flags;
+ int i;
+ bool reap;
+ struct irq_entry *walk;
+ struct irq_entry *previous = NULL;
+ struct irq_entry *to_free;
- spin_lock_irqsave(&irq_lock, flags);
- os_free_irq_by_cb(test, arg, active_fds, &last_irq_ptr);
- spin_unlock_irqrestore(&irq_lock, flags);
+ if (active_fds == NULL)
+ return;
+ walk = active_fds;
+ while (walk != NULL) {
+ reap = true;
+ for (i = 0; i < MAX_IRQ_TYPE ; i++) {
+ if (walk->irq_array[i] != NULL) {
+ reap = false;
+ break;
+ }
+ }
+ if (reap) {
+ if (previous == NULL)
+ active_fds = walk->next;
+ else
+ previous->next = walk->next;
+ to_free = walk;
+ } else {
+ to_free = NULL;
+ }
+ walk = walk->next;
+ if (to_free != NULL)
+ kfree(to_free);
+ }
}
-struct irq_and_dev {
- int irq;
- void *dev;
-};
+/*
+ * Walk the IRQ list and get the descriptor for our FD
+ */
-static int same_irq_and_dev(struct irq_fd *irq, void *d)
+static struct irq_entry *get_irq_entry_by_fd(int fd)
{
- struct irq_and_dev *data = d;
+ struct irq_entry *walk = active_fds;
- return ((irq->irq == data->irq) && (irq->id == data->dev));
+ while (walk != NULL) {
+ if (walk->fd == fd)
+ return walk;
+ walk = walk->next;
+ }
+ return NULL;
}
-static void free_irq_by_irq_and_dev(unsigned int irq, void *dev)
-{
- struct irq_and_dev data = ((struct irq_and_dev) { .irq = irq,
- .dev = dev });
- free_irq_by_cb(same_irq_and_dev, &data);
-}
+/*
+ * Walk the IRQ list and dispose of an entry for a specific
+ * device, fd and number. Note - if sharing an IRQ for read
+ * and writefor the same FD it will be disposed in either case.
+ * If this behaviour is undesirable use different IRQ ids.
+ */
-static int same_fd(struct irq_fd *irq, void *fd)
-{
- return (irq->fd == *((int *)fd));
-}
+#define IGNORE_IRQ 1
+#define IGNORE_DEV (1<<1)
-void free_irq_by_fd(int fd)
+static void do_free_by_irq_and_dev(
+ struct irq_entry *irq_entry,
+ unsigned int irq,
+ void *dev,
+ int flags
+)
{
- free_irq_by_cb(same_fd, &fd);
+ int i;
+ struct irq_fd *to_free;
+
+ for (i = 0; i < MAX_IRQ_TYPE ; i++) {
+ if (irq_entry->irq_array[i] != NULL) {
+ if (
+ ((flags & IGNORE_IRQ) ||
+ (irq_entry->irq_array[i]->irq == irq)) &&
+ ((flags & IGNORE_DEV) ||
+ (irq_entry->irq_array[i]->id == dev))
+ ) {
+ /* Turn off any IO on this fd - allows us to
+ * avoid locking the IRQ loop
+ */
+ os_del_epoll_fd(irq_entry->fd);
+ to_free = irq_entry->irq_array[i];
+ irq_entry->irq_array[i] = NULL;
+ assign_epoll_events_to_irq(irq_entry);
+ if (to_free->active)
+ to_free->purge = true;
+ else
+ kfree(to_free);
+ }
+ }
+ }
}
-/* Must be called with irq_lock held */
-static struct irq_fd *find_irq_by_fd(int fd, int irqnum, int *index_out)
+void free_irq_by_fd(int fd)
{
- struct irq_fd *irq;
- int i = 0;
- int fdi;
+ struct irq_entry *to_free;
+ unsigned long flags;
- for (irq = active_fds; irq != NULL; irq = irq->next) {
- if ((irq->fd == fd) && (irq->irq == irqnum))
- break;
- i++;
- }
- if (irq == NULL) {
- printk(KERN_ERR "find_irq_by_fd doesn't have descriptor %d\n",
- fd);
- goto out;
- }
- fdi = os_get_pollfd(i);
- if ((fdi != -1) && (fdi != fd)) {
- printk(KERN_ERR "find_irq_by_fd - mismatch between active_fds "
- "and pollfds, fd %d vs %d, need %d\n", irq->fd,
- fdi, fd);
- irq = NULL;
- goto out;
+ spin_lock_irqsave(&irq_lock, flags);
+ to_free = get_irq_entry_by_fd(fd);
+ if (to_free != NULL) {
+ do_free_by_irq_and_dev(
+ to_free,
+ -1,
+ NULL,
+ IGNORE_IRQ | IGNORE_DEV
+ );
}
- *index_out = i;
- out:
- return irq;
+ garbage_collect_irq_entries();
+ spin_unlock_irqrestore(&irq_lock, flags);
}
+EXPORT_SYMBOL(free_irq_by_fd);
-void reactivate_fd(int fd, int irqnum)
+static void free_irq_by_irq_and_dev(unsigned int irq, void *dev)
{
- struct irq_fd *irq;
+ struct irq_entry *to_free;
unsigned long flags;
- int i;
spin_lock_irqsave(&irq_lock, flags);
- irq = find_irq_by_fd(fd, irqnum, &i);
- if (irq == NULL) {
- spin_unlock_irqrestore(&irq_lock, flags);
- return;
+ to_free = active_fds;
+ while (to_free != NULL) {
+ do_free_by_irq_and_dev(
+ to_free,
+ irq,
+ dev,
+ 0
+ );
+ to_free = to_free->next;
}
- os_set_pollfd(i, irq->fd);
+ garbage_collect_irq_entries();
spin_unlock_irqrestore(&irq_lock, flags);
+}
- add_sigio_fd(fd);
+
+void reactivate_fd(int fd, int irqnum)
+{
+ /** NOP - we do auto-EOI now **/
}
void deactivate_fd(int fd, int irqnum)
{
- struct irq_fd *irq;
+ struct irq_entry *to_free;
unsigned long flags;
- int i;
+ os_del_epoll_fd(fd);
spin_lock_irqsave(&irq_lock, flags);
- irq = find_irq_by_fd(fd, irqnum, &i);
- if (irq == NULL) {
- spin_unlock_irqrestore(&irq_lock, flags);
- return;
+ to_free = get_irq_entry_by_fd(fd);
+ if (to_free != NULL) {
+ do_free_by_irq_and_dev(
+ to_free,
+ irqnum,
+ NULL,
+ IGNORE_DEV
+ );
}
-
- os_set_pollfd(i, -1);
+ garbage_collect_irq_entries();
spin_unlock_irqrestore(&irq_lock, flags);
-
ignore_sigio_fd(fd);
}
EXPORT_SYMBOL(deactivate_fd);
@@ -265,17 +386,28 @@ EXPORT_SYMBOL(deactivate_fd);
*/
int deactivate_all_fds(void)
{
- struct irq_fd *irq;
- int err;
+ unsigned long flags;
+ struct irq_entry *to_free;
- for (irq = active_fds; irq != NULL; irq = irq->next) {
- err = os_clear_fd_async(irq->fd);
- if (err)
- return err;
- }
- /* If there is a signal already queued, after unblocking ignore it */
+ spin_lock_irqsave(&irq_lock, flags);
+ /* Stop IO. The IRQ loop has no lock so this is our
+ * only way of making sure we are safe to dispose
+ * of all IRQ handlers
+ */
os_set_ioignore();
-
+ to_free = active_fds;
+ while (to_free != NULL) {
+ do_free_by_irq_and_dev(
+ to_free,
+ -1,
+ NULL,
+ IGNORE_IRQ | IGNORE_DEV
+ );
+ to_free = to_free->next;
+ }
+ garbage_collect_irq_entries();
+ spin_unlock_irqrestore(&irq_lock, flags);
+ os_close_epoll_fd();
return 0;
}
@@ -353,8 +485,11 @@ void __init init_IRQ(void)
irq_set_chip_and_handler(TIMER_IRQ, &SIGVTALRM_irq_type, handle_edge_irq);
+
for (i = 1; i < NR_IRQS; i++)
irq_set_chip_and_handler(i, &normal_irq_type, handle_edge_irq);
+ /* Initialize EPOLL Loop */
+ os_setup_epoll();
}
/*
diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c
index 7f69d17de354..052de4c8acb2 100644
--- a/arch/um/kernel/time.c
+++ b/arch/um/kernel/time.c
@@ -121,12 +121,12 @@ static void __init um_timer_setup(void)
clockevents_register_device(&timer_clockevent);
}
-void read_persistent_clock(struct timespec *ts)
+void read_persistent_clock64(struct timespec64 *ts)
{
long long nsecs = os_persistent_clock_emulation();
- set_normalized_timespec(ts, nsecs / NSEC_PER_SEC,
- nsecs % NSEC_PER_SEC);
+ set_normalized_timespec64(ts, nsecs / NSEC_PER_SEC,
+ nsecs % NSEC_PER_SEC);
}
void __init time_init(void)
diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c
index 2db18cbbb0ea..c0197097c86e 100644
--- a/arch/um/os-Linux/file.c
+++ b/arch/um/os-Linux/file.c
@@ -12,6 +12,7 @@
#include <sys/mount.h>
#include <sys/socket.h>
#include <sys/stat.h>
+#include <sys/sysmacros.h>
#include <sys/un.h>
#include <sys/types.h>
#include <os.h>
diff --git a/arch/um/os-Linux/irq.c b/arch/um/os-Linux/irq.c
index b9afb74b79ad..365823010346 100644
--- a/arch/um/os-Linux/irq.c
+++ b/arch/um/os-Linux/irq.c
@@ -1,135 +1,147 @@
/*
+ * Copyright (C) 2017 - Cambridge Greys Ltd
+ * Copyright (C) 2011 - 2014 Cisco Systems Inc
* Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
* Licensed under the GPL
*/
#include <stdlib.h>
#include <errno.h>
-#include <poll.h>
+#include <sys/epoll.h>
#include <signal.h>
#include <string.h>
#include <irq_user.h>
#include <os.h>
#include <um_malloc.h>
+/* Epoll support */
+
+static int epollfd = -1;
+
+#define MAX_EPOLL_EVENTS 64
+
+static struct epoll_event epoll_events[MAX_EPOLL_EVENTS];
+
+/* Helper to return an Epoll data pointer from an epoll event structure.
+ * We need to keep this one on the userspace side to keep includes separate
+ */
+
+void *os_epoll_get_data_pointer(int index)
+{
+ return epoll_events[index].data.ptr;
+}
+
+/* Helper to compare events versus the events in the epoll structure.
+ * Same as above - needs to be on the userspace side
+ */
+
+
+int os_epoll_triggered(int index, int events)
+{
+ return epoll_events[index].events & events;
+}
+/* Helper to set the event mask.
+ * The event mask is opaque to the kernel side, because it does not have
+ * access to the right includes/defines for EPOLL constants.
+ */
+
+int os_event_mask(int irq_type)
+{
+ if (irq_type == IRQ_READ)
+ return EPOLLIN | EPOLLPRI;
+ if (irq_type == IRQ_WRITE)
+ return EPOLLOUT;
+ return 0;
+}
+
/*
- * Locked by irq_lock in arch/um/kernel/irq.c. Changed by os_create_pollfd
- * and os_free_irq_by_cb, which are called under irq_lock.
+ * Initial Epoll Setup
*/
-static struct pollfd *pollfds = NULL;
-static int pollfds_num = 0;
-static int pollfds_size = 0;
+int os_setup_epoll(void)
+{
+ epollfd = epoll_create(MAX_EPOLL_EVENTS);
+ return epollfd;
+}
-int os_waiting_for_events(struct irq_fd *active_fds)
+/*
+ * Helper to run the actual epoll_wait
+ */
+int os_waiting_for_events_epoll(void)
{
- struct irq_fd *irq_fd;
- int i, n, err;
+ int n, err;
- n = poll(pollfds, pollfds_num, 0);
+ n = epoll_wait(epollfd,
+ (struct epoll_event *) &epoll_events, MAX_EPOLL_EVENTS, 0);
if (n < 0) {
err = -errno;
if (errno != EINTR)
- printk(UM_KERN_ERR "os_waiting_for_events:"
- " poll returned %d, errno = %d\n", n, errno);
+ printk(
+ UM_KERN_ERR "os_waiting_for_events:"
+ " epoll returned %d, error = %s\n", n,
+ strerror(errno)
+ );
return err;
}
-
- if (n == 0)
- return 0;
-
- irq_fd = active_fds;
-
- for (i = 0; i < pollfds_num; i++) {
- if (pollfds[i].revents != 0) {
- irq_fd->current_events = pollfds[i].revents;
- pollfds[i].fd = -1;
- }
- irq_fd = irq_fd->next;
- }
return n;
}
-int os_create_pollfd(int fd, int events, void *tmp_pfd, int size_tmpfds)
-{
- if (pollfds_num == pollfds_size) {
- if (size_tmpfds <= pollfds_size * sizeof(pollfds[0])) {
- /* return min size needed for new pollfds area */
- return (pollfds_size + 1) * sizeof(pollfds[0]);
- }
-
- if (pollfds != NULL) {
- memcpy(tmp_pfd, pollfds,
- sizeof(pollfds[0]) * pollfds_size);
- /* remove old pollfds */
- kfree(pollfds);
- }
- pollfds = tmp_pfd;
- pollfds_size++;
- } else
- kfree(tmp_pfd); /* remove not used tmp_pfd */
-
- pollfds[pollfds_num] = ((struct pollfd) { .fd = fd,
- .events = events,
- .revents = 0 });
- pollfds_num++;
-
- return 0;
-}
-void os_free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg,
- struct irq_fd *active_fds, struct irq_fd ***last_irq_ptr2)
+/*
+ * Helper to add a fd to epoll
+ */
+int os_add_epoll_fd(int events, int fd, void *data)
{
- struct irq_fd **prev;
- int i = 0;
-
- prev = &active_fds;
- while (*prev != NULL) {
- if ((*test)(*prev, arg)) {
- struct irq_fd *old_fd = *prev;
- if ((pollfds[i].fd != -1) &&
- (pollfds[i].fd != (*prev)->fd)) {
- printk(UM_KERN_ERR "os_free_irq_by_cb - "
- "mismatch between active_fds and "
- "pollfds, fd %d vs %d\n",
- (*prev)->fd, pollfds[i].fd);
- goto out;
- }
-
- pollfds_num--;
-
- /*
- * This moves the *whole* array after pollfds[i]
- * (though it doesn't spot as such)!
- */
- memmove(&pollfds[i], &pollfds[i + 1],
- (pollfds_num - i) * sizeof(pollfds[0]));
- if (*last_irq_ptr2 == &old_fd->next)
- *last_irq_ptr2 = prev;
-
- *prev = (*prev)->next;
- if (old_fd->type == IRQ_WRITE)
- ignore_sigio_fd(old_fd->fd);
- kfree(old_fd);
- continue;
- }
- prev = &(*prev)->next;
- i++;
- }
- out:
- return;
+ struct epoll_event event;
+ int result;
+
+ event.data.ptr = data;
+ event.events = events | EPOLLET;
+ result = epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &event);
+ if ((result) && (errno == EEXIST))
+ result = os_mod_epoll_fd(events, fd, data);
+ if (result)
+ printk("epollctl add err fd %d, %s\n", fd, strerror(errno));
+ return result;
}
-int os_get_pollfd(int i)
+/*
+ * Helper to mod the fd event mask and/or data backreference
+ */
+int os_mod_epoll_fd(int events, int fd, void *data)
{
- return pollfds[i].fd;
+ struct epoll_event event;
+ int result;
+
+ event.data.ptr = data;
+ event.events = events;
+ result = epoll_ctl(epollfd, EPOLL_CTL_MOD, fd, &event);
+ if (result)
+ printk(UM_KERN_ERR
+ "epollctl mod err fd %d, %s\n", fd, strerror(errno));
+ return result;
}
-void os_set_pollfd(int i, int fd)
+/*
+ * Helper to delete the epoll fd
+ */
+int os_del_epoll_fd(int fd)
{
- pollfds[i].fd = fd;
+ struct epoll_event event;
+ int result;
+ /* This is quiet as we use this as IO ON/OFF - so it is often
+ * invoked on a non-existent fd
+ */
+ result = epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, &event);
+ return result;
}
void os_set_ioignore(void)
{
signal(SIGIO, SIG_IGN);
}
+
+void os_close_epoll_fd(void)
+{
+ /* Needed so we do not leak an fd when rebooting */
+ os_close_file(epollfd);
+}
diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c
index a86d7cc2c2d8..bf0acb8aad8b 100644
--- a/arch/um/os-Linux/signal.c
+++ b/arch/um/os-Linux/signal.c
@@ -16,6 +16,7 @@
#include <os.h>
#include <sysdep/mcontext.h>
#include <um_malloc.h>
+#include <sys/ucontext.h>
void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *) = {
[SIGTRAP] = relay_signal,
@@ -159,7 +160,7 @@ static void (*handlers[_NSIG])(int sig, struct siginfo *si, mcontext_t *mc) = {
static void hard_handler(int sig, siginfo_t *si, void *p)
{
- struct ucontext *uc = p;
+ ucontext_t *uc = p;
mcontext_t *mc = &uc->uc_mcontext;
unsigned long pending = 1UL << sig;
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index d234cca296db..00fcf81f2c56 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -29,6 +29,7 @@ config X86_64
select HAVE_ARCH_SOFT_DIRTY
select MODULES_USE_ELF_RELA
select X86_DEV_DMA_OPS
+ select ARCH_HAS_SYSCALL_WRAPPER
#
# Arch settings
@@ -2008,6 +2009,9 @@ config KEXEC_FILE
for kernel and initramfs as opposed to list of segments as
accepted by previous system call.
+config ARCH_HAS_KEXEC_PURGATORY
+ def_bool KEXEC_FILE
+
config KEXEC_VERIFY_SIG
bool "Verify kernel signature during kexec_file_load() syscall"
depends on KEXEC_FILE
@@ -2760,11 +2764,9 @@ config OLPC_XO1_RTC
config OLPC_XO1_SCI
bool "OLPC XO-1 SCI extras"
- depends on OLPC && OLPC_XO1_PM
+ depends on OLPC && OLPC_XO1_PM && GPIO_CS5535=y
depends on INPUT=y
select POWER_SUPPLY
- select GPIO_CS5535
- select MFD_CORE
---help---
Add support for SCI-based features of the OLPC XO-1 laptop:
- EC-driven system wakeups
diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index 66e42a098d70..a0a50b91ecef 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -54,6 +54,9 @@ unsigned int ptrs_per_p4d __ro_after_init = 1;
extern unsigned long get_cmd_line_ptr(void);
+/* Used by PAGE_KERN* macros: */
+pteval_t __default_kernel_pte_mask __read_mostly = ~0;
+
/* Simplified build-specific string for starting entropy. */
static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@"
LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION;
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index be63330c5511..352e70cd33e8 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -114,7 +114,9 @@ For 32-bit we have the following conventions - kernel is built with
pushq %rsi /* pt_regs->si */
.endif
pushq \rdx /* pt_regs->dx */
+ xorl %edx, %edx /* nospec dx */
pushq %rcx /* pt_regs->cx */
+ xorl %ecx, %ecx /* nospec cx */
pushq \rax /* pt_regs->ax */
pushq %r8 /* pt_regs->r8 */
xorl %r8d, %r8d /* nospec r8 */
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 74f6eee15179..fbf6a6c3fd2d 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -266,14 +266,13 @@ __visible inline void syscall_return_slowpath(struct pt_regs *regs)
}
#ifdef CONFIG_X86_64
-__visible void do_syscall_64(struct pt_regs *regs)
+__visible void do_syscall_64(unsigned long nr, struct pt_regs *regs)
{
- struct thread_info *ti = current_thread_info();
- unsigned long nr = regs->orig_ax;
+ struct thread_info *ti;
enter_from_user_mode();
local_irq_enable();
-
+ ti = current_thread_info();
if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY)
nr = syscall_trace_enter(regs);
@@ -282,11 +281,10 @@ __visible void do_syscall_64(struct pt_regs *regs)
* table. The only functional difference is the x32 bit in
* regs->orig_ax, which changes the behavior of some syscalls.
*/
- if (likely((nr & __SYSCALL_MASK) < NR_syscalls)) {
- nr = array_index_nospec(nr & __SYSCALL_MASK, NR_syscalls);
- regs->ax = sys_call_table[nr](
- regs->di, regs->si, regs->dx,
- regs->r10, regs->r8, regs->r9);
+ nr &= __SYSCALL_MASK;
+ if (likely(nr < NR_syscalls)) {
+ nr = array_index_nospec(nr, NR_syscalls);
+ regs->ax = sys_call_table[nr](regs);
}
syscall_return_slowpath(regs);
@@ -321,6 +319,9 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
if (likely(nr < IA32_NR_syscalls)) {
nr = array_index_nospec(nr, IA32_NR_syscalls);
+#ifdef CONFIG_IA32_EMULATION
+ regs->ax = ia32_sys_call_table[nr](regs);
+#else
/*
* It's possible that a 32-bit syscall implementation
* takes a 64-bit parameter but nonetheless assumes that
@@ -331,6 +332,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
(unsigned int)regs->bx, (unsigned int)regs->cx,
(unsigned int)regs->dx, (unsigned int)regs->si,
(unsigned int)regs->di, (unsigned int)regs->bp);
+#endif /* CONFIG_IA32_EMULATION */
}
syscall_return_slowpath(regs);
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index b0a4649e55ce..3166b9674429 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -233,7 +233,8 @@ GLOBAL(entry_SYSCALL_64_after_hwframe)
TRACE_IRQS_OFF
/* IRQs are off. */
- movq %rsp, %rdi
+ movq %rax, %rdi
+ movq %rsp, %rsi
call do_syscall_64 /* returns with IRQs disabled */
TRACE_IRQS_IRETQ /* we're about to change IF */
@@ -913,7 +914,7 @@ ENTRY(\sym)
pushq $-1 /* ORIG_RAX: no syscall to restart */
.endif
- .if \paranoid < 2
+ .if \paranoid == 1
testb $3, CS-ORIG_RAX(%rsp) /* If coming from userspace, switch stacks */
jnz .Lfrom_usermode_switch_stack_\@
.endif
@@ -960,7 +961,7 @@ ENTRY(\sym)
jmp error_exit
.endif
- .if \paranoid < 2
+ .if \paranoid == 1
/*
* Entry from userspace. Switch stacks and treat it
* as a normal entry. This means that paranoid handlers
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 08425c42f8b7..9af927e59d49 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -220,8 +220,11 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
pushq %rax /* pt_regs->orig_ax */
pushq %rdi /* pt_regs->di */
pushq %rsi /* pt_regs->si */
+ xorl %esi, %esi /* nospec si */
pushq %rdx /* pt_regs->dx */
+ xorl %edx, %edx /* nospec dx */
pushq %rbp /* pt_regs->cx (stashed in bp) */
+ xorl %ecx, %ecx /* nospec cx */
pushq $-ENOSYS /* pt_regs->ax */
pushq $0 /* pt_regs->r8 = 0 */
xorl %r8d, %r8d /* nospec r8 */
@@ -365,8 +368,11 @@ ENTRY(entry_INT80_compat)
pushq (%rdi) /* pt_regs->di */
pushq %rsi /* pt_regs->si */
+ xorl %esi, %esi /* nospec si */
pushq %rdx /* pt_regs->dx */
+ xorl %edx, %edx /* nospec dx */
pushq %rcx /* pt_regs->cx */
+ xorl %ecx, %ecx /* nospec cx */
pushq $-ENOSYS /* pt_regs->ax */
pushq $0 /* pt_regs->r8 = 0 */
xorl %r8d, %r8d /* nospec r8 */
diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c
index 95c294963612..aa3336a7cb15 100644
--- a/arch/x86/entry/syscall_32.c
+++ b/arch/x86/entry/syscall_32.c
@@ -7,14 +7,23 @@
#include <asm/asm-offsets.h>
#include <asm/syscall.h>
-#define __SYSCALL_I386(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
+#ifdef CONFIG_IA32_EMULATION
+/* On X86_64, we use struct pt_regs * to pass parameters to syscalls */
+#define __SYSCALL_I386(nr, sym, qual) extern asmlinkage long sym(const struct pt_regs *);
+
+/* this is a lie, but it does not hurt as sys_ni_syscall just returns -EINVAL */
+extern asmlinkage long sys_ni_syscall(const struct pt_regs *);
+
+#else /* CONFIG_IA32_EMULATION */
+#define __SYSCALL_I386(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
+extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
+#endif /* CONFIG_IA32_EMULATION */
+
#include <asm/syscalls_32.h>
#undef __SYSCALL_I386
#define __SYSCALL_I386(nr, sym, qual) [nr] = sym,
-extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
-
__visible const sys_call_ptr_t ia32_sys_call_table[__NR_syscall_compat_max+1] = {
/*
* Smells like a compiler bug -- it doesn't work
diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
index c176d2fab1da..d5252bc1e380 100644
--- a/arch/x86/entry/syscall_64.c
+++ b/arch/x86/entry/syscall_64.c
@@ -7,14 +7,14 @@
#include <asm/asm-offsets.h>
#include <asm/syscall.h>
-#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
+/* this is a lie, but it does not hurt as sys_ni_syscall just returns -EINVAL */
+extern asmlinkage long sys_ni_syscall(const struct pt_regs *);
+#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(const struct pt_regs *);
#include <asm/syscalls_64.h>
#undef __SYSCALL_64
#define __SYSCALL_64(nr, sym, qual) [nr] = sym,
-extern long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
-
asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
/*
* Smells like a compiler bug -- it doesn't work
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index c58f75b088c5..d6b27dab1b30 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -4,390 +4,395 @@
# The format is:
# <number> <abi> <name> <entry point> <compat entry point>
#
+# The __ia32_sys and __ia32_compat_sys stubs are created on-the-fly for
+# sys_*() system calls and compat_sys_*() compat system calls if
+# IA32_EMULATION is defined, and expect struct pt_regs *regs as their only
+# parameter.
+#
# The abi is always "i386" for this file.
#
-0 i386 restart_syscall sys_restart_syscall
-1 i386 exit sys_exit
-2 i386 fork sys_fork
-3 i386 read sys_read
-4 i386 write sys_write
-5 i386 open sys_open compat_sys_open
-6 i386 close sys_close
-7 i386 waitpid sys_waitpid
-8 i386 creat sys_creat
-9 i386 link sys_link
-10 i386 unlink sys_unlink
-11 i386 execve sys_execve compat_sys_execve
-12 i386 chdir sys_chdir
-13 i386 time sys_time compat_sys_time
-14 i386 mknod sys_mknod
-15 i386 chmod sys_chmod
-16 i386 lchown sys_lchown16
+0 i386 restart_syscall sys_restart_syscall __ia32_sys_restart_syscall
+1 i386 exit sys_exit __ia32_sys_exit
+2 i386 fork sys_fork __ia32_sys_fork
+3 i386 read sys_read __ia32_sys_read
+4 i386 write sys_write __ia32_sys_write
+5 i386 open sys_open __ia32_compat_sys_open
+6 i386 close sys_close __ia32_sys_close
+7 i386 waitpid sys_waitpid __ia32_sys_waitpid
+8 i386 creat sys_creat __ia32_sys_creat
+9 i386 link sys_link __ia32_sys_link
+10 i386 unlink sys_unlink __ia32_sys_unlink
+11 i386 execve sys_execve __ia32_compat_sys_execve
+12 i386 chdir sys_chdir __ia32_sys_chdir
+13 i386 time sys_time __ia32_compat_sys_time
+14 i386 mknod sys_mknod __ia32_sys_mknod
+15 i386 chmod sys_chmod __ia32_sys_chmod
+16 i386 lchown sys_lchown16 __ia32_sys_lchown16
17 i386 break
-18 i386 oldstat sys_stat
-19 i386 lseek sys_lseek compat_sys_lseek
-20 i386 getpid sys_getpid
-21 i386 mount sys_mount compat_sys_mount
-22 i386 umount sys_oldumount
-23 i386 setuid sys_setuid16
-24 i386 getuid sys_getuid16
-25 i386 stime sys_stime compat_sys_stime
-26 i386 ptrace sys_ptrace compat_sys_ptrace
-27 i386 alarm sys_alarm
-28 i386 oldfstat sys_fstat
-29 i386 pause sys_pause
-30 i386 utime sys_utime compat_sys_utime
+18 i386 oldstat sys_stat __ia32_sys_stat
+19 i386 lseek sys_lseek __ia32_compat_sys_lseek
+20 i386 getpid sys_getpid __ia32_sys_getpid
+21 i386 mount sys_mount __ia32_compat_sys_mount
+22 i386 umount sys_oldumount __ia32_sys_oldumount
+23 i386 setuid sys_setuid16 __ia32_sys_setuid16
+24 i386 getuid sys_getuid16 __ia32_sys_getuid16
+25 i386 stime sys_stime __ia32_compat_sys_stime
+26 i386 ptrace sys_ptrace __ia32_compat_sys_ptrace
+27 i386 alarm sys_alarm __ia32_sys_alarm
+28 i386 oldfstat sys_fstat __ia32_sys_fstat
+29 i386 pause sys_pause __ia32_sys_pause
+30 i386 utime sys_utime __ia32_compat_sys_utime
31 i386 stty
32 i386 gtty
-33 i386 access sys_access
-34 i386 nice sys_nice
+33 i386 access sys_access __ia32_sys_access
+34 i386 nice sys_nice __ia32_sys_nice
35 i386 ftime
-36 i386 sync sys_sync
-37 i386 kill sys_kill
-38 i386 rename sys_rename
-39 i386 mkdir sys_mkdir
-40 i386 rmdir sys_rmdir
-41 i386 dup sys_dup
-42 i386 pipe sys_pipe
-43 i386 times sys_times compat_sys_times
+36 i386 sync sys_sync __ia32_sys_sync
+37 i386 kill sys_kill __ia32_sys_kill
+38 i386 rename sys_rename __ia32_sys_rename
+39 i386 mkdir sys_mkdir __ia32_sys_mkdir
+40 i386 rmdir sys_rmdir __ia32_sys_rmdir
+41 i386 dup sys_dup __ia32_sys_dup
+42 i386 pipe sys_pipe __ia32_sys_pipe
+43 i386 times sys_times __ia32_compat_sys_times
44 i386 prof
-45 i386 brk sys_brk
-46 i386 setgid sys_setgid16
-47 i386 getgid sys_getgid16
-48 i386 signal sys_signal
-49 i386 geteuid sys_geteuid16
-50 i386 getegid sys_getegid16
-51 i386 acct sys_acct
-52 i386 umount2 sys_umount
+45 i386 brk sys_brk __ia32_sys_brk
+46 i386 setgid sys_setgid16 __ia32_sys_setgid16
+47 i386 getgid sys_getgid16 __ia32_sys_getgid16
+48 i386 signal sys_signal __ia32_sys_signal
+49 i386 geteuid sys_geteuid16 __ia32_sys_geteuid16
+50 i386 getegid sys_getegid16 __ia32_sys_getegid16
+51 i386 acct sys_acct __ia32_sys_acct
+52 i386 umount2 sys_umount __ia32_sys_umount
53 i386 lock
-54 i386 ioctl sys_ioctl compat_sys_ioctl
-55 i386 fcntl sys_fcntl compat_sys_fcntl64
+54 i386 ioctl sys_ioctl __ia32_compat_sys_ioctl
+55 i386 fcntl sys_fcntl __ia32_compat_sys_fcntl64
56 i386 mpx
-57 i386 setpgid sys_setpgid
+57 i386 setpgid sys_setpgid __ia32_sys_setpgid
58 i386 ulimit
-59 i386 oldolduname sys_olduname
-60 i386 umask sys_umask
-61 i386 chroot sys_chroot
-62 i386 ustat sys_ustat compat_sys_ustat
-63 i386 dup2 sys_dup2
-64 i386 getppid sys_getppid
-65 i386 getpgrp sys_getpgrp
-66 i386 setsid sys_setsid
-67 i386 sigaction sys_sigaction compat_sys_sigaction
-68 i386 sgetmask sys_sgetmask
-69 i386 ssetmask sys_ssetmask
-70 i386 setreuid sys_setreuid16
-71 i386 setregid sys_setregid16
-72 i386 sigsuspend sys_sigsuspend
-73 i386 sigpending sys_sigpending compat_sys_sigpending
-74 i386 sethostname sys_sethostname
-75 i386 setrlimit sys_setrlimit compat_sys_setrlimit
-76 i386 getrlimit sys_old_getrlimit compat_sys_old_getrlimit
-77 i386 getrusage sys_getrusage compat_sys_getrusage
-78 i386 gettimeofday sys_gettimeofday compat_sys_gettimeofday
-79 i386 settimeofday sys_settimeofday compat_sys_settimeofday
-80 i386 getgroups sys_getgroups16
-81 i386 setgroups sys_setgroups16
-82 i386 select sys_old_select compat_sys_old_select
-83 i386 symlink sys_symlink
-84 i386 oldlstat sys_lstat
-85 i386 readlink sys_readlink
-86 i386 uselib sys_uselib
-87 i386 swapon sys_swapon
-88 i386 reboot sys_reboot
-89 i386 readdir sys_old_readdir compat_sys_old_readdir
-90 i386 mmap sys_old_mmap compat_sys_x86_mmap
-91 i386 munmap sys_munmap
-92 i386 truncate sys_truncate compat_sys_truncate
-93 i386 ftruncate sys_ftruncate compat_sys_ftruncate
-94 i386 fchmod sys_fchmod
-95 i386 fchown sys_fchown16
-96 i386 getpriority sys_getpriority
-97 i386 setpriority sys_setpriority
+59 i386 oldolduname sys_olduname __ia32_sys_olduname
+60 i386 umask sys_umask __ia32_sys_umask
+61 i386 chroot sys_chroot __ia32_sys_chroot
+62 i386 ustat sys_ustat __ia32_compat_sys_ustat
+63 i386 dup2 sys_dup2 __ia32_sys_dup2
+64 i386 getppid sys_getppid __ia32_sys_getppid
+65 i386 getpgrp sys_getpgrp __ia32_sys_getpgrp
+66 i386 setsid sys_setsid __ia32_sys_setsid
+67 i386 sigaction sys_sigaction __ia32_compat_sys_sigaction
+68 i386 sgetmask sys_sgetmask __ia32_sys_sgetmask
+69 i386 ssetmask sys_ssetmask __ia32_sys_ssetmask
+70 i386 setreuid sys_setreuid16 __ia32_sys_setreuid16
+71 i386 setregid sys_setregid16 __ia32_sys_setregid16
+72 i386 sigsuspend sys_sigsuspend __ia32_sys_sigsuspend
+73 i386 sigpending sys_sigpending __ia32_compat_sys_sigpending
+74 i386 sethostname sys_sethostname __ia32_sys_sethostname
+75 i386 setrlimit sys_setrlimit __ia32_compat_sys_setrlimit
+76 i386 getrlimit sys_old_getrlimit __ia32_compat_sys_old_getrlimit
+77 i386 getrusage sys_getrusage __ia32_compat_sys_getrusage
+78 i386 gettimeofday sys_gettimeofday __ia32_compat_sys_gettimeofday
+79 i386 settimeofday sys_settimeofday __ia32_compat_sys_settimeofday
+80 i386 getgroups sys_getgroups16 __ia32_sys_getgroups16
+81 i386 setgroups sys_setgroups16 __ia32_sys_setgroups16
+82 i386 select sys_old_select __ia32_compat_sys_old_select
+83 i386 symlink sys_symlink __ia32_sys_symlink
+84 i386 oldlstat sys_lstat __ia32_sys_lstat
+85 i386 readlink sys_readlink __ia32_sys_readlink
+86 i386 uselib sys_uselib __ia32_sys_uselib
+87 i386 swapon sys_swapon __ia32_sys_swapon
+88 i386 reboot sys_reboot __ia32_sys_reboot
+89 i386 readdir sys_old_readdir __ia32_compat_sys_old_readdir
+90 i386 mmap sys_old_mmap __ia32_compat_sys_x86_mmap
+91 i386 munmap sys_munmap __ia32_sys_munmap
+92 i386 truncate sys_truncate __ia32_compat_sys_truncate
+93 i386 ftruncate sys_ftruncate __ia32_compat_sys_ftruncate
+94 i386 fchmod sys_fchmod __ia32_sys_fchmod
+95 i386 fchown sys_fchown16 __ia32_sys_fchown16
+96 i386 getpriority sys_getpriority __ia32_sys_getpriority
+97 i386 setpriority sys_setpriority __ia32_sys_setpriority
98 i386 profil
-99 i386 statfs sys_statfs compat_sys_statfs
-100 i386 fstatfs sys_fstatfs compat_sys_fstatfs
-101 i386 ioperm sys_ioperm
-102 i386 socketcall sys_socketcall compat_sys_socketcall
-103 i386 syslog sys_syslog
-104 i386 setitimer sys_setitimer compat_sys_setitimer
-105 i386 getitimer sys_getitimer compat_sys_getitimer
-106 i386 stat sys_newstat compat_sys_newstat
-107 i386 lstat sys_newlstat compat_sys_newlstat
-108 i386 fstat sys_newfstat compat_sys_newfstat
-109 i386 olduname sys_uname
-110 i386 iopl sys_iopl
-111 i386 vhangup sys_vhangup
+99 i386 statfs sys_statfs __ia32_compat_sys_statfs
+100 i386 fstatfs sys_fstatfs __ia32_compat_sys_fstatfs
+101 i386 ioperm sys_ioperm __ia32_sys_ioperm
+102 i386 socketcall sys_socketcall __ia32_compat_sys_socketcall
+103 i386 syslog sys_syslog __ia32_sys_syslog
+104 i386 setitimer sys_setitimer __ia32_compat_sys_setitimer
+105 i386 getitimer sys_getitimer __ia32_compat_sys_getitimer
+106 i386 stat sys_newstat __ia32_compat_sys_newstat
+107 i386 lstat sys_newlstat __ia32_compat_sys_newlstat
+108 i386 fstat sys_newfstat __ia32_compat_sys_newfstat
+109 i386 olduname sys_uname __ia32_sys_uname
+110 i386 iopl sys_iopl __ia32_sys_iopl
+111 i386 vhangup sys_vhangup __ia32_sys_vhangup
112 i386 idle
113 i386 vm86old sys_vm86old sys_ni_syscall
-114 i386 wait4 sys_wait4 compat_sys_wait4
-115 i386 swapoff sys_swapoff
-116 i386 sysinfo sys_sysinfo compat_sys_sysinfo
-117 i386 ipc sys_ipc compat_sys_ipc
-118 i386 fsync sys_fsync
+114 i386 wait4 sys_wait4 __ia32_compat_sys_wait4
+115 i386 swapoff sys_swapoff __ia32_sys_swapoff
+116 i386 sysinfo sys_sysinfo __ia32_compat_sys_sysinfo
+117 i386 ipc sys_ipc __ia32_compat_sys_ipc
+118 i386 fsync sys_fsync __ia32_sys_fsync
119 i386 sigreturn sys_sigreturn sys32_sigreturn
-120 i386 clone sys_clone compat_sys_x86_clone
-121 i386 setdomainname sys_setdomainname
-122 i386 uname sys_newuname
-123 i386 modify_ldt sys_modify_ldt
-124 i386 adjtimex sys_adjtimex compat_sys_adjtimex
-125 i386 mprotect sys_mprotect
-126 i386 sigprocmask sys_sigprocmask compat_sys_sigprocmask
+120 i386 clone sys_clone __ia32_compat_sys_x86_clone
+121 i386 setdomainname sys_setdomainname __ia32_sys_setdomainname
+122 i386 uname sys_newuname __ia32_sys_newuname
+123 i386 modify_ldt sys_modify_ldt __ia32_sys_modify_ldt
+124 i386 adjtimex sys_adjtimex __ia32_compat_sys_adjtimex
+125 i386 mprotect sys_mprotect __ia32_sys_mprotect
+126 i386 sigprocmask sys_sigprocmask __ia32_compat_sys_sigprocmask
127 i386 create_module
-128 i386 init_module sys_init_module
-129 i386 delete_module sys_delete_module
+128 i386 init_module sys_init_module __ia32_sys_init_module
+129 i386 delete_module sys_delete_module __ia32_sys_delete_module
130 i386 get_kernel_syms
-131 i386 quotactl sys_quotactl compat_sys_quotactl32
-132 i386 getpgid sys_getpgid
-133 i386 fchdir sys_fchdir
-134 i386 bdflush sys_bdflush
-135 i386 sysfs sys_sysfs
-136 i386 personality sys_personality
+131 i386 quotactl sys_quotactl __ia32_compat_sys_quotactl32
+132 i386 getpgid sys_getpgid __ia32_sys_getpgid
+133 i386 fchdir sys_fchdir __ia32_sys_fchdir
+134 i386 bdflush sys_bdflush __ia32_sys_bdflush
+135 i386 sysfs sys_sysfs __ia32_sys_sysfs
+136 i386 personality sys_personality __ia32_sys_personality
137 i386 afs_syscall
-138 i386 setfsuid sys_setfsuid16
-139 i386 setfsgid sys_setfsgid16
-140 i386 _llseek sys_llseek
-141 i386 getdents sys_getdents compat_sys_getdents
-142 i386 _newselect sys_select compat_sys_select
-143 i386 flock sys_flock
-144 i386 msync sys_msync
-145 i386 readv sys_readv compat_sys_readv
-146 i386 writev sys_writev compat_sys_writev
-147 i386 getsid sys_getsid
-148 i386 fdatasync sys_fdatasync
-149 i386 _sysctl sys_sysctl compat_sys_sysctl
-150 i386 mlock sys_mlock
-151 i386 munlock sys_munlock
-152 i386 mlockall sys_mlockall
-153 i386 munlockall sys_munlockall
-154 i386 sched_setparam sys_sched_setparam
-155 i386 sched_getparam sys_sched_getparam
-156 i386 sched_setscheduler sys_sched_setscheduler
-157 i386 sched_getscheduler sys_sched_getscheduler
-158 i386 sched_yield sys_sched_yield
-159 i386 sched_get_priority_max sys_sched_get_priority_max
-160 i386 sched_get_priority_min sys_sched_get_priority_min
-161 i386 sched_rr_get_interval sys_sched_rr_get_interval compat_sys_sched_rr_get_interval
-162 i386 nanosleep sys_nanosleep compat_sys_nanosleep
-163 i386 mremap sys_mremap
-164 i386 setresuid sys_setresuid16
-165 i386 getresuid sys_getresuid16
+138 i386 setfsuid sys_setfsuid16 __ia32_sys_setfsuid16
+139 i386 setfsgid sys_setfsgid16 __ia32_sys_setfsgid16
+140 i386 _llseek sys_llseek __ia32_sys_llseek
+141 i386 getdents sys_getdents __ia32_compat_sys_getdents
+142 i386 _newselect sys_select __ia32_compat_sys_select
+143 i386 flock sys_flock __ia32_sys_flock
+144 i386 msync sys_msync __ia32_sys_msync
+145 i386 readv sys_readv __ia32_compat_sys_readv
+146 i386 writev sys_writev __ia32_compat_sys_writev
+147 i386 getsid sys_getsid __ia32_sys_getsid
+148 i386 fdatasync sys_fdatasync __ia32_sys_fdatasync
+149 i386 _sysctl sys_sysctl __ia32_compat_sys_sysctl
+150 i386 mlock sys_mlock __ia32_sys_mlock
+151 i386 munlock sys_munlock __ia32_sys_munlock
+152 i386 mlockall sys_mlockall __ia32_sys_mlockall
+153 i386 munlockall sys_munlockall __ia32_sys_munlockall
+154 i386 sched_setparam sys_sched_setparam __ia32_sys_sched_setparam
+155 i386 sched_getparam sys_sched_getparam __ia32_sys_sched_getparam
+156 i386 sched_setscheduler sys_sched_setscheduler __ia32_sys_sched_setscheduler
+157 i386 sched_getscheduler sys_sched_getscheduler __ia32_sys_sched_getscheduler
+158 i386 sched_yield sys_sched_yield __ia32_sys_sched_yield
+159 i386 sched_get_priority_max sys_sched_get_priority_max __ia32_sys_sched_get_priority_max
+160 i386 sched_get_priority_min sys_sched_get_priority_min __ia32_sys_sched_get_priority_min
+161 i386 sched_rr_get_interval sys_sched_rr_get_interval __ia32_compat_sys_sched_rr_get_interval
+162 i386 nanosleep sys_nanosleep __ia32_compat_sys_nanosleep
+163 i386 mremap sys_mremap __ia32_sys_mremap
+164 i386 setresuid sys_setresuid16 __ia32_sys_setresuid16
+165 i386 getresuid sys_getresuid16 __ia32_sys_getresuid16
166 i386 vm86 sys_vm86 sys_ni_syscall
167 i386 query_module
-168 i386 poll sys_poll
+168 i386 poll sys_poll __ia32_sys_poll
169 i386 nfsservctl
-170 i386 setresgid sys_setresgid16
-171 i386 getresgid sys_getresgid16
-172 i386 prctl sys_prctl
+170 i386 setresgid sys_setresgid16 __ia32_sys_setresgid16
+171 i386 getresgid sys_getresgid16 __ia32_sys_getresgid16
+172 i386 prctl sys_prctl __ia32_sys_prctl
173 i386 rt_sigreturn sys_rt_sigreturn sys32_rt_sigreturn
-174 i386 rt_sigaction sys_rt_sigaction compat_sys_rt_sigaction
-175 i386 rt_sigprocmask sys_rt_sigprocmask
-176 i386 rt_sigpending sys_rt_sigpending compat_sys_rt_sigpending
-177 i386 rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait
-178 i386 rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo
-179 i386 rt_sigsuspend sys_rt_sigsuspend
-180 i386 pread64 sys_pread64 compat_sys_x86_pread
-181 i386 pwrite64 sys_pwrite64 compat_sys_x86_pwrite
-182 i386 chown sys_chown16
-183 i386 getcwd sys_getcwd
-184 i386 capget sys_capget
-185 i386 capset sys_capset
-186 i386 sigaltstack sys_sigaltstack compat_sys_sigaltstack
-187 i386 sendfile sys_sendfile compat_sys_sendfile
+174 i386 rt_sigaction sys_rt_sigaction __ia32_compat_sys_rt_sigaction
+175 i386 rt_sigprocmask sys_rt_sigprocmask __ia32_sys_rt_sigprocmask
+176 i386 rt_sigpending sys_rt_sigpending __ia32_compat_sys_rt_sigpending
+177 i386 rt_sigtimedwait sys_rt_sigtimedwait __ia32_compat_sys_rt_sigtimedwait
+178 i386 rt_sigqueueinfo sys_rt_sigqueueinfo __ia32_compat_sys_rt_sigqueueinfo
+179 i386 rt_sigsuspend sys_rt_sigsuspend __ia32_sys_rt_sigsuspend
+180 i386 pread64 sys_pread64 __ia32_compat_sys_x86_pread
+181 i386 pwrite64 sys_pwrite64 __ia32_compat_sys_x86_pwrite
+182 i386 chown sys_chown16 __ia32_sys_chown16
+183 i386 getcwd sys_getcwd __ia32_sys_getcwd
+184 i386 capget sys_capget __ia32_sys_capget
+185 i386 capset sys_capset __ia32_sys_capset
+186 i386 sigaltstack sys_sigaltstack __ia32_compat_sys_sigaltstack
+187 i386 sendfile sys_sendfile __ia32_compat_sys_sendfile
188 i386 getpmsg
189 i386 putpmsg
-190 i386 vfork sys_vfork
-191 i386 ugetrlimit sys_getrlimit compat_sys_getrlimit
-192 i386 mmap2 sys_mmap_pgoff
-193 i386 truncate64 sys_truncate64 compat_sys_x86_truncate64
-194 i386 ftruncate64 sys_ftruncate64 compat_sys_x86_ftruncate64
-195 i386 stat64 sys_stat64 compat_sys_x86_stat64
-196 i386 lstat64 sys_lstat64 compat_sys_x86_lstat64
-197 i386 fstat64 sys_fstat64 compat_sys_x86_fstat64
-198 i386 lchown32 sys_lchown
-199 i386 getuid32 sys_getuid
-200 i386 getgid32 sys_getgid
-201 i386 geteuid32 sys_geteuid
-202 i386 getegid32 sys_getegid
-203 i386 setreuid32 sys_setreuid
-204 i386 setregid32 sys_setregid
-205 i386 getgroups32 sys_getgroups
-206 i386 setgroups32 sys_setgroups
-207 i386 fchown32 sys_fchown
-208 i386 setresuid32 sys_setresuid
-209 i386 getresuid32 sys_getresuid
-210 i386 setresgid32 sys_setresgid
-211 i386 getresgid32 sys_getresgid
-212 i386 chown32 sys_chown
-213 i386 setuid32 sys_setuid
-214 i386 setgid32 sys_setgid
-215 i386 setfsuid32 sys_setfsuid
-216 i386 setfsgid32 sys_setfsgid
-217 i386 pivot_root sys_pivot_root
-218 i386 mincore sys_mincore
-219 i386 madvise sys_madvise
-220 i386 getdents64 sys_getdents64
-221 i386 fcntl64 sys_fcntl64 compat_sys_fcntl64
+190 i386 vfork sys_vfork __ia32_sys_vfork
+191 i386 ugetrlimit sys_getrlimit __ia32_compat_sys_getrlimit
+192 i386 mmap2 sys_mmap_pgoff __ia32_sys_mmap_pgoff
+193 i386 truncate64 sys_truncate64 __ia32_compat_sys_x86_truncate64
+194 i386 ftruncate64 sys_ftruncate64 __ia32_compat_sys_x86_ftruncate64
+195 i386 stat64 sys_stat64 __ia32_compat_sys_x86_stat64
+196 i386 lstat64 sys_lstat64 __ia32_compat_sys_x86_lstat64
+197 i386 fstat64 sys_fstat64 __ia32_compat_sys_x86_fstat64
+198 i386 lchown32 sys_lchown __ia32_sys_lchown
+199 i386 getuid32 sys_getuid __ia32_sys_getuid
+200 i386 getgid32 sys_getgid __ia32_sys_getgid
+201 i386 geteuid32 sys_geteuid __ia32_sys_geteuid
+202 i386 getegid32 sys_getegid __ia32_sys_getegid
+203 i386 setreuid32 sys_setreuid __ia32_sys_setreuid
+204 i386 setregid32 sys_setregid __ia32_sys_setregid
+205 i386 getgroups32 sys_getgroups __ia32_sys_getgroups
+206 i386 setgroups32 sys_setgroups __ia32_sys_setgroups
+207 i386 fchown32 sys_fchown __ia32_sys_fchown
+208 i386 setresuid32 sys_setresuid __ia32_sys_setresuid
+209 i386 getresuid32 sys_getresuid __ia32_sys_getresuid
+210 i386 setresgid32 sys_setresgid __ia32_sys_setresgid
+211 i386 getresgid32 sys_getresgid __ia32_sys_getresgid
+212 i386 chown32 sys_chown __ia32_sys_chown
+213 i386 setuid32 sys_setuid __ia32_sys_setuid
+214 i386 setgid32 sys_setgid __ia32_sys_setgid
+215 i386 setfsuid32 sys_setfsuid __ia32_sys_setfsuid
+216 i386 setfsgid32 sys_setfsgid __ia32_sys_setfsgid
+217 i386 pivot_root sys_pivot_root __ia32_sys_pivot_root
+218 i386 mincore sys_mincore __ia32_sys_mincore
+219 i386 madvise sys_madvise __ia32_sys_madvise
+220 i386 getdents64 sys_getdents64 __ia32_sys_getdents64
+221 i386 fcntl64 sys_fcntl64 __ia32_compat_sys_fcntl64
# 222 is unused
# 223 is unused
-224 i386 gettid sys_gettid
-225 i386 readahead sys_readahead compat_sys_x86_readahead
-226 i386 setxattr sys_setxattr
-227 i386 lsetxattr sys_lsetxattr
-228 i386 fsetxattr sys_fsetxattr
-229 i386 getxattr sys_getxattr
-230 i386 lgetxattr sys_lgetxattr
-231 i386 fgetxattr sys_fgetxattr
-232 i386 listxattr sys_listxattr
-233 i386 llistxattr sys_llistxattr
-234 i386 flistxattr sys_flistxattr
-235 i386 removexattr sys_removexattr
-236 i386 lremovexattr sys_lremovexattr
-237 i386 fremovexattr sys_fremovexattr
-238 i386 tkill sys_tkill
-239 i386 sendfile64 sys_sendfile64
-240 i386 futex sys_futex compat_sys_futex
-241 i386 sched_setaffinity sys_sched_setaffinity compat_sys_sched_setaffinity
-242 i386 sched_getaffinity sys_sched_getaffinity compat_sys_sched_getaffinity
-243 i386 set_thread_area sys_set_thread_area
-244 i386 get_thread_area sys_get_thread_area
-245 i386 io_setup sys_io_setup compat_sys_io_setup
-246 i386 io_destroy sys_io_destroy
-247 i386 io_getevents sys_io_getevents compat_sys_io_getevents
-248 i386 io_submit sys_io_submit compat_sys_io_submit
-249 i386 io_cancel sys_io_cancel
-250 i386 fadvise64 sys_fadvise64 compat_sys_x86_fadvise64
+224 i386 gettid sys_gettid __ia32_sys_gettid
+225 i386 readahead sys_readahead __ia32_compat_sys_x86_readahead
+226 i386 setxattr sys_setxattr __ia32_sys_setxattr
+227 i386 lsetxattr sys_lsetxattr __ia32_sys_lsetxattr
+228 i386 fsetxattr sys_fsetxattr __ia32_sys_fsetxattr
+229 i386 getxattr sys_getxattr __ia32_sys_getxattr
+230 i386 lgetxattr sys_lgetxattr __ia32_sys_lgetxattr
+231 i386 fgetxattr sys_fgetxattr __ia32_sys_fgetxattr
+232 i386 listxattr sys_listxattr __ia32_sys_listxattr
+233 i386 llistxattr sys_llistxattr __ia32_sys_llistxattr
+234 i386 flistxattr sys_flistxattr __ia32_sys_flistxattr
+235 i386 removexattr sys_removexattr __ia32_sys_removexattr
+236 i386 lremovexattr sys_lremovexattr __ia32_sys_lremovexattr
+237 i386 fremovexattr sys_fremovexattr __ia32_sys_fremovexattr
+238 i386 tkill sys_tkill __ia32_sys_tkill
+239 i386 sendfile64 sys_sendfile64 __ia32_sys_sendfile64
+240 i386 futex sys_futex __ia32_compat_sys_futex
+241 i386 sched_setaffinity sys_sched_setaffinity __ia32_compat_sys_sched_setaffinity
+242 i386 sched_getaffinity sys_sched_getaffinity __ia32_compat_sys_sched_getaffinity
+243 i386 set_thread_area sys_set_thread_area __ia32_sys_set_thread_area
+244 i386 get_thread_area sys_get_thread_area __ia32_sys_get_thread_area
+245 i386 io_setup sys_io_setup __ia32_compat_sys_io_setup
+246 i386 io_destroy sys_io_destroy __ia32_sys_io_destroy
+247 i386 io_getevents sys_io_getevents __ia32_compat_sys_io_getevents
+248 i386 io_submit sys_io_submit __ia32_compat_sys_io_submit
+249 i386 io_cancel sys_io_cancel __ia32_sys_io_cancel
+250 i386 fadvise64 sys_fadvise64 __ia32_compat_sys_x86_fadvise64
# 251 is available for reuse (was briefly sys_set_zone_reclaim)
-252 i386 exit_group sys_exit_group
-253 i386 lookup_dcookie sys_lookup_dcookie compat_sys_lookup_dcookie
-254 i386 epoll_create sys_epoll_create
-255 i386 epoll_ctl sys_epoll_ctl
-256 i386 epoll_wait sys_epoll_wait
-257 i386 remap_file_pages sys_remap_file_pages
-258 i386 set_tid_address sys_set_tid_address
-259 i386 timer_create sys_timer_create compat_sys_timer_create
-260 i386 timer_settime sys_timer_settime compat_sys_timer_settime
-261 i386 timer_gettime sys_timer_gettime compat_sys_timer_gettime
-262 i386 timer_getoverrun sys_timer_getoverrun
-263 i386 timer_delete sys_timer_delete
-264 i386 clock_settime sys_clock_settime compat_sys_clock_settime
-265 i386 clock_gettime sys_clock_gettime compat_sys_clock_gettime
-266 i386 clock_getres sys_clock_getres compat_sys_clock_getres
-267 i386 clock_nanosleep sys_clock_nanosleep compat_sys_clock_nanosleep
-268 i386 statfs64 sys_statfs64 compat_sys_statfs64
-269 i386 fstatfs64 sys_fstatfs64 compat_sys_fstatfs64
-270 i386 tgkill sys_tgkill
-271 i386 utimes sys_utimes compat_sys_utimes
-272 i386 fadvise64_64 sys_fadvise64_64 compat_sys_x86_fadvise64_64
+252 i386 exit_group sys_exit_group __ia32_sys_exit_group
+253 i386 lookup_dcookie sys_lookup_dcookie __ia32_compat_sys_lookup_dcookie
+254 i386 epoll_create sys_epoll_create __ia32_sys_epoll_create
+255 i386 epoll_ctl sys_epoll_ctl __ia32_sys_epoll_ctl
+256 i386 epoll_wait sys_epoll_wait __ia32_sys_epoll_wait
+257 i386 remap_file_pages sys_remap_file_pages __ia32_sys_remap_file_pages
+258 i386 set_tid_address sys_set_tid_address __ia32_sys_set_tid_address
+259 i386 timer_create sys_timer_create __ia32_compat_sys_timer_create
+260 i386 timer_settime sys_timer_settime __ia32_compat_sys_timer_settime
+261 i386 timer_gettime sys_timer_gettime __ia32_compat_sys_timer_gettime
+262 i386 timer_getoverrun sys_timer_getoverrun __ia32_sys_timer_getoverrun
+263 i386 timer_delete sys_timer_delete __ia32_sys_timer_delete
+264 i386 clock_settime sys_clock_settime __ia32_compat_sys_clock_settime
+265 i386 clock_gettime sys_clock_gettime __ia32_compat_sys_clock_gettime
+266 i386 clock_getres sys_clock_getres __ia32_compat_sys_clock_getres
+267 i386 clock_nanosleep sys_clock_nanosleep __ia32_compat_sys_clock_nanosleep
+268 i386 statfs64 sys_statfs64 __ia32_compat_sys_statfs64
+269 i386 fstatfs64 sys_fstatfs64 __ia32_compat_sys_fstatfs64
+270 i386 tgkill sys_tgkill __ia32_sys_tgkill
+271 i386 utimes sys_utimes __ia32_compat_sys_utimes
+272 i386 fadvise64_64 sys_fadvise64_64 __ia32_compat_sys_x86_fadvise64_64
273 i386 vserver
-274 i386 mbind sys_mbind
-275 i386 get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy
-276 i386 set_mempolicy sys_set_mempolicy
-277 i386 mq_open sys_mq_open compat_sys_mq_open
-278 i386 mq_unlink sys_mq_unlink
-279 i386 mq_timedsend sys_mq_timedsend compat_sys_mq_timedsend
-280 i386 mq_timedreceive sys_mq_timedreceive compat_sys_mq_timedreceive
-281 i386 mq_notify sys_mq_notify compat_sys_mq_notify
-282 i386 mq_getsetattr sys_mq_getsetattr compat_sys_mq_getsetattr
-283 i386 kexec_load sys_kexec_load compat_sys_kexec_load
-284 i386 waitid sys_waitid compat_sys_waitid
+274 i386 mbind sys_mbind __ia32_sys_mbind
+275 i386 get_mempolicy sys_get_mempolicy __ia32_compat_sys_get_mempolicy
+276 i386 set_mempolicy sys_set_mempolicy __ia32_sys_set_mempolicy
+277 i386 mq_open sys_mq_open __ia32_compat_sys_mq_open
+278 i386 mq_unlink sys_mq_unlink __ia32_sys_mq_unlink
+279 i386 mq_timedsend sys_mq_timedsend __ia32_compat_sys_mq_timedsend
+280 i386 mq_timedreceive sys_mq_timedreceive __ia32_compat_sys_mq_timedreceive
+281 i386 mq_notify sys_mq_notify __ia32_compat_sys_mq_notify
+282 i386 mq_getsetattr sys_mq_getsetattr __ia32_compat_sys_mq_getsetattr
+283 i386 kexec_load sys_kexec_load __ia32_compat_sys_kexec_load
+284 i386 waitid sys_waitid __ia32_compat_sys_waitid
# 285 sys_setaltroot
-286 i386 add_key sys_add_key
-287 i386 request_key sys_request_key
-288 i386 keyctl sys_keyctl compat_sys_keyctl
-289 i386 ioprio_set sys_ioprio_set
-290 i386 ioprio_get sys_ioprio_get
-291 i386 inotify_init sys_inotify_init
-292 i386 inotify_add_watch sys_inotify_add_watch
-293 i386 inotify_rm_watch sys_inotify_rm_watch
-294 i386 migrate_pages sys_migrate_pages
-295 i386 openat sys_openat compat_sys_openat
-296 i386 mkdirat sys_mkdirat
-297 i386 mknodat sys_mknodat
-298 i386 fchownat sys_fchownat
-299 i386 futimesat sys_futimesat compat_sys_futimesat
-300 i386 fstatat64 sys_fstatat64 compat_sys_x86_fstatat
-301 i386 unlinkat sys_unlinkat
-302 i386 renameat sys_renameat
-303 i386 linkat sys_linkat
-304 i386 symlinkat sys_symlinkat
-305 i386 readlinkat sys_readlinkat
-306 i386 fchmodat sys_fchmodat
-307 i386 faccessat sys_faccessat
-308 i386 pselect6 sys_pselect6 compat_sys_pselect6
-309 i386 ppoll sys_ppoll compat_sys_ppoll
-310 i386 unshare sys_unshare
-311 i386 set_robust_list sys_set_robust_list compat_sys_set_robust_list
-312 i386 get_robust_list sys_get_robust_list compat_sys_get_robust_list
-313 i386 splice sys_splice
-314 i386 sync_file_range sys_sync_file_range compat_sys_x86_sync_file_range
-315 i386 tee sys_tee
-316 i386 vmsplice sys_vmsplice compat_sys_vmsplice
-317 i386 move_pages sys_move_pages compat_sys_move_pages
-318 i386 getcpu sys_getcpu
-319 i386 epoll_pwait sys_epoll_pwait
-320 i386 utimensat sys_utimensat compat_sys_utimensat
-321 i386 signalfd sys_signalfd compat_sys_signalfd
-322 i386 timerfd_create sys_timerfd_create
-323 i386 eventfd sys_eventfd
-324 i386 fallocate sys_fallocate compat_sys_x86_fallocate
-325 i386 timerfd_settime sys_timerfd_settime compat_sys_timerfd_settime
-326 i386 timerfd_gettime sys_timerfd_gettime compat_sys_timerfd_gettime
-327 i386 signalfd4 sys_signalfd4 compat_sys_signalfd4
-328 i386 eventfd2 sys_eventfd2
-329 i386 epoll_create1 sys_epoll_create1
-330 i386 dup3 sys_dup3
-331 i386 pipe2 sys_pipe2
-332 i386 inotify_init1 sys_inotify_init1
-333 i386 preadv sys_preadv compat_sys_preadv
-334 i386 pwritev sys_pwritev compat_sys_pwritev
-335 i386 rt_tgsigqueueinfo sys_rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo
-336 i386 perf_event_open sys_perf_event_open
-337 i386 recvmmsg sys_recvmmsg compat_sys_recvmmsg
-338 i386 fanotify_init sys_fanotify_init
-339 i386 fanotify_mark sys_fanotify_mark compat_sys_fanotify_mark
-340 i386 prlimit64 sys_prlimit64
-341 i386 name_to_handle_at sys_name_to_handle_at
-342 i386 open_by_handle_at sys_open_by_handle_at compat_sys_open_by_handle_at
-343 i386 clock_adjtime sys_clock_adjtime compat_sys_clock_adjtime
-344 i386 syncfs sys_syncfs
-345 i386 sendmmsg sys_sendmmsg compat_sys_sendmmsg
-346 i386 setns sys_setns
-347 i386 process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv
-348 i386 process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev
-349 i386 kcmp sys_kcmp
-350 i386 finit_module sys_finit_module
-351 i386 sched_setattr sys_sched_setattr
-352 i386 sched_getattr sys_sched_getattr
-353 i386 renameat2 sys_renameat2
-354 i386 seccomp sys_seccomp
-355 i386 getrandom sys_getrandom
-356 i386 memfd_create sys_memfd_create
-357 i386 bpf sys_bpf
-358 i386 execveat sys_execveat compat_sys_execveat
-359 i386 socket sys_socket
-360 i386 socketpair sys_socketpair
-361 i386 bind sys_bind
-362 i386 connect sys_connect
-363 i386 listen sys_listen
-364 i386 accept4 sys_accept4
-365 i386 getsockopt sys_getsockopt compat_sys_getsockopt
-366 i386 setsockopt sys_setsockopt compat_sys_setsockopt
-367 i386 getsockname sys_getsockname
-368 i386 getpeername sys_getpeername
-369 i386 sendto sys_sendto
-370 i386 sendmsg sys_sendmsg compat_sys_sendmsg
-371 i386 recvfrom sys_recvfrom compat_sys_recvfrom
-372 i386 recvmsg sys_recvmsg compat_sys_recvmsg
-373 i386 shutdown sys_shutdown
-374 i386 userfaultfd sys_userfaultfd
-375 i386 membarrier sys_membarrier
-376 i386 mlock2 sys_mlock2
-377 i386 copy_file_range sys_copy_file_range
-378 i386 preadv2 sys_preadv2 compat_sys_preadv2
-379 i386 pwritev2 sys_pwritev2 compat_sys_pwritev2
-380 i386 pkey_mprotect sys_pkey_mprotect
-381 i386 pkey_alloc sys_pkey_alloc
-382 i386 pkey_free sys_pkey_free
-383 i386 statx sys_statx
-384 i386 arch_prctl sys_arch_prctl compat_sys_arch_prctl
+286 i386 add_key sys_add_key __ia32_sys_add_key
+287 i386 request_key sys_request_key __ia32_sys_request_key
+288 i386 keyctl sys_keyctl __ia32_compat_sys_keyctl
+289 i386 ioprio_set sys_ioprio_set __ia32_sys_ioprio_set
+290 i386 ioprio_get sys_ioprio_get __ia32_sys_ioprio_get
+291 i386 inotify_init sys_inotify_init __ia32_sys_inotify_init
+292 i386 inotify_add_watch sys_inotify_add_watch __ia32_sys_inotify_add_watch
+293 i386 inotify_rm_watch sys_inotify_rm_watch __ia32_sys_inotify_rm_watch
+294 i386 migrate_pages sys_migrate_pages __ia32_sys_migrate_pages
+295 i386 openat sys_openat __ia32_compat_sys_openat
+296 i386 mkdirat sys_mkdirat __ia32_sys_mkdirat
+297 i386 mknodat sys_mknodat __ia32_sys_mknodat
+298 i386 fchownat sys_fchownat __ia32_sys_fchownat
+299 i386 futimesat sys_futimesat __ia32_compat_sys_futimesat
+300 i386 fstatat64 sys_fstatat64 __ia32_compat_sys_x86_fstatat
+301 i386 unlinkat sys_unlinkat __ia32_sys_unlinkat
+302 i386 renameat sys_renameat __ia32_sys_renameat
+303 i386 linkat sys_linkat __ia32_sys_linkat
+304 i386 symlinkat sys_symlinkat __ia32_sys_symlinkat
+305 i386 readlinkat sys_readlinkat __ia32_sys_readlinkat
+306 i386 fchmodat sys_fchmodat __ia32_sys_fchmodat
+307 i386 faccessat sys_faccessat __ia32_sys_faccessat
+308 i386 pselect6 sys_pselect6 __ia32_compat_sys_pselect6
+309 i386 ppoll sys_ppoll __ia32_compat_sys_ppoll
+310 i386 unshare sys_unshare __ia32_sys_unshare
+311 i386 set_robust_list sys_set_robust_list __ia32_compat_sys_set_robust_list
+312 i386 get_robust_list sys_get_robust_list __ia32_compat_sys_get_robust_list
+313 i386 splice sys_splice __ia32_sys_splice
+314 i386 sync_file_range sys_sync_file_range __ia32_compat_sys_x86_sync_file_range
+315 i386 tee sys_tee __ia32_sys_tee
+316 i386 vmsplice sys_vmsplice __ia32_compat_sys_vmsplice
+317 i386 move_pages sys_move_pages __ia32_compat_sys_move_pages
+318 i386 getcpu sys_getcpu __ia32_sys_getcpu
+319 i386 epoll_pwait sys_epoll_pwait __ia32_sys_epoll_pwait
+320 i386 utimensat sys_utimensat __ia32_compat_sys_utimensat
+321 i386 signalfd sys_signalfd __ia32_compat_sys_signalfd
+322 i386 timerfd_create sys_timerfd_create __ia32_sys_timerfd_create
+323 i386 eventfd sys_eventfd __ia32_sys_eventfd
+324 i386 fallocate sys_fallocate __ia32_compat_sys_x86_fallocate
+325 i386 timerfd_settime sys_timerfd_settime __ia32_compat_sys_timerfd_settime
+326 i386 timerfd_gettime sys_timerfd_gettime __ia32_compat_sys_timerfd_gettime
+327 i386 signalfd4 sys_signalfd4 __ia32_compat_sys_signalfd4
+328 i386 eventfd2 sys_eventfd2 __ia32_sys_eventfd2
+329 i386 epoll_create1 sys_epoll_create1 __ia32_sys_epoll_create1
+330 i386 dup3 sys_dup3 __ia32_sys_dup3
+331 i386 pipe2 sys_pipe2 __ia32_sys_pipe2
+332 i386 inotify_init1 sys_inotify_init1 __ia32_sys_inotify_init1
+333 i386 preadv sys_preadv __ia32_compat_sys_preadv
+334 i386 pwritev sys_pwritev __ia32_compat_sys_pwritev
+335 i386 rt_tgsigqueueinfo sys_rt_tgsigqueueinfo __ia32_compat_sys_rt_tgsigqueueinfo
+336 i386 perf_event_open sys_perf_event_open __ia32_sys_perf_event_open
+337 i386 recvmmsg sys_recvmmsg __ia32_compat_sys_recvmmsg
+338 i386 fanotify_init sys_fanotify_init __ia32_sys_fanotify_init
+339 i386 fanotify_mark sys_fanotify_mark __ia32_compat_sys_fanotify_mark
+340 i386 prlimit64 sys_prlimit64 __ia32_sys_prlimit64
+341 i386 name_to_handle_at sys_name_to_handle_at __ia32_sys_name_to_handle_at
+342 i386 open_by_handle_at sys_open_by_handle_at __ia32_compat_sys_open_by_handle_at
+343 i386 clock_adjtime sys_clock_adjtime __ia32_compat_sys_clock_adjtime
+344 i386 syncfs sys_syncfs __ia32_sys_syncfs
+345 i386 sendmmsg sys_sendmmsg __ia32_compat_sys_sendmmsg
+346 i386 setns sys_setns __ia32_sys_setns
+347 i386 process_vm_readv sys_process_vm_readv __ia32_compat_sys_process_vm_readv
+348 i386 process_vm_writev sys_process_vm_writev __ia32_compat_sys_process_vm_writev
+349 i386 kcmp sys_kcmp __ia32_sys_kcmp
+350 i386 finit_module sys_finit_module __ia32_sys_finit_module
+351 i386 sched_setattr sys_sched_setattr __ia32_sys_sched_setattr
+352 i386 sched_getattr sys_sched_getattr __ia32_sys_sched_getattr
+353 i386 renameat2 sys_renameat2 __ia32_sys_renameat2
+354 i386 seccomp sys_seccomp __ia32_sys_seccomp
+355 i386 getrandom sys_getrandom __ia32_sys_getrandom
+356 i386 memfd_create sys_memfd_create __ia32_sys_memfd_create
+357 i386 bpf sys_bpf __ia32_sys_bpf
+358 i386 execveat sys_execveat __ia32_compat_sys_execveat
+359 i386 socket sys_socket __ia32_sys_socket
+360 i386 socketpair sys_socketpair __ia32_sys_socketpair
+361 i386 bind sys_bind __ia32_sys_bind
+362 i386 connect sys_connect __ia32_sys_connect
+363 i386 listen sys_listen __ia32_sys_listen
+364 i386 accept4 sys_accept4 __ia32_sys_accept4
+365 i386 getsockopt sys_getsockopt __ia32_compat_sys_getsockopt
+366 i386 setsockopt sys_setsockopt __ia32_compat_sys_setsockopt
+367 i386 getsockname sys_getsockname __ia32_sys_getsockname
+368 i386 getpeername sys_getpeername __ia32_sys_getpeername
+369 i386 sendto sys_sendto __ia32_sys_sendto
+370 i386 sendmsg sys_sendmsg __ia32_compat_sys_sendmsg
+371 i386 recvfrom sys_recvfrom __ia32_compat_sys_recvfrom
+372 i386 recvmsg sys_recvmsg __ia32_compat_sys_recvmsg
+373 i386 shutdown sys_shutdown __ia32_sys_shutdown
+374 i386 userfaultfd sys_userfaultfd __ia32_sys_userfaultfd
+375 i386 membarrier sys_membarrier __ia32_sys_membarrier
+376 i386 mlock2 sys_mlock2 __ia32_sys_mlock2
+377 i386 copy_file_range sys_copy_file_range __ia32_sys_copy_file_range
+378 i386 preadv2 sys_preadv2 __ia32_compat_sys_preadv2
+379 i386 pwritev2 sys_pwritev2 __ia32_compat_sys_pwritev2
+380 i386 pkey_mprotect sys_pkey_mprotect __ia32_sys_pkey_mprotect
+381 i386 pkey_alloc sys_pkey_alloc __ia32_sys_pkey_alloc
+382 i386 pkey_free sys_pkey_free __ia32_sys_pkey_free
+383 i386 statx sys_statx __ia32_sys_statx
+384 i386 arch_prctl sys_arch_prctl __ia32_compat_sys_arch_prctl
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 5aef183e2f85..4dfe42666d0c 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -4,379 +4,383 @@
# The format is:
# <number> <abi> <name> <entry point>
#
+# The __x64_sys_*() stubs are created on-the-fly for sys_*() system calls
+#
# The abi is "common", "64" or "x32" for this file.
#
-0 common read sys_read
-1 common write sys_write
-2 common open sys_open
-3 common close sys_close
-4 common stat sys_newstat
-5 common fstat sys_newfstat
-6 common lstat sys_newlstat
-7 common poll sys_poll
-8 common lseek sys_lseek
-9 common mmap sys_mmap
-10 common mprotect sys_mprotect
-11 common munmap sys_munmap
-12 common brk sys_brk
-13 64 rt_sigaction sys_rt_sigaction
-14 common rt_sigprocmask sys_rt_sigprocmask
-15 64 rt_sigreturn sys_rt_sigreturn/ptregs
-16 64 ioctl sys_ioctl
-17 common pread64 sys_pread64
-18 common pwrite64 sys_pwrite64
-19 64 readv sys_readv
-20 64 writev sys_writev
-21 common access sys_access
-22 common pipe sys_pipe
-23 common select sys_select
-24 common sched_yield sys_sched_yield
-25 common mremap sys_mremap
-26 common msync sys_msync
-27 common mincore sys_mincore
-28 common madvise sys_madvise
-29 common shmget sys_shmget
-30 common shmat sys_shmat
-31 common shmctl sys_shmctl
-32 common dup sys_dup
-33 common dup2 sys_dup2
-34 common pause sys_pause
-35 common nanosleep sys_nanosleep
-36 common getitimer sys_getitimer
-37 common alarm sys_alarm
-38 common setitimer sys_setitimer
-39 common getpid sys_getpid
-40 common sendfile sys_sendfile64
-41 common socket sys_socket
-42 common connect sys_connect
-43 common accept sys_accept
-44 common sendto sys_sendto
-45 64 recvfrom sys_recvfrom
-46 64 sendmsg sys_sendmsg
-47 64 recvmsg sys_recvmsg
-48 common shutdown sys_shutdown
-49 common bind sys_bind
-50 common listen sys_listen
-51 common getsockname sys_getsockname
-52 common getpeername sys_getpeername
-53 common socketpair sys_socketpair
-54 64 setsockopt sys_setsockopt
-55 64 getsockopt sys_getsockopt
-56 common clone sys_clone/ptregs
-57 common fork sys_fork/ptregs
-58 common vfork sys_vfork/ptregs
-59 64 execve sys_execve/ptregs
-60 common exit sys_exit
-61 common wait4 sys_wait4
-62 common kill sys_kill
-63 common uname sys_newuname
-64 common semget sys_semget
-65 common semop sys_semop
-66 common semctl sys_semctl
-67 common shmdt sys_shmdt
-68 common msgget sys_msgget
-69 common msgsnd sys_msgsnd
-70 common msgrcv sys_msgrcv
-71 common msgctl sys_msgctl
-72 common fcntl sys_fcntl
-73 common flock sys_flock
-74 common fsync sys_fsync
-75 common fdatasync sys_fdatasync
-76 common truncate sys_truncate
-77 common ftruncate sys_ftruncate
-78 common getdents sys_getdents
-79 common getcwd sys_getcwd
-80 common chdir sys_chdir
-81 common fchdir sys_fchdir
-82 common rename sys_rename
-83 common mkdir sys_mkdir
-84 common rmdir sys_rmdir
-85 common creat sys_creat
-86 common link sys_link
-87 common unlink sys_unlink
-88 common symlink sys_symlink
-89 common readlink sys_readlink
-90 common chmod sys_chmod
-91 common fchmod sys_fchmod
-92 common chown sys_chown
-93 common fchown sys_fchown
-94 common lchown sys_lchown
-95 common umask sys_umask
-96 common gettimeofday sys_gettimeofday
-97 common getrlimit sys_getrlimit
-98 common getrusage sys_getrusage
-99 common sysinfo sys_sysinfo
-100 common times sys_times
-101 64 ptrace sys_ptrace
-102 common getuid sys_getuid
-103 common syslog sys_syslog
-104 common getgid sys_getgid
-105 common setuid sys_setuid
-106 common setgid sys_setgid
-107 common geteuid sys_geteuid
-108 common getegid sys_getegid
-109 common setpgid sys_setpgid
-110 common getppid sys_getppid
-111 common getpgrp sys_getpgrp
-112 common setsid sys_setsid
-113 common setreuid sys_setreuid
-114 common setregid sys_setregid
-115 common getgroups sys_getgroups
-116 common setgroups sys_setgroups
-117 common setresuid sys_setresuid
-118 common getresuid sys_getresuid
-119 common setresgid sys_setresgid
-120 common getresgid sys_getresgid
-121 common getpgid sys_getpgid
-122 common setfsuid sys_setfsuid
-123 common setfsgid sys_setfsgid
-124 common getsid sys_getsid
-125 common capget sys_capget
-126 common capset sys_capset
-127 64 rt_sigpending sys_rt_sigpending
-128 64 rt_sigtimedwait sys_rt_sigtimedwait
-129 64 rt_sigqueueinfo sys_rt_sigqueueinfo
-130 common rt_sigsuspend sys_rt_sigsuspend
-131 64 sigaltstack sys_sigaltstack
-132 common utime sys_utime
-133 common mknod sys_mknod
+0 common read __x64_sys_read
+1 common write __x64_sys_write
+2 common open __x64_sys_open
+3 common close __x64_sys_close
+4 common stat __x64_sys_newstat
+5 common fstat __x64_sys_newfstat
+6 common lstat __x64_sys_newlstat
+7 common poll __x64_sys_poll
+8 common lseek __x64_sys_lseek
+9 common mmap __x64_sys_mmap
+10 common mprotect __x64_sys_mprotect
+11 common munmap __x64_sys_munmap
+12 common brk __x64_sys_brk
+13 64 rt_sigaction __x64_sys_rt_sigaction
+14 common rt_sigprocmask __x64_sys_rt_sigprocmask
+15 64 rt_sigreturn __x64_sys_rt_sigreturn/ptregs
+16 64 ioctl __x64_sys_ioctl
+17 common pread64 __x64_sys_pread64
+18 common pwrite64 __x64_sys_pwrite64
+19 64 readv __x64_sys_readv
+20 64 writev __x64_sys_writev
+21 common access __x64_sys_access
+22 common pipe __x64_sys_pipe
+23 common select __x64_sys_select
+24 common sched_yield __x64_sys_sched_yield
+25 common mremap __x64_sys_mremap
+26 common msync __x64_sys_msync
+27 common mincore __x64_sys_mincore
+28 common madvise __x64_sys_madvise
+29 common shmget __x64_sys_shmget
+30 common shmat __x64_sys_shmat
+31 common shmctl __x64_sys_shmctl
+32 common dup __x64_sys_dup
+33 common dup2 __x64_sys_dup2
+34 common pause __x64_sys_pause
+35 common nanosleep __x64_sys_nanosleep
+36 common getitimer __x64_sys_getitimer
+37 common alarm __x64_sys_alarm
+38 common setitimer __x64_sys_setitimer
+39 common getpid __x64_sys_getpid
+40 common sendfile __x64_sys_sendfile64
+41 common socket __x64_sys_socket
+42 common connect __x64_sys_connect
+43 common accept __x64_sys_accept
+44 common sendto __x64_sys_sendto
+45 64 recvfrom __x64_sys_recvfrom
+46 64 sendmsg __x64_sys_sendmsg
+47 64 recvmsg __x64_sys_recvmsg
+48 common shutdown __x64_sys_shutdown
+49 common bind __x64_sys_bind
+50 common listen __x64_sys_listen
+51 common getsockname __x64_sys_getsockname
+52 common getpeername __x64_sys_getpeername
+53 common socketpair __x64_sys_socketpair
+54 64 setsockopt __x64_sys_setsockopt
+55 64 getsockopt __x64_sys_getsockopt
+56 common clone __x64_sys_clone/ptregs
+57 common fork __x64_sys_fork/ptregs
+58 common vfork __x64_sys_vfork/ptregs
+59 64 execve __x64_sys_execve/ptregs
+60 common exit __x64_sys_exit
+61 common wait4 __x64_sys_wait4
+62 common kill __x64_sys_kill
+63 common uname __x64_sys_newuname
+64 common semget __x64_sys_semget
+65 common semop __x64_sys_semop
+66 common semctl __x64_sys_semctl
+67 common shmdt __x64_sys_shmdt
+68 common msgget __x64_sys_msgget
+69 common msgsnd __x64_sys_msgsnd
+70 common msgrcv __x64_sys_msgrcv
+71 common msgctl __x64_sys_msgctl
+72 common fcntl __x64_sys_fcntl
+73 common flock __x64_sys_flock
+74 common fsync __x64_sys_fsync
+75 common fdatasync __x64_sys_fdatasync
+76 common truncate __x64_sys_truncate
+77 common ftruncate __x64_sys_ftruncate
+78 common getdents __x64_sys_getdents
+79 common getcwd __x64_sys_getcwd
+80 common chdir __x64_sys_chdir
+81 common fchdir __x64_sys_fchdir
+82 common rename __x64_sys_rename
+83 common mkdir __x64_sys_mkdir
+84 common rmdir __x64_sys_rmdir
+85 common creat __x64_sys_creat
+86 common link __x64_sys_link
+87 common unlink __x64_sys_unlink
+88 common symlink __x64_sys_symlink
+89 common readlink __x64_sys_readlink
+90 common chmod __x64_sys_chmod
+91 common fchmod __x64_sys_fchmod
+92 common chown __x64_sys_chown
+93 common fchown __x64_sys_fchown
+94 common lchown __x64_sys_lchown
+95 common umask __x64_sys_umask
+96 common gettimeofday __x64_sys_gettimeofday
+97 common getrlimit __x64_sys_getrlimit
+98 common getrusage __x64_sys_getrusage
+99 common sysinfo __x64_sys_sysinfo
+100 common times __x64_sys_times
+101 64 ptrace __x64_sys_ptrace
+102 common getuid __x64_sys_getuid
+103 common syslog __x64_sys_syslog
+104 common getgid __x64_sys_getgid
+105 common setuid __x64_sys_setuid
+106 common setgid __x64_sys_setgid
+107 common geteuid __x64_sys_geteuid
+108 common getegid __x64_sys_getegid
+109 common setpgid __x64_sys_setpgid
+110 common getppid __x64_sys_getppid
+111 common getpgrp __x64_sys_getpgrp
+112 common setsid __x64_sys_setsid
+113 common setreuid __x64_sys_setreuid
+114 common setregid __x64_sys_setregid
+115 common getgroups __x64_sys_getgroups
+116 common setgroups __x64_sys_setgroups
+117 common setresuid __x64_sys_setresuid
+118 common getresuid __x64_sys_getresuid
+119 common setresgid __x64_sys_setresgid
+120 common getresgid __x64_sys_getresgid
+121 common getpgid __x64_sys_getpgid
+122 common setfsuid __x64_sys_setfsuid
+123 common setfsgid __x64_sys_setfsgid
+124 common getsid __x64_sys_getsid
+125 common capget __x64_sys_capget
+126 common capset __x64_sys_capset
+127 64 rt_sigpending __x64_sys_rt_sigpending
+128 64 rt_sigtimedwait __x64_sys_rt_sigtimedwait
+129 64 rt_sigqueueinfo __x64_sys_rt_sigqueueinfo
+130 common rt_sigsuspend __x64_sys_rt_sigsuspend
+131 64 sigaltstack __x64_sys_sigaltstack
+132 common utime __x64_sys_utime
+133 common mknod __x64_sys_mknod
134 64 uselib
-135 common personality sys_personality
-136 common ustat sys_ustat
-137 common statfs sys_statfs
-138 common fstatfs sys_fstatfs
-139 common sysfs sys_sysfs
-140 common getpriority sys_getpriority
-141 common setpriority sys_setpriority
-142 common sched_setparam sys_sched_setparam
-143 common sched_getparam sys_sched_getparam
-144 common sched_setscheduler sys_sched_setscheduler
-145 common sched_getscheduler sys_sched_getscheduler
-146 common sched_get_priority_max sys_sched_get_priority_max
-147 common sched_get_priority_min sys_sched_get_priority_min
-148 common sched_rr_get_interval sys_sched_rr_get_interval
-149 common mlock sys_mlock
-150 common munlock sys_munlock
-151 common mlockall sys_mlockall
-152 common munlockall sys_munlockall
-153 common vhangup sys_vhangup
-154 common modify_ldt sys_modify_ldt
-155 common pivot_root sys_pivot_root
-156 64 _sysctl sys_sysctl
-157 common prctl sys_prctl
-158 common arch_prctl sys_arch_prctl
-159 common adjtimex sys_adjtimex
-160 common setrlimit sys_setrlimit
-161 common chroot sys_chroot
-162 common sync sys_sync
-163 common acct sys_acct
-164 common settimeofday sys_settimeofday
-165 common mount sys_mount
-166 common umount2 sys_umount
-167 common swapon sys_swapon
-168 common swapoff sys_swapoff
-169 common reboot sys_reboot
-170 common sethostname sys_sethostname
-171 common setdomainname sys_setdomainname
-172 common iopl sys_iopl/ptregs
-173 common ioperm sys_ioperm
+135 common personality __x64_sys_personality
+136 common ustat __x64_sys_ustat
+137 common statfs __x64_sys_statfs
+138 common fstatfs __x64_sys_fstatfs
+139 common sysfs __x64_sys_sysfs
+140 common getpriority __x64_sys_getpriority
+141 common setpriority __x64_sys_setpriority
+142 common sched_setparam __x64_sys_sched_setparam
+143 common sched_getparam __x64_sys_sched_getparam
+144 common sched_setscheduler __x64_sys_sched_setscheduler
+145 common sched_getscheduler __x64_sys_sched_getscheduler
+146 common sched_get_priority_max __x64_sys_sched_get_priority_max
+147 common sched_get_priority_min __x64_sys_sched_get_priority_min
+148 common sched_rr_get_interval __x64_sys_sched_rr_get_interval
+149 common mlock __x64_sys_mlock
+150 common munlock __x64_sys_munlock
+151 common mlockall __x64_sys_mlockall
+152 common munlockall __x64_sys_munlockall
+153 common vhangup __x64_sys_vhangup
+154 common modify_ldt __x64_sys_modify_ldt
+155 common pivot_root __x64_sys_pivot_root
+156 64 _sysctl __x64_sys_sysctl
+157 common prctl __x64_sys_prctl
+158 common arch_prctl __x64_sys_arch_prctl
+159 common adjtimex __x64_sys_adjtimex
+160 common setrlimit __x64_sys_setrlimit
+161 common chroot __x64_sys_chroot
+162 common sync __x64_sys_sync
+163 common acct __x64_sys_acct
+164 common settimeofday __x64_sys_settimeofday
+165 common mount __x64_sys_mount
+166 common umount2 __x64_sys_umount
+167 common swapon __x64_sys_swapon
+168 common swapoff __x64_sys_swapoff
+169 common reboot __x64_sys_reboot
+170 common sethostname __x64_sys_sethostname
+171 common setdomainname __x64_sys_setdomainname
+172 common iopl __x64_sys_iopl/ptregs
+173 common ioperm __x64_sys_ioperm
174 64 create_module
-175 common init_module sys_init_module
-176 common delete_module sys_delete_module
+175 common init_module __x64_sys_init_module
+176 common delete_module __x64_sys_delete_module
177 64 get_kernel_syms
178 64 query_module
-179 common quotactl sys_quotactl
+179 common quotactl __x64_sys_quotactl
180 64 nfsservctl
181 common getpmsg
182 common putpmsg
183 common afs_syscall
184 common tuxcall
185 common security
-186 common gettid sys_gettid
-187 common readahead sys_readahead
-188 common setxattr sys_setxattr
-189 common lsetxattr sys_lsetxattr
-190 common fsetxattr sys_fsetxattr
-191 common getxattr sys_getxattr
-192 common lgetxattr sys_lgetxattr
-193 common fgetxattr sys_fgetxattr
-194 common listxattr sys_listxattr
-195 common llistxattr sys_llistxattr
-196 common flistxattr sys_flistxattr
-197 common removexattr sys_removexattr
-198 common lremovexattr sys_lremovexattr
-199 common fremovexattr sys_fremovexattr
-200 common tkill sys_tkill
-201 common time sys_time
-202 common futex sys_futex
-203 common sched_setaffinity sys_sched_setaffinity
-204 common sched_getaffinity sys_sched_getaffinity
+186 common gettid __x64_sys_gettid
+187 common readahead __x64_sys_readahead
+188 common setxattr __x64_sys_setxattr
+189 common lsetxattr __x64_sys_lsetxattr
+190 common fsetxattr __x64_sys_fsetxattr
+191 common getxattr __x64_sys_getxattr
+192 common lgetxattr __x64_sys_lgetxattr
+193 common fgetxattr __x64_sys_fgetxattr
+194 common listxattr __x64_sys_listxattr
+195 common llistxattr __x64_sys_llistxattr
+196 common flistxattr __x64_sys_flistxattr
+197 common removexattr __x64_sys_removexattr
+198 common lremovexattr __x64_sys_lremovexattr
+199 common fremovexattr __x64_sys_fremovexattr
+200 common tkill __x64_sys_tkill
+201 common time __x64_sys_time
+202 common futex __x64_sys_futex
+203 common sched_setaffinity __x64_sys_sched_setaffinity
+204 common sched_getaffinity __x64_sys_sched_getaffinity
205 64 set_thread_area
-206 64 io_setup sys_io_setup
-207 common io_destroy sys_io_destroy
-208 common io_getevents sys_io_getevents
-209 64 io_submit sys_io_submit
-210 common io_cancel sys_io_cancel
+206 64 io_setup __x64_sys_io_setup
+207 common io_destroy __x64_sys_io_destroy
+208 common io_getevents __x64_sys_io_getevents
+209 64 io_submit __x64_sys_io_submit
+210 common io_cancel __x64_sys_io_cancel
211 64 get_thread_area
-212 common lookup_dcookie sys_lookup_dcookie
-213 common epoll_create sys_epoll_create
+212 common lookup_dcookie __x64_sys_lookup_dcookie
+213 common epoll_create __x64_sys_epoll_create
214 64 epoll_ctl_old
215 64 epoll_wait_old
-216 common remap_file_pages sys_remap_file_pages
-217 common getdents64 sys_getdents64
-218 common set_tid_address sys_set_tid_address
-219 common restart_syscall sys_restart_syscall
-220 common semtimedop sys_semtimedop
-221 common fadvise64 sys_fadvise64
-222 64 timer_create sys_timer_create
-223 common timer_settime sys_timer_settime
-224 common timer_gettime sys_timer_gettime
-225 common timer_getoverrun sys_timer_getoverrun
-226 common timer_delete sys_timer_delete
-227 common clock_settime sys_clock_settime
-228 common clock_gettime sys_clock_gettime
-229 common clock_getres sys_clock_getres
-230 common clock_nanosleep sys_clock_nanosleep
-231 common exit_group sys_exit_group
-232 common epoll_wait sys_epoll_wait
-233 common epoll_ctl sys_epoll_ctl
-234 common tgkill sys_tgkill
-235 common utimes sys_utimes
+216 common remap_file_pages __x64_sys_remap_file_pages
+217 common getdents64 __x64_sys_getdents64
+218 common set_tid_address __x64_sys_set_tid_address
+219 common restart_syscall __x64_sys_restart_syscall
+220 common semtimedop __x64_sys_semtimedop
+221 common fadvise64 __x64_sys_fadvise64
+222 64 timer_create __x64_sys_timer_create
+223 common timer_settime __x64_sys_timer_settime
+224 common timer_gettime __x64_sys_timer_gettime
+225 common timer_getoverrun __x64_sys_timer_getoverrun
+226 common timer_delete __x64_sys_timer_delete
+227 common clock_settime __x64_sys_clock_settime
+228 common clock_gettime __x64_sys_clock_gettime
+229 common clock_getres __x64_sys_clock_getres
+230 common clock_nanosleep __x64_sys_clock_nanosleep
+231 common exit_group __x64_sys_exit_group
+232 common epoll_wait __x64_sys_epoll_wait
+233 common epoll_ctl __x64_sys_epoll_ctl
+234 common tgkill __x64_sys_tgkill
+235 common utimes __x64_sys_utimes
236 64 vserver
-237 common mbind sys_mbind
-238 common set_mempolicy sys_set_mempolicy
-239 common get_mempolicy sys_get_mempolicy
-240 common mq_open sys_mq_open
-241 common mq_unlink sys_mq_unlink
-242 common mq_timedsend sys_mq_timedsend
-243 common mq_timedreceive sys_mq_timedreceive
-244 64 mq_notify sys_mq_notify
-245 common mq_getsetattr sys_mq_getsetattr
-246 64 kexec_load sys_kexec_load
-247 64 waitid sys_waitid
-248 common add_key sys_add_key
-249 common request_key sys_request_key
-250 common keyctl sys_keyctl
-251 common ioprio_set sys_ioprio_set
-252 common ioprio_get sys_ioprio_get
-253 common inotify_init sys_inotify_init
-254 common inotify_add_watch sys_inotify_add_watch
-255 common inotify_rm_watch sys_inotify_rm_watch
-256 common migrate_pages sys_migrate_pages
-257 common openat sys_openat
-258 common mkdirat sys_mkdirat
-259 common mknodat sys_mknodat
-260 common fchownat sys_fchownat
-261 common futimesat sys_futimesat
-262 common newfstatat sys_newfstatat
-263 common unlinkat sys_unlinkat
-264 common renameat sys_renameat
-265 common linkat sys_linkat
-266 common symlinkat sys_symlinkat
-267 common readlinkat sys_readlinkat
-268 common fchmodat sys_fchmodat
-269 common faccessat sys_faccessat
-270 common pselect6 sys_pselect6
-271 common ppoll sys_ppoll
-272 common unshare sys_unshare
-273 64 set_robust_list sys_set_robust_list
-274 64 get_robust_list sys_get_robust_list
-275 common splice sys_splice
-276 common tee sys_tee
-277 common sync_file_range sys_sync_file_range
-278 64 vmsplice sys_vmsplice
-279 64 move_pages sys_move_pages
-280 common utimensat sys_utimensat
-281 common epoll_pwait sys_epoll_pwait
-282 common signalfd sys_signalfd
-283 common timerfd_create sys_timerfd_create
-284 common eventfd sys_eventfd
-285 common fallocate sys_fallocate
-286 common timerfd_settime sys_timerfd_settime
-287 common timerfd_gettime sys_timerfd_gettime
-288 common accept4 sys_accept4
-289 common signalfd4 sys_signalfd4
-290 common eventfd2 sys_eventfd2
-291 common epoll_create1 sys_epoll_create1
-292 common dup3 sys_dup3
-293 common pipe2 sys_pipe2
-294 common inotify_init1 sys_inotify_init1
-295 64 preadv sys_preadv
-296 64 pwritev sys_pwritev
-297 64 rt_tgsigqueueinfo sys_rt_tgsigqueueinfo
-298 common perf_event_open sys_perf_event_open
-299 64 recvmmsg sys_recvmmsg
-300 common fanotify_init sys_fanotify_init
-301 common fanotify_mark sys_fanotify_mark
-302 common prlimit64 sys_prlimit64
-303 common name_to_handle_at sys_name_to_handle_at
-304 common open_by_handle_at sys_open_by_handle_at
-305 common clock_adjtime sys_clock_adjtime
-306 common syncfs sys_syncfs
-307 64 sendmmsg sys_sendmmsg
-308 common setns sys_setns
-309 common getcpu sys_getcpu
-310 64 process_vm_readv sys_process_vm_readv
-311 64 process_vm_writev sys_process_vm_writev
-312 common kcmp sys_kcmp
-313 common finit_module sys_finit_module
-314 common sched_setattr sys_sched_setattr
-315 common sched_getattr sys_sched_getattr
-316 common renameat2 sys_renameat2
-317 common seccomp sys_seccomp
-318 common getrandom sys_getrandom
-319 common memfd_create sys_memfd_create
-320 common kexec_file_load sys_kexec_file_load
-321 common bpf sys_bpf
-322 64 execveat sys_execveat/ptregs
-323 common userfaultfd sys_userfaultfd
-324 common membarrier sys_membarrier
-325 common mlock2 sys_mlock2
-326 common copy_file_range sys_copy_file_range
-327 64 preadv2 sys_preadv2
-328 64 pwritev2 sys_pwritev2
-329 common pkey_mprotect sys_pkey_mprotect
-330 common pkey_alloc sys_pkey_alloc
-331 common pkey_free sys_pkey_free
-332 common statx sys_statx
+237 common mbind __x64_sys_mbind
+238 common set_mempolicy __x64_sys_set_mempolicy
+239 common get_mempolicy __x64_sys_get_mempolicy
+240 common mq_open __x64_sys_mq_open
+241 common mq_unlink __x64_sys_mq_unlink
+242 common mq_timedsend __x64_sys_mq_timedsend
+243 common mq_timedreceive __x64_sys_mq_timedreceive
+244 64 mq_notify __x64_sys_mq_notify
+245 common mq_getsetattr __x64_sys_mq_getsetattr
+246 64 kexec_load __x64_sys_kexec_load
+247 64 waitid __x64_sys_waitid
+248 common add_key __x64_sys_add_key
+249 common request_key __x64_sys_request_key
+250 common keyctl __x64_sys_keyctl
+251 common ioprio_set __x64_sys_ioprio_set
+252 common ioprio_get __x64_sys_ioprio_get
+253 common inotify_init __x64_sys_inotify_init
+254 common inotify_add_watch __x64_sys_inotify_add_watch
+255 common inotify_rm_watch __x64_sys_inotify_rm_watch
+256 common migrate_pages __x64_sys_migrate_pages
+257 common openat __x64_sys_openat
+258 common mkdirat __x64_sys_mkdirat
+259 common mknodat __x64_sys_mknodat
+260 common fchownat __x64_sys_fchownat
+261 common futimesat __x64_sys_futimesat
+262 common newfstatat __x64_sys_newfstatat
+263 common unlinkat __x64_sys_unlinkat
+264 common renameat __x64_sys_renameat
+265 common linkat __x64_sys_linkat
+266 common symlinkat __x64_sys_symlinkat
+267 common readlinkat __x64_sys_readlinkat
+268 common fchmodat __x64_sys_fchmodat
+269 common faccessat __x64_sys_faccessat
+270 common pselect6 __x64_sys_pselect6
+271 common ppoll __x64_sys_ppoll
+272 common unshare __x64_sys_unshare
+273 64 set_robust_list __x64_sys_set_robust_list
+274 64 get_robust_list __x64_sys_get_robust_list
+275 common splice __x64_sys_splice
+276 common tee __x64_sys_tee
+277 common sync_file_range __x64_sys_sync_file_range
+278 64 vmsplice __x64_sys_vmsplice
+279 64 move_pages __x64_sys_move_pages
+280 common utimensat __x64_sys_utimensat
+281 common epoll_pwait __x64_sys_epoll_pwait
+282 common signalfd __x64_sys_signalfd
+283 common timerfd_create __x64_sys_timerfd_create
+284 common eventfd __x64_sys_eventfd
+285 common fallocate __x64_sys_fallocate
+286 common timerfd_settime __x64_sys_timerfd_settime
+287 common timerfd_gettime __x64_sys_timerfd_gettime
+288 common accept4 __x64_sys_accept4
+289 common signalfd4 __x64_sys_signalfd4
+290 common eventfd2 __x64_sys_eventfd2
+291 common epoll_create1 __x64_sys_epoll_create1
+292 common dup3 __x64_sys_dup3
+293 common pipe2 __x64_sys_pipe2
+294 common inotify_init1 __x64_sys_inotify_init1
+295 64 preadv __x64_sys_preadv
+296 64 pwritev __x64_sys_pwritev
+297 64 rt_tgsigqueueinfo __x64_sys_rt_tgsigqueueinfo
+298 common perf_event_open __x64_sys_perf_event_open
+299 64 recvmmsg __x64_sys_recvmmsg
+300 common fanotify_init __x64_sys_fanotify_init
+301 common fanotify_mark __x64_sys_fanotify_mark
+302 common prlimit64 __x64_sys_prlimit64
+303 common name_to_handle_at __x64_sys_name_to_handle_at
+304 common open_by_handle_at __x64_sys_open_by_handle_at
+305 common clock_adjtime __x64_sys_clock_adjtime
+306 common syncfs __x64_sys_syncfs
+307 64 sendmmsg __x64_sys_sendmmsg
+308 common setns __x64_sys_setns
+309 common getcpu __x64_sys_getcpu
+310 64 process_vm_readv __x64_sys_process_vm_readv
+311 64 process_vm_writev __x64_sys_process_vm_writev
+312 common kcmp __x64_sys_kcmp
+313 common finit_module __x64_sys_finit_module
+314 common sched_setattr __x64_sys_sched_setattr
+315 common sched_getattr __x64_sys_sched_getattr
+316 common renameat2 __x64_sys_renameat2
+317 common seccomp __x64_sys_seccomp
+318 common getrandom __x64_sys_getrandom
+319 common memfd_create __x64_sys_memfd_create
+320 common kexec_file_load __x64_sys_kexec_file_load
+321 common bpf __x64_sys_bpf
+322 64 execveat __x64_sys_execveat/ptregs
+323 common userfaultfd __x64_sys_userfaultfd
+324 common membarrier __x64_sys_membarrier
+325 common mlock2 __x64_sys_mlock2
+326 common copy_file_range __x64_sys_copy_file_range
+327 64 preadv2 __x64_sys_preadv2
+328 64 pwritev2 __x64_sys_pwritev2
+329 common pkey_mprotect __x64_sys_pkey_mprotect
+330 common pkey_alloc __x64_sys_pkey_alloc
+331 common pkey_free __x64_sys_pkey_free
+332 common statx __x64_sys_statx
#
# x32-specific system call numbers start at 512 to avoid cache impact
-# for native 64-bit operation.
+# for native 64-bit operation. The __x32_compat_sys stubs are created
+# on-the-fly for compat_sys_*() compatibility system calls if X86_X32
+# is defined.
#
-512 x32 rt_sigaction compat_sys_rt_sigaction
+512 x32 rt_sigaction __x32_compat_sys_rt_sigaction
513 x32 rt_sigreturn sys32_x32_rt_sigreturn
-514 x32 ioctl compat_sys_ioctl
-515 x32 readv compat_sys_readv
-516 x32 writev compat_sys_writev
-517 x32 recvfrom compat_sys_recvfrom
-518 x32 sendmsg compat_sys_sendmsg
-519 x32 recvmsg compat_sys_recvmsg
-520 x32 execve compat_sys_execve/ptregs
-521 x32 ptrace compat_sys_ptrace
-522 x32 rt_sigpending compat_sys_rt_sigpending
-523 x32 rt_sigtimedwait compat_sys_rt_sigtimedwait
-524 x32 rt_sigqueueinfo compat_sys_rt_sigqueueinfo
-525 x32 sigaltstack compat_sys_sigaltstack
-526 x32 timer_create compat_sys_timer_create
-527 x32 mq_notify compat_sys_mq_notify
-528 x32 kexec_load compat_sys_kexec_load
-529 x32 waitid compat_sys_waitid
-530 x32 set_robust_list compat_sys_set_robust_list
-531 x32 get_robust_list compat_sys_get_robust_list
-532 x32 vmsplice compat_sys_vmsplice
-533 x32 move_pages compat_sys_move_pages
-534 x32 preadv compat_sys_preadv64
-535 x32 pwritev compat_sys_pwritev64
-536 x32 rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo
-537 x32 recvmmsg compat_sys_recvmmsg
-538 x32 sendmmsg compat_sys_sendmmsg
-539 x32 process_vm_readv compat_sys_process_vm_readv
-540 x32 process_vm_writev compat_sys_process_vm_writev
-541 x32 setsockopt compat_sys_setsockopt
-542 x32 getsockopt compat_sys_getsockopt
-543 x32 io_setup compat_sys_io_setup
-544 x32 io_submit compat_sys_io_submit
-545 x32 execveat compat_sys_execveat/ptregs
-546 x32 preadv2 compat_sys_preadv64v2
-547 x32 pwritev2 compat_sys_pwritev64v2
+514 x32 ioctl __x32_compat_sys_ioctl
+515 x32 readv __x32_compat_sys_readv
+516 x32 writev __x32_compat_sys_writev
+517 x32 recvfrom __x32_compat_sys_recvfrom
+518 x32 sendmsg __x32_compat_sys_sendmsg
+519 x32 recvmsg __x32_compat_sys_recvmsg
+520 x32 execve __x32_compat_sys_execve/ptregs
+521 x32 ptrace __x32_compat_sys_ptrace
+522 x32 rt_sigpending __x32_compat_sys_rt_sigpending
+523 x32 rt_sigtimedwait __x32_compat_sys_rt_sigtimedwait
+524 x32 rt_sigqueueinfo __x32_compat_sys_rt_sigqueueinfo
+525 x32 sigaltstack __x32_compat_sys_sigaltstack
+526 x32 timer_create __x32_compat_sys_timer_create
+527 x32 mq_notify __x32_compat_sys_mq_notify
+528 x32 kexec_load __x32_compat_sys_kexec_load
+529 x32 waitid __x32_compat_sys_waitid
+530 x32 set_robust_list __x32_compat_sys_set_robust_list
+531 x32 get_robust_list __x32_compat_sys_get_robust_list
+532 x32 vmsplice __x32_compat_sys_vmsplice
+533 x32 move_pages __x32_compat_sys_move_pages
+534 x32 preadv __x32_compat_sys_preadv64
+535 x32 pwritev __x32_compat_sys_pwritev64
+536 x32 rt_tgsigqueueinfo __x32_compat_sys_rt_tgsigqueueinfo
+537 x32 recvmmsg __x32_compat_sys_recvmmsg
+538 x32 sendmmsg __x32_compat_sys_sendmmsg
+539 x32 process_vm_readv __x32_compat_sys_process_vm_readv
+540 x32 process_vm_writev __x32_compat_sys_process_vm_writev
+541 x32 setsockopt __x32_compat_sys_setsockopt
+542 x32 getsockopt __x32_compat_sys_getsockopt
+543 x32 io_setup __x32_compat_sys_io_setup
+544 x32 io_submit __x32_compat_sys_io_submit
+545 x32 execveat __x32_compat_sys_execveat/ptregs
+546 x32 preadv2 __x32_compat_sys_preadv64v2
+547 x32 pwritev2 __x32_compat_sys_pwritev64v2
diff --git a/arch/x86/entry/syscalls/syscalltbl.sh b/arch/x86/entry/syscalls/syscalltbl.sh
index d71ef4bd3615..94fcd1951aca 100644
--- a/arch/x86/entry/syscalls/syscalltbl.sh
+++ b/arch/x86/entry/syscalls/syscalltbl.sh
@@ -25,15 +25,27 @@ emit() {
nr="$2"
entry="$3"
compat="$4"
+ umlentry=""
if [ "$abi" = "64" -a -n "$compat" ]; then
echo "a compat entry for a 64-bit syscall makes no sense" >&2
exit 1
fi
+ # For CONFIG_UML, we need to strip the __x64_sys prefix
+ if [ "$abi" = "64" -a "${entry}" != "${entry#__x64_sys}" ]; then
+ umlentry="sys${entry#__x64_sys}"
+ fi
+
if [ -z "$compat" ]; then
- if [ -n "$entry" ]; then
+ if [ -n "$entry" -a -z "$umlentry" ]; then
syscall_macro "$abi" "$nr" "$entry"
+ elif [ -n "$umlentry" ]; then # implies -n "$entry"
+ echo "#ifdef CONFIG_X86"
+ syscall_macro "$abi" "$nr" "$entry"
+ echo "#else /* CONFIG_UML */"
+ syscall_macro "$abi" "$nr" "$umlentry"
+ echo "#endif"
fi
else
echo "#ifdef CONFIG_X86_32"
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 1943aebadede..d998a487c9b1 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -42,9 +42,7 @@ vdso_img_cfiles := $(vdso_img-y:%=vdso-image-%.c)
vdso_img_sodbg := $(vdso_img-y:%=vdso%.so.dbg)
obj-y += $(vdso_img_objs)
targets += $(vdso_img_cfiles)
-targets += $(vdso_img_sodbg)
-.SECONDARY: $(vdso_img-y:%=$(obj)/vdso-image-%.c) \
- $(vdso_img-y:%=$(obj)/vdso%.so)
+targets += $(vdso_img_sodbg) $(vdso_img-y:%=vdso%.so)
export CPPFLAGS_vdso.lds += -P -C
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index 317be365bce3..70b7845434cb 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -127,6 +127,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
int vsyscall_nr, syscall_nr, tmp;
int prev_sig_on_uaccess_err;
long ret;
+ unsigned long orig_dx;
/*
* No point in checking CS -- the only way to get here is a user mode
@@ -227,19 +228,22 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
ret = -EFAULT;
switch (vsyscall_nr) {
case 0:
- ret = sys_gettimeofday(
- (struct timeval __user *)regs->di,
- (struct timezone __user *)regs->si);
+ /* this decodes regs->di and regs->si on its own */
+ ret = __x64_sys_gettimeofday(regs);
break;
case 1:
- ret = sys_time((time_t __user *)regs->di);
+ /* this decodes regs->di on its own */
+ ret = __x64_sys_time(regs);
break;
case 2:
- ret = sys_getcpu((unsigned __user *)regs->di,
- (unsigned __user *)regs->si,
- NULL);
+ /* while we could clobber regs->dx, we didn't in the past... */
+ orig_dx = regs->dx;
+ regs->dx = 0;
+ /* this decodes regs->di, regs->si and regs->dx on its own */
+ ret = __x64_sys_getcpu(regs);
+ regs->dx = orig_dx;
break;
}
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index da6780122786..8a10a045b57b 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1153,7 +1153,6 @@ static void setup_pebs_sample_data(struct perf_event *event,
if (pebs == NULL)
return;
- regs->flags &= ~PERF_EFLAGS_EXACT;
sample_type = event->attr.sample_type;
dsrc = sample_type & PERF_SAMPLE_DATA_SRC;
@@ -1197,7 +1196,13 @@ static void setup_pebs_sample_data(struct perf_event *event,
* and PMI.
*/
*regs = *iregs;
- regs->flags = pebs->flags;
+
+ /*
+ * Initialize regs_>flags from PEBS,
+ * Clear exact bit (which uses x86 EFLAGS Reserved bit 3),
+ * i.e., do not rely on it being zero:
+ */
+ regs->flags = pebs->flags & ~PERF_EFLAGS_EXACT;
if (sample_type & PERF_SAMPLE_REGS_INTR) {
regs->ax = pebs->ax;
@@ -1217,10 +1222,6 @@ static void setup_pebs_sample_data(struct perf_event *event,
regs->sp = pebs->sp;
}
- /*
- * Preserve PERF_EFLAGS_VM from set_linear_ip().
- */
- regs->flags = pebs->flags | (regs->flags & PERF_EFLAGS_VM);
#ifndef CONFIG_X86_32
regs->r8 = pebs->r8;
regs->r9 = pebs->r9;
@@ -1234,20 +1235,33 @@ static void setup_pebs_sample_data(struct perf_event *event,
}
if (event->attr.precise_ip > 1) {
- /* Haswell and later have the eventing IP, so use it: */
+ /*
+ * Haswell and later processors have an 'eventing IP'
+ * (real IP) which fixes the off-by-1 skid in hardware.
+ * Use it when precise_ip >= 2 :
+ */
if (x86_pmu.intel_cap.pebs_format >= 2) {
set_linear_ip(regs, pebs->real_ip);
regs->flags |= PERF_EFLAGS_EXACT;
} else {
- /* Otherwise use PEBS off-by-1 IP: */
+ /* Otherwise, use PEBS off-by-1 IP: */
set_linear_ip(regs, pebs->ip);
- /* ... and try to fix it up using the LBR entries: */
+ /*
+ * With precise_ip >= 2, try to fix up the off-by-1 IP
+ * using the LBR. If successful, the fixup function
+ * corrects regs->ip and calls set_linear_ip() on regs:
+ */
if (intel_pmu_pebs_fixup_ip(regs))
regs->flags |= PERF_EFLAGS_EXACT;
}
- } else
+ } else {
+ /*
+ * When precise_ip == 1, return the PEBS off-by-1 IP,
+ * no fixup attempted:
+ */
set_linear_ip(regs, pebs->ip);
+ }
if ((sample_type & (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR)) &&
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 40a3d3642f3a..08acd954f00e 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -313,7 +313,7 @@ struct apic {
/* Probe, setup and smpboot functions */
int (*probe)(void);
int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id);
- int (*apic_id_valid)(int apicid);
+ int (*apic_id_valid)(u32 apicid);
int (*apic_id_registered)(void);
bool (*check_apicid_used)(physid_mask_t *map, int apicid);
@@ -486,7 +486,7 @@ static inline unsigned int read_apic_id(void)
return apic->get_apic_id(reg);
}
-extern int default_apic_id_valid(int apicid);
+extern int default_apic_id_valid(u32 apicid);
extern int default_acpi_madt_oem_check(char *, char *);
extern void default_setup_apic_routing(void);
diff --git a/arch/x86/include/asm/kexec-bzimage64.h b/arch/x86/include/asm/kexec-bzimage64.h
index 9f07cff43705..df89ee7d3e9e 100644
--- a/arch/x86/include/asm/kexec-bzimage64.h
+++ b/arch/x86/include/asm/kexec-bzimage64.h
@@ -2,6 +2,6 @@
#ifndef _ASM_KEXEC_BZIMAGE64_H
#define _ASM_KEXEC_BZIMAGE64_H
-extern struct kexec_file_ops kexec_bzImage64_ops;
+extern const struct kexec_file_ops kexec_bzImage64_ops;
#endif /* _ASM_KEXE_BZIMAGE64_H */
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 89d5c8886c85..5f49b4ff0c24 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -526,22 +526,39 @@ static inline pgprotval_t massage_pgprot(pgprot_t pgprot)
return protval;
}
+static inline pgprotval_t check_pgprot(pgprot_t pgprot)
+{
+ pgprotval_t massaged_val = massage_pgprot(pgprot);
+
+ /* mmdebug.h can not be included here because of dependencies */
+#ifdef CONFIG_DEBUG_VM
+ WARN_ONCE(pgprot_val(pgprot) != massaged_val,
+ "attempted to set unsupported pgprot: %016llx "
+ "bits: %016llx supported: %016llx\n",
+ (u64)pgprot_val(pgprot),
+ (u64)pgprot_val(pgprot) ^ massaged_val,
+ (u64)__supported_pte_mask);
+#endif
+
+ return massaged_val;
+}
+
static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
{
return __pte(((phys_addr_t)page_nr << PAGE_SHIFT) |
- massage_pgprot(pgprot));
+ check_pgprot(pgprot));
}
static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
{
return __pmd(((phys_addr_t)page_nr << PAGE_SHIFT) |
- massage_pgprot(pgprot));
+ check_pgprot(pgprot));
}
static inline pud_t pfn_pud(unsigned long page_nr, pgprot_t pgprot)
{
return __pud(((phys_addr_t)page_nr << PAGE_SHIFT) |
- massage_pgprot(pgprot));
+ check_pgprot(pgprot));
}
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
@@ -553,7 +570,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
* the newprot (if present):
*/
val &= _PAGE_CHG_MASK;
- val |= massage_pgprot(newprot) & ~_PAGE_CHG_MASK;
+ val |= check_pgprot(newprot) & ~_PAGE_CHG_MASK;
return __pte(val);
}
@@ -563,7 +580,7 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
pmdval_t val = pmd_val(pmd);
val &= _HPAGE_CHG_MASK;
- val |= massage_pgprot(newprot) & ~_HPAGE_CHG_MASK;
+ val |= check_pgprot(newprot) & ~_HPAGE_CHG_MASK;
return __pmd(val);
}
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index acfe755562a6..1e5a40673953 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -196,19 +196,21 @@ enum page_cache_mode {
#define __PAGE_KERNEL_NOENC (__PAGE_KERNEL)
#define __PAGE_KERNEL_NOENC_WP (__PAGE_KERNEL_WP)
-#define PAGE_KERNEL __pgprot(__PAGE_KERNEL | _PAGE_ENC)
-#define PAGE_KERNEL_NOENC __pgprot(__PAGE_KERNEL)
-#define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO | _PAGE_ENC)
-#define PAGE_KERNEL_EXEC __pgprot(__PAGE_KERNEL_EXEC | _PAGE_ENC)
-#define PAGE_KERNEL_EXEC_NOENC __pgprot(__PAGE_KERNEL_EXEC)
-#define PAGE_KERNEL_RX __pgprot(__PAGE_KERNEL_RX | _PAGE_ENC)
-#define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE | _PAGE_ENC)
-#define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE | _PAGE_ENC)
-#define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC | _PAGE_ENC)
-#define PAGE_KERNEL_VVAR __pgprot(__PAGE_KERNEL_VVAR | _PAGE_ENC)
-
-#define PAGE_KERNEL_IO __pgprot(__PAGE_KERNEL_IO)
-#define PAGE_KERNEL_IO_NOCACHE __pgprot(__PAGE_KERNEL_IO_NOCACHE)
+#define default_pgprot(x) __pgprot((x) & __default_kernel_pte_mask)
+
+#define PAGE_KERNEL default_pgprot(__PAGE_KERNEL | _PAGE_ENC)
+#define PAGE_KERNEL_NOENC default_pgprot(__PAGE_KERNEL)
+#define PAGE_KERNEL_RO default_pgprot(__PAGE_KERNEL_RO | _PAGE_ENC)
+#define PAGE_KERNEL_EXEC default_pgprot(__PAGE_KERNEL_EXEC | _PAGE_ENC)
+#define PAGE_KERNEL_EXEC_NOENC default_pgprot(__PAGE_KERNEL_EXEC)
+#define PAGE_KERNEL_RX default_pgprot(__PAGE_KERNEL_RX | _PAGE_ENC)
+#define PAGE_KERNEL_NOCACHE default_pgprot(__PAGE_KERNEL_NOCACHE | _PAGE_ENC)
+#define PAGE_KERNEL_LARGE default_pgprot(__PAGE_KERNEL_LARGE | _PAGE_ENC)
+#define PAGE_KERNEL_LARGE_EXEC default_pgprot(__PAGE_KERNEL_LARGE_EXEC | _PAGE_ENC)
+#define PAGE_KERNEL_VVAR default_pgprot(__PAGE_KERNEL_VVAR | _PAGE_ENC)
+
+#define PAGE_KERNEL_IO default_pgprot(__PAGE_KERNEL_IO)
+#define PAGE_KERNEL_IO_NOCACHE default_pgprot(__PAGE_KERNEL_IO_NOCACHE)
#endif /* __ASSEMBLY__ */
@@ -483,6 +485,7 @@ static inline pgprot_t pgprot_large_2_4k(pgprot_t pgprot)
typedef struct page *pgtable_t;
extern pteval_t __supported_pte_mask;
+extern pteval_t __default_kernel_pte_mask;
extern void set_nx(void);
extern int nx_enabled;
diff --git a/arch/x86/include/asm/pti.h b/arch/x86/include/asm/pti.h
index 0b5ef05b2d2d..38a17f1d5c9d 100644
--- a/arch/x86/include/asm/pti.h
+++ b/arch/x86/include/asm/pti.h
@@ -6,8 +6,10 @@
#ifdef CONFIG_PAGE_TABLE_ISOLATION
extern void pti_init(void);
extern void pti_check_boottime_disable(void);
+extern void pti_clone_kernel_text(void);
#else
static inline void pti_check_boottime_disable(void) { }
+static inline void pti_clone_kernel_text(void) { }
#endif
#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index 03eedc21246d..d653139857af 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -20,9 +20,13 @@
#include <asm/thread_info.h> /* for TS_COMPAT */
#include <asm/unistd.h>
+#ifdef CONFIG_X86_64
+typedef asmlinkage long (*sys_call_ptr_t)(const struct pt_regs *);
+#else
typedef asmlinkage long (*sys_call_ptr_t)(unsigned long, unsigned long,
unsigned long, unsigned long,
unsigned long, unsigned long);
+#endif /* CONFIG_X86_64 */
extern const sys_call_ptr_t sys_call_table[];
#if defined(CONFIG_X86_32)
diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h
new file mode 100644
index 000000000000..e046a405743d
--- /dev/null
+++ b/arch/x86/include/asm/syscall_wrapper.h
@@ -0,0 +1,209 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * syscall_wrapper.h - x86 specific wrappers to syscall definitions
+ */
+
+#ifndef _ASM_X86_SYSCALL_WRAPPER_H
+#define _ASM_X86_SYSCALL_WRAPPER_H
+
+/* Mapping of registers to parameters for syscalls on x86-64 and x32 */
+#define SC_X86_64_REGS_TO_ARGS(x, ...) \
+ __MAP(x,__SC_ARGS \
+ ,,regs->di,,regs->si,,regs->dx \
+ ,,regs->r10,,regs->r8,,regs->r9) \
+
+/* Mapping of registers to parameters for syscalls on i386 */
+#define SC_IA32_REGS_TO_ARGS(x, ...) \
+ __MAP(x,__SC_ARGS \
+ ,,(unsigned int)regs->bx,,(unsigned int)regs->cx \
+ ,,(unsigned int)regs->dx,,(unsigned int)regs->si \
+ ,,(unsigned int)regs->di,,(unsigned int)regs->bp)
+
+#ifdef CONFIG_IA32_EMULATION
+/*
+ * For IA32 emulation, we need to handle "compat" syscalls *and* create
+ * additional wrappers (aptly named __ia32_sys_xyzzy) which decode the
+ * ia32 regs in the proper order for shared or "common" syscalls. As some
+ * syscalls may not be implemented, we need to expand COND_SYSCALL in
+ * kernel/sys_ni.c and SYS_NI in kernel/time/posix-stubs.c to cover this
+ * case as well.
+ */
+#define __IA32_COMPAT_SYS_STUBx(x, name, ...) \
+ asmlinkage long __ia32_compat_sys##name(const struct pt_regs *regs);\
+ ALLOW_ERROR_INJECTION(__ia32_compat_sys##name, ERRNO); \
+ asmlinkage long __ia32_compat_sys##name(const struct pt_regs *regs)\
+ { \
+ return __se_compat_sys##name(SC_IA32_REGS_TO_ARGS(x,__VA_ARGS__));\
+ } \
+
+#define __IA32_SYS_STUBx(x, name, ...) \
+ asmlinkage long __ia32_sys##name(const struct pt_regs *regs); \
+ ALLOW_ERROR_INJECTION(__ia32_sys##name, ERRNO); \
+ asmlinkage long __ia32_sys##name(const struct pt_regs *regs) \
+ { \
+ return __se_sys##name(SC_IA32_REGS_TO_ARGS(x,__VA_ARGS__));\
+ }
+
+/*
+ * To keep the naming coherent, re-define SYSCALL_DEFINE0 to create an alias
+ * named __ia32_sys_*()
+ */
+#define SYSCALL_DEFINE0(sname) \
+ SYSCALL_METADATA(_##sname, 0); \
+ asmlinkage long __x64_sys_##sname(void); \
+ ALLOW_ERROR_INJECTION(__x64_sys_##sname, ERRNO); \
+ SYSCALL_ALIAS(__ia32_sys_##sname, __x64_sys_##sname); \
+ asmlinkage long __x64_sys_##sname(void)
+
+#define COND_SYSCALL(name) \
+ cond_syscall(__x64_sys_##name); \
+ cond_syscall(__ia32_sys_##name)
+
+#define SYS_NI(name) \
+ SYSCALL_ALIAS(__x64_sys_##name, sys_ni_posix_timers); \
+ SYSCALL_ALIAS(__ia32_sys_##name, sys_ni_posix_timers)
+
+#else /* CONFIG_IA32_EMULATION */
+#define __IA32_COMPAT_SYS_STUBx(x, name, ...)
+#define __IA32_SYS_STUBx(x, fullname, name, ...)
+#endif /* CONFIG_IA32_EMULATION */
+
+
+#ifdef CONFIG_X86_X32
+/*
+ * For the x32 ABI, we need to create a stub for compat_sys_*() which is aware
+ * of the x86-64-style parameter ordering of x32 syscalls. The syscalls common
+ * with x86_64 obviously do not need such care.
+ */
+#define __X32_COMPAT_SYS_STUBx(x, name, ...) \
+ asmlinkage long __x32_compat_sys##name(const struct pt_regs *regs);\
+ ALLOW_ERROR_INJECTION(__x32_compat_sys##name, ERRNO); \
+ asmlinkage long __x32_compat_sys##name(const struct pt_regs *regs)\
+ { \
+ return __se_compat_sys##name(SC_X86_64_REGS_TO_ARGS(x,__VA_ARGS__));\
+ } \
+
+#else /* CONFIG_X86_X32 */
+#define __X32_COMPAT_SYS_STUBx(x, name, ...)
+#endif /* CONFIG_X86_X32 */
+
+
+#ifdef CONFIG_COMPAT
+/*
+ * Compat means IA32_EMULATION and/or X86_X32. As they use a different
+ * mapping of registers to parameters, we need to generate stubs for each
+ * of them.
+ */
+#define COMPAT_SYSCALL_DEFINEx(x, name, ...) \
+ static long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \
+ static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\
+ __IA32_COMPAT_SYS_STUBx(x, name, __VA_ARGS__) \
+ __X32_COMPAT_SYS_STUBx(x, name, __VA_ARGS__) \
+ static long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \
+ { \
+ return __do_compat_sys##name(__MAP(x,__SC_DELOUSE,__VA_ARGS__));\
+ } \
+ static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
+
+/*
+ * As some compat syscalls may not be implemented, we need to expand
+ * COND_SYSCALL_COMPAT in kernel/sys_ni.c and COMPAT_SYS_NI in
+ * kernel/time/posix-stubs.c to cover this case as well.
+ */
+#define COND_SYSCALL_COMPAT(name) \
+ cond_syscall(__ia32_compat_sys_##name); \
+ cond_syscall(__x32_compat_sys_##name)
+
+#define COMPAT_SYS_NI(name) \
+ SYSCALL_ALIAS(__ia32_compat_sys_##name, sys_ni_posix_timers); \
+ SYSCALL_ALIAS(__x32_compat_sys_##name, sys_ni_posix_timers)
+
+#endif /* CONFIG_COMPAT */
+
+
+/*
+ * Instead of the generic __SYSCALL_DEFINEx() definition, this macro takes
+ * struct pt_regs *regs as the only argument of the syscall stub named
+ * __x64_sys_*(). It decodes just the registers it needs and passes them on to
+ * the __se_sys_*() wrapper performing sign extension and then to the
+ * __do_sys_*() function doing the actual job. These wrappers and functions
+ * are inlined (at least in very most cases), meaning that the assembly looks
+ * as follows (slightly re-ordered for better readability):
+ *
+ * <__x64_sys_recv>: <-- syscall with 4 parameters
+ * callq <__fentry__>
+ *
+ * mov 0x70(%rdi),%rdi <-- decode regs->di
+ * mov 0x68(%rdi),%rsi <-- decode regs->si
+ * mov 0x60(%rdi),%rdx <-- decode regs->dx
+ * mov 0x38(%rdi),%rcx <-- decode regs->r10
+ *
+ * xor %r9d,%r9d <-- clear %r9
+ * xor %r8d,%r8d <-- clear %r8
+ *
+ * callq __sys_recvfrom <-- do the actual work in __sys_recvfrom()
+ * which takes 6 arguments
+ *
+ * cltq <-- extend return value to 64-bit
+ * retq <-- return
+ *
+ * This approach avoids leaking random user-provided register content down
+ * the call chain.
+ *
+ * If IA32_EMULATION is enabled, this macro generates an additional wrapper
+ * named __ia32_sys_*() which decodes the struct pt_regs *regs according
+ * to the i386 calling convention (bx, cx, dx, si, di, bp).
+ */
+#define __SYSCALL_DEFINEx(x, name, ...) \
+ asmlinkage long __x64_sys##name(const struct pt_regs *regs); \
+ ALLOW_ERROR_INJECTION(__x64_sys##name, ERRNO); \
+ static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \
+ static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\
+ asmlinkage long __x64_sys##name(const struct pt_regs *regs) \
+ { \
+ return __se_sys##name(SC_X86_64_REGS_TO_ARGS(x,__VA_ARGS__));\
+ } \
+ __IA32_SYS_STUBx(x, name, __VA_ARGS__) \
+ static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \
+ { \
+ long ret = __do_sys##name(__MAP(x,__SC_CAST,__VA_ARGS__));\
+ __MAP(x,__SC_TEST,__VA_ARGS__); \
+ __PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__)); \
+ return ret; \
+ } \
+ static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
+
+/*
+ * As the generic SYSCALL_DEFINE0() macro does not decode any parameters for
+ * obvious reasons, and passing struct pt_regs *regs to it in %rdi does not
+ * hurt, we only need to re-define it here to keep the naming congruent to
+ * SYSCALL_DEFINEx() -- which is essential for the COND_SYSCALL() and SYS_NI()
+ * macros to work correctly.
+ */
+#ifndef SYSCALL_DEFINE0
+#define SYSCALL_DEFINE0(sname) \
+ SYSCALL_METADATA(_##sname, 0); \
+ asmlinkage long __x64_sys_##sname(void); \
+ ALLOW_ERROR_INJECTION(__x64_sys_##sname, ERRNO); \
+ asmlinkage long __x64_sys_##sname(void)
+#endif
+
+#ifndef COND_SYSCALL
+#define COND_SYSCALL(name) cond_syscall(__x64_sys_##name)
+#endif
+
+#ifndef SYS_NI
+#define SYS_NI(name) SYSCALL_ALIAS(__x64_sys_##name, sys_ni_posix_timers);
+#endif
+
+
+/*
+ * For VSYSCALLS, we need to declare these three syscalls with the new
+ * pt_regs-based calling convention for in-kernel use.
+ */
+struct pt_regs;
+asmlinkage long __x64_sys_getcpu(const struct pt_regs *regs);
+asmlinkage long __x64_sys_gettimeofday(const struct pt_regs *regs);
+asmlinkage long __x64_sys_time(const struct pt_regs *regs);
+
+#endif /* _ASM_X86_SYSCALL_WRAPPER_H */
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index ae6e05fdc24b..9fa979dd0d9d 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -18,6 +18,12 @@
/* Common in X86_32 and X86_64 */
/* kernel/ioport.c */
long ksys_ioperm(unsigned long from, unsigned long num, int turn_on);
+
+#ifdef CONFIG_X86_32
+/*
+ * These definitions are only valid on pure 32-bit systems; x86-64 uses a
+ * different syscall calling convention
+ */
asmlinkage long sys_ioperm(unsigned long, unsigned long, int);
asmlinkage long sys_iopl(unsigned int);
@@ -32,7 +38,6 @@ asmlinkage long sys_set_thread_area(struct user_desc __user *);
asmlinkage long sys_get_thread_area(struct user_desc __user *);
/* X86_32 only */
-#ifdef CONFIG_X86_32
/* kernel/signal.c */
asmlinkage long sys_sigreturn(void);
@@ -42,15 +47,5 @@ struct vm86_struct;
asmlinkage long sys_vm86old(struct vm86_struct __user *);
asmlinkage long sys_vm86(unsigned long, unsigned long);
-#else /* CONFIG_X86_32 */
-
-/* X86_64 only */
-/* kernel/process_64.c */
-asmlinkage long sys_arch_prctl(int, unsigned long);
-
-/* kernel/sys_x86_64.c */
-asmlinkage long sys_mmap(unsigned long, unsigned long, unsigned long,
- unsigned long, unsigned long, unsigned long);
-
#endif /* CONFIG_X86_32 */
#endif /* _ASM_X86_SYSCALLS_H */
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 84137c22fdfa..6690cd3fc8b1 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -131,7 +131,12 @@ static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
{
VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
- VM_WARN_ON_ONCE(!this_cpu_has(X86_FEATURE_PCID));
+ /*
+ * Use boot_cpu_has() instead of this_cpu_has() as this function
+ * might be called during early boot. This should work even after
+ * boot because all CPU's the have same capabilities:
+ */
+ VM_WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_PCID));
return __sme_pa(pgd) | kern_pcid(asid) | CR3_NOFLUSH;
}
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index aebf60357758..a06cbf019744 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -137,15 +137,15 @@ struct boot_e820_entry {
* setup data structure.
*/
struct jailhouse_setup_data {
- u16 version;
- u16 compatible_version;
- u16 pm_timer_address;
- u16 num_cpus;
- u64 pci_mmconfig_base;
- u32 tsc_khz;
- u32 apic_khz;
- u8 standard_ioapic;
- u8 cpu_ids[255];
+ __u16 version;
+ __u16 compatible_version;
+ __u16 pm_timer_address;
+ __u16 num_cpus;
+ __u64 pci_mmconfig_base;
+ __u32 tsc_khz;
+ __u32 apic_khz;
+ __u8 standard_ioapic;
+ __u8 cpu_ids[255];
} __attribute__((packed));
/* The so-called "zeropage" */
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 7a37d9357bc4..dde444f932c1 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -200,7 +200,7 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end)
{
struct acpi_madt_local_x2apic *processor = NULL;
#ifdef CONFIG_X86_X2APIC
- int apic_id;
+ u32 apic_id;
u8 enabled;
#endif
@@ -222,10 +222,13 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end)
* to not preallocating memory for all NR_CPUS
* when we use CPU hotplug.
*/
- if (!apic->apic_id_valid(apic_id) && enabled)
- printk(KERN_WARNING PREFIX "x2apic entry ignored\n");
- else
- acpi_register_lapic(apic_id, processor->uid, enabled);
+ if (!apic->apic_id_valid(apic_id)) {
+ if (enabled)
+ pr_warn(PREFIX "x2apic entry ignored\n");
+ return 0;
+ }
+
+ acpi_register_lapic(apic_id, processor->uid, enabled);
#else
printk(KERN_WARNING PREFIX "x2apic entry ignored\n");
#endif
diff --git a/arch/x86/kernel/apic/apic_common.c b/arch/x86/kernel/apic/apic_common.c
index a360801779ae..02b4839478b1 100644
--- a/arch/x86/kernel/apic/apic_common.c
+++ b/arch/x86/kernel/apic/apic_common.c
@@ -40,7 +40,7 @@ int default_check_phys_apicid_present(int phys_apicid)
return physid_isset(phys_apicid, phys_cpu_present_map);
}
-int default_apic_id_valid(int apicid)
+int default_apic_id_valid(u32 apicid)
{
return (apicid < 255);
}
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index 134e04506ab4..78778b54f904 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -56,7 +56,7 @@ static u32 numachip2_set_apic_id(unsigned int id)
return id << 24;
}
-static int numachip_apic_id_valid(int apicid)
+static int numachip_apic_id_valid(u32 apicid)
{
/* Trust what bootloader passes in MADT */
return 1;
diff --git a/arch/x86/kernel/apic/x2apic.h b/arch/x86/kernel/apic/x2apic.h
index b107de381cb5..a49b3604027f 100644
--- a/arch/x86/kernel/apic/x2apic.h
+++ b/arch/x86/kernel/apic/x2apic.h
@@ -1,6 +1,6 @@
/* Common bits for X2APIC cluster/physical modes. */
-int x2apic_apic_id_valid(int apicid);
+int x2apic_apic_id_valid(u32 apicid);
int x2apic_apic_id_registered(void);
void __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest);
unsigned int x2apic_get_apic_id(unsigned long id);
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index e2829bf40e4a..b5cf9e7b3830 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -101,7 +101,7 @@ static int x2apic_phys_probe(void)
}
/* Common x2apic functions, also used by x2apic_cluster */
-int x2apic_apic_id_valid(int apicid)
+int x2apic_apic_id_valid(u32 apicid)
{
return 1;
}
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index f11910b44638..efaf2d4f9c3c 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -557,7 +557,7 @@ static void uv_send_IPI_all(int vector)
uv_send_IPI_mask(cpu_online_mask, vector);
}
-static int uv_apic_id_valid(int apicid)
+static int uv_apic_id_valid(u32 apicid)
{
return 1;
}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 4702fbd98f92..8a5b185735e1 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -848,18 +848,6 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
c->x86_power = edx;
}
- if (c->extended_cpuid_level >= 0x80000008) {
- cpuid(0x80000008, &eax, &ebx, &ecx, &edx);
-
- c->x86_virt_bits = (eax >> 8) & 0xff;
- c->x86_phys_bits = eax & 0xff;
- c->x86_capability[CPUID_8000_0008_EBX] = ebx;
- }
-#ifdef CONFIG_X86_32
- else if (cpu_has(c, X86_FEATURE_PAE) || cpu_has(c, X86_FEATURE_PSE36))
- c->x86_phys_bits = 36;
-#endif
-
if (c->extended_cpuid_level >= 0x8000000a)
c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a);
@@ -874,6 +862,23 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
apply_forced_caps(c);
}
+static void get_cpu_address_sizes(struct cpuinfo_x86 *c)
+{
+ u32 eax, ebx, ecx, edx;
+
+ if (c->extended_cpuid_level >= 0x80000008) {
+ cpuid(0x80000008, &eax, &ebx, &ecx, &edx);
+
+ c->x86_virt_bits = (eax >> 8) & 0xff;
+ c->x86_phys_bits = eax & 0xff;
+ c->x86_capability[CPUID_8000_0008_EBX] = ebx;
+ }
+#ifdef CONFIG_X86_32
+ else if (cpu_has(c, X86_FEATURE_PAE) || cpu_has(c, X86_FEATURE_PSE36))
+ c->x86_phys_bits = 36;
+#endif
+}
+
static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_X86_32
@@ -965,6 +970,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
cpu_detect(c);
get_cpu_vendor(c);
get_cpu_cap(c);
+ get_cpu_address_sizes(c);
setup_force_cpu_cap(X86_FEATURE_CPUID);
if (this_cpu->c_early_init)
@@ -1097,6 +1103,8 @@ static void generic_identify(struct cpuinfo_x86 *c)
get_cpu_cap(c);
+ get_cpu_address_sizes(c);
+
if (c->cpuid_level >= 0x00000001) {
c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF;
#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
index 904b0a3c4e53..2c0bd38a44ab 100644
--- a/arch/x86/kernel/cpu/cpuid-deps.c
+++ b/arch/x86/kernel/cpu/cpuid-deps.c
@@ -19,7 +19,7 @@ struct cpuid_dep {
* called from cpu hotplug. It shouldn't do anything in this case,
* but it's difficult to tell that to the init reference checker.
*/
-const static struct cpuid_dep cpuid_deps[] = {
+static const struct cpuid_dep cpuid_deps[] = {
{ X86_FEATURE_XSAVEOPT, X86_FEATURE_XSAVE },
{ X86_FEATURE_XSAVEC, X86_FEATURE_XSAVE },
{ X86_FEATURE_XSAVES, X86_FEATURE_XSAVE },
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 1f6680427ff0..f631a3f15587 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -38,37 +38,6 @@
#include <asm/virtext.h>
#include <asm/intel_pt.h>
-/* Alignment required for elf header segment */
-#define ELF_CORE_HEADER_ALIGN 4096
-
-/* This primarily represents number of split ranges due to exclusion */
-#define CRASH_MAX_RANGES 16
-
-struct crash_mem_range {
- u64 start, end;
-};
-
-struct crash_mem {
- unsigned int nr_ranges;
- struct crash_mem_range ranges[CRASH_MAX_RANGES];
-};
-
-/* Misc data about ram ranges needed to prepare elf headers */
-struct crash_elf_data {
- struct kimage *image;
- /*
- * Total number of ram ranges we have after various adjustments for
- * crash reserved region, etc.
- */
- unsigned int max_nr_ranges;
-
- /* Pointer to elf header */
- void *ehdr;
- /* Pointer to next phdr */
- void *bufp;
- struct crash_mem mem;
-};
-
/* Used while preparing memory map entries for second kernel */
struct crash_memmap_data {
struct boot_params *params;
@@ -218,124 +187,49 @@ static int get_nr_ram_ranges_callback(struct resource *res, void *arg)
return 0;
}
-
/* Gather all the required information to prepare elf headers for ram regions */
-static void fill_up_crash_elf_data(struct crash_elf_data *ced,
- struct kimage *image)
+static struct crash_mem *fill_up_crash_elf_data(void)
{
unsigned int nr_ranges = 0;
-
- ced->image = image;
+ struct crash_mem *cmem;
walk_system_ram_res(0, -1, &nr_ranges,
get_nr_ram_ranges_callback);
+ if (!nr_ranges)
+ return NULL;
- ced->max_nr_ranges = nr_ranges;
-
- /* Exclusion of crash region could split memory ranges */
- ced->max_nr_ranges++;
-
- /* If crashk_low_res is not 0, another range split possible */
- if (crashk_low_res.end)
- ced->max_nr_ranges++;
-}
-
-static int exclude_mem_range(struct crash_mem *mem,
- unsigned long long mstart, unsigned long long mend)
-{
- int i, j;
- unsigned long long start, end;
- struct crash_mem_range temp_range = {0, 0};
-
- for (i = 0; i < mem->nr_ranges; i++) {
- start = mem->ranges[i].start;
- end = mem->ranges[i].end;
-
- if (mstart > end || mend < start)
- continue;
-
- /* Truncate any area outside of range */
- if (mstart < start)
- mstart = start;
- if (mend > end)
- mend = end;
-
- /* Found completely overlapping range */
- if (mstart == start && mend == end) {
- mem->ranges[i].start = 0;
- mem->ranges[i].end = 0;
- if (i < mem->nr_ranges - 1) {
- /* Shift rest of the ranges to left */
- for (j = i; j < mem->nr_ranges - 1; j++) {
- mem->ranges[j].start =
- mem->ranges[j+1].start;
- mem->ranges[j].end =
- mem->ranges[j+1].end;
- }
- }
- mem->nr_ranges--;
- return 0;
- }
-
- if (mstart > start && mend < end) {
- /* Split original range */
- mem->ranges[i].end = mstart - 1;
- temp_range.start = mend + 1;
- temp_range.end = end;
- } else if (mstart != start)
- mem->ranges[i].end = mstart - 1;
- else
- mem->ranges[i].start = mend + 1;
- break;
- }
+ /*
+ * Exclusion of crash region and/or crashk_low_res may cause
+ * another range split. So add extra two slots here.
+ */
+ nr_ranges += 2;
+ cmem = vzalloc(sizeof(struct crash_mem) +
+ sizeof(struct crash_mem_range) * nr_ranges);
+ if (!cmem)
+ return NULL;
- /* If a split happend, add the split to array */
- if (!temp_range.end)
- return 0;
+ cmem->max_nr_ranges = nr_ranges;
+ cmem->nr_ranges = 0;
- /* Split happened */
- if (i == CRASH_MAX_RANGES - 1) {
- pr_err("Too many crash ranges after split\n");
- return -ENOMEM;
- }
-
- /* Location where new range should go */
- j = i + 1;
- if (j < mem->nr_ranges) {
- /* Move over all ranges one slot towards the end */
- for (i = mem->nr_ranges - 1; i >= j; i--)
- mem->ranges[i + 1] = mem->ranges[i];
- }
-
- mem->ranges[j].start = temp_range.start;
- mem->ranges[j].end = temp_range.end;
- mem->nr_ranges++;
- return 0;
+ return cmem;
}
/*
* Look for any unwanted ranges between mstart, mend and remove them. This
- * might lead to split and split ranges are put in ced->mem.ranges[] array
+ * might lead to split and split ranges are put in cmem->ranges[] array
*/
-static int elf_header_exclude_ranges(struct crash_elf_data *ced,
- unsigned long long mstart, unsigned long long mend)
+static int elf_header_exclude_ranges(struct crash_mem *cmem)
{
- struct crash_mem *cmem = &ced->mem;
int ret = 0;
- memset(cmem->ranges, 0, sizeof(cmem->ranges));
-
- cmem->ranges[0].start = mstart;
- cmem->ranges[0].end = mend;
- cmem->nr_ranges = 1;
-
/* Exclude crashkernel region */
- ret = exclude_mem_range(cmem, crashk_res.start, crashk_res.end);
+ ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end);
if (ret)
return ret;
if (crashk_low_res.end) {
- ret = exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end);
+ ret = crash_exclude_mem_range(cmem, crashk_low_res.start,
+ crashk_low_res.end);
if (ret)
return ret;
}
@@ -345,144 +239,12 @@ static int elf_header_exclude_ranges(struct crash_elf_data *ced,
static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg)
{
- struct crash_elf_data *ced = arg;
- Elf64_Ehdr *ehdr;
- Elf64_Phdr *phdr;
- unsigned long mstart, mend;
- struct kimage *image = ced->image;
- struct crash_mem *cmem;
- int ret, i;
-
- ehdr = ced->ehdr;
-
- /* Exclude unwanted mem ranges */
- ret = elf_header_exclude_ranges(ced, res->start, res->end);
- if (ret)
- return ret;
-
- /* Go through all the ranges in ced->mem.ranges[] and prepare phdr */
- cmem = &ced->mem;
-
- for (i = 0; i < cmem->nr_ranges; i++) {
- mstart = cmem->ranges[i].start;
- mend = cmem->ranges[i].end;
-
- phdr = ced->bufp;
- ced->bufp += sizeof(Elf64_Phdr);
-
- phdr->p_type = PT_LOAD;
- phdr->p_flags = PF_R|PF_W|PF_X;
- phdr->p_offset = mstart;
-
- /*
- * If a range matches backup region, adjust offset to backup
- * segment.
- */
- if (mstart == image->arch.backup_src_start &&
- (mend - mstart + 1) == image->arch.backup_src_sz)
- phdr->p_offset = image->arch.backup_load_addr;
-
- phdr->p_paddr = mstart;
- phdr->p_vaddr = (unsigned long long) __va(mstart);
- phdr->p_filesz = phdr->p_memsz = mend - mstart + 1;
- phdr->p_align = 0;
- ehdr->e_phnum++;
- pr_debug("Crash PT_LOAD elf header. phdr=%p vaddr=0x%llx, paddr=0x%llx, sz=0x%llx e_phnum=%d p_offset=0x%llx\n",
- phdr, phdr->p_vaddr, phdr->p_paddr, phdr->p_filesz,
- ehdr->e_phnum, phdr->p_offset);
- }
-
- return ret;
-}
-
-static int prepare_elf64_headers(struct crash_elf_data *ced,
- void **addr, unsigned long *sz)
-{
- Elf64_Ehdr *ehdr;
- Elf64_Phdr *phdr;
- unsigned long nr_cpus = num_possible_cpus(), nr_phdr, elf_sz;
- unsigned char *buf, *bufp;
- unsigned int cpu;
- unsigned long long notes_addr;
- int ret;
+ struct crash_mem *cmem = arg;
- /* extra phdr for vmcoreinfo elf note */
- nr_phdr = nr_cpus + 1;
- nr_phdr += ced->max_nr_ranges;
-
- /*
- * kexec-tools creates an extra PT_LOAD phdr for kernel text mapping
- * area on x86_64 (ffffffff80000000 - ffffffffa0000000).
- * I think this is required by tools like gdb. So same physical
- * memory will be mapped in two elf headers. One will contain kernel
- * text virtual addresses and other will have __va(physical) addresses.
- */
+ cmem->ranges[cmem->nr_ranges].start = res->start;
+ cmem->ranges[cmem->nr_ranges].end = res->end;
+ cmem->nr_ranges++;
- nr_phdr++;
- elf_sz = sizeof(Elf64_Ehdr) + nr_phdr * sizeof(Elf64_Phdr);
- elf_sz = ALIGN(elf_sz, ELF_CORE_HEADER_ALIGN);
-
- buf = vzalloc(elf_sz);
- if (!buf)
- return -ENOMEM;
-
- bufp = buf;
- ehdr = (Elf64_Ehdr *)bufp;
- bufp += sizeof(Elf64_Ehdr);
- memcpy(ehdr->e_ident, ELFMAG, SELFMAG);
- ehdr->e_ident[EI_CLASS] = ELFCLASS64;
- ehdr->e_ident[EI_DATA] = ELFDATA2LSB;
- ehdr->e_ident[EI_VERSION] = EV_CURRENT;
- ehdr->e_ident[EI_OSABI] = ELF_OSABI;
- memset(ehdr->e_ident + EI_PAD, 0, EI_NIDENT - EI_PAD);
- ehdr->e_type = ET_CORE;
- ehdr->e_machine = ELF_ARCH;
- ehdr->e_version = EV_CURRENT;
- ehdr->e_phoff = sizeof(Elf64_Ehdr);
- ehdr->e_ehsize = sizeof(Elf64_Ehdr);
- ehdr->e_phentsize = sizeof(Elf64_Phdr);
-
- /* Prepare one phdr of type PT_NOTE for each present cpu */
- for_each_present_cpu(cpu) {
- phdr = (Elf64_Phdr *)bufp;
- bufp += sizeof(Elf64_Phdr);
- phdr->p_type = PT_NOTE;
- notes_addr = per_cpu_ptr_to_phys(per_cpu_ptr(crash_notes, cpu));
- phdr->p_offset = phdr->p_paddr = notes_addr;
- phdr->p_filesz = phdr->p_memsz = sizeof(note_buf_t);
- (ehdr->e_phnum)++;
- }
-
- /* Prepare one PT_NOTE header for vmcoreinfo */
- phdr = (Elf64_Phdr *)bufp;
- bufp += sizeof(Elf64_Phdr);
- phdr->p_type = PT_NOTE;
- phdr->p_offset = phdr->p_paddr = paddr_vmcoreinfo_note();
- phdr->p_filesz = phdr->p_memsz = VMCOREINFO_NOTE_SIZE;
- (ehdr->e_phnum)++;
-
-#ifdef CONFIG_X86_64
- /* Prepare PT_LOAD type program header for kernel text region */
- phdr = (Elf64_Phdr *)bufp;
- bufp += sizeof(Elf64_Phdr);
- phdr->p_type = PT_LOAD;
- phdr->p_flags = PF_R|PF_W|PF_X;
- phdr->p_vaddr = (Elf64_Addr)_text;
- phdr->p_filesz = phdr->p_memsz = _end - _text;
- phdr->p_offset = phdr->p_paddr = __pa_symbol(_text);
- (ehdr->e_phnum)++;
-#endif
-
- /* Prepare PT_LOAD headers for system ram chunks. */
- ced->ehdr = ehdr;
- ced->bufp = bufp;
- ret = walk_system_ram_res(0, -1, ced,
- prepare_elf64_ram_headers_callback);
- if (ret < 0)
- return ret;
-
- *addr = buf;
- *sz = elf_sz;
return 0;
}
@@ -490,18 +252,46 @@ static int prepare_elf64_headers(struct crash_elf_data *ced,
static int prepare_elf_headers(struct kimage *image, void **addr,
unsigned long *sz)
{
- struct crash_elf_data *ced;
- int ret;
+ struct crash_mem *cmem;
+ Elf64_Ehdr *ehdr;
+ Elf64_Phdr *phdr;
+ int ret, i;
- ced = kzalloc(sizeof(*ced), GFP_KERNEL);
- if (!ced)
+ cmem = fill_up_crash_elf_data();
+ if (!cmem)
return -ENOMEM;
- fill_up_crash_elf_data(ced, image);
+ ret = walk_system_ram_res(0, -1, cmem,
+ prepare_elf64_ram_headers_callback);
+ if (ret)
+ goto out;
+
+ /* Exclude unwanted mem ranges */
+ ret = elf_header_exclude_ranges(cmem);
+ if (ret)
+ goto out;
/* By default prepare 64bit headers */
- ret = prepare_elf64_headers(ced, addr, sz);
- kfree(ced);
+ ret = crash_prepare_elf64_headers(cmem,
+ IS_ENABLED(CONFIG_X86_64), addr, sz);
+ if (ret)
+ goto out;
+
+ /*
+ * If a range matches backup region, adjust offset to backup
+ * segment.
+ */
+ ehdr = (Elf64_Ehdr *)*addr;
+ phdr = (Elf64_Phdr *)(ehdr + 1);
+ for (i = 0; i < ehdr->e_phnum; phdr++, i++)
+ if (phdr->p_type == PT_LOAD &&
+ phdr->p_paddr == image->arch.backup_src_start &&
+ phdr->p_memsz == image->arch.backup_src_sz) {
+ phdr->p_offset = image->arch.backup_load_addr;
+ break;
+ }
+out:
+ vfree(cmem);
return ret;
}
@@ -547,14 +337,14 @@ static int memmap_exclude_ranges(struct kimage *image, struct crash_mem *cmem,
/* Exclude Backup region */
start = image->arch.backup_load_addr;
end = start + image->arch.backup_src_sz - 1;
- ret = exclude_mem_range(cmem, start, end);
+ ret = crash_exclude_mem_range(cmem, start, end);
if (ret)
return ret;
/* Exclude elf header region */
start = image->arch.elf_load_addr;
end = start + image->arch.elf_headers_sz - 1;
- return exclude_mem_range(cmem, start, end);
+ return crash_exclude_mem_range(cmem, start, end);
}
/* Prepare memory map for crash dump kernel */
diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c
index e5ec3cafa72e..aebd0d5bc086 100644
--- a/arch/x86/kernel/espfix_64.c
+++ b/arch/x86/kernel/espfix_64.c
@@ -195,6 +195,10 @@ void init_espfix_ap(int cpu)
pte_p = pte_offset_kernel(&pmd, addr);
stack_page = page_address(alloc_pages_node(node, GFP_KERNEL, 0));
+ /*
+ * __PAGE_KERNEL_* includes _PAGE_GLOBAL, which we want since
+ * this is mapped to userspace.
+ */
pte = __pte(__pa(stack_page) | ((__PAGE_KERNEL_RO | _PAGE_ENC) & ptemask));
for (n = 0; n < ESPFIX_PTE_CLONES; n++)
set_pte(&pte_p[n*PTE_STRIDE], pte);
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 0c855deee165..0c408f8c4ed4 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -195,6 +195,8 @@ unsigned long __head __startup_64(unsigned long physaddr,
pud[i + 1] = (pudval_t)pmd + pgtable_flags;
pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL;
+ /* Filter out unsupported __PAGE_KERNEL_* bits: */
+ pmd_entry &= __supported_pte_mask;
pmd_entry += sme_get_me_mask();
pmd_entry += physaddr;
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 48385c1074a5..8344dd2f310a 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -399,8 +399,13 @@ NEXT_PAGE(level3_ident_pgt)
.quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
.fill 511, 8, 0
NEXT_PAGE(level2_ident_pgt)
- /* Since I easily can, map the first 1G.
+ /*
+ * Since I easily can, map the first 1G.
* Don't set NX because code runs from these pages.
+ *
+ * Note: This sets _PAGE_GLOBAL despite whether
+ * the CPU supports it or it is enabled. But,
+ * the CPU should ignore the bit.
*/
PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
#else
@@ -431,6 +436,10 @@ NEXT_PAGE(level2_kernel_pgt)
* (NOTE: at +512MB starts the module area, see MODULES_VADDR.
* If you want to increase this then increase MODULES_VADDR
* too.)
+ *
+ * This table is eventually used by the kernel during normal
+ * runtime. Care must be taken to clear out undesired bits
+ * later, like _PAGE_RW or _PAGE_GLOBAL in some cases.
*/
PMDS(0, __PAGE_KERNEL_LARGE_EXEC,
KERNEL_IMAGE_SIZE/PMD_SIZE)
diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c
index fb095ba0c02f..3182908b7e6c 100644
--- a/arch/x86/kernel/kexec-bzimage64.c
+++ b/arch/x86/kernel/kexec-bzimage64.c
@@ -334,7 +334,6 @@ static void *bzImage64_load(struct kimage *image, char *kernel,
unsigned long setup_header_size, params_cmdline_sz;
struct boot_params *params;
unsigned long bootparam_load_addr, kernel_load_addr, initrd_load_addr;
- unsigned long purgatory_load_addr;
struct bzimage64_data *ldata;
struct kexec_entry64_regs regs64;
void *stack;
@@ -342,6 +341,8 @@ static void *bzImage64_load(struct kimage *image, char *kernel,
unsigned int efi_map_offset, efi_map_sz, efi_setup_data_offset;
struct kexec_buf kbuf = { .image = image, .buf_max = ULONG_MAX,
.top_down = true };
+ struct kexec_buf pbuf = { .image = image, .buf_min = MIN_PURGATORY_ADDR,
+ .buf_max = ULONG_MAX, .top_down = true };
header = (struct setup_header *)(kernel + setup_hdr_offset);
setup_sects = header->setup_sects;
@@ -379,14 +380,13 @@ static void *bzImage64_load(struct kimage *image, char *kernel,
* Load purgatory. For 64bit entry point, purgatory code can be
* anywhere.
*/
- ret = kexec_load_purgatory(image, MIN_PURGATORY_ADDR, ULONG_MAX, 1,
- &purgatory_load_addr);
+ ret = kexec_load_purgatory(image, &pbuf);
if (ret) {
pr_err("Loading purgatory failed\n");
return ERR_PTR(ret);
}
- pr_debug("Loaded purgatory at 0x%lx\n", purgatory_load_addr);
+ pr_debug("Loaded purgatory at 0x%lx\n", pbuf.mem);
/*
@@ -538,7 +538,7 @@ static int bzImage64_verify_sig(const char *kernel, unsigned long kernel_len)
}
#endif
-struct kexec_file_ops kexec_bzImage64_ops = {
+const struct kexec_file_ops kexec_bzImage64_ops = {
.probe = bzImage64_probe,
.load = bzImage64_load,
.cleanup = bzImage64_cleanup,
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 26d713ecad34..d41d896481b8 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -145,6 +145,7 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
unsigned long offset = i << PAGE_SHIFT;
const void *src = (char *)ldt->entries + offset;
unsigned long pfn;
+ pgprot_t pte_prot;
pte_t pte, *ptep;
va = (unsigned long)ldt_slot_va(slot) + offset;
@@ -163,7 +164,10 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
* target via some kernel interface which misses a
* permission check.
*/
- pte = pfn_pte(pfn, __pgprot(__PAGE_KERNEL_RO & ~_PAGE_GLOBAL));
+ pte_prot = __pgprot(__PAGE_KERNEL_RO & ~_PAGE_GLOBAL);
+ /* Filter out unsuppored __PAGE_KERNEL* bits: */
+ pgprot_val(pte_prot) |= __supported_pte_mask;
+ pte = pfn_pte(pfn, pte_prot);
set_pte_at(mm, va, ptep, pte);
pte_unmap_unlock(ptep, ptl);
}
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 93bd4fb603d1..a5e55d832d0a 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -30,8 +30,9 @@
#include <asm/set_memory.h>
#ifdef CONFIG_KEXEC_FILE
-static struct kexec_file_ops *kexec_file_loaders[] = {
+const struct kexec_file_ops * const kexec_file_loaders[] = {
&kexec_bzImage64_ops,
+ NULL
};
#endif
@@ -364,27 +365,6 @@ void arch_crash_save_vmcoreinfo(void)
/* arch-dependent functionality related to kexec file-based syscall */
#ifdef CONFIG_KEXEC_FILE
-int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
- unsigned long buf_len)
-{
- int i, ret = -ENOEXEC;
- struct kexec_file_ops *fops;
-
- for (i = 0; i < ARRAY_SIZE(kexec_file_loaders); i++) {
- fops = kexec_file_loaders[i];
- if (!fops || !fops->probe)
- continue;
-
- ret = fops->probe(buf, buf_len);
- if (!ret) {
- image->fops = fops;
- return ret;
- }
- }
-
- return ret;
-}
-
void *arch_kexec_kernel_image_load(struct kimage *image)
{
vfree(image->arch.elf_headers);
@@ -399,88 +379,53 @@ void *arch_kexec_kernel_image_load(struct kimage *image)
image->cmdline_buf_len);
}
-int arch_kimage_file_post_load_cleanup(struct kimage *image)
-{
- if (!image->fops || !image->fops->cleanup)
- return 0;
-
- return image->fops->cleanup(image->image_loader_data);
-}
-
-#ifdef CONFIG_KEXEC_VERIFY_SIG
-int arch_kexec_kernel_verify_sig(struct kimage *image, void *kernel,
- unsigned long kernel_len)
-{
- if (!image->fops || !image->fops->verify_sig) {
- pr_debug("kernel loader does not support signature verification.");
- return -EKEYREJECTED;
- }
-
- return image->fops->verify_sig(kernel, kernel_len);
-}
-#endif
-
/*
* Apply purgatory relocations.
*
- * ehdr: Pointer to elf headers
- * sechdrs: Pointer to section headers.
- * relsec: section index of SHT_RELA section.
+ * @pi: Purgatory to be relocated.
+ * @section: Section relocations applying to.
+ * @relsec: Section containing RELAs.
+ * @symtabsec: Corresponding symtab.
*
* TODO: Some of the code belongs to generic code. Move that in kexec.c.
*/
-int arch_kexec_apply_relocations_add(const Elf64_Ehdr *ehdr,
- Elf64_Shdr *sechdrs, unsigned int relsec)
+int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
+ Elf_Shdr *section, const Elf_Shdr *relsec,
+ const Elf_Shdr *symtabsec)
{
unsigned int i;
Elf64_Rela *rel;
Elf64_Sym *sym;
void *location;
- Elf64_Shdr *section, *symtabsec;
unsigned long address, sec_base, value;
const char *strtab, *name, *shstrtab;
+ const Elf_Shdr *sechdrs;
- /*
- * ->sh_offset has been modified to keep the pointer to section
- * contents in memory
- */
- rel = (void *)sechdrs[relsec].sh_offset;
-
- /* Section to which relocations apply */
- section = &sechdrs[sechdrs[relsec].sh_info];
-
- pr_debug("Applying relocate section %u to %u\n", relsec,
- sechdrs[relsec].sh_info);
-
- /* Associated symbol table */
- symtabsec = &sechdrs[sechdrs[relsec].sh_link];
-
- /* String table */
- if (symtabsec->sh_link >= ehdr->e_shnum) {
- /* Invalid strtab section number */
- pr_err("Invalid string table section index %d\n",
- symtabsec->sh_link);
- return -ENOEXEC;
- }
+ /* String & section header string table */
+ sechdrs = (void *)pi->ehdr + pi->ehdr->e_shoff;
+ strtab = (char *)pi->ehdr + sechdrs[symtabsec->sh_link].sh_offset;
+ shstrtab = (char *)pi->ehdr + sechdrs[pi->ehdr->e_shstrndx].sh_offset;
- strtab = (char *)sechdrs[symtabsec->sh_link].sh_offset;
+ rel = (void *)pi->ehdr + relsec->sh_offset;
- /* section header string table */
- shstrtab = (char *)sechdrs[ehdr->e_shstrndx].sh_offset;
+ pr_debug("Applying relocate section %s to %u\n",
+ shstrtab + relsec->sh_name, relsec->sh_info);
- for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
+ for (i = 0; i < relsec->sh_size / sizeof(*rel); i++) {
/*
* rel[i].r_offset contains byte offset from beginning
* of section to the storage unit affected.
*
- * This is location to update (->sh_offset). This is temporary
- * buffer where section is currently loaded. This will finally
- * be loaded to a different address later, pointed to by
+ * This is location to update. This is temporary buffer
+ * where section is currently loaded. This will finally be
+ * loaded to a different address later, pointed to by
* ->sh_addr. kexec takes care of moving it
* (kexec_load_segment()).
*/
- location = (void *)(section->sh_offset + rel[i].r_offset);
+ location = pi->purgatory_buf;
+ location += section->sh_offset;
+ location += rel[i].r_offset;
/* Final address of the location */
address = section->sh_addr + rel[i].r_offset;
@@ -491,8 +436,8 @@ int arch_kexec_apply_relocations_add(const Elf64_Ehdr *ehdr,
* to apply. ELF64_R_SYM() and ELF64_R_TYPE() macros get
* these respectively.
*/
- sym = (Elf64_Sym *)symtabsec->sh_offset +
- ELF64_R_SYM(rel[i].r_info);
+ sym = (void *)pi->ehdr + symtabsec->sh_offset;
+ sym += ELF64_R_SYM(rel[i].r_info);
if (sym->st_name)
name = strtab + sym->st_name;
@@ -515,12 +460,12 @@ int arch_kexec_apply_relocations_add(const Elf64_Ehdr *ehdr,
if (sym->st_shndx == SHN_ABS)
sec_base = 0;
- else if (sym->st_shndx >= ehdr->e_shnum) {
+ else if (sym->st_shndx >= pi->ehdr->e_shnum) {
pr_err("Invalid section %d for symbol %s\n",
sym->st_shndx, name);
return -ENOEXEC;
} else
- sec_base = sechdrs[sym->st_shndx].sh_addr;
+ sec_base = pi->sechdrs[sym->st_shndx].sh_addr;
value = sym->st_value;
value += sec_base;
diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c
index df92605d8724..14c057f29979 100644
--- a/arch/x86/kernel/signal_compat.c
+++ b/arch/x86/kernel/signal_compat.c
@@ -26,7 +26,7 @@ static inline void signal_compat_build_tests(void)
* new fields are handled in copy_siginfo_to_user32()!
*/
BUILD_BUG_ON(NSIGILL != 11);
- BUILD_BUG_ON(NSIGFPE != 14);
+ BUILD_BUG_ON(NSIGFPE != 15);
BUILD_BUG_ON(NSIGSEGV != 7);
BUILD_BUG_ON(NSIGBUS != 5);
BUILD_BUG_ON(NSIGTRAP != 4);
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
index 476d810639a8..b45f5aaefd74 100644
--- a/arch/x86/mm/cpu_entry_area.c
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -27,8 +27,20 @@ EXPORT_SYMBOL(get_cpu_entry_area);
void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags)
{
unsigned long va = (unsigned long) cea_vaddr;
+ pte_t pte = pfn_pte(pa >> PAGE_SHIFT, flags);
- set_pte_vaddr(va, pfn_pte(pa >> PAGE_SHIFT, flags));
+ /*
+ * The cpu_entry_area is shared between the user and kernel
+ * page tables. All of its ptes can safely be global.
+ * _PAGE_GLOBAL gets reused to help indicate PROT_NONE for
+ * non-present PTEs, so be careful not to set it in that
+ * case to avoid confusion.
+ */
+ if (boot_cpu_has(X86_FEATURE_PGE) &&
+ (pgprot_val(flags) & _PAGE_PRESENT))
+ pte = pte_set_flags(pte, _PAGE_GLOBAL);
+
+ set_pte_vaddr(va, pte);
}
static void __init
diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c
index 9aa22be8331e..a2f0c7e20fb0 100644
--- a/arch/x86/mm/ident_map.c
+++ b/arch/x86/mm/ident_map.c
@@ -98,6 +98,9 @@ int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
if (!info->kernpg_flag)
info->kernpg_flag = _KERNPG_TABLE;
+ /* Filter out unsupported __PAGE_KERNEL_* bits: */
+ info->kernpg_flag &= __default_kernel_pte_mask;
+
for (; addr < end; addr = next) {
pgd_t *pgd = pgd_page + pgd_index(addr);
p4d_t *p4d;
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 82f5252c723a..fec82b577c18 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -161,12 +161,6 @@ struct map_range {
static int page_size_mask;
-static void enable_global_pages(void)
-{
- if (!static_cpu_has(X86_FEATURE_PTI))
- __supported_pte_mask |= _PAGE_GLOBAL;
-}
-
static void __init probe_page_size_mask(void)
{
/*
@@ -187,9 +181,15 @@ static void __init probe_page_size_mask(void)
__supported_pte_mask &= ~_PAGE_GLOBAL;
if (boot_cpu_has(X86_FEATURE_PGE)) {
cr4_set_bits_and_update_boot(X86_CR4_PGE);
- enable_global_pages();
+ __supported_pte_mask |= _PAGE_GLOBAL;
}
+ /* By the default is everything supported: */
+ __default_kernel_pte_mask = __supported_pte_mask;
+ /* Except when with PTI where the kernel is mostly non-Global: */
+ if (cpu_feature_enabled(X86_FEATURE_PTI))
+ __default_kernel_pte_mask &= ~_PAGE_GLOBAL;
+
/* Enable 1 GB linear kernel mappings if available: */
if (direct_gbpages && boot_cpu_has(X86_FEATURE_GBPAGES)) {
printk(KERN_INFO "Using GB pages for direct mapping\n");
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 8008db2bddb3..c893c6a3d707 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -558,8 +558,14 @@ static void __init pagetable_init(void)
permanent_kmaps_init(pgd_base);
}
-pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL);
+#define DEFAULT_PTE_MASK ~(_PAGE_NX | _PAGE_GLOBAL)
+/* Bits supported by the hardware: */
+pteval_t __supported_pte_mask __read_mostly = DEFAULT_PTE_MASK;
+/* Bits allowed in normal kernel mappings: */
+pteval_t __default_kernel_pte_mask __read_mostly = DEFAULT_PTE_MASK;
EXPORT_SYMBOL_GPL(__supported_pte_mask);
+/* Used in PAGE_KERNEL_* macros which are reasonably used out-of-tree: */
+EXPORT_SYMBOL(__default_kernel_pte_mask);
/* user-defined highmem size */
static unsigned int highmem_pages = -1;
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 66de40e45f58..0a400606dea0 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -65,8 +65,13 @@
* around without checking the pgd every time.
*/
+/* Bits supported by the hardware: */
pteval_t __supported_pte_mask __read_mostly = ~0;
+/* Bits allowed in normal kernel mappings: */
+pteval_t __default_kernel_pte_mask __read_mostly = ~0;
EXPORT_SYMBOL_GPL(__supported_pte_mask);
+/* Used in PAGE_KERNEL_* macros which are reasonably used out-of-tree: */
+EXPORT_SYMBOL(__default_kernel_pte_mask);
int force_personality32;
@@ -1286,6 +1291,12 @@ void mark_rodata_ro(void)
(unsigned long) __va(__pa_symbol(_sdata)));
debug_checkwx();
+
+ /*
+ * Do this after all of the manipulation of the
+ * kernel text page tables are complete.
+ */
+ pti_clone_kernel_text();
}
int kern_addr_valid(unsigned long addr)
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
index ada98b39b8ad..b3294d36769d 100644
--- a/arch/x86/mm/iomap_32.c
+++ b/arch/x86/mm/iomap_32.c
@@ -44,6 +44,9 @@ int iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot)
return ret;
*prot = __pgprot(__PAGE_KERNEL | cachemode2protval(pcm));
+ /* Filter out unsupported __PAGE_KERNEL* bits: */
+ pgprot_val(*prot) &= __default_kernel_pte_mask;
+
return 0;
}
EXPORT_SYMBOL_GPL(iomap_create_wc);
@@ -88,6 +91,9 @@ iomap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot)
prot = __pgprot(__PAGE_KERNEL |
cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS));
+ /* Filter out unsupported __PAGE_KERNEL* bits: */
+ pgprot_val(prot) &= __default_kernel_pte_mask;
+
return (void __force __iomem *) kmap_atomic_prot_pfn(pfn, prot);
}
EXPORT_SYMBOL_GPL(iomap_atomic_prot_pfn);
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index e2db83bebc3b..c63a545ec199 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -816,6 +816,9 @@ void __init __early_set_fixmap(enum fixed_addresses idx,
}
pte = early_ioremap_pte(addr);
+ /* Sanitize 'prot' against any unsupported bits: */
+ pgprot_val(flags) &= __default_kernel_pte_mask;
+
if (pgprot_val(flags))
set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
else
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index d8ff013ea9d0..980dbebd0ca7 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -269,6 +269,12 @@ void __init kasan_early_init(void)
pudval_t pud_val = __pa_nodebug(kasan_zero_pmd) | _KERNPG_TABLE;
p4dval_t p4d_val = __pa_nodebug(kasan_zero_pud) | _KERNPG_TABLE;
+ /* Mask out unsupported __PAGE_KERNEL bits: */
+ pte_val &= __default_kernel_pte_mask;
+ pmd_val &= __default_kernel_pte_mask;
+ pud_val &= __default_kernel_pte_mask;
+ p4d_val &= __default_kernel_pte_mask;
+
for (i = 0; i < PTRS_PER_PTE; i++)
kasan_zero_pte[i] = __pte(pte_val);
@@ -371,7 +377,13 @@ void __init kasan_init(void)
*/
memset(kasan_zero_page, 0, PAGE_SIZE);
for (i = 0; i < PTRS_PER_PTE; i++) {
- pte_t pte = __pte(__pa(kasan_zero_page) | __PAGE_KERNEL_RO | _PAGE_ENC);
+ pte_t pte;
+ pgprot_t prot;
+
+ prot = __pgprot(__PAGE_KERNEL_RO | _PAGE_ENC);
+ pgprot_val(prot) &= __default_kernel_pte_mask;
+
+ pte = __pte(__pa(kasan_zero_page) | pgprot_val(prot));
set_pte(&kasan_zero_pte[i], pte);
}
/* Flush TLBs again to be sure that write protection applied. */
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 85cf12219dea..0f3d50f4c48c 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -298,9 +298,11 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
/*
* The .rodata section needs to be read-only. Using the pfn
- * catches all aliases.
+ * catches all aliases. This also includes __ro_after_init,
+ * so do not enforce until kernel_set_to_readonly is true.
*/
- if (within(pfn, __pa_symbol(__start_rodata) >> PAGE_SHIFT,
+ if (kernel_set_to_readonly &&
+ within(pfn, __pa_symbol(__start_rodata) >> PAGE_SHIFT,
__pa_symbol(__end_rodata) >> PAGE_SHIFT))
pgprot_val(forbidden) |= _PAGE_RW;
@@ -512,6 +514,23 @@ static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
#endif
}
+static pgprot_t pgprot_clear_protnone_bits(pgprot_t prot)
+{
+ /*
+ * _PAGE_GLOBAL means "global page" for present PTEs.
+ * But, it is also used to indicate _PAGE_PROTNONE
+ * for non-present PTEs.
+ *
+ * This ensures that a _PAGE_GLOBAL PTE going from
+ * present to non-present is not confused as
+ * _PAGE_PROTNONE.
+ */
+ if (!(pgprot_val(prot) & _PAGE_PRESENT))
+ pgprot_val(prot) &= ~_PAGE_GLOBAL;
+
+ return prot;
+}
+
static int
try_preserve_large_page(pte_t *kpte, unsigned long address,
struct cpa_data *cpa)
@@ -566,6 +585,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
* up accordingly.
*/
old_pte = *kpte;
+ /* Clear PSE (aka _PAGE_PAT) and move PAT bit to correct position */
req_prot = pgprot_large_2_4k(old_prot);
pgprot_val(req_prot) &= ~pgprot_val(cpa->mask_clr);
@@ -577,19 +597,9 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
* different bit positions in the two formats.
*/
req_prot = pgprot_4k_2_large(req_prot);
-
- /*
- * Set the PSE and GLOBAL flags only if the PRESENT flag is
- * set otherwise pmd_present/pmd_huge will return true even on
- * a non present pmd. The canon_pgprot will clear _PAGE_GLOBAL
- * for the ancient hardware that doesn't support it.
- */
+ req_prot = pgprot_clear_protnone_bits(req_prot);
if (pgprot_val(req_prot) & _PAGE_PRESENT)
- pgprot_val(req_prot) |= _PAGE_PSE | _PAGE_GLOBAL;
- else
- pgprot_val(req_prot) &= ~(_PAGE_PSE | _PAGE_GLOBAL);
-
- req_prot = canon_pgprot(req_prot);
+ pgprot_val(req_prot) |= _PAGE_PSE;
/*
* old_pfn points to the large page base pfn. So we need
@@ -674,8 +684,12 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
switch (level) {
case PG_LEVEL_2M:
ref_prot = pmd_pgprot(*(pmd_t *)kpte);
- /* clear PSE and promote PAT bit to correct position */
+ /*
+ * Clear PSE (aka _PAGE_PAT) and move
+ * PAT bit to correct position.
+ */
ref_prot = pgprot_large_2_4k(ref_prot);
+
ref_pfn = pmd_pfn(*(pmd_t *)kpte);
break;
@@ -698,23 +712,14 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
return 1;
}
- /*
- * Set the GLOBAL flags only if the PRESENT flag is set
- * otherwise pmd/pte_present will return true even on a non
- * present pmd/pte. The canon_pgprot will clear _PAGE_GLOBAL
- * for the ancient hardware that doesn't support it.
- */
- if (pgprot_val(ref_prot) & _PAGE_PRESENT)
- pgprot_val(ref_prot) |= _PAGE_GLOBAL;
- else
- pgprot_val(ref_prot) &= ~_PAGE_GLOBAL;
+ ref_prot = pgprot_clear_protnone_bits(ref_prot);
/*
* Get the target pfn from the original entry:
*/
pfn = ref_pfn;
for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc)
- set_pte(&pbase[i], pfn_pte(pfn, canon_pgprot(ref_prot)));
+ set_pte(&pbase[i], pfn_pte(pfn, ref_prot));
if (virt_addr_valid(address)) {
unsigned long pfn = PFN_DOWN(__pa(address));
@@ -930,19 +935,7 @@ static void populate_pte(struct cpa_data *cpa,
pte = pte_offset_kernel(pmd, start);
- /*
- * Set the GLOBAL flags only if the PRESENT flag is
- * set otherwise pte_present will return true even on
- * a non present pte. The canon_pgprot will clear
- * _PAGE_GLOBAL for the ancient hardware that doesn't
- * support it.
- */
- if (pgprot_val(pgprot) & _PAGE_PRESENT)
- pgprot_val(pgprot) |= _PAGE_GLOBAL;
- else
- pgprot_val(pgprot) &= ~_PAGE_GLOBAL;
-
- pgprot = canon_pgprot(pgprot);
+ pgprot = pgprot_clear_protnone_bits(pgprot);
while (num_pages-- && start < end) {
set_pte(pte, pfn_pte(cpa->pfn, pgprot));
@@ -1234,24 +1227,14 @@ repeat:
new_prot = static_protections(new_prot, address, pfn);
- /*
- * Set the GLOBAL flags only if the PRESENT flag is
- * set otherwise pte_present will return true even on
- * a non present pte. The canon_pgprot will clear
- * _PAGE_GLOBAL for the ancient hardware that doesn't
- * support it.
- */
- if (pgprot_val(new_prot) & _PAGE_PRESENT)
- pgprot_val(new_prot) |= _PAGE_GLOBAL;
- else
- pgprot_val(new_prot) &= ~_PAGE_GLOBAL;
+ new_prot = pgprot_clear_protnone_bits(new_prot);
/*
* We need to keep the pfn from the existing PTE,
* after all we're only going to change it's attributes
* not the memory it points to
*/
- new_pte = pfn_pte(pfn, canon_pgprot(new_prot));
+ new_pte = pfn_pte(pfn, new_prot);
cpa->pfn = pfn;
/*
* Do we really change anything ?
@@ -1428,11 +1411,11 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
memset(&cpa, 0, sizeof(cpa));
/*
- * Check, if we are requested to change a not supported
- * feature:
+ * Check, if we are requested to set a not supported
+ * feature. Clearing non-supported features is OK.
*/
mask_set = canon_pgprot(mask_set);
- mask_clr = canon_pgprot(mask_clr);
+
if (!pgprot_val(mask_set) && !pgprot_val(mask_clr) && !force_split)
return 0;
@@ -1775,6 +1758,12 @@ int set_memory_4k(unsigned long addr, int numpages)
__pgprot(0), 1, 0, NULL);
}
+int set_memory_nonglobal(unsigned long addr, int numpages)
+{
+ return change_page_attr_clear(&addr, numpages,
+ __pgprot(_PAGE_GLOBAL), 0);
+}
+
static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc)
{
struct cpa_data cpa;
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 34cda7e0551b..ffc8c13c50e4 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/mm.h>
#include <linux/gfp.h>
+#include <linux/hugetlb.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/tlb.h>
@@ -583,6 +584,9 @@ void __native_set_fixmap(enum fixed_addresses idx, pte_t pte)
void native_set_fixmap(enum fixed_addresses idx, phys_addr_t phys,
pgprot_t flags)
{
+ /* Sanitize 'prot' against any unsupported bits: */
+ pgprot_val(flags) &= __default_kernel_pte_mask;
+
__native_set_fixmap(idx, pfn_pte(phys >> PAGE_SHIFT, flags));
}
@@ -636,6 +640,10 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
(mtrr != MTRR_TYPE_WRBACK))
return 0;
+ /* Bail out if we are we on a populated non-leaf entry: */
+ if (pud_present(*pud) && !pud_huge(*pud))
+ return 0;
+
prot = pgprot_4k_2_large(prot);
set_pte((pte_t *)pud, pfn_pte(
@@ -664,6 +672,10 @@ int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
return 0;
}
+ /* Bail out if we are we on a populated non-leaf entry: */
+ if (pmd_present(*pmd) && !pmd_huge(*pmd))
+ return 0;
+
prot = pgprot_4k_2_large(prot);
set_pte((pte_t *)pmd, pfn_pte(
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index 631507f0c198..f1fd52f449e0 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -66,12 +66,22 @@ static void __init pti_print_if_secure(const char *reason)
pr_info("%s\n", reason);
}
+enum pti_mode {
+ PTI_AUTO = 0,
+ PTI_FORCE_OFF,
+ PTI_FORCE_ON
+} pti_mode;
+
void __init pti_check_boottime_disable(void)
{
char arg[5];
int ret;
+ /* Assume mode is auto unless overridden. */
+ pti_mode = PTI_AUTO;
+
if (hypervisor_is_type(X86_HYPER_XEN_PV)) {
+ pti_mode = PTI_FORCE_OFF;
pti_print_if_insecure("disabled on XEN PV.");
return;
}
@@ -79,18 +89,23 @@ void __init pti_check_boottime_disable(void)
ret = cmdline_find_option(boot_command_line, "pti", arg, sizeof(arg));
if (ret > 0) {
if (ret == 3 && !strncmp(arg, "off", 3)) {
+ pti_mode = PTI_FORCE_OFF;
pti_print_if_insecure("disabled on command line.");
return;
}
if (ret == 2 && !strncmp(arg, "on", 2)) {
+ pti_mode = PTI_FORCE_ON;
pti_print_if_secure("force enabled on command line.");
goto enable;
}
- if (ret == 4 && !strncmp(arg, "auto", 4))
+ if (ret == 4 && !strncmp(arg, "auto", 4)) {
+ pti_mode = PTI_AUTO;
goto autosel;
+ }
}
if (cmdline_find_option_bool(boot_command_line, "nopti")) {
+ pti_mode = PTI_FORCE_OFF;
pti_print_if_insecure("disabled on command line.");
return;
}
@@ -149,7 +164,7 @@ pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
*
* Returns a pointer to a P4D on success, or NULL on failure.
*/
-static __init p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
+static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
{
pgd_t *pgd = kernel_to_user_pgdp(pgd_offset_k(address));
gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
@@ -177,7 +192,7 @@ static __init p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
*
* Returns a pointer to a PMD on success, or NULL on failure.
*/
-static __init pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
+static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
{
gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
p4d_t *p4d = pti_user_pagetable_walk_p4d(address);
@@ -267,7 +282,7 @@ static void __init pti_setup_vsyscall(void)
static void __init pti_setup_vsyscall(void) { }
#endif
-static void __init
+static void
pti_clone_pmds(unsigned long start, unsigned long end, pmdval_t clear)
{
unsigned long addr;
@@ -300,6 +315,27 @@ pti_clone_pmds(unsigned long start, unsigned long end, pmdval_t clear)
return;
/*
+ * Only clone present PMDs. This ensures only setting
+ * _PAGE_GLOBAL on present PMDs. This should only be
+ * called on well-known addresses anyway, so a non-
+ * present PMD would be a surprise.
+ */
+ if (WARN_ON(!(pmd_flags(*pmd) & _PAGE_PRESENT)))
+ return;
+
+ /*
+ * Setting 'target_pmd' below creates a mapping in both
+ * the user and kernel page tables. It is effectively
+ * global, so set it as global in both copies. Note:
+ * the X86_FEATURE_PGE check is not _required_ because
+ * the CPU ignores _PAGE_GLOBAL when PGE is not
+ * supported. The check keeps consistentency with
+ * code that only set this bit when supported.
+ */
+ if (boot_cpu_has(X86_FEATURE_PGE))
+ *pmd = pmd_set_flags(*pmd, _PAGE_GLOBAL);
+
+ /*
* Copy the PMD. That is, the kernelmode and usermode
* tables will share the last-level page tables of this
* address range
@@ -348,7 +384,83 @@ static void __init pti_clone_entry_text(void)
{
pti_clone_pmds((unsigned long) __entry_text_start,
(unsigned long) __irqentry_text_end,
- _PAGE_RW | _PAGE_GLOBAL);
+ _PAGE_RW);
+}
+
+/*
+ * Global pages and PCIDs are both ways to make kernel TLB entries
+ * live longer, reduce TLB misses and improve kernel performance.
+ * But, leaving all kernel text Global makes it potentially accessible
+ * to Meltdown-style attacks which make it trivial to find gadgets or
+ * defeat KASLR.
+ *
+ * Only use global pages when it is really worth it.
+ */
+static inline bool pti_kernel_image_global_ok(void)
+{
+ /*
+ * Systems with PCIDs get litlle benefit from global
+ * kernel text and are not worth the downsides.
+ */
+ if (cpu_feature_enabled(X86_FEATURE_PCID))
+ return false;
+
+ /*
+ * Only do global kernel image for pti=auto. Do the most
+ * secure thing (not global) if pti=on specified.
+ */
+ if (pti_mode != PTI_AUTO)
+ return false;
+
+ /*
+ * K8 may not tolerate the cleared _PAGE_RW on the userspace
+ * global kernel image pages. Do the safe thing (disable
+ * global kernel image). This is unlikely to ever be
+ * noticed because PTI is disabled by default on AMD CPUs.
+ */
+ if (boot_cpu_has(X86_FEATURE_K8))
+ return false;
+
+ return true;
+}
+
+/*
+ * For some configurations, map all of kernel text into the user page
+ * tables. This reduces TLB misses, especially on non-PCID systems.
+ */
+void pti_clone_kernel_text(void)
+{
+ unsigned long start = PFN_ALIGN(_text);
+ unsigned long end = ALIGN((unsigned long)_end, PMD_PAGE_SIZE);
+
+ if (!pti_kernel_image_global_ok())
+ return;
+
+ pti_clone_pmds(start, end, _PAGE_RW);
+}
+
+/*
+ * This is the only user for it and it is not arch-generic like
+ * the other set_memory.h functions. Just extern it.
+ */
+extern int set_memory_nonglobal(unsigned long addr, int numpages);
+void pti_set_kernel_image_nonglobal(void)
+{
+ /*
+ * The identity map is created with PMDs, regardless of the
+ * actual length of the kernel. We need to clear
+ * _PAGE_GLOBAL up to a PMD boundary, not just to the end
+ * of the image.
+ */
+ unsigned long start = PFN_ALIGN(_text);
+ unsigned long end = ALIGN((unsigned long)_end, PMD_PAGE_SIZE);
+
+ if (pti_kernel_image_global_ok())
+ return;
+
+ pr_debug("set kernel image non-global\n");
+
+ set_memory_nonglobal(start, (end - start) >> PAGE_SHIFT);
}
/*
@@ -362,6 +474,10 @@ void __init pti_init(void)
pr_info("enabled\n");
pti_clone_user_shared();
+
+ /* Undo all global bits from the init pagetables in head_64.S: */
+ pti_set_kernel_image_nonglobal();
+ /* Replace some of the global bits just for shared entry text: */
pti_clone_entry_text();
pti_setup_espfix64();
pti_setup_vsyscall();
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
index 74a532989308..48b14b534897 100644
--- a/arch/x86/power/hibernate_64.c
+++ b/arch/x86/power/hibernate_64.c
@@ -51,6 +51,12 @@ static int set_up_temporary_text_mapping(pgd_t *pgd)
pmd_t *pmd;
pud_t *pud;
p4d_t *p4d = NULL;
+ pgprot_t pgtable_prot = __pgprot(_KERNPG_TABLE);
+ pgprot_t pmd_text_prot = __pgprot(__PAGE_KERNEL_LARGE_EXEC);
+
+ /* Filter out unsupported __PAGE_KERNEL* bits: */
+ pgprot_val(pmd_text_prot) &= __default_kernel_pte_mask;
+ pgprot_val(pgtable_prot) &= __default_kernel_pte_mask;
/*
* The new mapping only has to cover the page containing the image
@@ -81,15 +87,19 @@ static int set_up_temporary_text_mapping(pgd_t *pgd)
return -ENOMEM;
set_pmd(pmd + pmd_index(restore_jump_address),
- __pmd((jump_address_phys & PMD_MASK) | __PAGE_KERNEL_LARGE_EXEC));
+ __pmd((jump_address_phys & PMD_MASK) | pgprot_val(pmd_text_prot)));
set_pud(pud + pud_index(restore_jump_address),
- __pud(__pa(pmd) | _KERNPG_TABLE));
+ __pud(__pa(pmd) | pgprot_val(pgtable_prot)));
if (p4d) {
- set_p4d(p4d + p4d_index(restore_jump_address), __p4d(__pa(pud) | _KERNPG_TABLE));
- set_pgd(pgd + pgd_index(restore_jump_address), __pgd(__pa(p4d) | _KERNPG_TABLE));
+ p4d_t new_p4d = __p4d(__pa(pud) | pgprot_val(pgtable_prot));
+ pgd_t new_pgd = __pgd(__pa(p4d) | pgprot_val(pgtable_prot));
+
+ set_p4d(p4d + p4d_index(restore_jump_address), new_p4d);
+ set_pgd(pgd + pgd_index(restore_jump_address), new_pgd);
} else {
/* No p4d for 4-level paging: point the pgd to the pud page table */
- set_pgd(pgd + pgd_index(restore_jump_address), __pgd(__pa(pud) | _KERNPG_TABLE));
+ pgd_t new_pgd = __pgd(__pa(p4d) | pgprot_val(pgtable_prot));
+ set_pgd(pgd + pgd_index(restore_jump_address), new_pgd);
}
return 0;
diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
index d70c15de417b..2e9ee023e6bc 100644
--- a/arch/x86/purgatory/Makefile
+++ b/arch/x86/purgatory/Makefile
@@ -6,6 +6,9 @@ purgatory-y := purgatory.o stack.o setup-x86_$(BITS).o sha256.o entry64.o string
targets += $(purgatory-y)
PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y))
+$(obj)/sha256.o: $(srctree)/lib/sha256.c
+ $(call if_changed_rule,cc_o_c)
+
LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined -nostdlib -z nodefaultlib
targets += purgatory.ro
diff --git a/arch/x86/purgatory/purgatory.c b/arch/x86/purgatory/purgatory.c
index 470edad96bb9..025c34ac0d84 100644
--- a/arch/x86/purgatory/purgatory.c
+++ b/arch/x86/purgatory/purgatory.c
@@ -11,9 +11,9 @@
*/
#include <linux/bug.h>
+#include <linux/sha256.h>
#include <asm/purgatory.h>
-#include "sha256.h"
#include "../boot/string.h"
unsigned long purgatory_backup_dest __section(.kexec-purgatory);
diff --git a/arch/x86/purgatory/sha256.c b/arch/x86/purgatory/sha256.c
deleted file mode 100644
index 548ca675a14a..000000000000
--- a/arch/x86/purgatory/sha256.c
+++ /dev/null
@@ -1,283 +0,0 @@
-/*
- * SHA-256, as specified in
- * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf
- *
- * SHA-256 code by Jean-Luc Cooke <jlcooke@certainkey.com>.
- *
- * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com>
- * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
- * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
- * Copyright (c) 2014 Red Hat Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- */
-
-#include <linux/bitops.h>
-#include <asm/byteorder.h>
-#include "sha256.h"
-#include "../boot/string.h"
-
-static inline u32 Ch(u32 x, u32 y, u32 z)
-{
- return z ^ (x & (y ^ z));
-}
-
-static inline u32 Maj(u32 x, u32 y, u32 z)
-{
- return (x & y) | (z & (x | y));
-}
-
-#define e0(x) (ror32(x, 2) ^ ror32(x, 13) ^ ror32(x, 22))
-#define e1(x) (ror32(x, 6) ^ ror32(x, 11) ^ ror32(x, 25))
-#define s0(x) (ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3))
-#define s1(x) (ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10))
-
-static inline void LOAD_OP(int I, u32 *W, const u8 *input)
-{
- W[I] = __be32_to_cpu(((__be32 *)(input))[I]);
-}
-
-static inline void BLEND_OP(int I, u32 *W)
-{
- W[I] = s1(W[I-2]) + W[I-7] + s0(W[I-15]) + W[I-16];
-}
-
-static void sha256_transform(u32 *state, const u8 *input)
-{
- u32 a, b, c, d, e, f, g, h, t1, t2;
- u32 W[64];
- int i;
-
- /* load the input */
- for (i = 0; i < 16; i++)
- LOAD_OP(i, W, input);
-
- /* now blend */
- for (i = 16; i < 64; i++)
- BLEND_OP(i, W);
-
- /* load the state into our registers */
- a = state[0]; b = state[1]; c = state[2]; d = state[3];
- e = state[4]; f = state[5]; g = state[6]; h = state[7];
-
- /* now iterate */
- t1 = h + e1(e) + Ch(e, f, g) + 0x428a2f98 + W[0];
- t2 = e0(a) + Maj(a, b, c); d += t1; h = t1 + t2;
- t1 = g + e1(d) + Ch(d, e, f) + 0x71374491 + W[1];
- t2 = e0(h) + Maj(h, a, b); c += t1; g = t1 + t2;
- t1 = f + e1(c) + Ch(c, d, e) + 0xb5c0fbcf + W[2];
- t2 = e0(g) + Maj(g, h, a); b += t1; f = t1 + t2;
- t1 = e + e1(b) + Ch(b, c, d) + 0xe9b5dba5 + W[3];
- t2 = e0(f) + Maj(f, g, h); a += t1; e = t1 + t2;
- t1 = d + e1(a) + Ch(a, b, c) + 0x3956c25b + W[4];
- t2 = e0(e) + Maj(e, f, g); h += t1; d = t1 + t2;
- t1 = c + e1(h) + Ch(h, a, b) + 0x59f111f1 + W[5];
- t2 = e0(d) + Maj(d, e, f); g += t1; c = t1 + t2;
- t1 = b + e1(g) + Ch(g, h, a) + 0x923f82a4 + W[6];
- t2 = e0(c) + Maj(c, d, e); f += t1; b = t1 + t2;
- t1 = a + e1(f) + Ch(f, g, h) + 0xab1c5ed5 + W[7];
- t2 = e0(b) + Maj(b, c, d); e += t1; a = t1 + t2;
-
- t1 = h + e1(e) + Ch(e, f, g) + 0xd807aa98 + W[8];
- t2 = e0(a) + Maj(a, b, c); d += t1; h = t1 + t2;
- t1 = g + e1(d) + Ch(d, e, f) + 0x12835b01 + W[9];
- t2 = e0(h) + Maj(h, a, b); c += t1; g = t1 + t2;
- t1 = f + e1(c) + Ch(c, d, e) + 0x243185be + W[10];
- t2 = e0(g) + Maj(g, h, a); b += t1; f = t1 + t2;
- t1 = e + e1(b) + Ch(b, c, d) + 0x550c7dc3 + W[11];
- t2 = e0(f) + Maj(f, g, h); a += t1; e = t1 + t2;
- t1 = d + e1(a) + Ch(a, b, c) + 0x72be5d74 + W[12];
- t2 = e0(e) + Maj(e, f, g); h += t1; d = t1 + t2;
- t1 = c + e1(h) + Ch(h, a, b) + 0x80deb1fe + W[13];
- t2 = e0(d) + Maj(d, e, f); g += t1; c = t1 + t2;
- t1 = b + e1(g) + Ch(g, h, a) + 0x9bdc06a7 + W[14];
- t2 = e0(c) + Maj(c, d, e); f += t1; b = t1 + t2;
- t1 = a + e1(f) + Ch(f, g, h) + 0xc19bf174 + W[15];
- t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2;
-
- t1 = h + e1(e) + Ch(e, f, g) + 0xe49b69c1 + W[16];
- t2 = e0(a) + Maj(a, b, c); d += t1; h = t1+t2;
- t1 = g + e1(d) + Ch(d, e, f) + 0xefbe4786 + W[17];
- t2 = e0(h) + Maj(h, a, b); c += t1; g = t1+t2;
- t1 = f + e1(c) + Ch(c, d, e) + 0x0fc19dc6 + W[18];
- t2 = e0(g) + Maj(g, h, a); b += t1; f = t1+t2;
- t1 = e + e1(b) + Ch(b, c, d) + 0x240ca1cc + W[19];
- t2 = e0(f) + Maj(f, g, h); a += t1; e = t1+t2;
- t1 = d + e1(a) + Ch(a, b, c) + 0x2de92c6f + W[20];
- t2 = e0(e) + Maj(e, f, g); h += t1; d = t1+t2;
- t1 = c + e1(h) + Ch(h, a, b) + 0x4a7484aa + W[21];
- t2 = e0(d) + Maj(d, e, f); g += t1; c = t1+t2;
- t1 = b + e1(g) + Ch(g, h, a) + 0x5cb0a9dc + W[22];
- t2 = e0(c) + Maj(c, d, e); f += t1; b = t1+t2;
- t1 = a + e1(f) + Ch(f, g, h) + 0x76f988da + W[23];
- t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2;
-
- t1 = h + e1(e) + Ch(e, f, g) + 0x983e5152 + W[24];
- t2 = e0(a) + Maj(a, b, c); d += t1; h = t1+t2;
- t1 = g + e1(d) + Ch(d, e, f) + 0xa831c66d + W[25];
- t2 = e0(h) + Maj(h, a, b); c += t1; g = t1+t2;
- t1 = f + e1(c) + Ch(c, d, e) + 0xb00327c8 + W[26];
- t2 = e0(g) + Maj(g, h, a); b += t1; f = t1+t2;
- t1 = e + e1(b) + Ch(b, c, d) + 0xbf597fc7 + W[27];
- t2 = e0(f) + Maj(f, g, h); a += t1; e = t1+t2;
- t1 = d + e1(a) + Ch(a, b, c) + 0xc6e00bf3 + W[28];
- t2 = e0(e) + Maj(e, f, g); h += t1; d = t1+t2;
- t1 = c + e1(h) + Ch(h, a, b) + 0xd5a79147 + W[29];
- t2 = e0(d) + Maj(d, e, f); g += t1; c = t1+t2;
- t1 = b + e1(g) + Ch(g, h, a) + 0x06ca6351 + W[30];
- t2 = e0(c) + Maj(c, d, e); f += t1; b = t1+t2;
- t1 = a + e1(f) + Ch(f, g, h) + 0x14292967 + W[31];
- t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2;
-
- t1 = h + e1(e) + Ch(e, f, g) + 0x27b70a85 + W[32];
- t2 = e0(a) + Maj(a, b, c); d += t1; h = t1+t2;
- t1 = g + e1(d) + Ch(d, e, f) + 0x2e1b2138 + W[33];
- t2 = e0(h) + Maj(h, a, b); c += t1; g = t1+t2;
- t1 = f + e1(c) + Ch(c, d, e) + 0x4d2c6dfc + W[34];
- t2 = e0(g) + Maj(g, h, a); b += t1; f = t1+t2;
- t1 = e + e1(b) + Ch(b, c, d) + 0x53380d13 + W[35];
- t2 = e0(f) + Maj(f, g, h); a += t1; e = t1+t2;
- t1 = d + e1(a) + Ch(a, b, c) + 0x650a7354 + W[36];
- t2 = e0(e) + Maj(e, f, g); h += t1; d = t1+t2;
- t1 = c + e1(h) + Ch(h, a, b) + 0x766a0abb + W[37];
- t2 = e0(d) + Maj(d, e, f); g += t1; c = t1+t2;
- t1 = b + e1(g) + Ch(g, h, a) + 0x81c2c92e + W[38];
- t2 = e0(c) + Maj(c, d, e); f += t1; b = t1+t2;
- t1 = a + e1(f) + Ch(f, g, h) + 0x92722c85 + W[39];
- t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2;
-
- t1 = h + e1(e) + Ch(e, f, g) + 0xa2bfe8a1 + W[40];
- t2 = e0(a) + Maj(a, b, c); d += t1; h = t1+t2;
- t1 = g + e1(d) + Ch(d, e, f) + 0xa81a664b + W[41];
- t2 = e0(h) + Maj(h, a, b); c += t1; g = t1+t2;
- t1 = f + e1(c) + Ch(c, d, e) + 0xc24b8b70 + W[42];
- t2 = e0(g) + Maj(g, h, a); b += t1; f = t1+t2;
- t1 = e + e1(b) + Ch(b, c, d) + 0xc76c51a3 + W[43];
- t2 = e0(f) + Maj(f, g, h); a += t1; e = t1+t2;
- t1 = d + e1(a) + Ch(a, b, c) + 0xd192e819 + W[44];
- t2 = e0(e) + Maj(e, f, g); h += t1; d = t1+t2;
- t1 = c + e1(h) + Ch(h, a, b) + 0xd6990624 + W[45];
- t2 = e0(d) + Maj(d, e, f); g += t1; c = t1+t2;
- t1 = b + e1(g) + Ch(g, h, a) + 0xf40e3585 + W[46];
- t2 = e0(c) + Maj(c, d, e); f += t1; b = t1+t2;
- t1 = a + e1(f) + Ch(f, g, h) + 0x106aa070 + W[47];
- t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2;
-
- t1 = h + e1(e) + Ch(e, f, g) + 0x19a4c116 + W[48];
- t2 = e0(a) + Maj(a, b, c); d += t1; h = t1+t2;
- t1 = g + e1(d) + Ch(d, e, f) + 0x1e376c08 + W[49];
- t2 = e0(h) + Maj(h, a, b); c += t1; g = t1+t2;
- t1 = f + e1(c) + Ch(c, d, e) + 0x2748774c + W[50];
- t2 = e0(g) + Maj(g, h, a); b += t1; f = t1+t2;
- t1 = e + e1(b) + Ch(b, c, d) + 0x34b0bcb5 + W[51];
- t2 = e0(f) + Maj(f, g, h); a += t1; e = t1+t2;
- t1 = d + e1(a) + Ch(a, b, c) + 0x391c0cb3 + W[52];
- t2 = e0(e) + Maj(e, f, g); h += t1; d = t1+t2;
- t1 = c + e1(h) + Ch(h, a, b) + 0x4ed8aa4a + W[53];
- t2 = e0(d) + Maj(d, e, f); g += t1; c = t1+t2;
- t1 = b + e1(g) + Ch(g, h, a) + 0x5b9cca4f + W[54];
- t2 = e0(c) + Maj(c, d, e); f += t1; b = t1+t2;
- t1 = a + e1(f) + Ch(f, g, h) + 0x682e6ff3 + W[55];
- t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2;
-
- t1 = h + e1(e) + Ch(e, f, g) + 0x748f82ee + W[56];
- t2 = e0(a) + Maj(a, b, c); d += t1; h = t1+t2;
- t1 = g + e1(d) + Ch(d, e, f) + 0x78a5636f + W[57];
- t2 = e0(h) + Maj(h, a, b); c += t1; g = t1+t2;
- t1 = f + e1(c) + Ch(c, d, e) + 0x84c87814 + W[58];
- t2 = e0(g) + Maj(g, h, a); b += t1; f = t1+t2;
- t1 = e + e1(b) + Ch(b, c, d) + 0x8cc70208 + W[59];
- t2 = e0(f) + Maj(f, g, h); a += t1; e = t1+t2;
- t1 = d + e1(a) + Ch(a, b, c) + 0x90befffa + W[60];
- t2 = e0(e) + Maj(e, f, g); h += t1; d = t1+t2;
- t1 = c + e1(h) + Ch(h, a, b) + 0xa4506ceb + W[61];
- t2 = e0(d) + Maj(d, e, f); g += t1; c = t1+t2;
- t1 = b + e1(g) + Ch(g, h, a) + 0xbef9a3f7 + W[62];
- t2 = e0(c) + Maj(c, d, e); f += t1; b = t1+t2;
- t1 = a + e1(f) + Ch(f, g, h) + 0xc67178f2 + W[63];
- t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2;
-
- state[0] += a; state[1] += b; state[2] += c; state[3] += d;
- state[4] += e; state[5] += f; state[6] += g; state[7] += h;
-
- /* clear any sensitive info... */
- a = b = c = d = e = f = g = h = t1 = t2 = 0;
- memset(W, 0, 64 * sizeof(u32));
-}
-
-int sha256_init(struct sha256_state *sctx)
-{
- sctx->state[0] = SHA256_H0;
- sctx->state[1] = SHA256_H1;
- sctx->state[2] = SHA256_H2;
- sctx->state[3] = SHA256_H3;
- sctx->state[4] = SHA256_H4;
- sctx->state[5] = SHA256_H5;
- sctx->state[6] = SHA256_H6;
- sctx->state[7] = SHA256_H7;
- sctx->count = 0;
-
- return 0;
-}
-
-int sha256_update(struct sha256_state *sctx, const u8 *data, unsigned int len)
-{
- unsigned int partial, done;
- const u8 *src;
-
- partial = sctx->count & 0x3f;
- sctx->count += len;
- done = 0;
- src = data;
-
- if ((partial + len) > 63) {
- if (partial) {
- done = -partial;
- memcpy(sctx->buf + partial, data, done + 64);
- src = sctx->buf;
- }
-
- do {
- sha256_transform(sctx->state, src);
- done += 64;
- src = data + done;
- } while (done + 63 < len);
-
- partial = 0;
- }
- memcpy(sctx->buf + partial, src, len - done);
-
- return 0;
-}
-
-int sha256_final(struct sha256_state *sctx, u8 *out)
-{
- __be32 *dst = (__be32 *)out;
- __be64 bits;
- unsigned int index, pad_len;
- int i;
- static const u8 padding[64] = { 0x80, };
-
- /* Save number of bits */
- bits = cpu_to_be64(sctx->count << 3);
-
- /* Pad out to 56 mod 64. */
- index = sctx->count & 0x3f;
- pad_len = (index < 56) ? (56 - index) : ((64+56) - index);
- sha256_update(sctx, padding, pad_len);
-
- /* Append length (before padding) */
- sha256_update(sctx, (const u8 *)&bits, sizeof(bits));
-
- /* Store state in digest */
- for (i = 0; i < 8; i++)
- dst[i] = cpu_to_be32(sctx->state[i]);
-
- /* Zeroize sensitive information. */
- memset(sctx, 0, sizeof(*sctx));
-
- return 0;
-}
diff --git a/arch/x86/purgatory/sha256.h b/arch/x86/purgatory/sha256.h
deleted file mode 100644
index 2867d9825a57..000000000000
--- a/arch/x86/purgatory/sha256.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Copyright (C) 2014 Red Hat Inc.
- *
- * Author: Vivek Goyal <vgoyal@redhat.com>
- *
- * This source code is licensed under the GNU General Public License,
- * Version 2. See the file COPYING for more details.
- */
-
-#ifndef SHA256_H
-#define SHA256_H
-
-#include <linux/types.h>
-#include <crypto/sha.h>
-
-extern int sha256_init(struct sha256_state *sctx);
-extern int sha256_update(struct sha256_state *sctx, const u8 *input,
- unsigned int length);
-extern int sha256_final(struct sha256_state *sctx, u8 *hash);
-
-#endif /* SHA256_H */
diff --git a/arch/x86/purgatory/string.c b/arch/x86/purgatory/string.c
index d886b1fa36f0..795ca4f2cb3c 100644
--- a/arch/x86/purgatory/string.c
+++ b/arch/x86/purgatory/string.c
@@ -10,4 +10,16 @@
* Version 2. See the file COPYING for more details.
*/
+#include <linux/types.h>
+
#include "../boot/string.c"
+
+void *memcpy(void *dst, const void *src, size_t len)
+{
+ return __builtin_memcpy(dst, src, len);
+}
+
+void *memset(void *dst, int c, size_t len)
+{
+ return __builtin_memset(dst, c, len);
+}
diff --git a/arch/x86/um/stub_segv.c b/arch/x86/um/stub_segv.c
index 1518d2805ae8..27361cbb7ca9 100644
--- a/arch/x86/um/stub_segv.c
+++ b/arch/x86/um/stub_segv.c
@@ -6,11 +6,12 @@
#include <sysdep/stub.h>
#include <sysdep/faultinfo.h>
#include <sysdep/mcontext.h>
+#include <sys/ucontext.h>
void __attribute__ ((__section__ (".__syscall_stub")))
stub_segv_handler(int sig, siginfo_t *info, void *p)
{
- struct ucontext *uc = p;
+ ucontext_t *uc = p;
GET_FAULTINFO_FROM_MC(*((struct faultinfo *) STUB_DATA),
&uc->uc_mcontext);
diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c
index 2163888497d3..5e53bfbe5823 100644
--- a/arch/x86/xen/apic.c
+++ b/arch/x86/xen/apic.c
@@ -112,7 +112,7 @@ static int xen_madt_oem_check(char *oem_id, char *oem_table_id)
return xen_pv_domain();
}
-static int xen_id_always_valid(int apicid)
+static int xen_id_always_valid(u32 apicid)
{
return 1;
}
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 3c2c2530737e..c36d23aa6c35 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -1259,10 +1259,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
*/
__userpte_alloc_gfp &= ~__GFP_HIGHMEM;
- /* Work out if we support NX */
- get_cpu_cap(&boot_cpu_data);
- x86_configure_nx();
-
/* Get mfn list */
xen_build_dynamic_phys_to_machine();
@@ -1272,6 +1268,10 @@ asmlinkage __visible void __init xen_start_kernel(void)
*/
xen_setup_gdt(0);
+ /* Work out if we support NX */
+ get_cpu_cap(&boot_cpu_data);
+ x86_configure_nx();
+
xen_init_irq_ops();
/* Let's presume PV guests always boot on vCPU with id 0. */
diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
index c0c756c76afe..2e20ae2fa2d6 100644
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -425,6 +425,7 @@ static void xen_pv_play_dead(void) /* used only with HOTPLUG_CPU */
* data back is to call:
*/
tick_nohz_idle_enter();
+ tick_nohz_idle_stop_tick_protected();
cpuhp_online_idle(CPUHP_AP_ONLINE_IDLE);
}
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 96f26e026783..5077ead5e59c 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -89,7 +89,9 @@ END(hypercall_page)
ELFNOTE(Xen, XEN_ELFNOTE_FEATURES,
.ascii "!writable_page_tables|pae_pgdir_above_4gb")
ELFNOTE(Xen, XEN_ELFNOTE_SUPPORTED_FEATURES,
- .long (1 << XENFEAT_writable_page_tables) | (1 << XENFEAT_dom0))
+ .long (1 << XENFEAT_writable_page_tables) | \
+ (1 << XENFEAT_dom0) | \
+ (1 << XENFEAT_linux_rsdp_unrestricted))
ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes")
ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic")
ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID,