summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile17
-rw-r--r--arch/arm/include/asm/futex.h9
-rw-r--r--arch/riscv/include/asm/csr.h3
-rw-r--r--arch/riscv/include/asm/hwcap.h22
-rw-r--r--arch/riscv/include/asm/set_memory.h8
-rw-r--r--arch/riscv/kernel/cpu_ops.c4
-rw-r--r--arch/riscv/kernel/cpufeature.c83
-rw-r--r--arch/riscv/kernel/smp.c2
-rw-r--r--arch/riscv/kernel/vdso/Makefile2
-rw-r--r--arch/riscv/kernel/vdso/note.S12
-rw-r--r--arch/riscv/mm/init.c19
-rw-r--r--arch/x86/entry/calling.h40
-rw-r--r--arch/x86/entry/entry_64.S14
-rw-r--r--arch/x86/include/asm/ftrace.h5
-rw-r--r--arch/x86/include/asm/unwind.h2
-rw-r--r--arch/x86/kernel/apic/apic.c27
-rw-r--r--arch/x86/kernel/dumpstack_64.c3
-rw-r--r--arch/x86/kernel/unwind_frame.c3
-rw-r--r--arch/x86/kernel/unwind_orc.c113
-rw-r--r--arch/x86/mm/pat/set_memory.c12
-rw-r--r--block/bfq-iosched.c6
-rw-r--r--block/blk-cgroup.c2
-rw-r--r--block/blk-iocost.c117
-rw-r--r--crypto/lrw.c6
-rw-r--r--crypto/xts.c6
-rw-r--r--drivers/firmware/efi/tpm.c2
-rw-r--r--drivers/iommu/amd_iommu.c198
-rw-r--r--drivers/iommu/amd_iommu_types.h9
-rw-r--r--drivers/iommu/virtio-iommu.c2
-rw-r--r--drivers/nvme/host/core.c2
-rw-r--r--drivers/nvme/host/pci.c6
-rw-r--r--fs/ceph/debugfs.c2
-rw-r--r--fs/io_uring.c65
-rw-r--r--fs/splice.c45
-rw-r--r--fs/vboxsf/super.c2
-rw-r--r--include/linux/backing-dev-defs.h1
-rw-r--r--include/linux/backing-dev.h9
-rw-r--r--include/trace/events/wbt.h8
-rw-r--r--init/Kconfig18
-rw-r--r--init/initramfs.c2
-rw-r--r--kernel/trace/Kconfig1
-rw-r--r--mm/backing-dev.c13
-rw-r--r--tools/cgroup/iocost_monitor.py7
-rw-r--r--tools/objtool/check.c17
-rw-r--r--tools/objtool/elf.h7
45 files changed, 608 insertions, 345 deletions
diff --git a/Makefile b/Makefile
index 3512f7b243dc..11fe9b1535de 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
VERSION = 5
PATCHLEVEL = 7
SUBLEVEL = 0
-EXTRAVERSION = -rc4
+EXTRAVERSION = -rc5
NAME = Kleptomaniac Octopus
# *DOCUMENTATION*
@@ -729,10 +729,6 @@ else ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
KBUILD_CFLAGS += -Os
endif
-ifdef CONFIG_CC_DISABLE_WARN_MAYBE_UNINITIALIZED
-KBUILD_CFLAGS += -Wno-maybe-uninitialized
-endif
-
# Tell gcc to never replace conditional load with a non-conditional one
KBUILD_CFLAGS += $(call cc-option,--param=allow-store-data-races=0)
KBUILD_CFLAGS += $(call cc-option,-fno-allow-store-data-races)
@@ -881,6 +877,17 @@ KBUILD_CFLAGS += -Wno-pointer-sign
# disable stringop warnings in gcc 8+
KBUILD_CFLAGS += $(call cc-disable-warning, stringop-truncation)
+# We'll want to enable this eventually, but it's not going away for 5.7 at least
+KBUILD_CFLAGS += $(call cc-disable-warning, zero-length-bounds)
+KBUILD_CFLAGS += $(call cc-disable-warning, array-bounds)
+KBUILD_CFLAGS += $(call cc-disable-warning, stringop-overflow)
+
+# Another good warning that we'll want to enable eventually
+KBUILD_CFLAGS += $(call cc-disable-warning, restrict)
+
+# Enabled with W=2, disabled by default as noisy
+KBUILD_CFLAGS += $(call cc-disable-warning, maybe-uninitialized)
+
# disable invalid "can't wrap" optimizations for signed / pointers
KBUILD_CFLAGS += $(call cc-option,-fno-strict-overflow)
diff --git a/arch/arm/include/asm/futex.h b/arch/arm/include/asm/futex.h
index e133da303a98..a9151884bc85 100644
--- a/arch/arm/include/asm/futex.h
+++ b/arch/arm/include/asm/futex.h
@@ -165,8 +165,13 @@ arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
preempt_enable();
#endif
- if (!ret)
- *oval = oldval;
+ /*
+ * Store unconditionally. If ret != 0 the extra store is the least
+ * of the worries but GCC cannot figure out that __futex_atomic_op()
+ * is either setting ret to -EFAULT or storing the old value in
+ * oldval which results in a uninitialized warning at the call site.
+ */
+ *oval = oldval;
return ret;
}
diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
index 8e18d2c64399..cec462e198ce 100644
--- a/arch/riscv/include/asm/csr.h
+++ b/arch/riscv/include/asm/csr.h
@@ -51,13 +51,10 @@
#define CAUSE_IRQ_FLAG (_AC(1, UL) << (__riscv_xlen - 1))
/* Interrupt causes (minus the high bit) */
-#define IRQ_U_SOFT 0
#define IRQ_S_SOFT 1
#define IRQ_M_SOFT 3
-#define IRQ_U_TIMER 4
#define IRQ_S_TIMER 5
#define IRQ_M_TIMER 7
-#define IRQ_U_EXT 8
#define IRQ_S_EXT 9
#define IRQ_M_EXT 11
diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
index 1bb0cd04aec3..5ce50468aff1 100644
--- a/arch/riscv/include/asm/hwcap.h
+++ b/arch/riscv/include/asm/hwcap.h
@@ -8,6 +8,7 @@
#ifndef _ASM_RISCV_HWCAP_H
#define _ASM_RISCV_HWCAP_H
+#include <linux/bits.h>
#include <uapi/asm/hwcap.h>
#ifndef __ASSEMBLY__
@@ -22,6 +23,27 @@ enum {
};
extern unsigned long elf_hwcap;
+
+#define RISCV_ISA_EXT_a ('a' - 'a')
+#define RISCV_ISA_EXT_c ('c' - 'a')
+#define RISCV_ISA_EXT_d ('d' - 'a')
+#define RISCV_ISA_EXT_f ('f' - 'a')
+#define RISCV_ISA_EXT_h ('h' - 'a')
+#define RISCV_ISA_EXT_i ('i' - 'a')
+#define RISCV_ISA_EXT_m ('m' - 'a')
+#define RISCV_ISA_EXT_s ('s' - 'a')
+#define RISCV_ISA_EXT_u ('u' - 'a')
+
+#define RISCV_ISA_EXT_MAX 64
+
+unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap);
+
+#define riscv_isa_extension_mask(ext) BIT_MASK(RISCV_ISA_EXT_##ext)
+
+bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, int bit);
+#define riscv_isa_extension_available(isa_bitmap, ext) \
+ __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_##ext)
+
#endif
#endif /* _ASM_RISCV_HWCAP_H */
diff --git a/arch/riscv/include/asm/set_memory.h b/arch/riscv/include/asm/set_memory.h
index c38df4771c09..4c5bae7ca01c 100644
--- a/arch/riscv/include/asm/set_memory.h
+++ b/arch/riscv/include/asm/set_memory.h
@@ -22,14 +22,6 @@ static inline int set_memory_x(unsigned long addr, int numpages) { return 0; }
static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
#endif
-#ifdef CONFIG_STRICT_KERNEL_RWX
-void set_kernel_text_ro(void);
-void set_kernel_text_rw(void);
-#else
-static inline void set_kernel_text_ro(void) { }
-static inline void set_kernel_text_rw(void) { }
-#endif
-
int set_direct_map_invalid_noflush(struct page *page);
int set_direct_map_default_noflush(struct page *page);
diff --git a/arch/riscv/kernel/cpu_ops.c b/arch/riscv/kernel/cpu_ops.c
index c4c33bf02369..0ec22354018c 100644
--- a/arch/riscv/kernel/cpu_ops.c
+++ b/arch/riscv/kernel/cpu_ops.c
@@ -15,8 +15,8 @@
const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init;
-void *__cpu_up_stack_pointer[NR_CPUS];
-void *__cpu_up_task_pointer[NR_CPUS];
+void *__cpu_up_stack_pointer[NR_CPUS] __section(.data);
+void *__cpu_up_task_pointer[NR_CPUS] __section(.data);
extern const struct cpu_operations cpu_ops_sbi;
extern const struct cpu_operations cpu_ops_spinwait;
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index a5ad00043104..ac202f44a670 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -6,6 +6,7 @@
* Copyright (C) 2017 SiFive
*/
+#include <linux/bitmap.h>
#include <linux/of.h>
#include <asm/processor.h>
#include <asm/hwcap.h>
@@ -13,15 +14,57 @@
#include <asm/switch_to.h>
unsigned long elf_hwcap __read_mostly;
+
+/* Host ISA bitmap */
+static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly;
+
#ifdef CONFIG_FPU
bool has_fpu __read_mostly;
#endif
+/**
+ * riscv_isa_extension_base() - Get base extension word
+ *
+ * @isa_bitmap: ISA bitmap to use
+ * Return: base extension word as unsigned long value
+ *
+ * NOTE: If isa_bitmap is NULL then Host ISA bitmap will be used.
+ */
+unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap)
+{
+ if (!isa_bitmap)
+ return riscv_isa[0];
+ return isa_bitmap[0];
+}
+EXPORT_SYMBOL_GPL(riscv_isa_extension_base);
+
+/**
+ * __riscv_isa_extension_available() - Check whether given extension
+ * is available or not
+ *
+ * @isa_bitmap: ISA bitmap to use
+ * @bit: bit position of the desired extension
+ * Return: true or false
+ *
+ * NOTE: If isa_bitmap is NULL then Host ISA bitmap will be used.
+ */
+bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, int bit)
+{
+ const unsigned long *bmap = (isa_bitmap) ? isa_bitmap : riscv_isa;
+
+ if (bit >= RISCV_ISA_EXT_MAX)
+ return false;
+
+ return test_bit(bit, bmap) ? true : false;
+}
+EXPORT_SYMBOL_GPL(__riscv_isa_extension_available);
+
void riscv_fill_hwcap(void)
{
struct device_node *node;
const char *isa;
- size_t i;
+ char print_str[BITS_PER_LONG + 1];
+ size_t i, j, isa_len;
static unsigned long isa2hwcap[256] = {0};
isa2hwcap['i'] = isa2hwcap['I'] = COMPAT_HWCAP_ISA_I;
@@ -33,8 +76,11 @@ void riscv_fill_hwcap(void)
elf_hwcap = 0;
+ bitmap_zero(riscv_isa, RISCV_ISA_EXT_MAX);
+
for_each_of_cpu_node(node) {
unsigned long this_hwcap = 0;
+ unsigned long this_isa = 0;
if (riscv_of_processor_hartid(node) < 0)
continue;
@@ -44,8 +90,24 @@ void riscv_fill_hwcap(void)
continue;
}
- for (i = 0; i < strlen(isa); ++i)
+ i = 0;
+ isa_len = strlen(isa);
+#if IS_ENABLED(CONFIG_32BIT)
+ if (!strncmp(isa, "rv32", 4))
+ i += 4;
+#elif IS_ENABLED(CONFIG_64BIT)
+ if (!strncmp(isa, "rv64", 4))
+ i += 4;
+#endif
+ for (; i < isa_len; ++i) {
this_hwcap |= isa2hwcap[(unsigned char)(isa[i])];
+ /*
+ * TODO: X, Y and Z extension parsing for Host ISA
+ * bitmap will be added in-future.
+ */
+ if ('a' <= isa[i] && isa[i] < 'x')
+ this_isa |= (1UL << (isa[i] - 'a'));
+ }
/*
* All "okay" hart should have same isa. Set HWCAP based on
@@ -56,6 +118,11 @@ void riscv_fill_hwcap(void)
elf_hwcap &= this_hwcap;
else
elf_hwcap = this_hwcap;
+
+ if (riscv_isa[0])
+ riscv_isa[0] &= this_isa;
+ else
+ riscv_isa[0] = this_isa;
}
/* We don't support systems with F but without D, so mask those out
@@ -65,7 +132,17 @@ void riscv_fill_hwcap(void)
elf_hwcap &= ~COMPAT_HWCAP_ISA_F;
}
- pr_info("elf_hwcap is 0x%lx\n", elf_hwcap);
+ memset(print_str, 0, sizeof(print_str));
+ for (i = 0, j = 0; i < BITS_PER_LONG; i++)
+ if (riscv_isa[0] & BIT_MASK(i))
+ print_str[j++] = (char)('a' + i);
+ pr_info("riscv: ISA extensions %s\n", print_str);
+
+ memset(print_str, 0, sizeof(print_str));
+ for (i = 0, j = 0; i < BITS_PER_LONG; i++)
+ if (elf_hwcap & BIT_MASK(i))
+ print_str[j++] = (char)('a' + i);
+ pr_info("riscv: ELF capabilities %s\n", print_str);
#ifdef CONFIG_FPU
if (elf_hwcap & (COMPAT_HWCAP_ISA_F | COMPAT_HWCAP_ISA_D))
diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c
index e0a6293093f1..a65a8fa0c22d 100644
--- a/arch/riscv/kernel/smp.c
+++ b/arch/riscv/kernel/smp.c
@@ -10,6 +10,7 @@
#include <linux/cpu.h>
#include <linux/interrupt.h>
+#include <linux/module.h>
#include <linux/profile.h>
#include <linux/smp.h>
#include <linux/sched.h>
@@ -63,6 +64,7 @@ void riscv_cpuid_to_hartid_mask(const struct cpumask *in, struct cpumask *out)
for_each_cpu(cpu, in)
cpumask_set_cpu(cpuid_to_hartid_map(cpu), out);
}
+EXPORT_SYMBOL_GPL(riscv_cpuid_to_hartid_mask);
bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
{
diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile
index a4ee3a0e7d20..4c8b2a4a6a70 100644
--- a/arch/riscv/kernel/vdso/Makefile
+++ b/arch/riscv/kernel/vdso/Makefile
@@ -12,7 +12,7 @@ vdso-syms += getcpu
vdso-syms += flush_icache
# Files to link into the vdso
-obj-vdso = $(patsubst %, %.o, $(vdso-syms))
+obj-vdso = $(patsubst %, %.o, $(vdso-syms)) note.o
# Build rules
targets := $(obj-vdso) vdso.so vdso.so.dbg vdso.lds vdso-dummy.o
diff --git a/arch/riscv/kernel/vdso/note.S b/arch/riscv/kernel/vdso/note.S
new file mode 100644
index 000000000000..2a956c942211
--- /dev/null
+++ b/arch/riscv/kernel/vdso/note.S
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
+ * Here we can supply some information useful to userland.
+ */
+
+#include <linux/elfnote.h>
+#include <linux/version.h>
+
+ELFNOTE_START(Linux, 0, "a")
+ .long LINUX_VERSION_CODE
+ELFNOTE_END
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index b55be44ff9bd..27a334106708 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -150,7 +150,8 @@ void __init setup_bootmem(void)
memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start);
set_max_mapnr(PFN_DOWN(mem_size));
- max_low_pfn = PFN_DOWN(memblock_end_of_DRAM());
+ max_pfn = PFN_DOWN(memblock_end_of_DRAM());
+ max_low_pfn = max_pfn;
#ifdef CONFIG_BLK_DEV_INITRD
setup_initrd();
@@ -501,22 +502,6 @@ static inline void setup_vm_final(void)
#endif /* CONFIG_MMU */
#ifdef CONFIG_STRICT_KERNEL_RWX
-void set_kernel_text_rw(void)
-{
- unsigned long text_start = (unsigned long)_text;
- unsigned long text_end = (unsigned long)_etext;
-
- set_memory_rw(text_start, (text_end - text_start) >> PAGE_SHIFT);
-}
-
-void set_kernel_text_ro(void)
-{
- unsigned long text_start = (unsigned long)_text;
- unsigned long text_end = (unsigned long)_etext;
-
- set_memory_ro(text_start, (text_end - text_start) >> PAGE_SHIFT);
-}
-
void mark_rodata_ro(void)
{
unsigned long text_start = (unsigned long)_text;
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 0789e13ece90..1c7f13bb6728 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -98,13 +98,6 @@ For 32-bit we have the following conventions - kernel is built with
#define SIZEOF_PTREGS 21*8
.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax save_ret=0
- /*
- * Push registers and sanitize registers of values that a
- * speculation attack might otherwise want to exploit. The
- * lower registers are likely clobbered well before they
- * could be put to use in a speculative execution gadget.
- * Interleave XOR with PUSH for better uop scheduling:
- */
.if \save_ret
pushq %rsi /* pt_regs->si */
movq 8(%rsp), %rsi /* temporarily store the return address in %rsi */
@@ -114,34 +107,43 @@ For 32-bit we have the following conventions - kernel is built with
pushq %rsi /* pt_regs->si */
.endif
pushq \rdx /* pt_regs->dx */
- xorl %edx, %edx /* nospec dx */
pushq %rcx /* pt_regs->cx */
- xorl %ecx, %ecx /* nospec cx */
pushq \rax /* pt_regs->ax */
pushq %r8 /* pt_regs->r8 */
- xorl %r8d, %r8d /* nospec r8 */
pushq %r9 /* pt_regs->r9 */
- xorl %r9d, %r9d /* nospec r9 */
pushq %r10 /* pt_regs->r10 */
- xorl %r10d, %r10d /* nospec r10 */
pushq %r11 /* pt_regs->r11 */
- xorl %r11d, %r11d /* nospec r11*/
pushq %rbx /* pt_regs->rbx */
- xorl %ebx, %ebx /* nospec rbx*/
pushq %rbp /* pt_regs->rbp */
- xorl %ebp, %ebp /* nospec rbp*/
pushq %r12 /* pt_regs->r12 */
- xorl %r12d, %r12d /* nospec r12*/
pushq %r13 /* pt_regs->r13 */
- xorl %r13d, %r13d /* nospec r13*/
pushq %r14 /* pt_regs->r14 */
- xorl %r14d, %r14d /* nospec r14*/
pushq %r15 /* pt_regs->r15 */
- xorl %r15d, %r15d /* nospec r15*/
UNWIND_HINT_REGS
+
.if \save_ret
pushq %rsi /* return address on top of stack */
.endif
+
+ /*
+ * Sanitize registers of values that a speculation attack might
+ * otherwise want to exploit. The lower registers are likely clobbered
+ * well before they could be put to use in a speculative execution
+ * gadget.
+ */
+ xorl %edx, %edx /* nospec dx */
+ xorl %ecx, %ecx /* nospec cx */
+ xorl %r8d, %r8d /* nospec r8 */
+ xorl %r9d, %r9d /* nospec r9 */
+ xorl %r10d, %r10d /* nospec r10 */
+ xorl %r11d, %r11d /* nospec r11 */
+ xorl %ebx, %ebx /* nospec rbx */
+ xorl %ebp, %ebp /* nospec rbp */
+ xorl %r12d, %r12d /* nospec r12 */
+ xorl %r13d, %r13d /* nospec r13 */
+ xorl %r14d, %r14d /* nospec r14 */
+ xorl %r15d, %r15d /* nospec r15 */
+
.endm
.macro POP_REGS pop_rdi=1 skip_r11rcx=0
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 0e9504fabe52..3063aa9090f9 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -249,7 +249,6 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
*/
syscall_return_via_sysret:
/* rcx and r11 are already restored (see code above) */
- UNWIND_HINT_EMPTY
POP_REGS pop_rdi=0 skip_r11rcx=1
/*
@@ -258,6 +257,7 @@ syscall_return_via_sysret:
*/
movq %rsp, %rdi
movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
+ UNWIND_HINT_EMPTY
pushq RSP-RDI(%rdi) /* RSP */
pushq (%rdi) /* RDI */
@@ -279,8 +279,7 @@ SYM_CODE_END(entry_SYSCALL_64)
* %rdi: prev task
* %rsi: next task
*/
-SYM_CODE_START(__switch_to_asm)
- UNWIND_HINT_FUNC
+SYM_FUNC_START(__switch_to_asm)
/*
* Save callee-saved registers
* This must match the order in inactive_task_frame
@@ -321,7 +320,7 @@ SYM_CODE_START(__switch_to_asm)
popq %rbp
jmp __switch_to
-SYM_CODE_END(__switch_to_asm)
+SYM_FUNC_END(__switch_to_asm)
/*
* A newly forked process directly context switches into this address.
@@ -512,7 +511,7 @@ SYM_CODE_END(spurious_entries_start)
* +----------------------------------------------------+
*/
SYM_CODE_START(interrupt_entry)
- UNWIND_HINT_FUNC
+ UNWIND_HINT_IRET_REGS offset=16
ASM_CLAC
cld
@@ -544,9 +543,9 @@ SYM_CODE_START(interrupt_entry)
pushq 5*8(%rdi) /* regs->eflags */
pushq 4*8(%rdi) /* regs->cs */
pushq 3*8(%rdi) /* regs->ip */
+ UNWIND_HINT_IRET_REGS
pushq 2*8(%rdi) /* regs->orig_ax */
pushq 8(%rdi) /* return address */
- UNWIND_HINT_FUNC
movq (%rdi), %rdi
jmp 2f
@@ -637,6 +636,7 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
*/
movq %rsp, %rdi
movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
+ UNWIND_HINT_EMPTY
/* Copy the IRET frame to the trampoline stack. */
pushq 6*8(%rdi) /* SS */
@@ -1739,7 +1739,7 @@ SYM_CODE_START(rewind_stack_do_exit)
movq PER_CPU_VAR(cpu_current_top_of_stack), %rax
leaq -PTREGS_SIZE(%rax), %rsp
- UNWIND_HINT_FUNC sp_offset=PTREGS_SIZE
+ UNWIND_HINT_REGS
call do_exit
SYM_CODE_END(rewind_stack_do_exit)
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 85be2f506272..70b96cae5b42 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -61,11 +61,12 @@ static inline bool arch_syscall_match_sym_name(const char *sym, const char *name
{
/*
* Compare the symbol name with the system call name. Skip the
- * "__x64_sys", "__ia32_sys" or simple "sys" prefix.
+ * "__x64_sys", "__ia32_sys", "__do_sys" or simple "sys" prefix.
*/
return !strcmp(sym + 3, name + 3) ||
(!strncmp(sym, "__x64_", 6) && !strcmp(sym + 9, name + 3)) ||
- (!strncmp(sym, "__ia32_", 7) && !strcmp(sym + 10, name + 3));
+ (!strncmp(sym, "__ia32_", 7) && !strcmp(sym + 10, name + 3)) ||
+ (!strncmp(sym, "__do_sys", 8) && !strcmp(sym + 8, name + 3));
}
#ifndef COMPILE_OFFSETS
diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
index 499578f7e6d7..70fc159ebe69 100644
--- a/arch/x86/include/asm/unwind.h
+++ b/arch/x86/include/asm/unwind.h
@@ -19,7 +19,7 @@ struct unwind_state {
#if defined(CONFIG_UNWINDER_ORC)
bool signal, full_regs;
unsigned long sp, bp, ip;
- struct pt_regs *regs;
+ struct pt_regs *regs, *prev_regs;
#elif defined(CONFIG_UNWINDER_FRAME_POINTER)
bool got_irq;
unsigned long *bp, *orig_sp, ip;
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 81b9c63dae1b..e53dda210cd7 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -352,8 +352,6 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
* According to Intel, MFENCE can do the serialization here.
*/
asm volatile("mfence" : : : "memory");
-
- printk_once(KERN_DEBUG "TSC deadline timer enabled\n");
return;
}
@@ -546,7 +544,7 @@ static struct clock_event_device lapic_clockevent = {
};
static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
-static u32 hsx_deadline_rev(void)
+static __init u32 hsx_deadline_rev(void)
{
switch (boot_cpu_data.x86_stepping) {
case 0x02: return 0x3a; /* EP */
@@ -556,7 +554,7 @@ static u32 hsx_deadline_rev(void)
return ~0U;
}
-static u32 bdx_deadline_rev(void)
+static __init u32 bdx_deadline_rev(void)
{
switch (boot_cpu_data.x86_stepping) {
case 0x02: return 0x00000011;
@@ -568,7 +566,7 @@ static u32 bdx_deadline_rev(void)
return ~0U;
}
-static u32 skx_deadline_rev(void)
+static __init u32 skx_deadline_rev(void)
{
switch (boot_cpu_data.x86_stepping) {
case 0x03: return 0x01000136;
@@ -581,7 +579,7 @@ static u32 skx_deadline_rev(void)
return ~0U;
}
-static const struct x86_cpu_id deadline_match[] = {
+static const struct x86_cpu_id deadline_match[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL( HASWELL_X, &hsx_deadline_rev),
X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_X, 0x0b000020),
X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_D, &bdx_deadline_rev),
@@ -603,18 +601,19 @@ static const struct x86_cpu_id deadline_match[] = {
{},
};
-static void apic_check_deadline_errata(void)
+static __init bool apic_validate_deadline_timer(void)
{
const struct x86_cpu_id *m;
u32 rev;
- if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER) ||
- boot_cpu_has(X86_FEATURE_HYPERVISOR))
- return;
+ if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
+ return false;
+ if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
+ return true;
m = x86_match_cpu(deadline_match);
if (!m)
- return;
+ return true;
/*
* Function pointers will have the MSB set due to address layout,
@@ -626,11 +625,12 @@ static void apic_check_deadline_errata(void)
rev = (u32)m->driver_data;
if (boot_cpu_data.microcode >= rev)
- return;
+ return true;
setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
pr_err(FW_BUG "TSC_DEADLINE disabled due to Errata; "
"please update microcode to version: 0x%x (or later)\n", rev);
+ return false;
}
/*
@@ -2092,7 +2092,8 @@ void __init init_apic_mappings(void)
{
unsigned int new_apicid;
- apic_check_deadline_errata();
+ if (apic_validate_deadline_timer())
+ pr_debug("TSC deadline timer available\n");
if (x2apic_mode) {
boot_cpu_physical_apicid = read_apic_id();
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 87b97897a881..460ae7f66818 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -183,7 +183,8 @@ recursion_check:
*/
if (visit_mask) {
if (*visit_mask & (1UL << info->type)) {
- printk_deferred_once(KERN_WARNING "WARNING: stack recursion on stack type %d\n", info->type);
+ if (task == current)
+ printk_deferred_once(KERN_WARNING "WARNING: stack recursion on stack type %d\n", info->type);
goto unknown;
}
*visit_mask |= 1UL << info->type;
diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
index a224b5ab103f..54226110bc7f 100644
--- a/arch/x86/kernel/unwind_frame.c
+++ b/arch/x86/kernel/unwind_frame.c
@@ -344,6 +344,9 @@ bad_address:
if (IS_ENABLED(CONFIG_X86_32))
goto the_end;
+ if (state->task != current)
+ goto the_end;
+
if (state->regs) {
printk_deferred_once(KERN_WARNING
"WARNING: kernel stack regs at %p in %s:%d has bad 'bp' value %p\n",
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index e9cc182aa97e..5b0bd8581fe6 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -8,19 +8,21 @@
#include <asm/orc_lookup.h>
#define orc_warn(fmt, ...) \
- printk_deferred_once(KERN_WARNING pr_fmt("WARNING: " fmt), ##__VA_ARGS__)
+ printk_deferred_once(KERN_WARNING "WARNING: " fmt, ##__VA_ARGS__)
+
+#define orc_warn_current(args...) \
+({ \
+ if (state->task == current) \
+ orc_warn(args); \
+})
extern int __start_orc_unwind_ip[];
extern int __stop_orc_unwind_ip[];
extern struct orc_entry __start_orc_unwind[];
extern struct orc_entry __stop_orc_unwind[];
-static DEFINE_MUTEX(sort_mutex);
-int *cur_orc_ip_table = __start_orc_unwind_ip;
-struct orc_entry *cur_orc_table = __start_orc_unwind;
-
-unsigned int lookup_num_blocks;
-bool orc_init;
+static bool orc_init __ro_after_init;
+static unsigned int lookup_num_blocks __ro_after_init;
static inline unsigned long orc_ip(const int *ip)
{
@@ -142,9 +144,6 @@ static struct orc_entry *orc_find(unsigned long ip)
{
static struct orc_entry *orc;
- if (!orc_init)
- return NULL;
-
if (ip == 0)
return &null_orc_entry;
@@ -189,6 +188,10 @@ static struct orc_entry *orc_find(unsigned long ip)
#ifdef CONFIG_MODULES
+static DEFINE_MUTEX(sort_mutex);
+static int *cur_orc_ip_table = __start_orc_unwind_ip;
+static struct orc_entry *cur_orc_table = __start_orc_unwind;
+
static void orc_sort_swap(void *_a, void *_b, int size)
{
struct orc_entry *orc_a, *orc_b;
@@ -381,9 +384,38 @@ static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr
return true;
}
+/*
+ * If state->regs is non-NULL, and points to a full pt_regs, just get the reg
+ * value from state->regs.
+ *
+ * Otherwise, if state->regs just points to IRET regs, and the previous frame
+ * had full regs, it's safe to get the value from the previous regs. This can
+ * happen when early/late IRQ entry code gets interrupted by an NMI.
+ */
+static bool get_reg(struct unwind_state *state, unsigned int reg_off,
+ unsigned long *val)
+{
+ unsigned int reg = reg_off/8;
+
+ if (!state->regs)
+ return false;
+
+ if (state->full_regs) {
+ *val = ((unsigned long *)state->regs)[reg];
+ return true;
+ }
+
+ if (state->prev_regs) {
+ *val = ((unsigned long *)state->prev_regs)[reg];
+ return true;
+ }
+
+ return false;
+}
+
bool unwind_next_frame(struct unwind_state *state)
{
- unsigned long ip_p, sp, orig_ip = state->ip, prev_sp = state->sp;
+ unsigned long ip_p, sp, tmp, orig_ip = state->ip, prev_sp = state->sp;
enum stack_type prev_type = state->stack_info.type;
struct orc_entry *orc;
bool indirect = false;
@@ -445,43 +477,39 @@ bool unwind_next_frame(struct unwind_state *state)
break;
case ORC_REG_R10:
- if (!state->regs || !state->full_regs) {
- orc_warn("missing regs for base reg R10 at ip %pB\n",
- (void *)state->ip);
+ if (!get_reg(state, offsetof(struct pt_regs, r10), &sp)) {
+ orc_warn_current("missing R10 value at %pB\n",
+ (void *)state->ip);
goto err;
}
- sp = state->regs->r10;
break;
case ORC_REG_R13:
- if (!state->regs || !state->full_regs) {
- orc_warn("missing regs for base reg R13 at ip %pB\n",
- (void *)state->ip);
+ if (!get_reg(state, offsetof(struct pt_regs, r13), &sp)) {
+ orc_warn_current("missing R13 value at %pB\n",
+ (void *)state->ip);
goto err;
}
- sp = state->regs->r13;
break;
case ORC_REG_DI:
- if (!state->regs || !state->full_regs) {
- orc_warn("missing regs for base reg DI at ip %pB\n",
- (void *)state->ip);
+ if (!get_reg(state, offsetof(struct pt_regs, di), &sp)) {
+ orc_warn_current("missing RDI value at %pB\n",
+ (void *)state->ip);
goto err;
}
- sp = state->regs->di;
break;
case ORC_REG_DX:
- if (!state->regs || !state->full_regs) {
- orc_warn("missing regs for base reg DX at ip %pB\n",
- (void *)state->ip);
+ if (!get_reg(state, offsetof(struct pt_regs, dx), &sp)) {
+ orc_warn_current("missing DX value at %pB\n",
+ (void *)state->ip);
goto err;
}
- sp = state->regs->dx;
break;
default:
- orc_warn("unknown SP base reg %d for ip %pB\n",
+ orc_warn("unknown SP base reg %d at %pB\n",
orc->sp_reg, (void *)state->ip);
goto err;
}
@@ -504,44 +532,48 @@ bool unwind_next_frame(struct unwind_state *state)
state->sp = sp;
state->regs = NULL;
+ state->prev_regs = NULL;
state->signal = false;
break;
case ORC_TYPE_REGS:
if (!deref_stack_regs(state, sp, &state->ip, &state->sp)) {
- orc_warn("can't dereference registers at %p for ip %pB\n",
- (void *)sp, (void *)orig_ip);
+ orc_warn_current("can't access registers at %pB\n",
+ (void *)orig_ip);
goto err;
}
state->regs = (struct pt_regs *)sp;
+ state->prev_regs = NULL;
state->full_regs = true;
state->signal = true;
break;
case ORC_TYPE_REGS_IRET:
if (!deref_stack_iret_regs(state, sp, &state->ip, &state->sp)) {
- orc_warn("can't dereference iret registers at %p for ip %pB\n",
- (void *)sp, (void *)orig_ip);
+ orc_warn_current("can't access iret registers at %pB\n",
+ (void *)orig_ip);
goto err;
}
+ if (state->full_regs)
+ state->prev_regs = state->regs;
state->regs = (void *)sp - IRET_FRAME_OFFSET;
state->full_regs = false;
state->signal = true;
break;
default:
- orc_warn("unknown .orc_unwind entry type %d for ip %pB\n",
+ orc_warn("unknown .orc_unwind entry type %d at %pB\n",
orc->type, (void *)orig_ip);
- break;
+ goto err;
}
/* Find BP: */
switch (orc->bp_reg) {
case ORC_REG_UNDEFINED:
- if (state->regs && state->full_regs)
- state->bp = state->regs->bp;
+ if (get_reg(state, offsetof(struct pt_regs, bp), &tmp))
+ state->bp = tmp;
break;
case ORC_REG_PREV_SP:
@@ -564,8 +596,8 @@ bool unwind_next_frame(struct unwind_state *state)
if (state->stack_info.type == prev_type &&
on_stack(&state->stack_info, (void *)state->sp, sizeof(long)) &&
state->sp <= prev_sp) {
- orc_warn("stack going in the wrong direction? ip=%pB\n",
- (void *)orig_ip);
+ orc_warn_current("stack going in the wrong direction? at %pB\n",
+ (void *)orig_ip);
goto err;
}
@@ -585,6 +617,9 @@ EXPORT_SYMBOL_GPL(unwind_next_frame);
void __unwind_start(struct unwind_state *state, struct task_struct *task,
struct pt_regs *regs, unsigned long *first_frame)
{
+ if (!orc_init)
+ goto done;
+
memset(state, 0, sizeof(*state));
state->task = task;
@@ -651,7 +686,7 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
/* Otherwise, skip ahead to the user-specified starting frame: */
while (!unwind_done(state) &&
(!on_stack(&state->stack_info, first_frame, sizeof(long)) ||
- state->sp <= (unsigned long)first_frame))
+ state->sp < (unsigned long)first_frame))
unwind_next_frame(state);
return;
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 59eca6a94ce7..b8c55a2e402d 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -43,7 +43,8 @@ struct cpa_data {
unsigned long pfn;
unsigned int flags;
unsigned int force_split : 1,
- force_static_prot : 1;
+ force_static_prot : 1,
+ force_flush_all : 1;
struct page **pages;
};
@@ -355,10 +356,10 @@ static void cpa_flush(struct cpa_data *data, int cache)
return;
}
- if (cpa->numpages <= tlb_single_page_flush_ceiling)
- on_each_cpu(__cpa_flush_tlb, cpa, 1);
- else
+ if (cpa->force_flush_all || cpa->numpages > tlb_single_page_flush_ceiling)
flush_tlb_all();
+ else
+ on_each_cpu(__cpa_flush_tlb, cpa, 1);
if (!cache)
return;
@@ -1598,6 +1599,8 @@ static int cpa_process_alias(struct cpa_data *cpa)
alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
alias_cpa.curpage = 0;
+ cpa->force_flush_all = 1;
+
ret = __change_page_attr_set_clr(&alias_cpa, 0);
if (ret)
return ret;
@@ -1618,6 +1621,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
alias_cpa.curpage = 0;
+ cpa->force_flush_all = 1;
/*
* The high mapping range is imprecise, so ignore the
* return value.
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 78ba57efd16b..3d411716d7ee 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -123,6 +123,7 @@
#include <linux/ioprio.h>
#include <linux/sbitmap.h>
#include <linux/delay.h>
+#include <linux/backing-dev.h>
#include "blk.h"
#include "blk-mq.h"
@@ -4976,8 +4977,9 @@ bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
ioprio_class = IOPRIO_PRIO_CLASS(bic->ioprio);
switch (ioprio_class) {
default:
- dev_err(bfqq->bfqd->queue->backing_dev_info->dev,
- "bfq: bad prio class %d\n", ioprio_class);
+ pr_err("bdi %s: bfq: bad prio class %d\n",
+ bdi_dev_name(bfqq->bfqd->queue->backing_dev_info),
+ ioprio_class);
/* fall through */
case IOPRIO_CLASS_NONE:
/*
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index c5dc833212e1..930212c1a512 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -496,7 +496,7 @@ const char *blkg_dev_name(struct blkcg_gq *blkg)
{
/* some drivers (floppy) instantiate a queue w/o disk registered */
if (blkg->q->backing_dev_info->dev)
- return dev_name(blkg->q->backing_dev_info->dev);
+ return bdi_dev_name(blkg->q->backing_dev_info);
return NULL;
}
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index 3ab0c1c704b6..7c1fe605d0d6 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -466,7 +466,7 @@ struct ioc_gq {
*/
atomic64_t vtime;
atomic64_t done_vtime;
- atomic64_t abs_vdebt;
+ u64 abs_vdebt;
u64 last_vtime;
/*
@@ -1142,7 +1142,7 @@ static void iocg_kick_waitq(struct ioc_gq *iocg, struct ioc_now *now)
struct iocg_wake_ctx ctx = { .iocg = iocg };
u64 margin_ns = (u64)(ioc->period_us *
WAITQ_TIMER_MARGIN_PCT / 100) * NSEC_PER_USEC;
- u64 abs_vdebt, vdebt, vshortage, expires, oexpires;
+ u64 vdebt, vshortage, expires, oexpires;
s64 vbudget;
u32 hw_inuse;
@@ -1152,18 +1152,15 @@ static void iocg_kick_waitq(struct ioc_gq *iocg, struct ioc_now *now)
vbudget = now->vnow - atomic64_read(&iocg->vtime);
/* pay off debt */
- abs_vdebt = atomic64_read(&iocg->abs_vdebt);
- vdebt = abs_cost_to_cost(abs_vdebt, hw_inuse);
+ vdebt = abs_cost_to_cost(iocg->abs_vdebt, hw_inuse);
if (vdebt && vbudget > 0) {
u64 delta = min_t(u64, vbudget, vdebt);
u64 abs_delta = min(cost_to_abs_cost(delta, hw_inuse),
- abs_vdebt);
+ iocg->abs_vdebt);
atomic64_add(delta, &iocg->vtime);
atomic64_add(delta, &iocg->done_vtime);
- atomic64_sub(abs_delta, &iocg->abs_vdebt);
- if (WARN_ON_ONCE(atomic64_read(&iocg->abs_vdebt) < 0))
- atomic64_set(&iocg->abs_vdebt, 0);
+ iocg->abs_vdebt -= abs_delta;
}
/*
@@ -1219,12 +1216,18 @@ static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now, u64 cost)
u64 expires, oexpires;
u32 hw_inuse;
+ lockdep_assert_held(&iocg->waitq.lock);
+
/* debt-adjust vtime */
current_hweight(iocg, NULL, &hw_inuse);
- vtime += abs_cost_to_cost(atomic64_read(&iocg->abs_vdebt), hw_inuse);
+ vtime += abs_cost_to_cost(iocg->abs_vdebt, hw_inuse);
- /* clear or maintain depending on the overage */
- if (time_before_eq64(vtime, now->vnow)) {
+ /*
+ * Clear or maintain depending on the overage. Non-zero vdebt is what
+ * guarantees that @iocg is online and future iocg_kick_delay() will
+ * clear use_delay. Don't leave it on when there's no vdebt.
+ */
+ if (!iocg->abs_vdebt || time_before_eq64(vtime, now->vnow)) {
blkcg_clear_delay(blkg);
return false;
}
@@ -1258,9 +1261,12 @@ static enum hrtimer_restart iocg_delay_timer_fn(struct hrtimer *timer)
{
struct ioc_gq *iocg = container_of(timer, struct ioc_gq, delay_timer);
struct ioc_now now;
+ unsigned long flags;
+ spin_lock_irqsave(&iocg->waitq.lock, flags);
ioc_now(iocg->ioc, &now);
iocg_kick_delay(iocg, &now, 0);
+ spin_unlock_irqrestore(&iocg->waitq.lock, flags);
return HRTIMER_NORESTART;
}
@@ -1368,14 +1374,13 @@ static void ioc_timer_fn(struct timer_list *timer)
* should have woken up in the last period and expire idle iocgs.
*/
list_for_each_entry_safe(iocg, tiocg, &ioc->active_iocgs, active_list) {
- if (!waitqueue_active(&iocg->waitq) &&
- !atomic64_read(&iocg->abs_vdebt) && !iocg_is_idle(iocg))
+ if (!waitqueue_active(&iocg->waitq) && iocg->abs_vdebt &&
+ !iocg_is_idle(iocg))
continue;
spin_lock(&iocg->waitq.lock);
- if (waitqueue_active(&iocg->waitq) ||
- atomic64_read(&iocg->abs_vdebt)) {
+ if (waitqueue_active(&iocg->waitq) || iocg->abs_vdebt) {
/* might be oversleeping vtime / hweight changes, kick */
iocg_kick_waitq(iocg, &now);
iocg_kick_delay(iocg, &now, 0);
@@ -1718,28 +1723,49 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
* tests are racy but the races aren't systemic - we only miss once
* in a while which is fine.
*/
- if (!waitqueue_active(&iocg->waitq) &&
- !atomic64_read(&iocg->abs_vdebt) &&
+ if (!waitqueue_active(&iocg->waitq) && !iocg->abs_vdebt &&
time_before_eq64(vtime + cost, now.vnow)) {
iocg_commit_bio(iocg, bio, cost);
return;
}
/*
- * We're over budget. If @bio has to be issued regardless,
- * remember the abs_cost instead of advancing vtime.
- * iocg_kick_waitq() will pay off the debt before waking more IOs.
+ * We activated above but w/o any synchronization. Deactivation is
+ * synchronized with waitq.lock and we won't get deactivated as long
+ * as we're waiting or has debt, so we're good if we're activated
+ * here. In the unlikely case that we aren't, just issue the IO.
+ */
+ spin_lock_irq(&iocg->waitq.lock);
+
+ if (unlikely(list_empty(&iocg->active_list))) {
+ spin_unlock_irq(&iocg->waitq.lock);
+ iocg_commit_bio(iocg, bio, cost);
+ return;
+ }
+
+ /*
+ * We're over budget. If @bio has to be issued regardless, remember
+ * the abs_cost instead of advancing vtime. iocg_kick_waitq() will pay
+ * off the debt before waking more IOs.
+ *
* This way, the debt is continuously paid off each period with the
- * actual budget available to the cgroup. If we just wound vtime,
- * we would incorrectly use the current hw_inuse for the entire
- * amount which, for example, can lead to the cgroup staying
- * blocked for a long time even with substantially raised hw_inuse.
+ * actual budget available to the cgroup. If we just wound vtime, we
+ * would incorrectly use the current hw_inuse for the entire amount
+ * which, for example, can lead to the cgroup staying blocked for a
+ * long time even with substantially raised hw_inuse.
+ *
+ * An iocg with vdebt should stay online so that the timer can keep
+ * deducting its vdebt and [de]activate use_delay mechanism
+ * accordingly. We don't want to race against the timer trying to
+ * clear them and leave @iocg inactive w/ dangling use_delay heavily
+ * penalizing the cgroup and its descendants.
*/
if (bio_issue_as_root_blkg(bio) || fatal_signal_pending(current)) {
- atomic64_add(abs_cost, &iocg->abs_vdebt);
+ iocg->abs_vdebt += abs_cost;
if (iocg_kick_delay(iocg, &now, cost))
blkcg_schedule_throttle(rqos->q,
(bio->bi_opf & REQ_SWAP) == REQ_SWAP);
+ spin_unlock_irq(&iocg->waitq.lock);
return;
}
@@ -1756,20 +1782,6 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
* All waiters are on iocg->waitq and the wait states are
* synchronized using waitq.lock.
*/
- spin_lock_irq(&iocg->waitq.lock);
-
- /*
- * We activated above but w/o any synchronization. Deactivation is
- * synchronized with waitq.lock and we won't get deactivated as
- * long as we're waiting, so we're good if we're activated here.
- * In the unlikely case that we are deactivated, just issue the IO.
- */
- if (unlikely(list_empty(&iocg->active_list))) {
- spin_unlock_irq(&iocg->waitq.lock);
- iocg_commit_bio(iocg, bio, cost);
- return;
- }
-
init_waitqueue_func_entry(&wait.wait, iocg_wake_fn);
wait.wait.private = current;
wait.bio = bio;
@@ -1801,6 +1813,7 @@ static void ioc_rqos_merge(struct rq_qos *rqos, struct request *rq,
struct ioc_now now;
u32 hw_inuse;
u64 abs_cost, cost;
+ unsigned long flags;
/* bypass if disabled or for root cgroup */
if (!ioc->enabled || !iocg->level)
@@ -1820,15 +1833,28 @@ static void ioc_rqos_merge(struct rq_qos *rqos, struct request *rq,
iocg->cursor = bio_end;
/*
- * Charge if there's enough vtime budget and the existing request
- * has cost assigned. Otherwise, account it as debt. See debt
- * handling in ioc_rqos_throttle() for details.
+ * Charge if there's enough vtime budget and the existing request has
+ * cost assigned.
*/
if (rq->bio && rq->bio->bi_iocost_cost &&
- time_before_eq64(atomic64_read(&iocg->vtime) + cost, now.vnow))
+ time_before_eq64(atomic64_read(&iocg->vtime) + cost, now.vnow)) {
iocg_commit_bio(iocg, bio, cost);
- else
- atomic64_add(abs_cost, &iocg->abs_vdebt);
+ return;
+ }
+
+ /*
+ * Otherwise, account it as debt if @iocg is online, which it should
+ * be for the vast majority of cases. See debt handling in
+ * ioc_rqos_throttle() for details.
+ */
+ spin_lock_irqsave(&iocg->waitq.lock, flags);
+ if (likely(!list_empty(&iocg->active_list))) {
+ iocg->abs_vdebt += abs_cost;
+ iocg_kick_delay(iocg, &now, cost);
+ } else {
+ iocg_commit_bio(iocg, bio, cost);
+ }
+ spin_unlock_irqrestore(&iocg->waitq.lock, flags);
}
static void ioc_rqos_done_bio(struct rq_qos *rqos, struct bio *bio)
@@ -1998,7 +2024,6 @@ static void ioc_pd_init(struct blkg_policy_data *pd)
iocg->ioc = ioc;
atomic64_set(&iocg->vtime, now.vnow);
atomic64_set(&iocg->done_vtime, now.vnow);
- atomic64_set(&iocg->abs_vdebt, 0);
atomic64_set(&iocg->active_period, atomic64_read(&ioc->cur_period));
INIT_LIST_HEAD(&iocg->active_list);
iocg->hweight_active = HWEIGHT_WHOLE;
diff --git a/crypto/lrw.c b/crypto/lrw.c
index 376d7ed3f1f8..3c734b81b3a2 100644
--- a/crypto/lrw.c
+++ b/crypto/lrw.c
@@ -287,7 +287,7 @@ static void exit_tfm(struct crypto_skcipher *tfm)
crypto_free_skcipher(ctx->child);
}
-static void free(struct skcipher_instance *inst)
+static void free_inst(struct skcipher_instance *inst)
{
crypto_drop_skcipher(skcipher_instance_ctx(inst));
kfree(inst);
@@ -400,12 +400,12 @@ static int create(struct crypto_template *tmpl, struct rtattr **tb)
inst->alg.encrypt = encrypt;
inst->alg.decrypt = decrypt;
- inst->free = free;
+ inst->free = free_inst;
err = skcipher_register_instance(tmpl, inst);
if (err) {
err_free_inst:
- free(inst);
+ free_inst(inst);
}
return err;
}
diff --git a/crypto/xts.c b/crypto/xts.c
index dbdd8af629e6..6d8cea94b3cf 100644
--- a/crypto/xts.c
+++ b/crypto/xts.c
@@ -322,7 +322,7 @@ static void exit_tfm(struct crypto_skcipher *tfm)
crypto_free_cipher(ctx->tweak);
}
-static void free(struct skcipher_instance *inst)
+static void free_inst(struct skcipher_instance *inst)
{
crypto_drop_skcipher(skcipher_instance_ctx(inst));
kfree(inst);
@@ -434,12 +434,12 @@ static int create(struct crypto_template *tmpl, struct rtattr **tb)
inst->alg.encrypt = encrypt;
inst->alg.decrypt = decrypt;
- inst->free = free;
+ inst->free = free_inst;
err = skcipher_register_instance(tmpl, inst);
if (err) {
err_free_inst:
- free(inst);
+ free_inst(inst);
}
return err;
}
diff --git a/drivers/firmware/efi/tpm.c b/drivers/firmware/efi/tpm.c
index 31f9f0e369b9..55b031d2c989 100644
--- a/drivers/firmware/efi/tpm.c
+++ b/drivers/firmware/efi/tpm.c
@@ -16,7 +16,7 @@
int efi_tpm_final_log_size;
EXPORT_SYMBOL(efi_tpm_final_log_size);
-static int tpm2_calc_event_log_size(void *data, int count, void *size_info)
+static int __init tpm2_calc_event_log_size(void *data, int count, void *size_info)
{
struct tcg_pcr_event2_head *header;
int event_size, size = 0;
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 20cce366e951..1dc3718560d0 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -101,6 +101,8 @@ struct kmem_cache *amd_iommu_irq_cache;
static void update_domain(struct protection_domain *domain);
static int protection_domain_init(struct protection_domain *domain);
static void detach_device(struct device *dev);
+static void update_and_flush_device_table(struct protection_domain *domain,
+ struct domain_pgtable *pgtable);
/****************************************************************************
*
@@ -151,6 +153,26 @@ static struct protection_domain *to_pdomain(struct iommu_domain *dom)
return container_of(dom, struct protection_domain, domain);
}
+static void amd_iommu_domain_get_pgtable(struct protection_domain *domain,
+ struct domain_pgtable *pgtable)
+{
+ u64 pt_root = atomic64_read(&domain->pt_root);
+
+ pgtable->root = (u64 *)(pt_root & PAGE_MASK);
+ pgtable->mode = pt_root & 7; /* lowest 3 bits encode pgtable mode */
+}
+
+static u64 amd_iommu_domain_encode_pgtable(u64 *root, int mode)
+{
+ u64 pt_root;
+
+ /* lowest 3 bits encode pgtable mode */
+ pt_root = mode & 7;
+ pt_root |= (u64)root;
+
+ return pt_root;
+}
+
static struct iommu_dev_data *alloc_dev_data(u16 devid)
{
struct iommu_dev_data *dev_data;
@@ -1397,13 +1419,18 @@ static struct page *free_sub_pt(unsigned long root, int mode,
static void free_pagetable(struct protection_domain *domain)
{
- unsigned long root = (unsigned long)domain->pt_root;
+ struct domain_pgtable pgtable;
struct page *freelist = NULL;
+ unsigned long root;
- BUG_ON(domain->mode < PAGE_MODE_NONE ||
- domain->mode > PAGE_MODE_6_LEVEL);
+ amd_iommu_domain_get_pgtable(domain, &pgtable);
+ atomic64_set(&domain->pt_root, 0);
- freelist = free_sub_pt(root, domain->mode, freelist);
+ BUG_ON(pgtable.mode < PAGE_MODE_NONE ||
+ pgtable.mode > PAGE_MODE_6_LEVEL);
+
+ root = (unsigned long)pgtable.root;
+ freelist = free_sub_pt(root, pgtable.mode, freelist);
free_page_list(freelist);
}
@@ -1417,24 +1444,39 @@ static bool increase_address_space(struct protection_domain *domain,
unsigned long address,
gfp_t gfp)
{
+ struct domain_pgtable pgtable;
unsigned long flags;
- bool ret = false;
- u64 *pte;
+ bool ret = true;
+ u64 *pte, root;
spin_lock_irqsave(&domain->lock, flags);
- if (address <= PM_LEVEL_SIZE(domain->mode) ||
- WARN_ON_ONCE(domain->mode == PAGE_MODE_6_LEVEL))
+ amd_iommu_domain_get_pgtable(domain, &pgtable);
+
+ if (address <= PM_LEVEL_SIZE(pgtable.mode))
+ goto out;
+
+ ret = false;
+ if (WARN_ON_ONCE(pgtable.mode == PAGE_MODE_6_LEVEL))
goto out;
pte = (void *)get_zeroed_page(gfp);
if (!pte)
goto out;
- *pte = PM_LEVEL_PDE(domain->mode,
- iommu_virt_to_phys(domain->pt_root));
- domain->pt_root = pte;
- domain->mode += 1;
+ *pte = PM_LEVEL_PDE(pgtable.mode, iommu_virt_to_phys(pgtable.root));
+
+ pgtable.root = pte;
+ pgtable.mode += 1;
+ update_and_flush_device_table(domain, &pgtable);
+ domain_flush_complete(domain);
+
+ /*
+ * Device Table needs to be updated and flushed before the new root can
+ * be published.
+ */
+ root = amd_iommu_domain_encode_pgtable(pte, pgtable.mode);
+ atomic64_set(&domain->pt_root, root);
ret = true;
@@ -1451,16 +1493,29 @@ static u64 *alloc_pte(struct protection_domain *domain,
gfp_t gfp,
bool *updated)
{
+ struct domain_pgtable pgtable;
int level, end_lvl;
u64 *pte, *page;
BUG_ON(!is_power_of_2(page_size));
- while (address > PM_LEVEL_SIZE(domain->mode))
- *updated = increase_address_space(domain, address, gfp) || *updated;
+ amd_iommu_domain_get_pgtable(domain, &pgtable);
+
+ while (address > PM_LEVEL_SIZE(pgtable.mode)) {
+ /*
+ * Return an error if there is no memory to update the
+ * page-table.
+ */
+ if (!increase_address_space(domain, address, gfp))
+ return NULL;
+
+ /* Read new values to check if update was successful */
+ amd_iommu_domain_get_pgtable(domain, &pgtable);
+ }
+
- level = domain->mode - 1;
- pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
+ level = pgtable.mode - 1;
+ pte = &pgtable.root[PM_LEVEL_INDEX(level, address)];
address = PAGE_SIZE_ALIGN(address, page_size);
end_lvl = PAGE_SIZE_LEVEL(page_size);
@@ -1536,16 +1591,19 @@ static u64 *fetch_pte(struct protection_domain *domain,
unsigned long address,
unsigned long *page_size)
{
+ struct domain_pgtable pgtable;
int level;
u64 *pte;
*page_size = 0;
- if (address > PM_LEVEL_SIZE(domain->mode))
+ amd_iommu_domain_get_pgtable(domain, &pgtable);
+
+ if (address > PM_LEVEL_SIZE(pgtable.mode))
return NULL;
- level = domain->mode - 1;
- pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
+ level = pgtable.mode - 1;
+ pte = &pgtable.root[PM_LEVEL_INDEX(level, address)];
*page_size = PTE_LEVEL_PAGE_SIZE(level);
while (level > 0) {
@@ -1660,7 +1718,13 @@ out:
unsigned long flags;
spin_lock_irqsave(&dom->lock, flags);
- update_domain(dom);
+ /*
+ * Flush domain TLB(s) and wait for completion. Any Device-Table
+ * Updates and flushing already happened in
+ * increase_address_space().
+ */
+ domain_flush_tlb_pde(dom);
+ domain_flush_complete(dom);
spin_unlock_irqrestore(&dom->lock, flags);
}
@@ -1806,6 +1870,7 @@ static void dma_ops_domain_free(struct protection_domain *domain)
static struct protection_domain *dma_ops_domain_alloc(void)
{
struct protection_domain *domain;
+ u64 *pt_root, root;
domain = kzalloc(sizeof(struct protection_domain), GFP_KERNEL);
if (!domain)
@@ -1814,12 +1879,14 @@ static struct protection_domain *dma_ops_domain_alloc(void)
if (protection_domain_init(domain))
goto free_domain;
- domain->mode = PAGE_MODE_3_LEVEL;
- domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL);
- domain->flags = PD_DMA_OPS_MASK;
- if (!domain->pt_root)
+ pt_root = (void *)get_zeroed_page(GFP_KERNEL);
+ if (!pt_root)
goto free_domain;
+ root = amd_iommu_domain_encode_pgtable(pt_root, PAGE_MODE_3_LEVEL);
+ atomic64_set(&domain->pt_root, root);
+ domain->flags = PD_DMA_OPS_MASK;
+
if (iommu_get_dma_cookie(&domain->domain) == -ENOMEM)
goto free_domain;
@@ -1841,16 +1908,17 @@ static bool dma_ops_domain(struct protection_domain *domain)
}
static void set_dte_entry(u16 devid, struct protection_domain *domain,
+ struct domain_pgtable *pgtable,
bool ats, bool ppr)
{
u64 pte_root = 0;
u64 flags = 0;
u32 old_domid;
- if (domain->mode != PAGE_MODE_NONE)
- pte_root = iommu_virt_to_phys(domain->pt_root);
+ if (pgtable->mode != PAGE_MODE_NONE)
+ pte_root = iommu_virt_to_phys(pgtable->root);
- pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
+ pte_root |= (pgtable->mode & DEV_ENTRY_MODE_MASK)
<< DEV_ENTRY_MODE_SHIFT;
pte_root |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V | DTE_FLAG_TV;
@@ -1923,6 +1991,7 @@ static void clear_dte_entry(u16 devid)
static void do_attach(struct iommu_dev_data *dev_data,
struct protection_domain *domain)
{
+ struct domain_pgtable pgtable;
struct amd_iommu *iommu;
bool ats;
@@ -1938,7 +2007,9 @@ static void do_attach(struct iommu_dev_data *dev_data,
domain->dev_cnt += 1;
/* Update device table */
- set_dte_entry(dev_data->devid, domain, ats, dev_data->iommu_v2);
+ amd_iommu_domain_get_pgtable(domain, &pgtable);
+ set_dte_entry(dev_data->devid, domain, &pgtable,
+ ats, dev_data->iommu_v2);
clone_aliases(dev_data->pdev);
device_flush_dte(dev_data);
@@ -2249,23 +2320,36 @@ static int amd_iommu_domain_get_attr(struct iommu_domain *domain,
*
*****************************************************************************/
-static void update_device_table(struct protection_domain *domain)
+static void update_device_table(struct protection_domain *domain,
+ struct domain_pgtable *pgtable)
{
struct iommu_dev_data *dev_data;
list_for_each_entry(dev_data, &domain->dev_list, list) {
- set_dte_entry(dev_data->devid, domain, dev_data->ats.enabled,
- dev_data->iommu_v2);
+ set_dte_entry(dev_data->devid, domain, pgtable,
+ dev_data->ats.enabled, dev_data->iommu_v2);
clone_aliases(dev_data->pdev);
}
}
+static void update_and_flush_device_table(struct protection_domain *domain,
+ struct domain_pgtable *pgtable)
+{
+ update_device_table(domain, pgtable);
+ domain_flush_devices(domain);
+}
+
static void update_domain(struct protection_domain *domain)
{
- update_device_table(domain);
+ struct domain_pgtable pgtable;
- domain_flush_devices(domain);
+ /* Update device table */
+ amd_iommu_domain_get_pgtable(domain, &pgtable);
+ update_and_flush_device_table(domain, &pgtable);
+
+ /* Flush domain TLB(s) and wait for completion */
domain_flush_tlb_pde(domain);
+ domain_flush_complete(domain);
}
int __init amd_iommu_init_api(void)
@@ -2375,6 +2459,7 @@ out_err:
static struct iommu_domain *amd_iommu_domain_alloc(unsigned type)
{
struct protection_domain *pdomain;
+ u64 *pt_root, root;
switch (type) {
case IOMMU_DOMAIN_UNMANAGED:
@@ -2382,13 +2467,15 @@ static struct iommu_domain *amd_iommu_domain_alloc(unsigned type)
if (!pdomain)
return NULL;
- pdomain->mode = PAGE_MODE_3_LEVEL;
- pdomain->pt_root = (void *)get_zeroed_page(GFP_KERNEL);
- if (!pdomain->pt_root) {
+ pt_root = (void *)get_zeroed_page(GFP_KERNEL);
+ if (!pt_root) {
protection_domain_free(pdomain);
return NULL;
}
+ root = amd_iommu_domain_encode_pgtable(pt_root, PAGE_MODE_3_LEVEL);
+ atomic64_set(&pdomain->pt_root, root);
+
pdomain->domain.geometry.aperture_start = 0;
pdomain->domain.geometry.aperture_end = ~0ULL;
pdomain->domain.geometry.force_aperture = true;
@@ -2406,7 +2493,7 @@ static struct iommu_domain *amd_iommu_domain_alloc(unsigned type)
if (!pdomain)
return NULL;
- pdomain->mode = PAGE_MODE_NONE;
+ atomic64_set(&pdomain->pt_root, PAGE_MODE_NONE);
break;
default:
return NULL;
@@ -2418,6 +2505,7 @@ static struct iommu_domain *amd_iommu_domain_alloc(unsigned type)
static void amd_iommu_domain_free(struct iommu_domain *dom)
{
struct protection_domain *domain;
+ struct domain_pgtable pgtable;
domain = to_pdomain(dom);
@@ -2435,7 +2523,9 @@ static void amd_iommu_domain_free(struct iommu_domain *dom)
dma_ops_domain_free(domain);
break;
default:
- if (domain->mode != PAGE_MODE_NONE)
+ amd_iommu_domain_get_pgtable(domain, &pgtable);
+
+ if (pgtable.mode != PAGE_MODE_NONE)
free_pagetable(domain);
if (domain->flags & PD_IOMMUV2_MASK)
@@ -2518,10 +2608,12 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
gfp_t gfp)
{
struct protection_domain *domain = to_pdomain(dom);
+ struct domain_pgtable pgtable;
int prot = 0;
int ret;
- if (domain->mode == PAGE_MODE_NONE)
+ amd_iommu_domain_get_pgtable(domain, &pgtable);
+ if (pgtable.mode == PAGE_MODE_NONE)
return -EINVAL;
if (iommu_prot & IOMMU_READ)
@@ -2541,8 +2633,10 @@ static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
struct iommu_iotlb_gather *gather)
{
struct protection_domain *domain = to_pdomain(dom);
+ struct domain_pgtable pgtable;
- if (domain->mode == PAGE_MODE_NONE)
+ amd_iommu_domain_get_pgtable(domain, &pgtable);
+ if (pgtable.mode == PAGE_MODE_NONE)
return 0;
return iommu_unmap_page(domain, iova, page_size);
@@ -2553,9 +2647,11 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
{
struct protection_domain *domain = to_pdomain(dom);
unsigned long offset_mask, pte_pgsize;
+ struct domain_pgtable pgtable;
u64 *pte, __pte;
- if (domain->mode == PAGE_MODE_NONE)
+ amd_iommu_domain_get_pgtable(domain, &pgtable);
+ if (pgtable.mode == PAGE_MODE_NONE)
return iova;
pte = fetch_pte(domain, iova, &pte_pgsize);
@@ -2708,16 +2804,26 @@ EXPORT_SYMBOL(amd_iommu_unregister_ppr_notifier);
void amd_iommu_domain_direct_map(struct iommu_domain *dom)
{
struct protection_domain *domain = to_pdomain(dom);
+ struct domain_pgtable pgtable;
unsigned long flags;
+ u64 pt_root;
spin_lock_irqsave(&domain->lock, flags);
+ /* First save pgtable configuration*/
+ amd_iommu_domain_get_pgtable(domain, &pgtable);
+
/* Update data structure */
- domain->mode = PAGE_MODE_NONE;
+ pt_root = amd_iommu_domain_encode_pgtable(NULL, PAGE_MODE_NONE);
+ atomic64_set(&domain->pt_root, pt_root);
/* Make changes visible to IOMMUs */
update_domain(domain);
+ /* Restore old pgtable in domain->ptroot to free page-table */
+ pt_root = amd_iommu_domain_encode_pgtable(pgtable.root, pgtable.mode);
+ atomic64_set(&domain->pt_root, pt_root);
+
/* Page-table is not visible to IOMMU anymore, so free it */
free_pagetable(domain);
@@ -2908,9 +3014,11 @@ static u64 *__get_gcr3_pte(u64 *root, int level, int pasid, bool alloc)
static int __set_gcr3(struct protection_domain *domain, int pasid,
unsigned long cr3)
{
+ struct domain_pgtable pgtable;
u64 *pte;
- if (domain->mode != PAGE_MODE_NONE)
+ amd_iommu_domain_get_pgtable(domain, &pgtable);
+ if (pgtable.mode != PAGE_MODE_NONE)
return -EINVAL;
pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, true);
@@ -2924,9 +3032,11 @@ static int __set_gcr3(struct protection_domain *domain, int pasid,
static int __clear_gcr3(struct protection_domain *domain, int pasid)
{
+ struct domain_pgtable pgtable;
u64 *pte;
- if (domain->mode != PAGE_MODE_NONE)
+ amd_iommu_domain_get_pgtable(domain, &pgtable);
+ if (pgtable.mode != PAGE_MODE_NONE)
return -EINVAL;
pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, false);
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index ca8c4522045b..7a8fdec138bd 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -468,8 +468,7 @@ struct protection_domain {
iommu core code */
spinlock_t lock; /* mostly used to lock the page table*/
u16 id; /* the domain id written to the device table */
- int mode; /* paging mode (0-6 levels) */
- u64 *pt_root; /* page table root pointer */
+ atomic64_t pt_root; /* pgtable root and pgtable mode */
int glx; /* Number of levels for GCR3 table */
u64 *gcr3_tbl; /* Guest CR3 table */
unsigned long flags; /* flags to find out type of domain */
@@ -477,6 +476,12 @@ struct protection_domain {
unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */
};
+/* For decocded pt_root */
+struct domain_pgtable {
+ int mode;
+ u64 *root;
+};
+
/*
* Structure where we save information about one hardware AMD IOMMU in the
* system.
diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
index d5cac4f46ca5..4e1d11af23c8 100644
--- a/drivers/iommu/virtio-iommu.c
+++ b/drivers/iommu/virtio-iommu.c
@@ -453,7 +453,7 @@ static int viommu_add_resv_mem(struct viommu_endpoint *vdev,
if (!region)
return -ENOMEM;
- list_add(&vdev->resv_regions, &region->list);
+ list_add(&region->list, &vdev->resv_regions);
return 0;
}
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index f2adea96b04c..f3c037f5a9ba 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1110,7 +1110,7 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid,
* Don't treat an error as fatal, as we potentially already
* have a NGUID or EUI-64.
*/
- if (status > 0)
+ if (status > 0 && !(status & NVME_SC_DNR))
status = 0;
goto free_data;
}
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 4e79e412b276..e13c370de830 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -973,9 +973,13 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx)
static inline void nvme_update_cq_head(struct nvme_queue *nvmeq)
{
- if (++nvmeq->cq_head == nvmeq->q_depth) {
+ u16 tmp = nvmeq->cq_head + 1;
+
+ if (tmp == nvmeq->q_depth) {
nvmeq->cq_head = 0;
nvmeq->cq_phase ^= 1;
+ } else {
+ nvmeq->cq_head = tmp;
}
}
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 481ac97b4d25..dcaed75de9e6 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -271,7 +271,7 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
&congestion_kb_fops);
snprintf(name, sizeof(name), "../../bdi/%s",
- dev_name(fsc->sb->s_bdi->dev));
+ bdi_dev_name(fsc->sb->s_bdi));
fsc->debugfs_bdi =
debugfs_create_symlink("bdi",
fsc->client->debugfs_dir,
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 0b91b0631173..979d9f977409 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -680,8 +680,6 @@ struct io_op_def {
unsigned needs_mm : 1;
/* needs req->file assigned */
unsigned needs_file : 1;
- /* needs req->file assigned IFF fd is >= 0 */
- unsigned fd_non_neg : 1;
/* hash wq insertion if file is a regular file */
unsigned hash_reg_file : 1;
/* unbound wq insertion if file is a non-regular file */
@@ -784,8 +782,6 @@ static const struct io_op_def io_op_defs[] = {
.needs_file = 1,
},
[IORING_OP_OPENAT] = {
- .needs_file = 1,
- .fd_non_neg = 1,
.file_table = 1,
.needs_fs = 1,
},
@@ -799,8 +795,6 @@ static const struct io_op_def io_op_defs[] = {
},
[IORING_OP_STATX] = {
.needs_mm = 1,
- .needs_file = 1,
- .fd_non_neg = 1,
.needs_fs = 1,
.file_table = 1,
},
@@ -837,8 +831,6 @@ static const struct io_op_def io_op_defs[] = {
.buffer_select = 1,
},
[IORING_OP_OPENAT2] = {
- .needs_file = 1,
- .fd_non_neg = 1,
.file_table = 1,
.needs_fs = 1,
},
@@ -5368,15 +5360,6 @@ static void io_wq_submit_work(struct io_wq_work **workptr)
io_steal_work(req, workptr);
}
-static int io_req_needs_file(struct io_kiocb *req, int fd)
-{
- if (!io_op_defs[req->opcode].needs_file)
- return 0;
- if ((fd == -1 || fd == AT_FDCWD) && io_op_defs[req->opcode].fd_non_neg)
- return 0;
- return 1;
-}
-
static inline struct file *io_file_from_index(struct io_ring_ctx *ctx,
int index)
{
@@ -5414,14 +5397,11 @@ static int io_file_get(struct io_submit_state *state, struct io_kiocb *req,
}
static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req,
- int fd, unsigned int flags)
+ int fd)
{
bool fixed;
- if (!io_req_needs_file(req, fd))
- return 0;
-
- fixed = (flags & IOSQE_FIXED_FILE);
+ fixed = (req->flags & REQ_F_FIXED_FILE) != 0;
if (unlikely(!fixed && req->needs_fixed_file))
return -EBADF;
@@ -5798,7 +5778,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
struct io_submit_state *state, bool async)
{
unsigned int sqe_flags;
- int id, fd;
+ int id;
/*
* All io need record the previous position, if LINK vs DARIN,
@@ -5850,8 +5830,10 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
IOSQE_ASYNC | IOSQE_FIXED_FILE |
IOSQE_BUFFER_SELECT | IOSQE_IO_LINK);
- fd = READ_ONCE(sqe->fd);
- return io_req_set_file(state, req, fd, sqe_flags);
+ if (!io_op_defs[req->opcode].needs_file)
+ return 0;
+
+ return io_req_set_file(state, req, READ_ONCE(sqe->fd));
}
static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
@@ -7360,11 +7342,9 @@ static int io_uring_release(struct inode *inode, struct file *file)
static void io_uring_cancel_files(struct io_ring_ctx *ctx,
struct files_struct *files)
{
- struct io_kiocb *req;
- DEFINE_WAIT(wait);
-
while (!list_empty_careful(&ctx->inflight_list)) {
- struct io_kiocb *cancel_req = NULL;
+ struct io_kiocb *cancel_req = NULL, *req;
+ DEFINE_WAIT(wait);
spin_lock_irq(&ctx->inflight_lock);
list_for_each_entry(req, &ctx->inflight_list, inflight_entry) {
@@ -7404,6 +7384,7 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
*/
if (refcount_sub_and_test(2, &cancel_req->refs)) {
io_put_req(cancel_req);
+ finish_wait(&ctx->inflight_wait, &wait);
continue;
}
}
@@ -7411,8 +7392,8 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
io_wq_cancel_work(ctx->io_wq, &cancel_req->work);
io_put_req(cancel_req);
schedule();
+ finish_wait(&ctx->inflight_wait, &wait);
}
- finish_wait(&ctx->inflight_wait, &wait);
}
static int io_uring_flush(struct file *file, void *data)
@@ -7761,7 +7742,8 @@ err:
return ret;
}
-static int io_uring_create(unsigned entries, struct io_uring_params *p)
+static int io_uring_create(unsigned entries, struct io_uring_params *p,
+ struct io_uring_params __user *params)
{
struct user_struct *user = NULL;
struct io_ring_ctx *ctx;
@@ -7853,6 +7835,14 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p)
p->cq_off.overflow = offsetof(struct io_rings, cq_overflow);
p->cq_off.cqes = offsetof(struct io_rings, cqes);
+ p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP |
+ IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS |
+ IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL;
+
+ if (copy_to_user(params, p, sizeof(*p))) {
+ ret = -EFAULT;
+ goto err;
+ }
/*
* Install ring fd as the very last thing, so we don't risk someone
* having closed it before we finish setup
@@ -7861,9 +7851,6 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p)
if (ret < 0)
goto err;
- p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP |
- IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS |
- IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL;
trace_io_uring_create(ret, ctx, p->sq_entries, p->cq_entries, p->flags);
return ret;
err:
@@ -7879,7 +7866,6 @@ err:
static long io_uring_setup(u32 entries, struct io_uring_params __user *params)
{
struct io_uring_params p;
- long ret;
int i;
if (copy_from_user(&p, params, sizeof(p)))
@@ -7894,14 +7880,7 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params)
IORING_SETUP_CLAMP | IORING_SETUP_ATTACH_WQ))
return -EINVAL;
- ret = io_uring_create(entries, &p);
- if (ret < 0)
- return ret;
-
- if (copy_to_user(params, &p, sizeof(p)))
- return -EFAULT;
-
- return ret;
+ return io_uring_create(entries, &p, params);
}
SYSCALL_DEFINE2(io_uring_setup, u32, entries,
diff --git a/fs/splice.c b/fs/splice.c
index 4735defc46ee..fd0a1e7e5959 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1118,6 +1118,10 @@ long do_splice(struct file *in, loff_t __user *off_in,
loff_t offset;
long ret;
+ if (unlikely(!(in->f_mode & FMODE_READ) ||
+ !(out->f_mode & FMODE_WRITE)))
+ return -EBADF;
+
ipipe = get_pipe_info(in);
opipe = get_pipe_info(out);
@@ -1125,12 +1129,6 @@ long do_splice(struct file *in, loff_t __user *off_in,
if (off_in || off_out)
return -ESPIPE;
- if (!(in->f_mode & FMODE_READ))
- return -EBADF;
-
- if (!(out->f_mode & FMODE_WRITE))
- return -EBADF;
-
/* Splicing to self would be fun, but... */
if (ipipe == opipe)
return -EINVAL;
@@ -1153,9 +1151,6 @@ long do_splice(struct file *in, loff_t __user *off_in,
offset = out->f_pos;
}
- if (unlikely(!(out->f_mode & FMODE_WRITE)))
- return -EBADF;
-
if (unlikely(out->f_flags & O_APPEND))
return -EINVAL;
@@ -1440,15 +1435,11 @@ SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in,
error = -EBADF;
in = fdget(fd_in);
if (in.file) {
- if (in.file->f_mode & FMODE_READ) {
- out = fdget(fd_out);
- if (out.file) {
- if (out.file->f_mode & FMODE_WRITE)
- error = do_splice(in.file, off_in,
- out.file, off_out,
- len, flags);
- fdput(out);
- }
+ out = fdget(fd_out);
+ if (out.file) {
+ error = do_splice(in.file, off_in, out.file, off_out,
+ len, flags);
+ fdput(out);
}
fdput(in);
}
@@ -1770,6 +1761,10 @@ static long do_tee(struct file *in, struct file *out, size_t len,
struct pipe_inode_info *opipe = get_pipe_info(out);
int ret = -EINVAL;
+ if (unlikely(!(in->f_mode & FMODE_READ) ||
+ !(out->f_mode & FMODE_WRITE)))
+ return -EBADF;
+
/*
* Duplicate the contents of ipipe to opipe without actually
* copying the data.
@@ -1795,7 +1790,7 @@ static long do_tee(struct file *in, struct file *out, size_t len,
SYSCALL_DEFINE4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags)
{
- struct fd in;
+ struct fd in, out;
int error;
if (unlikely(flags & ~SPLICE_F_ALL))
@@ -1807,14 +1802,10 @@ SYSCALL_DEFINE4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags)
error = -EBADF;
in = fdget(fdin);
if (in.file) {
- if (in.file->f_mode & FMODE_READ) {
- struct fd out = fdget(fdout);
- if (out.file) {
- if (out.file->f_mode & FMODE_WRITE)
- error = do_tee(in.file, out.file,
- len, flags);
- fdput(out);
- }
+ out = fdget(fdout);
+ if (out.file) {
+ error = do_tee(in.file, out.file, len, flags);
+ fdput(out);
}
fdput(in);
}
diff --git a/fs/vboxsf/super.c b/fs/vboxsf/super.c
index 675e26989376..8fe03b4a0d2b 100644
--- a/fs/vboxsf/super.c
+++ b/fs/vboxsf/super.c
@@ -164,7 +164,7 @@ static int vboxsf_fill_super(struct super_block *sb, struct fs_context *fc)
goto fail_free;
}
- err = super_setup_bdi_name(sb, "vboxsf-%s.%d", fc->source, sbi->bdi_id);
+ err = super_setup_bdi_name(sb, "vboxsf-%d", sbi->bdi_id);
if (err)
goto fail_free;
diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index ee577a83cfe6..7367150f962a 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -219,6 +219,7 @@ struct backing_dev_info {
wait_queue_head_t wb_waitq;
struct device *dev;
+ char dev_name[64];
struct device *owner;
struct timer_list laptop_mode_wb_timer;
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index f88197c1ffc2..c9ad5c3b7b4b 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -505,13 +505,6 @@ static inline int bdi_rw_congested(struct backing_dev_info *bdi)
(1 << WB_async_congested));
}
-extern const char *bdi_unknown_name;
-
-static inline const char *bdi_dev_name(struct backing_dev_info *bdi)
-{
- if (!bdi || !bdi->dev)
- return bdi_unknown_name;
- return dev_name(bdi->dev);
-}
+const char *bdi_dev_name(struct backing_dev_info *bdi);
#endif /* _LINUX_BACKING_DEV_H */
diff --git a/include/trace/events/wbt.h b/include/trace/events/wbt.h
index 784814160197..9c66e59d859c 100644
--- a/include/trace/events/wbt.h
+++ b/include/trace/events/wbt.h
@@ -33,7 +33,7 @@ TRACE_EVENT(wbt_stat,
),
TP_fast_assign(
- strlcpy(__entry->name, dev_name(bdi->dev),
+ strlcpy(__entry->name, bdi_dev_name(bdi),
ARRAY_SIZE(__entry->name));
__entry->rmean = stat[0].mean;
__entry->rmin = stat[0].min;
@@ -68,7 +68,7 @@ TRACE_EVENT(wbt_lat,
),
TP_fast_assign(
- strlcpy(__entry->name, dev_name(bdi->dev),
+ strlcpy(__entry->name, bdi_dev_name(bdi),
ARRAY_SIZE(__entry->name));
__entry->lat = div_u64(lat, 1000);
),
@@ -105,7 +105,7 @@ TRACE_EVENT(wbt_step,
),
TP_fast_assign(
- strlcpy(__entry->name, dev_name(bdi->dev),
+ strlcpy(__entry->name, bdi_dev_name(bdi),
ARRAY_SIZE(__entry->name));
__entry->msg = msg;
__entry->step = step;
@@ -141,7 +141,7 @@ TRACE_EVENT(wbt_timer,
),
TP_fast_assign(
- strlcpy(__entry->name, dev_name(bdi->dev),
+ strlcpy(__entry->name, bdi_dev_name(bdi),
ARRAY_SIZE(__entry->name));
__entry->status = status;
__entry->step = step;
diff --git a/init/Kconfig b/init/Kconfig
index 9e22ee8fbd75..9278a603d399 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -39,22 +39,6 @@ config TOOLS_SUPPORT_RELR
config CC_HAS_ASM_INLINE
def_bool $(success,echo 'void foo(void) { asm inline (""); }' | $(CC) -x c - -c -o /dev/null)
-config CC_HAS_WARN_MAYBE_UNINITIALIZED
- def_bool $(cc-option,-Wmaybe-uninitialized)
- help
- GCC >= 4.7 supports this option.
-
-config CC_DISABLE_WARN_MAYBE_UNINITIALIZED
- bool
- depends on CC_HAS_WARN_MAYBE_UNINITIALIZED
- default CC_IS_GCC && GCC_VERSION < 40900 # unreliable for GCC < 4.9
- help
- GCC's -Wmaybe-uninitialized is not reliable by definition.
- Lots of false positive warnings are produced in some cases.
-
- If this option is enabled, -Wno-maybe-uninitialzed is passed
- to the compiler to suppress maybe-uninitialized warnings.
-
config CONSTRUCTORS
bool
depends on !UML
@@ -1257,14 +1241,12 @@ config CC_OPTIMIZE_FOR_PERFORMANCE
config CC_OPTIMIZE_FOR_PERFORMANCE_O3
bool "Optimize more for performance (-O3)"
depends on ARC
- imply CC_DISABLE_WARN_MAYBE_UNINITIALIZED # avoid false positives
help
Choosing this option will pass "-O3" to your compiler to optimize
the kernel yet more for performance.
config CC_OPTIMIZE_FOR_SIZE
bool "Optimize for size (-Os)"
- imply CC_DISABLE_WARN_MAYBE_UNINITIALIZED # avoid false positives
help
Choosing this option will pass "-Os" to your compiler resulting
in a smaller kernel.
diff --git a/init/initramfs.c b/init/initramfs.c
index 8ec1be4d7d51..7a38012e1af7 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -542,7 +542,7 @@ void __weak free_initrd_mem(unsigned long start, unsigned long end)
}
#ifdef CONFIG_KEXEC_CORE
-static bool kexec_free_initrd(void)
+static bool __init kexec_free_initrd(void)
{
unsigned long crashk_start = (unsigned long)__va(crashk_res.start);
unsigned long crashk_end = (unsigned long)__va(crashk_res.end);
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 402eef84c859..743647005f64 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -466,7 +466,6 @@ config PROFILE_ANNOTATED_BRANCHES
config PROFILE_ALL_BRANCHES
bool "Profile all if conditionals" if !FORTIFY_SOURCE
select TRACE_BRANCH_PROFILING
- imply CC_DISABLE_WARN_MAYBE_UNINITIALIZED # avoid false positives
help
This tracer profiles all branch conditions. Every if ()
taken in the kernel is recorded whether it hit or miss.
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index c81b4f3a7268..efc5b83acd2d 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -21,7 +21,7 @@ struct backing_dev_info noop_backing_dev_info = {
EXPORT_SYMBOL_GPL(noop_backing_dev_info);
static struct class *bdi_class;
-const char *bdi_unknown_name = "(unknown)";
+static const char *bdi_unknown_name = "(unknown)";
/*
* bdi_lock protects bdi_tree and updates to bdi_list. bdi_list has RCU
@@ -938,7 +938,8 @@ int bdi_register_va(struct backing_dev_info *bdi, const char *fmt, va_list args)
if (bdi->dev) /* The driver needs to use separate queues per device */
return 0;
- dev = device_create_vargs(bdi_class, NULL, MKDEV(0, 0), bdi, fmt, args);
+ vsnprintf(bdi->dev_name, sizeof(bdi->dev_name), fmt, args);
+ dev = device_create(bdi_class, NULL, MKDEV(0, 0), bdi, bdi->dev_name);
if (IS_ERR(dev))
return PTR_ERR(dev);
@@ -1043,6 +1044,14 @@ void bdi_put(struct backing_dev_info *bdi)
}
EXPORT_SYMBOL(bdi_put);
+const char *bdi_dev_name(struct backing_dev_info *bdi)
+{
+ if (!bdi || !bdi->dev)
+ return bdi_unknown_name;
+ return bdi->dev_name;
+}
+EXPORT_SYMBOL_GPL(bdi_dev_name);
+
static wait_queue_head_t congestion_wqh[2] = {
__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]),
__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1])
diff --git a/tools/cgroup/iocost_monitor.py b/tools/cgroup/iocost_monitor.py
index 7427a5ee761b..9d8e9613008a 100644
--- a/tools/cgroup/iocost_monitor.py
+++ b/tools/cgroup/iocost_monitor.py
@@ -159,7 +159,12 @@ class IocgStat:
else:
self.inflight_pct = 0
- self.debt_ms = iocg.abs_vdebt.counter.value_() / VTIME_PER_USEC / 1000
+ # vdebt used to be an atomic64_t and is now u64, support both
+ try:
+ self.debt_ms = iocg.abs_vdebt.counter.value_() / VTIME_PER_USEC / 1000
+ except:
+ self.debt_ms = iocg.abs_vdebt.value_() / VTIME_PER_USEC / 1000
+
self.use_delay = blkg.use_delay.counter.value_()
self.delay_ms = blkg.delay_nsec.counter.value_() / 1_000_000
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 4b170fd08a28..3c6da70e6084 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -72,6 +72,17 @@ static struct instruction *next_insn_same_func(struct objtool_file *file,
return find_insn(file, func->cfunc->sec, func->cfunc->offset);
}
+static struct instruction *prev_insn_same_sym(struct objtool_file *file,
+ struct instruction *insn)
+{
+ struct instruction *prev = list_prev_entry(insn, list);
+
+ if (&prev->list != &file->insn_list && prev->func == insn->func)
+ return prev;
+
+ return NULL;
+}
+
#define func_for_each_insn(file, func, insn) \
for (insn = find_insn(file, func->sec, func->offset); \
insn; \
@@ -1050,8 +1061,8 @@ static struct rela *find_jump_table(struct objtool_file *file,
* it.
*/
for (;
- &insn->list != &file->insn_list && insn->func && insn->func->pfunc == func;
- insn = insn->first_jump_src ?: list_prev_entry(insn, list)) {
+ insn && insn->func && insn->func->pfunc == func;
+ insn = insn->first_jump_src ?: prev_insn_same_sym(file, insn)) {
if (insn != orig_insn && insn->type == INSN_JUMP_DYNAMIC)
break;
@@ -1449,7 +1460,7 @@ static int update_insn_state_regs(struct instruction *insn, struct insn_state *s
struct cfi_reg *cfa = &state->cfa;
struct stack_op *op = &insn->stack_op;
- if (cfa->base != CFI_SP)
+ if (cfa->base != CFI_SP && cfa->base != CFI_SP_INDIRECT)
return 0;
/* push */
diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h
index 0b79c2353a21..12e01ac190ec 100644
--- a/tools/objtool/elf.h
+++ b/tools/objtool/elf.h
@@ -87,9 +87,10 @@ struct elf {
#define OFFSET_STRIDE (1UL << OFFSET_STRIDE_BITS)
#define OFFSET_STRIDE_MASK (~(OFFSET_STRIDE - 1))
-#define for_offset_range(_offset, _start, _end) \
- for (_offset = ((_start) & OFFSET_STRIDE_MASK); \
- _offset <= ((_end) & OFFSET_STRIDE_MASK); \
+#define for_offset_range(_offset, _start, _end) \
+ for (_offset = ((_start) & OFFSET_STRIDE_MASK); \
+ _offset >= ((_start) & OFFSET_STRIDE_MASK) && \
+ _offset <= ((_end) & OFFSET_STRIDE_MASK); \
_offset += OFFSET_STRIDE)
static inline u32 sec_offset_hash(struct section *sec, unsigned long offset)