From 14d72af7ab00d9c2eb5b473b4fdf3b751b8cf8e2 Mon Sep 17 00:00:00 2001
From: "周琰杰 (Zhou Yanjie)"
Date: Fri, 16 Jul 2021 00:57:07 +0800
Subject: MIPS: Ingenic: Add system type for new Ingenic SoCs.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add system type entries for the JZ4730, JZ4750, JZ4755, JZ4760,
JZ4760B, X2000H, and X2100, so that "cat /proc/cpuinfo" reports the
matching SoC name.

Signed-off-by: 周琰杰 (Zhou Yanjie)
Reviewed-by: Paul Cercueil
Signed-off-by: Thomas Bogendoerfer
---
 arch/mips/generic/board-ingenic.c | 21 +++++++++++++++++++++
 arch/mips/include/asm/bootinfo.h  |  3 +++
 arch/mips/include/asm/cpu.h       |  4 ++--
 3 files changed, 26 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/generic/board-ingenic.c b/arch/mips/generic/board-ingenic.c
index 0cec0bea13d6..e86e0016c548 100644
--- a/arch/mips/generic/board-ingenic.c
+++ b/arch/mips/generic/board-ingenic.c
@@ -21,6 +21,10 @@ static __init char *ingenic_get_system_type(unsigned long machtype)
 {
 	switch (machtype) {
+	case MACH_INGENIC_X2100:
+		return "X2100";
+	case MACH_INGENIC_X2000H:
+		return "X2000H";
 	case MACH_INGENIC_X2000E:
 		return "X2000E";
 	case MACH_INGENIC_X2000:
 		return "X2000";
@@ -37,8 +41,18 @@ static __init char *ingenic_get_system_type(unsigned long machtype)
 		return "JZ4775";
 	case MACH_INGENIC_JZ4770:
 		return "JZ4770";
+	case MACH_INGENIC_JZ4760B:
+		return "JZ4760B";
+	case MACH_INGENIC_JZ4760:
+		return "JZ4760";
+	case MACH_INGENIC_JZ4755:
+		return "JZ4755";
+	case MACH_INGENIC_JZ4750:
+		return "JZ4750";
 	case MACH_INGENIC_JZ4725B:
 		return "JZ4725B";
+	case MACH_INGENIC_JZ4730:
+		return "JZ4730";
 	default:
 		return "JZ4740";
 	}
@@ -61,8 +75,13 @@ static __init const void *ingenic_fixup_fdt(const void *fdt, const void *match_d
 }
 
 static const struct of_device_id ingenic_of_match[] __initconst = {
+	{ .compatible = "ingenic,jz4730", .data = (void *)MACH_INGENIC_JZ4730 },
 	{ .compatible = "ingenic,jz4740", .data = (void *)MACH_INGENIC_JZ4740 },
 	{ .compatible = "ingenic,jz4725b", .data = (void *)MACH_INGENIC_JZ4725B },
+	{ .compatible = "ingenic,jz4750", .data = (void *)MACH_INGENIC_JZ4750 },
+	{ .compatible = "ingenic,jz4755", .data = (void *)MACH_INGENIC_JZ4755 },
+	{ .compatible = "ingenic,jz4760", .data = (void *)MACH_INGENIC_JZ4760 },
+	{ .compatible = "ingenic,jz4760b", .data = (void *)MACH_INGENIC_JZ4760B },
 	{ .compatible = "ingenic,jz4770", .data = (void *)MACH_INGENIC_JZ4770 },
 	{ .compatible = "ingenic,jz4775", .data = (void *)MACH_INGENIC_JZ4775 },
 	{ .compatible = "ingenic,jz4780", .data = (void *)MACH_INGENIC_JZ4780 },
@@ -71,6 +90,8 @@ static const struct of_device_id ingenic_of_match[] __initconst = {
 	{ .compatible = "ingenic,x1830", .data = (void *)MACH_INGENIC_X1830 },
 	{ .compatible = "ingenic,x2000", .data = (void *)MACH_INGENIC_X2000 },
 	{ .compatible = "ingenic,x2000e", .data = (void *)MACH_INGENIC_X2000E },
+	{ .compatible = "ingenic,x2000h", .data = (void *)MACH_INGENIC_X2000H },
+	{ .compatible = "ingenic,x2100", .data = (void *)MACH_INGENIC_X2100 },
 	{}
 };

diff --git a/arch/mips/include/asm/bootinfo.h b/arch/mips/include/asm/bootinfo.h
index 4c2e8173e6ec..2128ba903391 100644
--- a/arch/mips/include/asm/bootinfo.h
+++ b/arch/mips/include/asm/bootinfo.h
@@ -75,6 +75,7 @@ enum ingenic_machine_type {
 	MACH_INGENIC_JZ4750,
 	MACH_INGENIC_JZ4755,
 	MACH_INGENIC_JZ4760,
+	MACH_INGENIC_JZ4760B,
 	MACH_INGENIC_JZ4770,
 	MACH_INGENIC_JZ4775,
 	MACH_INGENIC_JZ4780,
@@ -83,6 +84,8 @@ enum ingenic_machine_type {
 	MACH_INGENIC_X1830,
 	MACH_INGENIC_X2000,
MACH_INGENIC_X2000E, + MACH_INGENIC_X2000H, + MACH_INGENIC_X2100, }; extern char *system_type; diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h index 9e6211e6d76b..d45a52f65b7a 100644 --- a/arch/mips/include/asm/cpu.h +++ b/arch/mips/include/asm/cpu.h @@ -46,8 +46,8 @@ #define PRID_COMP_NETLOGIC 0x0c0000 #define PRID_COMP_CAVIUM 0x0d0000 #define PRID_COMP_LOONGSON 0x140000 -#define PRID_COMP_INGENIC_13 0x130000 /* X2000 */ -#define PRID_COMP_INGENIC_D0 0xd00000 /* JZ4740, JZ4750, X1830 */ +#define PRID_COMP_INGENIC_13 0x130000 /* X2000, X2100 */ +#define PRID_COMP_INGENIC_D0 0xd00000 /* JZ4730, JZ4740, JZ4750, JZ4755, JZ4760, X1830 */ #define PRID_COMP_INGENIC_D1 0xd10000 /* JZ4770, JZ4775, X1000 */ #define PRID_COMP_INGENIC_E1 0xe10000 /* JZ4780 */ -- cgit From e98b461bb057aaea6fa766260788c08825213837 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 13 Jul 2021 10:49:15 -0700 Subject: MIPS: octeon: Remove vestiges of CONFIG_CAVIUM_RESERVE32 The config option CAVIUM_RESERVE32 is not used. Remove the dead code controlled by it. Signed-off-by: Joe Perches Signed-off-by: Thomas Bogendoerfer --- arch/mips/cavium-octeon/executive/cvmx-cmd-queue.c | 21 +++--------- arch/mips/cavium-octeon/setup.c | 38 +--------------------- 2 files changed, 5 insertions(+), 54 deletions(-) (limited to 'arch') diff --git a/arch/mips/cavium-octeon/executive/cvmx-cmd-queue.c b/arch/mips/cavium-octeon/executive/cvmx-cmd-queue.c index 3839feba68f2..fb42e8e21ea0 100644 --- a/arch/mips/cavium-octeon/executive/cvmx-cmd-queue.c +++ b/arch/mips/cavium-octeon/executive/cvmx-cmd-queue.c @@ -57,27 +57,14 @@ EXPORT_SYMBOL_GPL(__cvmx_cmd_queue_state_ptr); static cvmx_cmd_queue_result_t __cvmx_cmd_queue_init_state_ptr(void) { char *alloc_name = "cvmx_cmd_queues"; -#if defined(CONFIG_CAVIUM_RESERVE32) && CONFIG_CAVIUM_RESERVE32 - extern uint64_t octeon_reserve32_memory; -#endif if (likely(__cvmx_cmd_queue_state_ptr)) return CVMX_CMD_QUEUE_SUCCESS; -#if defined(CONFIG_CAVIUM_RESERVE32) && CONFIG_CAVIUM_RESERVE32 - if (octeon_reserve32_memory) - __cvmx_cmd_queue_state_ptr = - cvmx_bootmem_alloc_named_range(sizeof(*__cvmx_cmd_queue_state_ptr), - octeon_reserve32_memory, - octeon_reserve32_memory + - (CONFIG_CAVIUM_RESERVE32 << - 20) - 1, 128, alloc_name); - else -#endif - __cvmx_cmd_queue_state_ptr = - cvmx_bootmem_alloc_named(sizeof(*__cvmx_cmd_queue_state_ptr), - 128, - alloc_name); + __cvmx_cmd_queue_state_ptr = + cvmx_bootmem_alloc_named(sizeof(*__cvmx_cmd_queue_state_ptr), + 128, + alloc_name); if (__cvmx_cmd_queue_state_ptr) memset(__cvmx_cmd_queue_state_ptr, 0, sizeof(*__cvmx_cmd_queue_state_ptr)); diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c index ce4e2806159b..0ddd3cc16ee4 100644 --- a/arch/mips/cavium-octeon/setup.c +++ b/arch/mips/cavium-octeon/setup.c @@ -284,11 +284,6 @@ void octeon_crash_smp_send_stop(void) #endif /* CONFIG_KEXEC */ -#ifdef CONFIG_CAVIUM_RESERVE32 -uint64_t octeon_reserve32_memory; -EXPORT_SYMBOL(octeon_reserve32_memory); -#endif - #ifdef CONFIG_KEXEC /* crashkernel cmdline parameter is parsed _after_ memory setup * we also parse it here (workaround for EHB5200) */ @@ -665,9 +660,7 @@ void __init prom_init(void) int i; u64 t; int argc; -#ifdef CONFIG_CAVIUM_RESERVE32 - int64_t addr = -1; -#endif + /* * The bootloader passes a pointer to the boot descriptor in * $a3, this is available as fw_arg3. 
@@ -782,25 +775,6 @@ void __init prom_init(void) cvmx_write_csr(CVMX_LED_UDD_DATX(1), 0); cvmx_write_csr(CVMX_LED_EN, 1); } -#ifdef CONFIG_CAVIUM_RESERVE32 - /* - * We need to temporarily allocate all memory in the reserve32 - * region. This makes sure the kernel doesn't allocate this - * memory when it is getting memory from the - * bootloader. Later, after the memory allocations are - * complete, the reserve32 will be freed. - * - * Allocate memory for RESERVED32 aligned on 2MB boundary. This - * is in case we later use hugetlb entries with it. - */ - addr = cvmx_bootmem_phy_named_block_alloc(CONFIG_CAVIUM_RESERVE32 << 20, - 0, 0, 2 << 20, - "CAVIUM_RESERVE32", 0); - if (addr < 0) - pr_err("Failed to allocate CAVIUM_RESERVE32 memory area\n"); - else - octeon_reserve32_memory = addr; -#endif #ifdef CONFIG_CAVIUM_OCTEON_LOCK_L2 if (cvmx_read_csr(CVMX_L2D_FUS3) & (3ull << 34)) { @@ -1078,16 +1052,6 @@ void __init plat_mem_setup(void) cvmx_bootmem_unlock(); #endif /* CONFIG_CRASH_DUMP */ -#ifdef CONFIG_CAVIUM_RESERVE32 - /* - * Now that we've allocated the kernel memory it is safe to - * free the reserved region. We free it here so that builtin - * drivers can use the memory. - */ - if (octeon_reserve32_memory) - cvmx_bootmem_free_named("CAVIUM_RESERVE32"); -#endif /* CONFIG_CAVIUM_RESERVE32 */ - if (total == 0) panic("Unable to allocate memory from " "cvmx_bootmem_phy_alloc"); -- cgit From b7d2be48cc08a9d42e347d944efa9f37ab9b83d2 Mon Sep 17 00:00:00 2001 From: Chen Lifu Date: Tue, 29 Jun 2021 10:34:54 +0800 Subject: riscv: kprobes: implement the auipc instruction This has been tested by probing a module that contains an auipc instruction. Signed-off-by: Chen Lifu [Palmer: commit message] Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/probes/decode-insn.c | 2 +- arch/riscv/kernel/probes/simulate-insn.c | 34 ++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/riscv/kernel/probes/decode-insn.c b/arch/riscv/kernel/probes/decode-insn.c index 0ed043acc882..5eb03fb61450 100644 --- a/arch/riscv/kernel/probes/decode-insn.c +++ b/arch/riscv/kernel/probes/decode-insn.c @@ -38,11 +38,11 @@ riscv_probe_decode_insn(probe_opcode_t *addr, struct arch_probe_insn *api) RISCV_INSN_REJECTED(c_ebreak, insn); #endif - RISCV_INSN_REJECTED(auipc, insn); RISCV_INSN_REJECTED(branch, insn); RISCV_INSN_SET_SIMULATE(jal, insn); RISCV_INSN_SET_SIMULATE(jalr, insn); + RISCV_INSN_SET_SIMULATE(auipc, insn); return INSN_GOOD; } diff --git a/arch/riscv/kernel/probes/simulate-insn.c b/arch/riscv/kernel/probes/simulate-insn.c index 2519ce26377d..b81719522d5c 100644 --- a/arch/riscv/kernel/probes/simulate-insn.c +++ b/arch/riscv/kernel/probes/simulate-insn.c @@ -83,3 +83,37 @@ bool __kprobes simulate_jalr(u32 opcode, unsigned long addr, struct pt_regs *reg return ret; } + +#define auipc_rd_idx(opcode) \ + ((opcode >> 7) & 0x1f) + +#define auipc_imm(opcode) \ + ((((opcode) >> 12) & 0xfffff) << 12) + +#if __riscv_xlen == 64 +#define auipc_offset(opcode) sign_extend64(auipc_imm(opcode), 31) +#elif __riscv_xlen == 32 +#define auipc_offset(opcode) auipc_imm(opcode) +#else +#error "Unexpected __riscv_xlen" +#endif + +bool __kprobes simulate_auipc(u32 opcode, unsigned long addr, struct pt_regs *regs) +{ + /* + * auipc instruction: + * 31 12 11 7 6 0 + * | imm[31:12] | rd | opcode | + * 20 5 7 + */ + + u32 rd_idx = auipc_rd_idx(opcode); + unsigned long rd_val = addr + auipc_offset(opcode); + + if (!rv_insn_reg_set_val(regs, rd_idx, rd_val)) + return false; + + 
instruction_pointer_set(regs, addr + 4);
+
+	return true;
+}
-- cgit
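To see the auipc field extraction above in action, here is a standalone
user-space sketch (illustrative only: the two opcodes, which encode
"auipc a0, 0x12345" and "auipc a0, 0xfffff", and the pc value are
invented for the example, and the kernel's sign_extend64() is
open-coded). The second opcode shows why the RV64 path needs the sign
extension.

#include <stdint.h>
#include <stdio.h>

/* same field extraction as the patch above */
#define auipc_rd_idx(opcode) ((opcode >> 7) & 0x1f)
#define auipc_imm(opcode)    ((((opcode) >> 12) & 0xfffff) << 12)

/* open-coded equivalent of the kernel's sign_extend64(value, 31) */
static int64_t sign_extend64(uint64_t value, int index)
{
	int shift = 63 - index;

	return (int64_t)(value << shift) >> shift;
}

int main(void)
{
	uint32_t insns[] = { 0x12345517, 0xfffff517 };
	uint64_t pc = 0x1000;	/* arbitrary example address */
	int i;

	for (i = 0; i < 2; i++) {
		uint32_t op = insns[i];
		uint64_t rd_val = pc + sign_extend64(auipc_imm(op), 31);

		/* prints rd=x10 <- 0x12346000, then rd=x10 <- 0 */
		printf("opcode %08x: rd=x%u <- %#llx\n", op,
		       (unsigned int)auipc_rd_idx(op),
		       (unsigned long long)rd_val);
	}
	return 0;
}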
From 67979e927dd053bde3b71128495f651256b3161c Mon Sep 17 00:00:00 2001
From: Chen Lifu
Date: Tue, 29 Jun 2021 10:34:55 +0800
Subject: riscv: kprobes: implement the branch instructions

This has been tested by probing a module that contains each of the
flavors of branches we have.

Signed-off-by: Chen Lifu
[Palmer: commit message, fix kconfig errors]
Signed-off-by: Palmer Dabbelt
---
 arch/riscv/kernel/probes/decode-insn.c   |  3 +-
 arch/riscv/kernel/probes/simulate-insn.c | 78 ++++++++++++++++++++++++++++++++
 2 files changed, 79 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/riscv/kernel/probes/decode-insn.c b/arch/riscv/kernel/probes/decode-insn.c
index 5eb03fb61450..64f6183b4717 100644
--- a/arch/riscv/kernel/probes/decode-insn.c
+++ b/arch/riscv/kernel/probes/decode-insn.c
@@ -38,11 +38,10 @@ riscv_probe_decode_insn(probe_opcode_t *addr, struct arch_probe_insn *api)
 	RISCV_INSN_REJECTED(c_ebreak, insn);
 #endif
 
-	RISCV_INSN_REJECTED(branch, insn);
-
 	RISCV_INSN_SET_SIMULATE(jal, insn);
 	RISCV_INSN_SET_SIMULATE(jalr, insn);
 	RISCV_INSN_SET_SIMULATE(auipc, insn);
+	RISCV_INSN_SET_SIMULATE(branch, insn);
 
 	return INSN_GOOD;
 }
diff --git a/arch/riscv/kernel/probes/simulate-insn.c b/arch/riscv/kernel/probes/simulate-insn.c
index b81719522d5c..d73e96f6ed7c 100644
--- a/arch/riscv/kernel/probes/simulate-insn.c
+++ b/arch/riscv/kernel/probes/simulate-insn.c
@@ -117,3 +117,81 @@ bool __kprobes simulate_auipc(u32 opcode, unsigned long addr, struct pt_regs *re
 
 	return true;
 }
+
+#define branch_rs1_idx(opcode) \
+	(((opcode) >> 15) & 0x1f)
+
+#define branch_rs2_idx(opcode) \
+	(((opcode) >> 20) & 0x1f)
+
+#define branch_funct3(opcode) \
+	(((opcode) >> 12) & 0x7)
+
+#define branch_imm(opcode) \
+	(((((opcode) >> 8) & 0xf ) << 1) | \
+	 ((((opcode) >> 25) & 0x3f) << 5) | \
+	 ((((opcode) >> 7) & 0x1 ) << 11) | \
+	 ((((opcode) >> 31) & 0x1 ) << 12))
+
+#define branch_offset(opcode) \
+	sign_extend32((branch_imm(opcode)), 12)
+
+#define BRANCH_BEQ	0x0
+#define BRANCH_BNE	0x1
+#define BRANCH_BLT	0x4
+#define BRANCH_BGE	0x5
+#define BRANCH_BLTU	0x6
+#define BRANCH_BGEU	0x7
+
+bool __kprobes simulate_branch(u32 opcode, unsigned long addr, struct pt_regs *regs)
+{
+	/*
+	 * branch instructions:
+	 *      31    30       25 24 20 19 15 14    12 11       8    7      6    0
+	 * | imm[12] | imm[10:5] | rs2 | rs1 | funct3 | imm[4:1] | imm[11] | opcode |
+	 *     1           6        5     5      3         4          1         7
+	 * imm[12|10:5] rs2 rs1 000 imm[4:1|11] 1100011 BEQ
+	 * imm[12|10:5] rs2 rs1 001 imm[4:1|11] 1100011 BNE
+	 * imm[12|10:5] rs2 rs1 100 imm[4:1|11] 1100011 BLT
+	 * imm[12|10:5] rs2 rs1 101 imm[4:1|11] 1100011 BGE
+	 * imm[12|10:5] rs2 rs1 110 imm[4:1|11] 1100011 BLTU
+	 * imm[12|10:5] rs2 rs1 111 imm[4:1|11] 1100011 BGEU
+	 */
+
+	s32 offset;
+	s32 offset_tmp;
+	unsigned long rs1_val;
+	unsigned long rs2_val;
+
+	if (!rv_insn_reg_get_val(regs, branch_rs1_idx(opcode), &rs1_val) ||
+	    !rv_insn_reg_get_val(regs, branch_rs2_idx(opcode), &rs2_val))
+		return false;
+
+	offset_tmp = branch_offset(opcode);
+	switch (branch_funct3(opcode)) {
+	case BRANCH_BEQ:
+		offset = (rs1_val == rs2_val) ? offset_tmp : 4;
+		break;
+	case BRANCH_BNE:
+		offset = (rs1_val != rs2_val) ? offset_tmp : 4;
+		break;
+	case BRANCH_BLT:
+		offset = ((long)rs1_val < (long)rs2_val) ? offset_tmp : 4;
+		break;
+	case BRANCH_BGE:
+		offset = ((long)rs1_val >= (long)rs2_val) ? offset_tmp : 4;
+		break;
+	case BRANCH_BLTU:
+		offset = (rs1_val < rs2_val) ? offset_tmp : 4;
+		break;
+	case BRANCH_BGEU:
+		offset = (rs1_val >= rs2_val) ? offset_tmp : 4;
+		break;
+	default:
+		return false;
+	}
+
+	instruction_pointer_set(regs, addr + offset);
+
+	return true;
+}
-- cgit

From faff43da31ae469ff28d71af4aa47f0c08b0b26c Mon Sep 17 00:00:00 2001
From: Randy Dunlap
Date: Mon, 19 Jul 2021 21:16:17 -0700
Subject: mips: cavium-octeon: clean up kernel-doc in cvmx-interrupt-decodes.c

Fix kernel-doc warnings reported by kernel test robot:

arch/mips/cavium-octeon/executive/cvmx-interrupt-decodes.c:49: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
 * __cvmx_interrupt_gmxx_rxx_int_en_enable enables all interrupt bits in cvmx_gmxx_rxx_int_en_t
arch/mips/cavium-octeon/executive/cvmx-interrupt-decodes.c:230: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
 * __cvmx_interrupt_pcsx_intx_en_reg_enable enables all interrupt bits in cvmx_pcsx_intx_en_reg_t
arch/mips/cavium-octeon/executive/cvmx-interrupt-decodes.c:271: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
 * __cvmx_interrupt_pcsxx_int_en_reg_enable enables all interrupt bits in cvmx_pcsxx_int_en_reg_t
arch/mips/cavium-octeon/executive/cvmx-interrupt-decodes.c:301: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
 * __cvmx_interrupt_spxx_int_msk_enable enables all interrupt bits in cvmx_spxx_int_msk_t
arch/mips/cavium-octeon/executive/cvmx-interrupt-decodes.c:340: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
 * __cvmx_interrupt_stxx_int_msk_enable enables all interrupt bits in cvmx_stxx_int_msk_t

Signed-off-by: Randy Dunlap
Reported-by: kernel test robot
Cc: Aditya Srivastava
Cc: David Daney
Cc: Thomas Bogendoerfer
Cc: linux-mips@vger.kernel.org
Signed-off-by: Thomas Bogendoerfer
---
 .../cavium-octeon/executive/cvmx-interrupt-decodes.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/cavium-octeon/executive/cvmx-interrupt-decodes.c b/arch/mips/cavium-octeon/executive/cvmx-interrupt-decodes.c
index 2f415d9d0f3c..67d6da21d49f 100644
--- a/arch/mips/cavium-octeon/executive/cvmx-interrupt-decodes.c
+++ b/arch/mips/cavium-octeon/executive/cvmx-interrupt-decodes.c
@@ -46,7 +46,9 @@
 
 /**
- * __cvmx_interrupt_gmxx_rxx_int_en_enable enables all interrupt bits in cvmx_gmxx_rxx_int_en_t
+ * __cvmx_interrupt_gmxx_rxx_int_en_enable - enable all interrupt bits in cvmx_gmxx_rxx_int_en_t
+ * @index: interrupt register offset
+ * @block: interrupt register block_id
  */
 void __cvmx_interrupt_gmxx_rxx_int_en_enable(int index, int block)
 {
@@ -227,7 +229,9 @@ void __cvmx_interrupt_gmxx_rxx_int_en_enable(int index, int block)
 	cvmx_write_csr(CVMX_GMXX_RXX_INT_EN(index, block), gmx_rx_int_en.u64);
 }
 /**
- * __cvmx_interrupt_pcsx_intx_en_reg_enable enables all interrupt bits in cvmx_pcsx_intx_en_reg_t
+ * __cvmx_interrupt_pcsx_intx_en_reg_enable - enable all interrupt bits in cvmx_pcsx_intx_en_reg_t
+ * @index: interrupt register offset
+ * @block: interrupt register block_id
  */
 void __cvmx_interrupt_pcsx_intx_en_reg_enable(int index, int block)
 {
@@ -268,7 +272,8 @@ void __cvmx_interrupt_pcsx_intx_en_reg_enable(int index, int block)
 	cvmx_write_csr(CVMX_PCSX_INTX_EN_REG(index, block), pcs_int_en_reg.u64);
 }
 /**
- * __cvmx_interrupt_pcsxx_int_en_reg_enable enables all interrupt bits in
cvmx_pcsxx_int_en_reg_t + * __cvmx_interrupt_pcsxx_int_en_reg_enable - enable all interrupt bits in cvmx_pcsxx_int_en_reg_t + * @index: interrupt register block_id */ void __cvmx_interrupt_pcsxx_int_en_reg_enable(int index) { @@ -298,7 +303,8 @@ void __cvmx_interrupt_pcsxx_int_en_reg_enable(int index) } /** - * __cvmx_interrupt_spxx_int_msk_enable enables all interrupt bits in cvmx_spxx_int_msk_t + * __cvmx_interrupt_spxx_int_msk_enable - enable all interrupt bits in cvmx_spxx_int_msk_t + * @index: interrupt register block_id */ void __cvmx_interrupt_spxx_int_msk_enable(int index) { @@ -337,7 +343,8 @@ void __cvmx_interrupt_spxx_int_msk_enable(int index) cvmx_write_csr(CVMX_SPXX_INT_MSK(index), spx_int_msk.u64); } /** - * __cvmx_interrupt_stxx_int_msk_enable enables all interrupt bits in cvmx_stxx_int_msk_t + * __cvmx_interrupt_stxx_int_msk_enable - enable all interrupt bits in cvmx_stxx_int_msk_t + * @index: interrupt register block_id */ void __cvmx_interrupt_stxx_int_msk_enable(int index) { -- cgit From 73b9919f3c17851b4c1d90fb6c343d9c438c554c Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 19 Jul 2021 21:34:32 -0700 Subject: mips: netlogic: fix kernel-doc complaints in fmn-config.c Clean up kernel-doc warnings in netlogic/xlr/fmn-config.c by using correct kernel-doc format. Fixes these warnings: arch/mips/netlogic/xlr/fmn-config.c:106: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Configure bucket size and credits for a device. 'size' is the size of arch/mips/netlogic/xlr/fmn-config.c:181: warning: expecting prototype for Setup the FMN details for each devices according to the device available(). Prototype was for xlr_board_info_setup() instead Signed-off-by: Randy Dunlap Reported-by: kernel test robot Cc: Ganesan Ramalingam Cc: John Crispin Cc: Aditya Srivastava Cc: Thomas Bogendoerfer Cc: linux-mips@vger.kernel.org Signed-off-by: Thomas Bogendoerfer --- arch/mips/netlogic/xlr/fmn-config.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/mips/netlogic/xlr/fmn-config.c b/arch/mips/netlogic/xlr/fmn-config.c index c7622c6e5f67..15483537e8cf 100644 --- a/arch/mips/netlogic/xlr/fmn-config.c +++ b/arch/mips/netlogic/xlr/fmn-config.c @@ -103,18 +103,19 @@ static void check_credit_distribution(void) } /** - * Configure bucket size and credits for a device. 'size' is the size of - * the buckets for the device. This size is distributed among all the CPUs - * so that all of them can send messages to the device. - * - * The device is also given 'cpu_credits' to send messages to the CPUs - * + * setup_fmn_cc - Configure bucket size and credits for a device. * @dev_info: FMN information structure for each devices * @start_stn_id: Starting station id of dev_info * @end_stn_id: End station id of dev_info * @num_buckets: Total number of buckets for den_info * @cpu_credits: Allowed credits to cpu for each devices pointing by dev_info * @size: Size of the each buckets in the device station + * + * 'size' is the size of the buckets for the device. This size is + * distributed among all the CPUs + * so that all of them can send messages to the device. 
+ *
+ * The device is also given 'cpu_credits' to send messages to the CPUs
  */
 static void setup_fmn_cc(struct xlr_fmn_info *dev_info, int start_stn_id,
 		int end_stn_id, int num_buckets, int cpu_credits, int size)
@@ -174,6 +175,8 @@ static void setup_cpu_fmninfo(struct xlr_fmn_info *cpu, int num_core)
 }
 
 /**
+ * xlr_board_info_setup - Setup FMN details
+ *
  * Setup the FMN details for each devices according to the device available
  * in each variant of XLR/XLS processor
  */
-- cgit

From d17eef2767d84b7dc4e1b8029be1eacb1d8679f6 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada
Date: Thu, 1 Apr 2021 00:06:56 +0900
Subject: mips: replace deprecated EXTRA_CFLAGS with ccflags-y

As Documentation/kbuild/makefiles.rst says, EXTRA_CFLAGS is deprecated.
Replace it with ccflags-y.

Signed-off-by: Masahiro Yamada
Signed-off-by: Thomas Bogendoerfer
---
 arch/mips/kvm/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/mips/kvm/Makefile b/arch/mips/kvm/Makefile
index c67250a956b8..18c69eff1d3f 100644
--- a/arch/mips/kvm/Makefile
+++ b/arch/mips/kvm/Makefile
@@ -4,7 +4,7 @@
 
 common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o eventfd.o binary_stats.o)
 
-EXTRA_CFLAGS += -Ivirt/kvm -Iarch/mips/kvm
+ccflags-y += -Ivirt/kvm -Iarch/mips/kvm
 
 common-objs-$(CONFIG_CPU_HAS_MSA) += msa.o
-- cgit

From d656132d2a2abc06917d822f7adcda86fd6dd192 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada
Date: Thu, 1 Apr 2021 00:06:57 +0900
Subject: mips: clean up kvm Makefile

You can use kvm-y instead of kvm-objs to create the composite module.
kvm-$(CONFIG_...) looks cleaner.

Signed-off-by: Masahiro Yamada
Signed-off-by: Thomas Bogendoerfer
---
 arch/mips/kvm/Makefile | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/kvm/Makefile b/arch/mips/kvm/Makefile
index 18c69eff1d3f..d3710959da55 100644
--- a/arch/mips/kvm/Makefile
+++ b/arch/mips/kvm/Makefile
@@ -2,21 +2,18 @@
 # Makefile for KVM support for MIPS
 #
 
-common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o eventfd.o binary_stats.o)
-
 ccflags-y += -Ivirt/kvm -Iarch/mips/kvm
 
-common-objs-$(CONFIG_CPU_HAS_MSA) += msa.o
+kvm-y := $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o eventfd.o binary_stats.o)
+kvm-$(CONFIG_CPU_HAS_MSA) += msa.o
 
-kvm-objs := $(common-objs-y) mips.o emulate.o entry.o \
+kvm-y += mips.o emulate.o entry.o \
 	    interrupt.o stats.o \
 	    fpu.o
-kvm-objs += hypcall.o
-kvm-objs += mmu.o
-ifdef CONFIG_CPU_LOONGSON64
-kvm-objs += loongson_ipi.o
-endif
+kvm-y += hypcall.o
+kvm-y += mmu.o
+kvm-$(CONFIG_CPU_LOONGSON64) += loongson_ipi.o
 
-kvm-objs += vz.o
+kvm-y += vz.o
 obj-$(CONFIG_KVM) += kvm.o
 obj-y += callback.o tlb.o
-- cgit

From 93ebb6828723b8aef114415c4dc3518342f7dcad Mon Sep 17 00:00:00 2001
From: Halil Pasic
Date: Sat, 24 Jul 2021 01:17:46 +0200
Subject: s390/pv: fix the forcing of the swiotlb

Since commit 903cd0f315fe ("swiotlb: Use is_swiotlb_force_bounce for
swiotlb data bouncing") if code sets swiotlb_force it needs to do so
before the swiotlb is initialised. Otherwise
io_tlb_default_mem->force_bounce will not get set to true, and devices
that use (the default) swiotlb will not bounce despite swiotlb_force
having the value of SWIOTLB_FORCE.

Let us restore swiotlb functionality for PV by fulfilling this new
requirement.
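The requirement boils down to an ordering constraint; condensed from
the pv_init() hunk quoted below:

	/* make sure bounce buffers are shared */
	swiotlb_force = SWIOTLB_FORCE;	/* must be set first ... */
	swiotlb_init(1);		/* ... so force_bounce is derived from it here */
	swiotlb_update_mem_attributes();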
This change addresses what turned out to be a fragility in commit 64e1f0c531d1 ("s390/mm: force swiotlb for protected virtualization"), which ain't exactly broken in its original context, but could give us some more headache if people backport the broken change and forget this fix. Signed-off-by: Halil Pasic Tested-by: Christian Borntraeger Reviewed-by: Christian Borntraeger Fixes: 903cd0f315fe ("swiotlb: Use is_swiotlb_force_bounce for swiotlb data bouncing") Fixes: 64e1f0c531d1 ("s390/mm: force swiotlb for protected virtualization") Cc: stable@vger.kernel.org #5.3+ Signed-off-by: Konrad Rzeszutek Wilk --- arch/s390/mm/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 8ac710de1ab1..07bbee9b7320 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -186,9 +186,9 @@ static void pv_init(void) return; /* make sure bounce buffers are shared */ + swiotlb_force = SWIOTLB_FORCE; swiotlb_init(1); swiotlb_update_mem_attributes(); - swiotlb_force = SWIOTLB_FORCE; } void __init mem_init(void) -- cgit From 8119cefd9a29b71997e62b762932d23499ba4896 Mon Sep 17 00:00:00 2001 From: Hari Bathini Date: Wed, 14 Jul 2021 18:17:58 +0530 Subject: powerpc/kexec: blacklist functions called in real mode for kprobe As kprobe does not handle events happening in real mode, blacklist the functions that only get called in real mode or in kexec sequence with MMU turned off. Signed-off-by: Hari Bathini Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/162626687834.155313.4692863392927831843.stgit@hbathini-workstation.ibm.com --- arch/powerpc/kernel/head_64.S | 2 ++ arch/powerpc/kexec/core_64.c | 6 ++++-- arch/powerpc/mm/book3s64/hash_native.c | 2 +- arch/powerpc/mm/book3s64/pgtable.c | 4 ++-- arch/powerpc/mm/book3s64/radix_pgtable.c | 3 ++- arch/powerpc/platforms/ps3/htab.c | 3 ++- arch/powerpc/platforms/ps3/mm.c | 8 ++++++-- arch/powerpc/platforms/pseries/lpar.c | 9 ++++++--- 8 files changed, 25 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 79930b0bc781..f17ae2083733 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -712,6 +712,8 @@ _GLOBAL(copy_and_flush) isync blr +_ASM_NOKPROBE_SYMBOL(copy_and_flush); /* Called in real mode */ + .align 8 copy_to_here: diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c index 8a449b2d8715..84618d3c8013 100644 --- a/arch/powerpc/kexec/core_64.c +++ b/arch/powerpc/kexec/core_64.c @@ -72,7 +72,8 @@ int default_machine_kexec_prepare(struct kimage *image) return 0; } -static void copy_segments(unsigned long ind) +/* Called during kexec sequence with MMU off */ +static notrace void copy_segments(unsigned long ind) { unsigned long entry; unsigned long *ptr; @@ -105,7 +106,8 @@ static void copy_segments(unsigned long ind) } } -void kexec_copy_flush(struct kimage *image) +/* Called during kexec sequence with MMU off */ +notrace void kexec_copy_flush(struct kimage *image) { long i, nr_segments = image->nr_segments; struct kexec_segment ranges[KEXEC_SEGMENT_MAX]; diff --git a/arch/powerpc/mm/book3s64/hash_native.c b/arch/powerpc/mm/book3s64/hash_native.c index 52e170bd95ae..d8279bfe68ea 100644 --- a/arch/powerpc/mm/book3s64/hash_native.c +++ b/arch/powerpc/mm/book3s64/hash_native.c @@ -787,7 +787,7 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot, * TODO: add batching support when enabled. 
remember, no dynamic memory here, * although there is the control page available... */ -static void native_hpte_clear(void) +static notrace void native_hpte_clear(void) { unsigned long vpn = 0; unsigned long slot, slots; diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index 9ffa65074cb0..300099de553b 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -172,8 +172,8 @@ pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ -/* For use by kexec */ -void mmu_cleanup_all(void) +/* For use by kexec, called with MMU off */ +notrace void mmu_cleanup_all(void) { if (radix_enabled()) radix__mmu_cleanup_all(); diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index e50ddf129c15..ae20add7954a 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -679,7 +679,8 @@ void radix__early_init_mmu_secondary(void) mtspr(SPRN_UAMOR, 0); } -void radix__mmu_cleanup_all(void) +/* Called during kexec sequence with MMU off */ +notrace void radix__mmu_cleanup_all(void) { unsigned long lpcr; diff --git a/arch/powerpc/platforms/ps3/htab.c b/arch/powerpc/platforms/ps3/htab.c index 7ddc7ec6a7c0..ef710a715903 100644 --- a/arch/powerpc/platforms/ps3/htab.c +++ b/arch/powerpc/platforms/ps3/htab.c @@ -169,7 +169,8 @@ static void ps3_hpte_invalidate(unsigned long slot, unsigned long vpn, spin_unlock_irqrestore(&ps3_htab_lock, flags); } -static void ps3_hpte_clear(void) +/* Called during kexec sequence with MMU off */ +static notrace void ps3_hpte_clear(void) { unsigned long hpte_count = (1UL << ppc64_pft_size) >> 4; u64 i; diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c index a81eac35d900..9c44f335c0b9 100644 --- a/arch/powerpc/platforms/ps3/mm.c +++ b/arch/powerpc/platforms/ps3/mm.c @@ -195,9 +195,11 @@ fail: /** * ps3_mm_vas_destroy - + * + * called during kexec sequence with MMU off. */ -void ps3_mm_vas_destroy(void) +notrace void ps3_mm_vas_destroy(void) { int result; @@ -1243,9 +1245,11 @@ void __init ps3_mm_init(void) /** * ps3_mm_shutdown - final cleanup of address space + * + * called during kexec sequence with MMU off. 
 */
-void ps3_mm_shutdown(void)
+notrace void ps3_mm_shutdown(void)
 {
 	ps3_mm_region_destroy(&map.r1);
 }
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index dab356e3ff87..869ef638698a 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -801,7 +801,8 @@ static long pSeries_lpar_hpte_remove(unsigned long hpte_group)
 	return -1;
 }
 
-static void manual_hpte_clear_all(void)
+/* Called during kexec sequence with MMU off */
+static notrace void manual_hpte_clear_all(void)
 {
 	unsigned long size_bytes = 1UL << ppc64_pft_size;
 	unsigned long hpte_count = size_bytes >> 4;
@@ -834,7 +835,8 @@ static void manual_hpte_clear_all(void)
 	}
 }
 
-static int hcall_hpte_clear_all(void)
+/* Called during kexec sequence with MMU off */
+static notrace int hcall_hpte_clear_all(void)
 {
 	int rc;
 
@@ -845,7 +847,8 @@ static int hcall_hpte_clear_all(void)
 	return rc;
 }
 
-static void pseries_hpte_clear_all(void)
+/* Called during kexec sequence with MMU off */
+static notrace void pseries_hpte_clear_all(void)
 {
 	int rc;
-- cgit
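The same pattern applies to any C function that can run with the MMU
off. A hedged sketch with an invented helper name (the patch above uses
notrace on its C entry points and _ASM_NOKPROBE_SYMBOL() on the
assembly routine; NOKPROBE_SYMBOL() is the generic way to blacklist a C
function from kprobes explicitly):

#include <linux/kprobes.h>

/*
 * Invented example, not from the patch above.  notrace removes the
 * ftrace hook (one of the ways kprobes attaches on powerpc), and
 * NOKPROBE_SYMBOL() puts the function on the kprobe blacklist so a
 * probe can never be armed inside it.
 */
static notrace void my_realmode_helper(void)
{
	/* MMU is off here: a kprobe trap could not be serviced */
}
NOKPROBE_SYMBOL(my_realmode_helper);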
From 16df55ce104125b4bf1070467973070fb6fc16ff Mon Sep 17 00:00:00 2001
From: Randy Dunlap
Date: Sun, 25 Jul 2021 09:02:56 -0700
Subject: mips: clean up (remove) kernel-doc in cavium-octeon/executive/

Remove all kernel-doc notation in arch/mips/cavium-octeon/executive/.
This removes dozens of kernel-doc warnings. Most of these functions
are static and don't need to be documented with kernel-doc. The
function comments are still present for anyone who wants to read them.
These files are part of the OCTEON SDK so presumably they are
documented there as well.

arch/mips/cavium-octeon/executive/cvmx-bootmem.c:61: warning: Function parameter or member 'addr' not described in 'CVMX_BOOTMEM_NAMED_GET_FIELD'
arch/mips/cavium-octeon/executive/cvmx-bootmem.c:61: warning: Function parameter or member 'field' not described in 'CVMX_BOOTMEM_NAMED_GET_FIELD'
arch/mips/cavium-octeon/executive/cvmx-bootmem.c:61: warning: expecting prototype for This macro returns a member of the(). Prototype was for CVMX_BOOTMEM_NAMED_GET_FIELD() instead
arch/mips/cavium-octeon/executive/cvmx-bootmem.c:77: warning: Function parameter or member 'base' not described in '__cvmx_bootmem_desc_get'
arch/mips/cavium-octeon/executive/cvmx-bootmem.c:77: warning: Function parameter or member 'offset' not described in '__cvmx_bootmem_desc_get'
arch/mips/cavium-octeon/executive/cvmx-bootmem.c:77: warning: Function parameter or member 'size' not described in '__cvmx_bootmem_desc_get'
arch/mips/cavium-octeon/executive/cvmx-bootmem.c:77: warning: expecting prototype for This function is the implementation of the get macros defined(). Prototype was for __cvmx_bootmem_desc_get() instead
arch/mips/cavium-octeon/executive/cvmx-bootmem.c:133: warning: expecting prototype for Allocate a block of memory from the free list that was(). Prototype was for cvmx_bootmem_alloc_range() instead
arch/mips/cavium-octeon/executive/cvmx-bootmem.c:554: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
 * Finds a named memory block by name.
arch/mips/cavium-octeon/executive/cvmx-bootmem.c:661: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
 * Frees a named block.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
arch/mips/cavium-octeon/executive/cvmx-helper-board.c:64: warning: expecting prototype for Return the MII PHY address associated with the given IPD(). Prototype was for cvmx_helper_board_get_mii_address() instead
arch/mips/cavium-octeon/executive/cvmx-helper-board.c:211: warning: expecting prototype for This function is the board specific method of determining an(). Prototype was for __cvmx_helper_board_link_get() instead
arch/mips/cavium-octeon/executive/cvmx-helper-board.c:278: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
 * This function is called by cvmx_helper_interface_probe() after it
arch/mips/cavium-octeon/executive/cvmx-helper-board.c:324: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
 * Get the clock type used for the USB block based on board type.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
arch/mips/cavium-octeon/executive/cvmx-spi.c:77: warning: expecting prototype for Get current SPI4 initialization callbacks(). Prototype was for cvmx_spi_get_callbacks() instead
arch/mips/cavium-octeon/executive/cvmx-spi.c:87: warning: expecting prototype for Set new SPI4 initialization callbacks(). Prototype was for cvmx_spi_set_callbacks() instead
arch/mips/cavium-octeon/executive/cvmx-spi.c:92: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
 * Initialize and start the SPI interface.
arch/mips/cavium-octeon/executive/cvmx-spi.c:151: warning: expecting prototype for This routine restarts the SPI interface after it has lost synchronization(). Prototype was for cvmx_spi_restart_interface() instead
arch/mips/cavium-octeon/executive/cvmx-spi.c:196: warning: expecting prototype for Callback to perform SPI4 reset(). Prototype was for cvmx_spi_reset_cb() instead
arch/mips/cavium-octeon/executive/cvmx-spi.c:313: warning: expecting prototype for Callback to setup calendar and miscellaneous settings before clock detection(). Prototype was for cvmx_spi_calendar_setup_cb() instead
arch/mips/cavium-octeon/executive/cvmx-spi.c:431: warning: expecting prototype for Callback to perform clock detection(). Prototype was for cvmx_spi_clock_detect_cb() instead
arch/mips/cavium-octeon/executive/cvmx-spi.c:509: warning: expecting prototype for Callback to perform link training(). Prototype was for cvmx_spi_training_cb() instead
arch/mips/cavium-octeon/executive/cvmx-spi.c:578: warning: expecting prototype for Callback to perform calendar data synchronization(). Prototype was for cvmx_spi_calendar_sync_cb() instead
arch/mips/cavium-octeon/executive/cvmx-spi.c:634: warning: expecting prototype for Callback to handle interface up(). Prototype was for cvmx_spi_interface_up_cb() instead
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
arch/mips/cavium-octeon/executive/cvmx-helper-xaui.c:67: warning: expecting prototype for Probe a XAUI interface and determine the number of ports(). Prototype was for __cvmx_helper_xaui_probe() instead
arch/mips/cavium-octeon/executive/cvmx-helper-xaui.c:106: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
 * Bringup and enable a XAUI interface. After this call packet
arch/mips/cavium-octeon/executive/cvmx-helper-xaui.c:253: warning: This comment starts with '/**', but isn't a kernel-doc comment.
Refer Documentation/doc-guide/kernel-doc.rst * Return the link state of an IPD/PKO port as returned by arch/mips/cavium-octeon/executive/cvmx-helper-xaui.c:292: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Configure an IPD/PKO port for the specified link state. This ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arch/mips/cavium-octeon/executive/cvmx-pko.c:43: warning: Function parameter or member 'interface' not described in '__cvmx_pko_int' arch/mips/cavium-octeon/executive/cvmx-pko.c:43: warning: Function parameter or member 'index' not described in '__cvmx_pko_int' arch/mips/cavium-octeon/executive/cvmx-pko.c:43: warning: expecting prototype for Internal state of packet output(). Prototype was for __cvmx_pko_int() instead arch/mips/cavium-octeon/executive/cvmx-pko.c:186: warning: expecting prototype for Call before any other calls to initialize the packet(). Prototype was for cvmx_pko_initialize_global() instead arch/mips/cavium-octeon/executive/cvmx-pko.c:241: warning: expecting prototype for This function does per(). Prototype was for cvmx_pko_initialize_local() instead arch/mips/cavium-octeon/executive/cvmx-pko.c:247: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Enables the packet output hardware. It must already be arch/mips/cavium-octeon/executive/cvmx-pko.c:270: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Disables the packet output. Does not affect any configuration. arch/mips/cavium-octeon/executive/cvmx-pko.c:282: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Reset the packet output. arch/mips/cavium-octeon/executive/cvmx-pko.c:293: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Shutdown and free resources required by packet output. arch/mips/cavium-octeon/executive/cvmx-pko.c:324: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Configure a output port and the associated queues for use. arch/mips/cavium-octeon/executive/cvmx-pko.c:555: warning: expecting prototype for Show map of ports(). Prototype was for cvmx_pko_show_queue_map() instead arch/mips/cavium-octeon/executive/cvmx-pko.c:577: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Rate limit a PKO port to a max packets/sec. This function is only arch/mips/cavium-octeon/executive/cvmx-pko.c:610: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Rate limit a PKO port to a max bits/sec. This function is only ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arch/mips/cavium-octeon/executive/cvmx-cmd-queue.c:49: warning: cannot understand function prototype: '__cvmx_cmd_queue_all_state_t *__cvmx_cmd_queue_state_ptr; ' arch/mips/cavium-octeon/executive/cvmx-cmd-queue.c:53: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Initialize the Global queue state pointer. arch/mips/cavium-octeon/executive/cvmx-cmd-queue.c:101: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Initialize a command queue for use. 
The initial FPA buffer is arch/mips/cavium-octeon/executive/cvmx-cmd-queue.c:199: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Shutdown a queue a free it's command buffers to the FPA. The arch/mips/cavium-octeon/executive/cvmx-cmd-queue.c:235: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Return the number of command words pending in the queue. This arch/mips/cavium-octeon/executive/cvmx-cmd-queue.c:291: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Return the command buffer to be written to. The purpose of this ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arch/mips/cavium-octeon/executive/cvmx-helper-rgmii.c:53: warning: expecting prototype for Probe RGMII ports and determine the number present(). Prototype was for __cvmx_helper_rgmii_probe() instead arch/mips/cavium-octeon/executive/cvmx-helper-rgmii.c:92: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Put an RGMII interface in loopback mode. Internal packets sent arch/mips/cavium-octeon/executive/cvmx-helper-rgmii.c:135: warning: expecting prototype for Workaround ASX setup errata with CN38XX pass1(). Prototype was for __cvmx_helper_errata_asx_pass1() instead arch/mips/cavium-octeon/executive/cvmx-helper-rgmii.c:152: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Configure all of the ASX, GMX, and PKO registers required arch/mips/cavium-octeon/executive/cvmx-helper-rgmii.c:255: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Return the link state of an IPD/PKO port as returned by arch/mips/cavium-octeon/executive/cvmx-helper-rgmii.c:284: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Configure an IPD/PKO port for the specified link state. This -- arch/mips/cavium-octeon/executive/cvmx-l2c.c:768: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Return log base 2 of the number of sets in the L2 cache arch/mips/cavium-octeon/executive/cvmx-l2c.c:861: warning: This comment starts with '/**', but isn't a kernel-doc comment. 
Refer Documentation/doc-guide/kernel-doc.rst * Flush a line from the L2 cache Signed-off-by: Randy Dunlap Reported-by: kernel test robot Cc: Aditya Srivastava Cc: Thomas Bogendoerfer Cc: linux-mips@vger.kernel.org Signed-off-by: Thomas Bogendoerfer --- arch/mips/cavium-octeon/executive/cvmx-bootmem.c | 10 +++++----- arch/mips/cavium-octeon/executive/cvmx-cmd-queue.c | 12 ++++++------ .../cavium-octeon/executive/cvmx-helper-board.c | 8 ++++---- .../cavium-octeon/executive/cvmx-helper-rgmii.c | 12 ++++++------ .../cavium-octeon/executive/cvmx-helper-xaui.c | 8 ++++---- arch/mips/cavium-octeon/executive/cvmx-l2c.c | 9 ++++----- arch/mips/cavium-octeon/executive/cvmx-pko.c | 22 +++++++++++----------- arch/mips/cavium-octeon/executive/cvmx-spi.c | 20 ++++++++++---------- 8 files changed, 50 insertions(+), 51 deletions(-) (limited to 'arch') diff --git a/arch/mips/cavium-octeon/executive/cvmx-bootmem.c b/arch/mips/cavium-octeon/executive/cvmx-bootmem.c index e794b2d53adf..b63ad5d42cc7 100644 --- a/arch/mips/cavium-octeon/executive/cvmx-bootmem.c +++ b/arch/mips/cavium-octeon/executive/cvmx-bootmem.c @@ -44,7 +44,7 @@ static struct cvmx_bootmem_desc *cvmx_bootmem_desc; /* See header file for descriptions of functions */ -/** +/* * This macro returns a member of the * cvmx_bootmem_named_block_desc_t structure. These members can't * be directly addressed as they might be in memory not directly @@ -60,7 +60,7 @@ static struct cvmx_bootmem_desc *cvmx_bootmem_desc; offsetof(struct cvmx_bootmem_named_block_desc, field), \ sizeof_field(struct cvmx_bootmem_named_block_desc, field)) -/** +/* * This function is the implementation of the get macros defined * for individual structure members. The argument are generated * by the macros inorder to read only the needed memory. @@ -115,7 +115,7 @@ static uint64_t cvmx_bootmem_phy_get_next(uint64_t addr) return cvmx_read64_uint64((addr + NEXT_OFFSET) | (1ull << 63)); } -/** +/* * Allocate a block of memory from the free list that was * passed to the application by the bootloader within a specified * address range. This is an allocate-only algorithm, so @@ -550,7 +550,7 @@ bootmem_free_done: } -/** +/* * Finds a named memory block by name. * Also used for finding an unused entry in the named block table. * @@ -657,7 +657,7 @@ struct cvmx_bootmem_named_block_desc *cvmx_bootmem_find_named_block(char *name) } EXPORT_SYMBOL(cvmx_bootmem_find_named_block); -/** +/* * Frees a named block. * * @name: name of block to free diff --git a/arch/mips/cavium-octeon/executive/cvmx-cmd-queue.c b/arch/mips/cavium-octeon/executive/cvmx-cmd-queue.c index fb42e8e21ea0..20189e9ad94d 100644 --- a/arch/mips/cavium-octeon/executive/cvmx-cmd-queue.c +++ b/arch/mips/cavium-octeon/executive/cvmx-cmd-queue.c @@ -42,14 +42,14 @@ #include #include -/** +/* * This application uses this pointer to access the global queue * state. It points to a bootmem named block. */ __cvmx_cmd_queue_all_state_t *__cvmx_cmd_queue_state_ptr; EXPORT_SYMBOL_GPL(__cvmx_cmd_queue_state_ptr); -/** +/* * Initialize the Global queue state pointer. * * Returns CVMX_CMD_QUEUE_SUCCESS or a failure code @@ -84,7 +84,7 @@ static cvmx_cmd_queue_result_t __cvmx_cmd_queue_init_state_ptr(void) return CVMX_CMD_QUEUE_SUCCESS; } -/** +/* * Initialize a command queue for use. The initial FPA buffer is * allocated and the hardware unit is configured to point to the * new command queue. 
@@ -182,7 +182,7 @@ cvmx_cmd_queue_result_t cvmx_cmd_queue_initialize(cvmx_cmd_queue_id_t queue_id, } } -/** +/* * Shutdown a queue a free it's command buffers to the FPA. The * hardware connected to the queue must be stopped before this * function is called. @@ -218,7 +218,7 @@ cvmx_cmd_queue_result_t cvmx_cmd_queue_shutdown(cvmx_cmd_queue_id_t queue_id) return CVMX_CMD_QUEUE_SUCCESS; } -/** +/* * Return the number of command words pending in the queue. This * function may be relatively slow for some hardware units. * @@ -274,7 +274,7 @@ int cvmx_cmd_queue_length(cvmx_cmd_queue_id_t queue_id) return CVMX_CMD_QUEUE_INVALID_PARAM; } -/** +/* * Return the command buffer to be written to. The purpose of this * function is to allow CVMX routine access t othe low level buffer * for initial hardware setup. User applications should not call this diff --git a/arch/mips/cavium-octeon/executive/cvmx-helper-board.c b/arch/mips/cavium-octeon/executive/cvmx-helper-board.c index abd11b7af22f..1daa0c6b6f4e 100644 --- a/arch/mips/cavium-octeon/executive/cvmx-helper-board.c +++ b/arch/mips/cavium-octeon/executive/cvmx-helper-board.c @@ -44,7 +44,7 @@ #include #include -/** +/* * Return the MII PHY address associated with the given IPD * port. A result of -1 means there isn't a MII capable PHY * connected to this port. On chips supporting multiple MII @@ -189,7 +189,7 @@ int cvmx_helper_board_get_mii_address(int ipd_port) return -1; } -/** +/* * This function is the board specific method of determining an * ethernet ports link speed. Most Octeon boards have Marvell PHYs * and are handled by the fall through case. This function must be @@ -274,7 +274,7 @@ union cvmx_helper_link_info __cvmx_helper_board_link_get(int ipd_port) return result; } -/** +/* * This function is called by cvmx_helper_interface_probe() after it * determines the number of ports Octeon can support on a specific * interface. This function is the per board location to override @@ -320,7 +320,7 @@ int __cvmx_helper_board_interface_probe(int interface, int supported_ports) return supported_ports; } -/** +/* * Get the clock type used for the USB block based on board type. * Used by the USB code for auto configuration of clock type. * diff --git a/arch/mips/cavium-octeon/executive/cvmx-helper-rgmii.c b/arch/mips/cavium-octeon/executive/cvmx-helper-rgmii.c index c4b58598aa9d..a8c3be4eb6f0 100644 --- a/arch/mips/cavium-octeon/executive/cvmx-helper-rgmii.c +++ b/arch/mips/cavium-octeon/executive/cvmx-helper-rgmii.c @@ -42,7 +42,7 @@ #include #include -/** +/* * Probe RGMII ports and determine the number present * * @interface: Interface to probe @@ -88,7 +88,7 @@ int __cvmx_helper_rgmii_probe(int interface) return num_ports; } -/** +/* * Put an RGMII interface in loopback mode. Internal packets sent * out will be received back again on the same port. Externally * received packets will echo back out. @@ -120,7 +120,7 @@ void cvmx_helper_rgmii_internal_loopback(int port) cvmx_write_csr(CVMX_GMXX_PRTX_CFG(index, interface), gmx_cfg.u64); } -/** +/* * Workaround ASX setup errata with CN38XX pass1 * * @interface: Interface to setup @@ -148,7 +148,7 @@ static int __cvmx_helper_errata_asx_pass1(int interface, int port, return 0; } -/** +/* * Configure all of the ASX, GMX, and PKO registers required * to get RGMII to function on the supplied interface. * @@ -251,7 +251,7 @@ int __cvmx_helper_rgmii_enable(int interface) return 0; } -/** +/* * Return the link state of an IPD/PKO port as returned by * auto negotiation. 
The result of this function may not match * Octeon's link config if auto negotiation has changed since @@ -280,7 +280,7 @@ union cvmx_helper_link_info __cvmx_helper_rgmii_link_get(int ipd_port) return __cvmx_helper_board_link_get(ipd_port); } -/** +/* * Configure an IPD/PKO port for the specified link state. This * function does not influence auto negotiation at the PHY level. * The passed link state must always match the link state returned diff --git a/arch/mips/cavium-octeon/executive/cvmx-helper-xaui.c b/arch/mips/cavium-octeon/executive/cvmx-helper-xaui.c index 842990e8404f..fea71a85bb29 100644 --- a/arch/mips/cavium-octeon/executive/cvmx-helper-xaui.c +++ b/arch/mips/cavium-octeon/executive/cvmx-helper-xaui.c @@ -54,7 +54,7 @@ int __cvmx_helper_xaui_enumerate(int interface) return 1; } -/** +/* * Probe a XAUI interface and determine the number of ports * connected to it. The XAUI interface should still be down * after this call. @@ -102,7 +102,7 @@ int __cvmx_helper_xaui_probe(int interface) return __cvmx_helper_xaui_enumerate(interface); } -/** +/* * Bringup and enable a XAUI interface. After this call packet * I/O should be fully functional. This is called with IPD * enabled but PKO disabled. @@ -249,7 +249,7 @@ int __cvmx_helper_xaui_enable(int interface) return 0; } -/** +/* * Return the link state of an IPD/PKO port as returned by * auto negotiation. The result of this function may not match * Octeon's link config if auto negotiation has changed since @@ -288,7 +288,7 @@ union cvmx_helper_link_info __cvmx_helper_xaui_link_get(int ipd_port) return result; } -/** +/* * Configure an IPD/PKO port for the specified link state. This * function does not influence auto negotiation at the PHY level. * The passed link state must always match the link state returned diff --git a/arch/mips/cavium-octeon/executive/cvmx-l2c.c b/arch/mips/cavium-octeon/executive/cvmx-l2c.c index 83df0a963a8b..33b303691bc2 100644 --- a/arch/mips/cavium-octeon/executive/cvmx-l2c.c +++ b/arch/mips/cavium-octeon/executive/cvmx-l2c.c @@ -281,7 +281,7 @@ uint64_t cvmx_l2c_read_perf(uint32_t counter) } } -/** +/* * @INTERNAL * Helper function use to fault in cache lines for L2 cache locking * @@ -575,7 +575,7 @@ union __cvmx_l2c_tag { }; -/** +/* * @INTERNAL * Function to read a L2C tag. This code make the current core * the 'debug core' for the L2. This code must only be executed by @@ -764,9 +764,8 @@ int cvmx_l2c_get_cache_size_bytes(void) CVMX_CACHE_LINE_SIZE; } -/** +/* * Return log base 2 of the number of sets in the L2 cache - * Returns */ int cvmx_l2c_get_set_bits(void) { @@ -857,7 +856,7 @@ int cvmx_l2c_get_num_assoc(void) return l2_assoc; } -/** +/* * Flush a line from the L2 cache * This should only be called from one core at a time, as this routine * sets the core to the 'debug' core in order to flush the line. diff --git a/arch/mips/cavium-octeon/executive/cvmx-pko.c b/arch/mips/cavium-octeon/executive/cvmx-pko.c index b0efc35e95c4..7c4879e74318 100644 --- a/arch/mips/cavium-octeon/executive/cvmx-pko.c +++ b/arch/mips/cavium-octeon/executive/cvmx-pko.c @@ -35,7 +35,7 @@ #include #include -/** +/* * Internal state of packet output */ @@ -176,7 +176,7 @@ static void __cvmx_pko_chip_init(void) } } -/** +/* * Call before any other calls to initialize the packet * output system. This does chip global config, and should only be * done by one core. @@ -229,7 +229,7 @@ void cvmx_pko_initialize_global(void) } } -/** +/* * This function does per-core initialization required by the PKO routines. 
* This must be called on all cores that will do packet output, and must * be called after the FPA has been initialized and filled with pages. @@ -243,7 +243,7 @@ int cvmx_pko_initialize_local(void) return 0; } -/** +/* * Enables the packet output hardware. It must already be * configured. */ @@ -266,7 +266,7 @@ void cvmx_pko_enable(void) cvmx_write_csr(CVMX_PKO_REG_FLAGS, flags.u64); } -/** +/* * Disables the packet output. Does not affect any configuration. */ void cvmx_pko_disable(void) @@ -278,7 +278,7 @@ void cvmx_pko_disable(void) } EXPORT_SYMBOL_GPL(cvmx_pko_disable); -/** +/* * Reset the packet output. */ static void __cvmx_pko_reset(void) @@ -289,7 +289,7 @@ static void __cvmx_pko_reset(void) cvmx_write_csr(CVMX_PKO_REG_FLAGS, pko_reg_flags.u64); } -/** +/* * Shutdown and free resources required by packet output. */ void cvmx_pko_shutdown(void) @@ -320,7 +320,7 @@ void cvmx_pko_shutdown(void) } EXPORT_SYMBOL_GPL(cvmx_pko_shutdown); -/** +/* * Configure a output port and the associated queues for use. * * @port: Port to configure. @@ -548,7 +548,7 @@ cvmx_pko_status_t cvmx_pko_config_port(uint64_t port, uint64_t base_queue, } #ifdef PKO_DEBUG -/** +/* * Show map of ports -> queues for different cores. */ void cvmx_pko_show_queue_map() @@ -573,7 +573,7 @@ void cvmx_pko_show_queue_map() } #endif -/** +/* * Rate limit a PKO port to a max packets/sec. This function is only * supported on CN51XX and higher, excluding CN58XX. * @@ -606,7 +606,7 @@ int cvmx_pko_rate_limit_packets(int port, int packets_s, int burst) return 0; } -/** +/* * Rate limit a PKO port to a max bits/sec. This function is only * supported on CN51XX and higher, excluding CN58XX. * diff --git a/arch/mips/cavium-octeon/executive/cvmx-spi.c b/arch/mips/cavium-octeon/executive/cvmx-spi.c index f51957a3e915..eb9333e84a6b 100644 --- a/arch/mips/cavium-octeon/executive/cvmx-spi.c +++ b/arch/mips/cavium-octeon/executive/cvmx-spi.c @@ -66,7 +66,7 @@ static cvmx_spi_callbacks_t cvmx_spi_callbacks = { .interface_up_cb = cvmx_spi_interface_up_cb }; -/** +/* * Get current SPI4 initialization callbacks * * @callbacks: Pointer to the callbacks structure.to fill @@ -78,7 +78,7 @@ void cvmx_spi_get_callbacks(cvmx_spi_callbacks_t *callbacks) memcpy(callbacks, &cvmx_spi_callbacks, sizeof(cvmx_spi_callbacks)); } -/** +/* * Set new SPI4 initialization callbacks * * @new_callbacks: Pointer to an updated callbacks structure. @@ -88,7 +88,7 @@ void cvmx_spi_set_callbacks(cvmx_spi_callbacks_t *new_callbacks) memcpy(&cvmx_spi_callbacks, new_callbacks, sizeof(cvmx_spi_callbacks)); } -/** +/* * Initialize and start the SPI interface. * * @interface: The identifier of the packet interface to configure and @@ -133,7 +133,7 @@ int cvmx_spi_start_interface(int interface, cvmx_spi_mode_t mode, int timeout, return res; } -/** +/* * This routine restarts the SPI interface after it has lost synchronization * with its correspondent system. 
* @@ -179,7 +179,7 @@ int cvmx_spi_restart_interface(int interface, cvmx_spi_mode_t mode, int timeout) } EXPORT_SYMBOL_GPL(cvmx_spi_restart_interface); -/** +/* * Callback to perform SPI4 reset * * @interface: The identifier of the packet interface to configure and @@ -294,7 +294,7 @@ int cvmx_spi_reset_cb(int interface, cvmx_spi_mode_t mode) return 0; } -/** +/* * Callback to setup calendar and miscellaneous settings before clock detection * * @interface: The identifier of the packet interface to configure and @@ -413,7 +413,7 @@ int cvmx_spi_calendar_setup_cb(int interface, cvmx_spi_mode_t mode, return 0; } -/** +/* * Callback to perform clock detection * * @interface: The identifier of the packet interface to configure and @@ -491,7 +491,7 @@ int cvmx_spi_clock_detect_cb(int interface, cvmx_spi_mode_t mode, int timeout) return 0; } -/** +/* * Callback to perform link training * * @interface: The identifier of the packet interface to configure and @@ -560,7 +560,7 @@ int cvmx_spi_training_cb(int interface, cvmx_spi_mode_t mode, int timeout) return 0; } -/** +/* * Callback to perform calendar data synchronization * * @interface: The identifier of the packet interface to configure and @@ -617,7 +617,7 @@ int cvmx_spi_calendar_sync_cb(int interface, cvmx_spi_mode_t mode, int timeout) return 0; } -/** +/* * Callback to handle interface up * * @interface: The identifier of the packet interface to configure and -- cgit From 64c888ce33602c66f1a043c1f7943e3d453186c2 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 25 Jul 2021 09:02:57 -0700 Subject: mips: clean up kernel-doc in cavium-octeon/*.c Convert function comments to kernel-doc notation to remove kernel-doc warnings in arch/mips/cavium-octeon/*.c. Also clean up the comments in a few places. arch/mips/cavium-octeon/flash_setup.c:66: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Module/ driver initialization. -- arch/mips/cavium-octeon/setup.c:308: warning: expecting prototype for Return non zero if we are currently running in the Octeon simulator(). Prototype was for octeon_is_simulation() instead arch/mips/cavium-octeon/setup.c:314: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Return true if Octeon is in PCI Host mode. This means arch/mips/cavium-octeon/setup.c:334: warning: expecting prototype for Get the clock rate of Octeon(). Prototype was for octeon_get_clock_rate() instead arch/mips/cavium-octeon/setup.c:351: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Write to the LCD display connected to the bootbus. This display arch/mips/cavium-octeon/setup.c:380: warning: expecting prototype for Return the console uart passed by the bootloader(). Prototype was for octeon_get_boot_uart() instead arch/mips/cavium-octeon/setup.c:386: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Get the coremask Linux was booted on. arch/mips/cavium-octeon/setup.c:399: warning: expecting prototype for Check the hardware BIST results for a CPU(). Prototype was for octeon_check_cpu_bist() instead arch/mips/cavium-octeon/setup.c:432: warning: expecting prototype for Reboot Octeon(). Prototype was for octeon_restart() instead arch/mips/cavium-octeon/setup.c:452: warning: This comment starts with '/**', but isn't a kernel-doc comment. 
Refer Documentation/doc-guide/kernel-doc.rst * Permanently stop a core. arch/mips/cavium-octeon/setup.c:475: warning: expecting prototype for Halt the system(). Prototype was for octeon_halt() instead arch/mips/cavium-octeon/setup.c:520: warning: expecting prototype for Return a string representing the system type(). Prototype was for octeon_board_type_string() instead arch/mips/cavium-octeon/setup.c:661: warning: expecting prototype for Early entry point for arch setup(). Prototype was for prom_init() instead -- arch/mips/cavium-octeon/smp.c:100: warning: Function parameter or member 'cpu' not described in 'octeon_send_ipi_single' arch/mips/cavium-octeon/smp.c:100: warning: Function parameter or member 'action' not described in 'octeon_send_ipi_single' arch/mips/cavium-octeon/smp.c:100: warning: expecting prototype for Cause the function described by call_data to be executed on the passed(). Prototype was for octeon_send_ipi_single() instead arch/mips/cavium-octeon/smp.c:119: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Detect available CPUs, populate cpu_possible_mask arch/mips/cavium-octeon/smp.c:210: warning: Function parameter or member 'cpu' not described in 'octeon_boot_secondary' arch/mips/cavium-octeon/smp.c:210: warning: Function parameter or member 'idle' not described in 'octeon_boot_secondary' arch/mips/cavium-octeon/smp.c:210: warning: expecting prototype for Firmware CPU startup hook(). Prototype was for octeon_boot_secondary() instead arch/mips/cavium-octeon/smp.c:236: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * After we've done initial boot, this function is called to allow the arch/mips/cavium-octeon/smp.c:258: warning: Function parameter or member 'max_cpus' not described in 'octeon_prepare_cpus' arch/mips/cavium-octeon/smp.c:258: warning: expecting prototype for Callout to firmware before smp_init(). Prototype was for octeon_prepare_cpus() instead arch/mips/cavium-octeon/smp.c:276: warning: expecting prototype for Last chance for the board code to finish SMP initialization before(). Prototype was for octeon_smp_finish() instead Signed-off-by: Randy Dunlap Reported-by: kernel test robot Cc: Aditya Srivastava Cc: Thomas Bogendoerfer Cc: linux-mips@vger.kernel.org Signed-off-by: Thomas Bogendoerfer --- arch/mips/cavium-octeon/flash_setup.c | 2 +- arch/mips/cavium-octeon/setup.c | 43 ++++++++++++++++++----------------- arch/mips/cavium-octeon/smp.c | 14 +++++------- 3 files changed, 29 insertions(+), 30 deletions(-) (limited to 'arch') diff --git a/arch/mips/cavium-octeon/flash_setup.c b/arch/mips/cavium-octeon/flash_setup.c index a5e8f4a784af..c8a8c6d359b9 100644 --- a/arch/mips/cavium-octeon/flash_setup.c +++ b/arch/mips/cavium-octeon/flash_setup.c @@ -62,7 +62,7 @@ static void octeon_flash_map_copy_to(struct map_info *map, unsigned long to, up(&octeon_bootbus_sem); } -/** +/* * Module/ driver initialization. 
* * Returns Zero on success diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c index 0ddd3cc16ee4..00bf269763cf 100644 --- a/arch/mips/cavium-octeon/setup.c +++ b/arch/mips/cavium-octeon/setup.c @@ -295,9 +295,10 @@ static int octeon_uart; extern asmlinkage void handle_int(void); /** - * Return non zero if we are currently running in the Octeon simulator + * octeon_is_simulation - Return non-zero if we are currently running + * in the Octeon simulator * - * Returns + * Return: non-0 if running in the Octeon simulator, 0 otherwise */ int octeon_is_simulation(void) { @@ -306,10 +307,10 @@ int octeon_is_simulation(void) EXPORT_SYMBOL(octeon_is_simulation); /** - * Return true if Octeon is in PCI Host mode. This means + * octeon_is_pci_host - Return true if Octeon is in PCI Host mode. This means * Linux can control the PCI bus. * - * Returns Non zero if Octeon in host mode. + * Return: Non-zero if Octeon is in host mode. */ int octeon_is_pci_host(void) { @@ -321,9 +322,9 @@ int octeon_is_pci_host(void) } /** - * Get the clock rate of Octeon + * octeon_get_clock_rate - Get the clock rate of Octeon * - * Returns Clock rate in HZ + * Return: Clock rate in HZ */ uint64_t octeon_get_clock_rate(void) { @@ -343,11 +344,11 @@ EXPORT_SYMBOL(octeon_get_io_clock_rate); /** - * Write to the LCD display connected to the bootbus. This display - * exists on most Cavium evaluation boards. If it doesn't exist, then - * this function doesn't do anything. - * + * octeon_write_lcd - Write to the LCD display connected to the bootbus. * @s: String to write + * + * This display exists on most Cavium evaluation boards. If it doesn't exist, + * then this function doesn't do anything. */ static void octeon_write_lcd(const char *s) { @@ -367,9 +368,9 @@ static void octeon_write_lcd(const char *s) } /** - * Return the console uart passed by the bootloader + * octeon_get_boot_uart - Return the console uart passed by the bootloader * - * Returns uart (0 or 1) + * Return: uart number (0 or 1) */ static int octeon_get_boot_uart(void) { @@ -378,9 +379,9 @@ static int octeon_get_boot_uart(void) } /** - * Get the coremask Linux was booted on. + * octeon_get_boot_coremask - Get the coremask Linux was booted on. * - * Returns Core mask + * Return: Core mask */ int octeon_get_boot_coremask(void) { @@ -388,7 +389,7 @@ int octeon_get_boot_coremask(void) } /** - * Check the hardware BIST results for a CPU + * octeon_check_cpu_bist - Check the hardware BIST results for a CPU */ void octeon_check_cpu_bist(void) { @@ -419,7 +420,7 @@ void octeon_check_cpu_bist(void) } /** - * Reboot Octeon + * octeon_restart - Reboot Octeon * * @command: Command to pass to the bootloader. Currently ignored. */ @@ -444,7 +445,7 @@ static void octeon_restart(char *command) /** - * Permanently stop a core. + * octeon_kill_core - Permanently stop a core. * * @arg: Ignored. 
*/ @@ -464,7 +465,7 @@ static void octeon_kill_core(void *arg) /** - * Halt the system + * octeon_halt - Halt the system */ static void octeon_halt(void) { @@ -507,9 +508,9 @@ static void __init init_octeon_system_type(void) } /** - * Return a string representing the system type + * octeon_board_type_string - Return a string representing the system type * - * Returns + * Return: system type string */ const char *octeon_board_type_string(void) { @@ -650,7 +651,7 @@ void octeon_user_io_init(void) } /** - * Early entry point for arch setup + * prom_init - Early entry point for arch setup */ void __init prom_init(void) { diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c index 66ce5527da54..89954f5f87fb 100644 --- a/arch/mips/cavium-octeon/smp.c +++ b/arch/mips/cavium-octeon/smp.c @@ -91,7 +91,7 @@ static irqreturn_t mailbox_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -/** +/* * Cause the function described by call_data to be executed on the passed * cpu. When the function has finished, increment the finished field of * call_data. @@ -115,7 +115,7 @@ static inline void octeon_send_ipi_mask(const struct cpumask *mask, octeon_send_ipi_single(i, action); } -/** +/* * Detect available CPUs, populate cpu_possible_mask */ static void octeon_smp_hotplug_setup(void) @@ -202,9 +202,8 @@ int plat_post_relocation(long offset) } #endif /* CONFIG_RELOCATABLE */ -/** +/* * Firmware CPU startup hook - * */ static int octeon_boot_secondary(int cpu, struct task_struct *idle) { @@ -232,7 +231,7 @@ static int octeon_boot_secondary(int cpu, struct task_struct *idle) return 0; } -/** +/* * After we've done initial boot, this function is called to allow the * board code to clean up state, if needed */ @@ -250,9 +249,8 @@ static void octeon_init_secondary(void) octeon_irq_setup_secondary(); } -/** +/* * Callout to firmware before smp_init - * */ static void __init octeon_prepare_cpus(unsigned int max_cpus) { @@ -268,7 +266,7 @@ static void __init octeon_prepare_cpus(unsigned int max_cpus) } } -/** +/* * Last chance for the board code to finish SMP initialization before * the CPU is "online". */ -- cgit From d2ac3a11cba23454516ba17f03e078a6831b8be5 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 25 Jul 2021 09:02:58 -0700 Subject: mips: clean up kernel-doc in mm/c-octeon.c Clean up kernel-doc warnings in arch/mips/mm/c-octeon.c. arch/mips/mm/c-octeon.c:34: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Octeon automatically flushes the dcache on tlb changes, so arch/mips/mm/c-octeon.c:65: warning: expecting prototype for Flush caches as necessary for all cores affected by a(). Prototype was for octeon_flush_icache_all_cores() instead arch/mips/mm/c-octeon.c:99: warning: expecting prototype for Called to flush the icache on all cores(). Prototype was for octeon_flush_icache_all() instead arch/mips/mm/c-octeon.c:111: warning: expecting prototype for Called to flush all memory associated with a memory(). Prototype was for octeon_flush_cache_mm() instead arch/mips/mm/c-octeon.c:124: warning: Function parameter or member 'start' not described in 'octeon_flush_icache_range' arch/mips/mm/c-octeon.c:124: warning: Function parameter or member 'end' not described in 'octeon_flush_icache_range' arch/mips/mm/c-octeon.c:124: warning: expecting prototype for Flush a range of kernel addresses out of the icache(). 
Prototype was for octeon_flush_icache_range() instead arch/mips/mm/c-octeon.c:138: warning: expecting prototype for Flush a range out of a vma(). Prototype was for octeon_flush_cache_range() instead arch/mips/mm/c-octeon.c:153: warning: expecting prototype for Flush a specific page of a vma(). Prototype was for octeon_flush_cache_page() instead arch/mips/mm/c-octeon.c:164: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Probe Octeon's caches arch/mips/mm/c-octeon.c:264: warning: expecting prototype for Setup the Octeon cache flush routines(). Prototype was for octeon_cache_init() instead arch/mips/mm/c-octeon.c:349: warning: expecting prototype for Called when the the exception is not recoverable(). Prototype was for cache_parity_error_octeon_non_recoverable() instead Signed-off-by: Randy Dunlap Reported-by: kernel test robot Cc: Aditya Srivastava Cc: Thomas Bogendoerfer Cc: linux-mips@vger.kernel.org Signed-off-by: Thomas Bogendoerfer --- arch/mips/mm/c-octeon.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/mips/mm/c-octeon.c b/arch/mips/mm/c-octeon.c index 8ae181e08311..ec2ae501539a 100644 --- a/arch/mips/mm/c-octeon.c +++ b/arch/mips/mm/c-octeon.c @@ -30,7 +30,7 @@ unsigned long long cache_err_dcache[NR_CPUS]; EXPORT_SYMBOL_GPL(cache_err_dcache); -/** +/* * Octeon automatically flushes the dcache on tlb changes, so * from Linux's viewpoint it acts much like a physically * tagged cache. No flushing is needed @@ -56,8 +56,8 @@ static void local_octeon_flush_icache_range(unsigned long start, } /** - * Flush caches as necessary for all cores affected by a - * vma. If no vma is supplied, all cores are flushed. + * octeon_flush_icache_all_cores - Flush caches as necessary for all cores + * affected by a vma. If no vma is supplied, all cores are flushed. * * @vma: VMA to flush or NULL to flush all icaches. */ @@ -92,7 +92,7 @@ static void octeon_flush_icache_all_cores(struct vm_area_struct *vma) } -/** +/* * Called to flush the icache on all cores */ static void octeon_flush_icache_all(void) @@ -102,8 +102,7 @@ static void octeon_flush_icache_all(void) /** - * Called to flush all memory associated with a memory - * context. + * octeon_flush_cache_mm - flush all memory associated with a memory context. 
* * @mm: Memory context to flush */ @@ -116,7 +115,7 @@ static void octeon_flush_cache_mm(struct mm_struct *mm) } -/** +/* * Flush a range of kernel addresses out of the icache * */ @@ -127,11 +126,11 @@ static void octeon_flush_icache_range(unsigned long start, unsigned long end) /** - * Flush a range out of a vma + * octeon_flush_cache_range - Flush a range out of a vma * * @vma: VMA to flush - * @start: - * @end: + * @start: beginning address for flush + * @end: ending address for flush */ static void octeon_flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) @@ -142,11 +141,11 @@ static void octeon_flush_cache_range(struct vm_area_struct *vma, /** - * Flush a specific page of a vma + * octeon_flush_cache_page - Flush a specific page of a vma * * @vma: VMA to flush page for * @page: Page to flush - * @pfn: + * @pfn: Page frame number */ static void octeon_flush_cache_page(struct vm_area_struct *vma, unsigned long page, unsigned long pfn) @@ -160,7 +159,7 @@ static void octeon_flush_kernel_vmap_range(unsigned long vaddr, int size) BUG(); } -/** +/* * Probe Octeon's caches * */ @@ -256,7 +255,7 @@ static void octeon_cache_error_setup(void) set_handler(0x100, &except_vec2_octeon, 0x80); } -/** +/* * Setup the Octeon cache flush routines * */ @@ -341,7 +340,7 @@ asmlinkage void cache_parity_error_octeon_recoverable(void) co_cache_error_call_notifiers(0); } -/** +/* * Called when the the exception is not recoverable */ -- cgit From a86aadeff2fe9203d9575f1c157d09b3c557d1ce Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 28 Jul 2021 11:26:12 +0100 Subject: MIPS: Alchemy: Fix spelling contraction "cant" -> "can't" There is a spelling mistake in a pr_warn message. Fix it. Signed-off-by: Colin Ian King Signed-off-by: Thomas Bogendoerfer --- arch/mips/alchemy/devboards/db1200.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/alchemy/devboards/db1200.c b/arch/mips/alchemy/devboards/db1200.c index 421d651433b6..1864eb935ca5 100644 --- a/arch/mips/alchemy/devboards/db1200.c +++ b/arch/mips/alchemy/devboards/db1200.c @@ -835,7 +835,7 @@ int __init db1200_dev_setup(void) if (!IS_ERR(c)) { pfc = clk_round_rate(c, 50000000); if ((pfc < 1) || (abs(50000000 - pfc) > 2500000)) - pr_warn("DB1200: cant get I2C close to 50MHz\n"); + pr_warn("DB1200: can't get I2C close to 50MHz\n"); else clk_set_rate(c, pfc); clk_prepare_enable(c); -- cgit From a449ffaf9181b5a2dc705d8a06b13e0068207fd4 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 30 Jul 2021 12:42:31 +0100 Subject: powerpc/svm: Don't issue ultracalls if !mem_encrypt_active() Commit ad6c00283163 ("swiotlb: Free tbl memory in swiotlb_exit()") introduced a set_memory_encrypted() call to swiotlb_exit() so that the buffer pages are returned to an encrypted state prior to being freed. Sachin reports that this leads to the following crash on a Power server: [ 0.010799] software IO TLB: tearing down default memory pool [ 0.010805] ------------[ cut here ]------------ [ 0.010808] kernel BUG at arch/powerpc/kernel/interrupt.c:98! Nick spotted that this is because set_memory_encrypted() is issuing an ultracall which doesn't exist for the processor, and should therefore be gated by mem_encrypt_active() to mirror the x86 implementation. 
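For illustration, the shape of the fix is the common "feature-inactive fast exit" guard: return before any ultracall can be issued when memory encryption is not active. A condensed sketch (not the patch itself; the uv_unshare_page() call shown is assumed from the surrounding svm.c code):

	int set_memory_encrypted(unsigned long addr, int numpages)
	{
		if (!mem_encrypt_active())	/* no ultravisor: must be a no-op */
			return 0;

		if (!PAGE_ALIGNED(addr))
			return -EINVAL;

		uv_unshare_page(PHYS_PFN(__pa(addr)), numpages);
		return 0;
	}

The actual diff follows.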
Cc: Konrad Rzeszutek Wilk Cc: Claire Chang Cc: Christoph Hellwig Cc: Robin Murphy Fixes: ad6c00283163 ("swiotlb: Free tbl memory in swiotlb_exit()") Suggested-by: Nicholas Piggin Reported-by: Sachin Sant Tested-by: Sachin Sant Tested-by: Nathan Chancellor Link: https://lore.kernel.org/r/1905CD70-7656-42AE-99E2-A31FC3812EAC@linux.vnet.ibm.com/ Signed-off-by: Will Deacon Signed-off-by: Konrad Rzeszutek Wilk --- arch/powerpc/platforms/pseries/svm.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/svm.c b/arch/powerpc/platforms/pseries/svm.c index 1d829e257996..87f001b4c4e4 100644 --- a/arch/powerpc/platforms/pseries/svm.c +++ b/arch/powerpc/platforms/pseries/svm.c @@ -63,6 +63,9 @@ void __init svm_swiotlb_init(void) int set_memory_encrypted(unsigned long addr, int numpages) { + if (!mem_encrypt_active()) + return 0; + if (!PAGE_ALIGNED(addr)) return -EINVAL; @@ -73,6 +76,9 @@ int set_memory_encrypted(unsigned long addr, int numpages) int set_memory_decrypted(unsigned long addr, int numpages) { + if (!mem_encrypt_active()) + return 0; + if (!PAGE_ALIGNED(addr)) return -EINVAL; -- cgit From 6011cf68c88545e16cb32039c2cecfdae6a32315 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 26 Jul 2021 16:35:48 +0100 Subject: KVM: arm64: Walk userspace page tables to compute the THP mapping size We currently rely on the kvm_is_transparent_hugepage() helper to discover whether a given page has the potential to be mapped as a block mapping. However, this API doesn't really give us everything we want: - we don't get the size: this is not crucial today as we only support PMD-sized THPs, but we'd like to have larger sizes in the future - we're the only user left of the API, and there is a will to remove it altogether To address the above, implement a simple walker using the existing page table infrastructure, and plumb it into transparent_hugepage_adjust(). No new page sizes are supported in the process. Signed-off-by: Marc Zyngier Reviewed-by: Alexandru Elisei Link: https://lore.kernel.org/r/20210726153552.1535838-3-maz@kernel.org --- arch/arm64/kvm/mmu.c | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 0625bf2353c2..183c107c06b2 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -433,6 +433,32 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size, return 0; } +static struct kvm_pgtable_mm_ops kvm_user_mm_ops = { + /* We shouldn't need any other callback to walk the PT */ + .phys_to_virt = kvm_host_va, +}; + +static int get_user_mapping_size(struct kvm *kvm, u64 addr) +{ + struct kvm_pgtable pgt = { + .pgd = (kvm_pte_t *)kvm->mm->pgd, + .ia_bits = VA_BITS, + .start_level = (KVM_PGTABLE_MAX_LEVELS - + CONFIG_PGTABLE_LEVELS), + .mm_ops = &kvm_user_mm_ops, + }; + kvm_pte_t pte = 0; /* Keep GCC quiet... */ + u32 level = ~0; + int ret; + + ret = kvm_pgtable_get_leaf(&pgt, addr, &pte, &level); + VM_BUG_ON(ret); + VM_BUG_ON(level >= KVM_PGTABLE_MAX_LEVELS); + VM_BUG_ON(!(pte & PTE_VALID)); + + return BIT(ARM64_HW_PGTABLE_LEVEL_SHIFT(level)); +} + static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = { .zalloc_page = stage2_memcache_zalloc_page, .zalloc_pages_exact = kvm_host_zalloc_pages_exact, @@ -780,7 +806,7 @@ static bool fault_supports_stage2_huge_mapping(struct kvm_memory_slot *memslot, * Returns the size of the mapping.
*/ static unsigned long -transparent_hugepage_adjust(struct kvm_memory_slot *memslot, +transparent_hugepage_adjust(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned long hva, kvm_pfn_t *pfnp, phys_addr_t *ipap) { @@ -791,8 +817,8 @@ transparent_hugepage_adjust(struct kvm_memory_slot *memslot, * sure that the HVA and IPA are sufficiently aligned and that the * block map is contained within the memslot. */ - if (kvm_is_transparent_hugepage(pfn) && - fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE)) { + if (fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE) && + get_user_mapping_size(kvm, hva) >= PMD_SIZE) { /* * The address we faulted on is backed by a transparent huge * page. However, because we map the compound huge page and @@ -1051,7 +1077,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, * backed by a THP and thus use block mapping if possible. */ if (vma_pagesize == PAGE_SIZE && !(force_pte || device)) - vma_pagesize = transparent_hugepage_adjust(memslot, hva, + vma_pagesize = transparent_hugepage_adjust(kvm, memslot, hva, &pfn, &fault_ipa); if (fault_status != FSC_PERM && !device && kvm_has_mte(kvm)) { -- cgit From f2cc327303b13a70311e823bd52aa0bca8c7ddbc Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 26 Jul 2021 16:35:49 +0100 Subject: KVM: arm64: Avoid mapping size adjustment on permission fault Since we only support PMD-sized mappings for THP, getting a permission fault on a level that results in a mapping being larger than PAGE_SIZE is a sure indication that we have already upgraded our mapping to a PMD. In this case, there is no need to try and parse userspace page tables, as the fault information already tells us everything. Signed-off-by: Marc Zyngier Reviewed-by: Alexandru Elisei Link: https://lore.kernel.org/r/20210726153552.1535838-4-maz@kernel.org --- arch/arm64/kvm/mmu.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 183c107c06b2..116a2910a4a4 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1076,9 +1076,14 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, * If we are not forced to use page mapping, check if we are * backed by a THP and thus use block mapping if possible. */ - if (vma_pagesize == PAGE_SIZE && !(force_pte || device)) - vma_pagesize = transparent_hugepage_adjust(kvm, memslot, hva, - &pfn, &fault_ipa); + if (vma_pagesize == PAGE_SIZE && !(force_pte || device)) { + if (fault_status == FSC_PERM && fault_granule > PAGE_SIZE) + vma_pagesize = fault_granule; + else + vma_pagesize = transparent_hugepage_adjust(kvm, memslot, + hva, &pfn, + &fault_ipa); + } if (fault_status != FSC_PERM && !device && kvm_has_mte(kvm)) { /* Check the VMM hasn't introduced a new VM_SHARED VMA */ -- cgit From 0fe49630101b3ce23bd21a2788440ac719ec868a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 26 Jul 2021 16:35:51 +0100 Subject: KVM: arm64: Use get_page() instead of kvm_get_pfn() When mapping a THP, we are guaranteed that the page isn't reserved, and we can safely avoid the kvm_is_reserved_pfn() call. Replace kvm_get_pfn() with get_page(pfn_to_page()). 
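As a hedged aside, the generic helper being bypassed looked roughly like this in contemporary KVM (paraphrased, not part of this patch):

	void kvm_get_pfn(kvm_pfn_t pfn)
	{
		/* skip the refcount for reserved PFNs without a refcounted page */
		if (!kvm_is_reserved_pfn(pfn))
			get_page(pfn_to_page(pfn));
	}

A THP-backed PFN always has a regular, refcounted struct page behind it, so the reserved-PFN check is dead weight and get_page(pfn_to_page(pfn)) can be called directly.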
Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210726153552.1535838-6-maz@kernel.org --- arch/arm64/kvm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 116a2910a4a4..6e002d30a478 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -840,7 +840,7 @@ transparent_hugepage_adjust(struct kvm *kvm, struct kvm_memory_slot *memslot, *ipap &= PMD_MASK; kvm_release_pfn_clean(pfn); pfn &= ~(PTRS_PER_PMD - 1); - kvm_get_pfn(pfn); + get_page(pfn_to_page(pfn)); *pfnp = pfn; return PMD_SIZE; -- cgit From 63db506e07622c344a3c748a1c06293d48780f83 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 26 Jul 2021 16:35:47 +0100 Subject: KVM: arm64: Introduce helper to retrieve a PTE and its level It is becoming a common need to fetch the PTE for a given address together with its level. Add such a helper. Signed-off-by: Marc Zyngier Reviewed-by: Quentin Perret Reviewed-by: Alexandru Elisei Link: https://lore.kernel.org/r/20210726153552.1535838-2-maz@kernel.org --- arch/arm64/include/asm/kvm_pgtable.h | 20 ++++++++++++++++++ arch/arm64/kvm/hyp/pgtable.c | 39 ++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index f004c0115d89..e42b55bd50a2 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -432,6 +432,26 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size); int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size, struct kvm_pgtable_walker *walker); +/** + * kvm_pgtable_get_leaf() - Walk a page-table and retrieve the leaf entry + * with its level. + * @pgt: Page-table structure initialised by kvm_pgtable_*_init() + * or a similar initialiser. + * @addr: Input address for the start of the walk. + * @ptep: Pointer to storage for the retrieved PTE. + * @level: Pointer to storage for the level of the retrieved PTE. + * + * The offset of @addr within a page is ignored. + * + * The walker will walk the page-table entries corresponding to the input + * address specified, retrieving the leaf corresponding to this address. + * Invalid entries are treated as leaf entries. + * + * Return: 0 on success, negative error code on failure. 
+ */ +int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr, + kvm_pte_t *ptep, u32 *level); + /** * kvm_pgtable_stage2_find_range() - Find a range of Intermediate Physical * Addresses with compatible permission diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 05321f4165e3..78f36bd5df6c 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -326,6 +326,45 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size, return _kvm_pgtable_walk(&walk_data); } +struct leaf_walk_data { + kvm_pte_t pte; + u32 level; +}; + +static int leaf_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, + enum kvm_pgtable_walk_flags flag, void * const arg) +{ + struct leaf_walk_data *data = arg; + + data->pte = *ptep; + data->level = level; + + return 0; +} + +int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr, + kvm_pte_t *ptep, u32 *level) +{ + struct leaf_walk_data data; + struct kvm_pgtable_walker walker = { + .cb = leaf_walker, + .flags = KVM_PGTABLE_WALK_LEAF, + .arg = &data, + }; + int ret; + + ret = kvm_pgtable_walk(pgt, ALIGN_DOWN(addr, PAGE_SIZE), + PAGE_SIZE, &walker); + if (!ret) { + if (ptep) + *ptep = data.pte; + if (level) + *level = data.level; + } + + return ret; +} + struct hyp_map_data { u64 phys; kvm_pte_t attr; -- cgit From 0ab410a93d627ae73136d1a52c096262360b7992 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 19 Jul 2021 13:38:59 +0100 Subject: KVM: arm64: Narrow PMU sysreg reset values to architectural requirements A number of the PMU sysregs expose reset values that are not compliant with the architecture (set bits in the RES0 ranges, for example). This in turn has the effect that we need to pointlessly mask some register fields when using them. Let's start by making sure we don't have illegal values in the shadow registers at reset time. This affects all the registers that dedicate one bit per counter, the counters themselves, PMEVTYPERn_EL0 and PMSELR_EL0. Reported-by: Alexandre Chartre Reviewed-by: Alexandre Chartre Acked-by: Russell King (Oracle) Signed-off-by: Marc Zyngier Reviewed-by: Alexandru Elisei Link: https://lore.kernel.org/r/20210719123902.1493805-2-maz@kernel.org --- arch/arm64/kvm/sys_regs.c | 43 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index f6f126eb6ac1..96bdfa0e68b2 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -603,6 +603,41 @@ static unsigned int pmu_visibility(const struct kvm_vcpu *vcpu, return REG_HIDDEN; } +static void reset_pmu_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) +{ + u64 n, mask = BIT(ARMV8_PMU_CYCLE_IDX); + + /* No PMU available, any PMU reg may UNDEF... 
*/ + if (!kvm_arm_support_pmu_v3()) + return; + + n = read_sysreg(pmcr_el0) >> ARMV8_PMU_PMCR_N_SHIFT; + n &= ARMV8_PMU_PMCR_N_MASK; + if (n) + mask |= GENMASK(n - 1, 0); + + reset_unknown(vcpu, r); + __vcpu_sys_reg(vcpu, r->reg) &= mask; +} + +static void reset_pmevcntr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) +{ + reset_unknown(vcpu, r); + __vcpu_sys_reg(vcpu, r->reg) &= GENMASK(31, 0); +} + +static void reset_pmevtyper(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) +{ + reset_unknown(vcpu, r); + __vcpu_sys_reg(vcpu, r->reg) &= ARMV8_PMU_EVTYPE_MASK; +} + +static void reset_pmselr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) +{ + reset_unknown(vcpu, r); + __vcpu_sys_reg(vcpu, r->reg) &= ARMV8_PMU_COUNTER_MASK; +} + static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { u64 pmcr, val; @@ -944,16 +979,18 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, trap_wcr, reset_wcr, 0, 0, get_wcr, set_wcr } #define PMU_SYS_REG(r) \ - SYS_DESC(r), .reset = reset_unknown, .visibility = pmu_visibility + SYS_DESC(r), .reset = reset_pmu_reg, .visibility = pmu_visibility /* Macro to expand the PMEVCNTRn_EL0 register */ #define PMU_PMEVCNTR_EL0(n) \ { PMU_SYS_REG(SYS_PMEVCNTRn_EL0(n)), \ + .reset = reset_pmevcntr, \ .access = access_pmu_evcntr, .reg = (PMEVCNTR0_EL0 + n), } /* Macro to expand the PMEVTYPERn_EL0 register */ #define PMU_PMEVTYPER_EL0(n) \ { PMU_SYS_REG(SYS_PMEVTYPERn_EL0(n)), \ + .reset = reset_pmevtyper, \ .access = access_pmu_evtyper, .reg = (PMEVTYPER0_EL0 + n), } static bool undef_access(struct kvm_vcpu *vcpu, struct sys_reg_params *p, @@ -1595,13 +1632,13 @@ static const struct sys_reg_desc sys_reg_descs[] = { { PMU_SYS_REG(SYS_PMSWINC_EL0), .access = access_pmswinc, .reg = PMSWINC_EL0 }, { PMU_SYS_REG(SYS_PMSELR_EL0), - .access = access_pmselr, .reg = PMSELR_EL0 }, + .access = access_pmselr, .reset = reset_pmselr, .reg = PMSELR_EL0 }, { PMU_SYS_REG(SYS_PMCEID0_EL0), .access = access_pmceid, .reset = NULL }, { PMU_SYS_REG(SYS_PMCEID1_EL0), .access = access_pmceid, .reset = NULL }, { PMU_SYS_REG(SYS_PMCCNTR_EL0), - .access = access_pmu_evcntr, .reg = PMCCNTR_EL0 }, + .access = access_pmu_evcntr, .reset = reset_unknown, .reg = PMCCNTR_EL0 }, { PMU_SYS_REG(SYS_PMXEVTYPER_EL0), .access = access_pmu_evtyper, .reset = NULL }, { PMU_SYS_REG(SYS_PMXEVCNTR_EL0), -- cgit From f5eff40058a856c23c5ec2f31756f107a2b1ef84 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 19 Jul 2021 13:39:00 +0100 Subject: KVM: arm64: Drop unnecessary masking of PMU registers We always sanitise our PMU sysreg on the write side, so there is no need to do it on the read side as well. Drop the unnecessary masking. 
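A minimal sketch of the invariant being relied on, with 'mask' standing for kvm_pmu_valid_counter_mask(vcpu) as used elsewhere in this file (illustrative only, not the kernel source):

	/* Write side: the guest-supplied value is sanitised once, at store time... */
	val = p->regval & mask;
	__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) |= val;

	/* ...so the read side may return the shadow register verbatim: */
	p->regval = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);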
Acked-by: Russell King (Oracle) Reviewed-by: Alexandre Chartre Reviewed-by: Alexandru Elisei Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210719123902.1493805-3-maz@kernel.org --- arch/arm64/kvm/pmu-emul.c | 3 +-- arch/arm64/kvm/sys_regs.c | 6 +++--- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index f33825c995cb..fae4e95b586c 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -373,7 +373,6 @@ static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu) reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0); reg &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0); reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1); - reg &= kvm_pmu_valid_counter_mask(vcpu); } return reg; @@ -569,7 +568,7 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) if (val & ARMV8_PMU_PMCR_E) { kvm_pmu_enable_counter_mask(vcpu, - __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask); + __vcpu_sys_reg(vcpu, PMCNTENSET_EL0)); } else { kvm_pmu_disable_counter_mask(vcpu, mask); } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 96bdfa0e68b2..f22139658e48 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -880,7 +880,7 @@ static bool access_pmcnten(struct kvm_vcpu *vcpu, struct sys_reg_params *p, kvm_pmu_disable_counter_mask(vcpu, val); } } else { - p->regval = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask; + p->regval = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0); } return true; @@ -904,7 +904,7 @@ static bool access_pminten(struct kvm_vcpu *vcpu, struct sys_reg_params *p, /* accessing PMINTENCLR_EL1 */ __vcpu_sys_reg(vcpu, PMINTENSET_EL1) &= ~val; } else { - p->regval = __vcpu_sys_reg(vcpu, PMINTENSET_EL1) & mask; + p->regval = __vcpu_sys_reg(vcpu, PMINTENSET_EL1); } return true; @@ -926,7 +926,7 @@ static bool access_pmovs(struct kvm_vcpu *vcpu, struct sys_reg_params *p, /* accessing PMOVSCLR_EL0 */ __vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= ~(p->regval & mask); } else { - p->regval = __vcpu_sys_reg(vcpu, PMOVSSET_EL0) & mask; + p->regval = __vcpu_sys_reg(vcpu, PMOVSSET_EL0); } return true; -- cgit From ca4f202d08ba7f24cc97dce14c6d20ec7a679135 Mon Sep 17 00:00:00 2001 From: Alexandre Chartre Date: Mon, 19 Jul 2021 13:39:01 +0100 Subject: KVM: arm64: Disabling disabled PMU counters wastes a lot of time In a KVM guest on arm64, performance counter interrupts have an unnecessary overhead which slows down execution when using the "perf record" command and limits the "perf record" sampling period. The problem is that when a guest VM disables counters by clearing the PMCR_EL0.E bit (bit 0), KVM will disable all counters defined in PMCR_EL0 even if they are not enabled in PMCNTENSET_EL0. KVM disables a counter by calling into the perf framework, in particular by calling perf_event_create_kernel_counter() which is a time-consuming operation. So, for example, with a Neoverse N1 CPU core which has 6 event counters and one cycle counter, KVM will always disable all 7 counters even if only one is enabled. This typically happens when using the "perf record" command in a guest VM: perf will disable all event counters with PMCNTENSET_EL0 and only use the cycle counter. And when using the "perf record" -F option with a high profiling frequency, the overhead of KVM disabling all counters instead of one on every counter interrupt becomes very noticeable. The problem is fixed by having KVM disable only counters which are enabled in PMCNTENSET_EL0.
If a counter is not enabled in PMCNTENSET_EL0 then KVM will not enable it when setting PMCR_EL0.E and it will remain disabled as long as it is not enabled in PMCNTENSET_EL0. So there is effectively no need to disable a counter when clearing PMCR_EL0.E if it is not enabled in PMCNTENSET_EL0. Acked-by: Russell King (Oracle) Reviewed-by: Alexandru Elisei Signed-off-by: Alexandre Chartre [maz: moved 'mask' close to the actual user, simplifying the patch] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210712170345.660272-1-alexandre.chartre@oracle.com Link: https://lore.kernel.org/r/20210719123902.1493805-4-maz@kernel.org --- arch/arm64/kvm/pmu-emul.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index fae4e95b586c..dc65b58dc68f 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -563,20 +563,21 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val) */ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) { - unsigned long mask = kvm_pmu_valid_counter_mask(vcpu); int i; if (val & ARMV8_PMU_PMCR_E) { kvm_pmu_enable_counter_mask(vcpu, __vcpu_sys_reg(vcpu, PMCNTENSET_EL0)); } else { - kvm_pmu_disable_counter_mask(vcpu, mask); + kvm_pmu_disable_counter_mask(vcpu, + __vcpu_sys_reg(vcpu, PMCNTENSET_EL0)); } if (val & ARMV8_PMU_PMCR_C) kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0); if (val & ARMV8_PMU_PMCR_P) { + unsigned long mask = kvm_pmu_valid_counter_mask(vcpu); mask &= ~BIT(ARMV8_PMU_CYCLE_IDX); for_each_set_bit(i, &mask, 32) kvm_pmu_set_counter_value(vcpu, i, 0); -- cgit From 7a3ba3095a32f9c4ec8f30d680fea5150e12c3f3 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 19 Jul 2021 13:39:02 +0100 Subject: KVM: arm64: Remove PMSWINC_EL0 shadow register We keep an entry for the PMSWINC_EL0 register in the vcpu structure, while *never* writing anything there outside of reset. Given that the register is defined as write-only and that we always trap when it is accessed, there is little point in saving anything anyway. Get rid of the entry, and save a mighty 8 bytes per vcpu structure. We still need to keep it exposed to userspace in order to preserve backward compatibility with previously saved VMs. Since userspace cannot expect any effect of writing to PMSWINC_EL0, treat the register as RAZ/WI for the purpose of userspace access. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210719123902.1493805-5-maz@kernel.org --- arch/arm64/include/asm/kvm_host.h | 1 - arch/arm64/kvm/sys_regs.c | 21 ++++++++++++++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 41911585ae0c..afc169630884 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -185,7 +185,6 @@ enum vcpu_sysreg { PMCNTENSET_EL0, /* Count Enable Set Register */ PMINTENSET_EL1, /* Interrupt Enable Set Register */ PMOVSSET_EL0, /* Overflow Flag Status Set Register */ - PMSWINC_EL0, /* Software Increment Register */ PMUSERENR_EL0, /* User Enable Register */ /* Pointer Authentication Registers in a strict increasing order.
*/ diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index f22139658e48..a1f5101f49a3 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1286,6 +1286,20 @@ static int set_raz_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, return __set_id_reg(vcpu, rd, uaddr, true); } +static int set_wi_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, + const struct kvm_one_reg *reg, void __user *uaddr) +{ + int err; + u64 val; + + /* Perform the access even if we are going to ignore the value */ + err = reg_from_user(&val, uaddr, sys_reg_to_index(rd)); + if (err) + return err; + + return 0; +} + static bool access_ctr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) { @@ -1629,8 +1643,13 @@ static const struct sys_reg_desc sys_reg_descs[] = { .access = access_pmcnten, .reg = PMCNTENSET_EL0 }, { PMU_SYS_REG(SYS_PMOVSCLR_EL0), .access = access_pmovs, .reg = PMOVSSET_EL0 }, + /* + * PM_SWINC_EL0 is exposed to userspace as RAZ/WI, as it was + * previously (and pointlessly) advertised in the past... + */ { PMU_SYS_REG(SYS_PMSWINC_EL0), - .access = access_pmswinc, .reg = PMSWINC_EL0 }, + .get_user = get_raz_id_reg, .set_user = set_wi_reg, + .access = access_pmswinc, .reset = NULL }, { PMU_SYS_REG(SYS_PMSELR_EL0), .access = access_pmselr, .reset = reset_pmselr, .reg = PMSELR_EL0 }, { PMU_SYS_REG(SYS_PMCEID0_EL0), -- cgit From 673692735fdc40ed7da32c0cb3517adaf4227b2b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 2 Jul 2021 15:04:25 -0700 Subject: KVM: x86: Use KVM_BUG/KVM_BUG_ON to handle bugs that are fatal to the VM Signed-off-by: Sean Christopherson Signed-off-by: Isaku Yamahata Reviewed-by: Paolo Bonzini Message-Id: <0e8760a26151f47dc47052b25ca8b84fffe0641e.1625186503.git.isaku.yamahata@intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 2 +- arch/x86/kvm/vmx/vmx.c | 23 ++++++++++++++--------- arch/x86/kvm/x86.c | 4 ++++ 3 files changed, 19 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index e8ccab50ebf6..4ce6d827fccd 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1560,7 +1560,7 @@ static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu)); break; default: - WARN_ON_ONCE(1); + KVM_BUG_ON(1, vcpu->kvm); } } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 927a552393b9..fb1ac33a9902 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2274,7 +2274,7 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & guest_owned_bits; break; default: - WARN_ON_ONCE(1); + KVM_BUG_ON(1, vcpu->kvm); break; } } @@ -4996,6 +4996,7 @@ static int handle_cr(struct kvm_vcpu *vcpu) return kvm_complete_insn_gp(vcpu, err); case 3: WARN_ON_ONCE(enable_unrestricted_guest); + err = kvm_set_cr3(vcpu, val); return kvm_complete_insn_gp(vcpu, err); case 4: @@ -5021,14 +5022,13 @@ static int handle_cr(struct kvm_vcpu *vcpu) } break; case 2: /* clts */ - WARN_ONCE(1, "Guest should always own CR0.TS"); - vmx_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS)); - trace_kvm_cr_write(0, kvm_read_cr0(vcpu)); - return kvm_skip_emulated_instruction(vcpu); + KVM_BUG(1, vcpu->kvm, "Guest always owns CR0.TS"); + return -EIO; case 1: /*mov from cr*/ switch (cr) { case 3: WARN_ON_ONCE(enable_unrestricted_guest); + val = kvm_read_cr3(vcpu); kvm_register_write(vcpu, reg, val); 
trace_kvm_cr_read(cr, val); @@ -5338,7 +5338,9 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) static int handle_nmi_window(struct kvm_vcpu *vcpu) { - WARN_ON_ONCE(!enable_vnmi); + if (KVM_BUG_ON(!enable_vnmi, vcpu->kvm)) + return -EIO; + exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_NMI_WINDOW_EXITING); ++vcpu->stat.nmi_window_exits; kvm_make_request(KVM_REQ_EVENT, vcpu); @@ -5896,7 +5898,8 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) * below) should never happen as that means we incorrectly allowed a * nested VM-Enter with an invalid vmcs12. */ - WARN_ON_ONCE(vmx->nested.nested_run_pending); + if (KVM_BUG_ON(vmx->nested.nested_run_pending, vcpu->kvm)) + return -EIO; /* If guest state is invalid, start emulating */ if (vmx->emulation_required) @@ -6274,7 +6277,9 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) int max_irr; bool max_irr_updated; - WARN_ON(!vcpu->arch.apicv_active); + if (KVM_BUG_ON(!vcpu->arch.apicv_active, vcpu->kvm)) + return -EIO; + if (pi_test_on(&vmx->pi_desc)) { pi_clear_on(&vmx->pi_desc); /* @@ -6357,7 +6362,7 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu) unsigned int vector = intr_info & INTR_INFO_VECTOR_MASK; gate_desc *desc = (gate_desc *)host_idt_base + vector; - if (WARN_ONCE(!is_external_intr(intr_info), + if (KVM_BUG(!is_external_intr(intr_info), vcpu->kvm, "KVM: unexpected VM-Exit interrupt info: 0x%x", intr_info)) return; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e5d5c5ed7dd4..338844b82c4c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9395,6 +9395,10 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) } if (kvm_request_pending(vcpu)) { + if (kvm_check_request(KVM_REQ_VM_BUGGED, vcpu)) { + r = -EIO; + goto out; + } if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) { if (unlikely(!kvm_x86_ops.nested_ops->get_nested_state_pages(vcpu))) { r = 0; -- cgit From 19025e7bc5977b35c73bea99841245f93470105e Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 2 Jul 2021 15:04:26 -0700 Subject: KVM: x86/mmu: Mark VM as bugged if page fault returns RET_PF_INVALID Signed-off-by: Sean Christopherson Signed-off-by: Isaku Yamahata Reviewed-by: Paolo Bonzini Message-Id: <298980aa5fc5707184ac082287d13a800cd9c25f.1625186503.git.isaku.yamahata@intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 66f7f5bc3482..ce1d955a0536 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -5134,7 +5134,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code, if (r == RET_PF_INVALID) { r = kvm_mmu_do_page_fault(vcpu, cr2_or_gpa, lower_32_bits(error_code), false); - if (WARN_ON_ONCE(r == RET_PF_INVALID)) + if (KVM_BUG_ON(r == RET_PF_INVALID, vcpu->kvm)) return -EIO; } -- cgit From e489a4a6bddb83385cb3d896776c5a7b2ec653d8 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 2 Jul 2021 15:04:29 -0700 Subject: KVM: x86: Hoist kvm_dirty_regs check out of sync_regs() Move the kvm_dirty_regs vs. KVM_SYNC_X86_VALID_FIELDS check out of sync_regs() and into its sole caller, kvm_arch_vcpu_ioctl_run(). This allows a future patch to allow synchronizing select state for protected VMs. 
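A sketch of the resulting combined check at the top of KVM_RUN (field names as in the diff below; error handling condensed to a bare return):

	/*
	 * Reject undefined bits in both bitmaps in one place, before the
	 * guest is entered, so sync_regs() no longer has to validate
	 * kvm_dirty_regs itself.
	 */
	if ((kvm_run->kvm_valid_regs & ~KVM_SYNC_X86_VALID_FIELDS) ||
	    (kvm_run->kvm_dirty_regs & ~KVM_SYNC_X86_VALID_FIELDS))
		return -EINVAL;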
Signed-off-by: Sean Christopherson Signed-off-by: Isaku Yamahata Reviewed-by: Paolo Bonzini Message-Id: <889017a8d31cea46472e0c64b234ef5919278ed9.1625186503.git.isaku.yamahata@intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 338844b82c4c..e906c05e530e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9980,7 +9980,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) goto out; } - if (kvm_run->kvm_valid_regs & ~KVM_SYNC_X86_VALID_FIELDS) { + if ((kvm_run->kvm_valid_regs & ~KVM_SYNC_X86_VALID_FIELDS) || + (kvm_run->kvm_dirty_regs & ~KVM_SYNC_X86_VALID_FIELDS)) { r = -EINVAL; goto out; } @@ -10585,9 +10586,6 @@ static void store_regs(struct kvm_vcpu *vcpu) static int sync_regs(struct kvm_vcpu *vcpu) { - if (vcpu->run->kvm_dirty_regs & ~KVM_SYNC_X86_VALID_FIELDS) - return -EINVAL; - if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_REGS) { __set_regs(vcpu, &vcpu->run->s.regs.regs); vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_REGS; -- cgit From 03fffc5493c8c8d850586df5740c985026c313bf Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 2 Jul 2021 15:04:50 -0700 Subject: KVM: x86/mmu: Refactor shadow walk in __direct_map() to reduce indentation Employ a 'continue' to reduce the indentation for linking a new shadow page during __direct_map() in preparation for linking private pages. Signed-off-by: Sean Christopherson Signed-off-by: Isaku Yamahata Reviewed-by: Paolo Bonzini Message-Id: <702419686d5700373123f6ea84e7a946c2cad8b4.1625186503.git.isaku.yamahata@intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index ce1d955a0536..85c70492f863 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -2939,15 +2939,16 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, break; drop_large_spte(vcpu, it.sptep); - if (!is_shadow_present_pte(*it.sptep)) { - sp = kvm_mmu_get_page(vcpu, base_gfn, it.addr, - it.level - 1, true, ACC_ALL); - - link_shadow_page(vcpu, it.sptep, sp); - if (is_tdp && huge_page_disallowed && - req_level >= it.level) - account_huge_nx_page(vcpu->kvm, sp); - } + if (is_shadow_present_pte(*it.sptep)) + continue; + + sp = kvm_mmu_get_page(vcpu, base_gfn, it.addr, + it.level - 1, true, ACC_ALL); + + link_shadow_page(vcpu, it.sptep, sp); + if (is_tdp && huge_page_disallowed && + req_level >= it.level) + account_huge_nx_page(vcpu->kvm, sp); } ret = mmu_set_spte(vcpu, it.sptep, ACC_ALL, -- cgit From 7fa2a347512ab06ea1558302564879c060d52b0c Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 2 Jul 2021 15:04:51 -0700 Subject: KVM: x86/mmu: Return old SPTE from mmu_spte_clear_track_bits() Return the old SPTE when clearing a SPTE and push the "old SPTE present" check to the caller. Private shadow page support will use the old SPTE in rmap_remove() to determine whether or not there is a linked private shadow page. 
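Sketched with the names used in the diff below, the new caller contract is:

	/*
	 * The helper now hands back the old PTE instead of a
	 * present/absent flag; the caller keys rmap bookkeeping off it.
	 */
	u64 old_spte = mmu_spte_clear_track_bits(sptep);

	if (is_shadow_present_pte(old_spte))
		rmap_remove(kvm, sptep);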
Signed-off-by: Sean Christopherson Signed-off-by: Isaku Yamahata Reviewed-by: Paolo Bonzini Message-Id: Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 85c70492f863..d9e3ac6dda05 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -592,9 +592,9 @@ static bool mmu_spte_update(u64 *sptep, u64 new_spte) * Rules for using mmu_spte_clear_track_bits: * It sets the sptep from present to nonpresent, and track the * state bits, it is used to clear the last level sptep. - * Returns non-zero if the PTE was previously valid. + * Returns the old PTE. */ -static int mmu_spte_clear_track_bits(u64 *sptep) +static u64 mmu_spte_clear_track_bits(u64 *sptep) { kvm_pfn_t pfn; u64 old_spte = *sptep; @@ -605,7 +605,7 @@ static int mmu_spte_clear_track_bits(u64 *sptep) old_spte = __update_clear_spte_slow(sptep, 0ull); if (!is_shadow_present_pte(old_spte)) - return 0; + return old_spte; pfn = spte_to_pfn(old_spte); @@ -622,7 +622,7 @@ static int mmu_spte_clear_track_bits(u64 *sptep) if (is_dirty_spte(old_spte)) kvm_set_pfn_dirty(pfn); - return 1; + return old_spte; } /* @@ -1119,7 +1119,9 @@ out: static void drop_spte(struct kvm *kvm, u64 *sptep) { - if (mmu_spte_clear_track_bits(sptep)) + u64 old_spte = mmu_spte_clear_track_bits(sptep); + + if (is_shadow_present_pte(old_spte)) rmap_remove(kvm, sptep); } -- cgit From ec1cf69c376970f42761e641cf5074b84f8db243 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 25 Jun 2021 11:32:06 -0400 Subject: KVM: X86: Add per-vm stat for max rmap list size Add a new statistic max_mmu_rmap_size, which stores the maximum size of rmap for the vm. Signed-off-by: Peter Xu Message-Id: <20210625153214.43106-2-peterx@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/mmu/mmu.c | 2 ++ arch/x86/kvm/x86.c | 1 + 3 files changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 974cbfb1eefe..d798650ad793 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1209,6 +1209,7 @@ struct kvm_vm_stat { u64 lpages; u64 nx_lpage_splits; u64 max_mmu_page_hash_collisions; + u64 max_mmu_rmap_size; }; struct kvm_vcpu_stat { diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index d9e3ac6dda05..0b2954b5fbc6 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -2698,6 +2698,8 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, if (is_shadow_present_pte(*sptep)) { if (!was_rmapped) { rmap_count = rmap_add(vcpu, sptep, gfn); + if (rmap_count > vcpu->kvm->stat.max_mmu_rmap_size) + vcpu->kvm->stat.max_mmu_rmap_size = rmap_count; if (rmap_count > RMAP_RECYCLE_THRESHOLD) rmap_recycle(vcpu, sptep, gfn); } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e906c05e530e..ab200f2cb1f0 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -235,6 +235,7 @@ const struct _kvm_stats_desc kvm_vm_stats_desc[] = { STATS_DESC_ICOUNTER(VM, mmu_unsync), STATS_DESC_ICOUNTER(VM, lpages), STATS_DESC_ICOUNTER(VM, nx_lpage_splits), + STATS_DESC_PCOUNTER(VM, max_mmu_rmap_size), STATS_DESC_PCOUNTER(VM, max_mmu_page_hash_collisions) }; static_assert(ARRAY_SIZE(kvm_vm_stats_desc) == -- cgit From 38f703663d4c82ead5b51b8860deeef19d6dcb6d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 27 Jul 2021 12:32:51 +0200 Subject: KVM: arm64: Count VMID-wide TLB 
invalidations KVM/ARM has an architecture-specific implementation of kvm_flush_remote_tlbs; however, unlike the generic one, it does not count the flushes in kvm->stat.remote_tlb_flush, so it inexorably remained stuck at zero. Signed-off-by: Paolo Bonzini Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210727103251.16561-1-pbonzini@redhat.com --- arch/arm64/kvm/mmu.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 0625bf2353c2..2ca0a494a111 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -81,6 +81,7 @@ static bool memslot_is_logging(struct kvm_memory_slot *memslot) void kvm_flush_remote_tlbs(struct kvm *kvm) { kvm_call_hyp(__kvm_tlb_flush_vmid, &kvm->arch.mmu); + ++kvm->stat.generic.remote_tlb_flush; } static bool kvm_is_device_pfn(unsigned long pfn) -- cgit From 013cc4c6788f1ce9885d3c0281904f93ee8f2271 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 28 Jul 2021 21:06:23 +0800 Subject: KVM: arm64: Fix comments related to GICv2 PMR reporting Remove the repeated word 'the' from two comments. Signed-off-by: Jason Wang Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210728130623.12017-1-wangborong@cdjrlc.com --- arch/arm64/kvm/vgic/vgic-mmio-v2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v2.c b/arch/arm64/kvm/vgic/vgic-mmio-v2.c index a016f07adc28..5f9014ae595b 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v2.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v2.c @@ -282,7 +282,7 @@ static unsigned long vgic_mmio_read_vcpuif(struct kvm_vcpu *vcpu, case GIC_CPU_PRIMASK: /* * Our KVM_DEV_TYPE_ARM_VGIC_V2 device ABI exports the - * the PMR field as GICH_VMCR.VMPriMask rather than + * PMR field as GICH_VMCR.VMPriMask rather than * GICC_PMR.Priority, so we expose the upper five bits of * priority mask to userspace using the lower bits in the * unsigned long. @@ -329,7 +329,7 @@ static void vgic_mmio_write_vcpuif(struct kvm_vcpu *vcpu, case GIC_CPU_PRIMASK: /* * Our KVM_DEV_TYPE_ARM_VGIC_V2 device ABI exports the - * the PMR field as GICH_VMCR.VMPriMask rather than + * PMR field as GICH_VMCR.VMPriMask rather than * GICC_PMR.Priority, so we expose the upper five bits of * priority mask to userspace using the lower bits in the * unsigned long. -- cgit From 1694caef426247bb406c2b04672336ef77b5fb87 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 1 Jul 2021 17:41:01 +0200 Subject: x86/kvm: remove non-x86 stuff from arch/x86/kvm/ioapic.h The file has been moved to arch/x86 a long time ago. Time to get rid of non-x86 stuff. Signed-off-by: Juergen Gross Message-Id: <20210701154105.23215-3-jgross@suse.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/ioapic.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h index 11e4065e1617..bbd4a5d18b5d 100644 --- a/arch/x86/kvm/ioapic.h +++ b/arch/x86/kvm/ioapic.h @@ -35,11 +35,7 @@ struct kvm_vcpu; #define IOAPIC_INIT 0x5 #define IOAPIC_EXTINT 0x7 -#ifdef CONFIG_X86 #define RTC_GSI 8 -#else #define RTC_GSI -1U -#endif struct dest_map { /* vcpu bitmap where IRQ has been sent */ -- cgit From 76cd325ea75bb2a84c329782d7b8015b6a970c34 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Tue, 13 Jul 2021 22:09:52 +0000 Subject: KVM: x86/mmu: Rename cr2_or_gpa to gpa in fast_page_fault fast_page_fault is only called from direct_page_fault where we know the address is a gpa.
Fixes: 736c291c9f36 ("KVM: x86: Use gpa_t for cr2/gpa to fix TDP support on 32-bit KVM") Reviewed-by: Ben Gardon Reviewed-by: Sean Christopherson Signed-off-by: David Matlack Message-Id: <20210713220957.3493520-2-dmatlack@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 0b2954b5fbc6..6f5910b7b9bc 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3103,8 +3103,7 @@ static bool is_access_allowed(u32 fault_err_code, u64 spte) /* * Returns one of RET_PF_INVALID, RET_PF_FIXED or RET_PF_SPURIOUS. */ -static int fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, - u32 error_code) +static int fast_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code) { struct kvm_shadow_walk_iterator iterator; struct kvm_mmu_page *sp; @@ -3120,7 +3119,7 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, do { u64 new_spte; - for_each_shadow_entry_lockless(vcpu, cr2_or_gpa, iterator, spte) + for_each_shadow_entry_lockless(vcpu, gpa, iterator, spte) if (!is_shadow_present_pte(spte)) break; @@ -3199,8 +3198,7 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, } while (true); - trace_fast_page_fault(vcpu, cr2_or_gpa, error_code, iterator.sptep, - spte, ret); + trace_fast_page_fault(vcpu, gpa, error_code, iterator.sptep, spte, ret); walk_shadow_page_lockless_end(vcpu); return ret; -- cgit From 61bcd360aa9851cec969b7b40f23fd1faa7f85a4 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Tue, 13 Jul 2021 22:09:53 +0000 Subject: KVM: x86/mmu: Fix use of enums in trace_fast_page_fault Enum values have to be exported to userspace since the formatting is not done in the kernel. Without doing this perf maps RET_PF_FIXED and RET_PF_SPURIOUS to 0, which results in incorrect output: $ perf record -a -e kvmmmu:fast_page_fault --filter "ret==3" -- ./access_tracking_perf_test $ perf script | head -1 [...] new 610006048d25877 spurious 0 fixed 0 <------ should be 1 Fix this by exporting the enum values to userspace with TRACE_DEFINE_ENUM. Fixes: c4371c2a682e ("KVM: x86/mmu: Return unique RET_PF_* values if the fault was fixed") Signed-off-by: David Matlack Message-Id: <20210713220957.3493520-3-dmatlack@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu_internal.h | 3 +++ arch/x86/kvm/mmu/mmutrace.h | 6 ++++++ 2 files changed, 9 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h index 35567293c1fd..626cb848dab4 100644 --- a/arch/x86/kvm/mmu/mmu_internal.h +++ b/arch/x86/kvm/mmu/mmu_internal.h @@ -140,6 +140,9 @@ void kvm_flush_remote_tlbs_with_address(struct kvm *kvm, * RET_PF_INVALID: the spte is invalid, let the real page fault path update it. * RET_PF_FIXED: The faulting entry has been fixed. * RET_PF_SPURIOUS: The faulting entry was already fixed, e.g. by another vCPU. 
+ * + * Any names added to this enum should be exported to userspace for use in + * tracepoints via TRACE_DEFINE_ENUM() in mmutrace.h */ enum { RET_PF_RETRY = 0, diff --git a/arch/x86/kvm/mmu/mmutrace.h b/arch/x86/kvm/mmu/mmutrace.h index efbad33a0645..2924a4081a19 100644 --- a/arch/x86/kvm/mmu/mmutrace.h +++ b/arch/x86/kvm/mmu/mmutrace.h @@ -54,6 +54,12 @@ { PFERR_RSVD_MASK, "RSVD" }, \ { PFERR_FETCH_MASK, "F" } +TRACE_DEFINE_ENUM(RET_PF_RETRY); +TRACE_DEFINE_ENUM(RET_PF_EMULATE); +TRACE_DEFINE_ENUM(RET_PF_INVALID); +TRACE_DEFINE_ENUM(RET_PF_FIXED); +TRACE_DEFINE_ENUM(RET_PF_SPURIOUS); + /* * A pagetable walk has started */ -- cgit From c5c8c7c53004cb70715320018c3b4287071c1cfd Mon Sep 17 00:00:00 2001 From: David Matlack Date: Tue, 13 Jul 2021 22:09:54 +0000 Subject: KVM: x86/mmu: Make walk_shadow_page_lockless_{begin,end} interoperate with the TDP MMU Acquire the RCU read lock in walk_shadow_page_lockless_begin and release it in walk_shadow_page_lockless_end when the TDP MMU is enabled. This should not introduce any functional changes but is used in the following commit to make fast_page_fault interoperate with the TDP MMU. Signed-off-by: David Matlack Message-Id: <20210713220957.3493520-4-dmatlack@google.com> [Use if...else instead of if(){return;}] Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 52 +++++++++++++++++++++++++++------------------- arch/x86/kvm/mmu/tdp_mmu.c | 6 ++---- arch/x86/kvm/mmu/tdp_mmu.h | 10 +++++++++ 3 files changed, 43 insertions(+), 25 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 6f5910b7b9bc..d5b0c8b0e9e9 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -686,28 +686,36 @@ static bool mmu_spte_age(u64 *sptep) static void walk_shadow_page_lockless_begin(struct kvm_vcpu *vcpu) { - /* - * Prevent page table teardown by making any free-er wait during - * kvm_flush_remote_tlbs() IPI to all active vcpus. - */ - local_irq_disable(); + if (is_tdp_mmu(vcpu->arch.mmu)) { + kvm_tdp_mmu_walk_lockless_begin(); + } else { + /* + * Prevent page table teardown by making any free-er wait during + * kvm_flush_remote_tlbs() IPI to all active vcpus. + */ + local_irq_disable(); - /* - * Make sure a following spte read is not reordered ahead of the write - * to vcpu->mode. - */ - smp_store_mb(vcpu->mode, READING_SHADOW_PAGE_TABLES); + /* + * Make sure a following spte read is not reordered ahead of the write + * to vcpu->mode. + */ + smp_store_mb(vcpu->mode, READING_SHADOW_PAGE_TABLES); + } } static void walk_shadow_page_lockless_end(struct kvm_vcpu *vcpu) { - /* - * Make sure the write to vcpu->mode is not reordered in front of - * reads to sptes. If it does, kvm_mmu_commit_zap_page() can see us - * OUTSIDE_GUEST_MODE and proceed to free the shadow page table. - */ - smp_store_release(&vcpu->mode, OUTSIDE_GUEST_MODE); - local_irq_enable(); + if (is_tdp_mmu(vcpu->arch.mmu)) { + kvm_tdp_mmu_walk_lockless_end(); + } else { + /* + * Make sure the write to vcpu->mode is not reordered in front of + * reads to sptes. If it does, kvm_mmu_commit_zap_page() can see us + * OUTSIDE_GUEST_MODE and proceed to free the shadow page table. + */ + smp_store_release(&vcpu->mode, OUTSIDE_GUEST_MODE); + local_irq_enable(); + } } static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu, bool maybe_indirect) @@ -3617,6 +3625,8 @@ static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct) /* * Return the level of the lowest level SPTE added to sptes. * That SPTE may be non-present. 
+ * + * Must be called between walk_shadow_page_lockless_{begin,end}. */ static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level) { @@ -3624,8 +3634,6 @@ static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level int leaf = -1; u64 spte; - walk_shadow_page_lockless_begin(vcpu); - for (shadow_walk_init(&iterator, vcpu, addr), *root_level = iterator.level; shadow_walk_okay(&iterator); @@ -3639,8 +3647,6 @@ static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level break; } - walk_shadow_page_lockless_end(vcpu); - return leaf; } @@ -3652,11 +3658,15 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) int root, leaf, level; bool reserved = false; + walk_shadow_page_lockless_begin(vcpu); + if (is_tdp_mmu(vcpu->arch.mmu)) leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes, &root); else leaf = get_walk(vcpu, addr, sptes, &root); + walk_shadow_page_lockless_end(vcpu); + if (unlikely(leaf < 0)) { *sptep = 0ull; return reserved; diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 0853370bd811..228f0cc5e2cf 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -1516,6 +1516,8 @@ bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm, /* * Return the level of the lowest level SPTE added to sptes. * That SPTE may be non-present. + * + * Must be called between kvm_tdp_mmu_walk_lockless_{begin,end}. */ int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level) @@ -1527,14 +1529,10 @@ int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, *root_level = vcpu->arch.mmu->shadow_root_level; - rcu_read_lock(); - tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) { leaf = iter.level; sptes[leaf] = iter.old_spte; } - rcu_read_unlock(); - return leaf; } diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h index 1cae4485b3bc..93e1bf5089c4 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.h +++ b/arch/x86/kvm/mmu/tdp_mmu.h @@ -77,6 +77,16 @@ bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn, int min_level); +static inline void kvm_tdp_mmu_walk_lockless_begin(void) +{ + rcu_read_lock(); +} + +static inline void kvm_tdp_mmu_walk_lockless_end(void) +{ + rcu_read_unlock(); +} + int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level); -- cgit From 6e8eb2060cc7fbc4d388d0ab70502e265aec98c6 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Tue, 13 Jul 2021 22:09:55 +0000 Subject: KVM: x86/mmu: fast_page_fault support for the TDP MMU Make fast_page_fault interoperate with the TDP MMU by leveraging walk_shadow_page_lockless_{begin,end} to acquire the RCU read lock and introducing a new helper function kvm_tdp_mmu_fast_pf_get_last_sptep to grab the lowest level sptep. 
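To make the lockless-walk contract concrete, a minimal caller sketch (illustrative only; example_read_last_spte() is not part of this series): the begin/end pair brackets the walk and bounds the lifetime of the returned sptep.

    static u64 example_read_last_spte(struct kvm_vcpu *vcpu, gpa_t gpa)
    {
            u64 spte = 0ull;
            u64 *sptep;

            walk_shadow_page_lockless_begin(vcpu);  /* IRQs off, or RCU for TDP MMU */
            if (is_tdp_mmu(vcpu->arch.mmu))
                    sptep = kvm_tdp_mmu_fast_pf_get_last_sptep(vcpu, gpa, &spte);
            else
                    sptep = fast_pf_get_last_sptep(vcpu, gpa, &spte);
            /* sptep may only be dereferenced here, before _end() drops protection. */
            if (sptep)
                    spte = *sptep;
            walk_shadow_page_lockless_end(vcpu);

            return spte;
    }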
Suggested-by: Ben Gardon Signed-off-by: David Matlack Message-Id: <20210713220957.3493520-5-dmatlack@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 50 ++++++++++++++++++++++++++++++++++------------ arch/x86/kvm/mmu/tdp_mmu.c | 41 +++++++++++++++++++++++++++++++++++++ arch/x86/kvm/mmu/tdp_mmu.h | 2 ++ 3 files changed, 80 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index d5b0c8b0e9e9..29010abb659c 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3108,15 +3108,41 @@ static bool is_access_allowed(u32 fault_err_code, u64 spte) return spte & PT_PRESENT_MASK; } +/* + * Returns the last level spte pointer of the shadow page walk for the given + * gpa, and sets *spte to the spte value. This spte may be non-present. If no + * walk could be performed, returns NULL and *spte does not contain valid data. + * + * Contract: + * - Must be called between walk_shadow_page_lockless_{begin,end}. + * - The returned sptep must not be used after walk_shadow_page_lockless_end. + */ +static u64 *fast_pf_get_last_sptep(struct kvm_vcpu *vcpu, gpa_t gpa, u64 *spte) +{ + struct kvm_shadow_walk_iterator iterator; + u64 old_spte; + u64 *sptep = NULL; + + for_each_shadow_entry_lockless(vcpu, gpa, iterator, old_spte) { + sptep = iterator.sptep; + *spte = old_spte; + + if (!is_shadow_present_pte(old_spte)) + break; + } + + return sptep; +} + /* * Returns one of RET_PF_INVALID, RET_PF_FIXED or RET_PF_SPURIOUS. */ static int fast_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code) { - struct kvm_shadow_walk_iterator iterator; struct kvm_mmu_page *sp; int ret = RET_PF_INVALID; u64 spte = 0ull; + u64 *sptep = NULL; uint retry_count = 0; if (!page_fault_can_be_fast(error_code)) @@ -3127,14 +3153,15 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code) do { u64 new_spte; - for_each_shadow_entry_lockless(vcpu, gpa, iterator, spte) - if (!is_shadow_present_pte(spte)) - break; + if (is_tdp_mmu(vcpu->arch.mmu)) + sptep = kvm_tdp_mmu_fast_pf_get_last_sptep(vcpu, gpa, &spte); + else + sptep = fast_pf_get_last_sptep(vcpu, gpa, &spte); if (!is_shadow_present_pte(spte)) break; - sp = sptep_to_sp(iterator.sptep); + sp = sptep_to_sp(sptep); if (!is_last_spte(spte, sp->role.level)) break; @@ -3192,8 +3219,7 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code) * since the gfn is not stable for indirect shadow page. See * Documentation/virt/kvm/locking.rst to get more detail.
*/ - if (fast_pf_fix_direct_spte(vcpu, sp, iterator.sptep, spte, - new_spte)) { + if (fast_pf_fix_direct_spte(vcpu, sp, sptep, spte, new_spte)) { ret = RET_PF_FIXED; break; } @@ -3206,7 +3232,7 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code) } while (true); - trace_fast_page_fault(vcpu, gpa, error_code, iterator.sptep, spte, ret); + trace_fast_page_fault(vcpu, gpa, error_code, sptep, spte, ret); walk_shadow_page_lockless_end(vcpu); return ret; @@ -3841,11 +3867,9 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, if (page_fault_handle_page_track(vcpu, error_code, gfn)) return RET_PF_EMULATE; - if (!is_tdp_mmu_fault) { - r = fast_page_fault(vcpu, gpa, error_code); - if (r != RET_PF_INVALID) - return r; - } + r = fast_page_fault(vcpu, gpa, error_code); + if (r != RET_PF_INVALID) + return r; r = mmu_topup_memory_caches(vcpu, false); if (r) diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 228f0cc5e2cf..88aff5d0512c 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -527,6 +527,10 @@ static inline bool tdp_mmu_set_spte_atomic_no_dirty_log(struct kvm *kvm, if (is_removed_spte(iter->old_spte)) return false; + /* + * Note, fast_pf_fix_direct_spte() can also modify TDP MMU SPTEs and + * does not hold the mmu_lock. + */ if (cmpxchg64(rcu_dereference(iter->sptep), iter->old_spte, new_spte) != iter->old_spte) return false; @@ -1536,3 +1540,40 @@ int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, return leaf; } + +/* + * Returns the last level spte pointer of the shadow page walk for the given + * gpa, and sets *spte to the spte value. This spte may be non-present. If no + * walk could be performed, returns NULL and *spte does not contain valid data. + * + * Contract: + * - Must be called between kvm_tdp_mmu_walk_lockless_{begin,end}. + * - The returned sptep must not be used after kvm_tdp_mmu_walk_lockless_end. + * + * WARNING: This function is only intended to be called during fast_page_fault. + */ +u64 *kvm_tdp_mmu_fast_pf_get_last_sptep(struct kvm_vcpu *vcpu, u64 addr, + u64 *spte) +{ + struct tdp_iter iter; + struct kvm_mmu *mmu = vcpu->arch.mmu; + gfn_t gfn = addr >> PAGE_SHIFT; + tdp_ptep_t sptep = NULL; + + tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) { + *spte = iter.old_spte; + sptep = iter.sptep; + } + + /* + * Perform the rcu_dereference to get the raw spte pointer value since + * we are passing it up to fast_page_fault, which is shared with the + * legacy MMU and thus does not retain the TDP MMU-specific __rcu + * annotation. + * + * This is safe since fast_page_fault obeys the contracts of this + * function as well as all TDP MMU contracts around modifying SPTEs + * outside of mmu_lock.
+ */ + return rcu_dereference(sptep); } diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h index 93e1bf5089c4..361b47f98cc5 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.h +++ b/arch/x86/kvm/mmu/tdp_mmu.h @@ -89,6 +89,8 @@ static inline void kvm_tdp_mmu_walk_lockless_end(void) int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level); +u64 *kvm_tdp_mmu_fast_pf_get_last_sptep(struct kvm_vcpu *vcpu, u64 addr, + u64 *spte); #ifdef CONFIG_X86_64 bool kvm_mmu_init_tdp_mmu(struct kvm *kvm); -- cgit From 71ba3f3189c78f756a659568fb473600fd78f207 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 26 Jul 2021 12:30:52 -0400 Subject: KVM: x86: enable TDP MMU by default With the addition of fast page fault support, the TDP-specific MMU has reached feature parity with the original MMU. All my testing in the last few months has been done with the TDP MMU; switch the default on 64-bit machines. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/tdp_mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 88aff5d0512c..41cee1d22918 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -10,7 +10,7 @@ #include #include -static bool __read_mostly tdp_mmu_enabled = false; +static bool __read_mostly tdp_mmu_enabled = true; module_param_named(tdp_mmu, tdp_mmu_enabled, bool, 0644); /* Initializes the TDP MMU for the VM, if enabled. */ -- cgit From df63202fe52bd1583ce1418c755a81d6d41af3b9 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Tue, 13 Jul 2021 17:20:19 +0300 Subject: KVM: x86: APICv: drop immediate APICv disablement on current vCPU Special-casing the disabling of APICv on the current vCPU right away in kvm_request_apicv_update doesn't bring much benefit vs. raising KVM_REQ_APICV_UPDATE on it instead, since the request will be processed on the next entry to the guest. (The comment about having another #VMEXIT is wrong.) It also hides various assumptions that the APICv enable state matches the APICv inhibit state, as this special case only makes those states match on the current vCPU. Previous patches fixed a few such assumptions, so now it should be safe to drop this special case. Signed-off-by: Maxim Levitsky Message-Id: <20210713142023.106183-5-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ab200f2cb1f0..54fdadacb27f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9264,7 +9264,6 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_update_apicv); */ void kvm_request_apicv_update(struct kvm *kvm, bool activate, ulong bit) { - struct kvm_vcpu *except; unsigned long old, new, expected; if (!kvm_x86_ops.check_apicv_inhibit_reasons || @@ -9290,16 +9289,7 @@ void kvm_request_apicv_update(struct kvm *kvm, bool activate, ulong bit) if (kvm_x86_ops.pre_update_apicv_exec_ctrl) static_call(kvm_x86_pre_update_apicv_exec_ctrl)(kvm, activate); - /* - * Sending request to update APICV for all other vcpus, - * while update the calling vcpu immediately instead of - * waiting for another #VMEXIT to handle the request.
- */ - except = kvm_get_running_vcpu(); - kvm_make_all_cpus_request_except(kvm, KVM_REQ_APICV_UPDATE, - except); - if (except) - kvm_vcpu_update_apicv(except); + kvm_make_all_cpus_request(kvm, KVM_REQ_APICV_UPDATE); } EXPORT_SYMBOL_GPL(kvm_request_apicv_update); -- cgit From df37ed38e6c26064d5f97ebbe5cacc75eeb89153 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:32:39 -0700 Subject: KVM: x86: Flush the guest's TLB on INIT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Flush the guest's TLB on INIT, as required by Intel's SDM. Although AMD's APM states that the TLBs are unchanged by INIT, it's not clear that that's correct, as the APM also states that the TLB is flushed on "External initialization of the processor." Regardless, relying on the guest to be paranoid is unnecessarily risky, while an unnecessary flush is benign from a functional perspective and likely has no measurable impact on guest performance. Note, as of the April 2021 version of Intel's SDM, it also contradicts itself with respect to TLB flushing. The overview of INIT explicitly calls out the TLBs as being invalidated, while a table later in the same section says they are unchanged. 9.1 INITIALIZATION OVERVIEW: The major difference is that during an INIT, the internal caches, MSRs, MTRRs, and x87 FPU state are left unchanged (although, the TLBs and BTB are invalidated as with a hardware reset) Table 9-1: Register Power up Reset INIT Data and Code Cache, TLBs: Invalid[6] Invalid[6] Unchanged Given Core2's erratum[*] about global TLB entries not being flushed on INIT, it's safe to assume that the table is simply wrong. AZ28. INIT Does Not Clear Global Entries in the TLB Problem: INIT may not flush a TLB entry when: • The processor is in protected mode with paging enabled and the page global enable flag is set (PGE bit of CR4 register) • G bit for the page table entry is set • TLB entry is present in TLB when INIT occurs • Software may encounter unexpected page fault or incorrect address translation due to a TLB entry erroneously left in TLB after INIT. Workaround: Write to CR3, CR4 (setting bits PSE, PGE or PAE) or CR0 (setting bits PG or PE) registers before writing to memory early in BIOS code to clear all the global entries from TLB. Status: For the steppings affected, see the Summary Tables of Changes. [*] https://www.intel.com/content/dam/support/us/en/documents/processors/mobile/celeron/sb/320121.pdf Fixes: 6aa8b732ca01 ("[PATCH] kvm: userspace interface") Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 54fdadacb27f..164ecca7769e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10872,6 +10872,18 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) */ if (old_cr0 & X86_CR0_PG) kvm_mmu_reset_context(vcpu); + + /* + * Intel's SDM states that all TLB entries are flushed on INIT. AMD's + * APM states the TLBs are untouched by INIT, but it also states that + * the TLBs are flushed on "External initialization of the processor." + * Flush the guest TLB regardless of vendor, there is no meaningful + * benefit in relying on the guest to flush the TLB immediately after + * INIT. A spurious TLB flush is benign and likely negligible from a + * performance perspective.
+ */ + if (init_event) + kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu); } void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector) -- cgit From afc8de0118be84f4058b9977d481aeb3e0758dbb Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:32:40 -0700 Subject: KVM: nVMX: Set LDTR to its architecturally defined value on nested VM-Exit Set L1's LDTR on VM-Exit per the Intel SDM: The host-state area does not contain a selector field for LDTR. LDTR is established as follows on all VM exits: the selector is cleared to 0000H, the segment is marked unusable and is otherwise undefined (although the base address is always canonical). This is likely a benign bug since the LDTR is unusable, as it means the L1 VMM is conditioned to reload its LDTR in order to function properly on bare metal. Fixes: 4704d0befb07 ("KVM: nVMX: Exiting from L2 to L1") Reviewed-by: Reiji Watanabe Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-3-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 1a52134b0c42..7f8184f432b4 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -4298,6 +4298,10 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, }; vmx_set_segment(vcpu, &seg, VCPU_SREG_TR); + memset(&seg, 0, sizeof(seg)); + seg.unusable = 1; + vmx_set_segment(vcpu, &seg, VCPU_SREG_LDTR); + kvm_set_dr(vcpu, 7, 0x400); vmcs_write64(GUEST_IA32_DEBUGCTL, 0); -- cgit From 4f117ce4aefca0e90cd44680219d4c261c1381b9 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:32:41 -0700 Subject: KVM: SVM: Zero out GDTR.base and IDTR.base on INIT Explicitly set GDTR.base and IDTR.base to zero when initializing the VMCB. Functionally this only affects INIT, as the bases are implicitly set to zero on RESET by virtue of the VMCB being zero allocated. Per AMD's APM, GDTR.base and IDTR.base are zeroed after RESET and INIT. Fixes: 04d2cc7780d4 ("KVM: Move main vcpu loop into subarch independent code") Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-4-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 4ce6d827fccd..7845232b6fb6 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1241,7 +1241,9 @@ static void init_vmcb(struct kvm_vcpu *vcpu) SVM_SELECTOR_S_MASK | SVM_SELECTOR_CODE_MASK; save->cs.limit = 0xffff; + save->gdtr.base = 0; save->gdtr.limit = 0xffff; + save->idtr.base = 0; save->idtr.limit = 0xffff; init_sys_seg(&save->ldtr, SEG_TYPE_LDT); -- cgit From 2a24be79b6b7061a486239c3a3489eb67b9587f6 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:32:42 -0700 Subject: KVM: VMX: Set EDX at INIT with CPUID.0x1, Family-Model-Stepping Set EDX at RESET/INIT based on the userspace-defined CPUID model when possible, i.e. when CPUID.0x1.EAX is defined by userspace. At RESET/INIT, all CPUs that support CPUID set EDX to the FMS enumerated in CPUID.0x1.EAX. If no CPUID match is found, fall back to KVM's default of 0x600 (Family '6'), which is the least awful approximation of KVM's virtual CPU model.
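As background, the Family-Model-Stepping layout of CPUID.0x1.EAX is architectural (documented in both the SDM and the APM); the helper below is purely illustrative and not part of the patch. KVM's fallback of 0x600 decodes to family 6, model 0, stepping 0.

    /* Illustrative FMS decode: stepping is bits 3:0, model bits 7:4,
     * family bits 11:8, extended model bits 19:16, extended family 27:20. */
    static unsigned int example_cpuid_family(u32 fms)
    {
            unsigned int family = (fms >> 8) & 0xf;

            /* The extended family field is added in only when family == 0xf. */
            if (family == 0xf)
                    family += (fms >> 20) & 0xff;
            return family;
    }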
Fixes: 6aa8b732ca01 ("[PATCH] kvm: userspace interface") Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-5-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index fb1ac33a9902..6946a5161d38 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -4394,6 +4394,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) { struct vcpu_vmx *vmx = to_vmx(vcpu); struct msr_data apic_base_msr; + u32 eax, dummy; u64 cr0; vmx->rmode.vm86_active = 0; @@ -4401,7 +4402,11 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vmx->msr_ia32_umwait_control = 0; - vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); + eax = 1; + if (!kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy, true)) + eax = get_rdx_init_val(); + kvm_rdx_write(vcpu, eax); + vmx->hv_deadline_tsc = -1; kvm_set_cr8(vcpu, 0); -- cgit From 067a456d091d05fdae32cae350410d905968b645 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:32:43 -0700 Subject: KVM: SVM: Require exact CPUID.0x1 match when stuffing EDX at INIT Do not allow an inexact CPUID "match" when querying the guest's CPUID.0x1 to stuff EDX during INIT. In the common case, where the guest CPU model is an AMD variant, allowing an inexact match is a nop since KVM doesn't emulate Intel's goofy "out-of-range" logic for AMD and Hygon. If the vCPU model happens to be an Intel variant, an inexact match is possible if and only if the max CPUID leaf is precisely '0'. Aside from the fact that there's probably no CPU in existence with a single CPUID leaf, if the max CPUID leaf is '0', that means that CPUID.0.EAX is '0', and thus an inexact match for CPUID.0x1.EAX will also yield '0'. So, with lots of twisty logic, no functional change intended. Reviewed-by: Reiji Watanabe Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-6-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 7845232b6fb6..f4b6fec6252f 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1346,7 +1346,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) } init_vmcb(vcpu); - kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy, false); + kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy, true); kvm_rdx_write(vcpu, eax); if (kvm_vcpu_apicv_active(vcpu) && !init_event) -- cgit From 665f4d9238ad83c36dd4e078ccab45b3ddec211d Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:32:44 -0700 Subject: KVM: SVM: Fall back to KVM's hardcoded value for EDX at RESET/INIT At vCPU RESET/INIT (mostly RESET), stuff EDX with KVM's hardcoded, default Family-Model-Stepping ID of 0x600 if CPUID.0x1 isn't defined. At RESET, the CPUID lookup is guaranteed to "miss" because KVM emulates RESET before exposing the vCPU to userspace, i.e. userspace can't possibly have set the vCPU's CPUID model, and thus KVM will always write '0'. At INIT, using 0x600 is less bad than using '0'. While initializing EDX to '0' is _extremely_ unlikely to be noticed by the guest, let alone break the guest, and can be overridden by userspace for the RESET case, using 0x600 is preferable as it will allow consolidating the relevant VMX and SVM RESET/INIT logic in the future.
And, digging through old specs suggests that neither Intel nor AMD has ever shipped a CPU that initialized EDX to '0' at RESET. Regarding 0x600 as KVM's default Family, it is a sane default and in many ways the most appropriate. Prior to the 386 implementations, DX was undefined at RESET. With the 386, 486, 586/P5, and 686/P6/Athlon, both Intel and AMD set EDX to 3, 4, 5, and 6 respectively. AMD switched to using '15' as its primary Family with the introduction of AMD64, but Intel has continued using '6' for the last few decades. So, '6' is a valid Family for both Intel and AMD CPUs, is compatible with both 32-bit and 64-bit CPUs (albeit not a perfect fit for 64-bit AMD), and of the common Families (3 - 6), is the best fit with respect to KVM's virtual CPU model. E.g. prior to the P6, Intel CPUs did not have a STI window. Modern operating systems, Linux included, rely on the STI window, e.g. for "safe halt", and KVM unconditionally assumes the virtual CPU has an STI window. Thus enumerating a Family ID of 3, 4, or 5 would be provably wrong. Opportunistically remove a stale comment. Fixes: 66f7b72e1171 ("KVM: x86: Make register state after reset conform to specification") Reviewed-by: Reiji Watanabe Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-7-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index f4b6fec6252f..05c1e60d829a 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1264,7 +1264,6 @@ static void init_vmcb(struct kvm_vcpu *vcpu) kvm_mmu_reset_context(vcpu); save->cr4 = X86_CR4_PAE; - /* rdx = ?? */ if (npt_enabled) { /* Setup VMCB for Nested Paging */ @@ -1346,7 +1345,15 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) } init_vmcb(vcpu); - kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy, true); + /* + * Fall back to KVM's default Family/Model/Stepping if no CPUID match + * is found. Note, it's impossible to get a match at RESET since KVM + * emulates RESET before exposing the vCPU to userspace, i.e. it's + * impossible for kvm_cpuid() to find a valid entry on RESET. But, go + * through the motions in case that's ever remedied, and to be pedantic. + */ + if (!kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy, true)) + eax = get_rdx_init_val(); + kvm_rdx_write(vcpu, eax); if (kvm_vcpu_apicv_active(vcpu) && !init_event) -- cgit From 61152cd907d59ffd6b0a9479b2fa3b3b7b080409 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:32:45 -0700 Subject: KVM: VMX: Remove explicit MMU reset in enter_rmode() Drop an explicit MMU reset when entering emulated real mode now that the vCPU INIT/RESET path correctly handles conditional MMU resets, e.g. if INIT arrives while the vCPU is in 64-bit mode. Note, while there are multiple other direct calls to vmx_set_cr0(), i.e. paths that change CR0 without invoking kvm_post_set_cr0(), only the INIT emulation can reach enter_rmode(). CLTS emulation only toggles CR0.TS, VM-Exit (and late VM-Fail) emulation cannot architecturally transition to Real Mode, and VM-Enter to Real Mode is possible if and only if Unrestricted Guest is enabled (exposed to L1).
This effectively reverts commit 8668a3c468ed ("KVM: VMX: Reset mmu context when entering real mode") Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-8-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 6946a5161d38..207393a429d1 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2852,8 +2852,6 @@ static void enter_rmode(struct kvm_vcpu *vcpu) fix_rmode_seg(VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]); fix_rmode_seg(VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]); fix_rmode_seg(VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]); - - kvm_mmu_reset_context(vcpu); } int vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) -- cgit From 5d2d7e41e3b80f37ec8673825fae07ffe9f140c3 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:32:46 -0700 Subject: KVM: SVM: Drop explicit MMU reset at RESET/INIT Drop an explicit MMU reset in SVM's vCPU RESET/INIT flow now that the common x86 path correctly handles conditional MMU resets, e.g. if INIT arrives while the vCPU is in 64-bit mode. This reverts commit ebae871a509d ("kvm: svm: reset mmu on VCPU reset"). Reviewed-by: Reiji Watanabe Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-9-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 05c1e60d829a..a9af9bff4755 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1261,7 +1261,6 @@ static void init_vmcb(struct kvm_vcpu *vcpu) * It also updates the guest-visible cr0 value. */ svm_set_cr0(vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET); - kvm_mmu_reset_context(vcpu); save->cr4 = X86_CR4_PAE; -- cgit From c2f79a65b4b66681894ef7d7e3912ba55acc20d5 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:32:49 -0700 Subject: KVM: x86: WARN if the APIC map is dirty without an in-kernel local APIC WARN if KVM ends up in a state where it thinks its APIC map needs to be recalculated, but KVM is not emulating the local APIC. This is mostly to document KVM's "rules" in order to provide clarity in future cleanups. Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-12-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index ba5a27879f1d..add4dd1e3528 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -192,6 +192,9 @@ void kvm_recalculate_apic_map(struct kvm *kvm) if (atomic_read_acquire(&kvm->arch.apic_map_dirty) == CLEAN) return; + WARN_ONCE(!irqchip_in_kernel(kvm), + "Dirty APIC map without an in-kernel local APIC"); + mutex_lock(&kvm->arch.apic_map_lock); /* * Read kvm->arch.apic_map_dirty before kvm->arch.apic_map -- cgit From 549240e8e09e063b7ba44c9f8497e8499562a34c Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:32:50 -0700 Subject: KVM: x86: Remove defunct BSP "update" in local APIC reset Remove a BSP APIC update in kvm_lapic_reset() that is a glorified and confusing nop. When the code was originally added, kvm_vcpu_is_bsp() queried kvm->arch.bsp_vcpu, i.e. the intent was to set the BSP bit in the BSP vCPU's APIC. 
But, stuffing the BSP bit at INIT was wrong since the guest can change its BSP(s); this was fixed by commit 58d269d8cccc ("KVM: x86: BSP in MSR_IA32_APICBASE is writable"). In other words, kvm_vcpu_is_bsp() is now purely a reflection of vcpu->arch.apic_base.MSR_IA32_APICBASE_BSP, thus the update will always set the current value and kvm_lapic_set_base() is effectively a nop if the new and old values match. The RESET case, which does need to stuff the BSP for the reset vCPU, is handled by vendor code (though this will soon be moved to common code). No functional change intended. Reviewed-by: Reiji Watanabe Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-13-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index add4dd1e3528..a24ce8fe93e5 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2367,9 +2367,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) apic->highest_isr_cache = -1; update_divide_count(apic); atomic_set(&apic->lapic_timer.pending, 0); - if (kvm_vcpu_is_bsp(vcpu)) - kvm_lapic_set_base(vcpu, - vcpu->arch.apic_base | MSR_IA32_APICBASE_BSP); + vcpu->arch.pv_eoi.msr_val = 0; apic_update_ppr(apic); if (vcpu->arch.apicv_active) { -- cgit From 0214f6bbe564632adba299e38023d681c1bd68c5 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:32:51 -0700 Subject: KVM: x86: Migrate the PIT only if vcpu0 is migrated, not any BSP Make vcpu0 the arbitrary owner of the PIT, as was intended when PIT migration was added by commit 2f5997140f22 ("KVM: migrate PIT timer"). The PIT was unintentionally turned into being owned by the BSP by commit c5af89b68abb ("KVM: Introduce kvm_vcpu_is_bsp() function."), and was then unintentionally converted to a shared ownership model when kvm_vcpu_is_bsp() was modified to check the APIC base MSR instead of hardcoding vcpu0 as the BSP. Functionally, this just means the PIT's hrtimer is migrated less often. The real motivation is to remove the usage of kvm_vcpu_is_bsp(), so that more legacy/broken crud can be removed in a future patch. Fixes: 58d269d8cccc ("KVM: x86: BSP in MSR_IA32_APICBASE is writable") Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-14-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/i8254.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index a6e218c6140d..5a69cce4d72d 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -220,7 +220,8 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) struct kvm_pit *pit = vcpu->kvm->arch.vpit; struct hrtimer *timer; - if (!kvm_vcpu_is_bsp(vcpu) || !pit) + /* Somewhat arbitrarily make vcpu0 the owner of the PIT. */ + if (vcpu->vcpu_id || !pit) return; timer = &pit->pit_state.timer; -- cgit From 01913c57c225a8301e9a53447507f310a4be22b6 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:32:52 -0700 Subject: KVM: x86: Don't force set BSP bit when local APIC is managed by userspace Don't set the BSP bit in vcpu->arch.apic_base when the local APIC is managed by userspace. Forcing all vCPUs to be BSPs is non-sensical, and was dead code when it was added by commit 97222cc83163 ("KVM: Emulate local APIC in kernel"). 
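For reference while following the apic_base manipulations in this and the next several patches, the architectural MSR_IA32_APICBASE layout is summarized below; the helper is an illustrative sketch, not KVM code.

    /*
     * MSR_IA32_APICBASE (architectural layout):
     *   bit 8      BSP   - set on the bootstrap processor
     *   bit 10     EXTD  - x2APIC mode enable
     *   bit 11     EN    - xAPIC global enable
     *   bits 12+   base  - APIC MMIO base address, 0xfee00000 by default
     */
    static bool example_apic_base_is_bsp(u64 apic_base)
    {
            return !!(apic_base & MSR_IA32_APICBASE_BSP);
    }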
At the time, kvm_lapic_set_base() was invoked if and only if the local APIC was in-kernel (and it couldn't be called before the vCPU created its APIC). kvm_lapic_set_base() eventually gained generic usage, but the latent bug escaped notice because the only true consumer would be the guest itself in the form of explicit RDMSRs on APs. Out of Linux, SeaBIOS, and EDK2/OVMF, only OVMF consumes the BSP bit from the APIC_BASE MSR. For the vast majority of usage in OVMF, BSP confusion would be benign. OVMF's BSP election upon SMI rendezvous might be broken, but practically no one runs KVM with an out-of-kernel local APIC, let alone does so while utilizing SMIs with OVMF. Fixes: 97222cc83163 ("KVM: Emulate local APIC in kernel") Reviewed-by: Reiji Watanabe Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-15-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index a24ce8fe93e5..acb201d16b5e 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2268,9 +2268,6 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) u64 old_value = vcpu->arch.apic_base; struct kvm_lapic *apic = vcpu->arch.apic; - if (!apic) - value |= MSR_IA32_APICBASE_BSP; - vcpu->arch.apic_base = value; if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE) -- cgit From 503bc49424df4802ca34e4e1a024381fd7ced80e Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:32:53 -0700 Subject: KVM: x86: Set BSP bit in reset BSP vCPU's APIC base by default Set the BSP bit appropriately during local APIC "reset" instead of relying on vendor code to clean up at a later point. This is a step towards consolidating the local APIC, VMX, and SVM xAPIC initialization code. Reviewed-by: Reiji Watanabe Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-16-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index acb201d16b5e..0fb282b64c8f 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2321,6 +2321,7 @@ EXPORT_SYMBOL_GPL(kvm_apic_update_apicv); void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) { struct kvm_lapic *apic = vcpu->arch.apic; + u64 msr_val; int i; if (!apic) @@ -2330,8 +2331,10 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) hrtimer_cancel(&apic->lapic_timer.timer); if (!init_event) { - kvm_lapic_set_base(vcpu, APIC_DEFAULT_PHYS_BASE | - MSR_IA32_APICBASE_ENABLE); + msr_val = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE; + if (kvm_vcpu_is_reset_bsp(vcpu)) + msr_val |= MSR_IA32_APICBASE_BSP; + kvm_lapic_set_base(vcpu, msr_val); kvm_apic_set_xapic_id(apic, vcpu->vcpu_id); } kvm_apic_set_version(apic->vcpu); -- cgit From f0428b3dcb2d7efe4b2a2304841645b48f0b6499 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:32:54 -0700 Subject: KVM: VMX: Stuff vcpu->arch.apic_base directly at vCPU RESET Write vcpu->arch.apic_base directly instead of bouncing through kvm_set_apic_base(). This is a glorified nop, and is a step towards cleaning up the mess that is local APIC creation. When using an in-kernel APIC, kvm_create_lapic() explicitly sets vcpu->arch.apic_base to MSR_IA32_APICBASE_ENABLE to prevent its own kvm_lapic_set_base() call in kvm_lapic_reset() from triggering state changes.
That call during RESET exists purely to set apic->base_address to the default base value. As a result, by the time VMX gets control, the only missing piece is the BSP bit being set for the reset BSP. For a userspace APIC, there are no side effects to process (for the APIC). In both cases, the call to kvm_update_cpuid_runtime() is a nop because the vCPU hasn't yet been exposed to userspace, i.e. there can't be any CPUID entries. No functional change intended. Reviewed-by: Reiji Watanabe Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-17-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 207393a429d1..2fc232e1bc20 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -4391,7 +4391,6 @@ static void init_vmcs(struct vcpu_vmx *vmx) static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) { struct vcpu_vmx *vmx = to_vmx(vcpu); - struct msr_data apic_base_msr; u32 eax, dummy; u64 cr0; @@ -4409,12 +4408,10 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) kvm_set_cr8(vcpu, 0); if (!init_event) { - apic_base_msr.data = APIC_DEFAULT_PHYS_BASE | - MSR_IA32_APICBASE_ENABLE; + vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE | + MSR_IA32_APICBASE_ENABLE; if (kvm_vcpu_is_reset_bsp(vcpu)) - apic_base_msr.data |= MSR_IA32_APICBASE_BSP; - apic_base_msr.host_initiated = true; - kvm_set_apic_base(vcpu, &apic_base_msr); + vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP; } vmx_segment_cache_clear(vmx); -- cgit From 421221234ada41b4a9f0beeb08e30b07388bd4bd Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:32:55 -0700 Subject: KVM: x86: Open code necessary bits of kvm_lapic_set_base() at vCPU RESET Stuff vcpu->arch.apic_base and apic->base_address directly during APIC reset, as opposed to bouncing through kvm_set_apic_base() while fudging the ENABLE bit during creation to avoid the other, unwanted side effects. This is a step towards consolidating the APIC RESET logic across x86, VMX, and SVM. Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-18-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 0fb282b64c8f..295a9d02a9a5 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2321,7 +2321,6 @@ EXPORT_SYMBOL_GPL(kvm_apic_update_apicv); void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) { struct kvm_lapic *apic = vcpu->arch.apic; - u64 msr_val; int i; if (!apic) @@ -2331,10 +2330,13 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) hrtimer_cancel(&apic->lapic_timer.timer); if (!init_event) { - msr_val = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE; + vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE | + MSR_IA32_APICBASE_ENABLE; if (kvm_vcpu_is_reset_bsp(vcpu)) - msr_val |= MSR_IA32_APICBASE_BSP; - kvm_lapic_set_base(vcpu, msr_val); + vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP; + + apic->base_address = APIC_DEFAULT_PHYS_BASE; + kvm_apic_set_xapic_id(apic, vcpu->vcpu_id); } kvm_apic_set_version(apic->vcpu); @@ -2477,11 +2479,6 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns) lapic_timer_advance_dynamic = false; } - /* - * APIC is created enabled. 
This will prevent kvm_lapic_set_base from - * thinking that APIC state has changed. - */ - vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE; static_branch_inc(&apic_sw_disabled.key); /* sw disabled at reset */ kvm_iodevice_init(&apic->dev, &apic_mmio_ops); -- cgit From 4547700a4d190ac419abffa317069aaccf1ac118 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:32:56 -0700 Subject: KVM: x86: Consolidate APIC base RESET initialization code Consolidate the APIC base RESET logic, which is currently spread out across both x86 and vendor code. For an in-kernel APIC, the vendor code is redundant. But for a userspace APIC, KVM relies on the vendor code to initialize vcpu->arch.apic_base. Hoist the vcpu->arch.apic_base initialization above the !apic check so that it applies to both flavors of APIC emulation, and delete the vendor code. Reviewed-by: Reiji Watanabe Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-19-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 12 +++++++----- arch/x86/kvm/svm/svm.c | 6 ------ arch/x86/kvm/vmx/vmx.c | 7 ------- 3 files changed, 7 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 295a9d02a9a5..76fb00921203 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2323,6 +2323,13 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) struct kvm_lapic *apic = vcpu->arch.apic; int i; + if (!init_event) { + vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE | + MSR_IA32_APICBASE_ENABLE; + if (kvm_vcpu_is_reset_bsp(vcpu)) + vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP; + } + if (!apic) return; @@ -2330,11 +2337,6 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) hrtimer_cancel(&apic->lapic_timer.timer); if (!init_event) { - vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE | - MSR_IA32_APICBASE_ENABLE; - if (kvm_vcpu_is_reset_bsp(vcpu)) - vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP; - apic->base_address = APIC_DEFAULT_PHYS_BASE; kvm_apic_set_xapic_id(apic, vcpu->vcpu_id); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index a9af9bff4755..acb7bd14110b 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1336,12 +1336,6 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) svm->spec_ctrl = 0; svm->virt_spec_ctrl = 0; - if (!init_event) { - vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE | - MSR_IA32_APICBASE_ENABLE; - if (kvm_vcpu_is_reset_bsp(vcpu)) - vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP; - } init_vmcb(vcpu); /* diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 2fc232e1bc20..e487f10e2877 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -4407,13 +4407,6 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vmx->hv_deadline_tsc = -1; kvm_set_cr8(vcpu, 0); - if (!init_event) { - vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE | - MSR_IA32_APICBASE_ENABLE; - if (kvm_vcpu_is_reset_bsp(vcpu)) - vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP; - } - vmx_segment_cache_clear(vmx); seg_setup(VCPU_SREG_CS); -- cgit From 49d8665cc20b973995b5f519222d3270daa82f3f Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:32:57 -0700 Subject: KVM: x86: Move EDX initialization at vCPU RESET to common code Move the EDX initialization at vCPU RESET, which is now identical between VMX and SVM, into common code. No functional change intended. 
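A greatly simplified sketch of the common RESET flow once this part of the series lands (most of the real kvm_vcpu_reset() is elided; this mirrors the diffs that follow rather than quoting them):

    void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
    {
            u32 eax = 1, dummy;

            kvm_lapic_reset(vcpu, init_event);      /* APIC base, incl. BSP bit */

            /* Common EDX stuffing: FMS from CPUID.0x1, 0x600 if undefined. */
            if (!kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy, true))
                    eax = 0x600;
            kvm_rdx_write(vcpu, eax);

            static_call(kvm_x86_vcpu_reset)(vcpu, init_event);      /* vendor bits */
    }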
Reviewed-by: Reiji Watanabe Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-20-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 5 ----- arch/x86/kvm/svm/svm.c | 13 ------------- arch/x86/kvm/vmx/vmx.c | 6 ------ arch/x86/kvm/x86.c | 13 +++++++++++++ 4 files changed, 13 insertions(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index d798650ad793..ec8e4aca69c8 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1773,11 +1773,6 @@ static inline unsigned long read_msr(unsigned long msr) } #endif -static inline u32 get_rdx_init_val(void) -{ - return 0x600; /* P6 family */ -} - static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code) { kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index acb7bd14110b..f42ec7d4b0cd 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1330,25 +1330,12 @@ static void init_vmcb(struct kvm_vcpu *vcpu) static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) { struct vcpu_svm *svm = to_svm(vcpu); - u32 dummy; - u32 eax = 1; svm->spec_ctrl = 0; svm->virt_spec_ctrl = 0; init_vmcb(vcpu); - /* - * Fall back to KVM's default Family/Model/Stepping if no CPUID match - * is found. Note, it's impossible to get a match at RESET since KVM - * emulates RESET before exposing the vCPU to userspace, i.e. it's - * impossible for kvm_cpuid() to find a valid entry on RESET. But, go - * through the motions in case that's ever remedied, and to be pedantic. - */ - if (!kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy, true)) - eax = get_rdx_init_val(); - kvm_rdx_write(vcpu, eax); - if (kvm_vcpu_apicv_active(vcpu) && !init_event) avic_update_vapic_bar(svm, APIC_DEFAULT_PHYS_BASE); } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index e487f10e2877..ad41f3423acf 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -4391,7 +4391,6 @@ static void init_vmcs(struct vcpu_vmx *vmx) static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) { struct vcpu_vmx *vmx = to_vmx(vcpu); - u32 eax, dummy; u64 cr0; vmx->rmode.vm86_active = 0; @@ -4399,11 +4398,6 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vmx->msr_ia32_umwait_control = 0; - eax = 1; - if (!kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy, true)) - eax = get_rdx_init_val(); - kvm_rdx_write(vcpu, eax); - vmx->hv_deadline_tsc = -1; kvm_set_cr8(vcpu, 0); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 164ecca7769e..ed8b6d1b4e85 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10792,6 +10792,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) { unsigned long old_cr0 = kvm_read_cr0(vcpu); + u32 eax, dummy; kvm_lapic_reset(vcpu, init_event); @@ -10858,6 +10859,18 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vcpu->arch.regs_avail = ~0; vcpu->arch.regs_dirty = ~0; + /* + * Fall back to KVM's default Family/Model/Stepping of 0x600 (P6/Athlon) + * if no CPUID match is found. Note, it's impossible to get a match at + * RESET since KVM emulates RESET before exposing the vCPU to userspace, + * i.e. it's impossible for kvm_cpuid() to find a valid entry on RESET. + * But, go through the motions in case that's ever remedied.
+ */ + eax = 1; + if (!kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy, true)) + eax = 0x600; + kvm_rdx_write(vcpu, eax); + vcpu->arch.ia32_xss = 0; static_call(kvm_x86_vcpu_reset)(vcpu, init_event); -- cgit From 9e90e215d9c9f013cc354c4fe1a1313531a97a05 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:32:58 -0700 Subject: KVM: SVM: Don't bother writing vmcb->save.rip at vCPU RESET/INIT Drop unnecessary initialization of vmcb->save.rip during vCPU RESET/INIT, as svm_vcpu_run() unconditionally propagates VCPU_REGS_RIP to save.rip. No true functional change intended. Reviewed-by: Reiji Watanabe Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-21-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index f42ec7d4b0cd..9007adeb01fb 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1253,8 +1253,7 @@ static void init_vmcb(struct kvm_vcpu *vcpu) svm_set_efer(vcpu, 0); save->dr6 = 0xffff0ff0; kvm_set_rflags(vcpu, X86_EFLAGS_FIXED); - save->rip = 0x0000fff0; - vcpu->arch.regs[VCPU_REGS_RIP] = save->rip; + vcpu->arch.regs[VCPU_REGS_RIP] = 0x0000fff0; /* * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0. -- cgit From ee5a5584cba316bc90bc2fad0c6d10b71f1791cb Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:32:59 -0700 Subject: KVM: VMX: Invert handling of CR0.WP for EPT without unrestricted guest Opt in to forcing CR0.WP=1 for shadow paging, and stop lying about WP being "always on" for unrestricted guest. In addition to making KVM a wee bit more honest, this paves the way for additional cleanup. No functional change intended.
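The net hardware CR0 computation after this change can be sketched as follows (an illustrative helper mirroring the constants in vmx.c; example_hw_cr0() is not code from the patch):

    static unsigned long example_hw_cr0(unsigned long cr0, bool urg, bool ept)
    {
            unsigned long hw_cr0 = cr0 & ~(X86_CR0_NW | X86_CR0_CD);

            if (urg) {
                    hw_cr0 |= X86_CR0_NE;                   /* unrestricted guest */
            } else {
                    hw_cr0 |= X86_CR0_NE | X86_CR0_PG | X86_CR0_PE;
                    if (!ept)                               /* shadow paging */
                            hw_cr0 |= X86_CR0_WP;           /* KVM needs WP=1 */
            }
            return hw_cr0;
    }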
Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-22-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index ad41f3423acf..2e84c31efb81 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -136,8 +136,7 @@ module_param(allow_smaller_maxphyaddr, bool, S_IRUGO); #define KVM_VM_CR0_ALWAYS_OFF (X86_CR0_NW | X86_CR0_CD) #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR0_NE #define KVM_VM_CR0_ALWAYS_ON \ - (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | \ - X86_CR0_WP | X86_CR0_PG | X86_CR0_PE) + (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE) #define KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR4_VMXE #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE) @@ -2995,9 +2994,7 @@ void ept_save_pdptrs(struct kvm_vcpu *vcpu) kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR); } -static void ept_update_paging_mode_cr0(unsigned long *hw_cr0, - unsigned long cr0, - struct kvm_vcpu *vcpu) +static void ept_update_paging_mode_cr0(unsigned long cr0, struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -3016,9 +3013,6 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0, vcpu->arch.cr0 = cr0; vmx_set_cr4(vcpu, kvm_read_cr4(vcpu)); } - - if (!(cr0 & X86_CR0_WP)) - *hw_cr0 &= ~X86_CR0_WP; } void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) @@ -3031,6 +3025,8 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) hw_cr0 |= KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST; else { hw_cr0 |= KVM_VM_CR0_ALWAYS_ON; + if (!enable_ept) + hw_cr0 |= X86_CR0_WP; if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE)) enter_pmode(vcpu); @@ -3049,7 +3045,7 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) #endif if (enable_ept && !is_unrestricted_guest(vcpu)) - ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu); + ept_update_paging_mode_cr0(cr0, vcpu); vmcs_writel(CR0_READ_SHADOW, cr0); vmcs_writel(GUEST_CR0, hw_cr0); -- cgit From 4f0dcb544038e016277fb691f1e60d52d7448cf6 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:33:00 -0700 Subject: KVM: VMX: Remove direct write to vcpu->arch.cr0 during vCPU RESET/INIT Remove a bogus write to vcpu->arch.cr0 that immediately precedes vmx_set_cr0() during vCPU RESET/INIT. For RESET, this is a nop since the "old" CR0 value is meaningless. But for INIT, if the vCPU is coming from paging enabled mode, crushing vcpu->arch.cr0 will cause the various is_paging() checks in vmx_set_cr0() to get false negatives. For the exit_lmode() case, the false negative is benign as vmx_set_efer() is called immediately after vmx_set_cr0(). For EPT without unrestricted guest, the false negative will cause KVM to unnecessarily run with CR3 load/store exiting. But again, this is benign, albeit sub-optimal. 
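The false negatives arise because is_paging() reads the *cached* CR0; roughly (a sketch of the existing helper's behavior, not new code):

    static bool example_is_paging(struct kvm_vcpu *vcpu)
    {
            /* Reads vcpu->arch.cr0 via the register cache, so zeroing
             * vcpu->arch.cr0 ahead of vmx_set_cr0() makes "was paging
             * enabled?" queries spuriously return false. */
            return !!kvm_read_cr0_bits(vcpu, X86_CR0_PG);
    }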
Reviewed-by: Reiji Watanabe Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-23-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 2e84c31efb81..1e555fb732bf 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -4387,7 +4387,6 @@ static void init_vmcs(struct vcpu_vmx *vmx) static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) { struct vcpu_vmx *vmx = to_vmx(vcpu); - u64 cr0; vmx->rmode.vm86_active = 0; vmx->spec_ctrl = 0; @@ -4455,9 +4454,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); - cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; - vmx->vcpu.arch.cr0 = cr0; - vmx_set_cr0(vcpu, cr0); /* enter rmode */ + vmx_set_cr0(vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET); vmx_set_cr4(vcpu, 0); vmx_set_efer(vcpu, 0); -- cgit From c834fd7fc1308a0e0429d203a6c3af528cd902fa Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:33:01 -0700 Subject: KVM: VMX: Fold ept_update_paging_mode_cr0() back into vmx_set_cr0() Move the CR0/CR3/CR4 shenanigans for EPT without unrestricted guest back into vmx_set_cr0(). This will allow a future patch to eliminate the rather gross stuffing of vcpu->arch.cr0 in the paging transition cases by snapshotting the old CR0. No functional change intended. Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-24-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 40 +++++++++++++++++----------------------- 1 file changed, 17 insertions(+), 23 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 1e555fb732bf..e4b1c24ad079 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2994,27 +2994,6 @@ void ept_save_pdptrs(struct kvm_vcpu *vcpu) kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR); } -static void ept_update_paging_mode_cr0(unsigned long cr0, struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - - if (!kvm_register_is_available(vcpu, VCPU_EXREG_CR3)) - vmx_cache_reg(vcpu, VCPU_EXREG_CR3); - if (!(cr0 & X86_CR0_PG)) { - /* From paging/starting to nonpaging */ - exec_controls_setbit(vmx, CPU_BASED_CR3_LOAD_EXITING | - CPU_BASED_CR3_STORE_EXITING); - vcpu->arch.cr0 = cr0; - vmx_set_cr4(vcpu, kvm_read_cr4(vcpu)); - } else if (!is_paging(vcpu)) { - /* From nonpaging to paging */ - exec_controls_clearbit(vmx, CPU_BASED_CR3_LOAD_EXITING | - CPU_BASED_CR3_STORE_EXITING); - vcpu->arch.cr0 = cr0; - vmx_set_cr4(vcpu, kvm_read_cr4(vcpu)); - } -} - void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -3044,8 +3023,23 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) } #endif - if (enable_ept && !is_unrestricted_guest(vcpu)) - ept_update_paging_mode_cr0(cr0, vcpu); + if (enable_ept && !is_unrestricted_guest(vcpu)) { + if (!kvm_register_is_available(vcpu, VCPU_EXREG_CR3)) + vmx_cache_reg(vcpu, VCPU_EXREG_CR3); + if (!(cr0 & X86_CR0_PG)) { + /* From paging/starting to nonpaging */ + exec_controls_setbit(vmx, CPU_BASED_CR3_LOAD_EXITING | + CPU_BASED_CR3_STORE_EXITING); + vcpu->arch.cr0 = cr0; + vmx_set_cr4(vcpu, kvm_read_cr4(vcpu)); + } else if (!is_paging(vcpu)) { + /* From nonpaging to paging */ + exec_controls_clearbit(vmx, CPU_BASED_CR3_LOAD_EXITING | + CPU_BASED_CR3_STORE_EXITING); + vcpu->arch.cr0 = cr0; + 
vmx_set_cr4(vcpu, kvm_read_cr4(vcpu)); + } + } vmcs_writel(CR0_READ_SHADOW, cr0); vmcs_writel(GUEST_CR0, hw_cr0); -- cgit From 470750b3425513b9f63f176e564e63e0e7998afc Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:33:02 -0700 Subject: KVM: nVMX: Do not clear CR3 load/store exiting bits if L1 wants 'em Keep CR3 load/store exiting enabled as needed when running L2 in order to honor L1's desires. This fixes a largely theoretical bug where L1 could intercept CR3 but not CR0.PG and end up not getting the desired CR3 exits when L2 enables paging. In other words, the existing !is_paging() check inadvertently handles the normal case for L2 where vmx_set_cr0() is called during VM-Enter, which is guaranteed to run with paging enabled, and thus will never clear the bits. Removing the !is_paging() check will also allow future consolidation and cleanup of the related code. From a performance perspective, this is all a nop, as the VMCS controls shadow will optimize away the VMWRITE when the controls are in the desired state. Add a comment explaining why CR3 is intercepted, with a big disclaimer about not querying the old CR3. Because vmx_set_cr0() is used for flows that are not directly tied to MOV CR3, e.g. vCPU RESET/INIT and nested VM-Enter, it's possible that is_paging() is not synchronized with CR3 load/store exiting. This is actually guaranteed in the current code, as KVM starts with CR3 interception disabled. Obviously that can be fixed, but there's no good reason to play whack-a-mole, and it tends to end poorly, e.g. descriptor table exiting for UMIP emulation attempted to be precise in the past and ended up botching the interception toggling. Fixes: fe3ef05c7572 ("KVM: nVMX: Prepare vmcs02 from vmcs01 and vmcs12") Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-25-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 46 +++++++++++++++++++++++++++++++++--------- 1 file changed, 37 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index e4b1c24ad079..9f69ccc096a3 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2994,10 +2994,14 @@ void ept_save_pdptrs(struct kvm_vcpu *vcpu) kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR); } +#define CR3_EXITING_BITS (CPU_BASED_CR3_LOAD_EXITING | \ + CPU_BASED_CR3_STORE_EXITING) + void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) { struct vcpu_vmx *vmx = to_vmx(vcpu); unsigned long hw_cr0; + u32 tmp; hw_cr0 = (cr0 & ~KVM_VM_CR0_ALWAYS_OFF); if (is_unrestricted_guest(vcpu)) @@ -3024,18 +3028,42 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) #endif if (enable_ept && !is_unrestricted_guest(vcpu)) { + /* + * Ensure KVM has an up-to-date snapshot of the guest's CR3. If + * the below code _enables_ CR3 exiting, vmx_cache_reg() will + * (correctly) stop reading vmcs.GUEST_CR3 because it thinks + * KVM's CR3 is installed. + */ if (!kvm_register_is_available(vcpu, VCPU_EXREG_CR3)) vmx_cache_reg(vcpu, VCPU_EXREG_CR3); + + /* + * When running with EPT but not unrestricted guest, KVM must + * intercept CR3 accesses when paging is _disabled_. This is + * necessary because restricted guests can't actually run with + * paging disabled, and so KVM stuffs its own CR3 in order to + * run the guest with identity mapped page tables. + * + * Do _NOT_ check the old CR0.PG, e.g. to optimize away the + * update, it may be stale with respect to CR3 interception, + * e.g. after nested VM-Enter.
+ * + * Lastly, honor L1's desires, i.e. intercept CR3 loads and/or + * stores to forward them to L1, even if KVM does not need to + * intercept them to preserve its identity mapped page tables. + */ if (!(cr0 & X86_CR0_PG)) { - /* From paging/starting to nonpaging */ - exec_controls_setbit(vmx, CPU_BASED_CR3_LOAD_EXITING | - CPU_BASED_CR3_STORE_EXITING); - vcpu->arch.cr0 = cr0; - vmx_set_cr4(vcpu, kvm_read_cr4(vcpu)); - } else if (!is_paging(vcpu)) { - /* From nonpaging to paging */ - exec_controls_clearbit(vmx, CPU_BASED_CR3_LOAD_EXITING | - CPU_BASED_CR3_STORE_EXITING); + exec_controls_setbit(vmx, CR3_EXITING_BITS); + } else if (!is_guest_mode(vcpu)) { + exec_controls_clearbit(vmx, CR3_EXITING_BITS); + } else { + tmp = exec_controls_get(vmx); + tmp &= ~CR3_EXITING_BITS; + tmp |= get_vmcs12(vcpu)->cpu_based_vm_exec_control & CR3_EXITING_BITS; + exec_controls_set(vmx, tmp); + } + + if (!is_paging(vcpu) != !(cr0 & X86_CR0_PG)) { vcpu->arch.cr0 = cr0; vmx_set_cr4(vcpu, kvm_read_cr4(vcpu)); } -- cgit From 81ca0e7340eedcbe67911d97f292837e0cf39709 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:33:03 -0700 Subject: KVM: VMX: Pull GUEST_CR3 from the VMCS iff CR3 load exiting is disabled Tweak the logic for grabbing vmcs.GUEST_CR3 in vmx_cache_reg() to look directly at the execution controls, as opposed to effectively inferring the controls based on vCPUs. Inferring the controls isn't wrong, but it creates a very subtle dependency between the caching logic, the state of vcpu->arch.cr0 (via is_paging()), and the behavior of vmx_set_cr0(). Using the execution controls doesn't completely eliminate the dependency in vmx_set_cr0(), e.g. neglecting to cache CR3 before enabling interception would still break the guest, but it does reduce the code dependency and mostly eliminate the logical dependency (that CR3 loads are intercepted in certain scenarios). Eliminating the subtle read of vcpu->arch.cr0 will also allow for additional cleanup in vmx_set_cr0(). Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-26-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 9f69ccc096a3..0c1e578967ff 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2262,8 +2262,11 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & guest_owned_bits; break; case VCPU_EXREG_CR3: - if (is_unrestricted_guest(vcpu) || - (enable_ept && is_paging(vcpu))) + /* + * When intercepting CR3 loads, e.g. for shadowing paging, KVM's + * CR3 is loaded into hardware, not the guest's CR3. + */ + if (!(exec_controls_get(to_vmx(vcpu)) & CPU_BASED_CR3_LOAD_EXITING)) vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); break; case VCPU_EXREG_CR4: -- cgit From 908b7d43c02c5f1f95beff66ce4ca30dfc3b134c Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:33:04 -0700 Subject: KVM: x86/mmu: Skip the permission_fault() check on MMIO if CR0.PG=0 Skip the MMU permission_fault() check if paging is disabled when verifying the cached MMIO GVA is usable. The check is unnecessary and can theoretically get a false positive since the MMU doesn't zero out "permissions" or "pkru_mask" when guest paging is disabled. 
The obvious alternative is to zero out all the bitmasks when configuring nonpaging MMUs, but that's unnecessary work and doesn't align with the MMU's general approach of doing as little as possible for flows that are supposed to be unreachable. This is nearly a nop as the false positive is nothing more than an insignificant performance blip, and more or less limited to string MMIO when L1 is running with paging disabled. KVM doesn't cache MMIO if L2 is active with nested TDP since the "GVA" is really an L2 GPA. If L2 is active without nested TDP, then paging can't be disabled as neither VMX nor SVM allows entering the guest without paging of some form. Jumping back to L1 with paging disabled, in that case direct_map is true and so KVM will use CR2 as a GPA; the only time it doesn't is if the fault from the emulator doesn't match or emulator_can_use_gpa(), and that fails only on string MMIO and other instructions with multiple memory operands. Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-27-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ed8b6d1b4e85..c56788f8d180 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6568,9 +6568,9 @@ static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva, * there is no pkey in EPT page table for L1 guest or EPT * shadow page table for L2 guest. */ - if (vcpu_match_mmio_gva(vcpu, gva) - && !permission_fault(vcpu, vcpu->arch.walk_mmu, - vcpu->arch.mmio_access, 0, access)) { + if (vcpu_match_mmio_gva(vcpu, gva) && (!is_paging(vcpu) || + !permission_fault(vcpu, vcpu->arch.walk_mmu, + vcpu->arch.mmio_access, 0, access))) { *gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT | (gva & (PAGE_SIZE - 1)); trace_vcpu_match_mmio(gva, *gpa, write, false); -- cgit From 32437c2aea428f9c3479904e342b02f5eee3a7f6 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:33:05 -0700 Subject: KVM: VMX: Process CR0.PG side effects after setting CR0 assets Move the long mode and EPT w/o unrestricted guest side effect processing down in vmx_set_cr0() so that the EPT && !URG case doesn't have to stuff vcpu->arch.cr0 early. This also fixes an oddity where CR0 might not be marked available, i.e. the early vcpu->arch.cr0 write would appear to be in danger of being overwritten, though that can't actually happen in the current code since CR0.TS is the only guest-owned bit, and CR0.TS is not read by vmx_set_cr4(). 
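For reference, this is roughly how a guest-owned CR0 bit is pulled out of the VMCS on demand, paraphrased from vmx_cache_reg() (illustration only, not part of this diff):

	case VCPU_EXREG_CR0:
		guest_owned_bits = vcpu->arch.cr0_guest_owned_bits;
		vcpu->arch.cr0 &= ~guest_owned_bits;
		vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & guest_owned_bits;
		break;

Since CR0.TS is the only guest-owned bit and vmx_set_cr4() never reads it, marking CR0 available before the side effects run cannot surface a stale value.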
Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-28-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 0c1e578967ff..aa547100f0df 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -3003,9 +3003,11 @@ void ept_save_pdptrs(struct kvm_vcpu *vcpu) void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) { struct vcpu_vmx *vmx = to_vmx(vcpu); - unsigned long hw_cr0; + unsigned long hw_cr0, old_cr0_pg; u32 tmp; + old_cr0_pg = kvm_read_cr0_bits(vcpu, X86_CR0_PG); + hw_cr0 = (cr0 & ~KVM_VM_CR0_ALWAYS_OFF); if (is_unrestricted_guest(vcpu)) hw_cr0 |= KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST; @@ -3021,11 +3023,16 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) enter_rmode(vcpu); } + vmcs_writel(CR0_READ_SHADOW, cr0); + vmcs_writel(GUEST_CR0, hw_cr0); + vcpu->arch.cr0 = cr0; + kvm_register_mark_available(vcpu, VCPU_EXREG_CR0); + #ifdef CONFIG_X86_64 if (vcpu->arch.efer & EFER_LME) { - if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) + if (!old_cr0_pg && (cr0 & X86_CR0_PG)) enter_lmode(vcpu); - if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) + else if (old_cr0_pg && !(cr0 & X86_CR0_PG)) exit_lmode(vcpu); } #endif @@ -3066,17 +3073,11 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) exec_controls_set(vmx, tmp); } - if (!is_paging(vcpu) != !(cr0 & X86_CR0_PG)) { - vcpu->arch.cr0 = cr0; + /* Note, vmx_set_cr4() consumes the new vcpu->arch.cr0. */ + if ((old_cr0_pg ^ cr0) & X86_CR0_PG) vmx_set_cr4(vcpu, kvm_read_cr4(vcpu)); - } } - vmcs_writel(CR0_READ_SHADOW, cr0); - vmcs_writel(GUEST_CR0, hw_cr0); - vcpu->arch.cr0 = cr0; - kvm_register_mark_available(vcpu, VCPU_EXREG_CR0); - /* depends on vcpu->arch.cr0 to be set to a new value */ vmx->emulation_required = emulation_required(vcpu); } -- cgit From 1dd7a4f18fbcc0db1acc07df5c1ae8ba4ce10593 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:33:06 -0700 Subject: KVM: VMX: Skip emulation required checks during pmode/rmode transitions Don't refresh "emulation required" when stuffing segments during transitions to/from real mode when running without unrestricted guest. The checks are unnecessary as vmx_set_cr0() unconditionally rechecks "emulation required". They also happen to be broken, as enter_pmode() and enter_rmode() run with a stale vcpu->arch.cr0. 
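For context, "emulation required" is cheap to recompute; the helper is roughly (quoted loosely from vmx.c, illustration only):

	static bool emulation_required(struct kvm_vcpu *vcpu)
	{
		return emulate_invalid_guest_state && !vmx_guest_state_valid(vcpu);
	}

so unconditionally refreshing it at the end of vmx_set_cr0() costs next to nothing.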
Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-29-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index aa547100f0df..ff3f75105ae9 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2719,6 +2719,8 @@ static __init int alloc_kvm_area(void) return 0; } +static void __vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); + static void fix_pmode_seg(struct kvm_vcpu *vcpu, int seg, struct kvm_segment *save) { @@ -2735,7 +2737,7 @@ static void fix_pmode_seg(struct kvm_vcpu *vcpu, int seg, save->dpl = save->selector & SEGMENT_RPL_MASK; save->s = 1; } - vmx_set_segment(vcpu, save, seg); + __vmx_set_segment(vcpu, save, seg); } static void enter_pmode(struct kvm_vcpu *vcpu) @@ -2756,7 +2758,7 @@ static void enter_pmode(struct kvm_vcpu *vcpu) vmx->rmode.vm86_active = 0; - vmx_set_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR); + __vmx_set_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR); flags = vmcs_readl(GUEST_RFLAGS); flags &= RMODE_GUEST_OWNED_EFLAGS_BITS; @@ -3291,7 +3293,7 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var) return ar; } -void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) +static void __vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) { struct vcpu_vmx *vmx = to_vmx(vcpu); const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; @@ -3304,7 +3306,7 @@ void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) vmcs_write16(sf->selector, var->selector); else if (var->s) fix_rmode_seg(seg, &vmx->rmode.segs[seg]); - goto out; + return; } vmcs_writel(sf->base, var->base); @@ -3326,9 +3328,13 @@ void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) var->type |= 0x1; /* Accessed */ vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(var)); +} -out: - vmx->emulation_required = emulation_required(vcpu); +void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) +{ + __vmx_set_segment(vcpu, var, seg); + + to_vmx(vcpu)->emulation_required = emulation_required(vcpu); } static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) -- cgit From 816be9e9be8d2e19dcea35fbec06f00cd5749fed Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:33:07 -0700 Subject: KVM: nVMX: Don't evaluate "emulation required" on nested VM-Exit Use the "internal" variants of setting segment registers when stuffing state on nested VM-Exit in order to skip the "emulation required" updates. VM-Exit must always go to protected mode, and all segments are mostly hardcoded (to valid values) on VM-Exit. The bits of the segments that aren't hardcoded are explicitly checked during VM-Enter, e.g. the selector RPLs must all be zero. 
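Illustrative call chain for the affected path (sketch only):

	nested_vmx_vmexit
	  load_vmcs12_host_state
	    __vmx_set_segment(vcpu, &seg, VCPU_SREG_CS)  // no "emulation required" refresh

Since VM-Exit architecturally lands in protected mode with valid segment state, skipping the refresh is safe.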
Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-30-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 16 ++++++++-------- arch/x86/kvm/vmx/vmx.c | 6 ++---- arch/x86/kvm/vmx/vmx.h | 2 +- 3 files changed, 11 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 7f8184f432b4..a77cfc8bcf11 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -4267,7 +4267,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, seg.l = 1; else seg.db = 1; - vmx_set_segment(vcpu, &seg, VCPU_SREG_CS); + __vmx_set_segment(vcpu, &seg, VCPU_SREG_CS); seg = (struct kvm_segment) { .base = 0, .limit = 0xFFFFFFFF, @@ -4278,17 +4278,17 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, .g = 1 }; seg.selector = vmcs12->host_ds_selector; - vmx_set_segment(vcpu, &seg, VCPU_SREG_DS); + __vmx_set_segment(vcpu, &seg, VCPU_SREG_DS); seg.selector = vmcs12->host_es_selector; - vmx_set_segment(vcpu, &seg, VCPU_SREG_ES); + __vmx_set_segment(vcpu, &seg, VCPU_SREG_ES); seg.selector = vmcs12->host_ss_selector; - vmx_set_segment(vcpu, &seg, VCPU_SREG_SS); + __vmx_set_segment(vcpu, &seg, VCPU_SREG_SS); seg.selector = vmcs12->host_fs_selector; seg.base = vmcs12->host_fs_base; - vmx_set_segment(vcpu, &seg, VCPU_SREG_FS); + __vmx_set_segment(vcpu, &seg, VCPU_SREG_FS); seg.selector = vmcs12->host_gs_selector; seg.base = vmcs12->host_gs_base; - vmx_set_segment(vcpu, &seg, VCPU_SREG_GS); + __vmx_set_segment(vcpu, &seg, VCPU_SREG_GS); seg = (struct kvm_segment) { .base = vmcs12->host_tr_base, .limit = 0x67, @@ -4296,11 +4296,11 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, .type = 11, .present = 1 }; - vmx_set_segment(vcpu, &seg, VCPU_SREG_TR); + __vmx_set_segment(vcpu, &seg, VCPU_SREG_TR); memset(&seg, 0, sizeof(seg)); seg.unusable = 1; - vmx_set_segment(vcpu, &seg, VCPU_SREG_LDTR); + __vmx_set_segment(vcpu, &seg, VCPU_SREG_LDTR); kvm_set_dr(vcpu, 7, 0x400); vmcs_write64(GUEST_IA32_DEBUGCTL, 0); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index ff3f75105ae9..359c30be62bb 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2719,8 +2719,6 @@ static __init int alloc_kvm_area(void) return 0; } -static void __vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); - static void fix_pmode_seg(struct kvm_vcpu *vcpu, int seg, struct kvm_segment *save) { @@ -3293,7 +3291,7 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var) return ar; } -static void __vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) +void __vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) { struct vcpu_vmx *vmx = to_vmx(vcpu); const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; @@ -3330,7 +3328,7 @@ static void __vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, in vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(var)); } -void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) +static void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) { __vmx_set_segment(vcpu, var, seg); diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index db88ed4f2121..fd076bd26676 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -371,7 +371,7 @@ void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); void set_cr4_guest_host_mask(struct vcpu_vmx *vmx); void ept_save_pdptrs(struct kvm_vcpu *vcpu); void 
vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); -void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); +void __vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); u64 construct_eptp(struct kvm_vcpu *vcpu, hpa_t root_hpa, int root_level); bool vmx_guest_inject_ac(struct kvm_vcpu *vcpu); -- cgit From ef8a0fa59be78e7b80bdf35c3391d60c061c7b24 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:33:08 -0700 Subject: KVM: SVM: Tweak order of cr0/cr4/efer writes at RESET/INIT Hoist svm_set_cr0() up in the sequence of register initialization during vCPU RESET/INIT, purely to match VMX so that a future patch can move the sequences to common x86. Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-31-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 9007adeb01fb..0afa060798a5 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1249,18 +1249,13 @@ static void init_vmcb(struct kvm_vcpu *vcpu) init_sys_seg(&save->ldtr, SEG_TYPE_LDT); init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16); + svm_set_cr0(vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET); svm_set_cr4(vcpu, 0); svm_set_efer(vcpu, 0); save->dr6 = 0xffff0ff0; kvm_set_rflags(vcpu, X86_EFLAGS_FIXED); vcpu->arch.regs[VCPU_REGS_RIP] = 0x0000fff0; - /* * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0. * It also updates the guest-visible cr0 value. */ - svm_set_cr0(vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET); - save->cr4 = X86_CR4_PAE; if (npt_enabled) { -- cgit From 6cfe7b83acdcdd4d4ea9354f007a88bc0ccb16b9 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:33:09 -0700 Subject: KVM: SVM: Drop redundant writes to vmcb->save.cr4 at RESET/INIT Drop direct writes to vmcb->save.cr4 during vCPU RESET/INIT, as the values being written are fully redundant with respect to svm_set_cr4(vcpu, 0) a few lines earlier. Note, svm_set_cr4() also correctly forces X86_CR4_PAE when NPT is disabled. No functional change intended. Reviewed-by: Reiji Watanabe Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-32-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 0afa060798a5..1038bfdd7bbb 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1256,8 +1256,6 @@ static void init_vmcb(struct kvm_vcpu *vcpu) kvm_set_rflags(vcpu, X86_EFLAGS_FIXED); vcpu->arch.regs[VCPU_REGS_RIP] = 0x0000fff0; - save->cr4 = X86_CR4_PAE; - if (npt_enabled) { /* Setup VMCB for Nested Paging */ control->nested_ctl |= SVM_NESTED_CTL_NP_ENABLE; @@ -1267,7 +1265,6 @@ static void init_vmcb(struct kvm_vcpu *vcpu) svm_clr_intercept(svm, INTERCEPT_CR3_WRITE); save->g_pat = vcpu->arch.pat; save->cr3 = 0; - save->cr4 = 0; } svm->current_vmcb->asid_generation = 0; svm->asid = 0; -- cgit From d0f9f826d8ac06446391ceb3d4a440f5b48b3134 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:33:10 -0700 Subject: KVM: SVM: Stuff save->dr6 during VMSA sync, not at RESET/INIT Move code to stuff vmcb->save.dr6 to its architectural init value from svm_vcpu_reset() into sev_es_sync_vmsa(). Except for protected guests, a.k.a. SEV-ES guests, vmcb->save.dr6 is set during VM-Enter, i.e. 
the extra write is unnecessary. For SEV-ES, stuffing save->dr6 handles a theoretical case where the VMSA could be encrypted before the first KVM_RUN. Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-33-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 1 + arch/x86/kvm/svm/svm.c | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 6710d9ee2e4b..9f1585f40c85 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -584,6 +584,7 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm) save->xcr0 = svm->vcpu.arch.xcr0; save->pkru = svm->vcpu.arch.pkru; save->xss = svm->vcpu.arch.ia32_xss; + save->dr6 = svm->vcpu.arch.dr6; /* * SEV-ES will use a VMSA that is pointed to by the VMCB, not diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 1038bfdd7bbb..64563f876fdf 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1252,7 +1252,6 @@ static void init_vmcb(struct kvm_vcpu *vcpu) svm_set_cr0(vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET); svm_set_cr4(vcpu, 0); svm_set_efer(vcpu, 0); - save->dr6 = 0xffff0ff0; kvm_set_rflags(vcpu, X86_EFLAGS_FIXED); vcpu->arch.regs[VCPU_REGS_RIP] = 0x0000fff0; -- cgit From 400dd54b37172b64e4ceaa6bfaf2729585840375 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:33:11 -0700 Subject: KVM: VMX: Skip pointless MSR bitmap update when setting EFER Split setup_msrs() into vmx_setup_uret_msrs() and an open coded refresh of the MSR bitmap, and skip the latter when refreshing the user return MSRs during an EFER load. Only the x2APIC MSRs are dynamically exposed and hidden, and those are not affected by a change in EFER. Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-34-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 359c30be62bb..d5d9fc5df259 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1647,11 +1647,12 @@ static void vmx_setup_uret_msr(struct vcpu_vmx *vmx, unsigned int msr, } /* - * Set up the vmcs to automatically save and restore system - * msrs. Don't touch the 64-bit msrs if the guest is in legacy - * mode, as fiddling with msrs is very expensive. + * Configuring user return MSRs to automatically save, load, and restore MSRs + * that need to be shoved into hardware when running the guest. Note, omitting + * an MSR here does _NOT_ mean it's not emulated, only that it will not be + * loaded into hardware when running the guest. */ -static void setup_msrs(struct vcpu_vmx *vmx) +static void vmx_setup_uret_msrs(struct vcpu_vmx *vmx) { #ifdef CONFIG_X86_64 bool load_syscall_msrs; @@ -1681,9 +1682,6 @@ static void setup_msrs(struct vcpu_vmx *vmx) */ vmx_setup_uret_msr(vmx, MSR_IA32_TSX_CTRL, boot_cpu_has(X86_FEATURE_RTM)); - if (cpu_has_vmx_msr_bitmap()) - vmx_update_msr_bitmap(&vmx->vcpu); - /* * The set of MSRs to load may have changed, reload MSRs before the * next VM-Enter. 
@@ -2874,7 +2872,7 @@ int vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) msr->data = efer & ~EFER_LME; } - setup_msrs(vmx); + vmx_setup_uret_msrs(vmx); return 0; } @@ -4470,7 +4468,10 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) if (kvm_mpx_supported()) vmcs_write64(GUEST_BNDCFGS, 0); - setup_msrs(vmx); + vmx_setup_uret_msrs(vmx); + + if (cpu_has_vmx_msr_bitmap()) + vmx_update_msr_bitmap(&vmx->vcpu); vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); /* 22.2.1 */ -- cgit From 432979b5034208890e323e86724417f02825abb7 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:33:12 -0700 Subject: KVM: VMX: Refresh list of user return MSRs after setting guest CPUID After a CPUID update, refresh the list of user return MSRs that are loaded into hardware when running the vCPU. This is necessary to handle the oddball case where userspace exposes X86_FEATURE_RDTSCP to the guest after the vCPU is running. Fixes: 0023ef39dc35 ("kvm: vmx: Set IA32_TSC_AUX for legacy mode guests") Fixes: 4e47c7a6d714 ("KVM: VMX: Add instruction rdtscp support for guest") Reviewed-by: Reiji Watanabe Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-35-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index d5d9fc5df259..09c5a021e49b 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7170,6 +7170,8 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) /* xsaves_enabled is recomputed in vmx_compute_secondary_exec_control(). */ vcpu->arch.xsaves_enabled = false; + vmx_setup_uret_msrs(vmx); + if (cpu_has_secondary_exec_ctrls()) { vmx_compute_secondary_exec_control(vmx); vmcs_set_secondary_exec_control(vmx); -- cgit From c5c9f920f7a50ea205c9efec7e589556ebaf85dc Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:33:13 -0700 Subject: KVM: VMX: Don't _explicitly_ reconfigure user return MSRs on vCPU INIT When emulating vCPU INIT, do not unconditionally refresh the list of user return MSRs that need to be loaded into hardware when running the guest. Unconditionally refreshing the list is confusing, as the vast majority of MSRs are not modified on INIT. The real motivation is to handle the case where an INIT during long mode obviates the need to load the SYSCALL MSRs, and that is handled as needed by vmx_set_efer(). 
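For illustration, the INIT-in-long-mode flow that makes the explicit refresh redundant looks roughly like this (sketch only, per the vmx_set_efer() change earlier in this series):

	vmx_vcpu_reset(vcpu, init_event=true)
	  vmx_set_efer(vcpu, 0)
	    vmx_setup_uret_msrs(vmx)  // recomputes the list, drops the SYSCALL MSRs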
Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-36-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 09c5a021e49b..2e2b469b8ced 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -4410,6 +4410,8 @@ static void init_vmcs(struct vcpu_vmx *vmx) vmx->pt_desc.guest.output_mask = 0x7F; vmcs_write64(GUEST_IA32_RTIT_CTL, 0); } + + vmx_setup_uret_msrs(vmx); } static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) @@ -4468,8 +4470,6 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) if (kvm_mpx_supported()) vmcs_write64(GUEST_BNDCFGS, 0); - vmx_setup_uret_msrs(vmx); - if (cpu_has_vmx_msr_bitmap()) vmx_update_msr_bitmap(&vmx->vcpu); -- cgit From f39e805ee115a67878c5475f1ef84466d424bb2e Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:33:14 -0700 Subject: KVM: x86: Move setting of sregs during vCPU RESET/INIT to common x86 Move the setting of CR0, CR4, EFER, RFLAGS, and RIP from vendor code to common x86. VMX and SVM now have near-identical sequences, the only difference being that VMX updates the exception bitmap. Updating the bitmap on SVM is unnecessary, but benign. Unfortunately it can't be left behind in VMX due to the need to update exception intercepts after the control registers are set. Reviewed-by: Reiji Watanabe Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-37-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 6 ------ arch/x86/kvm/vmx/vmx.c | 9 --------- arch/x86/kvm/x86.c | 8 ++++++++ 3 files changed, 8 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 64563f876fdf..8ebdcd92007d 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1249,12 +1249,6 @@ static void init_vmcb(struct kvm_vcpu *vcpu) init_sys_seg(&save->ldtr, SEG_TYPE_LDT); init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16); - svm_set_cr0(vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET); - svm_set_cr4(vcpu, 0); - svm_set_efer(vcpu, 0); - kvm_set_rflags(vcpu, X86_EFLAGS_FIXED); - vcpu->arch.regs[VCPU_REGS_RIP] = 0x0000fff0; - if (npt_enabled) { /* Setup VMCB for Nested Paging */ control->nested_ctl |= SVM_NESTED_CTL_NP_ENABLE; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 2e2b469b8ced..7518d4cf8156 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -4455,9 +4455,6 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vmcs_write64(GUEST_IA32_DEBUGCTL, 0); } - kvm_set_rflags(vcpu, X86_EFLAGS_FIXED); - kvm_rip_write(vcpu, 0xfff0); - vmcs_writel(GUEST_GDTR_BASE, 0); vmcs_write32(GUEST_GDTR_LIMIT, 0xffff); @@ -4485,12 +4482,6 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); - vmx_set_cr0(vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET); - vmx_set_cr4(vcpu, 0); - vmx_set_efer(vcpu, 0); - - vmx_update_exception_bitmap(vcpu); - vpid_sync_context(vmx->vpid); if (init_event) vmx_clear_hlt(vcpu); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c56788f8d180..6c55f2e83a7c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10875,6 +10875,14 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) static_call(kvm_x86_vcpu_reset)(vcpu, init_event); + kvm_set_rflags(vcpu, X86_EFLAGS_FIXED); + kvm_rip_write(vcpu, 
0xfff0); + + static_call(kvm_x86_set_cr0)(vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET); + static_call(kvm_x86_set_cr4)(vcpu, 0); + static_call(kvm_x86_set_efer)(vcpu, 0); + static_call(kvm_x86_update_exception_bitmap)(vcpu); + /* * Reset the MMU context if paging was enabled prior to INIT (which is * implied if CR0.PG=1 as CR0 will be '0' prior to RESET). Unlike the -- cgit From 9e4784e19daaa5cd637753d5300fc5a42d0c694a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:33:15 -0700 Subject: KVM: VMX: Remove obsolete MSR bitmap refresh at vCPU RESET/INIT Remove an unnecessary MSR bitmap refresh during vCPU RESET/INIT. In both cases, the MSR bitmap already has the desired values and state. At RESET, the vCPU is guaranteed to be running with x2APIC disabled, the x2APIC MSRs are guaranteed to be intercepted due to the MSR bitmap being initialized to all ones by alloc_loaded_vmcs(), and vmx->msr_bitmap_mode is guaranteed to be zero, i.e. reflecting x2APIC disabled. At INIT, the APIC_BASE MSR is not modified, thus there can't be any change in x2APIC state. Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-38-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 7518d4cf8156..af7aa9bb4bdb 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -4467,9 +4467,6 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) if (kvm_mpx_supported()) vmcs_write64(GUEST_BNDCFGS, 0); - if (cpu_has_vmx_msr_bitmap()) - vmx_update_msr_bitmap(&vmx->vcpu); - vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); /* 22.2.1 */ if (cpu_has_vmx_tpr_shadow() && !init_event) { -- cgit From 284036c644a1dc71890503c849b22c3126308067 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:33:16 -0700 Subject: KVM: nVMX: Remove obsolete MSR bitmap refresh at nested transitions Drop unnecessary MSR bitmap updates during nested transitions, as L1's APIC_BASE MSR is not modified by the standard VM-Enter/VM-Exit flows, and L2's MSR bitmap is managed separately. In the unlikely event that L1 is pathological and loads APIC_BASE via the VM-Exit load list, KVM will handle updating the bitmap in its normal WRMSR flows. 
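For the pathological load-list case, the flow that still refreshes the bitmap looks roughly like this (sketch only; the exact lapic plumbing is paraphrased):

	nested_vmx_load_msr
	  kvm_set_msr(vcpu, MSR_IA32_APICBASE, ...)
	    kvm_set_apic_base
	      kvm_lapic_set_base
	        static_call(kvm_x86_set_virtual_apic_mode) -> vmx_set_virtual_apic_mode
	          vmx_update_msr_bitmap(vcpu)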
Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-39-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 6 ------ arch/x86/kvm/vmx/vmx.c | 2 +- arch/x86/kvm/vmx/vmx.h | 1 - 3 files changed, 1 insertion(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index a77cfc8bcf11..0d0dd6580cfd 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -4305,9 +4305,6 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, kvm_set_dr(vcpu, 7, 0x400); vmcs_write64(GUEST_IA32_DEBUGCTL, 0); - if (cpu_has_vmx_msr_bitmap()) - vmx_update_msr_bitmap(vcpu); - if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr, vmcs12->vm_exit_msr_load_count)) nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_MSR_FAIL); @@ -4386,9 +4383,6 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu) kvm_mmu_reset_context(vcpu); - if (cpu_has_vmx_msr_bitmap()) - vmx_update_msr_bitmap(vcpu); - /* * This nasty bit of open coding is a compromise between blindly * loading L1's MSRs using the exit load lists (incorrect emulation diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index af7aa9bb4bdb..6e79f1b2653b 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -3865,7 +3865,7 @@ static void vmx_update_msr_bitmap_x2apic(struct kvm_vcpu *vcpu, u8 mode) } } -void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu) +static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); u8 mode = vmx_msr_bitmap_mode(vcpu); diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index fd076bd26676..a408a9070662 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -376,7 +376,6 @@ u64 construct_eptp(struct kvm_vcpu *vcpu, hpa_t root_hpa, int root_level); bool vmx_guest_inject_ac(struct kvm_vcpu *vcpu); void vmx_update_exception_bitmap(struct kvm_vcpu *vcpu); -void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu); bool vmx_nmi_blocked(struct kvm_vcpu *vcpu); bool vmx_interrupt_blocked(struct kvm_vcpu *vcpu); bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu); -- cgit From 002f87a41e9a6ef795f7f493f9434f51fb9f7d35 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:33:17 -0700 Subject: KVM: VMX: Don't redo x2APIC MSR bitmaps when userspace filter is changed Drop an explicit call to update the x2APIC MSRs when the userspace MSR filter is modified. The x2APIC MSRs are deliberately exempt from userspace filtering. Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-40-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 6e79f1b2653b..7100e70ab718 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -3936,7 +3936,6 @@ static void vmx_msr_filter_changed(struct kvm_vcpu *vcpu) } pt_update_intercept_for_msr(vcpu); - vmx_update_msr_bitmap_x2apic(vcpu, vmx_msr_bitmap_mode(vcpu)); } static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu, -- cgit From e7c701dd7a5019c8628007c91c0e8d804c194667 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:33:18 -0700 Subject: KVM: VMX: Remove unnecessary initialization of msr_bitmap_mode Don't bother initializing msr_bitmap_mode to 0, all of struct vcpu_vmx is zero initialized. No functional change intended. 
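For reference (a sketch of the allocation path, outside this diff): vCPU structs come from kvm_vm_ioctl_create_vcpu(), which allocates them zeroed, e.g.

	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL_ACCOUNT);

and for VMX the cache object is the embedding struct vcpu_vmx, so msr_bitmap_mode starts life as 0.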
Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-41-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 7100e70ab718..308820173505 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6841,7 +6841,6 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu) vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C6_RESIDENCY, MSR_TYPE_R); vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C7_RESIDENCY, MSR_TYPE_R); } - vmx->msr_bitmap_mode = 0; vmx->loaded_vmcs = &vmx->vmcs01; cpu = get_cpu(); -- cgit From 84ec8d2d539f7286d4504c2d377002f1ea7458d6 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:33:19 -0700 Subject: KVM: VMX: Smush x2APIC MSR bitmap adjustments into single function Consolidate all of the dynamic MSR bitmap adjustments into vmx_update_msr_bitmap_x2apic(), and rename the mode tracker to reflect that it is x2APIC specific. If KVM gains more cases of dynamic MSR pass-through, odds are very good that those new cases will be better off with their own logic, e.g. see Intel PT MSRs and MSR_IA32_SPEC_CTRL. Attempting to handle all updates in a common helper did more harm than good, as KVM ended up collecting a large number of useless "updates". Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-42-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 55 +++++++++++++++++++------------------------------- arch/x86/kvm/vmx/vmx.h | 2 +- 2 files changed, 22 insertions(+), 35 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 308820173505..96d9147620bf 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -3812,21 +3812,6 @@ void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type) vmx_set_msr_bitmap_write(msr_bitmap, msr); } -static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu) -{ - u8 mode = 0; - - if (cpu_has_secondary_exec_ctrls() && - (secondary_exec_controls_get(to_vmx(vcpu)) & - SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) { - mode |= MSR_BITMAP_MODE_X2APIC; - if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) - mode |= MSR_BITMAP_MODE_X2APIC_APICV; - } - - return mode; -} - static void vmx_reset_x2apic_msrs(struct kvm_vcpu *vcpu, u8 mode) { unsigned long *msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap; @@ -3844,11 +3829,29 @@ static void vmx_reset_x2apic_msrs(struct kvm_vcpu *vcpu, u8 mode) } } -static void vmx_update_msr_bitmap_x2apic(struct kvm_vcpu *vcpu, u8 mode) +static void vmx_update_msr_bitmap_x2apic(struct kvm_vcpu *vcpu) { + struct vcpu_vmx *vmx = to_vmx(vcpu); + u8 mode; + if (!cpu_has_vmx_msr_bitmap()) return; + if (cpu_has_secondary_exec_ctrls() && + (secondary_exec_controls_get(vmx) & + SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) { + mode = MSR_BITMAP_MODE_X2APIC; + if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) + mode |= MSR_BITMAP_MODE_X2APIC_APICV; + } else { + mode = 0; + } + + if (mode == vmx->x2apic_msr_bitmap_mode) + return; + + vmx->x2apic_msr_bitmap_mode = mode; + vmx_reset_x2apic_msrs(vcpu, mode); /* @@ -3865,21 +3868,6 @@ static void vmx_update_msr_bitmap_x2apic(struct kvm_vcpu *vcpu, u8 mode) } } -static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - u8 mode = vmx_msr_bitmap_mode(vcpu); - u8 changed = mode ^ vmx->msr_bitmap_mode; - - if (!changed) - return; - - if (changed & (MSR_BITMAP_MODE_X2APIC | 
MSR_BITMAP_MODE_X2APIC_APICV)) - vmx_update_msr_bitmap_x2apic(vcpu, mode); - - vmx->msr_bitmap_mode = mode; -} - void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -4139,8 +4127,7 @@ static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); } - if (cpu_has_vmx_msr_bitmap()) - vmx_update_msr_bitmap(vcpu); + vmx_update_msr_bitmap_x2apic(vcpu); } u32 vmx_exec_control(struct vcpu_vmx *vmx) @@ -6190,7 +6177,7 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu) } secondary_exec_controls_set(vmx, sec_exec_control); - vmx_update_msr_bitmap(vcpu); + vmx_update_msr_bitmap_x2apic(vcpu); } static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index a408a9070662..0ecc41189292 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -227,7 +227,7 @@ struct nested_vmx { struct vcpu_vmx { struct kvm_vcpu vcpu; u8 fail; - u8 msr_bitmap_mode; + u8 x2apic_msr_bitmap_mode; /* * If true, host state has been stored in vmx->loaded_vmcs for -- cgit From 7aa13fc3d8260c4215b8aea971d120e675d8781f Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:33:20 -0700 Subject: KVM: VMX: Remove redundant write to set vCPU as active at RESET/INIT Drop a call to vmx_clear_hlt() during vCPU INIT, the guest's activity state is unconditionally set to "active" a few lines earlier in vmx_vcpu_reset(). No functional change intended. Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-43-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 96d9147620bf..b306ca2238ed 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -4466,8 +4466,6 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); vpid_sync_context(vmx->vpid); - if (init_event) - vmx_clear_hlt(vcpu); } static void vmx_enable_irq_window(struct kvm_vcpu *vcpu) -- cgit From e54949408abfbe3905ae19ac3b42ecd2d29bc3c5 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:33:21 -0700 Subject: KVM: VMX: Move RESET-only VMWRITE sequences to init_vmcs() Move VMWRITE sequences in vmx_vcpu_reset() guarded by !init_event into init_vmcs() to make it more obvious that they're, uh, initializing the VMCS. No meaningful functional change intended (though the order of VMWRITEs and whatnot is different). 
Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-44-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index b306ca2238ed..ae8e62df16dd 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -4397,6 +4397,19 @@ static void init_vmcs(struct vcpu_vmx *vmx) vmcs_write64(GUEST_IA32_RTIT_CTL, 0); } + vmcs_write32(GUEST_SYSENTER_CS, 0); + vmcs_writel(GUEST_SYSENTER_ESP, 0); + vmcs_writel(GUEST_SYSENTER_EIP, 0); + vmcs_write64(GUEST_IA32_DEBUGCTL, 0); + + if (cpu_has_vmx_tpr_shadow()) { + vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0); + if (cpu_need_tpr_shadow(&vmx->vcpu)) + vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, + __pa(vmx->vcpu.arch.apic->regs)); + vmcs_write32(TPR_THRESHOLD, 0); + } + vmx_setup_uret_msrs(vmx); } @@ -4434,13 +4447,6 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vmcs_write32(GUEST_LDTR_LIMIT, 0xffff); vmcs_write32(GUEST_LDTR_AR_BYTES, 0x00082); - if (!init_event) { - vmcs_write32(GUEST_SYSENTER_CS, 0); - vmcs_writel(GUEST_SYSENTER_ESP, 0); - vmcs_writel(GUEST_SYSENTER_EIP, 0); - vmcs_write64(GUEST_IA32_DEBUGCTL, 0); - } - vmcs_writel(GUEST_GDTR_BASE, 0); vmcs_write32(GUEST_GDTR_LIMIT, 0xffff); @@ -4455,14 +4461,6 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); /* 22.2.1 */ - if (cpu_has_vmx_tpr_shadow() && !init_event) { - vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0); - if (cpu_need_tpr_shadow(vcpu)) - vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, - __pa(vcpu->arch.apic->regs)); - vmcs_write32(TPR_THRESHOLD, 0); - } - kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); vpid_sync_context(vmx->vpid); -- cgit From 265e43530cb2eb14d5b78e89d310c79959e0eb26 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:33:22 -0700 Subject: KVM: SVM: Emulate #INIT in response to triple fault shutdown Emulate a full #INIT instead of simply initializing the VMCB if the guest hits a shutdown. Initializing the VMCB but not other vCPU state, much of which is mirrored by the VMCB, results in incoherent and broken vCPU state. Ideally, KVM would not automatically init anything on shutdown, and instead put the vCPU into e.g. KVM_MP_STATE_UNINITIALIZED and force userspace to explicitly INIT or RESET the vCPU. Even better would be to add KVM_MP_STATE_SHUTDOWN, since technically NMI can break shutdown (and SMI on Intel CPUs). But, that ship has sailed, and emulating #INIT is the next best thing as that has at least some connection with reality since there exist bare metal platforms that automatically INIT the CPU if it hits shutdown. Fixes: 46fe4ddd9dbb ("[PATCH] KVM: SVM: Propagate cpu shutdown events to userspace") Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-45-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 10 +++++++--- arch/x86/kvm/x86.c | 1 + 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 8ebdcd92007d..4102e9e3bcdc 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2050,11 +2050,15 @@ static int shutdown_interception(struct kvm_vcpu *vcpu) return -EINVAL; /* - * VMCB is undefined after a SHUTDOWN intercept - * so reinitialize it. + * VMCB is undefined after a SHUTDOWN intercept. INIT the vCPU to put + * the VMCB in a known good state. 
Unfortunately, KVM doesn't have + * KVM_MP_STATE_SHUTDOWN and can't add it without potentially breaking + * userspace. From a platform view, INIT is acceptable behavior as + * there exist bare metal platforms that automatically INIT the CPU + * in response to shutdown. */ clear_page(svm->vmcb); - init_vmcb(vcpu); + kvm_vcpu_reset(vcpu, true); kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; return 0; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6c55f2e83a7c..5b04c07c1ec5 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10906,6 +10906,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) if (init_event) kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu); } +EXPORT_SYMBOL_GPL(kvm_vcpu_reset); void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector) { -- cgit From 46f4898b207ffeeeaebc0fdd4bf4082bf0f88107 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:33:23 -0700 Subject: KVM: SVM: Drop redundant clearing of vcpu->arch.hflags at INIT/RESET Drop redundant clears of vcpu->arch.hflags in init_vmcb() since kvm_vcpu_reset() always clears hflags, and it is also always zero at vCPU creation time. And of course, the second clearing in init_vmcb() was always redundant. Suggested-by: Reiji Watanabe Reviewed-by: Reiji Watanabe Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-46-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 4102e9e3bcdc..9d72b1df426e 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1161,8 +1161,6 @@ static void init_vmcb(struct kvm_vcpu *vcpu) struct vmcb_control_area *control = &svm->vmcb->control; struct vmcb_save_area *save = &svm->vmcb->save; - vcpu->arch.hflags = 0; - svm_set_intercept(svm, INTERCEPT_CR0_READ); svm_set_intercept(svm, INTERCEPT_CR3_READ); svm_set_intercept(svm, INTERCEPT_CR4_READ); @@ -1264,7 +1262,6 @@ static void init_vmcb(struct kvm_vcpu *vcpu) svm->nested.vmcb12_gpa = INVALID_GPA; svm->nested.last_vmcb12_gpa = INVALID_GPA; - vcpu->arch.hflags = 0; if (!kvm_pause_in_guest(vcpu->kvm)) { control->pause_filter_count = pause_filter_count; -- cgit From 4c72ab5aa6e0ac2a5c11f9180e1fff89d7f2d38b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Jul 2021 09:33:24 -0700 Subject: KVM: x86: Preserve guest's CR0.CD/NW on INIT Preserve CR0.CD and CR0.NW on INIT instead of forcing them to '1', as defined by both Intel's SDM and AMD's APM. Note, current versions of Intel's SDM are very poorly written with respect to INIT behavior. Table 9-1. "IA-32 and Intel 64 Processor States Following Power-up, Reset, or INIT" quite clearly lists power-up, RESET, _and_ INIT as setting CR0=60000010H, i.e. CD/NW=1. But the SDM then attempts to qualify CD/NW behavior in a footnote: 2. The CD and NW flags are unchanged, bit 4 is set to 1, all other bits are cleared. Presumably that footnote is only meant for INIT, as the RESET case and especially the power-up case are rather non-sensical. Another footnote all but confirms that: 6. Internal caches are invalid after power-up and RESET, but left unchanged with an INIT. Bare metal testing shows that CD/NW are indeed preserved on INIT (someone else can hack their BIOS to check RESET and power-up :-D). 
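Concretely, using the architectural bit values (X86_CR0_CD = BIT(30), X86_CR0_NW = BIT(29), X86_CR0_ET = BIT(4); worked example only):

	RESET: CR0 = X86_CR0_CD | X86_CR0_NW | X86_CR0_ET = 0x60000010
	INIT:  CR0 = X86_CR0_ET | (old_cr0 & (X86_CR0_CD | X86_CR0_NW))

which matches the SDM's 60000010H for power-up/RESET while honoring the footnote for INIT.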
Reported-by: Reiji Watanabe Reviewed-by: Reiji Watanabe Signed-off-by: Sean Christopherson Message-Id: <20210713163324.627647-47-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5b04c07c1ec5..4d246b7f6ce1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10792,6 +10792,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) { unsigned long old_cr0 = kvm_read_cr0(vcpu); + unsigned long new_cr0; u32 eax, dummy; kvm_lapic_reset(vcpu, init_event); @@ -10878,7 +10879,18 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) kvm_set_rflags(vcpu, X86_EFLAGS_FIXED); kvm_rip_write(vcpu, 0xfff0); - static_call(kvm_x86_set_cr0)(vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET); + /* + * CR0.CD/NW are set on RESET, preserved on INIT. Note, some versions + * of Intel's SDM list CD/NW as being set on INIT, but they contradict + * (or qualify) that with a footnote stating that CD/NW are preserved. + */ + new_cr0 = X86_CR0_ET; + if (init_event) + new_cr0 |= (old_cr0 & (X86_CR0_NW | X86_CR0_CD)); + else + new_cr0 |= X86_CR0_NW | X86_CR0_CD; + + static_call(kvm_x86_set_cr0)(vcpu, new_cr0); static_call(kvm_x86_set_cr4)(vcpu, 0); static_call(kvm_x86_set_efer)(vcpu, 0); static_call(kvm_x86_update_exception_bitmap)(vcpu); -- cgit From db105fab8d141fc0d9179600c51eba0d168dad34 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 2 Aug 2021 08:52:50 -0400 Subject: KVM: nSVM: remove useless kvm_clear_*_queue For an event to be in injected state when nested_svm_vmrun executes, it must have come from exitintinfo when svm_complete_interrupts ran: vcpu_enter_guest static_call(kvm_x86_run) -> svm_vcpu_run svm_complete_interrupts // now the event went from "exitintinfo" to "injected" static_call(kvm_x86_handle_exit) -> handle_exit svm_invoke_exit_handler vmrun_interception nested_svm_vmrun However, no event could have been in exitintinfo before a VMRUN vmexit. The code in svm.c is a bit more permissive than the one in vmx.c: if (is_external_interrupt(svm->vmcb->control.exit_int_info) && exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR && exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH && exit_code != SVM_EXIT_INTR && exit_code != SVM_EXIT_NMI) but in any case, a VMRUN instruction would not even start to execute during an attempted event delivery. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/nested.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 61738ff8ef33..5e13357da21e 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -659,11 +659,6 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu) goto out; } - - /* Clear internal status */ - kvm_clear_exception_queue(vcpu); - kvm_clear_interrupt_queue(vcpu); - /* * Since vmcb01 is not in use, we can use it to store some of the L1 * state. -- cgit From 269e9552d208179bc14ea7f80a9e3e8ae97795a2 Mon Sep 17 00:00:00 2001 From: Hamza Mahfooz Date: Mon, 12 Jul 2021 22:33:38 -0400 Subject: KVM: const-ify all relevant uses of struct kvm_memory_slot As alluded to in commit f36f3f2846b5 ("KVM: add "new" argument to kvm_arch_commit_memory_region"), a bunch of other places where struct kvm_memory_slot is used, needs to be refactored to preserve the "const"ness of struct kvm_memory_slot across-the-board. 
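As an illustration of what the const-ness buys (hypothetical caller, not part of this patch): a stray write through one of the now-const pointers becomes a compile-time error,

	static void example(const struct kvm_memory_slot *slot)
	{
		/* error: assignment of member 'flags' in read-only object */
		slot->flags |= KVM_MEM_LOG_DIRTY_PAGES;
	}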
Signed-off-by: Hamza Mahfooz Message-Id: <20210713023338.57108-1-someguy@effective-light.com> [Do not touch body of slot_rmap_walk_init. - Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 4 ++-- arch/x86/kvm/mmu/mmu.c | 42 ++++++++++++++++++++--------------------- arch/x86/kvm/mmu/mmu_internal.h | 4 ++-- arch/x86/kvm/mmu/tdp_mmu.c | 7 ++++--- arch/x86/kvm/mmu/tdp_mmu.h | 6 +++--- arch/x86/kvm/x86.c | 7 ++----- 6 files changed, 34 insertions(+), 36 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index ec8e4aca69c8..99f37781a6fc 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1537,12 +1537,12 @@ void kvm_mmu_uninit_vm(struct kvm *kvm); void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu); void kvm_mmu_reset_context(struct kvm_vcpu *vcpu); void kvm_mmu_slot_remove_write_access(struct kvm *kvm, - struct kvm_memory_slot *memslot, + const struct kvm_memory_slot *memslot, int start_level); void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm, const struct kvm_memory_slot *memslot); void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm, - struct kvm_memory_slot *memslot); + const struct kvm_memory_slot *memslot); void kvm_mmu_zap_all(struct kvm *kvm); void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen); unsigned long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm); diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 29010abb659c..e702361b4409 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -794,7 +794,7 @@ static struct kvm_lpage_info *lpage_info_slot(gfn_t gfn, return &slot->arch.lpage_info[level - 2][idx]; } -static void update_gfn_disallow_lpage_count(struct kvm_memory_slot *slot, +static void update_gfn_disallow_lpage_count(const struct kvm_memory_slot *slot, gfn_t gfn, int count) { struct kvm_lpage_info *linfo; @@ -807,12 +807,12 @@ static void update_gfn_disallow_lpage_count(struct kvm_memory_slot *slot, } } -void kvm_mmu_gfn_disallow_lpage(struct kvm_memory_slot *slot, gfn_t gfn) +void kvm_mmu_gfn_disallow_lpage(const struct kvm_memory_slot *slot, gfn_t gfn) { update_gfn_disallow_lpage_count(slot, gfn, 1); } -void kvm_mmu_gfn_allow_lpage(struct kvm_memory_slot *slot, gfn_t gfn) +void kvm_mmu_gfn_allow_lpage(const struct kvm_memory_slot *slot, gfn_t gfn) { update_gfn_disallow_lpage_count(slot, gfn, -1); } @@ -999,7 +999,7 @@ static void pte_list_remove(struct kvm_rmap_head *rmap_head, u64 *sptep) } static struct kvm_rmap_head *__gfn_to_rmap(gfn_t gfn, int level, - struct kvm_memory_slot *slot) + const struct kvm_memory_slot *slot) { unsigned long idx; @@ -1228,7 +1228,7 @@ static bool spte_wrprot_for_clear_dirty(u64 *sptep) * Returns true iff any D or W bits were cleared. */ static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head, - struct kvm_memory_slot *slot) + const struct kvm_memory_slot *slot) { u64 *sptep; struct rmap_iterator iter; @@ -1387,7 +1387,7 @@ static bool rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn) } static bool kvm_zap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head, - struct kvm_memory_slot *slot) + const struct kvm_memory_slot *slot) { u64 *sptep; struct rmap_iterator iter; @@ -1452,7 +1452,7 @@ restart: struct slot_rmap_walk_iterator { /* input fields. 
*/ - struct kvm_memory_slot *slot; + const struct kvm_memory_slot *slot; gfn_t start_gfn; gfn_t end_gfn; int start_level; @@ -1479,7 +1479,7 @@ rmap_walk_init_level(struct slot_rmap_walk_iterator *iterator, int level) static void slot_rmap_walk_init(struct slot_rmap_walk_iterator *iterator, - struct kvm_memory_slot *slot, int start_level, + const struct kvm_memory_slot *slot, int start_level, int end_level, gfn_t start_gfn, gfn_t end_gfn) { iterator->slot = slot; @@ -5313,12 +5313,13 @@ void kvm_configure_mmu(bool enable_tdp, int tdp_max_root_level, EXPORT_SYMBOL_GPL(kvm_configure_mmu); /* The return value indicates if tlb flush on all vcpus is needed. */ -typedef bool (*slot_level_handler) (struct kvm *kvm, struct kvm_rmap_head *rmap_head, - struct kvm_memory_slot *slot); +typedef bool (*slot_level_handler) (struct kvm *kvm, + struct kvm_rmap_head *rmap_head, + const struct kvm_memory_slot *slot); /* The caller should hold mmu-lock before calling this function. */ static __always_inline bool -slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot, +slot_handle_level_range(struct kvm *kvm, const struct kvm_memory_slot *memslot, slot_level_handler fn, int start_level, int end_level, gfn_t start_gfn, gfn_t end_gfn, bool flush_on_yield, bool flush) @@ -5345,7 +5346,7 @@ slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot, } static __always_inline bool -slot_handle_level(struct kvm *kvm, struct kvm_memory_slot *memslot, +slot_handle_level(struct kvm *kvm, const struct kvm_memory_slot *memslot, slot_level_handler fn, int start_level, int end_level, bool flush_on_yield) { @@ -5356,7 +5357,7 @@ slot_handle_level(struct kvm *kvm, struct kvm_memory_slot *memslot, } static __always_inline bool -slot_handle_leaf(struct kvm *kvm, struct kvm_memory_slot *memslot, +slot_handle_leaf(struct kvm *kvm, const struct kvm_memory_slot *memslot, slot_level_handler fn, bool flush_on_yield) { return slot_handle_level(kvm, memslot, fn, PG_LEVEL_4K, @@ -5615,7 +5616,8 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) if (start >= end) continue; - flush = slot_handle_level_range(kvm, memslot, + flush = slot_handle_level_range(kvm, + (const struct kvm_memory_slot *) memslot, kvm_zap_rmapp, PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL, start, end - 1, true, flush); @@ -5643,13 +5645,13 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) static bool slot_rmap_write_protect(struct kvm *kvm, struct kvm_rmap_head *rmap_head, - struct kvm_memory_slot *slot) + const struct kvm_memory_slot *slot) { return __rmap_write_protect(kvm, rmap_head, false); } void kvm_mmu_slot_remove_write_access(struct kvm *kvm, - struct kvm_memory_slot *memslot, + const struct kvm_memory_slot *memslot, int start_level) { bool flush = false; @@ -5685,7 +5687,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm, struct kvm_rmap_head *rmap_head, - struct kvm_memory_slot *slot) + const struct kvm_memory_slot *slot) { u64 *sptep; struct rmap_iterator iter; @@ -5724,10 +5726,8 @@ restart: } void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm, - const struct kvm_memory_slot *memslot) + const struct kvm_memory_slot *slot) { - /* FIXME: const-ify all uses of struct kvm_memory_slot. 
*/ - struct kvm_memory_slot *slot = (struct kvm_memory_slot *)memslot; bool flush = false; if (kvm_memslots_have_rmaps(kvm)) { @@ -5763,7 +5763,7 @@ void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm, } void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm, - struct kvm_memory_slot *memslot) + const struct kvm_memory_slot *memslot) { bool flush = false; diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h index 626cb848dab4..ca7b7595bbfc 100644 --- a/arch/x86/kvm/mmu/mmu_internal.h +++ b/arch/x86/kvm/mmu/mmu_internal.h @@ -124,8 +124,8 @@ static inline bool is_nx_huge_page_enabled(void) int mmu_try_to_unsync_pages(struct kvm_vcpu *vcpu, gfn_t gfn, bool can_unsync); -void kvm_mmu_gfn_disallow_lpage(struct kvm_memory_slot *slot, gfn_t gfn); -void kvm_mmu_gfn_allow_lpage(struct kvm_memory_slot *slot, gfn_t gfn); +void kvm_mmu_gfn_disallow_lpage(const struct kvm_memory_slot *slot, gfn_t gfn); +void kvm_mmu_gfn_allow_lpage(const struct kvm_memory_slot *slot, gfn_t gfn); bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm, struct kvm_memory_slot *slot, u64 gfn, int min_level); diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 41cee1d22918..43f12f5d12c0 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -1246,8 +1246,8 @@ retry: * only affect leaf SPTEs down to min_level. * Returns true if an SPTE has been changed and the TLBs need to be flushed. */ -bool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm, struct kvm_memory_slot *slot, - int min_level) +bool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm, + const struct kvm_memory_slot *slot, int min_level) { struct kvm_mmu_page *root; bool spte_set = false; @@ -1317,7 +1317,8 @@ retry: * each SPTE. Returns true if an SPTE has been changed and the TLBs need to * be flushed. 
*/ -bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm, struct kvm_memory_slot *slot) +bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm, + const struct kvm_memory_slot *slot) { struct kvm_mmu_page *root; bool spte_set = false; diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h index 361b47f98cc5..b224d126adf9 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.h +++ b/arch/x86/kvm/mmu/tdp_mmu.h @@ -61,10 +61,10 @@ bool kvm_tdp_mmu_age_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range); bool kvm_tdp_mmu_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range); bool kvm_tdp_mmu_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range); -bool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm, struct kvm_memory_slot *slot, - int min_level); +bool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm, + const struct kvm_memory_slot *slot, int min_level); bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm, - struct kvm_memory_slot *slot); + const struct kvm_memory_slot *slot); void kvm_tdp_mmu_clear_dirty_pt_masked(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn, unsigned long mask, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4d246b7f6ce1..348452bb16bc 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -11520,7 +11520,7 @@ static void kvm_mmu_update_cpu_dirty_logging(struct kvm *kvm, bool enable) static void kvm_mmu_slot_apply_flags(struct kvm *kvm, struct kvm_memory_slot *old, - struct kvm_memory_slot *new, + const struct kvm_memory_slot *new, enum kvm_mr_change change) { bool log_dirty_pages = new->flags & KVM_MEM_LOG_DIRTY_PAGES; @@ -11600,10 +11600,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, kvm_mmu_change_mmu_pages(kvm, kvm_mmu_calculate_default_mmu_pages(kvm)); - /* - * FIXME: const-ify all uses of struct kvm_memory_slot. - */ - kvm_mmu_slot_apply_flags(kvm, old, (struct kvm_memory_slot *) new, change); + kvm_mmu_slot_apply_flags(kvm, old, new, change); /* Free the arrays associated with the old memslot. */ if (change == KVM_MR_MOVE) -- cgit From a6cae77f1bc89368a4e2822afcddc45c3062d499 Mon Sep 17 00:00:00 2001 From: Michal Suchanek Date: Thu, 29 Jul 2021 20:01:03 +0200 Subject: powerpc/stacktrace: Include linux/delay.h commit 7c6986ade69e ("powerpc/stacktrace: Fix spurious "stale" traces in raise_backtrace_ipi()") introduced a udelay() call without including the linux/delay.h header. This may happen to work on master, but the header that declares the function should be included nonetheless. Fixes: 7c6986ade69e ("powerpc/stacktrace: Fix spurious "stale" traces in raise_backtrace_ipi()") Signed-off-by: Michal Suchanek Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210729180103.15578-1-msuchanek@suse.de --- arch/powerpc/kernel/stacktrace.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c index 2b0d04a1b7d2..9e4a4a7af380 100644 --- a/arch/powerpc/kernel/stacktrace.c +++ b/arch/powerpc/kernel/stacktrace.c @@ -8,6 +8,7 @@ * Copyright 2018 Nick Piggin, Michael Ellerman, IBM Corp. */ +#include <linux/delay.h> #include <linux/export.h> #include <linux/kallsyms.h> #include <linux/module.h> -- cgit From 156ca4e650bfb9a4259b427069caa11b5a4df3d4 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 29 Jul 2021 23:19:35 +0900 Subject: powerpc: remove unused zInstall target from arch/powerpc/boot/Makefile Commit c913e5f95e54 ("powerpc/boot: Don't install zImage.* from make install") added the zInstall target to arch/powerpc/boot/Makefile, but you cannot use it since the corresponding hook is missing in arch/powerpc/Makefile.
It has never worked since its addition. Nobody has complained about it for 7 years, which means this code was unneeded. With this removal, the install.sh will be passed in with 4 parameters. Simplify the shell script. Signed-off-by: Masahiro Yamada Reviewed-by: Nick Desaulniers Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210729141937.445051-1-masahiroy@kernel.org --- arch/powerpc/boot/Makefile | 6 +----- arch/powerpc/boot/install.sh | 13 ------------- 2 files changed, 1 insertion(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index e312ea802aa6..a702f9d1ec0d 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -448,11 +448,7 @@ $(obj)/zImage.initrd: $(addprefix $(obj)/, $(initrd-y)) install: $(CONFIGURE) $(addprefix $(obj)/, $(image-y)) sh -x $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" vmlinux System.map "$(INSTALL_PATH)" -# Install the vmlinux and other built boot targets. -zInstall: $(CONFIGURE) $(addprefix $(obj)/, $(image-y)) - sh -x $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" vmlinux System.map "$(INSTALL_PATH)" $^ - -PHONY += install zInstall +PHONY += install # anything not in $(targets) clean-files += $(image-) $(initrd-) cuImage.* dtbImage.* treeImage.* \ diff --git a/arch/powerpc/boot/install.sh b/arch/powerpc/boot/install.sh index b6a256bc96ee..658c93ca7437 100644 --- a/arch/powerpc/boot/install.sh +++ b/arch/powerpc/boot/install.sh @@ -15,7 +15,6 @@ # $2 - kernel image file # $3 - kernel map file # $4 - default install path (blank if root directory) -# $5 and more - kernel boot files; zImage*, uImage, cuImage.*, etc. # # Bail with error code if anything goes wrong @@ -41,15 +40,3 @@ fi cat $2 > $4/$image_name cp $3 $4/System.map - -# Copy all the bootable image files -path=$4 -shift 4 -while [ $# -ne 0 ]; do - image_name=`basename $1` - if [ -f $path/$image_name ]; then - mv $path/$image_name $path/$image_name.old - fi - cat $1 > $path/$image_name - shift -done; -- cgit From 9bef456b20581e630ef9a13555ca04fed65a859d Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 29 Jul 2021 23:19:36 +0900 Subject: powerpc: make the install target not depend on any build artifact The install target should not depend on any build artifact. The reason is explained in commit 19514fc665ff ("arm, kbuild: make "make install" not depend on vmlinux"). Change the PowerPC installation code in a similar way. Signed-off-by: Masahiro Yamada Reviewed-by: Nick Desaulniers Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210729141937.445051-2-masahiroy@kernel.org --- arch/powerpc/boot/Makefile | 2 +- arch/powerpc/boot/install.sh | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index a702f9d1ec0d..0d165bd98b61 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -445,7 +445,7 @@ $(obj)/zImage.initrd: $(addprefix $(obj)/, $(initrd-y)) $(Q)rm -f $@; ln $< $@ # Only install the vmlinux -install: $(CONFIGURE) $(addprefix $(obj)/, $(image-y)) +install: sh -x $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" vmlinux System.map "$(INSTALL_PATH)" PHONY += install diff --git a/arch/powerpc/boot/install.sh b/arch/powerpc/boot/install.sh index 658c93ca7437..14473150ddb4 100644 --- a/arch/powerpc/boot/install.sh +++ b/arch/powerpc/boot/install.sh @@ -20,6 +20,20 @@ # Bail with error code if anything goes wrong set -e +verify () { + if [ ! 
-f "$1" ]; then + echo "" 1>&2 + echo " *** Missing file: $1" 1>&2 + echo ' *** You need to run "make" before "make install".' 1>&2 + echo "" 1>&2 + exit 1 + fi +} + +# Make sure the files actually exist +verify "$2" +verify "$3" + # User may have a custom install script if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi -- cgit From 86ff0bce2e9665c8b074930fe6caed615da070c1 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 29 Jul 2021 23:19:37 +0900 Subject: powerpc: move the install rule to arch/powerpc/Makefile Currently, the install target in arch/powerpc/Makefile descends into arch/powerpc/boot/Makefile to invoke the shell script, but there is no good reason to do so. arch/powerpc/Makefile can run the shell script directly. Signed-off-by: Masahiro Yamada Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210729141937.445051-3-masahiroy@kernel.org --- arch/powerpc/Makefile | 3 ++- arch/powerpc/boot/Makefile | 6 ------ 2 files changed, 2 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 6505d66f1193..9aaf1abbc641 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -407,7 +407,8 @@ endef PHONY += install install: - $(Q)$(MAKE) $(build)=$(boot) install + sh -x $(srctree)/$(boot)/install.sh "$(KERNELRELEASE)" vmlinux \ + System.map "$(INSTALL_PATH)" archclean: $(Q)$(MAKE) $(clean)=$(boot) diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 0d165bd98b61..10c0fb306f15 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -444,12 +444,6 @@ $(obj)/zImage: $(addprefix $(obj)/, $(image-y)) $(obj)/zImage.initrd: $(addprefix $(obj)/, $(initrd-y)) $(Q)rm -f $@; ln $< $@ -# Only install the vmlinux -install: - sh -x $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" vmlinux System.map "$(INSTALL_PATH)" - -PHONY += install - # anything not in $(targets) clean-files += $(image-) $(initrd-) cuImage.* dtbImage.* treeImage.* \ zImage zImage.initrd zImage.chrp zImage.coff zImage.holly \ -- cgit From a4bec516b9c0823d7e2bb8c8928c98b535cf9adf Mon Sep 17 00:00:00 2001 From: "Gautham R. Shenoy" Date: Wed, 28 Jul 2021 23:26:05 +0530 Subject: powerpc/cacheinfo: Lookup cache by dt node and thread-group id Currently the cacheinfo code on powerpc indexes the "cache" objects (modelling the L1/L2/L3 caches) where the key is the device-tree node corresponding to that cache. On some of the POWER server platforms, thread-groups within the core share different sets of caches (Eg: On SMT8 POWER9 systems, threads 0,2,4,6 of a core share L1 cache and threads 1,3,5,7 of the same core share another L1 cache). On such platforms, there is a single device-tree node corresponding to that cache and the cache-configuration within the threads of the core is indicated via the "ibm,thread-groups" device-tree property. Since the current code is not aware of the "ibm,thread-groups" property, on the aforementioned systems the cacheinfo code still treats all the threads in the core as sharing the cache because of the single device-tree node (in the earlier example, the cacheinfo code would say CPUs 0-7 share the L1 cache). In this patch, we make the powerpc cacheinfo code aware of the "ibm,thread-groups" property. We index the "cache" objects by the key-pair (device-tree node, thread-group id). For any CPUX, for a given level of cache, the thread-group id is defined to be the first CPU in the "ibm,thread-groups" cache-group containing CPUX.
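A worked illustration (not part of the original commit message): on the SMT8 POWER9 example above, threads {0,2,4,6} of a core share one L1 and threads {1,3,5,7} share the other. The L1 thread-group id of CPUs 0, 2, 4 and 6 is therefore 0, and that of CPUs 1, 3, 5 and 7 is 1, so the key-pairs (L1 node, 0) and (L1 node, 1) select two distinct cache objects where the device-tree node alone used to yield a single object shared by all eight threads.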
For levels of cache which are not represented in "ibm,thread-groups" property, the thread-group id is -1. [parth: Remove "static" keyword for the definition of "thread_group_l1_cache_map" and "thread_group_l2_cache_map" to get rid of the compile error.] Signed-off-by: Gautham R. Shenoy Signed-off-by: Parth Shah Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210728175607.591679-2-parth@linux.ibm.com --- arch/powerpc/include/asm/smp.h | 3 ++ arch/powerpc/kernel/cacheinfo.c | 80 +++++++++++++++++++++++++++++------------ arch/powerpc/kernel/smp.c | 4 +-- 3 files changed, 63 insertions(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index 03b3d010cbab..1259040cc3a4 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -33,6 +33,9 @@ extern bool coregroup_enabled; extern int cpu_to_chip_id(int cpu); extern int *chip_id_lookup_table; +DECLARE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map); +DECLARE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map); + #ifdef CONFIG_SMP struct smp_ops_t { diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index 6f903e9aa20b..5a6925d87424 100644 --- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c @@ -120,6 +120,7 @@ struct cache { struct cpumask shared_cpu_map; /* online CPUs using this cache */ int type; /* split cache disambiguation */ int level; /* level not explicit in device tree */ + int group_id; /* id of the group of threads that share this cache */ struct list_head list; /* global list of cache objects */ struct cache *next_local; /* next cache of >= level */ }; @@ -142,22 +143,24 @@ static const char *cache_type_string(const struct cache *cache) } static void cache_init(struct cache *cache, int type, int level, - struct device_node *ofnode) + struct device_node *ofnode, int group_id) { cache->type = type; cache->level = level; cache->ofnode = of_node_get(ofnode); + cache->group_id = group_id; INIT_LIST_HEAD(&cache->list); list_add(&cache->list, &cache_list); } -static struct cache *new_cache(int type, int level, struct device_node *ofnode) +static struct cache *new_cache(int type, int level, + struct device_node *ofnode, int group_id) { struct cache *cache; cache = kzalloc(sizeof(*cache), GFP_KERNEL); if (cache) - cache_init(cache, type, level, ofnode); + cache_init(cache, type, level, ofnode, group_id); return cache; } @@ -309,20 +312,24 @@ static struct cache *cache_find_first_sibling(struct cache *cache) return cache; list_for_each_entry(iter, &cache_list, list) - if (iter->ofnode == cache->ofnode && iter->next_local == cache) + if (iter->ofnode == cache->ofnode && + iter->group_id == cache->group_id && + iter->next_local == cache) return iter; return cache; } -/* return the first cache on a local list matching node */ -static struct cache *cache_lookup_by_node(const struct device_node *node) +/* return the first cache on a local list matching node and thread-group id */ +static struct cache *cache_lookup_by_node_group(const struct device_node *node, + int group_id) { struct cache *cache = NULL; struct cache *iter; list_for_each_entry(iter, &cache_list, list) { - if (iter->ofnode != node) + if (iter->ofnode != node || + iter->group_id != group_id) continue; cache = cache_find_first_sibling(iter); break; @@ -352,14 +359,15 @@ static int cache_is_unified_d(const struct device_node *np) CACHE_TYPE_UNIFIED_D : CACHE_TYPE_UNIFIED; } -static struct cache 
*cache_do_one_devnode_unified(struct device_node *node, int level) +static struct cache *cache_do_one_devnode_unified(struct device_node *node, int group_id, + int level) { pr_debug("creating L%d ucache for %pOFP\n", level, node); - return new_cache(cache_is_unified_d(node), level, node); + return new_cache(cache_is_unified_d(node), level, node, group_id); } -static struct cache *cache_do_one_devnode_split(struct device_node *node, +static struct cache *cache_do_one_devnode_split(struct device_node *node, int group_id, int level) { struct cache *dcache, *icache; @@ -367,8 +375,8 @@ static struct cache *cache_do_one_devnode_split(struct device_node *node, pr_debug("creating L%d dcache and icache for %pOFP\n", level, node); - dcache = new_cache(CACHE_TYPE_DATA, level, node); - icache = new_cache(CACHE_TYPE_INSTRUCTION, level, node); + dcache = new_cache(CACHE_TYPE_DATA, level, node, group_id); + icache = new_cache(CACHE_TYPE_INSTRUCTION, level, node, group_id); if (!dcache || !icache) goto err; @@ -382,31 +390,32 @@ err: return NULL; } -static struct cache *cache_do_one_devnode(struct device_node *node, int level) +static struct cache *cache_do_one_devnode(struct device_node *node, int group_id, int level) { struct cache *cache; if (cache_node_is_unified(node)) - cache = cache_do_one_devnode_unified(node, level); + cache = cache_do_one_devnode_unified(node, group_id, level); else - cache = cache_do_one_devnode_split(node, level); + cache = cache_do_one_devnode_split(node, group_id, level); return cache; } static struct cache *cache_lookup_or_instantiate(struct device_node *node, + int group_id, int level) { struct cache *cache; - cache = cache_lookup_by_node(node); + cache = cache_lookup_by_node_group(node, group_id); WARN_ONCE(cache && cache->level != level, "cache level mismatch on lookup (got %d, expected %d)\n", cache->level, level); if (!cache) - cache = cache_do_one_devnode(node, level); + cache = cache_do_one_devnode(node, group_id, level); return cache; } @@ -443,7 +452,27 @@ static void do_subsidiary_caches_debugcheck(struct cache *cache) of_node_get_device_type(cache->ofnode)); } -static void do_subsidiary_caches(struct cache *cache) +/* + * If sub-groups of threads in a core containing @cpu_id share the + * L@level-cache (information obtained via "ibm,thread-groups" + * device-tree property), then we identify the group by the first + * thread-sibling in the group. We define this to be the group-id. + * + * In the absence of any thread-group information for L@level-cache, + * this function returns -1. 
+ */ +static int get_group_id(unsigned int cpu_id, int level) +{ + if (has_big_cores && level == 1) + return cpumask_first(per_cpu(thread_group_l1_cache_map, + cpu_id)); + else if (thread_group_shares_l2 && level == 2) + return cpumask_first(per_cpu(thread_group_l2_cache_map, + cpu_id)); + return -1; +} + +static void do_subsidiary_caches(struct cache *cache, unsigned int cpu_id) { struct device_node *subcache_node; int level = cache->level; @@ -452,9 +481,11 @@ static void do_subsidiary_caches(struct cache *cache) while ((subcache_node = of_find_next_cache_node(cache->ofnode))) { struct cache *subcache; + int group_id; level++; - subcache = cache_lookup_or_instantiate(subcache_node, level); + group_id = get_group_id(cpu_id, level); + subcache = cache_lookup_or_instantiate(subcache_node, group_id, level); of_node_put(subcache_node); if (!subcache) break; @@ -468,6 +499,7 @@ static struct cache *cache_chain_instantiate(unsigned int cpu_id) { struct device_node *cpu_node; struct cache *cpu_cache = NULL; + int group_id; pr_debug("creating cache object(s) for CPU %i\n", cpu_id); @@ -476,11 +508,13 @@ static struct cache *cache_chain_instantiate(unsigned int cpu_id) if (!cpu_node) goto out; - cpu_cache = cache_lookup_or_instantiate(cpu_node, 1); + group_id = get_group_id(cpu_id, 1); + + cpu_cache = cache_lookup_or_instantiate(cpu_node, group_id, 1); if (!cpu_cache) goto out; - do_subsidiary_caches(cpu_cache); + do_subsidiary_caches(cpu_cache, cpu_id); cache_cpu_set(cpu_cache, cpu_id); out: @@ -848,13 +882,15 @@ static struct cache *cache_lookup_by_cpu(unsigned int cpu_id) { struct device_node *cpu_node; struct cache *cache; + int group_id; cpu_node = of_get_cpu_node(cpu_id, NULL); WARN_ONCE(!cpu_node, "no OF node found for CPU %i\n", cpu_id); if (!cpu_node) return NULL; - cache = cache_lookup_by_node(cpu_node); + group_id = get_group_id(cpu_id, 1); + cache = cache_lookup_by_node_group(cpu_node, group_id); of_node_put(cpu_node); return cache; diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 447b78a87c8f..a7fcac44a8e2 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -122,14 +122,14 @@ static struct thread_groups_list tgl[NR_CPUS] __initdata; * On big-cores system, thread_group_l1_cache_map for each CPU corresponds to * the set its siblings that share the L1-cache. */ -static DEFINE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map); +DEFINE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map); /* * On some big-cores system, thread_group_l2_cache_map for each CPU * corresponds to the set its siblings within the core that share the * L2-cache. */ -static DEFINE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map); +DEFINE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map); /* SMP operations for this machine */ struct smp_ops_t *smp_ops; -- cgit From 69aa8e078545bc14d84a8b4b3cb914ac8f9f280e Mon Sep 17 00:00:00 2001 From: "Gautham R. Shenoy" Date: Wed, 28 Jul 2021 23:26:06 +0530 Subject: powerpc/cacheinfo: Remove the redundant get_shared_cpu_map() The helper function get_shared_cpu_map() was added in 'commit 500fe5f550ec ("powerpc/cacheinfo: Report the correct shared_cpu_map on big-cores")' and subsequently expanded upon in 'commit 0be47634db0b ("powerpc/cacheinfo: Print correct cache-sibling map/list for L2 cache")' in order to help report the correct groups of threads sharing these caches on big-core systems where groups of threads within a core can share different sets of caches. 
Now that powerpc/cacheinfo is aware of "ibm,thread-groups" property, cache->shared_cpu_map contains the correct set of thread-siblings sharing the cache. Hence we no longer need the functions get_shared_cpu_map(). This patch removes this function. We also remove the helper function index_dir_to_cpu() which was only called by get_shared_cpu_map(). With these functions removed, we can still see the correct cache-sibling map/list for L1 and L2 caches on systems with L1 and L2 caches distributed among groups of threads in a core. With this patch, on a SMT8 POWER10 system where the L1 and L2 caches are split between the two groups of threads in a core, for CPUs 8,9, the L1-Data, L1-Instruction, L2, L3 cache CPU sibling list is as follows: $ grep . /sys/devices/system/cpu/cpu[89]/cache/index[0123]/shared_cpu_list /sys/devices/system/cpu/cpu8/cache/index0/shared_cpu_list:8,10,12,14 /sys/devices/system/cpu/cpu8/cache/index1/shared_cpu_list:8,10,12,14 /sys/devices/system/cpu/cpu8/cache/index2/shared_cpu_list:8,10,12,14 /sys/devices/system/cpu/cpu8/cache/index3/shared_cpu_list:8-15 /sys/devices/system/cpu/cpu9/cache/index0/shared_cpu_list:9,11,13,15 /sys/devices/system/cpu/cpu9/cache/index1/shared_cpu_list:9,11,13,15 /sys/devices/system/cpu/cpu9/cache/index2/shared_cpu_list:9,11,13,15 /sys/devices/system/cpu/cpu9/cache/index3/shared_cpu_list:8-15 $ ppc64_cpu --smt=4 $ grep . /sys/devices/system/cpu/cpu[89]/cache/index[0123]/shared_cpu_list /sys/devices/system/cpu/cpu8/cache/index0/shared_cpu_list:8,10 /sys/devices/system/cpu/cpu8/cache/index1/shared_cpu_list:8,10 /sys/devices/system/cpu/cpu8/cache/index2/shared_cpu_list:8,10 /sys/devices/system/cpu/cpu8/cache/index3/shared_cpu_list:8-11 /sys/devices/system/cpu/cpu9/cache/index0/shared_cpu_list:9,11 /sys/devices/system/cpu/cpu9/cache/index1/shared_cpu_list:9,11 /sys/devices/system/cpu/cpu9/cache/index2/shared_cpu_list:9,11 /sys/devices/system/cpu/cpu9/cache/index3/shared_cpu_list:8-11 $ ppc64_cpu --smt=2 $ grep . /sys/devices/system/cpu/cpu[89]/cache/index[0123]/shared_cpu_list /sys/devices/system/cpu/cpu8/cache/index0/shared_cpu_list:8 /sys/devices/system/cpu/cpu8/cache/index1/shared_cpu_list:8 /sys/devices/system/cpu/cpu8/cache/index2/shared_cpu_list:8 /sys/devices/system/cpu/cpu8/cache/index3/shared_cpu_list:8-9 /sys/devices/system/cpu/cpu9/cache/index0/shared_cpu_list:9 /sys/devices/system/cpu/cpu9/cache/index1/shared_cpu_list:9 /sys/devices/system/cpu/cpu9/cache/index2/shared_cpu_list:9 /sys/devices/system/cpu/cpu9/cache/index3/shared_cpu_list:8-9 $ ppc64_cpu --smt=1 $ grep . /sys/devices/system/cpu/cpu[89]/cache/index[0123]/shared_cpu_list /sys/devices/system/cpu/cpu8/cache/index0/shared_cpu_list:8 /sys/devices/system/cpu/cpu8/cache/index1/shared_cpu_list:8 /sys/devices/system/cpu/cpu8/cache/index2/shared_cpu_list:8 /sys/devices/system/cpu/cpu8/cache/index3/shared_cpu_list:8 Signed-off-by: Gautham R. 
Shenoy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210728175607.591679-3-parth@linux.ibm.com --- arch/powerpc/kernel/cacheinfo.c | 41 +---------------------------------------- 1 file changed, 1 insertion(+), 40 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index 5a6925d87424..20d91693eac1 100644 --- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c @@ -675,45 +675,6 @@ static ssize_t level_show(struct kobject *k, struct kobj_attribute *attr, char * static struct kobj_attribute cache_level_attr = __ATTR(level, 0444, level_show, NULL); -static unsigned int index_dir_to_cpu(struct cache_index_dir *index) -{ - struct kobject *index_dir_kobj = &index->kobj; - struct kobject *cache_dir_kobj = index_dir_kobj->parent; - struct kobject *cpu_dev_kobj = cache_dir_kobj->parent; - struct device *dev = kobj_to_dev(cpu_dev_kobj); - - return dev->id; -} - -/* - * On big-core systems, each core has two groups of CPUs each of which - * has its own L1-cache. The thread-siblings which share l1-cache with - * @cpu can be obtained via cpu_smallcore_mask(). - * - * On some big-core systems, the L2 cache is shared only between some - * groups of siblings. This is already parsed and encoded in - * cpu_l2_cache_mask(). - * - * TODO: cache_lookup_or_instantiate() needs to be made aware of the - * "ibm,thread-groups" property so that cache->shared_cpu_map - * reflects the correct siblings on platforms that have this - * device-tree property. This helper function is only a stop-gap - * solution so that we report the correct siblings to the - * userspace via sysfs. - */ -static const struct cpumask *get_shared_cpu_map(struct cache_index_dir *index, struct cache *cache) -{ - if (has_big_cores) { - int cpu = index_dir_to_cpu(index); - if (cache->level == 1) - return cpu_smallcore_mask(cpu); - if (cache->level == 2 && thread_group_shares_l2) - return cpu_l2_cache_mask(cpu); - } - - return &cache->shared_cpu_map; -} - static ssize_t show_shared_cpumap(struct kobject *k, struct kobj_attribute *attr, char *buf, bool list) { @@ -724,7 +685,7 @@ show_shared_cpumap(struct kobject *k, struct kobj_attribute *attr, char *buf, bo index = kobj_to_cache_index_dir(k); cache = index->cache; - mask = get_shared_cpu_map(index, cache); + mask = &cache->shared_cpu_map; return cpumap_print_to_pagebuf(list, buf, mask); } -- cgit From e9ef81e1079b0c4c374fba0f9affa7129c7c913b Mon Sep 17 00:00:00 2001 From: Parth Shah Date: Wed, 28 Jul 2021 23:26:07 +0530 Subject: powerpc/smp: Use existing L2 cache_map cpumask to find L3 cache siblings On POWER10 systems, the "ibm,thread-groups" property "2" indicates the cpus in thread-group share both L2 and L3 caches. Hence, use cache_property = 2 itself to find both the L2 and L3 cache siblings. Hence, create a new thread_group_l3_cache_map to keep list of L3 siblings, but fill the mask using same property "2" array. Signed-off-by: Parth Shah Reviewed-by: Gautham R. 
Shenoy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210728175607.591679-4-parth@linux.ibm.com --- arch/powerpc/include/asm/smp.h | 3 ++ arch/powerpc/kernel/cacheinfo.c | 3 ++ arch/powerpc/kernel/smp.c | 66 ++++++++++++++++++++++++++++------------- 3 files changed, 51 insertions(+), 21 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index 1259040cc3a4..7ef1cd8168a0 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -35,6 +35,7 @@ extern int *chip_id_lookup_table; DECLARE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map); DECLARE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map); +DECLARE_PER_CPU(cpumask_var_t, thread_group_l3_cache_map); #ifdef CONFIG_SMP @@ -144,6 +145,7 @@ extern int cpu_to_core_id(int cpu); extern bool has_big_cores; extern bool thread_group_shares_l2; +extern bool thread_group_shares_l3; #define cpu_smt_mask cpu_smt_mask #ifdef CONFIG_SCHED_SMT @@ -198,6 +200,7 @@ extern void __cpu_die(unsigned int cpu); #define hard_smp_processor_id() get_hard_smp_processor_id(0) #define smp_setup_cpu_maps() #define thread_group_shares_l2 0 +#define thread_group_shares_l3 0 static inline void inhibit_secondary_onlining(void) {} static inline void uninhibit_secondary_onlining(void) {} static inline const struct cpumask *cpu_sibling_mask(int cpu) diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index 20d91693eac1..cf1be75b7833 100644 --- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c @@ -469,6 +469,9 @@ static int get_group_id(unsigned int cpu_id, int level) else if (thread_group_shares_l2 && level == 2) return cpumask_first(per_cpu(thread_group_l2_cache_map, cpu_id)); + else if (thread_group_shares_l3 && level == 3) + return cpumask_first(per_cpu(thread_group_l3_cache_map, + cpu_id)); return -1; } diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index a7fcac44a8e2..f2abd88e0c25 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -78,6 +78,7 @@ struct task_struct *secondary_current; bool has_big_cores; bool coregroup_enabled; bool thread_group_shares_l2; +bool thread_group_shares_l3; DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map); DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map); @@ -101,7 +102,7 @@ enum { #define MAX_THREAD_LIST_SIZE 8 #define THREAD_GROUP_SHARE_L1 1 -#define THREAD_GROUP_SHARE_L2 2 +#define THREAD_GROUP_SHARE_L2_L3 2 struct thread_groups { unsigned int property; unsigned int nr_groups; @@ -131,6 +132,12 @@ DEFINE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map); */ DEFINE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map); +/* + * On P10, thread_group_l3_cache_map for each CPU is equal to the + * thread_group_l2_cache_map + */ +DEFINE_PER_CPU(cpumask_var_t, thread_group_l3_cache_map); + /* SMP operations for this machine */ struct smp_ops_t *smp_ops; @@ -889,19 +896,41 @@ out: return tg; } +static int update_mask_from_threadgroup(cpumask_var_t *mask, struct thread_groups *tg, int cpu, int cpu_group_start) +{ + int first_thread = cpu_first_thread_sibling(cpu); + int i; + + zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cpu)); + + for (i = first_thread; i < first_thread + threads_per_core; i++) { + int i_group_start = get_cpu_thread_group_start(i, tg); + + if (unlikely(i_group_start == -1)) { + WARN_ON_ONCE(1); + return -ENODATA; + } + + if (i_group_start == cpu_group_start) + cpumask_set_cpu(i, *mask); + } + + return 0; +} + static int __init 
init_thread_group_cache_map(int cpu, int cache_property) { - int first_thread = cpu_first_thread_sibling(cpu); - int i, cpu_group_start = -1, err = 0; + int cpu_group_start = -1, err = 0; struct thread_groups *tg = NULL; cpumask_var_t *mask = NULL; if (cache_property != THREAD_GROUP_SHARE_L1 && - cache_property != THREAD_GROUP_SHARE_L2) + cache_property != THREAD_GROUP_SHARE_L2_L3) return -EINVAL; tg = get_thread_groups(cpu, cache_property, &err); + if (!tg) return err; @@ -912,25 +941,18 @@ static int __init init_thread_group_cache_map(int cpu, int cache_property) return -ENODATA; } - if (cache_property == THREAD_GROUP_SHARE_L1) + if (cache_property == THREAD_GROUP_SHARE_L1) { mask = &per_cpu(thread_group_l1_cache_map, cpu); - else if (cache_property == THREAD_GROUP_SHARE_L2) + update_mask_from_threadgroup(mask, tg, cpu, cpu_group_start); + } + else if (cache_property == THREAD_GROUP_SHARE_L2_L3) { mask = &per_cpu(thread_group_l2_cache_map, cpu); - - zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cpu)); - - for (i = first_thread; i < first_thread + threads_per_core; i++) { - int i_group_start = get_cpu_thread_group_start(i, tg); - - if (unlikely(i_group_start == -1)) { - WARN_ON_ONCE(1); - return -ENODATA; - } - - if (i_group_start == cpu_group_start) - cpumask_set_cpu(i, *mask); + update_mask_from_threadgroup(mask, tg, cpu, cpu_group_start); + mask = &per_cpu(thread_group_l3_cache_map, cpu); + update_mask_from_threadgroup(mask, tg, cpu, cpu_group_start); } + return 0; } @@ -1020,14 +1042,16 @@ static int __init init_big_cores(void) has_big_cores = true; for_each_possible_cpu(cpu) { - int err = init_thread_group_cache_map(cpu, THREAD_GROUP_SHARE_L2); + int err = init_thread_group_cache_map(cpu, THREAD_GROUP_SHARE_L2_L3); if (err) return err; } thread_group_shares_l2 = true; - pr_debug("L2 cache only shared by the threads in the small core\n"); + thread_group_shares_l3 = true; + pr_debug("L2/L3 cache only shared by the threads in the small core\n"); + return 0; } -- cgit From cf9c615cde49fb5d2480549c8c955a0a387798d3 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 21 Jul 2021 00:15:04 +1000 Subject: powerpc/64s/perf: Always use SIAR for kernel interrupts If an interrupt is taken in kernel mode, always use SIAR for it rather than looking at regs_sipr. This prevents samples piling up around interrupt enable (hard enable or interrupt replay via soft enable) in PMUs / modes where the PR sample indication is not in synch with SIAR. This results in better sampling of interrupt entry and exit in particular. Signed-off-by: Nicholas Piggin Tested-by: Athira Rajeev Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210720141504.420110-1-npiggin@gmail.com --- arch/powerpc/perf/core-book3s.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index bb0ee716de91..91203ed9d0ff 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -340,6 +340,13 @@ static inline void perf_read_regs(struct pt_regs *regs) * If the PMU doesn't update the SIAR for non marked events use * pt_regs. * + * If regs is a kernel interrupt, always use SIAR. Some PMUs have an + * issue with regs_sipr not being in synch with SIAR in interrupt entry + * and return sequences, which can result in regs_sipr being true for + * kernel interrupts and SIAR, which has the effect of causing samples + * to pile up at mtmsrd MSR[EE] 0->1 or pending irq replay around + * interrupt entry/exit. 
+ * * If the PMU has HV/PR flags then check to see if they * place the exception in userspace. If so, use pt_regs. In * continuous sampling mode the SIAR and the PMU exception are @@ -356,6 +363,8 @@ static inline void perf_read_regs(struct pt_regs *regs) use_siar = 1; else if ((ppmu->flags & PPMU_NO_CONT_SAMPLING)) use_siar = 0; + else if (!user_mode(regs)) + use_siar = 1; else if (!(ppmu->flags & PPMU_NO_SIPR) && regs_sipr(regs)) use_siar = 0; else -- cgit From 13e47bebbe83f58ddc41d2987567e97c5068a1ec Mon Sep 17 00:00:00 2001 From: Tong Tiangen Date: Fri, 2 Jul 2021 04:54:21 +0000 Subject: riscv: Implement thread_struct whitelist for hardened usercopy This whitelists the FPU register state portion of the thread_struct for copying to userspace, instead of the default entire struct. Signed-off-by: Tong Tiangen Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 + arch/riscv/include/asm/processor.h | 8 ++++++++ 2 files changed, 9 insertions(+) (limited to 'arch') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 8fcceb8eda07..1af859b9d5bf 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -72,6 +72,7 @@ config RISCV select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE if 64BIT && MMU + select HAVE_ARCH_THREAD_STRUCT_WHITELIST select HAVE_ARCH_VMAP_STACK if MMU && 64BIT select HAVE_ASM_MODVERSIONS select HAVE_CONTEXT_TRACKING diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h index 021ed64ee608..46b492c78cbb 100644 --- a/arch/riscv/include/asm/processor.h +++ b/arch/riscv/include/asm/processor.h @@ -37,6 +37,14 @@ struct thread_struct { unsigned long bad_cause; }; +/* Whitelist the fstate from the task_struct for hardened usercopy */ +static inline void arch_thread_struct_whitelist(unsigned long *offset, + unsigned long *size) +{ + *offset = offsetof(struct thread_struct, fstate); + *size = sizeof_field(struct thread_struct, fstate); +} + #define INIT_THREAD { \ .sp = sizeof(init_stack) + (long)&init_stack, \ } -- cgit From dc1cff969101afd08601e90463b44bd572e62dd4 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 30 Jul 2021 18:04:53 -0400 Subject: KVM: X86: MMU: Tune PTE_LIST_EXT to be bigger Currently each rmap array element only contains 3 entries. However for EPT=N there can be a lot of guest pages that have tens or even hundreds of rmap entries. A normal distribution for a 6G guest (even if idle) shows this in the rmap count statistics:

Rmap_Count:       0      1    2-3   4-7  8-15  16-31  32-63  64-127  128-255  256-511  512-1023
Level=4K:   3089171  49005  14016  1363   235    212     15       7        0        0         0
Level=2M:      5951    227      0     0     0      0      0       0        0        0         0
Level=1G:        32      0      0     0     0      0      0       0        0        0         0

With more forking, some pages will grow even larger rmap counts. This patch makes PTE_LIST_EXT bigger so it'll be more efficient for the general use case of EPT=N, as we dereference the list less and the loops over PTE_LIST_EXT will be slightly more efficient; but still not too large, so there is less waste when the array is not full. It should not affect EPT=Y, since EPT normally only has zero or one rmap entry for each page, so no array is even allocated. With a test case that forks 500 children and recycles them ("./rmap_fork 500" [1]), this patch speeds up fork time by about 29%.
Before: 473.90 (+-5.93%) After: 366.10 (+-4.94%) [1] https://github.com/xzpeter/clibs/commit/825436f825453de2ea5aaee4bdb1c92281efe5b3 Signed-off-by: Peter Xu Message-Id: <20210730220455.26054-6-peterx@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index e702361b4409..69ad8189d7eb 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -137,8 +137,8 @@ module_param(dbg, bool, 0644); #include <trace/events/kvm.h> -/* make pte_list_desc fit well in cache line */ -#define PTE_LIST_EXT 3 +/* make pte_list_desc fit well in cache lines */ +#define PTE_LIST_EXT 15 struct pte_list_desc { u64 *sptes[PTE_LIST_EXT]; -- cgit From 13236e25ebab91b3e42ddedf5354b569ace1b555 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 30 Jul 2021 18:06:02 -0400 Subject: KVM: X86: Optimize pte_list_desc with per-array counter Add a counter field into pte_list_desc, so as to simplify the add/remove/loop logic. E.g., we don't need to loop over the array any more in most cases. This makes more sense now that the array size has been switched to be larger; otherwise the counter would be a waste. Initially I wanted to store a tail pointer at the head of the array list so we don't need to traverse the list, at least for pushing new ones (without the counter we traverse both the list and the array). However, that would need slightly more change without much benefit, e.g., after we grow the entry count per array the list traversal is not so expensive. So let's be simple but still try to get as much benefit as we can with just these extra few lines of changes (not to mention the code looks easier too without looping over arrays). I used the same test case that forks 500 children and recycles them ("./rmap_fork 500" [1]); this patch further speeds up the total fork time by about 4%, for a total of 33% over the vanilla kernel: Vanilla: 473.90 (+-5.93%) 3->15 slots: 366.10 (+-4.94%) Add counter: 351.00 (+-3.70%) [1] https://github.com/xzpeter/clibs/commit/825436f825453de2ea5aaee4bdb1c92281efe5b3 Signed-off-by: Peter Xu Message-Id: <20210730220602.26327-1-peterx@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 69ad8189d7eb..f1eb2fdfa473 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -138,11 +138,21 @@ module_param(dbg, bool, 0644); #include <trace/events/kvm.h> /* make pte_list_desc fit well in cache lines */ -#define PTE_LIST_EXT 15 +#define PTE_LIST_EXT 14 +/* + * Slight optimization of cacheline layout, by putting `more' and `spte_count' + * at the start; then accessing it will only use one single cacheline for + * either full (entries==PTE_LIST_EXT) case or entries<=6. + */ struct pte_list_desc { - u64 *sptes[PTE_LIST_EXT]; struct pte_list_desc *more; + /* + * Stores number of entries stored in the pte_list_desc. No need to be + * u64 but just for easier alignment. When PTE_LIST_EXT, means full.
+ */ + u64 spte_count; + u64 *sptes[PTE_LIST_EXT]; }; struct kvm_shadow_walk_iterator { @@ -901,7 +911,7 @@ static int pte_list_add(struct kvm_vcpu *vcpu, u64 *spte, struct kvm_rmap_head *rmap_head) { struct pte_list_desc *desc; - int i, count = 0; + int count = 0; if (!rmap_head->val) { rmap_printk("%p %llx 0->1\n", spte, *spte); @@ -911,24 +921,24 @@ static int pte_list_add(struct kvm_vcpu *vcpu, u64 *spte, desc = mmu_alloc_pte_list_desc(vcpu); desc->sptes[0] = (u64 *)rmap_head->val; desc->sptes[1] = spte; + desc->spte_count = 2; rmap_head->val = (unsigned long)desc | 1; ++count; } else { rmap_printk("%p %llx many->many\n", spte, *spte); desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); - while (desc->sptes[PTE_LIST_EXT-1]) { + while (desc->spte_count == PTE_LIST_EXT) { count += PTE_LIST_EXT; - if (!desc->more) { desc->more = mmu_alloc_pte_list_desc(vcpu); desc = desc->more; + desc->spte_count = 0; break; } desc = desc->more; } - for (i = 0; desc->sptes[i]; ++i) - ++count; - desc->sptes[i] = spte; + count += desc->spte_count; + desc->sptes[desc->spte_count++] = spte; } return count; } @@ -938,13 +948,12 @@ pte_list_desc_remove_entry(struct kvm_rmap_head *rmap_head, struct pte_list_desc *desc, int i, struct pte_list_desc *prev_desc) { - int j; + int j = desc->spte_count - 1; - for (j = PTE_LIST_EXT - 1; !desc->sptes[j] && j > i; --j) - ; desc->sptes[i] = desc->sptes[j]; desc->sptes[j] = NULL; - if (j != 0) + desc->spte_count--; + if (desc->spte_count) return; if (!prev_desc && !desc->more) rmap_head->val = 0; @@ -977,7 +986,7 @@ static void __pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head) desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); prev_desc = NULL; while (desc) { - for (i = 0; i < PTE_LIST_EXT && desc->sptes[i]; ++i) { + for (i = 0; i < desc->spte_count; ++i) { if (desc->sptes[i] == spte) { pte_list_desc_remove_entry(rmap_head, desc, i, prev_desc); -- cgit From a75b540451d20ef1aebaa09d183ddc5c44c6f86a Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 30 Jul 2021 18:06:05 -0400 Subject: KVM: X86: Optimize zapping rmap Using rmap_get_first() and rmap_remove() for zapping a huge rmap list could be slow. The easy way is to traverse the rmap list, collecting the a/d bits and freeing the slots along the way. Provide a pte_list_destroy() and do exactly that.
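A back-of-envelope check of the cache-line layout introduced two patches above (a sketch under assumptions, not from the patches: 8-byte pointers, 64-byte cache lines, kernel u64 type):

struct pte_list_desc {
	struct pte_list_desc *more;	/*   8 bytes */
	u64 spte_count;			/*   8 bytes */
	u64 *sptes[14];			/* 112 bytes (14 * 8) */
};
/*
 * 8 + 8 + 112 = 128 bytes, i.e. exactly two 64-byte cache lines.
 * With spte_count <= 6 the live fields (8 + 8 + 6 * 8 = 64 bytes)
 * fit in the first cache line, which is the "entries<=6" case the
 * layout comment above refers to.
 */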
Signed-off-by: Peter Xu Message-Id: <20210730220605.26377-1-peterx@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 41 +++++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index f1eb2fdfa473..232ced2e7bf8 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -1007,6 +1007,34 @@ static void pte_list_remove(struct kvm_rmap_head *rmap_head, u64 *sptep) __pte_list_remove(sptep, rmap_head); } +/* Return true if rmap existed, false otherwise */ +static bool pte_list_destroy(struct kvm_rmap_head *rmap_head) +{ + struct pte_list_desc *desc, *next; + int i; + + if (!rmap_head->val) + return false; + + if (!(rmap_head->val & 1)) { + mmu_spte_clear_track_bits((u64 *)rmap_head->val); + goto out; + } + + desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); + + for (; desc; desc = next) { + for (i = 0; i < desc->spte_count; i++) + mmu_spte_clear_track_bits(desc->sptes[i]); + next = desc->more; + mmu_free_pte_list_desc(desc); + } +out: + /* rmap_head is meaningless now, remember to reset it */ + rmap_head->val = 0; + return true; +} + static struct kvm_rmap_head *__gfn_to_rmap(gfn_t gfn, int level, const struct kvm_memory_slot *slot) { @@ -1398,18 +1426,7 @@ static bool rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn) static bool kvm_zap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head, const struct kvm_memory_slot *slot) { - u64 *sptep; - struct rmap_iterator iter; - bool flush = false; - - while ((sptep = rmap_get_first(rmap_head, &iter))) { - rmap_printk("spte %p %llx.\n", sptep, *sptep); - - pte_list_remove(rmap_head, sptep); - flush = true; - } - - return flush; + return pte_list_destroy(rmap_head); } static bool kvm_unmap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head, -- cgit From e79f49c37ccf273c8aba733f803b3774ebfbe581 Mon Sep 17 00:00:00 2001 From: Like Xu Date: Wed, 28 Jul 2021 20:07:05 +0800 Subject: KVM: x86/pmu: Introduce pmc->is_paused to reduce the call time of perf interfaces Based on our observations, after any vm-exit associated with vPMU, two or more perf interfaces have to be called for guest counter emulation, such as perf_event_{pause, read_value, period}(), and each one will {lock, unlock} the same perf_event_ctx. The frequency of calls becomes more severe when the guest uses counters in a multiplexed manner. Holding a lock once and completing the KVM request operations in the perf context would introduce a set of impractical new interfaces. So we can further optimize the vPMU implementation by avoiding repeated calls to these interfaces in the KVM context for at least one pattern: After we call perf_event_pause() once, the event will be disabled and its internal count will be reset to 0. So there is no need to pause it again or read its value. Once the event is paused, the event period will not be updated until the next time it's resumed or reprogrammed. And there is also no need to call perf_event_period() twice for a non-running counter, considering the perf_event for a running counter is never paused.
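Restating the invariant the patch relies on (a summary, not from the patch text): after pmc_pause_counter() runs once, the perf event is disabled, its accumulated count has been folded into pmc->counter, and its internal count is zero; the new pmc->is_paused flag then lets pmc_pause_counter(), pmc_read_counter() and the perf_event_period() call sites below skip the redundant perf calls, and the context locking they imply, until pmc_resume_counter() or a reprogram clears the flag.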
Based on this implementation, for the following common usage of sampling 4 events using perf on a 4u8g guest: echo 0 > /proc/sys/kernel/watchdog echo 25 > /proc/sys/kernel/perf_cpu_time_max_percent echo 10000 > /proc/sys/kernel/perf_event_max_sample_rate echo 0 > /proc/sys/kernel/perf_cpu_time_max_percent for i in `seq 1 1 10` do taskset -c 0 perf record \ -e cpu-cycles -e instructions -e branch-instructions -e cache-misses \ /root/br_instr a done the average latency of the guest NMI handler is reduced from 37646.7 ns to 32929.3 ns (~1.14x speed up) on the Intel ICX server. Also, in addition to collecting more samples, no loss of sampling accuracy was observed compared to before the optimization. Signed-off-by: Like Xu Message-Id: <20210728120705.6855-1-likexu@tencent.com> Signed-off-by: Paolo Bonzini Acked-by: Peter Zijlstra --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/pmu.c | 5 ++++- arch/x86/kvm/pmu.h | 2 +- arch/x86/kvm/vmx/pmu_intel.c | 4 ++-- 4 files changed, 8 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 99f37781a6fc..a079880d4cd5 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -482,6 +482,7 @@ struct kvm_pmc { * ctrl value for fixed counters. */ u64 current_config; + bool is_paused; }; struct kvm_pmu { diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 827886c12c16..0772bad9165c 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -137,18 +137,20 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type, pmc->perf_event = event; pmc_to_pmu(pmc)->event_count++; clear_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi); + pmc->is_paused = false; } static void pmc_pause_counter(struct kvm_pmc *pmc) { u64 counter = pmc->counter; - if (!pmc->perf_event) + if (!pmc->perf_event || pmc->is_paused) return; /* update counter, reset event value to avoid redundant accumulation */ counter += perf_event_pause(pmc->perf_event, true); pmc->counter = counter & pmc_bitmask(pmc); + pmc->is_paused = true; } static bool pmc_resume_counter(struct kvm_pmc *pmc) @@ -163,6 +165,7 @@ static bool pmc_resume_counter(struct kvm_pmc *pmc) /* reuse perf_event to serve as pmc_reprogram_counter() does*/ perf_event_enable(pmc->perf_event); + pmc->is_paused = false; clear_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->reprogram_pmi); return true; diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h index 67e753edfa22..0e4f2b1fa9fb 100644 --- a/arch/x86/kvm/pmu.h +++ b/arch/x86/kvm/pmu.h @@ -55,7 +55,7 @@ static inline u64 pmc_read_counter(struct kvm_pmc *pmc) u64 counter, enabled, running; counter = pmc->counter; - if (pmc->perf_event) + if (pmc->perf_event && !pmc->is_paused) counter += perf_event_read_value(pmc->perf_event, &enabled, &running); /* FIXME: Scaling needed? 
*/ diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index 9efc1a6b8693..10cc4f65c4ef 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -437,13 +437,13 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) !(msr & MSR_PMC_FULL_WIDTH_BIT)) data = (s64)(s32)data; pmc->counter += data - pmc_read_counter(pmc); - if (pmc->perf_event) + if (pmc->perf_event && !pmc->is_paused) perf_event_period(pmc->perf_event, get_sample_period(pmc, data)); return 0; } else if ((pmc = get_fixed_pmc(pmu, msr))) { pmc->counter += data - pmc_read_counter(pmc); - if (pmc->perf_event) + if (pmc->perf_event && !pmc->is_paused) perf_event_period(pmc->perf_event, get_sample_period(pmc, data)); return 0; -- cgit From eb48d154cd0dade56a0e244f0cfa198ea2925ed3 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 2 Aug 2021 13:38:29 +0100 Subject: arm64: Move .hyp.rodata outside of the _sdata.._edata range The HYP rodata section is currently lumped together with the BSS, which isn't exactly what is expected (it gets registered with kmemleak, for example). Move it away so that it is actually marked RO. As an added benefit, it isn't registered with kmemleak anymore. Fixes: 380e18ade4a5 ("KVM: arm64: Introduce a BSS section for use at Hyp") Suggested-by: Catalin Marinas Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org #5.13 Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20210802123830.2195174-2-maz@kernel.org --- arch/arm64/kernel/vmlinux.lds.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 709d2c433c5e..f6b1a88245db 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -181,6 +181,8 @@ SECTIONS /* everything from this point to __init_begin will be marked RO NX */ RO_DATA(PAGE_SIZE) + HYPERVISOR_DATA_SECTIONS + idmap_pg_dir = .; . += IDMAP_DIR_SIZE; idmap_pg_end = .; @@ -260,8 +262,6 @@ SECTIONS _sdata = .; RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_ALIGN) - HYPERVISOR_DATA_SECTIONS - /* * Data written with the MMU off but read with the MMU on requires * cache lines to be invalidated, discarding up to a Cache Writeback -- cgit From 47e6223c841e029bfc23c3ce594dac5525cebaf8 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 2 Aug 2021 13:38:30 +0100 Subject: KVM: arm64: Unregister HYP sections from kmemleak in protected mode Booting a KVM host in protected mode with kmemleak quickly results in a pretty bad crash, as kmemleak doesn't know that the HYP sections have been taken away. This is specially true for the BSS section, which is part of the kernel BSS section and registered at boot time by kmemleak itself. Unregister the HYP part of the BSS before making that section HYP-private. The rest of the HYP-specific data is obtained via the page allocator or lives in other sections, none of which is subjected to kmemleak. 
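For context (an assumption-flagged note, not from the commit message): kmemleak_free_part(ptr, size) is the kmemleak interface for retiring a sub-range of an object the scanner already tracks; the hunk below applies it to the HYP BSS, which sits inside the kernel BSS that kmemleak registers at boot, before the section becomes inaccessible to the host.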
Fixes: 90134ac9cabb ("KVM: arm64: Protect the .hyp sections from the host") Reviewed-by: Quentin Perret Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org # 5.13 Link: https://lore.kernel.org/r/20210802123830.2195174-3-maz@kernel.org --- arch/arm64/kvm/arm.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index e9a2b8f27792..52242f32c4be 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -15,6 +15,7 @@ #include <linux/fs.h> #include <linux/mman.h> #include <linux/sched.h> +#include <linux/kmemleak.h> #include <linux/kvm.h> #include <linux/kvm_irqfd.h> #include <linux/irqbypass.h> @@ -1982,6 +1983,12 @@ static int finalize_hyp_mode(void) if (ret) return ret; + /* + * Exclude HYP BSS from kmemleak so that it doesn't get peeked + * at, which would end badly once the section is inaccessible. + * None of other sections should ever be introspected. + */ + kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start); ret = pkvm_mark_hyp_section(__hyp_bss); if (ret) return ret; -- cgit From 8165c6ae8e3a264601f02fc17e5545d027584a2f Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Thu, 8 Jul 2021 09:59:47 +0800 Subject: riscv: Allow forced irq threading The timer interrupt and the perf interrupt on riscv are requested with IRQF_PERCPU, so it's safe to allow forced interrupt threading. Signed-off-by: Kefeng Wang Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 1af859b9d5bf..1fc5a95df3f4 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -98,6 +98,7 @@ config RISCV select HAVE_STACKPROTECTOR select HAVE_SYSCALL_TRACEPOINTS select IRQ_DOMAIN + select IRQ_FORCED_THREADING select MODULES_USE_ELF_RELA if MODULES select MODULE_SECTIONS if MODULES select OF -- cgit From bcf11b5e99b27472ea61231a64e29ad7dd31f0da Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Thu, 8 Jul 2021 09:59:48 +0800 Subject: riscv: Enable generic idle loop Enable the generic idle loop to support the hlt/nohlt command line options, which override the default idle loop behavior. Signed-off-by: Kefeng Wang Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 1fc5a95df3f4..95cb9efa9213 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -48,6 +48,7 @@ config RISCV select GENERIC_CLOCKEVENTS_BROADCAST if SMP select GENERIC_EARLY_IOREMAP select GENERIC_GETTIMEOFDAY if HAVE_GENERIC_VDSO + select GENERIC_IDLE_POLL_SETUP select GENERIC_IOREMAP select GENERIC_IRQ_MULTI_HANDLER select GENERIC_IRQ_SHOW -- cgit From ecd4916c7261edccb2382c9931535e238875a44d Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Thu, 8 Jul 2021 09:59:49 +0800 Subject: riscv: Enable GENERIC_IRQ_SHOW_LEVEL The interrupt controllers on riscv support both edge and level triggered interrupts; it's useful to provide that information in /proc/interrupts.
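A usage note on the three RISC-V Kconfig selections above (the specifics here are assumptions, not taken from the patches): IRQ_FORCED_THREADING is what makes the generic "threadirqs" boot parameter take effect; GENERIC_IDLE_POLL_SETUP wires up the "hlt"/"nohlt" boot parameters mentioned in the commit message; and with GENERIC_IRQ_SHOW_LEVEL each /proc/interrupts line gains an "Edge"/"Level" annotation next to the irqchip name, roughly of the shape "1: 5832 SiFive PLIC 1 Level ttyS0", where the count and spacing are illustrative only.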
Signed-off-by: Kefeng Wang Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 95cb9efa9213..387cf68a5d52 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -52,6 +52,7 @@ config RISCV select GENERIC_IOREMAP select GENERIC_IRQ_MULTI_HANDLER select GENERIC_IRQ_SHOW + select GENERIC_IRQ_SHOW_LEVEL select GENERIC_LIB_DEVMEM_IS_ALLOWED select GENERIC_PCI_IOMAP select GENERIC_PTDUMP if MMU -- cgit From 946e1052cdcc7e585ee5d1e72528ca49fb295243 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 18 Jul 2021 19:33:09 -0700 Subject: openrisc: don't printk() unconditionally Don't call printk() when CONFIG_PRINTK is not set. Fixes the following build errors: or1k-linux-ld: arch/openrisc/kernel/entry.o: in function `_external_irq_handler': (.text+0x804): undefined reference to `printk' (.text+0x804): relocation truncated to fit: R_OR1K_INSN_REL_26 against undefined symbol `printk' Fixes: 9d02a4283e9c ("OpenRISC: Boot code") Signed-off-by: Randy Dunlap Reported-by: kernel test robot Cc: Jonas Bonn Cc: Stefan Kristiansson Cc: Stafford Horne Cc: openrisc@lists.librecores.org Signed-off-by: Stafford Horne --- arch/openrisc/kernel/entry.S | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/openrisc/kernel/entry.S b/arch/openrisc/kernel/entry.S index bc657e55c15f..98e4f97db515 100644 --- a/arch/openrisc/kernel/entry.S +++ b/arch/openrisc/kernel/entry.S @@ -547,6 +547,7 @@ EXCEPTION_ENTRY(_external_irq_handler) l.bnf 1f // ext irq enabled, all ok. l.nop +#ifdef CONFIG_PRINTK l.addi r1,r1,-0x8 l.movhi r3,hi(42f) l.ori r3,r3,lo(42f) @@ -560,6 +561,7 @@ EXCEPTION_ENTRY(_external_irq_handler) .string "\n\rESR interrupt bug: in _external_irq_handler (ESR %x)\n\r" .align 4 .previous +#endif l.ori r4,r4,SPR_SR_IEE // fix the bug // l.sw PT_SR(r1),r4 -- cgit From 11648cbb7b335b7eb54e1ff973fb938939616f46 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 15 Jul 2021 19:23:38 -0700 Subject: openrisc: rename or32 code & comments to or1k From Documentation/openrisc/todo.rst, rename "or32" in the source code to "or1k" since this is the name that has been settled on. Signed-off-by: Randy Dunlap Cc: Jonas Bonn Cc: Stefan Kristiansson Cc: Stafford Horne Cc: openrisc@lists.librecores.org Signed-off-by: Stafford Horne --- arch/openrisc/include/asm/pgtable.h | 6 +++--- arch/openrisc/include/asm/thread_info.h | 2 +- arch/openrisc/kernel/entry.S | 4 ++-- arch/openrisc/kernel/head.S | 6 +++--- arch/openrisc/kernel/setup.c | 4 ++-- arch/openrisc/lib/Makefile | 2 +- arch/openrisc/mm/fault.c | 2 +- 7 files changed, 13 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/openrisc/include/asm/pgtable.h b/arch/openrisc/include/asm/pgtable.h index 4ac591c9ca33..cdd657f80bfa 100644 --- a/arch/openrisc/include/asm/pgtable.h +++ b/arch/openrisc/include/asm/pgtable.h @@ -12,7 +12,7 @@ * et al. */ -/* or32 pgtable.h - macros and functions to manipulate page tables +/* or1k pgtable.h - macros and functions to manipulate page tables * * Based on: * include/asm-cris/pgtable.h @@ -29,14 +29,14 @@ /* * The Linux memory management assumes a three-level page table setup. On - * or32, we use that, but "fold" the mid level into the top-level page + * or1k, we use that, but "fold" the mid level into the top-level page * table. 
Since the MMU TLB is software loaded through an interrupt, it * supports any page table structure, so we could have used a three-level * setup, but for the amounts of memory we normally use, a two-level is * probably more efficient. * * This file contains the functions and defines necessary to modify and use - * the or32 page table tree. + * the or1k page table tree. */ extern void paging_init(void); diff --git a/arch/openrisc/include/asm/thread_info.h b/arch/openrisc/include/asm/thread_info.h index 4f9d2a261455..659834ab87fa 100644 --- a/arch/openrisc/include/asm/thread_info.h +++ b/arch/openrisc/include/asm/thread_info.h @@ -25,7 +25,7 @@ /* THREAD_SIZE is the size of the task_struct/kernel_stack combo. * normally, the stack is found by doing something like p + THREAD_SIZE - * in or32, a page is 8192 bytes, which seems like a sane size + * in or1k, a page is 8192 bytes, which seems like a sane size */ #define THREAD_SIZE_ORDER 0 diff --git a/arch/openrisc/kernel/entry.S b/arch/openrisc/kernel/entry.S index 98e4f97db515..8c8ac3451425 100644 --- a/arch/openrisc/kernel/entry.S +++ b/arch/openrisc/kernel/entry.S @@ -326,7 +326,7 @@ EXCEPTION_ENTRY(_data_page_fault_handler) 1: l.ori r6,r0,0x0 // !write access 2: - /* call fault.c handler in or32/mm/fault.c */ + /* call fault.c handler in openrisc/mm/fault.c */ l.jal do_page_fault l.nop l.j _ret_from_exception @@ -348,7 +348,7 @@ EXCEPTION_ENTRY(_insn_page_fault_handler) /* r4 set be EXCEPTION_HANDLE */ // effective address of fault l.ori r6,r0,0x0 // !write access - /* call fault.c handler in or32/mm/fault.c */ + /* call fault.c handler in openrisc/mm/fault.c */ l.jal do_page_fault l.nop l.j _ret_from_exception diff --git a/arch/openrisc/kernel/head.S b/arch/openrisc/kernel/head.S index af355e3f4619..15f1b38dfe03 100644 --- a/arch/openrisc/kernel/head.S +++ b/arch/openrisc/kernel/head.S @@ -599,7 +599,7 @@ flush_tlb: l.jal _flush_tlb l.nop -/* The MMU needs to be enabled before or32_early_setup is called */ +/* The MMU needs to be enabled before or1k_early_setup is called */ enable_mmu: /* @@ -641,9 +641,9 @@ enable_mmu: /* magic number mismatch, set fdt pointer to null */ l.or r25,r0,r0 _fdt_found: - /* pass fdt pointer to or32_early_setup in r3 */ + /* pass fdt pointer to or1k_early_setup in r3 */ l.or r3,r0,r25 - LOAD_SYMBOL_2_GPR(r24, or32_early_setup) + LOAD_SYMBOL_2_GPR(r24, or1k_early_setup) l.jalr r24 l.nop diff --git a/arch/openrisc/kernel/setup.c b/arch/openrisc/kernel/setup.c index 8ae2da6ac097..7eddcac0ef2f 100644 --- a/arch/openrisc/kernel/setup.c +++ b/arch/openrisc/kernel/setup.c @@ -209,7 +209,7 @@ void __init setup_cpuinfo(void) } /** - * or32_early_setup + * or1k_early_setup * * Handles the pointer to the device tree that this kernel is to use * for establishing the available platform devices. @@ -217,7 +217,7 @@ void __init setup_cpuinfo(void) * Falls back on built-in device tree in case null pointer is passed. */ -void __init or32_early_setup(void *fdt) +void __init or1k_early_setup(void *fdt) { if (fdt) pr_info("FDT at %p\n", fdt); diff --git a/arch/openrisc/lib/Makefile b/arch/openrisc/lib/Makefile index 79775aaa6baa..53327406b483 100644 --- a/arch/openrisc/lib/Makefile +++ b/arch/openrisc/lib/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only # -# Makefile for or32 specific library files.. +# Makefile for or1k specific library files.. 
# obj-y := delay.o string.o memset.o memcpy.o diff --git a/arch/openrisc/mm/fault.c b/arch/openrisc/mm/fault.c index ca97d9baab51..c730d1a51686 100644 --- a/arch/openrisc/mm/fault.c +++ b/arch/openrisc/mm/fault.c @@ -28,7 +28,7 @@ unsigned long pte_misses; /* updated by do_page_fault() */ unsigned long pte_errors; /* updated by do_page_fault() */ /* __PHX__ :: - check the vmalloc_fault in do_page_fault() - * - also look into include/asm-or32/mmu_context.h + * - also look into include/asm/mmu_context.h */ volatile pgd_t *current_pgd[NR_CPUS]; -- cgit From 319afe68567b923e25140e744e7f05e3e5d889c1 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 4 Aug 2021 12:48:41 -0400 Subject: KVM: xen: do not use struct gfn_to_hva_cache gfn_to_hva_cache is not thread-safe, so it is usually used only within a vCPU (whose code is protected by vcpu->mutex). The Xen interface implementation has such a cache in kvm->arch, but it is not really used except to store the location of the shared info page. Replace shinfo_set and shinfo_cache with just the value that is passed via KVM_XEN_ATTR_TYPE_SHARED_INFO; the only complication is that the initialization value is not zero anymore and therefore kvm_xen_init_vm needs to be introduced. Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 3 +-- arch/x86/kvm/x86.c | 1 + arch/x86/kvm/xen.c | 23 ++++++++++++----------- arch/x86/kvm/xen.h | 5 +++++ 4 files changed, 19 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index a079880d4cd5..6a73ff7db5f9 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1003,9 +1003,8 @@ struct msr_bitmap_range { /* Xen emulation context */ struct kvm_xen { bool long_mode; - bool shinfo_set; u8 upcall_vector; - struct gfn_to_hva_cache shinfo_cache; + gfn_t shinfo_gfn; }; enum kvm_irqchip_mode { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 348452bb16bc..3cedc7cc132a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -11162,6 +11162,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm_hv_init_vm(kvm); kvm_page_track_init(kvm); kvm_mmu_init_vm(kvm); + kvm_xen_init_vm(kvm); return static_call(kvm_x86_vm_init)(kvm); } diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index ae17250e1efe..9ea9c3dabe37 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -25,15 +25,14 @@ static int kvm_xen_shared_info_init(struct kvm *kvm, gfn_t gfn) { gpa_t gpa = gfn_to_gpa(gfn); int wc_ofs, sec_hi_ofs; - int ret; + int ret = 0; int idx = srcu_read_lock(&kvm->srcu); - ret = kvm_gfn_to_hva_cache_init(kvm, &kvm->arch.xen.shinfo_cache, - gpa, PAGE_SIZE); - if (ret) + if (kvm_is_error_hva(gfn_to_hva(kvm, gfn))) { + ret = -EFAULT; goto out; - - kvm->arch.xen.shinfo_set = true; + } + kvm->arch.xen.shinfo_gfn = gfn; /* Paranoia checks on the 32-bit struct layout */ BUILD_BUG_ON(offsetof(struct compat_shared_info, wc) != 0x900); @@ -245,7 +244,7 @@ int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data) case KVM_XEN_ATTR_TYPE_SHARED_INFO: if (data->u.shared_info.gfn == GPA_INVALID) { - kvm->arch.xen.shinfo_set = false; + kvm->arch.xen.shinfo_gfn = GPA_INVALID; r = 0; break; } @@ -283,10 +282,7 @@ int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data) break; case KVM_XEN_ATTR_TYPE_SHARED_INFO: - if (kvm->arch.xen.shinfo_set) - data->u.shared_info.gfn = gpa_to_gfn(kvm->arch.xen.shinfo_cache.gpa); - else - data->u.shared_info.gfn = GPA_INVALID; + 
data->u.shared_info.gfn = gpa_to_gfn(kvm->arch.xen.shinfo_gfn); r = 0; break; @@ -646,6 +642,11 @@ int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc) return 0; } +void kvm_xen_init_vm(struct kvm *kvm) +{ + kvm->arch.xen.shinfo_gfn = GPA_INVALID; +} + void kvm_xen_destroy_vm(struct kvm *kvm) { if (kvm->arch.xen_hvm_config.msr) diff --git a/arch/x86/kvm/xen.h b/arch/x86/kvm/xen.h index 463a7844a8ca..cc0cf5f37450 100644 --- a/arch/x86/kvm/xen.h +++ b/arch/x86/kvm/xen.h @@ -21,6 +21,7 @@ int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data); int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data); int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data); int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc); +void kvm_xen_init_vm(struct kvm *kvm); void kvm_xen_destroy_vm(struct kvm *kvm); static inline bool kvm_xen_msr_enabled(struct kvm *kvm) @@ -50,6 +51,10 @@ static inline int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data) return 1; } +static inline void kvm_xen_init_vm(struct kvm *kvm) +{ +} + static inline void kvm_xen_destroy_vm(struct kvm *kvm) { } -- cgit From 730d070ae9f12fff5d44fe8fb0547ae37d100da8 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 3 Aug 2021 16:15:45 +0200 Subject: MIPS: Replace deprecated CPU-hotplug functions. The functions get_online_cpus() and put_online_cpus() have been deprecated during the CPU hotplug rework. They map directly to cpus_read_lock() and cpus_read_unlock(). Replace deprecated CPU-hotplug functions with the official version. The behavior remains unchanged. Cc: Thomas Bogendoerfer Cc: linux-mips@vger.kernel.org Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Bogendoerfer --- arch/mips/kernel/mips-mt-fpaff.c | 10 +++++----- arch/mips/kernel/process.c | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/mips/kernel/mips-mt-fpaff.c b/arch/mips/kernel/mips-mt-fpaff.c index 6c590ef27648..67e130d3f038 100644 --- a/arch/mips/kernel/mips-mt-fpaff.c +++ b/arch/mips/kernel/mips-mt-fpaff.c @@ -76,13 +76,13 @@ asmlinkage long mipsmt_sys_sched_setaffinity(pid_t pid, unsigned int len, if (copy_from_user(&new_mask, user_mask_ptr, sizeof(new_mask))) return -EFAULT; - get_online_cpus(); + cpus_read_lock(); rcu_read_lock(); p = find_process_by_pid(pid); if (!p) { rcu_read_unlock(); - put_online_cpus(); + cpus_read_unlock(); return -ESRCH; } @@ -147,7 +147,7 @@ out_free_cpus_allowed: free_cpumask_var(cpus_allowed); out_put_task: put_task_struct(p); - put_online_cpus(); + cpus_read_unlock(); return retval; } @@ -166,7 +166,7 @@ asmlinkage long mipsmt_sys_sched_getaffinity(pid_t pid, unsigned int len, if (len < real_len) return -EINVAL; - get_online_cpus(); + cpus_read_lock(); rcu_read_lock(); retval = -ESRCH; @@ -182,7 +182,7 @@ asmlinkage long mipsmt_sys_sched_getaffinity(pid_t pid, unsigned int len, out_unlock: rcu_read_unlock(); - put_online_cpus(); + cpus_read_unlock(); if (retval) return retval; if (copy_to_user(user_mask_ptr, &mask, real_len)) diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 73c8e7990a97..95aa86fa6077 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -859,10 +859,10 @@ int mips_set_process_fp_mode(struct task_struct *task, unsigned int value) * scheduled in then it will already have picked up the new FP mode * whilst doing so. 
*/ - get_online_cpus(); + cpus_read_lock(); for_each_cpu_and(cpu, &process_cpus, cpu_online_mask) work_on_cpu(cpu, prepare_for_fp_mode_switch, NULL); - put_online_cpus(); + cpus_read_unlock(); return 0; } -- cgit From ad548993a66c498267695edd8b19a682be0e3a8b Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 4 Aug 2021 17:02:16 -0700 Subject: MIPS: loongson2ef: don't build serial.o unconditionally LOONGSON_UART_BASE depends on EARLY_PRINTK || SERIAL_8250, but when neither of these Kconfig symbols is set, the kernel build has errors: ../arch/mips/loongson2ef/common/serial.c: In function 'serial_init': ../arch/mips/loongson2ef/common/serial.c:66:25: error: 'loongson_uart_base' undeclared (first use in this function) 66 | loongson_uart_base; ../arch/mips/loongson2ef/common/serial.c:66:25: note: each undeclared identifier is reported only once for each function it appears in ../arch/mips/loongson2ef/common/serial.c:68:41: error: '_loongson_uart_base' undeclared (first use in this function) 68 | (void __iomem *)_loongson_uart_base; Fix this by building serial.o only when one (or both) of these Kconfig symbols is enabled. Tested with: (a) EARLY_PRINTK=y, SERIAL_8250 not set; (b) EARLY_PRINTK=y, SERIAL_8250=y; (c) EARLY_PRINTK=y, SERIAL_8250=m; (d) EARLY_PRINTK not set, SERIAL_8250=y; (e) EARLY_PRINTK not set, SERIAL_8250=m; (f) EARLY_PRINTK not set, SERIAL_8250 not set. Signed-off-by: Randy Dunlap Reported-by: kernel test robot Cc: Jiaxun Yang Cc: Thomas Bogendoerfer Cc: linux-mips@vger.kernel.org Signed-off-by: Thomas Bogendoerfer --- arch/mips/loongson2ef/common/Makefile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/loongson2ef/common/Makefile b/arch/mips/loongson2ef/common/Makefile index d5ab3e543ea3..30ea8b5ca685 100644 --- a/arch/mips/loongson2ef/common/Makefile +++ b/arch/mips/loongson2ef/common/Makefile @@ -4,12 +4,14 @@ # obj-y += setup.o init.o env.o time.o reset.o irq.o \ - bonito-irq.o mem.o machtype.o platform.o serial.o + bonito-irq.o mem.o machtype.o platform.o obj-$(CONFIG_PCI) += pci.o # # Serial port support # +obj-$(CONFIG_LOONGSON_UART_BASE) += serial.o +obj-$(CONFIG_EARLY_PRINTK) += serial.o obj-$(CONFIG_LOONGSON_UART_BASE) += uart_base.o obj-$(CONFIG_LOONGSON_MC146818) += rtc.o -- cgit From cb95ea79b3fc772c5873a7a4532ab4c14a455da2 Mon Sep 17 00:00:00 2001 From: Rui Wang Date: Thu, 29 Jul 2021 17:31:52 +0800 Subject: MIPS: locking/atomic: Fix atomic{_64,}_sub_if_positive The return type was int rather than the atomic value type, truncating 64-bit results. This looks like a typo, and it caused the atomic64 test to fail. Signed-off-by: Rui Wang Signed-off-by: Thomas Bogendoerfer --- arch/mips/include/asm/atomic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h index 95e1f7f3597f..a0b9e7c1e4fc 100644 --- a/arch/mips/include/asm/atomic.h +++ b/arch/mips/include/asm/atomic.h @@ -206,7 +206,7 @@ ATOMIC_OPS(atomic64, xor, s64, ^=, xor, lld, scd) * The function returns the old value of @v minus @i. */ #define ATOMIC_SIP_OP(pfx, type, op, ll, sc) \ -static __inline__ int arch_##pfx##_sub_if_positive(type i, pfx##_t * v) \ +static __inline__ type arch_##pfx##_sub_if_positive(type i, pfx##_t * v) \ { \ type temp, result; \ \ -- cgit From 87689270b10fa9e6fac7242233b355cb6792b845 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Wed, 4 Aug 2021 22:28:38 +0000 Subject: KVM: Rename lru_slot to last_used_slot lru_slot is used to keep track of the index of the most-recently used memslot.
The correct acronym would be "mru" but that is not a common acronym. So call it last_used_slot which is a bit more obvious. Suggested-by: Paolo Bonzini Signed-off-by: David Matlack Message-Id: <20210804222844.1419481-2-dmatlack@google.com> Signed-off-by: Paolo Bonzini --- arch/s390/kvm/kvm-s390.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 4527ac7b5961..02574d7b3612 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1953,7 +1953,7 @@ out: static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn) { int start = 0, end = slots->used_slots; - int slot = atomic_read(&slots->lru_slot); + int slot = atomic_read(&slots->last_used_slot); struct kvm_memory_slot *memslots = slots->memslots; if (gfn >= memslots[slot].base_gfn && @@ -1974,7 +1974,7 @@ static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn) if (gfn >= memslots[start].base_gfn && gfn < memslots[start].base_gfn + memslots[start].npages) { - atomic_set(&slots->lru_slot, start); + atomic_set(&slots->last_used_slot, start); } return start; -- cgit From 0f22af940dc8ec4f437189096a5f8677995323b0 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Wed, 4 Aug 2021 22:28:39 +0000 Subject: KVM: Move last_used_slot logic out of search_memslots Make search_memslots unconditionally search all memslots and move the last_used_slot logic up one level to __gfn_to_memslot. This is in preparation for introducing a per-vCPU last_used_slot. As part of this change convert existing callers of search_memslots to __gfn_to_memslot to avoid making any functional changes. Signed-off-by: David Matlack Message-Id: <20210804222844.1419481-3-dmatlack@google.com> Signed-off-by: Paolo Bonzini --- arch/powerpc/kvm/book3s_64_vio.c | 2 +- arch/powerpc/kvm/book3s_64_vio_hv.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index 8da93fdfa59e..6365087f3160 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -346,7 +346,7 @@ static long kvmppc_tce_to_ua(struct kvm *kvm, unsigned long tce, unsigned long gfn = tce >> PAGE_SHIFT; struct kvm_memory_slot *memslot; - memslot = search_memslots(kvm_memslots(kvm), gfn); + memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn); if (!memslot) return -EINVAL; diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index dc6591548f0c..f38dfe195ef2 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -80,7 +80,7 @@ static long kvmppc_rm_tce_to_ua(struct kvm *kvm, unsigned long gfn = tce >> PAGE_SHIFT; struct kvm_memory_slot *memslot; - memslot = search_memslots(kvm_memslots_raw(kvm), gfn); + memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn); if (!memslot) return -EINVAL; -- cgit From 081de470f1e6e83f9f460ba5ae8f57ff07f37692 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Wed, 4 Aug 2021 22:28:41 +0000 Subject: KVM: x86/mmu: Leverage vcpu->last_used_slot in tdp_mmu_map_handle_target_level The existing TDP MMU methods to handle dirty logging are vcpu-agnostic since they can be driven by MMU notifiers and other non-vcpu-specific events in addition to page faults. However this means that the TDP MMU is not benefiting from the new vcpu->last_used_slot. 
Fix that by introducing a tdp_mmu_map_set_spte_atomic() which is only called during a TDP page fault and has access to the kvm_vcpu for fast slot lookups. This improves "Populate memory time" in dirty_log_perf_test by 5%: Command | Before | After ------------------------------- | ---------------- | ------------- ./dirty_log_perf_test -v64 -x64 | 5.472321072s | 5.169832886s Signed-off-by: David Matlack Message-Id: <20210804222844.1419481-5-dmatlack@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/tdp_mmu.c | 42 +++++++++++++++++++++++++++++++++--------- 1 file changed, 33 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 43f12f5d12c0..dab6cb46cdb2 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -542,15 +542,40 @@ static inline bool tdp_mmu_set_spte_atomic_no_dirty_log(struct kvm *kvm, return true; } -static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm, - struct tdp_iter *iter, - u64 new_spte) +/* + * tdp_mmu_map_set_spte_atomic - Set a leaf TDP MMU SPTE atomically to resolve a + * TDP page fault. + * + * @vcpu: The vcpu instance that took the TDP page fault. + * @iter: a tdp_iter instance currently on the SPTE that should be set + * @new_spte: The value the SPTE should be set to + * + * Returns: true if the SPTE was set, false if it was not. If false is returned, + * this function will have no side-effects. + */ +static inline bool tdp_mmu_map_set_spte_atomic(struct kvm_vcpu *vcpu, + struct tdp_iter *iter, + u64 new_spte) { + struct kvm *kvm = vcpu->kvm; + if (!tdp_mmu_set_spte_atomic_no_dirty_log(kvm, iter, new_spte)) return false; - handle_changed_spte_dirty_log(kvm, iter->as_id, iter->gfn, - iter->old_spte, new_spte, iter->level); + /* + * Use kvm_vcpu_gfn_to_memslot() instead of going through + * handle_changed_spte_dirty_log() to leverage vcpu->last_used_slot. + */ + if (is_writable_pte(new_spte)) { + struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, iter->gfn); + + if (slot && kvm_slot_dirty_track_enabled(slot)) { + /* Enforced by kvm_mmu_hugepage_adjust. */ + WARN_ON_ONCE(iter->level > PG_LEVEL_4K); + mark_page_dirty_in_slot(kvm, slot, iter->gfn); + } + } + return true; } @@ -563,7 +588,7 @@ static inline bool tdp_mmu_zap_spte_atomic(struct kvm *kvm, * immediately installing a present entry in its place * before the TLBs are flushed. 
*/ - if (!tdp_mmu_set_spte_atomic(kvm, iter, REMOVED_SPTE)) + if (!tdp_mmu_set_spte_atomic_no_dirty_log(kvm, iter, REMOVED_SPTE)) return false; kvm_flush_remote_tlbs_with_address(kvm, iter->gfn, @@ -931,7 +956,7 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu, int write, if (new_spte == iter->old_spte) ret = RET_PF_SPURIOUS; - else if (!tdp_mmu_set_spte_atomic(vcpu->kvm, iter, new_spte)) + else if (!tdp_mmu_map_set_spte_atomic(vcpu, iter, new_spte)) return RET_PF_RETRY; /* @@ -1035,8 +1060,7 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, new_spte = make_nonleaf_spte(child_pt, !shadow_accessed_mask); - if (tdp_mmu_set_spte_atomic(vcpu->kvm, &iter, - new_spte)) { + if (tdp_mmu_set_spte_atomic_no_dirty_log(vcpu->kvm, &iter, new_spte)) { tdp_mmu_link_page(vcpu->kvm, sp, true, huge_page_disallowed && req_level >= iter.level); -- cgit From 601f8af01e5ae535b45cdb91234887c9fd861ad4 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Wed, 4 Aug 2021 22:28:42 +0000 Subject: KVM: x86/mmu: Leverage vcpu->last_used_slot for rmap_add and rmap_recycle rmap_add() and rmap_recycle() both run in the context of the vCPU and thus we can use kvm_vcpu_gfn_to_memslot() to look up the memslot. This enables rmap_add() and rmap_recycle() to take advantage of vcpu->last_used_slot and avoid expensive memslot searching. This change improves the performance of "Populate memory time" in dirty_log_perf_test with tdp_mmu=N. In addition to improving the performance, "Populate memory time" no longer scales with the number of memslots in the VM. Command | Before | After ------------------------------- | ---------------- | ------------- ./dirty_log_perf_test -v64 -x1 | 15.18001570s | 14.99469366s ./dirty_log_perf_test -v64 -x64 | 18.71336392s | 14.98675076s Reviewed-by: Paolo Bonzini Signed-off-by: David Matlack Message-Id: <20210804222844.1419481-6-dmatlack@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 232ced2e7bf8..3a0ae48a26e9 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -1044,17 +1044,6 @@ static struct kvm_rmap_head *__gfn_to_rmap(gfn_t gfn, int level, return &slot->arch.rmap[level - PG_LEVEL_4K][idx]; } -static struct kvm_rmap_head *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, - struct kvm_mmu_page *sp) -{ - struct kvm_memslots *slots; - struct kvm_memory_slot *slot; - - slots = kvm_memslots_for_spte_role(kvm, sp->role); - slot = __gfn_to_memslot(slots, gfn); - return __gfn_to_rmap(gfn, sp->role.level, slot); -} - static bool rmap_can_add(struct kvm_vcpu *vcpu) { struct kvm_mmu_memory_cache *mc; @@ -1065,24 +1054,39 @@ static bool rmap_can_add(struct kvm_vcpu *vcpu) static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) { + struct kvm_memory_slot *slot; struct kvm_mmu_page *sp; struct kvm_rmap_head *rmap_head; sp = sptep_to_sp(spte); kvm_mmu_page_set_gfn(sp, spte - sp->spt, gfn); - rmap_head = gfn_to_rmap(vcpu->kvm, gfn, sp); + slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); + rmap_head = __gfn_to_rmap(gfn, sp->role.level, slot); return pte_list_add(vcpu, spte, rmap_head); } + static void rmap_remove(struct kvm *kvm, u64 *spte) { + struct kvm_memslots *slots; + struct kvm_memory_slot *slot; struct kvm_mmu_page *sp; gfn_t gfn; struct kvm_rmap_head *rmap_head; sp = sptep_to_sp(spte); gfn = kvm_mmu_page_get_gfn(sp, spte - sp->spt); - rmap_head = 
gfn_to_rmap(kvm, gfn, sp); + + /* + * Unlike rmap_add and rmap_recycle, rmap_remove does not run in the + * context of a vCPU so have to determine which memslots to use based + * on context information in sp->role. + */ + slots = kvm_memslots_for_spte_role(kvm, sp->role); + + slot = __gfn_to_memslot(slots, gfn); + rmap_head = __gfn_to_rmap(gfn, sp->role.level, slot); + __pte_list_remove(spte, rmap_head); } @@ -1620,12 +1624,13 @@ static bool kvm_test_age_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head, static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) { + struct kvm_memory_slot *slot; struct kvm_rmap_head *rmap_head; struct kvm_mmu_page *sp; sp = sptep_to_sp(spte); - - rmap_head = gfn_to_rmap(vcpu->kvm, gfn, sp); + slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); + rmap_head = __gfn_to_rmap(gfn, sp->role.level, slot); kvm_unmap_rmapp(vcpu->kvm, rmap_head, NULL, gfn, sp->role.level, __pte(0)); kvm_flush_remote_tlbs_with_address(vcpu->kvm, sp->gfn, -- cgit From 93e083d4f4bfe790eb1cdc87103bd6a84be9df75 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Wed, 4 Aug 2021 22:28:43 +0000 Subject: KVM: x86/mmu: Rename __gfn_to_rmap to gfn_to_rmap gfn_to_rmap was removed in the previous patch so there is no need to retain the double underscore on __gfn_to_rmap. Reviewed-by: Paolo Bonzini Signed-off-by: David Matlack Message-Id: <20210804222844.1419481-7-dmatlack@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 25 ++++++++++++------------- arch/x86/kvm/mmu/mmu_audit.c | 4 ++-- 2 files changed, 14 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 3a0ae48a26e9..964c797dcc46 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -1035,8 +1035,8 @@ out: return true; } -static struct kvm_rmap_head *__gfn_to_rmap(gfn_t gfn, int level, - const struct kvm_memory_slot *slot) +static struct kvm_rmap_head *gfn_to_rmap(gfn_t gfn, int level, + const struct kvm_memory_slot *slot) { unsigned long idx; @@ -1061,7 +1061,7 @@ static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) sp = sptep_to_sp(spte); kvm_mmu_page_set_gfn(sp, spte - sp->spt, gfn); slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); - rmap_head = __gfn_to_rmap(gfn, sp->role.level, slot); + rmap_head = gfn_to_rmap(gfn, sp->role.level, slot); return pte_list_add(vcpu, spte, rmap_head); } @@ -1085,7 +1085,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte) slots = kvm_memslots_for_spte_role(kvm, sp->role); slot = __gfn_to_memslot(slots, gfn); - rmap_head = __gfn_to_rmap(gfn, sp->role.level, slot); + rmap_head = gfn_to_rmap(gfn, sp->role.level, slot); __pte_list_remove(spte, rmap_head); } @@ -1307,8 +1307,8 @@ static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, return; while (mask) { - rmap_head = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask), - PG_LEVEL_4K, slot); + rmap_head = gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask), + PG_LEVEL_4K, slot); __rmap_write_protect(kvm, rmap_head, false); /* clear the first set bit */ @@ -1340,8 +1340,8 @@ static void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm, return; while (mask) { - rmap_head = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask), - PG_LEVEL_4K, slot); + rmap_head = gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask), + PG_LEVEL_4K, slot); __rmap_clear_dirty(kvm, rmap_head, slot); /* clear the first set bit */ @@ -1407,7 +1407,7 @@ bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm, if (kvm_memslots_have_rmaps(kvm)) { for (i = min_level; 
i <= KVM_MAX_HUGEPAGE_LEVEL; ++i) { - rmap_head = __gfn_to_rmap(gfn, i, slot); + rmap_head = gfn_to_rmap(gfn, i, slot); write_protected |= __rmap_write_protect(kvm, rmap_head, true); } } @@ -1502,9 +1502,8 @@ rmap_walk_init_level(struct slot_rmap_walk_iterator *iterator, int level) { iterator->level = level; iterator->gfn = iterator->start_gfn; - iterator->rmap = __gfn_to_rmap(iterator->gfn, level, iterator->slot); - iterator->end_rmap = __gfn_to_rmap(iterator->end_gfn, level, - iterator->slot); + iterator->rmap = gfn_to_rmap(iterator->gfn, level, iterator->slot); + iterator->end_rmap = gfn_to_rmap(iterator->end_gfn, level, iterator->slot); } static void @@ -1630,7 +1629,7 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) sp = sptep_to_sp(spte); slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); - rmap_head = __gfn_to_rmap(gfn, sp->role.level, slot); + rmap_head = gfn_to_rmap(gfn, sp->role.level, slot); kvm_unmap_rmapp(vcpu->kvm, rmap_head, NULL, gfn, sp->role.level, __pte(0)); kvm_flush_remote_tlbs_with_address(vcpu->kvm, sp->gfn, diff --git a/arch/x86/kvm/mmu/mmu_audit.c b/arch/x86/kvm/mmu/mmu_audit.c index cedc17b2f60e..9e7dcf999f08 100644 --- a/arch/x86/kvm/mmu/mmu_audit.c +++ b/arch/x86/kvm/mmu/mmu_audit.c @@ -147,7 +147,7 @@ static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep) return; } - rmap_head = __gfn_to_rmap(gfn, rev_sp->role.level, slot); + rmap_head = gfn_to_rmap(gfn, rev_sp->role.level, slot); if (!rmap_head->val) { if (!__ratelimit(&ratelimit_state)) return; @@ -200,7 +200,7 @@ static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp) slots = kvm_memslots_for_spte_role(kvm, sp->role); slot = __gfn_to_memslot(slots, sp->gfn); - rmap_head = __gfn_to_rmap(sp->gfn, PG_LEVEL_4K, slot); + rmap_head = gfn_to_rmap(sp->gfn, PG_LEVEL_4K, slot); for_each_rmap_spte(rmap_head, &iter, sptep) { if (is_writable_pte(*sptep)) -- cgit From ca33d26ac640bdf74e5fd0fcd711dbc5ab9f36c0 Mon Sep 17 00:00:00 2001 From: Martin Oliveira Date: Thu, 29 Jul 2021 14:15:23 -0600 Subject: alpha: return error code from alpha_pci_map_sg() The .map_sg() op now expects an error code instead of zero on failure. pci_map_single_1() can fail for different reasons, but since the only supported type of error return is DMA_MAPPING_ERROR, we coalesce those errors into EIO. ENOMEM is returned when no page tables can be allocated. 
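As a point of reference, here is a minimal, hypothetical caller-side sketch (not part of this patch; the function and names are illustrative only) of how the errno now surfaces: dma_map_sgtable() passes the negative error code up to the driver, whereas the legacy dma_map_sg() wrapper still collapses failures to 0:

	/* Hypothetical driver-side use of the errno-based convention. */
	#include <linux/dma-mapping.h>

	static int example_map(struct device *dev, struct sg_table *sgt)
	{
		int ret = dma_map_sgtable(dev, sgt, DMA_TO_DEVICE, 0);

		if (ret)	/* e.g. -EIO or -ENOMEM from the arch .map_sg() op */
			return ret;
		/* ... program the device, then dma_unmap_sgtable(dev, sgt, DMA_TO_DEVICE, 0) ... */
		return 0;
	}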
Signed-off-by: Martin Oliveira Signed-off-by: Logan Gunthorpe Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Signed-off-by: Christoph Hellwig --- arch/alpha/kernel/pci_iommu.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c index 35d7b3096d6e..21f9ac101324 100644 --- a/arch/alpha/kernel/pci_iommu.c +++ b/arch/alpha/kernel/pci_iommu.c @@ -649,7 +649,9 @@ static int alpha_pci_map_sg(struct device *dev, struct scatterlist *sg, sg->dma_address = pci_map_single_1(pdev, SG_ENT_VIRT_ADDRESS(sg), sg->length, dac_allowed); - return sg->dma_address != DMA_MAPPING_ERROR; + if (sg->dma_address == DMA_MAPPING_ERROR) + return -EIO; + return 1; } start = sg; @@ -685,8 +687,10 @@ static int alpha_pci_map_sg(struct device *dev, struct scatterlist *sg, if (out < end) out->dma_length = 0; - if (out - start == 0) + if (out - start == 0) { printk(KERN_WARNING "pci_map_sg failed: no entries?\n"); + return -ENOMEM; + } DBGA("pci_map_sg: %ld entries\n", out - start); return out - start; @@ -699,7 +703,7 @@ static int alpha_pci_map_sg(struct device *dev, struct scatterlist *sg, entries. Unmap them now. */ if (out > start) pci_unmap_sg(pdev, start, out - start, dir); - return 0; + return -ENOMEM; } /* Unmap a set of streaming mode DMA translations. Again, cpu read -- cgit From 6506932b3268001ff15c081aceb47c7f9aa9fb2d Mon Sep 17 00:00:00 2001 From: Martin Oliveira Date: Thu, 29 Jul 2021 14:15:24 -0600 Subject: ARM/dma-mapping: return error code from .map_sg() ops The .map_sg() op now expects an error code instead of zero on failure. In the case of a DMA_MAPPING_ERROR, -EIO is returned. Otherwise, -ENOMEM or -EINVAL is returned depending on the error from __map_sg_chunk(). 
Signed-off-by: Martin Oliveira Signed-off-by: Logan Gunthorpe Cc: Russell King Cc: Thomas Bogendoerfer Signed-off-by: Christoph Hellwig --- arch/arm/mm/dma-mapping.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index c4b8df2ad328..113b9cb3701b 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -980,7 +980,7 @@ int arm_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, { const struct dma_map_ops *ops = get_dma_ops(dev); struct scatterlist *s; - int i, j; + int i, j, ret; for_each_sg(sg, s, nents, i) { #ifdef CONFIG_NEED_SG_DMA_LENGTH @@ -988,15 +988,17 @@ int arm_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, #endif s->dma_address = ops->map_page(dev, sg_page(s), s->offset, s->length, dir, attrs); - if (dma_mapping_error(dev, s->dma_address)) + if (dma_mapping_error(dev, s->dma_address)) { + ret = -EIO; goto bad_mapping; + } } return nents; bad_mapping: for_each_sg(sg, s, i, j) ops->unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir, attrs); - return 0; + return ret; } /** @@ -1622,7 +1624,7 @@ static int __iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents, bool is_coherent) { struct scatterlist *s = sg, *dma = sg, *start = sg; - int i, count = 0; + int i, count = 0, ret; unsigned int offset = s->offset; unsigned int size = s->offset + s->length; unsigned int max = dma_get_max_seg_size(dev); @@ -1634,8 +1636,10 @@ static int __iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents, s->dma_length = 0; if (s->offset || (size & ~PAGE_MASK) || size + s->length > max) { - if (__map_sg_chunk(dev, start, size, &dma->dma_address, - dir, attrs, is_coherent) < 0) + ret = __map_sg_chunk(dev, start, size, + &dma->dma_address, dir, attrs, + is_coherent); + if (ret < 0) goto bad_mapping; dma->dma_address += offset; @@ -1648,8 +1652,9 @@ static int __iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents, } size += s->length; } - if (__map_sg_chunk(dev, start, size, &dma->dma_address, dir, attrs, - is_coherent) < 0) + ret = __map_sg_chunk(dev, start, size, &dma->dma_address, dir, attrs, + is_coherent); + if (ret < 0) goto bad_mapping; dma->dma_address += offset; @@ -1660,7 +1665,9 @@ static int __iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents, bad_mapping: for_each_sg(sg, s, count, i) __iommu_remove_mapping(dev, sg_dma_address(s), sg_dma_len(s)); - return 0; + if (ret == -ENOMEM) + return ret; + return -EINVAL; } /** -- cgit From 9cf88ec5e0e876c959389b1e695438f98663b92d Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Thu, 29 Jul 2021 14:15:25 -0600 Subject: ARM/dma-mapping: don't set failed sg dma_address to DMA_MAPPING_ERROR Setting the ->dma_address to DMA_MAPPING_ERROR is not part of the ->map_sg calling convention, so remove it. 
Link: https://lore.kernel.org/linux-mips/20210716063241.GC13345@lst.de/ Suggested-by: Christoph Hellwig Signed-off-by: Logan Gunthorpe Cc: Russell King Cc: Thomas Bogendoerfer Signed-off-by: Christoph Hellwig --- arch/arm/mm/dma-mapping.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 113b9cb3701b..4b61541853ea 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -1632,7 +1632,6 @@ static int __iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents, for (i = 1; i < nents; i++) { s = sg_next(s); - s->dma_address = DMA_MAPPING_ERROR; s->dma_length = 0; if (s->offset || (size & ~PAGE_MASK) || size + s->length > max) { -- cgit From 62af5ca50c29a9be266656dfb88db6d439d129a1 Mon Sep 17 00:00:00 2001 From: Martin Oliveira Date: Thu, 29 Jul 2021 14:15:26 -0600 Subject: ia64/sba_iommu: return error code from sba_map_sg_attrs() The .map_sg() op now expects an error code instead of zero on failure. In the case of a dma_mapping_error(), -EIO is returned, as the actual cause is opaque here. sba_coalesce_chunks() may only presently fail if sba_alloc_range() fails, which in turn only fails if the iommu is out of mapping resources, hence -ENOMEM is used in that case. Signed-off-by: Martin Oliveira Signed-off-by: Logan Gunthorpe Cc: Michael Ellerman Cc: Niklas Schnelle Cc: Thomas Bogendoerfer Signed-off-by: Christoph Hellwig --- arch/ia64/hp/common/sba_iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c index 9148ddbf02e5..8ad6946521d8 100644 --- a/arch/ia64/hp/common/sba_iommu.c +++ b/arch/ia64/hp/common/sba_iommu.c @@ -1459,7 +1459,7 @@ static int sba_map_sg_attrs(struct device *dev, struct scatterlist *sglist, sglist->dma_address = sba_map_page(dev, sg_page(sglist), sglist->offset, sglist->length, dir, attrs); if (dma_mapping_error(dev, sglist->dma_address)) - return 0; + return -EIO; return 1; } @@ -1486,7 +1486,7 @@ static int sba_map_sg_attrs(struct device *dev, struct scatterlist *sglist, coalesced = sba_coalesce_chunks(ioc, dev, sglist, nents); if (coalesced < 0) { sba_unmap_sg_attrs(dev, sglist, nents, dir, attrs); - return 0; + return -ENOMEM; } /* -- cgit From af82fe85665d49bdb72dba66cd57e3bf95b71895 Mon Sep 17 00:00:00 2001 From: Martin Oliveira Date: Thu, 29 Jul 2021 14:15:27 -0600 Subject: MIPS/jazzdma: return error code from jazz_dma_map_sg() The .map_sg() op now expects an error code instead of zero on failure. vdma_alloc() may fail for different reasons, but since it only supports indicating an error via a return of DMA_MAPPING_ERROR, we coalesce the different reasons into -EIO, as is documented on dma_map_sgtable().
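For reference, the error codes dma_map_sgtable() documents for .map_sg() failures are roughly the following (a summary of the kernel-doc from this series; wording approximate):

	/*
	 * -EINVAL  invalid argument, unaligned access or other usage error;
	 *          retrying will not succeed
	 * -ENOMEM  insufficient mapping resources (e.g. memory or IOVA space);
	 *          may succeed if retried later
	 * -EIO     legacy error code with an opaque meaning, e.g. from a
	 *          helper that can only report DMA_MAPPING_ERROR
	 */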
Signed-off-by: Martin Oliveira Signed-off-by: Logan Gunthorpe Cc: Thomas Bogendoerfer Signed-off-by: Christoph Hellwig --- arch/mips/jazz/jazzdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/jazz/jazzdma.c b/arch/mips/jazz/jazzdma.c index 461457b28982..eabddb89d221 100644 --- a/arch/mips/jazz/jazzdma.c +++ b/arch/mips/jazz/jazzdma.c @@ -552,7 +552,7 @@ static int jazz_dma_map_sg(struct device *dev, struct scatterlist *sglist, dir); sg->dma_address = vdma_alloc(sg_phys(sg), sg->length); if (sg->dma_address == DMA_MAPPING_ERROR) - return 0; + return -EIO; sg_dma_len(sg) = sg->length; } -- cgit From c4e0e892ab0579d4de3ca6d260d537fc16b39790 Mon Sep 17 00:00:00 2001 From: Martin Oliveira Date: Thu, 29 Jul 2021 14:15:28 -0600 Subject: powerpc/iommu: return error code from .map_sg() ops The .map_sg() op now expects an error code instead of zero on failure. Propagate the error up if vio_dma_iommu_map_sg() fails. ppc_iommu_map_sg() may fail either because of iommu_range_alloc() or because of tbl->it_ops->set(). The former only supports returning an error with DMA_MAPPING_ERROR and an examination of the latter indicates that it may return arch-specific errors (for example, tce_buildmulti_pSeriesLP()). Hence, coalesce all of those errors into -EIO, per the documentation on dma_map_sgtable(). Signed-off-by: Martin Oliveira Signed-off-by: Logan Gunthorpe Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Geoff Levand Signed-off-by: Christoph Hellwig --- arch/powerpc/kernel/iommu.c | 4 ++-- arch/powerpc/platforms/ps3/system-bus.c | 2 +- arch/powerpc/platforms/pseries/vio.c | 5 +++-- 3 files changed, 6 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 2af89a5e379f..a8ec4fe42817 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -473,7 +473,7 @@ int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl, BUG_ON(direction == DMA_NONE); if ((nelems == 0) || !tbl) - return 0; + return -EINVAL; outs = s = segstart = &sglist[0]; outcount = 1; @@ -599,7 +599,7 @@ int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl, if (s == outs) break; } - return 0; + return -EIO; } diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c index 1a5665875165..c54eb46f0cfb 100644 --- a/arch/powerpc/platforms/ps3/system-bus.c +++ b/arch/powerpc/platforms/ps3/system-bus.c @@ -663,7 +663,7 @@ static int ps3_ioc0_map_sg(struct device *_dev, struct scatterlist *sg, unsigned long attrs) { BUG(); - return 0; + return -EINVAL; } static void ps3_sb_unmap_sg(struct device *_dev, struct scatterlist *sg, diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c index e00f3725ec96..e31e59c54f30 100644 --- a/arch/powerpc/platforms/pseries/vio.c +++ b/arch/powerpc/platforms/pseries/vio.c @@ -560,7 +560,8 @@ static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist, for_each_sg(sglist, sgl, nelems, count) alloc_size += roundup(sgl->length, IOMMU_PAGE_SIZE(tbl)); - if (vio_cmo_alloc(viodev, alloc_size)) + ret = vio_cmo_alloc(viodev, alloc_size); + if (ret) goto out_fail; ret = ppc_iommu_map_sg(dev, tbl, sglist, nelems, dma_get_mask(dev), direction, attrs); @@ -577,7 +578,7 @@ out_deallocate: vio_cmo_dealloc(viodev, alloc_size); out_fail: atomic_inc(&viodev->cmo.allocs_failed); - return 0; + return ret; } static void vio_dma_iommu_unmap_sg(struct device *dev, -- 
cgit From eb86ef3b2d7e06b1447d4979e282977a845b611b Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Thu, 29 Jul 2021 14:15:29 -0600 Subject: powerpc/iommu: don't set failed sg dma_address to DMA_MAPPING_ERROR Setting the ->dma_address to DMA_MAPPING_ERROR is not part of the ->map_sg calling convention, so remove it. Link: https://lore.kernel.org/linux-mips/20210716063241.GC13345@lst.de/ Suggested-by: Christoph Hellwig Signed-off-by: Logan Gunthorpe Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Geoff Levand Signed-off-by: Christoph Hellwig --- arch/powerpc/kernel/iommu.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index a8ec4fe42817..30b7736f0896 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -575,7 +575,6 @@ int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl, */ if (outcount < incount) { outs = sg_next(outs); - outs->dma_address = DMA_MAPPING_ERROR; outs->dma_length = 0; } @@ -593,7 +592,6 @@ int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl, npages = iommu_num_pages(s->dma_address, s->dma_length, IOMMU_PAGE_SIZE(tbl)); __iommu_free(tbl, vaddr, npages); - s->dma_address = DMA_MAPPING_ERROR; s->dma_length = 0; } if (s == outs) -- cgit From 911ace0ba628424637e42826c8a8b63b89bd5611 Mon Sep 17 00:00:00 2001 From: Martin Oliveira Date: Thu, 29 Jul 2021 14:15:30 -0600 Subject: s390/pci: return error code from s390_dma_map_sg() The .map_sg() op now expects an error code instead of zero on failure. So propagate the error from __s390_dma_map_sg() up. __s390_dma_map_sg() returns either -ENOMEM on allocation failure or -EINVAL which is the same as what's expected by dma_map_sgtable(). Signed-off-by: Martin Oliveira Signed-off-by: Logan Gunthorpe Acked-by: Niklas Schnelle Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Christian Borntraeger Signed-off-by: Christoph Hellwig --- arch/s390/pci/pci_dma.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index ebc9a49523aa..c78b02012764 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -487,7 +487,7 @@ static int s390_dma_map_sg(struct device *dev, struct scatterlist *sg, unsigned int max = dma_get_max_seg_size(dev); unsigned int size = s->offset + s->length; unsigned int offset = s->offset; - int count = 0, i; + int count = 0, i, ret; for (i = 1; i < nr_elements; i++) { s = sg_next(s); @@ -497,8 +497,9 @@ static int s390_dma_map_sg(struct device *dev, struct scatterlist *sg, if (s->offset || (size & ~PAGE_MASK) || size + s->length > max) { - if (__s390_dma_map_sg(dev, start, size, - &dma->dma_address, dir)) + ret = __s390_dma_map_sg(dev, start, size, + &dma->dma_address, dir); + if (ret) goto unmap; dma->dma_address += offset; @@ -511,7 +512,8 @@ static int s390_dma_map_sg(struct device *dev, struct scatterlist *sg, } size += s->length; } - if (__s390_dma_map_sg(dev, start, size, &dma->dma_address, dir)) + ret = __s390_dma_map_sg(dev, start, size, &dma->dma_address, dir); + if (ret) goto unmap; dma->dma_address += offset; @@ -523,7 +525,7 @@ unmap: s390_dma_unmap_pages(dev, sg_dma_address(s), sg_dma_len(s), dir, attrs); - return 0; + return ret; } static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg, -- cgit From 7e4e7d4c54ecc9986041aae84e04280413f2435b Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Thu, 29 Jul 2021 14:15:31 
-0600 Subject: s390/pci: don't set failed sg dma_address to DMA_MAPPING_ERROR Setting the ->dma_address to DMA_MAPPING_ERROR is not part of the ->map_sg calling convention, so remove it. Link: https://lore.kernel.org/linux-mips/20210716063241.GC13345@lst.de/ Suggested-by: Christoph Hellwig Signed-off-by: Logan Gunthorpe Cc: Niklas Schnelle Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Christian Borntraeger Signed-off-by: Christoph Hellwig --- arch/s390/pci/pci_dma.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index c78b02012764..be48e5b5bfcf 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -492,7 +492,6 @@ static int s390_dma_map_sg(struct device *dev, struct scatterlist *sg, for (i = 1; i < nr_elements; i++) { s = sg_next(s); - s->dma_address = DMA_MAPPING_ERROR; s->dma_length = 0; if (s->offset || (size & ~PAGE_MASK) || -- cgit From e02373fddb0ea86ce3962bb5d38e20ea0b6c8a05 Mon Sep 17 00:00:00 2001 From: Martin Oliveira Date: Thu, 29 Jul 2021 14:15:32 -0600 Subject: sparc/iommu: return error codes from .map_sg() ops The .map_sg() op now expects an error code instead of zero on failure. Returning an errno from __sbus_iommu_map_sg() results in sbus_iommu_map_sg_gflush() and sbus_iommu_map_sg_pflush() returning an errno, as those functions are wrappers around __sbus_iommu_map_sg(). Signed-off-by: Martin Oliveira Signed-off-by: Logan Gunthorpe Cc: "David S. Miller" Cc: Niklas Schnelle Cc: Michael Ellerman Signed-off-by: Christoph Hellwig --- arch/sparc/kernel/iommu.c | 4 ++-- arch/sparc/kernel/pci_sun4v.c | 4 ++-- arch/sparc/mm/iommu.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c index a034f571d869..0589acd34201 100644 --- a/arch/sparc/kernel/iommu.c +++ b/arch/sparc/kernel/iommu.c @@ -448,7 +448,7 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist, iommu = dev->archdata.iommu; strbuf = dev->archdata.stc; if (nelems == 0 || !iommu) - return 0; + return -EINVAL; spin_lock_irqsave(&iommu->lock, flags); @@ -580,7 +580,7 @@ iommu_map_failed: } spin_unlock_irqrestore(&iommu->lock, flags); - return 0; + return -EINVAL; } /* If contexts are being used, they are the same in all of the mappings diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c index 9de57e88f7a1..d90e80fa5705 100644 --- a/arch/sparc/kernel/pci_sun4v.c +++ b/arch/sparc/kernel/pci_sun4v.c @@ -486,7 +486,7 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist, iommu = dev->archdata.iommu; if (nelems == 0 || !iommu) - return 0; + return -EINVAL; atu = iommu->atu; prot = HV_PCI_MAP_ATTR_READ; @@ -619,7 +619,7 @@ iommu_map_failed: } local_irq_restore(flags); - return 0; + return -EINVAL; } static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist, diff --git a/arch/sparc/mm/iommu.c b/arch/sparc/mm/iommu.c index 0c0342e5b10d..9e3f6933ca13 100644 --- a/arch/sparc/mm/iommu.c +++ b/arch/sparc/mm/iommu.c @@ -256,7 +256,7 @@ static int __sbus_iommu_map_sg(struct device *dev, struct scatterlist *sgl, sg->dma_address =__sbus_iommu_map_page(dev, sg_page(sg), sg->offset, sg->length, per_page_flush); if (sg->dma_address == DMA_MAPPING_ERROR) - return 0; + return -EIO; sg->dma_length = sg->length; } -- cgit From ba3a0482db879f33863348b81ed9770888d727a9 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Thu, 29 Jul 2021 14:15:33 -0600 Subject: sparc/iommu: 
don't set failed sg dma_address to DMA_MAPPING_ERROR Setting the ->dma_address to DMA_MAPPING_ERROR is not part of the ->map_sg calling convention, so remove it. Link: https://lore.kernel.org/linux-mips/20210716063241.GC13345@lst.de/ Suggested-by: Christoph Hellwig Signed-off-by: Logan Gunthorpe Cc: "David S. Miller" Cc: Niklas Schnelle Cc: Michael Ellerman Signed-off-by: Christoph Hellwig --- arch/sparc/kernel/iommu.c | 2 -- arch/sparc/kernel/pci_sun4v.c | 2 -- 2 files changed, 4 deletions(-) (limited to 'arch') diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c index 0589acd34201..da0363692528 100644 --- a/arch/sparc/kernel/iommu.c +++ b/arch/sparc/kernel/iommu.c @@ -546,7 +546,6 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist, if (outcount < incount) { outs = sg_next(outs); - outs->dma_address = DMA_MAPPING_ERROR; outs->dma_length = 0; } @@ -572,7 +571,6 @@ iommu_map_failed: iommu_tbl_range_free(&iommu->tbl, vaddr, npages, IOMMU_ERROR_CODE); - s->dma_address = DMA_MAPPING_ERROR; s->dma_length = 0; } if (s == outs) diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c index d90e80fa5705..384480971805 100644 --- a/arch/sparc/kernel/pci_sun4v.c +++ b/arch/sparc/kernel/pci_sun4v.c @@ -594,7 +594,6 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist, if (outcount < incount) { outs = sg_next(outs); - outs->dma_address = DMA_MAPPING_ERROR; outs->dma_length = 0; } @@ -611,7 +610,6 @@ iommu_map_failed: iommu_tbl_range_free(tbl, vaddr, npages, IOMMU_ERROR_CODE); /* XXX demap? XXX */ - s->dma_address = DMA_MAPPING_ERROR; s->dma_length = 0; } if (s == outs) -- cgit From fcacc8a614391d3bf009319509c846fef47d9352 Mon Sep 17 00:00:00 2001 From: Martin Oliveira Date: Thu, 29 Jul 2021 14:15:36 -0600 Subject: x86/amd_gart: return error code from gart_map_sg() The .map_sg() op now expects an error code instead of zero on failure. So make __dma_map_cont() return a valid errno (which is then propagated to gart_map_sg() via dma_map_cont()) and return it in case of failure. Also, return -EINVAL in case of invalid nents. Signed-off-by: Martin Oliveira Signed-off-by: Logan Gunthorpe Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: "H. 
Peter Anvin" Cc: Niklas Schnelle Cc: Thomas Bogendoerfer Cc: Michael Ellerman Signed-off-by: Christoph Hellwig --- arch/x86/kernel/amd_gart_64.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index 9ac696487b13..46aea9a4f26b 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -331,7 +331,7 @@ static int __dma_map_cont(struct device *dev, struct scatterlist *start, int i; if (iommu_start == -1) - return -1; + return -ENOMEM; for_each_sg(start, s, nelems, i) { unsigned long pages, addr; @@ -380,13 +380,13 @@ static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, unsigned long attrs) { struct scatterlist *s, *ps, *start_sg, *sgmap; - int need = 0, nextneed, i, out, start; + int need = 0, nextneed, i, out, start, ret; unsigned long pages = 0; unsigned int seg_size; unsigned int max_seg_size; if (nents == 0) - return 0; + return -EINVAL; out = 0; start = 0; @@ -414,8 +414,9 @@ static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, if (!iommu_merge || !nextneed || !need || s->offset || (s->length + seg_size > max_seg_size) || (ps->offset + ps->length) % PAGE_SIZE) { - if (dma_map_cont(dev, start_sg, i - start, - sgmap, pages, need) < 0) + ret = dma_map_cont(dev, start_sg, i - start, + sgmap, pages, need); + if (ret < 0) goto error; out++; @@ -432,7 +433,8 @@ static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, pages += iommu_num_pages(s->offset, s->length, PAGE_SIZE); ps = s; } - if (dma_map_cont(dev, start_sg, i - start, sgmap, pages, need) < 0) + ret = dma_map_cont(dev, start_sg, i - start, sgmap, pages, need); + if (ret < 0) goto error; out++; flush_gart(); @@ -458,7 +460,7 @@ error: iommu_full(dev, pages << PAGE_SHIFT, dir); for_each_sg(sg, s, nents, i) s->dma_address = DMA_MAPPING_ERROR; - return 0; + return ret; } /* allocate and map a coherent mapping */ -- cgit From 183dc86335e69f516b94f169afd784cf97cb8421 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Thu, 29 Jul 2021 14:15:37 -0600 Subject: x86/amd_gart: don't set failed sg dma_address to DMA_MAPPING_ERROR Setting the ->dma_address to DMA_MAPPING_ERROR is not part of the ->map_sg calling convention, so remove it. Link: https://lore.kernel.org/linux-mips/20210716063241.GC13345@lst.de/ Suggested-by: Christoph Hellwig Signed-off-by: Logan Gunthorpe Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Niklas Schnelle Cc: Thomas Bogendoerfer Cc: Michael Ellerman Signed-off-by: Christoph Hellwig --- arch/x86/kernel/amd_gart_64.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index 46aea9a4f26b..ed837383de5c 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -458,8 +458,6 @@ error: panic("dma_map_sg: overflow on %lu pages\n", pages); iommu_full(dev, pages << PAGE_SHIFT, dir); - for_each_sg(sg, s, nents, i) - s->dma_address = DMA_MAPPING_ERROR; return ret; } -- cgit From d8285639550578a1bf2d102391d1a9e08e0586ca Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 25 Jul 2021 03:35:56 +0900 Subject: kbuild: do not require sub-make for separate output tree builds As explained in commit 3204a7fb98a3 ("kbuild: prefix $(srctree)/ to some included Makefiles"), I want to stop using --include-dir some day. 
I already fixed up the top Makefile, but some arch Makefiles (mips, um, x86) still include checked-in Makefiles without $(srctree)/. Fix them up so 'need-sub-make := 1' can go away for this case. Signed-off-by: Masahiro Yamada --- arch/mips/Makefile | 2 +- arch/um/Makefile | 6 +++--- arch/x86/Makefile | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/mips/Makefile b/arch/mips/Makefile index 653befc1b176..5fd26d514851 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -254,7 +254,7 @@ endif # # Board-dependent options and extra files # -include arch/mips/Kbuild.platforms +include $(srctree)/arch/mips/Kbuild.platforms ifdef CONFIG_PHYSICAL_START load-y = $(CONFIG_PHYSICAL_START) diff --git a/arch/um/Makefile b/arch/um/Makefile index 12a7acef0357..f2fe63bfd819 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -41,8 +41,8 @@ endif HOST_DIR := arch/$(HEADER_ARCH) -include $(ARCH_DIR)/Makefile-skas -include $(HOST_DIR)/Makefile.um +include $(srctree)/$(ARCH_DIR)/Makefile-skas +include $(srctree)/$(HOST_DIR)/Makefile.um core-y += $(HOST_DIR)/um/ @@ -76,7 +76,7 @@ USER_CFLAGS = $(patsubst $(KERNEL_DEFINES),,$(patsubst -I%,,$(KBUILD_CFLAGS))) \ -idirafter $(objtree)/include -D__KERNEL__ -D__UM_HOST__ #This will adjust *FLAGS accordingly to the platform. -include $(ARCH_DIR)/Makefile-os-$(OS) +include $(srctree)/$(ARCH_DIR)/Makefile-os-$(OS) KBUILD_CPPFLAGS += -I$(srctree)/$(HOST_DIR)/include \ -I$(srctree)/$(HOST_DIR)/include/uapi \ diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 307fd0000a83..0fa7dc73b5d8 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -75,7 +75,7 @@ ifeq ($(CONFIG_X86_32),y) KBUILD_CFLAGS += $(call cc-option,$(cc_stack_align4)) # CPU-specific tuning. Anything which can be shared with UML should go here. - include arch/x86/Makefile_32.cpu + include $(srctree)/arch/x86/Makefile_32.cpu KBUILD_CFLAGS += $(cflags-y) # temporary until string.h is fixed -- cgit From 52cc02b910284d6bddba46cce402044ab775f314 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 6 Aug 2021 00:01:02 +0900 Subject: kbuild: check CONFIG_AS_IS_LLVM instead of LLVM_IAS LLVM_IAS is the user interface to set the -(no-)integrated-as flag, and it should be used only for that purpose. LLVM_IAS is checked in some places to determine the assembler type, but it is not precise. For example, $ make CC=gcc LLVM_IAS=1 ... will use the GNU assembler (i.e. binutils) since LLVM_IAS=1 is effective only when $(CC) is clang. Of course, 'CC=gcc LLVM_IAS=1' is an odd combination, but the build system can be more robust against such insane input. Commit ba64beb17493a ("kbuild: check the minimum assembler version in Kconfig") introduced CONFIG_AS_IS_GNU/LLVM, which is more precise because Kconfig checks the version string from the assembler in use.
Signed-off-by: Masahiro Yamada Reviewed-by: Nick Desaulniers Reviewed-by: Nathan Chancellor --- arch/riscv/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index bc74afdbf31e..dcfbd2a87d41 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -41,7 +41,7 @@ endif ifeq ($(CONFIG_LD_IS_LLD),y) KBUILD_CFLAGS += -mno-relax KBUILD_AFLAGS += -mno-relax -ifneq ($(LLVM_IAS),1) +ifndef CONFIG_AS_IS_LLVM KBUILD_CFLAGS += -Wa,-mno-relax KBUILD_AFLAGS += -Wa,-mno-relax endif -- cgit From c755238d2ce0960ced9ffccc2ce14de2cd01b647 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 11 Jul 2021 23:31:43 +0100 Subject: ARM: 9099/1: crypto: rename 'mod_init' & 'mod_exit' functions to be module-specific Rename module_init & module_exit functions that are named "mod_init" and "mod_exit" so that they are unique in both the System.map file and in initcall_debug output instead of showing up as almost anonymous "mod_init". This is helpful for debugging and in determining how long certain module_init calls take to execute. Signed-off-by: Randy Dunlap Cc: Jason A. Donenfeld Cc: linux-arm-kernel@lists.infradead.org Cc: patches@armlinux.org.uk Signed-off-by: Russell King (Oracle) --- arch/arm/crypto/curve25519-glue.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm/crypto/curve25519-glue.c b/arch/arm/crypto/curve25519-glue.c index 31eb75b6002f..9bdafd57888c 100644 --- a/arch/arm/crypto/curve25519-glue.c +++ b/arch/arm/crypto/curve25519-glue.c @@ -112,7 +112,7 @@ static struct kpp_alg curve25519_alg = { .max_size = curve25519_max_size, }; -static int __init mod_init(void) +static int __init arm_curve25519_init(void) { if (elf_hwcap & HWCAP_NEON) { static_branch_enable(&have_neon); @@ -122,14 +122,14 @@ static int __init mod_init(void) return 0; } -static void __exit mod_exit(void) +static void __exit arm_curve25519_exit(void) { if (IS_REACHABLE(CONFIG_CRYPTO_KPP) && elf_hwcap & HWCAP_NEON) crypto_unregister_kpp(&curve25519_alg); } -module_init(mod_init); -module_exit(mod_exit); +module_init(arm_curve25519_init); +module_exit(arm_curve25519_exit); MODULE_ALIAS_CRYPTO("curve25519"); MODULE_ALIAS_CRYPTO("curve25519-neon"); -- cgit From d7bcc5e22967c96685d03dbbd167e1a1ddf9b910 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 29 Jul 2021 15:03:51 +0100 Subject: ARM: 9102/1: move the install rules to arch/arm/Makefile Currently, the (z/u)install targets in arch/arm/Makefile descend into arch/arm/boot/Makefile to invoke the shell script, but there is no good reason to do so. arch/arm/Makefile can run the shell script directly.
Signed-off-by: Masahiro Yamada Signed-off-by: Russell King (Oracle) --- arch/arm/Makefile | 3 ++- arch/arm/boot/Makefile | 14 +------------- 2 files changed, 3 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 173da685a52e..847c31e7c368 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -308,7 +308,8 @@ $(BOOT_TARGETS): vmlinux @$(kecho) ' Kernel: $(boot)/$@ is ready' $(INSTALL_TARGETS): - $(Q)$(MAKE) $(build)=$(boot) MACHINE=$(MACHINE) $@ + $(CONFIG_SHELL) $(srctree)/$(boot)/install.sh "$(KERNELRELEASE)" \ + $(boot)/$(patsubst %install,%Image,$@) System.map "$(INSTALL_PATH)" PHONY += vdso_install vdso_install: diff --git a/arch/arm/boot/Makefile b/arch/arm/boot/Makefile index 0b3cd7a33a26..54a09f9464fb 100644 --- a/arch/arm/boot/Makefile +++ b/arch/arm/boot/Makefile @@ -96,23 +96,11 @@ $(obj)/bootp/bootp: $(obj)/zImage initrd FORCE $(obj)/bootpImage: $(obj)/bootp/bootp FORCE $(call if_changed,objcopy) -PHONY += initrd install zinstall uinstall +PHONY += initrd initrd: @test "$(INITRD_PHYS)" != "" || \ (echo This machine does not support INITRD; exit -1) @test "$(INITRD)" != "" || \ (echo You must specify INITRD; exit -1) -install: - $(CONFIG_SHELL) $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" \ - $(obj)/Image System.map "$(INSTALL_PATH)" - -zinstall: - $(CONFIG_SHELL) $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" \ - $(obj)/zImage System.map "$(INSTALL_PATH)" - -uinstall: - $(CONFIG_SHELL) $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" \ - $(obj)/uImage System.map "$(INSTALL_PATH)" - subdir- := bootp compressed dts -- cgit From 6fec92d9b2bfd2fb1a2a4295dc859d9bab16c8fc Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 30 Jul 2021 10:43:02 +0100 Subject: ARM: 9103/1: Drop ARCH_NR_GPIOS definition The generic header applies the same conditional, hence drop the unnecessary duplication. Link: https://lore.kernel.org/r/20210510114107.43006-1-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Reviewed-by: Linus Walleij Signed-off-by: Linus Walleij Signed-off-by: Russell King (Oracle) --- arch/arm/include/asm/gpio.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/gpio.h b/arch/arm/include/asm/gpio.h index c50e383358c4..f3bb8a2bf788 100644 --- a/arch/arm/include/asm/gpio.h +++ b/arch/arm/include/asm/gpio.h @@ -2,10 +2,6 @@ #ifndef _ARCH_ARM_GPIO_H #define _ARCH_ARM_GPIO_H -#if CONFIG_ARCH_NR_GPIO > 0 -#define ARCH_NR_GPIOS CONFIG_ARCH_NR_GPIO -#endif - /* Note: this may rely upon the value of ARCH_NR_GPIOS set in mach/gpio.h */ #include -- cgit From b30d0289de72c62516df03fdad8d53f552c69839 Mon Sep 17 00:00:00 2001 From: David Heidelberg Date: Mon, 9 Aug 2021 19:07:30 +0100 Subject: ARM: 9105/1: atags_to_fdt: don't warn about stack size The merge_fdt_bootargs() function by definition consumes more than 1024 bytes of stack because it has a 1024 byte command line on the stack, meaning that we always get a warning when building this file: arch/arm/boot/compressed/atags_to_fdt.c: In function 'merge_fdt_bootargs': arch/arm/boot/compressed/atags_to_fdt.c:98:1: warning: the frame size of 1032 bytes is larger than 1024 bytes [-Wframe-larger-than=] However, as this is the decompressor, we know that it has a very shallow call chain, and we do not actually risk overflowing the kernel stack at runtime here. This just shuts up the warning by raising the frame size warning limit for this file. Tested on Nexus 7 2012 builds.
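For context, kbuild's per-file flag overrides make this a two-line change; a minimal sketch of the mechanism (the object name foo.o is illustrative):

    # Strip a flag the object would otherwise inherit from KBUILD_CFLAGS:
    CFLAGS_REMOVE_foo.o += -Wframe-larger-than=${CONFIG_FRAME_WARN}
    # Then append a per-object replacement with a higher limit:
    CFLAGS_foo.o += -Wframe-larger-than=1280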
Acked-by: Nicolas Pitre Signed-off-by: David Heidelberg Signed-off-by: Arnd Bergmann Cc: Signed-off-by: Russell King (Oracle) --- arch/arm/boot/compressed/Makefile | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index 9d91ae1091b0..91265e7ff672 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -85,6 +85,8 @@ compress-$(CONFIG_KERNEL_LZ4) = lz4 libfdt_objs := fdt_rw.o fdt_ro.o fdt_wip.o fdt.o ifeq ($(CONFIG_ARM_ATAG_DTB_COMPAT),y) +CFLAGS_REMOVE_atags_to_fdt.o += -Wframe-larger-than=${CONFIG_FRAME_WARN} +CFLAGS_atags_to_fdt.o += -Wframe-larger-than=1280 OBJS += $(libfdt_objs) atags_to_fdt.o endif ifeq ($(CONFIG_USE_OF),y) -- cgit From 09ca497528dac12cbbceab8197011c875a96d053 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sun, 27 Jun 2021 17:09:18 +0000 Subject: powerpc: Remove in_kernel_text() Last user of in_kernel_text() stopped using it with commit 549e8152de80 ("powerpc: Make the 64-bit kernel as a position-independent executable"). Generic function is_kernel_text() does the same. So remove it. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/2a3a5b6f8cc0ef4e854d7b764f66aa8d2ee270d2.1624813698.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/sections.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h index 324d7b298ec3..6e4af4492a14 100644 --- a/arch/powerpc/include/asm/sections.h +++ b/arch/powerpc/include/asm/sections.h @@ -38,14 +38,6 @@ extern char start_virt_trampolines[]; extern char end_virt_trampolines[]; #endif -static inline int in_kernel_text(unsigned long addr) -{ - if (addr >= (unsigned long)_stext && addr < (unsigned long)__init_end) - return 1; - - return 0; -} - static inline unsigned long kernel_toc_addr(void) { /* Defined by the linker, see vmlinux.lds.S */ -- cgit From c8a6d91005343dea0d53be0ff0620c66934dcd44 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 5 Jul 2021 12:00:50 +0000 Subject: powerpc/non-smp: Unconditionally call smp_mb() on switch_mm Commit 3ccfebedd8cf ("powerpc, membarrier: Skip memory barrier in switch_mm()") added some logic to skip the smp_mb() in switch_mm_irqs_off() before the call to switch_mmu_context(). However, on non-SMP, smp_mb() is just a compiler barrier and doing it unconditionally is simpler than the logic used to check whether the barrier is needed or not. After the patch: 00000000 : ... c: 7c 04 18 40 cmplw r4,r3 10: 81 24 00 24 lwz r9,36(r4) 14: 91 25 04 c8 stw r9,1224(r5) 18: 4d 82 00 20 beqlr 1c: 48 00 00 00 b 1c 1c: R_PPC_REL24 switch_mmu_context Before the patch: 00000000 : ... c: 7c 04 18 40 cmplw r4,r3 10: 81 24 00 24 lwz r9,36(r4) 14: 91 25 04 c8 stw r9,1224(r5) 18: 4d 82 00 20 beqlr 1c: 81 24 00 28 lwz r9,40(r4) 20: 71 29 00 0a andi. r9,r9,10 24: 40 82 00 34 bne 58 28: 48 00 00 00 b 28 28: R_PPC_REL24 switch_mmu_context ...
58: 2c 03 00 00 cmpwi r3,0 5c: 41 82 ff cc beq 28 60: 48 00 00 00 b 60 60: R_PPC_REL24 switch_mmu_context Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/e9d501da0c59f60ca767b1b3ea4603fce6d02b9e.1625486440.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/membarrier.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/membarrier.h b/arch/powerpc/include/asm/membarrier.h index 6e20bb5c74ea..de7f79157918 100644 --- a/arch/powerpc/include/asm/membarrier.h +++ b/arch/powerpc/include/asm/membarrier.h @@ -12,7 +12,8 @@ static inline void membarrier_arch_switch_mm(struct mm_struct *prev, * when switching from userspace to kernel is not needed after * store to rq->curr. */ - if (likely(!(atomic_read(&next->membarrier_state) & + if (IS_ENABLED(CONFIG_SMP) && + likely(!(atomic_read(&next->membarrier_state) & (MEMBARRIER_STATE_PRIVATE_EXPEDITED | MEMBARRIER_STATE_GLOBAL_EXPEDITED)) || !prev)) return; -- cgit From 9c7248bb8de31f51c693bfa6a6ea53b1c07e0fa8 Mon Sep 17 00:00:00 2001 From: Laurent Dufour Date: Tue, 11 May 2021 09:31:36 +0200 Subject: powerpc/numa: Consider the max NUMA node for migratable LPAR When an LPAR is migratable, we should consider the maximum possible NUMA node instead of the number of NUMA nodes from the actual system. The DT property 'ibm,current-associativity-domains' defines the maximum number of nodes the LPAR can see when running on that box. But if the LPAR is being migrated to another box, it may see up to the nodes defined by 'ibm,max-associativity-domains'. So if an LPAR is migratable, that value should be used. Unfortunately, there is no easy way to know if an LPAR is migratable or not. The hypervisor exports the property 'ibm,migratable-partition' in the case it is set up to allow partition migration, but that does not mean that the current partition is migratable. Without this patch, when an LPAR is started on a 2 node box and then migrated to a 3 node box, the hypervisor may spread the LPAR's CPUs on the 3rd node. In that case if a CPU from that 3rd node is added to the LPAR, it will be wrongly assigned to the node because the kernel has been set to use up to 2 nodes (the configuration of the departure node). With this patch applied, the CPU is correctly added to the 3rd node. Fixes: f9f130ff2ec9 ("powerpc/numa: Detect support for coregroup") Signed-off-by: Laurent Dufour Reviewed-by: Srikar Dronamraju Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210511073136.17795-1-ldufour@linux.ibm.com --- arch/powerpc/mm/numa.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index f2bf98bdcea2..094a1076fd1f 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -893,7 +893,7 @@ static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn) static void __init find_possible_nodes(void) { struct device_node *rtas; - const __be32 *domains; + const __be32 *domains = NULL; int prop_length, max_nodes; u32 i; @@ -909,9 +909,14 @@ static void __init find_possible_nodes(void) * it doesn't exist, then fallback on ibm,max-associativity-domains. * Current denotes what the platform can support compared to max * which denotes what the Hypervisor can support. + * + * If the LPAR is migratable, new nodes might be activated after a LPM, + * so we should consider the max number in that case.
*/ - domains = of_get_property(rtas, "ibm,current-associativity-domains", - &prop_length); + if (!of_get_property(of_root, "ibm,migratable-partition", NULL)) + domains = of_get_property(rtas, + "ibm,current-associativity-domains", + &prop_length); if (!domains) { domains = of_get_property(rtas, "ibm,max-associativity-domains", &prop_length); @@ -920,6 +925,8 @@ static void __init find_possible_nodes(void) } max_nodes = of_read_number(&domains[min_common_depth], 1); + pr_info("Partition configured for %d NUMA nodes.\n", max_nodes); + for (i = 0; i < max_nodes; i++) { if (!node_possible(i)) node_set(i, node_possible_map); -- cgit From d144f4d5a8a804133d20ff311d7be70bcdbfaac2 Mon Sep 17 00:00:00 2001 From: Laurent Dufour Date: Mon, 17 May 2021 11:06:06 +0200 Subject: pseries/drmem: update LMBs after LPM After an LPM, the device tree node ibm,dynamic-reconfiguration-memory may be updated by the hypervisor in the case the NUMA topology of the LPAR's memory is updated. This is handled by the kernel, but the memory's node is not updated because there is no way to move a memory block between nodes from the Linux kernel point of view. If later a memory block is added or removed, drmem_update_dt() is called and it overwrites the DT node ibm,dynamic-reconfiguration-memory to match the added or removed LMB. But the LMB's associativity node has not been updated after the DT node update and thus the node is overwritten by Linux's topology instead of the hypervisor's. Introduce a hook called when the ibm,dynamic-reconfiguration-memory node is updated to force an update of the LMB's associativity. However, ignore the call to that hook when the update has been triggered by drmem_update_dt(). Because, in that case, the LMB tree has been used to set the DT property and thus it doesn't need to be updated back. Since drmem_update_dt() is called under the protection of the device_hotplug_lock and the hook is called in the same context, use a simple boolean variable to detect that call. Signed-off-by: Laurent Dufour Reviewed-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210517090606.56930-1-ldufour@linux.ibm.com --- arch/powerpc/include/asm/drmem.h | 1 + arch/powerpc/mm/drmem.c | 46 +++++++++++++++++++++++++ arch/powerpc/platforms/pseries/hotplug-memory.c | 4 +++ 3 files changed, 51 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h index bf2402fed3e0..4265d5e95c2c 100644 --- a/arch/powerpc/include/asm/drmem.h +++ b/arch/powerpc/include/asm/drmem.h @@ -111,6 +111,7 @@ int drmem_update_dt(void); int __init walk_drmem_lmbs_early(unsigned long node, void *data, int (*func)(struct drmem_lmb *, const __be32 **, void *)); +void drmem_update_lmbs(struct property *prop); #endif static inline void invalidate_lmb_associativity_index(struct drmem_lmb *lmb) diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c index 9af3832c9d8d..22197b18d85e 100644 --- a/arch/powerpc/mm/drmem.c +++ b/arch/powerpc/mm/drmem.c @@ -18,6 +18,7 @@ static int n_root_addr_cells, n_root_size_cells; static struct drmem_lmb_info __drmem_info; struct drmem_lmb_info *drmem_info = &__drmem_info; +static bool in_drmem_update; u64 drmem_lmb_memory_max(void) { @@ -178,6 +179,11 @@ int drmem_update_dt(void) if (!memory) return -1; + /* + * Set in_drmem_update to prevent the notifier callback from processing + * the DT property back since the change is coming from the LMB tree.
+ */ + in_drmem_update = true; prop = of_find_property(memory, "ibm,dynamic-memory", NULL); if (prop) { rc = drmem_update_dt_v1(memory, prop); @@ -186,6 +192,7 @@ int drmem_update_dt(void) if (prop) rc = drmem_update_dt_v2(memory, prop); } + in_drmem_update = false; of_node_put(memory); return rc; @@ -307,6 +314,45 @@ int __init walk_drmem_lmbs_early(unsigned long node, void *data, return ret; } +/* + * Update the LMB associativity index. + */ +static int update_lmb(struct drmem_lmb *updated_lmb, + __maybe_unused const __be32 **usm, + __maybe_unused void *data) +{ + struct drmem_lmb *lmb; + + for_each_drmem_lmb(lmb) { + if (lmb->drc_index != updated_lmb->drc_index) + continue; + + lmb->aa_index = updated_lmb->aa_index; + break; + } + return 0; +} + +/* + * Update the LMB associativity index. + * + * This needs to be called when the hypervisor is updating the + * dynamic-reconfiguration-memory node property. + */ +void drmem_update_lmbs(struct property *prop) +{ + /* + * Don't update the LMBs if triggered by the update done in + * drmem_update_dt(), the LMB values have been used to update the DT + * property in that case. + */ + if (in_drmem_update) + return; + if (!strcmp(prop->name, "ibm,dynamic-memory")) + __walk_drmem_v1_lmbs(prop->value, NULL, NULL, update_lmb); + else if (!strcmp(prop->name, "ibm,dynamic-memory-v2")) + __walk_drmem_v2_lmbs(prop->value, NULL, NULL, update_lmb); +} #endif static int init_drmem_lmb_size(struct device_node *dn) diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 377d852f5a9a..0beb3ca2b549 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -979,6 +979,10 @@ static int pseries_memory_notifier(struct notifier_block *nb, case OF_RECONFIG_DETACH_NODE: err = pseries_remove_mem_node(rd->dn); break; + case OF_RECONFIG_UPDATE_PROPERTY: + if (!strcmp(rd->dn->name, + "ibm,dynamic-reconfiguration-memory")) + drmem_update_lmbs(rd->prop); } return notifier_from_errno(err); } -- cgit From bd1dd4c5f5286df0148b5b316f37c583b8f55fa1 Mon Sep 17 00:00:00 2001 From: Laurent Dufour Date: Thu, 29 Apr 2021 19:49:08 +0200 Subject: powerpc/pseries: Prevent free CPU ids being reused on another node MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a CPU is hot added, the CPU ids are taken from the available mask, starting from the lowest possible set. If that set of values was previously used for a CPU attached to a different node, it appears to an application as if these CPUs have migrated from one node to another node, which is not expected. To prevent this, we need to record the CPU ids used for each node and not reuse them on another node. However, to prevent CPU hotplug from failing when the CPU ids are starved on a node, the capability to reuse other nodes’ free CPU ids is kept. A warning is displayed in such a case to warn the user. A new CPU bit mask (node_recorded_ids_map) is introduced for each possible node. It is populated with the CPUs onlined at boot time, and then when a CPU is hot plugged to a node. The bits in that mask remain set when the CPU is hot unplugged, to record that these CPU ids have been used for this node. If no id set was found, a retry is made without removing the ids used on the other nodes to try reusing them. This is the way ids have been allocated prior to this patch. The effect of this patch can be seen by removing and adding CPUs using the Qemu monitor.
In the following case, the first CPU from the node 2 is removed, then the first one from the node 1 is removed too. Later, the first CPU of the node 2 is added back. Without that patch, the kernel will number these CPUs using the first CPU ids available, which are the ones freed when removing the first CPU of the node 1. This leads the CPU ids 16-23 to move from the node 1 to the node 2. With the patch applied, the CPU ids 32-39 are used since they are the lowest free ones which have not been used on another node. At boot time: [root@vm40 ~]# numactl -H | grep cpus node 0 cpus: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 node 1 cpus: 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 node 2 cpus: 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 Vanilla kernel, after the CPU hot unplug/plug operations: [root@vm40 ~]# numactl -H | grep cpus node 0 cpus: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 node 1 cpus: 24 25 26 27 28 29 30 31 node 2 cpus: 16 17 18 19 20 21 22 23 40 41 42 43 44 45 46 47 Patched kernel, after the CPU hot unplug/plug operations: [root@vm40 ~]# numactl -H | grep cpus node 0 cpus: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 node 1 cpus: 24 25 26 27 28 29 30 31 node 2 cpus: 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 Signed-off-by: Laurent Dufour Reviewed-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210429174908.16613-1-ldufour@linux.ibm.com --- arch/powerpc/platforms/pseries/hotplug-cpu.c | 171 +++++++++++++++++++++------ 1 file changed, 132 insertions(+), 39 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index 7e970f81d8ff..e1f224320102 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -39,6 +39,12 @@ /* This version can't take the spinlock, because it never returns */ static int rtas_stop_self_token = RTAS_UNKNOWN_SERVICE; +/* + * Record the CPU ids used on each node. + * Protected by cpu_add_remove_lock. + */ +static cpumask_var_t node_recorded_ids_map[MAX_NUMNODES]; + static void rtas_stop_self(void) { static struct rtas_args args; @@ -139,72 +145,148 @@ static void pseries_cpu_die(unsigned int cpu) paca_ptrs[cpu]->cpu_start = 0; } +/** + * find_cpu_id_range - find a linear range of @nthreads free CPU ids. + * @nthreads : the number of threads (cpu ids) + * @assigned_node : the node it belongs to or NUMA_NO_NODE if free ids from any + * node can be picked. + * @cpu_mask: the returned CPU mask. + * + * Returns 0 on success. */ +static int find_cpu_id_range(unsigned int nthreads, int assigned_node, + cpumask_var_t *cpu_mask) +{ + cpumask_var_t candidate_mask; + unsigned int cpu, node; + int rc = -ENOSPC; + + if (!zalloc_cpumask_var(&candidate_mask, GFP_KERNEL)) + return -ENOMEM; + + cpumask_clear(*cpu_mask); + for (cpu = 0; cpu < nthreads; cpu++) + cpumask_set_cpu(cpu, *cpu_mask); + + BUG_ON(!cpumask_subset(cpu_present_mask, cpu_possible_mask)); + + /* Get a bitmap of unoccupied slots. */ + cpumask_xor(candidate_mask, cpu_possible_mask, cpu_present_mask); + + if (assigned_node != NUMA_NO_NODE) { + /* + * Remove free ids previously assigned on the other nodes. We + * can walk only online nodes because once a node becomes online + * it is not turned offline again.
+ */ + for_each_online_node(node) { + if (node == assigned_node) + continue; + cpumask_andnot(candidate_mask, candidate_mask, + node_recorded_ids_map[node]); + } + } + + if (cpumask_empty(candidate_mask)) + goto out; + + while (!cpumask_empty(*cpu_mask)) { + if (cpumask_subset(*cpu_mask, candidate_mask)) + /* Found a range where we can insert the new cpu(s) */ + break; + cpumask_shift_left(*cpu_mask, *cpu_mask, nthreads); + } + + if (!cpumask_empty(*cpu_mask)) + rc = 0; + +out: + free_cpumask_var(candidate_mask); + return rc; +} + /* * Update cpu_present_mask and paca(s) for a new cpu node. The wrinkle - * here is that a cpu device node may represent up to two logical cpus + * here is that a cpu device node may represent multiple logical cpus * in the SMT case. We must honor the assumption in other code that * the logical ids for sibling SMT threads x and y are adjacent, such * that x^1 == y and y^1 == x. */ static int pseries_add_processor(struct device_node *np) { - unsigned int cpu; - cpumask_var_t candidate_mask, tmp; - int err = -ENOSPC, len, nthreads, i; + int len, nthreads, node, cpu, assigned_node; + int rc = 0; + cpumask_var_t cpu_mask; const __be32 *intserv; intserv = of_get_property(np, "ibm,ppc-interrupt-server#s", &len); if (!intserv) return 0; - zalloc_cpumask_var(&candidate_mask, GFP_KERNEL); - zalloc_cpumask_var(&tmp, GFP_KERNEL); - nthreads = len / sizeof(u32); - for (i = 0; i < nthreads; i++) - cpumask_set_cpu(i, tmp); - cpu_maps_update_begin(); + if (!alloc_cpumask_var(&cpu_mask, GFP_KERNEL)) + return -ENOMEM; - BUG_ON(!cpumask_subset(cpu_present_mask, cpu_possible_mask)); + /* + * Fetch from the DT nodes read by dlpar_configure_connector() the NUMA + * node id the added CPU belongs to. + */ + node = of_node_to_nid(np); + if (node < 0 || !node_possible(node)) + node = first_online_node; - /* Get a bitmap of unoccupied slots. */ - cpumask_xor(candidate_mask, cpu_possible_mask, cpu_present_mask); - if (cpumask_empty(candidate_mask)) { - /* If we get here, it most likely means that NR_CPUS is - * less than the partition's max processors setting. + BUG_ON(node == NUMA_NO_NODE); + assigned_node = node; + + cpu_maps_update_begin(); + + rc = find_cpu_id_range(nthreads, node, &cpu_mask); + if (rc && nr_node_ids > 1) { + /* + * Try again, considering the free CPU ids from the other node. */ - printk(KERN_ERR "Cannot add cpu %pOF; this system configuration" - " supports %d logical cpus.\n", np, - num_possible_cpus()); - goto out_unlock; + node = NUMA_NO_NODE; + rc = find_cpu_id_range(nthreads, NUMA_NO_NODE, &cpu_mask); } - while (!cpumask_empty(tmp)) - if (cpumask_subset(tmp, candidate_mask)) - /* Found a range where we can insert the new cpu(s) */ - break; - else - cpumask_shift_left(tmp, tmp, nthreads); - - if (cpumask_empty(tmp)) { - printk(KERN_ERR "Unable to find space in cpu_present_mask for" - " processor %pOFn with %d thread(s)\n", np, - nthreads); - goto out_unlock; + if (rc) { + pr_err("Cannot add cpu %pOF; this system configuration" + " supports %d logical cpus.\n", np, num_possible_cpus()); + goto out; } - for_each_cpu(cpu, tmp) { + for_each_cpu(cpu, cpu_mask) { BUG_ON(cpu_present(cpu)); set_cpu_present(cpu, true); set_hard_smp_processor_id(cpu, be32_to_cpu(*intserv++)); } - err = 0; -out_unlock: + + /* Record the newly used CPU ids for the associate node. 
*/ + cpumask_or(node_recorded_ids_map[assigned_node], + node_recorded_ids_map[assigned_node], cpu_mask); + + /* + * If node is set to NUMA_NO_NODE, CPU ids have be reused from + * another node, remove them from its mask. + */ + if (node == NUMA_NO_NODE) { + cpu = cpumask_first(cpu_mask); + pr_warn("Reusing free CPU ids %d-%d from another node\n", + cpu, cpu + nthreads - 1); + for_each_online_node(node) { + if (node == assigned_node) + continue; + cpumask_andnot(node_recorded_ids_map[node], + node_recorded_ids_map[node], + cpu_mask); + } + } + +out: cpu_maps_update_done(); - free_cpumask_var(candidate_mask); - free_cpumask_var(tmp); - return err; + free_cpumask_var(cpu_mask); + return rc; } /* @@ -908,6 +990,7 @@ static struct notifier_block pseries_smp_nb = { static int __init pseries_cpu_hotplug_init(void) { int qcss_tok; + unsigned int node; #ifdef CONFIG_ARCH_CPU_PROBE_RELEASE ppc_md.cpu_probe = dlpar_cpu_probe; @@ -929,8 +1012,18 @@ static int __init pseries_cpu_hotplug_init(void) smp_ops->cpu_die = pseries_cpu_die; /* Processors can be added/removed only on LPAR */ - if (firmware_has_feature(FW_FEATURE_LPAR)) + if (firmware_has_feature(FW_FEATURE_LPAR)) { + for_each_node(node) { + alloc_bootmem_cpumask_var(&node_recorded_ids_map[node]); + + /* Record ids of CPU added at boot time */ + cpumask_or(node_recorded_ids_map[node], + node_recorded_ids_map[node], + node_to_cpumask_map[node]); + } + of_reconfig_notifier_register(&pseries_smp_nb); + } return 0; } -- cgit From c00103abf76fd3916596afd07dd3fdeee0dca15d Mon Sep 17 00:00:00 2001 From: kernel test robot Date: Tue, 3 Aug 2021 16:59:55 +0200 Subject: powerpc/kexec: fix for_each_child.cocci warning for_each_node_by_type should have of_node_put() before return. Generated by: scripts/coccinelle/iterators/for_each_child.cocci Reported-by: kernel test robot Signed-off-by: kernel test robot Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/alpine.DEB.2.22.394.2108031654080.17639@hadrien --- arch/powerpc/kexec/core_64.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c index 84618d3c8013..89c069d664a5 100644 --- a/arch/powerpc/kexec/core_64.c +++ b/arch/powerpc/kexec/core_64.c @@ -64,8 +64,10 @@ int default_machine_kexec_prepare(struct kimage *image) begin = image->segment[i].mem; end = begin + image->segment[i].memsz; - if ((begin < high) && (end > low)) + if ((begin < high) && (end > low)) { + of_node_put(node); return -ETXTBSY; + } } } -- cgit From 5ae36401ca4ea2737d779ce7c267444b16530001 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 3 Aug 2021 16:15:46 +0200 Subject: powerpc: Replace deprecated CPU-hotplug functions. The functions get_online_cpus() and put_online_cpus() have been deprecated during the CPU hotplug rework. They map directly to cpus_read_lock() and cpus_read_unlock(). Replace deprecated CPU-hotplug functions with the official version. The behavior remains unchanged. 
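The conversion is mechanical; a minimal sketch of the before/after pattern (the function and its per-CPU work are illustrative only):

    static void scan_cpus(void)		/* hypothetical example */
    {
    	unsigned int cpu;

    	cpus_read_lock();		/* was: get_online_cpus() */
    	for_each_online_cpu(cpu)
    		pr_info("cpu %u is online\n", cpu);
    	cpus_read_unlock();		/* was: put_online_cpus() */
    }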
Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210803141621.780504-4-bigeasy@linutronix.de --- arch/powerpc/kernel/rtasd.c | 4 ++-- arch/powerpc/kvm/book3s_hv_builtin.c | 10 +++++----- arch/powerpc/platforms/powernv/idle.c | 4 ++-- arch/powerpc/platforms/powernv/opal-imc.c | 8 ++++---- 4 files changed, 13 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index 8561dfb33f24..32ee17753eb4 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -429,7 +429,7 @@ static void rtas_event_scan(struct work_struct *w) do_event_scan(); - get_online_cpus(); + cpus_read_lock(); /* raw_ OK because just using CPU as starting point. */ cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask); @@ -451,7 +451,7 @@ static void rtas_event_scan(struct work_struct *w) schedule_delayed_work_on(cpu, &event_scan_work, __round_jiffies_relative(event_scan_delay, cpu)); - put_online_cpus(); + cpus_read_unlock(); } #ifdef CONFIG_PPC64 diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index be8ef1c5b1bf..fcf4760a3a0e 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -137,23 +137,23 @@ long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target, * exist in the system. We use a counter of VMs to track this. * * One of the operations we need to block is onlining of secondaries, so we - * protect hv_vm_count with get/put_online_cpus(). + * protect hv_vm_count with cpus_read_lock/unlock(). */ static atomic_t hv_vm_count; void kvm_hv_vm_activated(void) { - get_online_cpus(); + cpus_read_lock(); atomic_inc(&hv_vm_count); - put_online_cpus(); + cpus_read_unlock(); } EXPORT_SYMBOL_GPL(kvm_hv_vm_activated); void kvm_hv_vm_deactivated(void) { - get_online_cpus(); + cpus_read_lock(); atomic_dec(&hv_vm_count); - put_online_cpus(); + cpus_read_unlock(); } EXPORT_SYMBOL_GPL(kvm_hv_vm_deactivated); diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index 528a7e0cf83a..aa27689b832d 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -199,12 +199,12 @@ static ssize_t store_fastsleep_workaround_applyonce(struct device *dev, */ power7_fastsleep_workaround_exit = false; - get_online_cpus(); + cpus_read_lock(); primary_thread_mask = cpu_online_cores_map(); on_each_cpu_mask(&primary_thread_mask, pnv_fastsleep_workaround_apply, &err, 1); - put_online_cpus(); + cpus_read_unlock(); if (err) { pr_err("fastsleep_workaround_applyonce change failed while running pnv_fastsleep_workaround_apply"); goto fail; diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c index 7824cc364bc4..ba02a75c1410 100644 --- a/arch/powerpc/platforms/powernv/opal-imc.c +++ b/arch/powerpc/platforms/powernv/opal-imc.c @@ -186,7 +186,7 @@ static void disable_nest_pmu_counters(void) int nid, cpu; const struct cpumask *l_cpumask; - get_online_cpus(); + cpus_read_lock(); for_each_node_with_cpus(nid) { l_cpumask = cpumask_of_node(nid); cpu = cpumask_first_and(l_cpumask, cpu_online_mask); @@ -195,7 +195,7 @@ static void disable_nest_pmu_counters(void) opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST, get_hard_smp_processor_id(cpu)); } - put_online_cpus(); + cpus_read_unlock(); } static void disable_core_pmu_counters(void) @@ -203,7 +203,7 @@ static void disable_core_pmu_counters(void) cpumask_t cores_map; int cpu, 
rc; - get_online_cpus(); + cpus_read_lock(); /* Disable the IMC Core functions */ cores_map = cpu_online_cores_map(); for_each_cpu(cpu, &cores_map) { @@ -213,7 +213,7 @@ static void disable_core_pmu_counters(void) pr_err("%s: Failed to stop Core (cpu = %d)\n", __FUNCTION__, cpu); } - put_online_cpus(); + cpus_read_unlock(); } int get_max_nest_dev(void) -- cgit From 27fd1111051dc218e5b6cb2da5dbb3f342879ff1 Mon Sep 17 00:00:00 2001 From: Jordan Niethe Date: Wed, 4 Aug 2021 11:37:24 +1000 Subject: powerpc: Always inline radix_enabled() to fix build failure This is the same as commit acdad8fb4a15 ("powerpc: Force inlining of mmu_has_feature to fix build failure") but for radix_enabled(). The config in the linked bugzilla causes the following build failure: LD .tmp_vmlinux.kallsyms1 powerpc64-linux-ld: arch/powerpc/mm/pgtable.o: in function `.__ptep_set_access_flags': pgtable.c:(.text+0x17c): undefined reference to `.radix__ptep_set_access_flags' powerpc64-linux-ld: arch/powerpc/mm/pageattr.o: in function `.change_page_attr': pageattr.c:(.text+0xc0): undefined reference to `.radix__flush_tlb_kernel_range' etc. This is due to radix_enabled() not being inlined. See extract from building with -Winline: In file included from arch/powerpc/include/asm/lppaca.h:46, from arch/powerpc/include/asm/paca.h:17, from arch/powerpc/include/asm/current.h:13, from include/linux/thread_info.h:23, from include/asm-generic/preempt.h:5, from ./arch/powerpc/include/generated/asm/preempt.h:1, from include/linux/preempt.h:78, from include/linux/spinlock.h:51, from include/linux/mmzone.h:8, from include/linux/gfp.h:6, from arch/powerpc/mm/pgtable.c:21: arch/powerpc/include/asm/book3s/64/pgtable.h: In function '__ptep_set_access_flags': arch/powerpc/include/asm/mmu.h:327:20: error: inlining failed in call to 'radix_enabled': call is unlikely and code size would grow [-Werror=inline] The code relies on constant folding of MMU_FTRS_POSSIBLE at buildtime and elimination of non possible parts of code at compile time. For this to work radix_enabled() must be inlined so make it __always_inline. Reported-by: Erhard F. Suggested-by: Michael Ellerman Tested-by: Randy Dunlap Signed-off-by: Jordan Niethe [mpe: Trimmed error messages in change log] Signed-off-by: Michael Ellerman Link: https://bugzilla.kernel.org/show_bug.cgi?id=213803 Link: https://lore.kernel.org/r/20210804013724.514468-1-jniethe5@gmail.com --- arch/powerpc/include/asm/mmu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 27016b98ecb2..8abe8e42e045 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -324,7 +324,7 @@ static inline void assert_pte_locked(struct mm_struct *mm, unsigned long addr) } #endif /* !CONFIG_DEBUG_VM */ -static inline bool radix_enabled(void) +static __always_inline bool radix_enabled(void) { return mmu_has_feature(MMU_FTR_TYPE_RADIX); } -- cgit From 9b49f979b3d560cb75ea9f1a596baf432d566798 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 5 Aug 2021 11:20:05 +1000 Subject: powerpc/configs: Disable legacy ptys on microwatt defconfig We shouldn't need legacy ptys, and disabling the option improves boot time by about 0.5 seconds. 
Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210805112005.3cb1f412@kryten.localdomain --- arch/powerpc/configs/microwatt_defconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/powerpc/configs/microwatt_defconfig b/arch/powerpc/configs/microwatt_defconfig index a08b739123da..ebc90aefbc0c 100644 --- a/arch/powerpc/configs/microwatt_defconfig +++ b/arch/powerpc/configs/microwatt_defconfig @@ -57,6 +57,7 @@ CONFIG_NETDEVICES=y # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set +# CONFIG_LEGACY_PTYS is not set CONFIG_SERIAL_8250=y # CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set CONFIG_SERIAL_8250_CONSOLE=y -- cgit From 2ac78e0c00184a9ba53d507be7549c69a3f566b6 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 5 Aug 2021 17:56:49 +1000 Subject: KVM: PPC: Use arch_get_random_seed_long instead of powernv variant The powernv_get_random_long() does not work in nested KVM (which is pseries) and produces a crash when accessing in_be64(rng->regs) in powernv_get_random_long(). This replaces powernv_get_random_long with the ppc_md machine hook wrapper. Signed-off-by: Alexey Kardashevskiy Reviewed-by: Fabiano Rosas Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210805075649.2086567-1-aik@ozlabs.ru --- arch/powerpc/kvm/book3s_hv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 085fb8ecbf68..9f957ceee58a 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1165,7 +1165,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) break; #endif case H_RANDOM: - if (!powernv_get_random_long(&vcpu->arch.regs.gpr[4])) + if (!arch_get_random_seed_long(&vcpu->arch.regs.gpr[4])) ret = H_HARDWARE; break; case H_RPT_INVALIDATE: -- cgit From 786e5b102a0007d81579822eac23cb5bfaa0b65f Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:19 +0200 Subject: powerpc/pseries/pci: Introduce __find_pe_total_msi() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It will help to size the PCI MSI domain. 
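A rough usage sketch of the helper introduced below (the consumer shown is hypothetical; the series wires this up to the MSI domain code in a later patch):

    int count;

    /* Walk up from the PHB's device-tree node looking for the
     * "ibm,pe-total-#msi" quota; on success, use it as the number
     * of vectors the MSI domain can hand out. */
    if (__find_pe_total_msi(phb->dn, &count))
    	pseries_create_msi_domain(phb, count);	/* hypothetical consumer */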
Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-2-clg@kaod.org --- arch/powerpc/platforms/pseries/msi.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index 637300330507..d2d090e04745 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -164,12 +164,12 @@ static int check_req_msix(struct pci_dev *pdev, int nvec) /* Quota calculation */ -static struct device_node *find_pe_total_msi(struct pci_dev *dev, int *total) +static struct device_node *__find_pe_total_msi(struct device_node *node, int *total) { struct device_node *dn; const __be32 *p; - dn = of_node_get(pci_device_to_OF_node(dev)); + dn = of_node_get(node); while (dn) { p = of_get_property(dn, "ibm,pe-total-#msi", NULL); if (p) { @@ -185,6 +185,11 @@ static struct device_node *find_pe_total_msi(struct pci_dev *dev, int *total) return NULL; } +static struct device_node *find_pe_total_msi(struct pci_dev *dev, int *total) +{ + return __find_pe_total_msi(pci_device_to_OF_node(dev), total); +} + static struct device_node *find_pe_dn(struct pci_dev *dev, int *total) { struct device_node *dn; -- cgit From e81202007363bd694b711f307f02320b5f98edaa Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:20 +0200 Subject: powerpc/pseries/pci: Introduce rtas_prepare_msi_irqs() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This splits the routine setting the MSIs into two parts: allocation of MSIs for the PCI device at the FW level (RTAS) and the actual mapping and activation of the IRQs. rtas_prepare_msi_irqs() will serve as a handler for the PCI MSI domain.
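In other words, the flow becomes two-phase; a condensed sketch (the glue between the phases is illustrative, not the exact code):

    /* Phase 1: firmware allocation, the future ->msi_prepare hook. */
    rc = rtas_prepare_msi_irqs(pdev, nvec, type, NULL);
    if (rc)
    	return rc;

    /* Phase 2: per-vector query and mapping, still done by
     * rtas_setup_msi_irqs() at this point in the series. */
    for_each_pci_msi_entry(entry, pdev) {
    	hwirq = rtas_query_irq_number(pdn, i++);
    	/* ... map hwirq to a virq and write the MSI message ... */
    }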
Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-3-clg@kaod.org --- arch/powerpc/platforms/pseries/msi.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index d2d090e04745..4bf14f27e1aa 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -373,12 +373,11 @@ static void rtas_hack_32bit_msi_gen2(struct pci_dev *pdev) pci_write_config_dword(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_HI, 0); } -static int rtas_setup_msi_irqs(struct pci_dev *pdev, int nvec_in, int type) +static int rtas_prepare_msi_irqs(struct pci_dev *pdev, int nvec_in, int type, + msi_alloc_info_t *arg) { struct pci_dn *pdn; - int hwirq, virq, i, quota, rc; - struct msi_desc *entry; - struct msi_msg msg; + int quota, rc; int nvec = nvec_in; int use_32bit_msi_hack = 0; @@ -456,6 +455,22 @@ again: return rc; } + return 0; +} + +static int rtas_setup_msi_irqs(struct pci_dev *pdev, int nvec_in, int type) +{ + struct pci_dn *pdn; + int hwirq, virq, i; + int rc; + struct msi_desc *entry; + struct msi_msg msg; + + rc = rtas_prepare_msi_irqs(pdev, nvec_in, type, NULL); + if (rc) + return rc; + + pdn = pci_get_pdn(pdev); i = 0; for_each_pci_msi_entry(entry, pdev) { hwirq = rtas_query_irq_number(pdn, i++); -- cgit From 14be098c5387eb93b794f299f3c3e2ddf6038ec7 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:21 +0200 Subject: powerpc/xive: Add support for IRQ domain hierarchy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds handlers to allocate/free IRQs in a domain hierarchy. We could try to use xive_irq_domain_map() in xive_irq_domain_alloc(), but since we rely on xive_irq_alloc_data() to set the IRQ handler data, duplicating the code is simpler. xive_irq_free_data() needs to be called when IRQs are freed to clear the MMIO mappings and free the XIVE handler data, the xive_irq_data structure. This is going to be a problem with MSI domains which we will address later.
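For orientation, the new ->translate/->alloc path consumes a two-cell XIVE interrupt specifier; a sketch of the irq_fwspec involved (the handle and values are made up for illustration):

    struct irq_fwspec fwspec = {
    	.fwnode      = xive_fwnode,	/* hypothetical fwnode handle */
    	.param_count = 2,
    	/* cell 0: global IRQ number, cell 1: trigger type */
    	.param       = { 0x1234, IRQ_TYPE_EDGE_RISING },
    };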
Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-4-clg@kaod.org --- arch/powerpc/sysdev/xive/common.c | 64 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index dbdbbc2f1dc5..420d96deb7b6 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -1366,7 +1366,71 @@ static void xive_irq_domain_debug_show(struct seq_file *m, struct irq_domain *d, } #endif +#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY +static int xive_irq_domain_translate(struct irq_domain *d, + struct irq_fwspec *fwspec, + unsigned long *hwirq, + unsigned int *type) +{ + return xive_irq_domain_xlate(d, to_of_node(fwspec->fwnode), + fwspec->param, fwspec->param_count, + hwirq, type); +} + +static int xive_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg) +{ + struct irq_fwspec *fwspec = arg; + irq_hw_number_t hwirq; + unsigned int type = IRQ_TYPE_NONE; + int i, rc; + + rc = xive_irq_domain_translate(domain, fwspec, &hwirq, &type); + if (rc) + return rc; + + pr_debug("%s %d/%lx #%d\n", __func__, virq, hwirq, nr_irqs); + + for (i = 0; i < nr_irqs; i++) { + /* TODO: call xive_irq_domain_map() */ + + /* + * Mark interrupts as edge sensitive by default so that resend + * actually works. Will fix that up below if needed. + */ + irq_clear_status_flags(virq, IRQ_LEVEL); + + /* allocates and sets handler data */ + rc = xive_irq_alloc_data(virq + i, hwirq + i); + if (rc) + return rc; + + irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i, + &xive_irq_chip, domain->host_data); + irq_set_handler(virq + i, handle_fasteoi_irq); + } + + return 0; +} + +static void xive_irq_domain_free(struct irq_domain *domain, + unsigned int virq, unsigned int nr_irqs) +{ + int i; + + pr_debug("%s %d #%d\n", __func__, virq, nr_irqs); + + for (i = 0; i < nr_irqs; i++) + xive_irq_free_data(virq + i); +} +#endif + static const struct irq_domain_ops xive_irq_domain_ops = { +#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY + .alloc = xive_irq_domain_alloc, + .free = xive_irq_domain_free, + .translate = xive_irq_domain_translate, +#endif .match = xive_irq_domain_match, .map = xive_irq_domain_map, .unmap = xive_irq_domain_unmap, -- cgit From 6c2ab2a5d634d4e30445ee5d52d5d1469bf74aa2 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:22 +0200 Subject: powerpc/xive: Ease debugging of xive_irq_set_affinity() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pr_debug() is easier to activate and it helps to know how the kernel configures the HW when tweaking the IRQ subsystem. Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-5-clg@kaod.org --- arch/powerpc/sysdev/xive/common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 420d96deb7b6..823f9fe3542a 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -713,7 +713,7 @@ static int xive_irq_set_affinity(struct irq_data *d, u32 target, old_target; int rc = 0; - pr_devel("xive_irq_set_affinity: irq %d\n", d->irq); + pr_debug("%s: irq %d/%x\n", __func__, d->irq, hw_irq); /* Is this valid ? 
*/ if (cpumask_any_and(cpumask, cpu_online_mask) >= nr_cpu_ids) return -EINVAL; @@ -758,7 +758,7 @@ static int xive_irq_set_affinity(struct irq_data *d, return rc; } - pr_devel(" target: 0x%x\n", target); + pr_debug(" target: 0x%x\n", target); xd->target = target; /* Give up previous target */ -- cgit From a5f3d2c17b07e69166b93209f34a5fb8271a6810 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:23 +0200 Subject: powerpc/pseries/pci: Add MSI domains MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two IRQ domains are added on top of default machine IRQ domain. First, the top level "pSeries-PCI-MSI" domain deals with the MSI specificities. In this domain, the HW IRQ numbers are generated by the PCI MSI layer; they compose a unique ID for an MSI source with the PCI device identifier and the MSI vector number. These numbers can be quite large on a pSeries machine running under the IBM Hypervisor and /sys/kernel/irq/ and /proc/interrupts will require small fixes to show them correctly. The second domain is the in-the-middle "pSeries-MSI" domain which acts as a proxy between the PCI MSI subsystem and the machine IRQ subsystem. It usually allocates the MSI vector numbers but, on pSeries machines, this is done by the RTAS FW and RTAS returns IRQ numbers in the IRQ number space of the machine. This is why the in-the-middle "pSeries-MSI" domain has the same HW IRQ numbers as its parent domain. Only the XIVE (P9/P10) parent domain is supported for now. We still need to add support for IRQ domain hierarchy under XICS. Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-6-clg@kaod.org --- arch/powerpc/include/asm/pci-bridge.h | 5 + arch/powerpc/kernel/pci-common.c | 6 + arch/powerpc/platforms/pseries/msi.c | 185 +++++++++++++++++++++++++++++++ arch/powerpc/platforms/pseries/pseries.h | 1 + arch/powerpc/platforms/pseries/setup.c | 2 + 5 files changed, 199 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index 74424c14515c..90f488fa4c17 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -126,6 +126,11 @@ struct pci_controller { #endif /* CONFIG_PPC64 */ void *private_data; + + /* IRQ domain hierarchy */ + struct irq_domain *dev_domain; + struct irq_domain *msi_domain; + struct fwnode_handle *fwnode; }; /* These are used for config access before all the PCI probing diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 001e90cd8948..c3573430919d 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -1060,11 +1061,16 @@ void pcibios_bus_add_device(struct pci_dev *dev) int pcibios_add_device(struct pci_dev *dev) { + struct irq_domain *d; + #ifdef CONFIG_PCI_IOV if (ppc_md.pcibios_fixup_sriov) ppc_md.pcibios_fixup_sriov(dev); #endif /* CONFIG_PCI_IOV */ + d = dev_get_msi_domain(&dev->bus->dev); + if (d) + dev_set_msi_domain(&dev->dev, d); return 0; } diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index 4bf14f27e1aa..86c6809ebac2 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "pseries.h" @@ -518,6 +519,190 @@ static int rtas_setup_msi_irqs(struct pci_dev *pdev, int nvec_in, int type) return
0; } +static int pseries_msi_ops_prepare(struct irq_domain *domain, struct device *dev, + int nvec, msi_alloc_info_t *arg) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct msi_desc *desc = first_pci_msi_entry(pdev); + int type = desc->msi_attrib.is_msix ? PCI_CAP_ID_MSIX : PCI_CAP_ID_MSI; + + return rtas_prepare_msi_irqs(pdev, nvec, type, arg); +} + +static struct msi_domain_ops pseries_pci_msi_domain_ops = { + .msi_prepare = pseries_msi_ops_prepare, +}; + +static void pseries_msi_shutdown(struct irq_data *d) +{ + d = d->parent_data; + if (d->chip->irq_shutdown) + d->chip->irq_shutdown(d); +} + +static void pseries_msi_mask(struct irq_data *d) +{ + pci_msi_mask_irq(d); + irq_chip_mask_parent(d); +} + +static void pseries_msi_unmask(struct irq_data *d) +{ + pci_msi_unmask_irq(d); + irq_chip_unmask_parent(d); +} + +static struct irq_chip pseries_pci_msi_irq_chip = { + .name = "pSeries-PCI-MSI", + .irq_shutdown = pseries_msi_shutdown, + .irq_mask = pseries_msi_mask, + .irq_unmask = pseries_msi_unmask, + .irq_eoi = irq_chip_eoi_parent, +}; + +static struct msi_domain_info pseries_msi_domain_info = { + .flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | + MSI_FLAG_MULTI_PCI_MSI | MSI_FLAG_PCI_MSIX), + .ops = &pseries_pci_msi_domain_ops, + .chip = &pseries_pci_msi_irq_chip, +}; + +static void pseries_msi_compose_msg(struct irq_data *data, struct msi_msg *msg) +{ + __pci_read_msi_msg(irq_data_get_msi_desc(data), msg); +} + +static struct irq_chip pseries_msi_irq_chip = { + .name = "pSeries-MSI", + .irq_shutdown = pseries_msi_shutdown, + .irq_mask = irq_chip_mask_parent, + .irq_unmask = irq_chip_unmask_parent, + .irq_eoi = irq_chip_eoi_parent, + .irq_set_affinity = irq_chip_set_affinity_parent, + .irq_compose_msi_msg = pseries_msi_compose_msg, +}; + +static int pseries_irq_parent_domain_alloc(struct irq_domain *domain, unsigned int virq, + irq_hw_number_t hwirq) +{ + struct irq_fwspec parent_fwspec; + int ret; + + parent_fwspec.fwnode = domain->parent->fwnode; + parent_fwspec.param_count = 2; + parent_fwspec.param[0] = hwirq; + parent_fwspec.param[1] = IRQ_TYPE_EDGE_RISING; + + ret = irq_domain_alloc_irqs_parent(domain, virq, 1, &parent_fwspec); + if (ret) + return ret; + + return 0; +} + +static int pseries_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg) +{ + struct pci_controller *phb = domain->host_data; + msi_alloc_info_t *info = arg; + struct msi_desc *desc = info->desc; + struct pci_dev *pdev = msi_desc_to_pci_dev(desc); + int hwirq; + int i, ret; + + hwirq = rtas_query_irq_number(pci_get_pdn(pdev), desc->msi_attrib.entry_nr); + if (hwirq < 0) { + dev_err(&pdev->dev, "Failed to query HW IRQ: %d\n", hwirq); + return hwirq; + } + + dev_dbg(&pdev->dev, "%s bridge %pOF %d/%x #%d\n", __func__, + phb->dn, virq, hwirq, nr_irqs); + + for (i = 0; i < nr_irqs; i++) { + ret = pseries_irq_parent_domain_alloc(domain, virq + i, hwirq + i); + if (ret) + goto out; + + irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i, + &pseries_msi_irq_chip, domain->host_data); + } + + return 0; + +out: + /* TODO: handle RTAS cleanup in ->msi_finish() ? 
*/ + irq_domain_free_irqs_parent(domain, virq, i - 1); + return ret; +} + +static void pseries_irq_domain_free(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs) +{ + struct irq_data *d = irq_domain_get_irq_data(domain, virq); + struct pci_controller *phb = irq_data_get_irq_chip_data(d); + + pr_debug("%s bridge %pOF %d #%d\n", __func__, phb->dn, virq, nr_irqs); + + irq_domain_free_irqs_parent(domain, virq, nr_irqs); +} + +static const struct irq_domain_ops pseries_irq_domain_ops = { + .alloc = pseries_irq_domain_alloc, + .free = pseries_irq_domain_free, +}; + +static int __pseries_msi_allocate_domains(struct pci_controller *phb, + unsigned int count) +{ + struct irq_domain *parent = irq_get_default_host(); + + phb->fwnode = irq_domain_alloc_named_id_fwnode("pSeries-MSI", + phb->global_number); + if (!phb->fwnode) + return -ENOMEM; + + phb->dev_domain = irq_domain_create_hierarchy(parent, 0, count, + phb->fwnode, + &pseries_irq_domain_ops, phb); + if (!phb->dev_domain) { + pr_err("PCI: failed to create IRQ domain bridge %pOF (domain %d)\n", + phb->dn, phb->global_number); + irq_domain_free_fwnode(phb->fwnode); + return -ENOMEM; + } + + phb->msi_domain = pci_msi_create_irq_domain(of_node_to_fwnode(phb->dn), + &pseries_msi_domain_info, + phb->dev_domain); + if (!phb->msi_domain) { + pr_err("PCI: failed to create MSI IRQ domain bridge %pOF (domain %d)\n", + phb->dn, phb->global_number); + irq_domain_free_fwnode(phb->fwnode); + irq_domain_remove(phb->dev_domain); + return -ENOMEM; + } + + return 0; +} + +int pseries_msi_allocate_domains(struct pci_controller *phb) +{ + int count; + + /* Only supported by the XIVE driver */ + if (!xive_enabled()) + return -ENODEV; + + if (!__find_pe_total_msi(phb->dn, &count)) { + pr_err("PCI: failed to find MSIs for bridge %pOF (domain %d)\n", + phb->dn, phb->global_number); + return -ENOSPC; + } + + return __pseries_msi_allocate_domains(phb, count); +} + static void rtas_msi_pci_irq_fixup(struct pci_dev *pdev) { /* No LSI -> leave MSIs (if any) configured */ diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index 1f051a786fb3..d9280262588b 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -85,6 +85,7 @@ struct pci_host_bridge; int pseries_root_bridge_prepare(struct pci_host_bridge *bridge); extern struct pci_controller_ops pseries_pci_controller_ops; +int pseries_msi_allocate_domains(struct pci_controller *phb); unsigned long pseries_memory_block_size(void); diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 631a0d57b6cd..35724caf8a83 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -486,6 +486,8 @@ static void __init pSeries_discover_phbs(void) /* create pci_dn's for DT nodes under this PHB */ pci_devs_phb_init_dynamic(phb); + + pseries_msi_allocate_domains(phb); } of_node_put(root); -- cgit From 5690bcae186084a8544b1819f0d89399268bd0cf Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:24 +0200 Subject: powerpc/xive: Drop unmask of MSIs at startup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit That was a workaround in the XIVE domain because of the lack of MSI domain. This is now handled. 
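Concretely, unmasking now flows through the MSI domain chip added earlier in the series, which pairs the PCI-level and XIVE-level operations (reproduced here for context):

    static void pseries_msi_unmask(struct irq_data *d)
    {
    	pci_msi_unmask_irq(d);		/* clear the MSI mask bit on the card */
    	irq_chip_unmask_parent(d);	/* then unmask at the XIVE level */
    }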
Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-7-clg@kaod.org --- arch/powerpc/sysdev/xive/common.c | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 823f9fe3542a..356f584cc51b 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -616,16 +616,6 @@ static unsigned int xive_irq_startup(struct irq_data *d) pr_devel("xive_irq_startup: irq %d [0x%x] data @%p\n", d->irq, hw_irq, d); -#ifdef CONFIG_PCI_MSI - /* - * The generic MSI code returns with the interrupt disabled on the - * card, using the MSI mask bits. Firmware doesn't appear to unmask - * at that level, so we do it here by hand. - */ - if (irq_data_get_msi_desc(d)) - pci_msi_unmask_irq(d); -#endif - /* Pick a target */ target = xive_pick_irq_target(d, irq_data_get_affinity_mask(d)); if (target == XIVE_INVALID_TARGET) { -- cgit From 292145a6e598c1e6633b8f5f607706b46f552ab9 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:25 +0200 Subject: powerpc/xive: Remove irqd_is_started() check when setting the affinity MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the early days of XIVE support, commit cffb717ceb8e ("powerpc/xive: Ensure active irqd when setting affinity") tried to fix an issue related to interrupt migration. If the root cause was related to CPU unplug, it should have been fixed and there is no reason to keep the irqd_is_started() check. This test is also breaking affinity setting of MSIs, which can be set before the associated IRQ is started. Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-8-clg@kaod.org --- arch/powerpc/sysdev/xive/common.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 356f584cc51b..0631c97b3a14 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -709,10 +709,6 @@ static int xive_irq_set_affinity(struct irq_data *d, if (cpumask_any_and(cpumask, cpu_online_mask) >= nr_cpu_ids) return -EINVAL; - /* Don't do anything if the interrupt isn't started */ - if (!irqd_is_started(d)) - return IRQ_SET_MASK_OK; - /* * If existing target is already in the new mask, and is * online then do nothing. -- cgit From 07817a578a7a79638537480b8847dc7a12f293c5 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:26 +0200 Subject: powerpc/pseries/pci: Add a domain_free_irqs() handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The RTAS firmware can not disable one MSI at a time. It's all or nothing. We need a custom free IRQ handler for that.
Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-9-clg@kaod.org --- arch/powerpc/platforms/pseries/msi.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index 86c6809ebac2..591cee9cbc9e 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -529,8 +529,24 @@ static int pseries_msi_ops_prepare(struct irq_domain *domain, struct device *dev return rtas_prepare_msi_irqs(pdev, nvec, type, arg); } +/* + * RTAS can not disable one MSI at a time. It's all or nothing. Do it + * at the end after all IRQs have been freed. + */ +static void pseries_msi_domain_free_irqs(struct irq_domain *domain, + struct device *dev) +{ + if (WARN_ON_ONCE(!dev_is_pci(dev))) + return; + + __msi_domain_free_irqs(domain, dev); + + rtas_disable_msi(to_pci_dev(dev)); +} + static struct msi_domain_ops pseries_pci_msi_domain_ops = { .msi_prepare = pseries_msi_ops_prepare, + .domain_free_irqs = pseries_msi_domain_free_irqs, }; static void pseries_msi_shutdown(struct irq_data *d) -- cgit From 9a014f456881e947bf8cdd8c984a207097e6c096 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:27 +0200 Subject: powerpc/pseries/pci: Add a msi_free() handler to clear XIVE data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The MSI domain clears the IRQ with msi_domain_free(), which calls irq_domain_free_irqs_top(), which clears the handler data. This is a problem for the XIVE controller since we need to unmap MMIO pages and free a specific XIVE structure. The 'msi_free()' handler is called before irq_domain_free_irqs_top() when the handler data is still available. Use that to clear the XIVE controller data. Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-10-clg@kaod.org --- arch/powerpc/include/asm/xive.h | 1 + arch/powerpc/platforms/pseries/msi.c | 16 +++++++++++++++- arch/powerpc/sysdev/xive/common.c | 5 ++++- 3 files changed, 20 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h index aa094a8655b0..20ae50ab083c 100644 --- a/arch/powerpc/include/asm/xive.h +++ b/arch/powerpc/include/asm/xive.h @@ -111,6 +111,7 @@ void xive_native_free_vp_block(u32 vp_base); int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data); void xive_cleanup_irq_data(struct xive_irq_data *xd); +void xive_irq_free_data(unsigned int virq); void xive_native_free_irq(u32 irq); int xive_native_configure_irq(u32 hw_irq, u32 target, u8 prio, u32 sw_irq); diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index 591cee9cbc9e..f9635b01b2bf 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -529,6 +529,19 @@ static int pseries_msi_ops_prepare(struct irq_domain *domain, struct device *dev return rtas_prepare_msi_irqs(pdev, nvec, type, arg); } +/* + * ->msi_free() is called before irq_domain_free_irqs_top() when the + * handler data is still available. Use that to clear the XIVE + * controller data. + */ +static void pseries_msi_ops_msi_free(struct irq_domain *domain, + struct msi_domain_info *info, + unsigned int irq) +{ + if (xive_enabled()) + xive_irq_free_data(irq); +} + /* * RTAS can not disable one MSI at a time. It's all or nothing. 
Do it * at the end after all IRQs have been freed. @@ -546,6 +559,7 @@ static void pseries_msi_domain_free_irqs(struct irq_domain *domain, static struct msi_domain_ops pseries_pci_msi_domain_ops = { .msi_prepare = pseries_msi_ops_prepare, + .msi_free = pseries_msi_ops_msi_free, .domain_free_irqs = pseries_msi_domain_free_irqs, }; @@ -660,7 +674,7 @@ static void pseries_irq_domain_free(struct irq_domain *domain, unsigned int virq pr_debug("%s bridge %pOF %d #%d\n", __func__, phb->dn, virq, nr_irqs); - irq_domain_free_irqs_parent(domain, virq, nr_irqs); + /* XIVE domain data is cleared through ->msi_free() */ } static const struct irq_domain_ops pseries_irq_domain_ops = { diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 0631c97b3a14..107f442d3411 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -975,6 +975,8 @@ EXPORT_SYMBOL_GPL(is_xive_irq); void xive_cleanup_irq_data(struct xive_irq_data *xd) { + pr_debug("%s for HW %x\n", __func__, xd->hw_irq); + if (xd->eoi_mmio) { iounmap(xd->eoi_mmio); if (xd->eoi_mmio == xd->trig_mmio) @@ -1016,7 +1018,7 @@ static int xive_irq_alloc_data(unsigned int virq, irq_hw_number_t hw) return 0; } -static void xive_irq_free_data(unsigned int virq) +void xive_irq_free_data(unsigned int virq) { struct xive_irq_data *xd = irq_get_handler_data(virq); @@ -1026,6 +1028,7 @@ static void xive_irq_free_data(unsigned int virq) xive_cleanup_irq_data(xd); kfree(xd); } +EXPORT_SYMBOL_GPL(xive_irq_free_data); #ifdef CONFIG_SMP -- cgit From 174db9e7f775ce06fc6949c9abbe758b3eb8171c Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:28 +0200 Subject: powerpc/pseries/pci: Add support of MSI domains to PHB hotplug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Simply allocate or release the MSI domains when a PHB is inserted in or removed from the machine. 
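As a usage note, the pairing is symmetric across the PHB life cycle. A sketch of the hotplug round trip through the two pseries entry points patched below (they are declared in platforms/pseries/pseries.h; error handling elided):

#include <linux/of.h>
#include <asm/pci-bridge.h>

/* init_phb_dynamic()/remove_phb_dynamic() are the pseries PHB hotplug
 * entry points; after this patch they also allocate and release the
 * PHB's MSI domains. */
static int example_phb_roundtrip(struct device_node *dn)
{
	struct pci_controller *phb;

	phb = init_phb_dynamic(dn);	/* MSI domains allocated here */
	if (!phb)
		return -ENODEV;

	return remove_phb_dynamic(phb);	/* MSI domains freed here */
}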
Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-11-clg@kaod.org --- arch/powerpc/platforms/pseries/msi.c | 10 ++++++++++ arch/powerpc/platforms/pseries/pci_dlpar.c | 4 ++++ arch/powerpc/platforms/pseries/pseries.h | 1 + 3 files changed, 15 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index f9635b01b2bf..e2127a3f7ebd 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -733,6 +733,16 @@ int pseries_msi_allocate_domains(struct pci_controller *phb) return __pseries_msi_allocate_domains(phb, count); } +void pseries_msi_free_domains(struct pci_controller *phb) +{ + if (phb->msi_domain) + irq_domain_remove(phb->msi_domain); + if (phb->dev_domain) + irq_domain_remove(phb->dev_domain); + if (phb->fwnode) + irq_domain_free_fwnode(phb->fwnode); +} + static void rtas_msi_pci_irq_fixup(struct pci_dev *pdev) { /* No LSI -> leave MSIs (if any) configured */ diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c index a8f9140a24fa..90c9d3531694 100644 --- a/arch/powerpc/platforms/pseries/pci_dlpar.c +++ b/arch/powerpc/platforms/pseries/pci_dlpar.c @@ -33,6 +33,8 @@ struct pci_controller *init_phb_dynamic(struct device_node *dn) pci_devs_phb_init_dynamic(phb); + pseries_msi_allocate_domains(phb); + /* Create EEH devices for the PHB */ eeh_phb_pe_create(phb); @@ -74,6 +76,8 @@ int remove_phb_dynamic(struct pci_controller *phb) } } + pseries_msi_free_domains(phb); + /* Remove the PCI bus and unregister the bridge device from sysfs */ phb->bus = NULL; pci_remove_bus(b); diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index d9280262588b..3544778e06d0 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -86,6 +86,7 @@ int pseries_root_bridge_prepare(struct pci_host_bridge *bridge); extern struct pci_controller_ops pseries_pci_controller_ops; int pseries_msi_allocate_domains(struct pci_controller *phb); +void pseries_msi_free_domains(struct pci_controller *phb); unsigned long pseries_memory_block_size(void); -- cgit From 2c50d7e99e39eba92b93210e740f3f9e5a06ba54 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:29 +0200 Subject: powerpc/powernv/pci: Introduce __pnv_pci_ioda_msi_setup() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It will be used as a 'compose_msg' handler of the MSI domain introduced later. 
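For orientation: an irq_compose_msi_msg-style handler produces the address/data pair a device writes to raise the interrupt. A minimal sketch of the shape (the doorbell address is a made-up value and the example_* name is illustrative):

#include <linux/irq.h>
#include <linux/msi.h>

static void example_compose_msi_msg(struct irq_data *d, struct msi_msg *msg)
{
	/* Doorbell address the device writes to (assumed value)... */
	msg->address_hi = 0;
	msg->address_lo = 0xffff0000;
	/* ...and the payload; here simply the vector (hwirq) number. */
	msg->data = d->hwirq;
}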
Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-12-clg@kaod.org --- arch/powerpc/platforms/powernv/pci-ioda.c | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 7de464679292..2922674cc934 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2016,15 +2016,17 @@ bool is_pnv_opal_msi(struct irq_chip *chip) } EXPORT_SYMBOL_GPL(is_pnv_opal_msi); -static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, - unsigned int hwirq, unsigned int virq, - unsigned int is_64, struct msi_msg *msg) +static int __pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, + unsigned int xive_num, + unsigned int is_64, struct msi_msg *msg) { struct pnv_ioda_pe *pe = pnv_ioda_get_pe(dev); - unsigned int xive_num = hwirq - phb->msi_base; __be32 data; int rc; + dev_dbg(&dev->dev, "%s: setup %s-bit MSI for vector #%d\n", __func__, + is_64 ? "64" : "32", xive_num); + /* No PE assigned ? bail out ... no MSI for you ! */ if (pe == NULL) return -ENXIO; @@ -2072,12 +2074,28 @@ static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, } msg->data = be32_to_cpu(data); + return 0; +} + +static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, + unsigned int hwirq, unsigned int virq, + unsigned int is_64, struct msi_msg *msg) +{ + struct pnv_ioda_pe *pe = pnv_ioda_get_pe(dev); + unsigned int xive_num = hwirq - phb->msi_base; + int rc; + + rc = __pnv_pci_ioda_msi_setup(phb, dev, xive_num, is_64, msg); + if (rc) + return rc; + + /* P8 only */ pnv_set_msi_irq_chip(phb, virq); pr_devel("%s: %s-bit MSI on hwirq %x (xive #%d)," " address=%x_%08x data=%x PE# %x\n", pci_name(dev), is_64 ? "64" : "32", hwirq, xive_num, - msg->address_hi, msg->address_lo, data, pe->pe_number); + msg->address_hi, msg->address_lo, msg->data, pe->pe_number); return 0; } -- cgit From 0fcfe2247e75070361af2b6845030cada92cdbf8 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:30 +0200 Subject: powerpc/powernv/pci: Add MSI domains MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is very similar to the MSI domains of the pSeries platform. The MSI allocator is directly handled under the Linux PHB in the in-the-middle "PNV-MSI" domain. Only the XIVE (P9/P10) parent domain is supported for now. Support for XICS will come later. Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-13-clg@kaod.org --- arch/powerpc/platforms/powernv/pci-ioda.c | 188 ++++++++++++++++++++++++++++++ 1 file changed, 188 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 2922674cc934..d2a17fcb6002 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -36,6 +36,7 @@ #include #include #include +#include #include @@ -2100,6 +2101,189 @@ static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, return 0; } +/* + * The msi_free() op is called before irq_domain_free_irqs_top() when + * the handler data is still available. Use that to clear the XIVE + * controller. 
+ */ +static void pnv_msi_ops_msi_free(struct irq_domain *domain, + struct msi_domain_info *info, + unsigned int irq) +{ + if (xive_enabled()) + xive_irq_free_data(irq); +} + +static struct msi_domain_ops pnv_pci_msi_domain_ops = { + .msi_free = pnv_msi_ops_msi_free, +}; + +static void pnv_msi_shutdown(struct irq_data *d) +{ + d = d->parent_data; + if (d->chip->irq_shutdown) + d->chip->irq_shutdown(d); +} + +static void pnv_msi_mask(struct irq_data *d) +{ + pci_msi_mask_irq(d); + irq_chip_mask_parent(d); +} + +static void pnv_msi_unmask(struct irq_data *d) +{ + pci_msi_unmask_irq(d); + irq_chip_unmask_parent(d); +} + +static struct irq_chip pnv_pci_msi_irq_chip = { + .name = "PNV-PCI-MSI", + .irq_shutdown = pnv_msi_shutdown, + .irq_mask = pnv_msi_mask, + .irq_unmask = pnv_msi_unmask, + .irq_eoi = irq_chip_eoi_parent, +}; + +static struct msi_domain_info pnv_msi_domain_info = { + .flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | + MSI_FLAG_MULTI_PCI_MSI | MSI_FLAG_PCI_MSIX), + .ops = &pnv_pci_msi_domain_ops, + .chip = &pnv_pci_msi_irq_chip, +}; + +static void pnv_msi_compose_msg(struct irq_data *d, struct msi_msg *msg) +{ + struct msi_desc *entry = irq_data_get_msi_desc(d); + struct pci_dev *pdev = msi_desc_to_pci_dev(entry); + struct pci_controller *hose = irq_data_get_irq_chip_data(d); + struct pnv_phb *phb = hose->private_data; + int rc; + + rc = __pnv_pci_ioda_msi_setup(phb, pdev, d->hwirq, + entry->msi_attrib.is_64, msg); + if (rc) + dev_err(&pdev->dev, "Failed to setup %s-bit MSI #%ld : %d\n", + entry->msi_attrib.is_64 ? "64" : "32", d->hwirq, rc); +} + +static struct irq_chip pnv_msi_irq_chip = { + .name = "PNV-MSI", + .irq_shutdown = pnv_msi_shutdown, + .irq_mask = irq_chip_mask_parent, + .irq_unmask = irq_chip_unmask_parent, + .irq_eoi = irq_chip_eoi_parent, + .irq_set_affinity = irq_chip_set_affinity_parent, + .irq_compose_msi_msg = pnv_msi_compose_msg, +}; + +static int pnv_irq_parent_domain_alloc(struct irq_domain *domain, + unsigned int virq, int hwirq) +{ + struct irq_fwspec parent_fwspec; + int ret; + + parent_fwspec.fwnode = domain->parent->fwnode; + parent_fwspec.param_count = 2; + parent_fwspec.param[0] = hwirq; + parent_fwspec.param[1] = IRQ_TYPE_EDGE_RISING; + + ret = irq_domain_alloc_irqs_parent(domain, virq, 1, &parent_fwspec); + if (ret) + return ret; + + return 0; +} + +static int pnv_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg) +{ + struct pci_controller *hose = domain->host_data; + struct pnv_phb *phb = hose->private_data; + msi_alloc_info_t *info = arg; + struct pci_dev *pdev = msi_desc_to_pci_dev(info->desc); + int hwirq; + int i, ret; + + hwirq = msi_bitmap_alloc_hwirqs(&phb->msi_bmp, nr_irqs); + if (hwirq < 0) { + dev_warn(&pdev->dev, "failed to find a free MSI\n"); + return -ENOSPC; + } + + dev_dbg(&pdev->dev, "%s bridge %pOF %d/%x #%d\n", __func__, + hose->dn, virq, hwirq, nr_irqs); + + for (i = 0; i < nr_irqs; i++) { + ret = pnv_irq_parent_domain_alloc(domain, virq + i, + phb->msi_base + hwirq + i); + if (ret) + goto out; + + irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i, + &pnv_msi_irq_chip, hose); + } + + return 0; + +out: + irq_domain_free_irqs_parent(domain, virq, i - 1); + msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq, nr_irqs); + return ret; +} + +static void pnv_irq_domain_free(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs) +{ + struct irq_data *d = irq_domain_get_irq_data(domain, virq); + struct pci_controller *hose = irq_data_get_irq_chip_data(d); + 
struct pnv_phb *phb = hose->private_data; + + pr_debug("%s bridge %pOF %d/%lx #%d\n", __func__, hose->dn, + virq, d->hwirq, nr_irqs); + + msi_bitmap_free_hwirqs(&phb->msi_bmp, d->hwirq, nr_irqs); + /* XIVE domain is cleared through ->msi_free() */ +} + +static const struct irq_domain_ops pnv_irq_domain_ops = { + .alloc = pnv_irq_domain_alloc, + .free = pnv_irq_domain_free, +}; + +static int pnv_msi_allocate_domains(struct pci_controller *hose, unsigned int count) +{ + struct pnv_phb *phb = hose->private_data; + struct irq_domain *parent = irq_get_default_host(); + + hose->fwnode = irq_domain_alloc_named_id_fwnode("PNV-MSI", phb->opal_id); + if (!hose->fwnode) + return -ENOMEM; + + hose->dev_domain = irq_domain_create_hierarchy(parent, 0, count, + hose->fwnode, + &pnv_irq_domain_ops, hose); + if (!hose->dev_domain) { + pr_err("PCI: failed to create IRQ domain bridge %pOF (domain %d)\n", + hose->dn, hose->global_number); + irq_domain_free_fwnode(hose->fwnode); + return -ENOMEM; + } + + hose->msi_domain = pci_msi_create_irq_domain(of_node_to_fwnode(hose->dn), + &pnv_msi_domain_info, + hose->dev_domain); + if (!hose->msi_domain) { + pr_err("PCI: failed to create MSI IRQ domain bridge %pOF (domain %d)\n", + hose->dn, hose->global_number); + irq_domain_free_fwnode(hose->fwnode); + irq_domain_remove(hose->dev_domain); + return -ENOMEM; + } + + return 0; +} + static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) { unsigned int count; @@ -2124,6 +2308,10 @@ static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) phb->msi32_support = 1; pr_info(" Allocated bitmap for %d MSIs (base IRQ 0x%x)\n", count, phb->msi_base); + + /* Only supported by the XIVE driver */ + if (xive_enabled()) + pnv_msi_allocate_domains(phb->hose, count); } static void pnv_ioda_setup_pe_res(struct pnv_ioda_pe *pe, -- cgit From ba418a0278265ad65f2f9544e743b7dbff3b994b Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:31 +0200 Subject: KVM: PPC: Book3S HV: Use the new IRQ chip to detect passthrough interrupts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Passthrough PCI MSI interrupts are detected in KVM with a check on a specific EOI handler (P8) or on XIVE (P9). We can now check the PCI-MSI IRQ chip which is cleaner. 
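The check being introduced reduces to an irq_chip pointer comparison, which stays stable no matter which EOI handler the chip carries. The pattern in isolation (example_* names are hypothetical):

#include <linux/irq.h>
#include <linux/irqdesc.h>

static struct irq_chip example_msi_chip;

/* An interrupt "belongs to us" iff it carries our irq_chip. */
static bool example_is_our_msi(struct irq_desc *desc)
{
	struct irq_chip *chip = irq_data_get_irq_chip(&desc->irq_data);

	return chip == &example_msi_chip;
}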
Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-14-clg@kaod.org --- arch/powerpc/kvm/book3s_hv.c | 2 +- arch/powerpc/platforms/powernv/pci-ioda.c | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 9f957ceee58a..1a757c3d33f7 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -5355,7 +5355,7 @@ static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi) * what our real-mode EOI code does, or a XIVE interrupt */ chip = irq_data_get_irq_chip(&desc->irq_data); - if (!chip || !(is_pnv_opal_msi(chip) || is_xive_irq(chip))) { + if (!chip || !is_pnv_opal_msi(chip)) { pr_warn("kvmppc_set_passthru_irq_hv: Could not assign IRQ map for (%d,%d)\n", host_irq, guest_gsi); mutex_unlock(&kvm->lock); diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index d2a17fcb6002..e77caa4dbbdf 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2007,13 +2007,15 @@ void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq) irq_set_chip(virq, &phb->ioda.irq_chip); } +static struct irq_chip pnv_pci_msi_irq_chip; + /* * Returns true iff chip is something that we could call * pnv_opal_pci_msi_eoi for. */ bool is_pnv_opal_msi(struct irq_chip *chip) { - return chip->irq_eoi == pnv_ioda2_msi_eoi; + return chip->irq_eoi == pnv_ioda2_msi_eoi || chip == &pnv_pci_msi_irq_chip; } EXPORT_SYMBOL_GPL(is_pnv_opal_msi); -- cgit From e5e78b15113a73d0294141d9796969fa7b10fa3c Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:32 +0200 Subject: KVM: PPC: Book3S HV: XIVE: Change interface of passthrough interrupt routines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The routine kvmppc_set_passthru_irq() calls kvmppc_xive_set_mapped() and kvmppc_xive_clr_mapped() with an IRQ descriptor. Use directly the host IRQ number to remove a useless conversion. Add some debug. 
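The "useless conversion" is irq_desc_get_irq(): callers held the descriptor only because the old prototype demanded it. A sketch of the two calling conventions, using a hypothetical callee:

#include <linux/irq.h>
#include <linux/irqdesc.h>

/* Old shape: the callee immediately converts the desc back to a number. */
static unsigned long example_old(struct irq_desc *host_desc)
{
	unsigned int host_irq = irq_desc_get_irq(host_desc);

	return irqd_to_hwirq(irq_get_irq_data(host_irq));
}

/* New shape: pass the number; look up irq_data only where it is used. */
static unsigned long example_new(unsigned long host_irq)
{
	return irqd_to_hwirq(irq_get_irq_data(host_irq));
}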
Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-15-clg@kaod.org --- arch/powerpc/include/asm/kvm_ppc.h | 4 ++-- arch/powerpc/kvm/book3s_hv.c | 4 ++-- arch/powerpc/kvm/book3s_xive.c | 17 ++++++++--------- 3 files changed, 12 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 2d88944f9f34..671fbd1a765e 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -664,9 +664,9 @@ extern int kvmppc_xive_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu, u32 cpu); extern void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu); extern int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq, - struct irq_desc *host_desc); + unsigned long host_irq); extern int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq, - struct irq_desc *host_desc); + unsigned long host_irq); extern u64 kvmppc_xive_get_icp(struct kvm_vcpu *vcpu); extern int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval); diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 1a757c3d33f7..05b3a3548c18 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -5398,7 +5398,7 @@ static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi) pimap->n_mapped++; if (xics_on_xive()) - rc = kvmppc_xive_set_mapped(kvm, guest_gsi, desc); + rc = kvmppc_xive_set_mapped(kvm, guest_gsi, host_irq); else kvmppc_xics_set_mapped(kvm, guest_gsi, desc->irq_data.hwirq); if (rc) @@ -5439,7 +5439,7 @@ static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi) } if (xics_on_xive()) - rc = kvmppc_xive_clr_mapped(kvm, guest_gsi, pimap->mapped[i].desc); + rc = kvmppc_xive_clr_mapped(kvm, guest_gsi, host_irq); else kvmppc_xics_clr_mapped(kvm, guest_gsi, pimap->mapped[i].r_hwirq); diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index 8cfab3547494..45a0434e9d85 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -922,13 +922,12 @@ int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval) } int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq, - struct irq_desc *host_desc) + unsigned long host_irq) { struct kvmppc_xive *xive = kvm->arch.xive; struct kvmppc_xive_src_block *sb; struct kvmppc_xive_irq_state *state; - struct irq_data *host_data = irq_desc_get_irq_data(host_desc); - unsigned int host_irq = irq_desc_get_irq(host_desc); + struct irq_data *host_data = irq_get_irq_data(host_irq); unsigned int hw_irq = (unsigned int)irqd_to_hwirq(host_data); u16 idx; u8 prio; @@ -937,7 +936,8 @@ int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq, if (!xive) return -ENODEV; - pr_devel("set_mapped girq 0x%lx host HW irq 0x%x...\n",guest_irq, hw_irq); + pr_debug("%s: GIRQ 0x%lx host IRQ %ld XIVE HW IRQ 0x%x\n", + __func__, guest_irq, host_irq, hw_irq); sb = kvmppc_xive_find_source(xive, guest_irq, &idx); if (!sb) @@ -959,7 +959,7 @@ int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq, */ rc = irq_set_vcpu_affinity(host_irq, state); if (rc) { - pr_err("Failed to set VCPU affinity for irq %d\n", host_irq); + pr_err("Failed to set VCPU affinity for host IRQ %ld\n", host_irq); return rc; } @@ -1019,12 +1019,11 @@ int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq, EXPORT_SYMBOL_GPL(kvmppc_xive_set_mapped); int kvmppc_xive_clr_mapped(struct kvm 
*kvm, unsigned long guest_irq, - struct irq_desc *host_desc) + unsigned long host_irq) { struct kvmppc_xive *xive = kvm->arch.xive; struct kvmppc_xive_src_block *sb; struct kvmppc_xive_irq_state *state; - unsigned int host_irq = irq_desc_get_irq(host_desc); u16 idx; u8 prio; int rc; @@ -1032,7 +1031,7 @@ int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq, if (!xive) return -ENODEV; - pr_devel("clr_mapped girq 0x%lx...\n", guest_irq); + pr_debug("%s: GIRQ 0x%lx host IRQ %ld\n", __func__, guest_irq, host_irq); sb = kvmppc_xive_find_source(xive, guest_irq, &idx); if (!sb) @@ -1059,7 +1058,7 @@ int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq, /* Release the passed-through interrupt to the host */ rc = irq_set_vcpu_affinity(host_irq, NULL); if (rc) { - pr_err("Failed to clr VCPU affinity for irq %d\n", host_irq); + pr_err("Failed to clr VCPU affinity for host IRQ %ld\n", host_irq); return rc; } -- cgit From 51be9e51a8000ffc6a33083ceca9da9303ed4dc5 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:33 +0200 Subject: KVM: PPC: Book3S HV: XIVE: Fix mapping of passthrough interrupts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PCI MSI interrupt numbers are now mapped in a PCI-MSI domain but the underlying calls handling the passthrough of the interrupt in the guest need a number in the XIVE IRQ domain. Use the IRQ data mapped in the XIVE IRQ domain and not the one in the PCI-MSI domain. Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-16-clg@kaod.org --- arch/powerpc/kvm/book3s_xive.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index 45a0434e9d85..6878026ee8ec 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -927,7 +927,8 @@ int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq, struct kvmppc_xive *xive = kvm->arch.xive; struct kvmppc_xive_src_block *sb; struct kvmppc_xive_irq_state *state; - struct irq_data *host_data = irq_get_irq_data(host_irq); + struct irq_data *host_data = + irq_domain_get_irq_data(irq_get_default_host(), host_irq); unsigned int hw_irq = (unsigned int)irqd_to_hwirq(host_data); u16 idx; u8 prio; -- cgit From 298f6f952885eeb1f25461f085c6c238bcd9fc5e Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:34 +0200 Subject: powerpc/xics: Remove ICS list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We always had only one ICS per machine. Simplify the XICS driver by removing the ICS list. The ICS stored in the chip data of the XICS domain becomes useless and we don't need it anymore to migrate away IRQs from a CPU. This will be removed in a subsequent patch. 
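The list-to-singleton conversion is a common simplification: replace iteration with one guarded pointer and refuse double registration. A self-contained sketch (the example_* type and names are illustrative, not the real struct ics):

#include <linux/bug.h>

struct example_ics {
	void (*mask_unknown)(struct example_ics *ics, unsigned long vec);
};

static struct example_ics *example_the_ics;	/* at most one per machine */

static void example_register_ics(struct example_ics *ics)
{
	if (WARN_ONCE(example_the_ics, "source controller already set"))
		return;
	example_the_ics = ics;
}

static void example_mask_unknown(unsigned long vec)
{
	if (WARN_ON(!example_the_ics))	/* must be registered by now */
		return;
	example_the_ics->mask_unknown(example_the_ics, vec);
}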
Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-17-clg@kaod.org --- arch/powerpc/sysdev/xics/xics-common.c | 45 ++++++++++++++-------------------- 1 file changed, 19 insertions(+), 26 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c index b14c502e56a8..08c25748efb9 100644 --- a/arch/powerpc/sysdev/xics/xics-common.c +++ b/arch/powerpc/sysdev/xics/xics-common.c @@ -38,7 +38,7 @@ DEFINE_PER_CPU(struct xics_cppr, xics_cppr); struct irq_domain *xics_host; -static LIST_HEAD(ics_list); +static struct ics *xics_ics; void xics_update_irq_servers(void) { @@ -111,12 +111,11 @@ void xics_setup_cpu(void) void xics_mask_unknown_vec(unsigned int vec) { - struct ics *ics; - pr_err("Interrupt 0x%x (real) is invalid, disabling it.\n", vec); - list_for_each_entry(ics, &ics_list, link) - ics->mask_unknown(ics, vec); + if (WARN_ON(!xics_ics)) + return; + xics_ics->mask_unknown(xics_ics, vec); } @@ -198,7 +197,6 @@ void xics_migrate_irqs_away(void) struct irq_chip *chip; long server; unsigned long flags; - struct ics *ics; /* We can't set affinity on ISA interrupts */ if (virq < NR_IRQS_LEGACY) @@ -219,13 +217,10 @@ void xics_migrate_irqs_away(void) raw_spin_lock_irqsave(&desc->lock, flags); /* Locate interrupt server */ - server = -1; - ics = irq_desc_get_chip_data(desc); - if (ics) - server = ics->get_server(ics, irq); + server = xics_ics->get_server(xics_ics, irq); if (server < 0) { - printk(KERN_ERR "%s: Can't find server for irq %d\n", - __func__, irq); + pr_err("%s: Can't find server for irq %d/%x\n", + __func__, virq, irq); goto unlock; } @@ -307,13 +302,9 @@ int xics_get_irq_server(unsigned int virq, const struct cpumask *cpumask, static int xics_host_match(struct irq_domain *h, struct device_node *node, enum irq_domain_bus_token bus_token) { - struct ics *ics; - - list_for_each_entry(ics, &ics_list, link) - if (ics->host_match(ics, node)) - return 1; - - return 0; + if (WARN_ON(!xics_ics)) + return 0; + return xics_ics->host_match(xics_ics, node) ? 1 : 0; } /* Dummies */ @@ -330,8 +321,6 @@ static struct irq_chip xics_ipi_chip = { static int xics_host_map(struct irq_domain *h, unsigned int virq, irq_hw_number_t hw) { - struct ics *ics; - pr_devel("xics: map virq %d, hwirq 0x%lx\n", virq, hw); /* @@ -348,12 +337,14 @@ static int xics_host_map(struct irq_domain *h, unsigned int virq, return 0; } + if (WARN_ON(!xics_ics)) + return -EINVAL; + /* Let the ICS setup the chip data */ - list_for_each_entry(ics, &ics_list, link) - if (ics->map(ics, virq) == 0) - return 0; + if (xics_ics->map(xics_ics, virq)) + return -EINVAL; - return -EINVAL; + return 0; } static int xics_host_xlate(struct irq_domain *h, struct device_node *ct, @@ -427,7 +418,9 @@ static void __init xics_init_host(void) void __init xics_register_ics(struct ics *ics) { - list_add(&ics->link, &ics_list); + if (WARN_ONCE(xics_ics, "XICS: Source Controller is already defined !")) + return; + xics_ics = ics; } static void __init xics_get_server_size(void) -- cgit From 248af248a8f45461662fb633eca4adf24ae704ad Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:35 +0200 Subject: powerpc/xics: Rename the map handler in a check handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This moves the IRQ initialization done under the different ICS backends in the common part of XICS. 
The 'map' handler becomes a simple 'check' on the HW IRQ at the FW level. As we don't need an ICS anymore in xics_migrate_irqs_away(), the XICS domain does not set a chip data for the IRQ. Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-18-clg@kaod.org --- arch/powerpc/include/asm/xics.h | 3 ++- arch/powerpc/sysdev/xics/ics-native.c | 13 +++++-------- arch/powerpc/sysdev/xics/ics-opal.c | 27 ++++++++++----------------- arch/powerpc/sysdev/xics/ics-rtas.c | 28 ++++++++++------------------ arch/powerpc/sysdev/xics/xics-common.c | 15 +++++++++------ 5 files changed, 36 insertions(+), 50 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h index d9cf192368ad..0ac9bfddf704 100644 --- a/arch/powerpc/include/asm/xics.h +++ b/arch/powerpc/include/asm/xics.h @@ -89,10 +89,11 @@ static inline int ics_opal_init(void) { return -ENODEV; } /* ICS instance, hooked up to chip_data of an irq */ struct ics { struct list_head link; - int (*map)(struct ics *ics, unsigned int virq); + int (*check)(struct ics *ics, unsigned int hwirq); void (*mask_unknown)(struct ics *ics, unsigned long vec); long (*get_server)(struct ics *ics, unsigned long vec); int (*host_match)(struct ics *ics, struct device_node *node); + struct irq_chip *chip; char data[]; }; diff --git a/arch/powerpc/sysdev/xics/ics-native.c b/arch/powerpc/sysdev/xics/ics-native.c index d450502f4053..dec7d93a8ba1 100644 --- a/arch/powerpc/sysdev/xics/ics-native.c +++ b/arch/powerpc/sysdev/xics/ics-native.c @@ -131,19 +131,15 @@ static struct irq_chip ics_native_irq_chip = { .irq_retrigger = xics_retrigger, }; -static int ics_native_map(struct ics *ics, unsigned int virq) +static int ics_native_check(struct ics *ics, unsigned int hw_irq) { - unsigned int vec = (unsigned int)virq_to_hw(virq); struct ics_native *in = to_ics_native(ics); - pr_devel("%s: vec=0x%x\n", __func__, vec); + pr_devel("%s: hw_irq=0x%x\n", __func__, hw_irq); - if (vec < in->ibase || vec >= (in->ibase + in->icount)) + if (hw_irq < in->ibase || hw_irq >= (in->ibase + in->icount)) return -EINVAL; - irq_set_chip_and_handler(virq, &ics_native_irq_chip, handle_fasteoi_irq); - irq_set_chip_data(virq, ics); - return 0; } @@ -177,10 +173,11 @@ static int ics_native_host_match(struct ics *ics, struct device_node *node) } static struct ics ics_native_template = { - .map = ics_native_map, + .check = ics_native_check, .mask_unknown = ics_native_mask_unknown, .get_server = ics_native_get_server, .host_match = ics_native_host_match, + .chip = &ics_native_irq_chip, }; static int __init ics_native_add_one(struct device_node *np) diff --git a/arch/powerpc/sysdev/xics/ics-opal.c b/arch/powerpc/sysdev/xics/ics-opal.c index 823f6c9664cd..8c7ddcc718b6 100644 --- a/arch/powerpc/sysdev/xics/ics-opal.c +++ b/arch/powerpc/sysdev/xics/ics-opal.c @@ -157,26 +157,13 @@ static struct irq_chip ics_opal_irq_chip = { .irq_retrigger = xics_retrigger, }; -static int ics_opal_map(struct ics *ics, unsigned int virq); -static void ics_opal_mask_unknown(struct ics *ics, unsigned long vec); -static long ics_opal_get_server(struct ics *ics, unsigned long vec); - static int ics_opal_host_match(struct ics *ics, struct device_node *node) { return 1; } -/* Only one global & state struct ics */ -static struct ics ics_hal = { - .map = ics_opal_map, - .mask_unknown = ics_opal_mask_unknown, - .get_server = ics_opal_get_server, - .host_match = ics_opal_host_match, -}; - -static int ics_opal_map(struct ics 
*ics, unsigned int virq) +static int ics_opal_check(struct ics *ics, unsigned int hw_irq) { - unsigned int hw_irq = (unsigned int)virq_to_hw(virq); int64_t rc; __be16 server; int8_t priority; @@ -189,9 +176,6 @@ static int ics_opal_map(struct ics *ics, unsigned int virq) if (rc != OPAL_SUCCESS) return -ENXIO; - irq_set_chip_and_handler(virq, &ics_opal_irq_chip, handle_fasteoi_irq); - irq_set_chip_data(virq, &ics_hal); - return 0; } @@ -222,6 +206,15 @@ static long ics_opal_get_server(struct ics *ics, unsigned long vec) return ics_opal_unmangle_server(be16_to_cpu(server)); } +/* Only one global & state struct ics */ +static struct ics ics_hal = { + .check = ics_opal_check, + .mask_unknown = ics_opal_mask_unknown, + .get_server = ics_opal_get_server, + .host_match = ics_opal_host_match, + .chip = &ics_opal_irq_chip, +}; + int __init ics_opal_init(void) { if (!firmware_has_feature(FW_FEATURE_OPAL)) diff --git a/arch/powerpc/sysdev/xics/ics-rtas.c b/arch/powerpc/sysdev/xics/ics-rtas.c index 4cf18000f07c..6d19d711ed35 100644 --- a/arch/powerpc/sysdev/xics/ics-rtas.c +++ b/arch/powerpc/sysdev/xics/ics-rtas.c @@ -24,19 +24,6 @@ static int ibm_set_xive; static int ibm_int_on; static int ibm_int_off; -static int ics_rtas_map(struct ics *ics, unsigned int virq); -static void ics_rtas_mask_unknown(struct ics *ics, unsigned long vec); -static long ics_rtas_get_server(struct ics *ics, unsigned long vec); -static int ics_rtas_host_match(struct ics *ics, struct device_node *node); - -/* Only one global & state struct ics */ -static struct ics ics_rtas = { - .map = ics_rtas_map, - .mask_unknown = ics_rtas_mask_unknown, - .get_server = ics_rtas_get_server, - .host_match = ics_rtas_host_match, -}; - static void ics_rtas_unmask_irq(struct irq_data *d) { unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); @@ -169,9 +156,8 @@ static struct irq_chip ics_rtas_irq_chip = { .irq_retrigger = xics_retrigger, }; -static int ics_rtas_map(struct ics *ics, unsigned int virq) +static int ics_rtas_check(struct ics *ics, unsigned int hw_irq) { - unsigned int hw_irq = (unsigned int)virq_to_hw(virq); int status[2]; int rc; @@ -183,9 +169,6 @@ static int ics_rtas_map(struct ics *ics, unsigned int virq) if (rc) return -ENXIO; - irq_set_chip_and_handler(virq, &ics_rtas_irq_chip, handle_fasteoi_irq); - irq_set_chip_data(virq, &ics_rtas); - return 0; } @@ -213,6 +196,15 @@ static int ics_rtas_host_match(struct ics *ics, struct device_node *node) return !of_device_is_compatible(node, "chrp,iic"); } +/* Only one global & state struct ics */ +static struct ics ics_rtas = { + .check = ics_rtas_check, + .mask_unknown = ics_rtas_mask_unknown, + .get_server = ics_rtas_get_server, + .host_match = ics_rtas_host_match, + .chip = &ics_rtas_irq_chip, +}; + __init int ics_rtas_init(void) { ibm_get_xive = rtas_token("ibm,get-xive"); diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c index 08c25748efb9..ed2bc14f56bd 100644 --- a/arch/powerpc/sysdev/xics/xics-common.c +++ b/arch/powerpc/sysdev/xics/xics-common.c @@ -318,10 +318,10 @@ static struct irq_chip xics_ipi_chip = { .irq_unmask = xics_ipi_unmask, }; -static int xics_host_map(struct irq_domain *h, unsigned int virq, - irq_hw_number_t hw) +static int xics_host_map(struct irq_domain *domain, unsigned int virq, + irq_hw_number_t hwirq) { - pr_devel("xics: map virq %d, hwirq 0x%lx\n", virq, hw); + pr_devel("xics: map virq %d, hwirq 0x%lx\n", virq, hwirq); /* * Mark interrupts as edge sensitive by default so that resend @@ -331,7 +331,7 @@ static int 
xics_host_map(struct irq_domain *h, unsigned int virq, irq_clear_status_flags(virq, IRQ_LEVEL); /* Don't call into ICS for IPIs */ - if (hw == XICS_IPI) { + if (hwirq == XICS_IPI) { irq_set_chip_and_handler(virq, &xics_ipi_chip, handle_percpu_irq); return 0; @@ -340,10 +340,13 @@ static int xics_host_map(struct irq_domain *h, unsigned int virq, if (WARN_ON(!xics_ics)) return -EINVAL; - /* Let the ICS setup the chip data */ - if (xics_ics->map(xics_ics, virq)) + if (xics_ics->check(xics_ics, hwirq)) return -EINVAL; + /* No chip data for the XICS domain */ + irq_domain_set_info(domain, virq, hwirq, xics_ics->chip, + NULL, handle_fasteoi_irq, NULL, NULL); + return 0; } -- cgit From 7d14f6c60b76fa7f3f89d81a95385576ca33b483 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:36 +0200 Subject: powerpc/xics: Give a name to the default XICS IRQ domain MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit and clean up the error path. Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-19-clg@kaod.org --- arch/powerpc/sysdev/xics/xics-common.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c index ed2bc14f56bd..18d3de2f2249 100644 --- a/arch/powerpc/sysdev/xics/xics-common.c +++ b/arch/powerpc/sysdev/xics/xics-common.c @@ -412,11 +412,22 @@ static const struct irq_domain_ops xics_host_ops = { .xlate = xics_host_xlate, }; -static void __init xics_init_host(void) +static int __init xics_allocate_domain(void) { - xics_host = irq_domain_add_tree(NULL, &xics_host_ops, NULL); - BUG_ON(xics_host == NULL); + struct fwnode_handle *fn; + + fn = irq_domain_alloc_named_fwnode("XICS"); + if (!fn) + return -ENOMEM; + + xics_host = irq_domain_create_tree(fn, &xics_host_ops, NULL); + if (!xics_host) { + irq_domain_free_fwnode(fn); + return -ENOMEM; + } + irq_set_default_host(xics_host); + return 0; } void __init xics_register_ics(struct ics *ics) @@ -480,6 +491,8 @@ void __init xics_init(void) /* Initialize common bits */ xics_get_server_size(); xics_update_irq_servers(); - xics_init_host(); + rc = xics_allocate_domain(); + if (rc < 0) + pr_err("XICS: Failed to create IRQ domain"); xics_setup_cpu(); } -- cgit From 53b34e8db73af98fa652641bf490384dc665d0f2 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:37 +0200 Subject: powerpc/xics: Add debug logging to the set_irq_affinity handlers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It really helps to know how the HW is configured when tweaking the IRQ subsystem. 
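Picking up the xics_allocate_domain() change a few hunks above: the subtle part of the error path is that the fwnode must be freed by hand when domain creation fails, because no domain has taken ownership of it yet. The pattern in isolation (example_* names are illustrative):

#include <linux/irqdomain.h>

static const struct irq_domain_ops example_host_ops; /* .map etc. elided */

static struct irq_domain *example_create_named_tree(const char *name)
{
	struct fwnode_handle *fn;
	struct irq_domain *d;

	fn = irq_domain_alloc_named_fwnode(name);
	if (!fn)
		return NULL;

	d = irq_domain_create_tree(fn, &example_host_ops, NULL);
	if (!d) {
		/* nothing owns the fwnode yet: release it ourselves */
		irq_domain_free_fwnode(fn);
		return NULL;
	}
	return d;
}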
Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-20-clg@kaod.org --- arch/powerpc/sysdev/xics/ics-opal.c | 2 +- arch/powerpc/sysdev/xics/ics-rtas.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/sysdev/xics/ics-opal.c b/arch/powerpc/sysdev/xics/ics-opal.c index 8c7ddcc718b6..bf26cae1b982 100644 --- a/arch/powerpc/sysdev/xics/ics-opal.c +++ b/arch/powerpc/sysdev/xics/ics-opal.c @@ -133,7 +133,7 @@ static int ics_opal_set_affinity(struct irq_data *d, } server = ics_opal_mangle_server(wanted_server); - pr_devel("ics-hal: set-affinity irq %d [hw 0x%x] server: 0x%x/0x%x\n", + pr_debug("ics-hal: set-affinity irq %d [hw 0x%x] server: 0x%x/0x%x\n", d->irq, hw_irq, wanted_server, server); rc = opal_set_xive(hw_irq, server, priority); diff --git a/arch/powerpc/sysdev/xics/ics-rtas.c b/arch/powerpc/sysdev/xics/ics-rtas.c index 6d19d711ed35..b50c6341682e 100644 --- a/arch/powerpc/sysdev/xics/ics-rtas.c +++ b/arch/powerpc/sysdev/xics/ics-rtas.c @@ -133,6 +133,9 @@ static int ics_rtas_set_affinity(struct irq_data *d, return -1; } + pr_debug("%s: irq %d [hw 0x%x] server: 0x%x\n", __func__, d->irq, + hw_irq, irq_server); + status = rtas_call_reentrant(ibm_set_xive, 3, 1, NULL, hw_irq, irq_server, xics_status[1]); -- cgit From e4f0aa3b4731430ad73fb4485e97f751c7500668 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:38 +0200 Subject: powerpc/xics: Add support for IRQ domain hierarchy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit XICS doesn't have any state associated with the IRQ. The support is straightforward and simpler than for XIVE. Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-21-clg@kaod.org --- arch/powerpc/sysdev/xics/xics-common.c | 41 ++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c index 18d3de2f2249..febab57f060f 100644 --- a/arch/powerpc/sysdev/xics/xics-common.c +++ b/arch/powerpc/sysdev/xics/xics-common.c @@ -406,7 +406,48 @@ int xics_retrigger(struct irq_data *data) return 0; } +#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY +static int xics_host_domain_translate(struct irq_domain *d, struct irq_fwspec *fwspec, + unsigned long *hwirq, unsigned int *type) +{ + return xics_host_xlate(d, to_of_node(fwspec->fwnode), fwspec->param, + fwspec->param_count, hwirq, type); +} + +static int xics_host_domain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg) +{ + struct irq_fwspec *fwspec = arg; + irq_hw_number_t hwirq; + unsigned int type = IRQ_TYPE_NONE; + int i, rc; + + rc = xics_host_domain_translate(domain, fwspec, &hwirq, &type); + if (rc) + return rc; + + pr_debug("%s %d/%lx #%d\n", __func__, virq, hwirq, nr_irqs); + + for (i = 0; i < nr_irqs; i++) + irq_domain_set_info(domain, virq + i, hwirq + i, xics_ics->chip, + xics_ics, handle_fasteoi_irq, NULL, NULL); + + return 0; +} + +static void xics_host_domain_free(struct irq_domain *domain, + unsigned int virq, unsigned int nr_irqs) +{ + pr_debug("%s %d #%d\n", __func__, virq, nr_irqs); +} +#endif + static const struct irq_domain_ops xics_host_ops = { +#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY + .alloc = xics_host_domain_alloc, + .free = xics_host_domain_free, + .translate = xics_host_domain_translate, +#endif .match = xics_host_match, 
.map = xics_host_map, .xlate = xics_host_xlate, -- cgit From bbb25af8fbdba4acaf955e412a84eb2eea48697c Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:39 +0200 Subject: powerpc/powernv/pci: Customize the MSI EOI handler to support PHB3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PHB3s need an extra OPAL call to EOI the interrupt. The call takes an OPAL HW IRQ number but it is translated into a vector number in OPAL. Here, we directly use the vector number of the in-the-middle "PNV-MSI" domain instead of grabbing the OPAL HW IRQ number in the XICS parent domain. Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-22-clg@kaod.org --- arch/powerpc/platforms/powernv/pci-ioda.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index e77caa4dbbdf..b498876a976f 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2169,12 +2169,33 @@ static void pnv_msi_compose_msg(struct irq_data *d, struct msi_msg *msg) entry->msi_attrib.is_64 ? "64" : "32", d->hwirq, rc); } +/* + * The IRQ data is mapped in the MSI domain in which HW IRQ numbers + * correspond to vector numbers. + */ +static void pnv_msi_eoi(struct irq_data *d) +{ + struct pci_controller *hose = irq_data_get_irq_chip_data(d); + struct pnv_phb *phb = hose->private_data; + + if (phb->model == PNV_PHB_MODEL_PHB3) { + /* + * The EOI OPAL call takes an OPAL HW IRQ number but + * since it is translated into a vector number in + * OPAL, use that directly. + */ + WARN_ON_ONCE(opal_pci_msi_eoi(phb->opal_id, d->hwirq)); + } + + irq_chip_eoi_parent(d); +} + static struct irq_chip pnv_msi_irq_chip = { .name = "PNV-MSI", .irq_shutdown = pnv_msi_shutdown, .irq_mask = irq_chip_mask_parent, .irq_unmask = irq_chip_unmask_parent, - .irq_eoi = irq_chip_eoi_parent, + .irq_eoi = pnv_msi_eoi, .irq_set_affinity = irq_chip_set_affinity_parent, .irq_compose_msi_msg = pnv_msi_compose_msg, }; -- cgit From 679e30b9536eeb93bc8c9a39c0ddc77dec536f6b Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:40 +0200 Subject: powerpc/pci: Drop XIVE restriction on MSI domains MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The PowerNV and pSeries platforms now have support for both the XICS and XIVE IRQ domains. 
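Both backends now install themselves as the default host, so platform MSI code can fetch its parent domain generically instead of guarding on xive_enabled(). Sketch:

#include <linux/irqdomain.h>

static struct irq_domain *example_msi_parent(void)
{
	/* Whichever controller initialized at boot (XICS or XIVE)
	 * registered itself with irq_set_default_host(). */
	return irq_get_default_host();
}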
Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-23-clg@kaod.org --- arch/powerpc/platforms/powernv/pci-ioda.c | 4 +--- arch/powerpc/platforms/pseries/msi.c | 4 ---- 2 files changed, 1 insertion(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index b498876a976f..e2454439e574 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2332,9 +2332,7 @@ static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) pr_info(" Allocated bitmap for %d MSIs (base IRQ 0x%x)\n", count, phb->msi_base); - /* Only supported by the XIVE driver */ - if (xive_enabled()) - pnv_msi_allocate_domains(phb->hose, count); + pnv_msi_allocate_domains(phb->hose, count); } static void pnv_ioda_setup_pe_res(struct pnv_ioda_pe *pe, diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index e2127a3f7ebd..e196cc1b8540 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -720,10 +720,6 @@ int pseries_msi_allocate_domains(struct pci_controller *phb) { int count; - /* Only supported by the XIVE driver */ - if (!xive_enabled()) - return -ENODEV; - if (!__find_pe_total_msi(phb->dn, &count)) { pr_err("PCI: failed to find MSIs for bridge %pOF (domain %d)\n", phb->dn, phb->global_number); -- cgit From 1e661f81a522eadfe4bc5bb1ec9fbae27c13f163 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:41 +0200 Subject: powerpc/xics: Drop unmask of MSIs at startup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit That was a workaround in the XICS domain because of the lack of MSI domain. This is now handled. Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-24-clg@kaod.org --- arch/powerpc/sysdev/xics/ics-opal.c | 11 ----------- arch/powerpc/sysdev/xics/ics-rtas.c | 9 --------- 2 files changed, 20 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/sysdev/xics/ics-opal.c b/arch/powerpc/sysdev/xics/ics-opal.c index bf26cae1b982..c4d95d8beb6f 100644 --- a/arch/powerpc/sysdev/xics/ics-opal.c +++ b/arch/powerpc/sysdev/xics/ics-opal.c @@ -62,17 +62,6 @@ static void ics_opal_unmask_irq(struct irq_data *d) static unsigned int ics_opal_startup(struct irq_data *d) { -#ifdef CONFIG_PCI_MSI - /* - * The generic MSI code returns with the interrupt disabled on the - * card, using the MSI mask bits. Firmware doesn't appear to unmask - * at that level, so we do it here by hand. - */ - if (irq_data_get_msi_desc(d)) - pci_msi_unmask_irq(d); -#endif - - /* unmask it */ ics_opal_unmask_irq(d); return 0; } diff --git a/arch/powerpc/sysdev/xics/ics-rtas.c b/arch/powerpc/sysdev/xics/ics-rtas.c index b50c6341682e..b9da317b7a2d 100644 --- a/arch/powerpc/sysdev/xics/ics-rtas.c +++ b/arch/powerpc/sysdev/xics/ics-rtas.c @@ -57,15 +57,6 @@ static void ics_rtas_unmask_irq(struct irq_data *d) static unsigned int ics_rtas_startup(struct irq_data *d) { -#ifdef CONFIG_PCI_MSI - /* - * The generic MSI code returns with the interrupt disabled on the - * card, using the MSI mask bits. Firmware doesn't appear to unmask - * at that level, so we do it here by hand. 
- */ - if (irq_data_get_msi_desc(d)) - pci_msi_unmask_irq(d); -#endif /* unmask it */ ics_rtas_unmask_irq(d); return 0; -- cgit From 3005123eea0daa18d98602ab64b2ce3ad087d849 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:42 +0200 Subject: powerpc/pseries/pci: Drop unused MSI code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MSIs should be fully managed by the PCI and IRQ subsystems now. Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-25-clg@kaod.org --- arch/powerpc/platforms/pseries/msi.c | 87 ------------------------------------ 1 file changed, 87 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index e196cc1b8540..1b305e411862 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -111,21 +111,6 @@ static int rtas_query_irq_number(struct pci_dn *pdn, int offset) return rtas_ret[0]; } -static void rtas_teardown_msi_irqs(struct pci_dev *pdev) -{ - struct msi_desc *entry; - - for_each_pci_msi_entry(entry, pdev) { - if (!entry->irq) - continue; - - irq_set_msi_desc(entry->irq, NULL); - irq_dispose_mapping(entry->irq); - } - - rtas_disable_msi(pdev); -} - static int check_req(struct pci_dev *pdev, int nvec, char *prop_name) { struct device_node *dn; @@ -459,66 +444,6 @@ again: return 0; } -static int rtas_setup_msi_irqs(struct pci_dev *pdev, int nvec_in, int type) -{ - struct pci_dn *pdn; - int hwirq, virq, i; - int rc; - struct msi_desc *entry; - struct msi_msg msg; - - rc = rtas_prepare_msi_irqs(pdev, nvec_in, type, NULL); - if (rc) - return rc; - - pdn = pci_get_pdn(pdev); - i = 0; - for_each_pci_msi_entry(entry, pdev) { - hwirq = rtas_query_irq_number(pdn, i++); - if (hwirq < 0) { - pr_debug("rtas_msi: error (%d) getting hwirq\n", rc); - return hwirq; - } - - /* - * Depending on the number of online CPUs in the original - * kernel, it is likely for CPU #0 to be offline in a kdump - * kernel. The associated IRQs in the affinity mappings - * provided by irq_create_affinity_masks() are thus not - * started by irq_startup(), as per-design for managed IRQs. - * This can be a problem with multi-queue block devices driven - * by blk-mq : such a non-started IRQ is very likely paired - * with the single queue enforced by blk-mq during kdump (see - * blk_mq_alloc_tag_set()). This causes the device to remain - * silent and likely hangs the guest at some point. - * - * We don't really care for fine-grained affinity when doing - * kdump actually : simply ignore the pre-computed affinity - * masks in this case and let the default mask with all CPUs - * be used when creating the IRQ mappings. 
- */ - if (is_kdump_kernel()) - virq = irq_create_mapping(NULL, hwirq); - else - virq = irq_create_mapping_affinity(NULL, hwirq, - entry->affinity); - - if (!virq) { - pr_debug("rtas_msi: Failed mapping hwirq %d\n", hwirq); - return -ENOSPC; - } - - dev_dbg(&pdev->dev, "rtas_msi: allocated virq %d\n", virq); - irq_set_msi_desc(virq, entry); - - /* Read config space back so we can restore after reset */ - __pci_read_msi_msg(entry, &msg); - entry->msg = msg; - } - - return 0; -} - static int pseries_msi_ops_prepare(struct irq_domain *domain, struct device *dev, int nvec, msi_alloc_info_t *arg) { @@ -759,8 +684,6 @@ static void rtas_msi_pci_irq_fixup(struct pci_dev *pdev) static int rtas_msi_init(void) { - struct pci_controller *phb; - query_token = rtas_token("ibm,query-interrupt-source-number"); change_token = rtas_token("ibm,change-msi"); @@ -772,16 +695,6 @@ static int rtas_msi_init(void) pr_debug("rtas_msi: Registering RTAS MSI callbacks.\n"); - WARN_ON(pseries_pci_controller_ops.setup_msi_irqs); - pseries_pci_controller_ops.setup_msi_irqs = rtas_setup_msi_irqs; - pseries_pci_controller_ops.teardown_msi_irqs = rtas_teardown_msi_irqs; - - list_for_each_entry(phb, &hose_list, list_node) { - WARN_ON(phb->controller_ops.setup_msi_irqs); - phb->controller_ops.setup_msi_irqs = rtas_setup_msi_irqs; - phb->controller_ops.teardown_msi_irqs = rtas_teardown_msi_irqs; - } - WARN_ON(ppc_md.pci_irq_fixup); ppc_md.pci_irq_fixup = rtas_msi_pci_irq_fixup; -- cgit From 6d9ba6121b1cf453985d08c141970a1b44cd9cf1 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:43 +0200 Subject: powerpc/powernv/pci: Drop unused MSI code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MSIs should be fully managed by the PCI and IRQ subsystems now. Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-26-clg@kaod.org --- arch/powerpc/platforms/powernv/pci-ioda.c | 27 ------------- arch/powerpc/platforms/powernv/pci.c | 67 ------------------------------- arch/powerpc/platforms/powernv/pci.h | 6 --- 3 files changed, 100 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index e2454439e574..eb38ce1fd434 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2080,29 +2080,6 @@ static int __pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, return 0; } -static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, - unsigned int hwirq, unsigned int virq, - unsigned int is_64, struct msi_msg *msg) -{ - struct pnv_ioda_pe *pe = pnv_ioda_get_pe(dev); - unsigned int xive_num = hwirq - phb->msi_base; - int rc; - - rc = __pnv_pci_ioda_msi_setup(phb, dev, xive_num, is_64, msg); - if (rc) - return rc; - - /* P8 only */ - pnv_set_msi_irq_chip(phb, virq); - - pr_devel("%s: %s-bit MSI on hwirq %x (xive #%d)," - " address=%x_%08x data=%x PE# %x\n", - pci_name(dev), is_64 ? "64" : "32", hwirq, xive_num, - msg->address_hi, msg->address_lo, msg->data, pe->pe_number); - - return 0; -} - /* * The msi_free() op is called before irq_domain_free_irqs_top() when * the handler data is still available. 
Use that to clear the XIVE @@ -2327,8 +2304,6 @@ static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) return; } - phb->msi_setup = pnv_pci_ioda_msi_setup; - phb->msi32_support = 1; pr_info(" Allocated bitmap for %d MSIs (base IRQ 0x%x)\n", count, phb->msi_base); @@ -2936,8 +2911,6 @@ static const struct pci_controller_ops pnv_pci_ioda_controller_ops = { .dma_dev_setup = pnv_pci_ioda_dma_dev_setup, .dma_bus_setup = pnv_pci_ioda_dma_bus_setup, .iommu_bypass_supported = pnv_pci_ioda_iommu_bypass_supported, - .setup_msi_irqs = pnv_setup_msi_irqs, - .teardown_msi_irqs = pnv_teardown_msi_irqs, .enable_device_hook = pnv_pci_enable_device_hook, .release_device = pnv_pci_release_device, .window_alignment = pnv_pci_window_alignment, diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 6bb3c52633fb..9a8391b983d1 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -160,73 +160,6 @@ exit: } EXPORT_SYMBOL_GPL(pnv_pci_set_power_state); -int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) -{ - struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus); - struct msi_desc *entry; - struct msi_msg msg; - int hwirq; - unsigned int virq; - int rc; - - if (WARN_ON(!phb) || !phb->msi_bmp.bitmap) - return -ENODEV; - - if (pdev->no_64bit_msi && !phb->msi32_support) - return -ENODEV; - - for_each_pci_msi_entry(entry, pdev) { - if (!entry->msi_attrib.is_64 && !phb->msi32_support) { - pr_warn("%s: Supports only 64-bit MSIs\n", - pci_name(pdev)); - return -ENXIO; - } - hwirq = msi_bitmap_alloc_hwirqs(&phb->msi_bmp, 1); - if (hwirq < 0) { - pr_warn("%s: Failed to find a free MSI\n", - pci_name(pdev)); - return -ENOSPC; - } - virq = irq_create_mapping(NULL, phb->msi_base + hwirq); - if (!virq) { - pr_warn("%s: Failed to map MSI to linux irq\n", - pci_name(pdev)); - msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq, 1); - return -ENOMEM; - } - rc = phb->msi_setup(phb, pdev, phb->msi_base + hwirq, - virq, entry->msi_attrib.is_64, &msg); - if (rc) { - pr_warn("%s: Failed to setup MSI\n", pci_name(pdev)); - irq_dispose_mapping(virq); - msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq, 1); - return rc; - } - irq_set_msi_desc(virq, entry); - pci_write_msi_msg(virq, &msg); - } - return 0; -} - -void pnv_teardown_msi_irqs(struct pci_dev *pdev) -{ - struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus); - struct msi_desc *entry; - irq_hw_number_t hwirq; - - if (WARN_ON(!phb)) - return; - - for_each_pci_msi_entry(entry, pdev) { - if (!entry->irq) - continue; - hwirq = virq_to_hw(entry->irq); - irq_set_msi_desc(entry->irq, NULL); - irq_dispose_mapping(entry->irq); - msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq - phb->msi_base, 1); - } -} - /* Nicely print the contents of the PE State Tables (PEST). 
*/ static void pnv_pci_dump_pest(__be64 pestA[], __be64 pestB[], int pest_size) { diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index c8d4f222a86f..966a9eb64339 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -123,11 +123,7 @@ struct pnv_phb { #endif unsigned int msi_base; - unsigned int msi32_support; struct msi_bitmap msi_bmp; - int (*msi_setup)(struct pnv_phb *phb, struct pci_dev *dev, - unsigned int hwirq, unsigned int virq, - unsigned int is_64, struct msi_msg *msg); int (*init_m64)(struct pnv_phb *phb); int (*get_pe_state)(struct pnv_phb *phb, int pe_no); void (*freeze_pe)(struct pnv_phb *phb, int pe_no); @@ -289,8 +285,6 @@ extern void pnv_pci_init_npu2_opencapi_phb(struct device_node *np); extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev); extern int pnv_eeh_phb_reset(struct pci_controller *hose, int option); -extern int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type); -extern void pnv_teardown_msi_irqs(struct pci_dev *pdev); extern struct pnv_ioda_pe *pnv_pci_bdfn_to_pe(struct pnv_phb *phb, u16 bdfn); extern struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev); extern void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq); -- cgit From f1a377f86f51b381cfc30bf2270f8a5f81e35ee9 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:44 +0200 Subject: powerpc/powernv/pci: Adapt is_pnv_opal_msi() to detect passthrough interrupt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The pnv_ioda2_msi_eoi() chip handler is not used anymore for MSIs. Simply use the check on the PSI-MSI chip. Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-27-clg@kaod.org --- arch/powerpc/platforms/powernv/pci-ioda.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index eb38ce1fd434..6c4b37598bcc 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2015,7 +2015,7 @@ static struct irq_chip pnv_pci_msi_irq_chip; */ bool is_pnv_opal_msi(struct irq_chip *chip) { - return chip->irq_eoi == pnv_ioda2_msi_eoi || chip == &pnv_pci_msi_irq_chip; + return chip == &pnv_pci_msi_irq_chip; } EXPORT_SYMBOL_GPL(is_pnv_opal_msi); -- cgit From c80198a21792ac59412871e4e6fad5041c9be8e4 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:45 +0200 Subject: powerpc/xics: Fix IRQ migration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit desc->irq_data points to the top level IRQ data descriptor which is not necessarily in the XICS IRQ domain. MSIs are in another domain for instance. Fix that by looking for a mapping on the low level XICS IRQ domain. 
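The distinction the fix relies on: desc->irq_data is always the top-level domain's view of the interrupt (the MSI domain for MSIs), while irq_domain_get_irq_data() returns one particular domain's view within the hierarchy. Sketch (example_* name is illustrative):

#include <linux/irq.h>
#include <linux/irqdomain.h>

/* Return the hwirq as seen by 'backend', or -1 if this virq has no
 * mapping at that level (e.g. it lives in an unrelated domain). */
static long example_backend_hwirq(struct irq_domain *backend,
				  unsigned int virq)
{
	struct irq_data *irqd = irq_domain_get_irq_data(backend, virq);

	return irqd ? (long)irqd_to_hwirq(irqd) : -1;
}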
Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-28-clg@kaod.org --- arch/powerpc/sysdev/xics/xics-common.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c index febab57f060f..4a7687caec75 100644 --- a/arch/powerpc/sysdev/xics/xics-common.c +++ b/arch/powerpc/sysdev/xics/xics-common.c @@ -183,6 +183,8 @@ void xics_migrate_irqs_away(void) unsigned int irq, virq; struct irq_desc *desc; + pr_debug("%s: CPU %u\n", __func__, cpu); + /* If we used to be the default server, move to the new "boot_cpuid" */ if (hw_cpu == xics_default_server) xics_update_irq_servers(); @@ -197,6 +199,7 @@ void xics_migrate_irqs_away(void) struct irq_chip *chip; long server; unsigned long flags; + struct irq_data *irqd; /* We can't set affinity on ISA interrupts */ if (virq < NR_IRQS_LEGACY) @@ -204,9 +207,11 @@ void xics_migrate_irqs_away(void) /* We only need to migrate enabled IRQS */ if (!desc->action) continue; - if (desc->irq_data.domain != xics_host) + /* We need a mapping in the XICS IRQ domain */ + irqd = irq_domain_get_irq_data(xics_host, virq); + if (!irqd) continue; - irq = desc->irq_data.hwirq; + irq = irqd_to_hwirq(irqd); /* We need to get IPIs still. */ if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS) continue; -- cgit From 5cd69651ceeed15e021cf7d19f1b1be0a80c0c7a Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:46 +0200 Subject: powerpc/powernv/pci: Set the IRQ chip data for P8/CXL devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before MSI domains, the default IRQ chip of PHB3 MSIs was patched by pnv_set_msi_irq_chip() with the custom EOI handler pnv_ioda2_msi_eoi() and the owning PHB was deduced from the 'ioda.irq_chip' field. This path has been deprecated by the MSI domains but it is still in use by the P8 CAPI 'cxl' driver. Rewriting this driver to support MSI would be a waste of time. Nevertheless, we can still remove the IRQ chip patch and set the IRQ chip data instead. This is cleaner. 
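The replacement for chip patching is the stock irqchip plumbing: store the owning PHB's controller as chip data at setup time and read it back in the handler. A minimal sketch, reusing the pnv_phb/pci_controller types from this series:

#include <linux/irq.h>

static void example_set_owner(struct pnv_phb *phb, unsigned int virq)
{
	irq_set_chip(virq, &phb->ioda.irq_chip);
	/* Store the owner; no need to patch the chip's callbacks. */
	irq_set_chip_data(virq, phb->hose);
}

static struct pnv_phb *example_get_owner(struct irq_data *d)
{
	struct pci_controller *hose = irq_data_get_irq_chip_data(d);

	return hose->private_data;
}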
Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-29-clg@kaod.org --- arch/powerpc/platforms/powernv/pci-ioda.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 6c4b37598bcc..aa97245eedbf 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1971,19 +1971,23 @@ int64_t pnv_opal_pci_msi_eoi(struct irq_chip *chip, unsigned int hw_irq) return opal_pci_msi_eoi(phb->opal_id, hw_irq); } +/* + * The IRQ data is mapped in the XICS domain, with OPAL HW IRQ numbers + */ static void pnv_ioda2_msi_eoi(struct irq_data *d) { int64_t rc; unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); - struct irq_chip *chip = irq_data_get_irq_chip(d); + struct pci_controller *hose = irq_data_get_irq_chip_data(d); + struct pnv_phb *phb = hose->private_data; - rc = pnv_opal_pci_msi_eoi(chip, hw_irq); + rc = opal_pci_msi_eoi(phb->opal_id, hw_irq); WARN_ON_ONCE(rc); icp_native_eoi(d); } - +/* P8/CXL only */ void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq) { struct irq_data *idata; @@ -2005,6 +2009,7 @@ void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq) phb->ioda.irq_chip.irq_eoi = pnv_ioda2_msi_eoi; } irq_set_chip(virq, &phb->ioda.irq_chip); + irq_set_chip_data(virq, phb->hose); } static struct irq_chip pnv_pci_msi_irq_chip; -- cgit From c325712b5f85e561ea89bae2ba5d0104e797e42c Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:47 +0200 Subject: powerpc/powernv/pci: Rework pnv_opal_pci_msi_eoi() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pnv_opal_pci_msi_eoi() is called from KVM to EOI passthrough interrupts when in real mode. Adding MSI domain broke the hack using the 'ioda.irq_chip' field to deduce the owning PHB. Fix that by using the IRQ chip data in the MSI domain. The 'ioda.irq_chip' field is now unused and could be removed from the pnv_phb struct. 
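With hierarchical IRQ domains each level carries its own irq_data, so the PHB can be recovered from the parent level's chip data rather than from a patched chip. A rough sketch of the walk, assuming the chip data was set as in the previous patch:

static struct pnv_phb *example_phb_from_msi(struct irq_data *d)
{
	/* 'd' sits in the PCI-MSI domain; 'parent_data' (available with
	 * CONFIG_IRQ_DOMAIN_HIERARCHY) holds the pci_controller stored
	 * with irq_set_chip_data() at the level below. */
	struct pci_controller *hose =
		irq_data_get_irq_chip_data(d->parent_data);

	return hose->private_data;
}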
Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-30-clg@kaod.org --- arch/powerpc/include/asm/pnv-pci.h | 2 +- arch/powerpc/kvm/book3s_hv_rm_xics.c | 8 ++++---- arch/powerpc/platforms/powernv/pci-ioda.c | 17 +++++++++++++---- 3 files changed, 18 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/pnv-pci.h b/arch/powerpc/include/asm/pnv-pci.h index d0ee0ede5767..b3f480799352 100644 --- a/arch/powerpc/include/asm/pnv-pci.h +++ b/arch/powerpc/include/asm/pnv-pci.h @@ -33,7 +33,7 @@ int pnv_cxl_alloc_hwirqs(struct pci_dev *dev, int num); void pnv_cxl_release_hwirqs(struct pci_dev *dev, int hwirq, int num); int pnv_cxl_get_irq_count(struct pci_dev *dev); struct device_node *pnv_pci_get_phb_node(struct pci_dev *dev); -int64_t pnv_opal_pci_msi_eoi(struct irq_chip *chip, unsigned int hw_irq); +int64_t pnv_opal_pci_msi_eoi(struct irq_data *d); bool is_pnv_opal_msi(struct irq_chip *chip); #ifdef CONFIG_CXL_BASE diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c index 0a11ec88a0ae..587c33fc4564 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_xics.c +++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c @@ -706,6 +706,7 @@ static int ics_rm_eoi(struct kvm_vcpu *vcpu, u32 irq) icp->rm_eoied_irq = irq; } + /* Handle passthrough interrupts */ if (state->host_irq) { ++vcpu->stat.pthru_all; if (state->intr_cpu != -1) { @@ -759,12 +760,12 @@ int xics_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) static unsigned long eoi_rc; -static void icp_eoi(struct irq_chip *c, u32 hwirq, __be32 xirr, bool *again) +static void icp_eoi(struct irq_data *d, u32 hwirq, __be32 xirr, bool *again) { void __iomem *xics_phys; int64_t rc; - rc = pnv_opal_pci_msi_eoi(c, hwirq); + rc = pnv_opal_pci_msi_eoi(d); if (rc) eoi_rc = rc; @@ -872,8 +873,7 @@ long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu, icp_rm_deliver_irq(xics, icp, irq, false); /* EOI the interrupt */ - icp_eoi(irq_desc_get_chip(irq_map->desc), irq_map->r_hwirq, xirr, - again); + icp_eoi(irq_desc_get_irq_data(irq_map->desc), irq_map->r_hwirq, xirr, again); if (check_too_hard(xics, icp) == H_TOO_HARD) return 2; diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index aa97245eedbf..2389cd79c3c8 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1963,12 +1963,21 @@ void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, pe->dma_setup_done = true; } -int64_t pnv_opal_pci_msi_eoi(struct irq_chip *chip, unsigned int hw_irq) +/* + * Called from KVM in real mode to EOI passthru interrupts. The ICP + * EOI is handled directly in KVM in kvmppc_deliver_irq_passthru(). + * + * The IRQ data is mapped in the PCI-MSI domain and the EOI OPAL call + * needs an HW IRQ number mapped in the XICS IRQ domain. The HW IRQ + * numbers of the in-the-middle MSI domain are vector numbers and it's + * good enough for OPAL. Use that. 
+ */ +int64_t pnv_opal_pci_msi_eoi(struct irq_data *d) { - struct pnv_phb *phb = container_of(chip, struct pnv_phb, - ioda.irq_chip); + struct pci_controller *hose = irq_data_get_irq_chip_data(d->parent_data); + struct pnv_phb *phb = hose->private_data; - return opal_pci_msi_eoi(phb->opal_id, hw_irq); + return opal_pci_msi_eoi(phb->opal_id, d->parent_data->hwirq); } /* -- cgit From 1753081f2d445f9157550692fcc4221cd3ff0958 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:48 +0200 Subject: KVM: PPC: Book3S HV: XICS: Fix mapping of passthrough interrupts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PCI MSIs now live in an MSI domain but the underlying calls, which will EOI the interrupt in real mode, need an HW IRQ number mapped in the XICS IRQ domain. Grab it there. Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-31-clg@kaod.org --- arch/powerpc/kvm/book3s_hv.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 05b3a3548c18..f28f99805c4c 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -5328,6 +5328,7 @@ static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi) struct kvmppc_passthru_irqmap *pimap; struct irq_chip *chip; int i, rc = 0; + struct irq_data *host_data; if (!kvm_irq_bypass) return 1; @@ -5392,7 +5393,14 @@ static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi) * the KVM real mode handler. */ smp_wmb(); - irq_map->r_hwirq = desc->irq_data.hwirq; + + /* + * The 'host_irq' number is mapped in the PCI-MSI domain but + * the underlying calls, which will EOI the interrupt in real + * mode, need an HW IRQ number mapped in the XICS IRQ domain. + */ + host_data = irq_domain_get_irq_data(irq_get_default_host(), host_irq); + irq_map->r_hwirq = (unsigned int)irqd_to_hwirq(host_data); if (i == pimap->n_mapped) pimap->n_mapped++; @@ -5400,7 +5408,7 @@ static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi) if (xics_on_xive()) rc = kvmppc_xive_set_mapped(kvm, guest_gsi, host_irq); else - kvmppc_xics_set_mapped(kvm, guest_gsi, desc->irq_data.hwirq); + kvmppc_xics_set_mapped(kvm, guest_gsi, irq_map->r_hwirq); if (rc) irq_map->r_hwirq = 0; -- cgit From 59b2bc18b1492b46d45b6b6828ba098f09b9ba67 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 1 Jul 2021 15:27:49 +0200 Subject: powerpc/xive: Use XIVE domain under xmon and debugfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The default domain of the PCI/MSIs is not the XIVE domain anymore. To list the IRQ mappings under XMON and debugfs, query the IRQ data from the low level XIVE domain. 
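The same query works for any walk over all descriptors: ask the target domain for the irq_data and skip the virq when the answer is NULL. A minimal sketch of dumping one domain's mappings:

#include <linux/irq.h>
#include <linux/irqdomain.h>
#include <linux/irqnr.h>
#include <linux/printk.h>

static void example_dump_domain(struct irq_domain *domain)
{
	unsigned int i;
	struct irq_desc *desc;

	for_each_irq_desc(i, desc) {
		/* NULL when virq 'i' is not mapped in 'domain' */
		struct irq_data *d = irq_domain_get_irq_data(domain, i);

		if (d)
			pr_info("virq %u -> hwirq %lu\n", i,
				(unsigned long)irqd_to_hwirq(d));
	}
}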
Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210701132750.1475580-32-clg@kaod.org --- arch/powerpc/sysdev/xive/common.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 107f442d3411..d0deaebbfeeb 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -312,11 +312,10 @@ void xmon_xive_get_irq_all(void) struct irq_desc *desc; for_each_irq_desc(i, desc) { - struct irq_data *d = irq_desc_get_irq_data(desc); - unsigned int hwirq = (unsigned int)irqd_to_hwirq(d); + struct irq_data *d = irq_domain_get_irq_data(xive_irq_domain, i); - if (d->domain == xive_irq_domain) - xmon_xive_get_irq_config(hwirq, d); + if (d) + xmon_xive_get_irq_config(irqd_to_hwirq(d), d); } } @@ -1757,9 +1756,9 @@ static int xive_core_debug_show(struct seq_file *m, void *private) xive_debug_show_cpu(m, cpu); for_each_irq_desc(i, desc) { - struct irq_data *d = irq_desc_get_irq_data(desc); + struct irq_data *d = irq_domain_get_irq_data(xive_irq_domain, i); - if (d->domain == xive_irq_domain) + if (d) xive_debug_show_irq(m, d); } return 0; -- cgit From 17df41fec5b80b16ea4774495f1eb730e2225619 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Mon, 19 Jul 2021 15:06:14 +0200 Subject: powerpc: use IRQF_NO_DEBUG for IPIs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no need to use the lockup detector ("noirqdebug") for IPIs. The ipistorm benchmark measures a ~10% improvement on large systems when this flag is set. Signed-off-by: Cédric Le Goater Reviewed-by: Thomas Gleixner Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210719130614.195886-1-clg@kaod.org --- arch/powerpc/sysdev/xics/xics-common.c | 2 +- arch/powerpc/sysdev/xive/common.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c index 4a7687caec75..5c1a157a83b8 100644 --- a/arch/powerpc/sysdev/xics/xics-common.c +++ b/arch/powerpc/sysdev/xics/xics-common.c @@ -132,7 +132,7 @@ static void xics_request_ipi(void) * IPIs are marked IRQF_PERCPU. The handler was set in map. */ BUG_ON(request_irq(ipi, icp_ops->ipi_action, - IRQF_PERCPU | IRQF_NO_THREAD, "IPI", NULL)); + IRQF_NO_DEBUG | IRQF_PERCPU | IRQF_NO_THREAD, "IPI", NULL)); } void __init xics_smp_probe(void) diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index d0deaebbfeeb..4018964bbd69 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -1149,7 +1149,8 @@ static int __init xive_request_ipi(void) snprintf(xid->name, sizeof(xid->name), "IPI-%d", node); ret = request_irq(xid->irq, xive_muxed_ipi_action, - IRQF_PERCPU | IRQF_NO_THREAD, xid->name, NULL); + IRQF_NO_DEBUG | IRQF_PERCPU | IRQF_NO_THREAD, + xid->name, NULL); WARN(ret < 0, "Failed to request IPI %d: %d\n", xid->irq, ret); } -- cgit From b68c6646cce5ee8caefa6333ee743f960222dcea Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Tue, 20 Jul 2021 15:42:08 +0200 Subject: KVM: PPC: Book3S HV: XIVE: Add a 'flags' field MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use it to hold platform-specific features. P9 DD2 introduced single-escalation support. P10 will add others.
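The conversion swaps a dedicated boolean-like field for one bit in a small flags word plus an accessor, which keeps adding further P10 features cheap. In isolation the pattern looks like this (names are illustrative, not the series' own):

#include <linux/types.h>

#define EXAMPLE_FLAG_SINGLE_ESCALATION	0x1
#define EXAMPLE_FLAG_SAVE_RESTORE	0x2	/* added by a later patch */

struct example_xive {
	u8 flags;	/* replaces 'u8 single_escalation' */
};

static inline bool example_has_single_escalation(struct example_xive *x)
{
	return x->flags & EXAMPLE_FLAG_SINGLE_ESCALATION;
}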
Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210720134209.256133-2-clg@kaod.org --- arch/powerpc/kvm/book3s_xive.c | 19 ++++++++++--------- arch/powerpc/kvm/book3s_xive.h | 9 ++++++++- arch/powerpc/kvm/book3s_xive_native.c | 12 +++++++----- 3 files changed, 25 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index 6878026ee8ec..555cc610e7ab 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -363,9 +363,9 @@ static int xive_check_provisioning(struct kvm *kvm, u8 prio) if (!vcpu->arch.xive_vcpu) continue; rc = xive_provision_queue(vcpu, prio); - if (rc == 0 && !xive->single_escalation) + if (rc == 0 && !kvmppc_xive_has_single_escalation(xive)) kvmppc_xive_attach_escalation(vcpu, prio, - xive->single_escalation); + kvmppc_xive_has_single_escalation(xive)); if (rc) return rc; } @@ -1199,7 +1199,7 @@ void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu) /* Free escalations */ for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { if (xc->esc_virq[i]) { - if (xc->xive->single_escalation) + if (kvmppc_xive_has_single_escalation(xc->xive)) xive_cleanup_single_escalation(vcpu, xc, xc->esc_virq[i]); free_irq(xc->esc_virq[i], vcpu); @@ -1340,7 +1340,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, * Enable the VP first as the single escalation mode will * affect escalation interrupts numbering */ - r = xive_native_enable_vp(xc->vp_id, xive->single_escalation); + r = xive_native_enable_vp(xc->vp_id, kvmppc_xive_has_single_escalation(xive)); if (r) { pr_err("Failed to enable VP in OPAL, err %d\n", r); goto bail; @@ -1357,15 +1357,15 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, struct xive_q *q = &xc->queues[i]; /* Single escalation, no queue 7 */ - if (i == 7 && xive->single_escalation) + if (i == 7 && kvmppc_xive_has_single_escalation(xive)) break; /* Is queue already enabled ? 
Provision it */ if (xive->qmap & (1 << i)) { r = xive_provision_queue(vcpu, i); - if (r == 0 && !xive->single_escalation) + if (r == 0 && !kvmppc_xive_has_single_escalation(xive)) kvmppc_xive_attach_escalation( - vcpu, i, xive->single_escalation); + vcpu, i, kvmppc_xive_has_single_escalation(xive)); if (r) goto bail; } else { @@ -1380,7 +1380,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, } /* If not done above, attach priority 0 escalation */ - r = kvmppc_xive_attach_escalation(vcpu, 0, xive->single_escalation); + r = kvmppc_xive_attach_escalation(vcpu, 0, kvmppc_xive_has_single_escalation(xive)); if (r) goto bail; @@ -2135,7 +2135,8 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type) */ xive->nr_servers = KVM_MAX_VCPUS; - xive->single_escalation = xive_native_has_single_escalation(); + if (xive_native_has_single_escalation()) + xive->flags |= KVMPPC_XIVE_FLAG_SINGLE_ESCALATION; kvm->arch.xive = xive; return 0; diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h index afe9eeac6d56..73c3cd25093c 100644 --- a/arch/powerpc/kvm/book3s_xive.h +++ b/arch/powerpc/kvm/book3s_xive.h @@ -97,6 +97,8 @@ struct kvmppc_xive_ops { int (*reset_mapped)(struct kvm *kvm, unsigned long guest_irq); }; +#define KVMPPC_XIVE_FLAG_SINGLE_ESCALATION 0x1 + struct kvmppc_xive { struct kvm *kvm; struct kvm_device *dev; @@ -133,7 +135,7 @@ struct kvmppc_xive { u32 q_page_order; /* Flags */ - u8 single_escalation; + u8 flags; /* Number of entries in the VP block */ u32 nr_servers; @@ -308,5 +310,10 @@ void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu, int kvmppc_xive_compute_vp_id(struct kvmppc_xive *xive, u32 cpu, u32 *vp); int kvmppc_xive_set_nr_servers(struct kvmppc_xive *xive, u64 addr); +static inline bool kvmppc_xive_has_single_escalation(struct kvmppc_xive *xive) +{ + return xive->flags & KVMPPC_XIVE_FLAG_SINGLE_ESCALATION; +} + #endif /* CONFIG_KVM_XICS */ #endif /* _KVM_PPC_BOOK3S_XICS_H */ diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index 573ecaab3597..2abb1358a268 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -93,7 +93,7 @@ void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu) for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { /* Free the escalation irq */ if (xc->esc_virq[i]) { - if (xc->xive->single_escalation) + if (kvmppc_xive_has_single_escalation(xc->xive)) xive_cleanup_single_escalation(vcpu, xc, xc->esc_virq[i]); free_irq(xc->esc_virq[i], vcpu); @@ -172,7 +172,7 @@ int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev, * Enable the VP first as the single escalation mode will * affect escalation interrupts numbering */ - rc = xive_native_enable_vp(xc->vp_id, xive->single_escalation); + rc = xive_native_enable_vp(xc->vp_id, kvmppc_xive_has_single_escalation(xive)); if (rc) { pr_err("Failed to enable VP in OPAL: %d\n", rc); goto bail; @@ -693,7 +693,7 @@ static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive, } rc = kvmppc_xive_attach_escalation(vcpu, priority, - xive->single_escalation); + kvmppc_xive_has_single_escalation(xive)); error: if (rc) kvmppc_xive_native_cleanup_queue(vcpu, priority); @@ -820,7 +820,7 @@ static int kvmppc_xive_reset(struct kvmppc_xive *xive) for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) { /* Single escalation, no queue 7 */ - if (prio == 7 && xive->single_escalation) + if (prio == 7 && kvmppc_xive_has_single_escalation(xive)) break; if (xc->esc_virq[prio]) { @@ -1111,7 +1111,9 @@ static int 
kvmppc_xive_native_create(struct kvm_device *dev, u32 type) */ xive->nr_servers = KVM_MAX_VCPUS; - xive->single_escalation = xive_native_has_single_escalation(); + if (xive_native_has_single_escalation()) + xive->flags |= KVMPPC_XIVE_FLAG_SINGLE_ESCALATION; + xive->ops = &kvmppc_xive_native_ops; kvm->arch.xive = xive; -- cgit From f5af0a978776b710f16dc99a85496b1e760bf9e0 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Tue, 20 Jul 2021 15:42:09 +0200 Subject: KVM: PPC: Book3S HV: XIVE: Add support for automatic save-restore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On P10, the feature doing an automatic "save & restore" of a VCPU interrupt context is set by default in OPAL. When a VP context is pulled out, the state of the interrupt registers is saved by the XIVE interrupt controller under the internal NVP structure representing the VP. This saves a costly store/load in guest entries and exits. If OPAL advertises the "save & restore" feature in the device tree, it should also have set the 'H' bit in the CAM line. Check that when vCPUs are connected to their ICP in KVM before going any further. Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210720134209.256133-3-clg@kaod.org --- arch/powerpc/include/asm/xive-regs.h | 3 +++ arch/powerpc/include/asm/xive.h | 1 + arch/powerpc/kvm/book3s_xive.c | 34 ++++++++++++++++++++++++++++++++-- arch/powerpc/kvm/book3s_xive.h | 2 ++ arch/powerpc/kvm/book3s_xive_native.c | 9 +++++++++ arch/powerpc/sysdev/xive/native.c | 10 ++++++++++ 6 files changed, 57 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/xive-regs.h b/arch/powerpc/include/asm/xive-regs.h index 8b211faa0e42..cf8bb6ac4463 100644 --- a/arch/powerpc/include/asm/xive-regs.h +++ b/arch/powerpc/include/asm/xive-regs.h @@ -80,10 +80,13 @@ #define TM_QW0W2_VU PPC_BIT32(0) #define TM_QW0W2_LOGIC_SERV PPC_BITMASK32(1,31) // XX 2,31 ?
#define TM_QW1W2_VO PPC_BIT32(0) +#define TM_QW1W2_HO PPC_BIT32(1) /* P10 XIVE2 */ #define TM_QW1W2_OS_CAM PPC_BITMASK32(8,31) #define TM_QW2W2_VP PPC_BIT32(0) +#define TM_QW2W2_HP PPC_BIT32(1) /* P10 XIVE2 */ #define TM_QW2W2_POOL_CAM PPC_BITMASK32(8,31) #define TM_QW3W2_VT PPC_BIT32(0) +#define TM_QW3W2_HT PPC_BIT32(1) /* P10 XIVE2 */ #define TM_QW3W2_LP PPC_BIT32(6) #define TM_QW3W2_LE PPC_BIT32(7) #define TM_QW3W2_T PPC_BIT32(31) diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h index 20ae50ab083c..92930b0b5d0e 100644 --- a/arch/powerpc/include/asm/xive.h +++ b/arch/powerpc/include/asm/xive.h @@ -126,6 +126,7 @@ int xive_native_enable_vp(u32 vp_id, bool single_escalation); int xive_native_disable_vp(u32 vp_id); int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id); bool xive_native_has_single_escalation(void); +bool xive_native_has_save_restore(void); int xive_native_get_queue_info(u32 vp_id, uint32_t prio, u64 *out_qpage, diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index 555cc610e7ab..912c1e9eef6b 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -59,6 +59,25 @@ */ #define XIVE_Q_GAP 2 +static bool kvmppc_xive_vcpu_has_save_restore(struct kvm_vcpu *vcpu) +{ + struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; + + /* Check enablement at VP level */ + return xc->vp_cam & TM_QW1W2_HO; +} + +bool kvmppc_xive_check_save_restore(struct kvm_vcpu *vcpu) +{ + struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; + struct kvmppc_xive *xive = xc->xive; + + if (xive->flags & KVMPPC_XIVE_FLAG_SAVE_RESTORE) + return kvmppc_xive_vcpu_has_save_restore(vcpu); + + return true; +} + /* * Push a vcpu's context to the XIVE on guest entry. * This assumes we are in virtual mode (MMU on) @@ -77,7 +96,8 @@ void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) return; eieio(); - __raw_writeq(vcpu->arch.xive_saved_state.w01, tima + TM_QW1_OS); + if (!kvmppc_xive_vcpu_has_save_restore(vcpu)) + __raw_writeq(vcpu->arch.xive_saved_state.w01, tima + TM_QW1_OS); __raw_writel(vcpu->arch.xive_cam_word, tima + TM_QW1_OS + TM_WORD2); vcpu->arch.xive_pushed = 1; eieio(); @@ -149,7 +169,8 @@ void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu) /* First load to pull the context, we ignore the value */ __raw_readl(tima + TM_SPC_PULL_OS_CTX); /* Second load to recover the context state (Words 0 and 1) */ - vcpu->arch.xive_saved_state.w01 = __raw_readq(tima + TM_QW1_OS); + if (!kvmppc_xive_vcpu_has_save_restore(vcpu)) + vcpu->arch.xive_saved_state.w01 = __raw_readq(tima + TM_QW1_OS); /* Fixup some of the state for the next load */ vcpu->arch.xive_saved_state.lsmfb = 0; @@ -1319,6 +1340,12 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, if (r) goto bail; + if (!kvmppc_xive_check_save_restore(vcpu)) { + pr_err("inconsistent save-restore setup for VCPU %d\n", cpu); + r = -EIO; + goto bail; + } + /* Configure VCPU fields for use by assembly push/pull */ vcpu->arch.xive_saved_state.w01 = cpu_to_be64(0xff000000); vcpu->arch.xive_cam_word = cpu_to_be32(xc->vp_cam | TM_QW1W2_VO); @@ -2138,6 +2165,9 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type) if (xive_native_has_single_escalation()) xive->flags |= KVMPPC_XIVE_FLAG_SINGLE_ESCALATION; + if (xive_native_has_save_restore()) + xive->flags |= KVMPPC_XIVE_FLAG_SAVE_RESTORE; + kvm->arch.xive = xive; return 0; } diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h index 73c3cd25093c..e6a9651c6f1e 100644 --- 
a/arch/powerpc/kvm/book3s_xive.h +++ b/arch/powerpc/kvm/book3s_xive.h @@ -98,6 +98,7 @@ struct kvmppc_xive_ops { }; #define KVMPPC_XIVE_FLAG_SINGLE_ESCALATION 0x1 +#define KVMPPC_XIVE_FLAG_SAVE_RESTORE 0x2 struct kvmppc_xive { struct kvm *kvm; @@ -309,6 +310,7 @@ void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu, struct kvmppc_xive_vcpu *xc, int irq); int kvmppc_xive_compute_vp_id(struct kvmppc_xive *xive, u32 cpu, u32 *vp); int kvmppc_xive_set_nr_servers(struct kvmppc_xive *xive, u64 addr); +bool kvmppc_xive_check_save_restore(struct kvm_vcpu *vcpu); static inline bool kvmppc_xive_has_single_escalation(struct kvmppc_xive *xive) { diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index 2abb1358a268..af65ea21bde7 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -168,6 +168,12 @@ int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev, goto bail; } + if (!kvmppc_xive_check_save_restore(vcpu)) { + pr_err("inconsistent save-restore setup for VCPU %d\n", server_num); + rc = -EIO; + goto bail; + } + /* * Enable the VP first as the single escalation mode will * affect escalation interrupts numbering @@ -1114,6 +1120,9 @@ static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type) if (xive_native_has_single_escalation()) xive->flags |= KVMPPC_XIVE_FLAG_SINGLE_ESCALATION; + if (xive_native_has_save_restore()) + xive->flags |= KVMPPC_XIVE_FLAG_SAVE_RESTORE; + xive->ops = &kvmppc_xive_native_ops; kvm->arch.xive = xive; diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c index 57e3f1540435..1aec282cd650 100644 --- a/arch/powerpc/sysdev/xive/native.c +++ b/arch/powerpc/sysdev/xive/native.c @@ -41,6 +41,7 @@ static u32 xive_queue_shift; static u32 xive_pool_vps = XIVE_INVALID_VP; static struct kmem_cache *xive_provision_cache; static bool xive_has_single_esc; +static bool xive_has_save_restore; int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data) { @@ -588,6 +589,9 @@ bool __init xive_native_init(void) if (of_get_property(np, "single-escalation-support", NULL) != NULL) xive_has_single_esc = true; + if (of_get_property(np, "vp-save-restore", NULL)) + xive_has_save_restore = true; + /* Configure Thread Management areas for KVM */ for_each_possible_cpu(cpu) kvmppc_set_xive_tima(cpu, r.start, tima); @@ -752,6 +756,12 @@ bool xive_native_has_single_escalation(void) } EXPORT_SYMBOL_GPL(xive_native_has_single_escalation); +bool xive_native_has_save_restore(void) +{ + return xive_has_save_restore; +} +EXPORT_SYMBOL_GPL(xive_native_has_save_restore); + int xive_native_get_queue_info(u32 vp_id, u32 prio, u64 *out_qpage, u64 *out_qsize, -- cgit From 1bce54250045443d55659b0b23be51e87ed2b919 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 2 Aug 2021 17:26:28 +0100 Subject: powerpc: Bulk conversion to generic_handle_domain_irq() Wherever possible, replace constructs that match either generic_handle_irq(irq_find_mapping()) or generic_handle_irq(irq_linear_revmap()) to a single call to generic_handle_domain_irq(). 
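Each conversion is mechanical: the hwirq-to-virq lookup and the dispatch collapse into a single call that also reports failure. Schematically, for any cascade handler (the two functions are alternatives, shown side by side as a sketch):

#include <linux/irq.h>
#include <linux/irqdesc.h>
#include <linux/irqdomain.h>

static void example_cascade_old(struct irq_domain *host, unsigned int hwirq)
{
	/* Before: map the hwirq, then dispatch the virq if it exists. */
	unsigned int virq = irq_find_mapping(host, hwirq);

	if (virq)
		generic_handle_irq(virq);
}

static void example_cascade_new(struct irq_domain *host, unsigned int hwirq)
{
	/* After: one call; a negative return means no mapping/handler,
	 * which some call sites (e.g. fsl_mpic_err) now check. */
	generic_handle_domain_irq(host, hwirq);
}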
Signed-off-by: Marc Zyngier Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210802162630.2219813-13-maz@kernel.org --- arch/powerpc/platforms/4xx/uic.c | 4 +--- arch/powerpc/platforms/512x/mpc5121_ads_cpld.c | 23 ++++++++++------------- arch/powerpc/platforms/52xx/media5200.c | 9 ++++----- arch/powerpc/platforms/52xx/mpc52xx_gpt.c | 7 ++----- arch/powerpc/platforms/82xx/pq2ads-pci-pic.c | 6 ++---- arch/powerpc/platforms/cell/interrupt.c | 8 ++------ arch/powerpc/platforms/cell/spider-pic.c | 11 +++-------- arch/powerpc/platforms/embedded6xx/hlwd-pic.c | 15 +++++++-------- arch/powerpc/platforms/powernv/opal-irqchip.c | 11 ++++------- arch/powerpc/sysdev/fsl_mpic_err.c | 11 ++++------- arch/powerpc/sysdev/fsl_msi.c | 12 ++++-------- 11 files changed, 43 insertions(+), 74 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/4xx/uic.c b/arch/powerpc/platforms/4xx/uic.c index 36fb66ce54cf..89e2587b1a59 100644 --- a/arch/powerpc/platforms/4xx/uic.c +++ b/arch/powerpc/platforms/4xx/uic.c @@ -198,7 +198,6 @@ static void uic_irq_cascade(struct irq_desc *desc) struct uic *uic = irq_desc_get_handler_data(desc); u32 msr; int src; - int subvirq; raw_spin_lock(&desc->lock); if (irqd_is_level_type(idata)) @@ -213,8 +212,7 @@ static void uic_irq_cascade(struct irq_desc *desc) src = 32 - ffs(msr); - subvirq = irq_linear_revmap(uic->irqhost, src); - generic_handle_irq(subvirq); + generic_handle_domain_irq(uic->irqhost, src); uic_irq_ret: raw_spin_lock(&desc->lock); diff --git a/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c b/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c index b2981634f1f8..ea46870e5d6e 100644 --- a/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c +++ b/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c @@ -81,11 +81,10 @@ static struct irq_chip cpld_pic = { .irq_unmask = cpld_unmask_irq, }; -static int +static unsigned int cpld_pic_get_irq(int offset, u8 ignore, u8 __iomem *statusp, u8 __iomem *maskp) { - int cpld_irq; u8 status = in_8(statusp); u8 mask = in_8(maskp); @@ -93,28 +92,26 @@ cpld_pic_get_irq(int offset, u8 ignore, u8 __iomem *statusp, status |= (ignore | mask); if (status == 0xff) - return 0; - - cpld_irq = ffz(status) + offset; + return ~0; - return irq_linear_revmap(cpld_pic_host, cpld_irq); + return ffz(status) + offset; } static void cpld_pic_cascade(struct irq_desc *desc) { - unsigned int irq; + unsigned int hwirq; - irq = cpld_pic_get_irq(0, PCI_IGNORE, &cpld_regs->pci_status, + hwirq = cpld_pic_get_irq(0, PCI_IGNORE, &cpld_regs->pci_status, &cpld_regs->pci_mask); - if (irq) { - generic_handle_irq(irq); + if (hwirq != ~0) { + generic_handle_domain_irq(cpld_pic_host, hwirq); return; } - irq = cpld_pic_get_irq(8, MISC_IGNORE, &cpld_regs->misc_status, + hwirq = cpld_pic_get_irq(8, MISC_IGNORE, &cpld_regs->misc_status, &cpld_regs->misc_mask); - if (irq) { - generic_handle_irq(irq); + if (hwirq != ~0) { + generic_handle_domain_irq(cpld_pic_host, hwirq); return; } } diff --git a/arch/powerpc/platforms/52xx/media5200.c b/arch/powerpc/platforms/52xx/media5200.c index efb8bdecbcc7..110c444f4bc7 100644 --- a/arch/powerpc/platforms/52xx/media5200.c +++ b/arch/powerpc/platforms/52xx/media5200.c @@ -78,7 +78,7 @@ static struct irq_chip media5200_irq_chip = { static void media5200_irq_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); - int sub_virq, val; + int val; u32 status, enable; /* Mask off the cascaded IRQ */ @@ -92,11 +92,10 @@ static void media5200_irq_cascade(struct irq_desc *desc) enable = 
in_be32(media5200_irq.regs + MEDIA5200_IRQ_STATUS); val = ffs((status & enable) >> MEDIA5200_IRQ_SHIFT); if (val) { - sub_virq = irq_linear_revmap(media5200_irq.irqhost, val - 1); - /* pr_debug("%s: virq=%i s=%.8x e=%.8x hwirq=%i subvirq=%i\n", - * __func__, virq, status, enable, val - 1, sub_virq); + generic_handle_domain_irq(media5200_irq.irqhost, val - 1); + /* pr_debug("%s: virq=%i s=%.8x e=%.8x hwirq=%i\n", + * __func__, virq, status, enable, val - 1); */ - generic_handle_irq(sub_virq); } /* Processing done; can reenable the cascade now */ diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c index 3823df235f25..f862b48b4824 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c @@ -190,14 +190,11 @@ static struct irq_chip mpc52xx_gpt_irq_chip = { static void mpc52xx_gpt_irq_cascade(struct irq_desc *desc) { struct mpc52xx_gpt_priv *gpt = irq_desc_get_handler_data(desc); - int sub_virq; u32 status; status = in_be32(&gpt->regs->status) & MPC52xx_GPT_STATUS_IRQMASK; - if (status) { - sub_virq = irq_linear_revmap(gpt->irqhost, 0); - generic_handle_irq(sub_virq); - } + if (status) + generic_handle_domain_irq(gpt->irqhost, 0); } static int mpc52xx_gpt_irq_map(struct irq_domain *h, unsigned int virq, diff --git a/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c b/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c index f82f75a6085c..285bfe19b798 100644 --- a/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c +++ b/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c @@ -91,10 +91,8 @@ static void pq2ads_pci_irq_demux(struct irq_desc *desc) break; for (bit = 0; pend != 0; ++bit, pend <<= 1) { - if (pend & 0x80000000) { - int virq = irq_linear_revmap(priv->host, bit); - generic_handle_irq(virq); - } + if (pend & 0x80000000) + generic_handle_domain_irq(priv->host, bit); } } } diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c index c0ab62ba6f16..0873a7a20271 100644 --- a/arch/powerpc/platforms/cell/interrupt.c +++ b/arch/powerpc/platforms/cell/interrupt.c @@ -106,13 +106,9 @@ static void iic_ioexc_cascade(struct irq_desc *desc) out_be64(&node_iic->iic_is, ack); /* handle them */ for (cascade = 63; cascade >= 0; cascade--) - if (bits & (0x8000000000000000UL >> cascade)) { - unsigned int cirq = - irq_linear_revmap(iic_host, + if (bits & (0x8000000000000000UL >> cascade)) + generic_handle_domain_irq(iic_host, base | cascade); - if (cirq) - generic_handle_irq(cirq); - } /* post-ack level interrupts */ ack = bits & ~IIC_ISR_EDGE_MASK; if (ack) diff --git a/arch/powerpc/platforms/cell/spider-pic.c b/arch/powerpc/platforms/cell/spider-pic.c index 210785f59271..8af75867cb42 100644 --- a/arch/powerpc/platforms/cell/spider-pic.c +++ b/arch/powerpc/platforms/cell/spider-pic.c @@ -190,16 +190,11 @@ static void spider_irq_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); struct spider_pic *pic = irq_desc_get_handler_data(desc); - unsigned int cs, virq; + unsigned int cs; cs = in_be32(pic->regs + TIR_CS) >> 24; - if (cs == SPIDER_IRQ_INVALID) - virq = 0; - else - virq = irq_linear_revmap(pic->host, cs); - - if (virq) - generic_handle_irq(virq); + if (cs != SPIDER_IRQ_INVALID) + generic_handle_domain_irq(pic->host, cs); chip->irq_eoi(&desc->irq_data); } diff --git a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c index a1b7f79a8a15..15396333a90b 100644 --- a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c +++ 
b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c @@ -108,7 +108,6 @@ static const struct irq_domain_ops hlwd_irq_domain_ops = { static unsigned int __hlwd_pic_get_irq(struct irq_domain *h) { void __iomem *io_base = h->host_data; - int irq; u32 irq_status; irq_status = in_be32(io_base + HW_BROADWAY_ICR) & @@ -116,23 +115,22 @@ static unsigned int __hlwd_pic_get_irq(struct irq_domain *h) if (irq_status == 0) return 0; /* no more IRQs pending */ - irq = __ffs(irq_status); - return irq_linear_revmap(h, irq); + return __ffs(irq_status); } static void hlwd_pic_irq_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); struct irq_domain *irq_domain = irq_desc_get_handler_data(desc); - unsigned int virq; + unsigned int hwirq; raw_spin_lock(&desc->lock); chip->irq_mask(&desc->irq_data); /* IRQ_LEVEL */ raw_spin_unlock(&desc->lock); - virq = __hlwd_pic_get_irq(irq_domain); - if (virq) - generic_handle_irq(virq); + hwirq = __hlwd_pic_get_irq(irq_domain); + if (hwirq) + generic_handle_domain_irq(irq_domain, hwirq); else pr_err("spurious interrupt!\n"); @@ -190,7 +188,8 @@ static struct irq_domain *hlwd_pic_init(struct device_node *np) unsigned int hlwd_pic_get_irq(void) { - return __hlwd_pic_get_irq(hlwd_irq_host); + unsigned int hwirq = __hlwd_pic_get_irq(hlwd_irq_host); + return hwirq ? irq_linear_revmap(hlwd_irq_host, hwirq) : 0; } /* diff --git a/arch/powerpc/platforms/powernv/opal-irqchip.c b/arch/powerpc/platforms/powernv/opal-irqchip.c index c164419e254d..d55652b5f6fa 100644 --- a/arch/powerpc/platforms/powernv/opal-irqchip.c +++ b/arch/powerpc/platforms/powernv/opal-irqchip.c @@ -46,18 +46,15 @@ void opal_handle_events(void) e = READ_ONCE(last_outstanding_events) & opal_event_irqchip.mask; again: while (e) { - int virq, hwirq; + int hwirq; hwirq = fls64(e) - 1; e &= ~BIT_ULL(hwirq); local_irq_disable(); - virq = irq_find_mapping(opal_event_irqchip.domain, hwirq); - if (virq) { - irq_enter(); - generic_handle_irq(virq); - irq_exit(); - } + irq_enter(); + generic_handle_domain_irq(opal_event_irqchip.domain, hwirq); + irq_exit(); local_irq_enable(); cond_resched(); diff --git a/arch/powerpc/sysdev/fsl_mpic_err.c b/arch/powerpc/sysdev/fsl_mpic_err.c index 5fa5fa215541..9a98bb212922 100644 --- a/arch/powerpc/sysdev/fsl_mpic_err.c +++ b/arch/powerpc/sysdev/fsl_mpic_err.c @@ -99,7 +99,6 @@ static irqreturn_t fsl_error_int_handler(int irq, void *data) struct mpic *mpic = (struct mpic *) data; u32 eisr, eimr; int errint; - unsigned int cascade_irq; eisr = mpic_fsl_err_read(mpic->err_regs, MPIC_ERR_INT_EISR); eimr = mpic_fsl_err_read(mpic->err_regs, MPIC_ERR_INT_EIMR); @@ -108,13 +107,11 @@ static irqreturn_t fsl_error_int_handler(int irq, void *data) return IRQ_NONE; while (eisr) { + int ret; errint = __builtin_clz(eisr); - cascade_irq = irq_linear_revmap(mpic->irqhost, - mpic->err_int_vecs[errint]); - WARN_ON(!cascade_irq); - if (cascade_irq) { - generic_handle_irq(cascade_irq); - } else { + ret = generic_handle_domain_irq(mpic->irqhost, + mpic->err_int_vecs[errint]); + if (WARN_ON(ret)) { eimr |= 1 << (31 - errint); mpic_fsl_err_write(mpic->err_regs, eimr); } diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index 808e7118abfc..e6b06c3f8197 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -266,7 +266,6 @@ out_free: static irqreturn_t fsl_msi_cascade(int irq, void *data) { - unsigned int cascade_irq; struct fsl_msi *msi_data; int msir_index = -1; u32 msir_value = 0; @@ -279,9 +278,6 @@ static irqreturn_t 
fsl_msi_cascade(int irq, void *data) msir_index = cascade_data->index; - if (msir_index >= NR_MSI_REG_MAX) - cascade_irq = 0; - switch (msi_data->feature & FSL_PIC_IP_MASK) { case FSL_PIC_IP_MPIC: msir_value = fsl_msi_read(msi_data->msi_regs, @@ -305,15 +301,15 @@ static irqreturn_t fsl_msi_cascade(int irq, void *data) } while (msir_value) { + int err; intr_index = ffs(msir_value) - 1; - cascade_irq = irq_linear_revmap(msi_data->irqhost, + err = generic_handle_domain_irq(msi_data->irqhost, msi_hwirq(msi_data, msir_index, intr_index + have_shift)); - if (cascade_irq) { - generic_handle_irq(cascade_irq); + if (!err) ret = IRQ_HANDLED; - } + have_shift += intr_index + 1; msir_value = msir_value >> (intr_index + 1); } -- cgit From 94effcedaa543825ad9c80831450d4fbfa284880 Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Thu, 5 Aug 2021 11:49:36 +0900 Subject: openrisc: Fix compiler warnings in setup This was pointed out with the recent name change of or32_early_setup to or1k_early_setup. Investigating the file I found a few other warnings, so clean them up here. arch/openrisc/kernel/setup.c:220:13: warning: no previous prototype for 'or1k_early_setup' [-Wmissing-prototypes] 220 | void __init or1k_early_setup(void *fdt) | ^~~~~~~~~~~~~~~~ Fix the missing or1k_early_setup prototype warning by adding an asm/setup.h file to define the prototype. arch/openrisc/kernel/setup.c:246:13: warning: no previous prototype for 'detect_unit_config' [-Wmissing-prototypes] 246 | void __init detect_unit_config(unsigned long upr, unsigned long mask, | ^~~~~~~~~~~~~~~~~~ The function detect_unit_config is not used, just remove it. arch/openrisc/kernel/setup.c:221: warning: Function parameter or member 'fdt' not described in 'or1k_early_setup' Add @fdt docs to the function comment to suppress this warning. Reported-by: kernel test robot Signed-off-by: Stafford Horne Reviewed-by: Randy Dunlap --- arch/openrisc/include/asm/setup.h | 15 +++++++++++++++ arch/openrisc/kernel/setup.c | 16 +--------------- 2 files changed, 16 insertions(+), 15 deletions(-) create mode 100644 arch/openrisc/include/asm/setup.h (limited to 'arch') diff --git a/arch/openrisc/include/asm/setup.h b/arch/openrisc/include/asm/setup.h new file mode 100644 index 000000000000..9acbc5deda69 --- /dev/null +++ b/arch/openrisc/include/asm/setup.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2021 Stafford Horne + */ +#ifndef _ASM_OR1K_SETUP_H +#define _ASM_OR1K_SETUP_H + +#include +#include + +#ifndef __ASSEMBLY__ +void __init or1k_early_setup(void *fdt); +#endif + +#endif /* _ASM_OR1K_SETUP_H */ diff --git a/arch/openrisc/kernel/setup.c b/arch/openrisc/kernel/setup.c index 7eddcac0ef2f..0cd04d936a7a 100644 --- a/arch/openrisc/kernel/setup.c +++ b/arch/openrisc/kernel/setup.c @@ -210,6 +210,7 @@ void __init setup_cpuinfo(void) /** * or1k_early_setup + * @fdt: pointer to the start of the device tree in memory or NULL * * Handles the pointer to the device tree that this kernel is to use * for establishing the available platform devices.
@@ -243,21 +244,6 @@ static inline unsigned long extract_value(unsigned long reg, unsigned long mask) return mask & reg; } -void __init detect_unit_config(unsigned long upr, unsigned long mask, - char *text, void (*func) (void)) -{ - if (text != NULL) - printk("%s", text); - - if (upr & mask) { - if (func != NULL) - func(); - else - printk("present\n"); - } else - printk("not present\n"); -} - /* * calibrate_delay * -- cgit From 3acd5d8b7cf614d8724986b0dbfee52b0944d027 Mon Sep 17 00:00:00 2001 From: Hsin-Yi Wang Date: Wed, 4 Aug 2021 12:40:31 +0800 Subject: arm: dts: mt8135: Move pinfunc to include/dt-bindings/pinctrl Move mt8135-pinfunc.h into include/dt-bindings/pinctrl so that we can include it in yaml examples. Signed-off-by: Hsin-Yi Wang Link: https://lore.kernel.org/r/20210804044033.3047296-1-hsinyi@chromium.org Signed-off-by: Linus Walleij --- arch/arm/boot/dts/mt8135-pinfunc.h | 1294 ------------------------------------ arch/arm/boot/dts/mt8135.dtsi | 2 +- 2 files changed, 1 insertion(+), 1295 deletions(-) delete mode 100644 arch/arm/boot/dts/mt8135-pinfunc.h (limited to 'arch') diff --git a/arch/arm/boot/dts/mt8135-pinfunc.h b/arch/arm/boot/dts/mt8135-pinfunc.h deleted file mode 100644 index ce0cb5a440eb..000000000000 --- a/arch/arm/boot/dts/mt8135-pinfunc.h +++ /dev/null @@ -1,1294 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (c) 2014 MediaTek Inc. - * Author: Hongzhou.Yang - */ - -#ifndef __DTS_MT8135_PINFUNC_H -#define __DTS_MT8135_PINFUNC_H - -#include - -#define MT8135_PIN_0_MSDC0_DAT7__FUNC_GPIO0 (MTK_PIN_NO(0) | 0) -#define MT8135_PIN_0_MSDC0_DAT7__FUNC_MSDC0_DAT7 (MTK_PIN_NO(0) | 1) -#define MT8135_PIN_0_MSDC0_DAT7__FUNC_EINT49 (MTK_PIN_NO(0) | 2) -#define MT8135_PIN_0_MSDC0_DAT7__FUNC_I2SOUT_DAT (MTK_PIN_NO(0) | 3) -#define MT8135_PIN_0_MSDC0_DAT7__FUNC_DAC_DAT_OUT (MTK_PIN_NO(0) | 4) -#define MT8135_PIN_0_MSDC0_DAT7__FUNC_PCM1_DO (MTK_PIN_NO(0) | 5) -#define MT8135_PIN_0_MSDC0_DAT7__FUNC_SPI1_MO (MTK_PIN_NO(0) | 6) -#define MT8135_PIN_0_MSDC0_DAT7__FUNC_NALE (MTK_PIN_NO(0) | 7) - -#define MT8135_PIN_1_MSDC0_DAT6__FUNC_GPIO1 (MTK_PIN_NO(1) | 0) -#define MT8135_PIN_1_MSDC0_DAT6__FUNC_MSDC0_DAT6 (MTK_PIN_NO(1) | 1) -#define MT8135_PIN_1_MSDC0_DAT6__FUNC_EINT48 (MTK_PIN_NO(1) | 2) -#define MT8135_PIN_1_MSDC0_DAT6__FUNC_I2SIN_WS (MTK_PIN_NO(1) | 3) -#define MT8135_PIN_1_MSDC0_DAT6__FUNC_DAC_WS (MTK_PIN_NO(1) | 4) -#define MT8135_PIN_1_MSDC0_DAT6__FUNC_PCM1_WS (MTK_PIN_NO(1) | 5) -#define MT8135_PIN_1_MSDC0_DAT6__FUNC_SPI1_CSN (MTK_PIN_NO(1) | 6) -#define MT8135_PIN_1_MSDC0_DAT6__FUNC_NCLE (MTK_PIN_NO(1) | 7) - -#define MT8135_PIN_2_MSDC0_DAT5__FUNC_GPIO2 (MTK_PIN_NO(2) | 0) -#define MT8135_PIN_2_MSDC0_DAT5__FUNC_MSDC0_DAT5 (MTK_PIN_NO(2) | 1) -#define MT8135_PIN_2_MSDC0_DAT5__FUNC_EINT47 (MTK_PIN_NO(2) | 2) -#define MT8135_PIN_2_MSDC0_DAT5__FUNC_I2SIN_CK (MTK_PIN_NO(2) | 3) -#define MT8135_PIN_2_MSDC0_DAT5__FUNC_DAC_CK (MTK_PIN_NO(2) | 4) -#define MT8135_PIN_2_MSDC0_DAT5__FUNC_PCM1_CK (MTK_PIN_NO(2) | 5) -#define MT8135_PIN_2_MSDC0_DAT5__FUNC_SPI1_CLK (MTK_PIN_NO(2) | 6) -#define MT8135_PIN_2_MSDC0_DAT5__FUNC_NLD4 (MTK_PIN_NO(2) | 7) - -#define MT8135_PIN_3_MSDC0_DAT4__FUNC_GPIO3 (MTK_PIN_NO(3) | 0) -#define MT8135_PIN_3_MSDC0_DAT4__FUNC_MSDC0_DAT4 (MTK_PIN_NO(3) | 1) -#define MT8135_PIN_3_MSDC0_DAT4__FUNC_EINT46 (MTK_PIN_NO(3) | 2) -#define MT8135_PIN_3_MSDC0_DAT4__FUNC_A_FUNC_CK (MTK_PIN_NO(3) | 3) -#define MT8135_PIN_3_MSDC0_DAT4__FUNC_LSCE1B_2X (MTK_PIN_NO(3) | 6) -#define MT8135_PIN_3_MSDC0_DAT4__FUNC_NLD5 (MTK_PIN_NO(3) | 7) - -#define 
MT8135_PIN_4_MSDC0_CMD__FUNC_GPIO4 (MTK_PIN_NO(4) | 0) -#define MT8135_PIN_4_MSDC0_CMD__FUNC_MSDC0_CMD (MTK_PIN_NO(4) | 1) -#define MT8135_PIN_4_MSDC0_CMD__FUNC_EINT41 (MTK_PIN_NO(4) | 2) -#define MT8135_PIN_4_MSDC0_CMD__FUNC_A_FUNC_DOUT_0 (MTK_PIN_NO(4) | 3) -#define MT8135_PIN_4_MSDC0_CMD__FUNC_USB_TEST_IO_0 (MTK_PIN_NO(4) | 5) -#define MT8135_PIN_4_MSDC0_CMD__FUNC_LRSTB_2X (MTK_PIN_NO(4) | 6) -#define MT8135_PIN_4_MSDC0_CMD__FUNC_NRNB (MTK_PIN_NO(4) | 7) - -#define MT8135_PIN_5_MSDC0_CLK__FUNC_GPIO5 (MTK_PIN_NO(5) | 0) -#define MT8135_PIN_5_MSDC0_CLK__FUNC_MSDC0_CLK (MTK_PIN_NO(5) | 1) -#define MT8135_PIN_5_MSDC0_CLK__FUNC_EINT40 (MTK_PIN_NO(5) | 2) -#define MT8135_PIN_5_MSDC0_CLK__FUNC_A_FUNC_DOUT_1 (MTK_PIN_NO(5) | 3) -#define MT8135_PIN_5_MSDC0_CLK__FUNC_USB_TEST_IO_1 (MTK_PIN_NO(5) | 5) -#define MT8135_PIN_5_MSDC0_CLK__FUNC_LPTE (MTK_PIN_NO(5) | 6) -#define MT8135_PIN_5_MSDC0_CLK__FUNC_NREB (MTK_PIN_NO(5) | 7) - -#define MT8135_PIN_6_MSDC0_DAT3__FUNC_GPIO6 (MTK_PIN_NO(6) | 0) -#define MT8135_PIN_6_MSDC0_DAT3__FUNC_MSDC0_DAT3 (MTK_PIN_NO(6) | 1) -#define MT8135_PIN_6_MSDC0_DAT3__FUNC_EINT45 (MTK_PIN_NO(6) | 2) -#define MT8135_PIN_6_MSDC0_DAT3__FUNC_A_FUNC_DOUT_2 (MTK_PIN_NO(6) | 3) -#define MT8135_PIN_6_MSDC0_DAT3__FUNC_USB_TEST_IO_2 (MTK_PIN_NO(6) | 5) -#define MT8135_PIN_6_MSDC0_DAT3__FUNC_LSCE0B_2X (MTK_PIN_NO(6) | 6) -#define MT8135_PIN_6_MSDC0_DAT3__FUNC_NLD7 (MTK_PIN_NO(6) | 7) - -#define MT8135_PIN_7_MSDC0_DAT2__FUNC_GPIO7 (MTK_PIN_NO(7) | 0) -#define MT8135_PIN_7_MSDC0_DAT2__FUNC_MSDC0_DAT2 (MTK_PIN_NO(7) | 1) -#define MT8135_PIN_7_MSDC0_DAT2__FUNC_EINT44 (MTK_PIN_NO(7) | 2) -#define MT8135_PIN_7_MSDC0_DAT2__FUNC_A_FUNC_DOUT_3 (MTK_PIN_NO(7) | 3) -#define MT8135_PIN_7_MSDC0_DAT2__FUNC_USB_TEST_IO_3 (MTK_PIN_NO(7) | 5) -#define MT8135_PIN_7_MSDC0_DAT2__FUNC_LSA0_2X (MTK_PIN_NO(7) | 6) -#define MT8135_PIN_7_MSDC0_DAT2__FUNC_NLD14 (MTK_PIN_NO(7) | 7) - -#define MT8135_PIN_8_MSDC0_DAT1__FUNC_GPIO8 (MTK_PIN_NO(8) | 0) -#define MT8135_PIN_8_MSDC0_DAT1__FUNC_MSDC0_DAT1 (MTK_PIN_NO(8) | 1) -#define MT8135_PIN_8_MSDC0_DAT1__FUNC_EINT43 (MTK_PIN_NO(8) | 2) -#define MT8135_PIN_8_MSDC0_DAT1__FUNC_USB_TEST_IO_4 (MTK_PIN_NO(8) | 5) -#define MT8135_PIN_8_MSDC0_DAT1__FUNC_LSCK_2X (MTK_PIN_NO(8) | 6) -#define MT8135_PIN_8_MSDC0_DAT1__FUNC_NLD11 (MTK_PIN_NO(8) | 7) - -#define MT8135_PIN_9_MSDC0_DAT0__FUNC_GPIO9 (MTK_PIN_NO(9) | 0) -#define MT8135_PIN_9_MSDC0_DAT0__FUNC_MSDC0_DAT0 (MTK_PIN_NO(9) | 1) -#define MT8135_PIN_9_MSDC0_DAT0__FUNC_EINT42 (MTK_PIN_NO(9) | 2) -#define MT8135_PIN_9_MSDC0_DAT0__FUNC_USB_TEST_IO_5 (MTK_PIN_NO(9) | 5) -#define MT8135_PIN_9_MSDC0_DAT0__FUNC_LSDA_2X (MTK_PIN_NO(9) | 6) - -#define MT8135_PIN_10_NCEB0__FUNC_GPIO10 (MTK_PIN_NO(10) | 0) -#define MT8135_PIN_10_NCEB0__FUNC_NCEB0 (MTK_PIN_NO(10) | 1) -#define MT8135_PIN_10_NCEB0__FUNC_EINT139 (MTK_PIN_NO(10) | 2) -#define MT8135_PIN_10_NCEB0__FUNC_TESTA_OUT4 (MTK_PIN_NO(10) | 7) - -#define MT8135_PIN_11_NCEB1__FUNC_GPIO11 (MTK_PIN_NO(11) | 0) -#define MT8135_PIN_11_NCEB1__FUNC_NCEB1 (MTK_PIN_NO(11) | 1) -#define MT8135_PIN_11_NCEB1__FUNC_EINT140 (MTK_PIN_NO(11) | 2) -#define MT8135_PIN_11_NCEB1__FUNC_USB_DRVVBUS (MTK_PIN_NO(11) | 6) -#define MT8135_PIN_11_NCEB1__FUNC_TESTA_OUT5 (MTK_PIN_NO(11) | 7) - -#define MT8135_PIN_12_NRNB__FUNC_GPIO12 (MTK_PIN_NO(12) | 0) -#define MT8135_PIN_12_NRNB__FUNC_NRNB (MTK_PIN_NO(12) | 1) -#define MT8135_PIN_12_NRNB__FUNC_EINT141 (MTK_PIN_NO(12) | 2) -#define MT8135_PIN_12_NRNB__FUNC_A_FUNC_DOUT_4 (MTK_PIN_NO(12) | 3) -#define MT8135_PIN_12_NRNB__FUNC_TESTA_OUT6 (MTK_PIN_NO(12) | 7) - 
-#define MT8135_PIN_13_NCLE__FUNC_GPIO13 (MTK_PIN_NO(13) | 0) -#define MT8135_PIN_13_NCLE__FUNC_NCLE (MTK_PIN_NO(13) | 1) -#define MT8135_PIN_13_NCLE__FUNC_EINT142 (MTK_PIN_NO(13) | 2) -#define MT8135_PIN_13_NCLE__FUNC_A_FUNC_DOUT_5 (MTK_PIN_NO(13) | 3) -#define MT8135_PIN_13_NCLE__FUNC_CM2PDN_1X (MTK_PIN_NO(13) | 4) -#define MT8135_PIN_13_NCLE__FUNC_NALE (MTK_PIN_NO(13) | 6) -#define MT8135_PIN_13_NCLE__FUNC_TESTA_OUT7 (MTK_PIN_NO(13) | 7) - -#define MT8135_PIN_14_NALE__FUNC_GPIO14 (MTK_PIN_NO(14) | 0) -#define MT8135_PIN_14_NALE__FUNC_NALE (MTK_PIN_NO(14) | 1) -#define MT8135_PIN_14_NALE__FUNC_EINT143 (MTK_PIN_NO(14) | 2) -#define MT8135_PIN_14_NALE__FUNC_A_FUNC_DOUT_6 (MTK_PIN_NO(14) | 3) -#define MT8135_PIN_14_NALE__FUNC_CM2MCLK_1X (MTK_PIN_NO(14) | 4) -#define MT8135_PIN_14_NALE__FUNC_IRDA_RXD (MTK_PIN_NO(14) | 5) -#define MT8135_PIN_14_NALE__FUNC_NCLE (MTK_PIN_NO(14) | 6) -#define MT8135_PIN_14_NALE__FUNC_TESTA_OUT8 (MTK_PIN_NO(14) | 7) - -#define MT8135_PIN_15_NREB__FUNC_GPIO15 (MTK_PIN_NO(15) | 0) -#define MT8135_PIN_15_NREB__FUNC_NREB (MTK_PIN_NO(15) | 1) -#define MT8135_PIN_15_NREB__FUNC_EINT144 (MTK_PIN_NO(15) | 2) -#define MT8135_PIN_15_NREB__FUNC_A_FUNC_DOUT_7 (MTK_PIN_NO(15) | 3) -#define MT8135_PIN_15_NREB__FUNC_CM2RST_1X (MTK_PIN_NO(15) | 4) -#define MT8135_PIN_15_NREB__FUNC_IRDA_TXD (MTK_PIN_NO(15) | 5) -#define MT8135_PIN_15_NREB__FUNC_TESTA_OUT9 (MTK_PIN_NO(15) | 7) - -#define MT8135_PIN_16_NWEB__FUNC_GPIO16 (MTK_PIN_NO(16) | 0) -#define MT8135_PIN_16_NWEB__FUNC_NWEB (MTK_PIN_NO(16) | 1) -#define MT8135_PIN_16_NWEB__FUNC_EINT145 (MTK_PIN_NO(16) | 2) -#define MT8135_PIN_16_NWEB__FUNC_A_FUNC_DIN_0 (MTK_PIN_NO(16) | 3) -#define MT8135_PIN_16_NWEB__FUNC_CM2PCLK_1X (MTK_PIN_NO(16) | 4) -#define MT8135_PIN_16_NWEB__FUNC_IRDA_PDN (MTK_PIN_NO(16) | 5) -#define MT8135_PIN_16_NWEB__FUNC_TESTA_OUT10 (MTK_PIN_NO(16) | 7) - -#define MT8135_PIN_17_NLD0__FUNC_GPIO17 (MTK_PIN_NO(17) | 0) -#define MT8135_PIN_17_NLD0__FUNC_NLD0 (MTK_PIN_NO(17) | 1) -#define MT8135_PIN_17_NLD0__FUNC_EINT146 (MTK_PIN_NO(17) | 2) -#define MT8135_PIN_17_NLD0__FUNC_A_FUNC_DIN_1 (MTK_PIN_NO(17) | 3) -#define MT8135_PIN_17_NLD0__FUNC_CM2DAT_1X_0 (MTK_PIN_NO(17) | 4) -#define MT8135_PIN_17_NLD0__FUNC_I2SIN_CK (MTK_PIN_NO(17) | 5) -#define MT8135_PIN_17_NLD0__FUNC_DAC_CK (MTK_PIN_NO(17) | 6) -#define MT8135_PIN_17_NLD0__FUNC_TESTA_OUT11 (MTK_PIN_NO(17) | 7) - -#define MT8135_PIN_18_NLD1__FUNC_GPIO18 (MTK_PIN_NO(18) | 0) -#define MT8135_PIN_18_NLD1__FUNC_NLD1 (MTK_PIN_NO(18) | 1) -#define MT8135_PIN_18_NLD1__FUNC_EINT147 (MTK_PIN_NO(18) | 2) -#define MT8135_PIN_18_NLD1__FUNC_A_FUNC_DIN_2 (MTK_PIN_NO(18) | 3) -#define MT8135_PIN_18_NLD1__FUNC_CM2DAT_1X_1 (MTK_PIN_NO(18) | 4) -#define MT8135_PIN_18_NLD1__FUNC_I2SIN_WS (MTK_PIN_NO(18) | 5) -#define MT8135_PIN_18_NLD1__FUNC_DAC_WS (MTK_PIN_NO(18) | 6) -#define MT8135_PIN_18_NLD1__FUNC_TESTA_OUT12 (MTK_PIN_NO(18) | 7) - -#define MT8135_PIN_19_NLD2__FUNC_GPIO19 (MTK_PIN_NO(19) | 0) -#define MT8135_PIN_19_NLD2__FUNC_NLD2 (MTK_PIN_NO(19) | 1) -#define MT8135_PIN_19_NLD2__FUNC_EINT148 (MTK_PIN_NO(19) | 2) -#define MT8135_PIN_19_NLD2__FUNC_A_FUNC_DIN_3 (MTK_PIN_NO(19) | 3) -#define MT8135_PIN_19_NLD2__FUNC_CM2DAT_1X_2 (MTK_PIN_NO(19) | 4) -#define MT8135_PIN_19_NLD2__FUNC_I2SOUT_DAT (MTK_PIN_NO(19) | 5) -#define MT8135_PIN_19_NLD2__FUNC_DAC_DAT_OUT (MTK_PIN_NO(19) | 6) -#define MT8135_PIN_19_NLD2__FUNC_TESTA_OUT13 (MTK_PIN_NO(19) | 7) - -#define MT8135_PIN_20_NLD3__FUNC_GPIO20 (MTK_PIN_NO(20) | 0) -#define MT8135_PIN_20_NLD3__FUNC_NLD3 (MTK_PIN_NO(20) | 1) -#define 
MT8135_PIN_20_NLD3__FUNC_EINT149 (MTK_PIN_NO(20) | 2)
[roughly 1,000 further deleted lines elided: "-#define MT8135_PIN_<n>_<pad>__FUNC_<func> (MTK_PIN_NO(<n>) | <mode>)" entries covering every alternate function (modes 0 through 7) of pins 20 through 202 of the deleted mt8135-pinfunc.h]
-#endif /* __DTS_MT8135_PINFUNC_H */
diff --git a/arch/arm/boot/dts/mt8135.dtsi b/arch/arm/boot/dts/mt8135.dtsi
index 0e4e835026db..a031b3636318 100644
--- a/arch/arm/boot/dts/mt8135.dtsi
+++ b/arch/arm/boot/dts/mt8135.dtsi
@@ -9,7 +9,7 @@
 #include
 #include
 #include
-#include "mt8135-pinfunc.h"
+#include <dt-bindings/pinctrl/mt8135-pinfunc.h>

 / {
 	#address-cells = <2>;
-- cgit

From 4e233326e50bf2787a632f7625b9ef89819478ff Mon Sep 17 00:00:00 2001
From: Hsin-Yi Wang
Date: Wed, 4 Aug 2021 12:40:32 +0800
Subject: arm: dts: mt8183: Move pinfunc to include/dt-bindings/pinctrl

Move mt8183-pinfunc.h into include/dt-bindings/pinctrl so that we can
include it in yaml examples.

Signed-off-by: Hsin-Yi Wang
Link: https://lore.kernel.org/r/20210804044033.3047296-2-hsinyi@chromium.org
Signed-off-by: Linus Walleij
---
 arch/arm64/boot/dts/mediatek/mt8183-pinfunc.h | 1120 -------------------------
 arch/arm64/boot/dts/mediatek/mt8183.dtsi      |    2 +-
 2 files changed, 1 insertion(+), 1121 deletions(-)
 delete mode 100644 arch/arm64/boot/dts/mediatek/mt8183-pinfunc.h
(limited to 'arch')

diff --git a/arch/arm64/boot/dts/mediatek/mt8183-pinfunc.h b/arch/arm64/boot/dts/mediatek/mt8183-pinfunc.h
deleted file mode 100644
index 6221cd712718..000000000000
--- a/arch/arm64/boot/dts/mediatek/mt8183-pinfunc.h
+++ /dev/null
@@ -1,1120 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2018 MediaTek Inc.
- * Author: Zhiyong Tao
- *
- */
-
-#ifndef __MT8183_PINFUNC_H
-#define __MT8183_PINFUNC_H
-
-#include
-
-#define PINMUX_GPIO0__FUNC_GPIO0 (MTK_PIN_NO(0) | 0)
-#define PINMUX_GPIO0__FUNC_MRG_SYNC (MTK_PIN_NO(0) | 1)
-#define PINMUX_GPIO0__FUNC_PCM0_SYNC (MTK_PIN_NO(0) | 2)
-#define PINMUX_GPIO0__FUNC_TP_GPIO0_AO (MTK_PIN_NO(0) | 3)
-#define PINMUX_GPIO0__FUNC_SRCLKENAI0 (MTK_PIN_NO(0) | 4)
-#define PINMUX_GPIO0__FUNC_SCP_SPI2_CS (MTK_PIN_NO(0) | 5)
-#define PINMUX_GPIO0__FUNC_I2S3_MCK (MTK_PIN_NO(0) | 6)
-#define PINMUX_GPIO0__FUNC_SPI2_CSB (MTK_PIN_NO(0) | 7)
-
-#define PINMUX_GPIO1__FUNC_GPIO1 (MTK_PIN_NO(1) | 0)
-#define PINMUX_GPIO1__FUNC_MRG_CLK (MTK_PIN_NO(1) | 1)
-#define PINMUX_GPIO1__FUNC_PCM0_CLK (MTK_PIN_NO(1) | 2)
-#define PINMUX_GPIO1__FUNC_TP_GPIO1_AO (MTK_PIN_NO(1) | 3)
-#define PINMUX_GPIO1__FUNC_CLKM3 (MTK_PIN_NO(1) | 4)
-#define PINMUX_GPIO1__FUNC_SCP_SPI2_MO (MTK_PIN_NO(1) | 5)
-#define PINMUX_GPIO1__FUNC_I2S3_BCK (MTK_PIN_NO(1) | 6)
-#define PINMUX_GPIO1__FUNC_SPI2_MO (MTK_PIN_NO(1) | 7)
-
-#define PINMUX_GPIO2__FUNC_GPIO2 (MTK_PIN_NO(2) | 0)
-#define PINMUX_GPIO2__FUNC_MRG_DO (MTK_PIN_NO(2) | 1)
-#define PINMUX_GPIO2__FUNC_PCM0_DO (MTK_PIN_NO(2) | 2)
-#define PINMUX_GPIO2__FUNC_TP_GPIO2_AO (MTK_PIN_NO(2) | 3)
-#define PINMUX_GPIO2__FUNC_SCL6 (MTK_PIN_NO(2) | 4)
-#define PINMUX_GPIO2__FUNC_SCP_SPI2_CK (MTK_PIN_NO(2) | 5)
-#define PINMUX_GPIO2__FUNC_I2S3_LRCK (MTK_PIN_NO(2) | 6)
-#define PINMUX_GPIO2__FUNC_SPI2_CLK (MTK_PIN_NO(2) | 7)
-
-#define PINMUX_GPIO3__FUNC_GPIO3 (MTK_PIN_NO(3) | 0)
-#define PINMUX_GPIO3__FUNC_MRG_DI (MTK_PIN_NO(3) | 1)
-#define PINMUX_GPIO3__FUNC_PCM0_DI (MTK_PIN_NO(3) | 2)
-#define PINMUX_GPIO3__FUNC_TP_GPIO3_AO (MTK_PIN_NO(3) | 3)
-#define PINMUX_GPIO3__FUNC_SDA6 (MTK_PIN_NO(3) | 4)
-#define PINMUX_GPIO3__FUNC_TDM_MCK (MTK_PIN_NO(3) | 5)
-#define PINMUX_GPIO3__FUNC_I2S3_DO (MTK_PIN_NO(3) | 6)
-#define PINMUX_GPIO3__FUNC_SCP_VREQ_VAO (MTK_PIN_NO(3) | 7)
-
PINMUX_GPIO4__FUNC_GPIO4 (MTK_PIN_NO(4) | 0) -#define PINMUX_GPIO4__FUNC_PWM_B (MTK_PIN_NO(4) | 1) -#define PINMUX_GPIO4__FUNC_I2S0_MCK (MTK_PIN_NO(4) | 2) -#define PINMUX_GPIO4__FUNC_SSPM_UTXD_AO (MTK_PIN_NO(4) | 3) -#define PINMUX_GPIO4__FUNC_MD_URXD1 (MTK_PIN_NO(4) | 4) -#define PINMUX_GPIO4__FUNC_TDM_BCK (MTK_PIN_NO(4) | 5) -#define PINMUX_GPIO4__FUNC_TP_GPIO4_AO (MTK_PIN_NO(4) | 6) -#define PINMUX_GPIO4__FUNC_DAP_MD32_SWD (MTK_PIN_NO(4) | 7) - -#define PINMUX_GPIO5__FUNC_GPIO5 (MTK_PIN_NO(5) | 0) -#define PINMUX_GPIO5__FUNC_PWM_C (MTK_PIN_NO(5) | 1) -#define PINMUX_GPIO5__FUNC_I2S0_BCK (MTK_PIN_NO(5) | 2) -#define PINMUX_GPIO5__FUNC_SSPM_URXD_AO (MTK_PIN_NO(5) | 3) -#define PINMUX_GPIO5__FUNC_MD_UTXD1 (MTK_PIN_NO(5) | 4) -#define PINMUX_GPIO5__FUNC_TDM_LRCK (MTK_PIN_NO(5) | 5) -#define PINMUX_GPIO5__FUNC_TP_GPIO5_AO (MTK_PIN_NO(5) | 6) -#define PINMUX_GPIO5__FUNC_DAP_MD32_SWCK (MTK_PIN_NO(5) | 7) - -#define PINMUX_GPIO6__FUNC_GPIO6 (MTK_PIN_NO(6) | 0) -#define PINMUX_GPIO6__FUNC_PWM_A (MTK_PIN_NO(6) | 1) -#define PINMUX_GPIO6__FUNC_I2S0_LRCK (MTK_PIN_NO(6) | 2) -#define PINMUX_GPIO6__FUNC_IDDIG (MTK_PIN_NO(6) | 3) -#define PINMUX_GPIO6__FUNC_MD_URXD0 (MTK_PIN_NO(6) | 4) -#define PINMUX_GPIO6__FUNC_TDM_DATA0 (MTK_PIN_NO(6) | 5) -#define PINMUX_GPIO6__FUNC_TP_GPIO6_AO (MTK_PIN_NO(6) | 6) -#define PINMUX_GPIO6__FUNC_CMFLASH (MTK_PIN_NO(6) | 7) - -#define PINMUX_GPIO7__FUNC_GPIO7 (MTK_PIN_NO(7) | 0) -#define PINMUX_GPIO7__FUNC_SPI1_B_MI (MTK_PIN_NO(7) | 1) -#define PINMUX_GPIO7__FUNC_I2S0_DI (MTK_PIN_NO(7) | 2) -#define PINMUX_GPIO7__FUNC_USB_DRVVBUS (MTK_PIN_NO(7) | 3) -#define PINMUX_GPIO7__FUNC_MD_UTXD0 (MTK_PIN_NO(7) | 4) -#define PINMUX_GPIO7__FUNC_TDM_DATA1 (MTK_PIN_NO(7) | 5) -#define PINMUX_GPIO7__FUNC_TP_GPIO7_AO (MTK_PIN_NO(7) | 6) -#define PINMUX_GPIO7__FUNC_DVFSRC_EXT_REQ (MTK_PIN_NO(7) | 7) - -#define PINMUX_GPIO8__FUNC_GPIO8 (MTK_PIN_NO(8) | 0) -#define PINMUX_GPIO8__FUNC_SPI1_B_CSB (MTK_PIN_NO(8) | 1) -#define PINMUX_GPIO8__FUNC_ANT_SEL3 (MTK_PIN_NO(8) | 2) -#define PINMUX_GPIO8__FUNC_SCL7 (MTK_PIN_NO(8) | 3) -#define PINMUX_GPIO8__FUNC_CONN_MCU_TRST_B (MTK_PIN_NO(8) | 4) -#define PINMUX_GPIO8__FUNC_TDM_DATA2 (MTK_PIN_NO(8) | 5) -#define PINMUX_GPIO8__FUNC_MD_INT0 (MTK_PIN_NO(8) | 6) -#define PINMUX_GPIO8__FUNC_JTRSTN_SEL1 (MTK_PIN_NO(8) | 7) - -#define PINMUX_GPIO9__FUNC_GPIO9 (MTK_PIN_NO(9) | 0) -#define PINMUX_GPIO9__FUNC_SPI1_B_MO (MTK_PIN_NO(9) | 1) -#define PINMUX_GPIO9__FUNC_ANT_SEL4 (MTK_PIN_NO(9) | 2) -#define PINMUX_GPIO9__FUNC_CMMCLK2 (MTK_PIN_NO(9) | 3) -#define PINMUX_GPIO9__FUNC_CONN_MCU_DBGACK_N (MTK_PIN_NO(9) | 4) -#define PINMUX_GPIO9__FUNC_SSPM_JTAG_TRSTN (MTK_PIN_NO(9) | 5) -#define PINMUX_GPIO9__FUNC_IO_JTAG_TRSTN (MTK_PIN_NO(9) | 6) -#define PINMUX_GPIO9__FUNC_DBG_MON_B10 (MTK_PIN_NO(9) | 7) - -#define PINMUX_GPIO10__FUNC_GPIO10 (MTK_PIN_NO(10) | 0) -#define PINMUX_GPIO10__FUNC_SPI1_B_CLK (MTK_PIN_NO(10) | 1) -#define PINMUX_GPIO10__FUNC_ANT_SEL5 (MTK_PIN_NO(10) | 2) -#define PINMUX_GPIO10__FUNC_CMMCLK3 (MTK_PIN_NO(10) | 3) -#define PINMUX_GPIO10__FUNC_CONN_MCU_DBGI_N (MTK_PIN_NO(10) | 4) -#define PINMUX_GPIO10__FUNC_TDM_DATA3 (MTK_PIN_NO(10) | 5) -#define PINMUX_GPIO10__FUNC_EXT_FRAME_SYNC (MTK_PIN_NO(10) | 6) -#define PINMUX_GPIO10__FUNC_DBG_MON_B11 (MTK_PIN_NO(10) | 7) - -#define PINMUX_GPIO11__FUNC_GPIO11 (MTK_PIN_NO(11) | 0) -#define PINMUX_GPIO11__FUNC_TP_URXD1_AO (MTK_PIN_NO(11) | 1) -#define PINMUX_GPIO11__FUNC_IDDIG (MTK_PIN_NO(11) | 2) -#define PINMUX_GPIO11__FUNC_SCL6 (MTK_PIN_NO(11) | 3) -#define PINMUX_GPIO11__FUNC_UCTS1 (MTK_PIN_NO(11) | 
4) -#define PINMUX_GPIO11__FUNC_UCTS0 (MTK_PIN_NO(11) | 5) -#define PINMUX_GPIO11__FUNC_SRCLKENAI1 (MTK_PIN_NO(11) | 6) -#define PINMUX_GPIO11__FUNC_I2S5_MCK (MTK_PIN_NO(11) | 7) - -#define PINMUX_GPIO12__FUNC_GPIO12 (MTK_PIN_NO(12) | 0) -#define PINMUX_GPIO12__FUNC_TP_UTXD1_AO (MTK_PIN_NO(12) | 1) -#define PINMUX_GPIO12__FUNC_USB_DRVVBUS (MTK_PIN_NO(12) | 2) -#define PINMUX_GPIO12__FUNC_SDA6 (MTK_PIN_NO(12) | 3) -#define PINMUX_GPIO12__FUNC_URTS1 (MTK_PIN_NO(12) | 4) -#define PINMUX_GPIO12__FUNC_URTS0 (MTK_PIN_NO(12) | 5) -#define PINMUX_GPIO12__FUNC_I2S2_DI2 (MTK_PIN_NO(12) | 6) -#define PINMUX_GPIO12__FUNC_I2S5_BCK (MTK_PIN_NO(12) | 7) - -#define PINMUX_GPIO13__FUNC_GPIO13 (MTK_PIN_NO(13) | 0) -#define PINMUX_GPIO13__FUNC_DBPI_D0 (MTK_PIN_NO(13) | 1) -#define PINMUX_GPIO13__FUNC_SPI5_MI (MTK_PIN_NO(13) | 2) -#define PINMUX_GPIO13__FUNC_PCM0_SYNC (MTK_PIN_NO(13) | 3) -#define PINMUX_GPIO13__FUNC_MD_URXD0 (MTK_PIN_NO(13) | 4) -#define PINMUX_GPIO13__FUNC_ANT_SEL3 (MTK_PIN_NO(13) | 5) -#define PINMUX_GPIO13__FUNC_I2S0_MCK (MTK_PIN_NO(13) | 6) -#define PINMUX_GPIO13__FUNC_DBG_MON_B15 (MTK_PIN_NO(13) | 7) - -#define PINMUX_GPIO14__FUNC_GPIO14 (MTK_PIN_NO(14) | 0) -#define PINMUX_GPIO14__FUNC_DBPI_D1 (MTK_PIN_NO(14) | 1) -#define PINMUX_GPIO14__FUNC_SPI5_CSB (MTK_PIN_NO(14) | 2) -#define PINMUX_GPIO14__FUNC_PCM0_CLK (MTK_PIN_NO(14) | 3) -#define PINMUX_GPIO14__FUNC_MD_UTXD0 (MTK_PIN_NO(14) | 4) -#define PINMUX_GPIO14__FUNC_ANT_SEL4 (MTK_PIN_NO(14) | 5) -#define PINMUX_GPIO14__FUNC_I2S0_BCK (MTK_PIN_NO(14) | 6) -#define PINMUX_GPIO14__FUNC_DBG_MON_B16 (MTK_PIN_NO(14) | 7) - -#define PINMUX_GPIO15__FUNC_GPIO15 (MTK_PIN_NO(15) | 0) -#define PINMUX_GPIO15__FUNC_DBPI_D2 (MTK_PIN_NO(15) | 1) -#define PINMUX_GPIO15__FUNC_SPI5_MO (MTK_PIN_NO(15) | 2) -#define PINMUX_GPIO15__FUNC_PCM0_DO (MTK_PIN_NO(15) | 3) -#define PINMUX_GPIO15__FUNC_MD_URXD1 (MTK_PIN_NO(15) | 4) -#define PINMUX_GPIO15__FUNC_ANT_SEL5 (MTK_PIN_NO(15) | 5) -#define PINMUX_GPIO15__FUNC_I2S0_LRCK (MTK_PIN_NO(15) | 6) -#define PINMUX_GPIO15__FUNC_DBG_MON_B17 (MTK_PIN_NO(15) | 7) - -#define PINMUX_GPIO16__FUNC_GPIO16 (MTK_PIN_NO(16) | 0) -#define PINMUX_GPIO16__FUNC_DBPI_D3 (MTK_PIN_NO(16) | 1) -#define PINMUX_GPIO16__FUNC_SPI5_CLK (MTK_PIN_NO(16) | 2) -#define PINMUX_GPIO16__FUNC_PCM0_DI (MTK_PIN_NO(16) | 3) -#define PINMUX_GPIO16__FUNC_MD_UTXD1 (MTK_PIN_NO(16) | 4) -#define PINMUX_GPIO16__FUNC_ANT_SEL6 (MTK_PIN_NO(16) | 5) -#define PINMUX_GPIO16__FUNC_I2S0_DI (MTK_PIN_NO(16) | 6) -#define PINMUX_GPIO16__FUNC_DBG_MON_B23 (MTK_PIN_NO(16) | 7) - -#define PINMUX_GPIO17__FUNC_GPIO17 (MTK_PIN_NO(17) | 0) -#define PINMUX_GPIO17__FUNC_DBPI_D4 (MTK_PIN_NO(17) | 1) -#define PINMUX_GPIO17__FUNC_SPI4_MI (MTK_PIN_NO(17) | 2) -#define PINMUX_GPIO17__FUNC_CONN_MCU_TRST_B (MTK_PIN_NO(17) | 3) -#define PINMUX_GPIO17__FUNC_MD_INT0 (MTK_PIN_NO(17) | 4) -#define PINMUX_GPIO17__FUNC_ANT_SEL7 (MTK_PIN_NO(17) | 5) -#define PINMUX_GPIO17__FUNC_I2S3_MCK (MTK_PIN_NO(17) | 6) -#define PINMUX_GPIO17__FUNC_DBG_MON_A1 (MTK_PIN_NO(17) | 7) - -#define PINMUX_GPIO18__FUNC_GPIO18 (MTK_PIN_NO(18) | 0) -#define PINMUX_GPIO18__FUNC_DBPI_D5 (MTK_PIN_NO(18) | 1) -#define PINMUX_GPIO18__FUNC_SPI4_CSB (MTK_PIN_NO(18) | 2) -#define PINMUX_GPIO18__FUNC_CONN_MCU_DBGI_N (MTK_PIN_NO(18) | 3) -#define PINMUX_GPIO18__FUNC_MD_INT0 (MTK_PIN_NO(18) | 4) -#define PINMUX_GPIO18__FUNC_SCP_VREQ_VAO (MTK_PIN_NO(18) | 5) -#define PINMUX_GPIO18__FUNC_I2S3_BCK (MTK_PIN_NO(18) | 6) -#define PINMUX_GPIO18__FUNC_DBG_MON_A2 (MTK_PIN_NO(18) | 7) - -#define PINMUX_GPIO19__FUNC_GPIO19 (MTK_PIN_NO(19) 
| 0) -#define PINMUX_GPIO19__FUNC_DBPI_D6 (MTK_PIN_NO(19) | 1) -#define PINMUX_GPIO19__FUNC_SPI4_MO (MTK_PIN_NO(19) | 2) -#define PINMUX_GPIO19__FUNC_CONN_MCU_TDO (MTK_PIN_NO(19) | 3) -#define PINMUX_GPIO19__FUNC_MD_INT2_C2K_UIM1_HOT_PLUG (MTK_PIN_NO(19) | 4) -#define PINMUX_GPIO19__FUNC_URXD1 (MTK_PIN_NO(19) | 5) -#define PINMUX_GPIO19__FUNC_I2S3_LRCK (MTK_PIN_NO(19) | 6) -#define PINMUX_GPIO19__FUNC_DBG_MON_A3 (MTK_PIN_NO(19) | 7) - -#define PINMUX_GPIO20__FUNC_GPIO20 (MTK_PIN_NO(20) | 0) -#define PINMUX_GPIO20__FUNC_DBPI_D7 (MTK_PIN_NO(20) | 1) -#define PINMUX_GPIO20__FUNC_SPI4_CLK (MTK_PIN_NO(20) | 2) -#define PINMUX_GPIO20__FUNC_CONN_MCU_DBGACK_N (MTK_PIN_NO(20) | 3) -#define PINMUX_GPIO20__FUNC_MD_INT1_C2K_UIM0_HOT_PLUG (MTK_PIN_NO(20) | 4) -#define PINMUX_GPIO20__FUNC_UTXD1 (MTK_PIN_NO(20) | 5) -#define PINMUX_GPIO20__FUNC_I2S3_DO (MTK_PIN_NO(20) | 6) -#define PINMUX_GPIO20__FUNC_DBG_MON_A19 (MTK_PIN_NO(20) | 7) - -#define PINMUX_GPIO21__FUNC_GPIO21 (MTK_PIN_NO(21) | 0) -#define PINMUX_GPIO21__FUNC_DBPI_D8 (MTK_PIN_NO(21) | 1) -#define PINMUX_GPIO21__FUNC_SPI3_MI (MTK_PIN_NO(21) | 2) -#define PINMUX_GPIO21__FUNC_CONN_MCU_TMS (MTK_PIN_NO(21) | 3) -#define PINMUX_GPIO21__FUNC_DAP_MD32_SWD (MTK_PIN_NO(21) | 4) -#define PINMUX_GPIO21__FUNC_CONN_MCU_AICE_TMSC (MTK_PIN_NO(21) | 5) -#define PINMUX_GPIO21__FUNC_I2S2_MCK (MTK_PIN_NO(21) | 6) -#define PINMUX_GPIO21__FUNC_DBG_MON_B5 (MTK_PIN_NO(21) | 7) - -#define PINMUX_GPIO22__FUNC_GPIO22 (MTK_PIN_NO(22) | 0) -#define PINMUX_GPIO22__FUNC_DBPI_D9 (MTK_PIN_NO(22) | 1) -#define PINMUX_GPIO22__FUNC_SPI3_CSB (MTK_PIN_NO(22) | 2) -#define PINMUX_GPIO22__FUNC_CONN_MCU_TCK (MTK_PIN_NO(22) | 3) -#define PINMUX_GPIO22__FUNC_DAP_MD32_SWCK (MTK_PIN_NO(22) | 4) -#define PINMUX_GPIO22__FUNC_CONN_MCU_AICE_TCKC (MTK_PIN_NO(22) | 5) -#define PINMUX_GPIO22__FUNC_I2S2_BCK (MTK_PIN_NO(22) | 6) -#define PINMUX_GPIO22__FUNC_DBG_MON_B6 (MTK_PIN_NO(22) | 7) - -#define PINMUX_GPIO23__FUNC_GPIO23 (MTK_PIN_NO(23) | 0) -#define PINMUX_GPIO23__FUNC_DBPI_D10 (MTK_PIN_NO(23) | 1) -#define PINMUX_GPIO23__FUNC_SPI3_MO (MTK_PIN_NO(23) | 2) -#define PINMUX_GPIO23__FUNC_CONN_MCU_TDI (MTK_PIN_NO(23) | 3) -#define PINMUX_GPIO23__FUNC_UCTS1 (MTK_PIN_NO(23) | 4) -#define PINMUX_GPIO23__FUNC_EXT_FRAME_SYNC (MTK_PIN_NO(23) | 5) -#define PINMUX_GPIO23__FUNC_I2S2_LRCK (MTK_PIN_NO(23) | 6) -#define PINMUX_GPIO23__FUNC_DBG_MON_B7 (MTK_PIN_NO(23) | 7) - -#define PINMUX_GPIO24__FUNC_GPIO24 (MTK_PIN_NO(24) | 0) -#define PINMUX_GPIO24__FUNC_DBPI_D11 (MTK_PIN_NO(24) | 1) -#define PINMUX_GPIO24__FUNC_SPI3_CLK (MTK_PIN_NO(24) | 2) -#define PINMUX_GPIO24__FUNC_SRCLKENAI0 (MTK_PIN_NO(24) | 3) -#define PINMUX_GPIO24__FUNC_URTS1 (MTK_PIN_NO(24) | 4) -#define PINMUX_GPIO24__FUNC_IO_JTAG_TCK (MTK_PIN_NO(24) | 5) -#define PINMUX_GPIO24__FUNC_I2S2_DI (MTK_PIN_NO(24) | 6) -#define PINMUX_GPIO24__FUNC_DBG_MON_B31 (MTK_PIN_NO(24) | 7) - -#define PINMUX_GPIO25__FUNC_GPIO25 (MTK_PIN_NO(25) | 0) -#define PINMUX_GPIO25__FUNC_DBPI_HSYNC (MTK_PIN_NO(25) | 1) -#define PINMUX_GPIO25__FUNC_ANT_SEL0 (MTK_PIN_NO(25) | 2) -#define PINMUX_GPIO25__FUNC_SCL6 (MTK_PIN_NO(25) | 3) -#define PINMUX_GPIO25__FUNC_KPCOL2 (MTK_PIN_NO(25) | 4) -#define PINMUX_GPIO25__FUNC_IO_JTAG_TMS (MTK_PIN_NO(25) | 5) -#define PINMUX_GPIO25__FUNC_I2S1_MCK (MTK_PIN_NO(25) | 6) -#define PINMUX_GPIO25__FUNC_DBG_MON_B0 (MTK_PIN_NO(25) | 7) - -#define PINMUX_GPIO26__FUNC_GPIO26 (MTK_PIN_NO(26) | 0) -#define PINMUX_GPIO26__FUNC_DBPI_VSYNC (MTK_PIN_NO(26) | 1) -#define PINMUX_GPIO26__FUNC_ANT_SEL1 (MTK_PIN_NO(26) | 2) -#define 
PINMUX_GPIO26__FUNC_SDA6 (MTK_PIN_NO(26) | 3) -#define PINMUX_GPIO26__FUNC_KPROW2 (MTK_PIN_NO(26) | 4) -#define PINMUX_GPIO26__FUNC_IO_JTAG_TDI (MTK_PIN_NO(26) | 5) -#define PINMUX_GPIO26__FUNC_I2S1_BCK (MTK_PIN_NO(26) | 6) -#define PINMUX_GPIO26__FUNC_DBG_MON_B1 (MTK_PIN_NO(26) | 7) - -#define PINMUX_GPIO27__FUNC_GPIO27 (MTK_PIN_NO(27) | 0) -#define PINMUX_GPIO27__FUNC_DBPI_DE (MTK_PIN_NO(27) | 1) -#define PINMUX_GPIO27__FUNC_ANT_SEL2 (MTK_PIN_NO(27) | 2) -#define PINMUX_GPIO27__FUNC_SCL7 (MTK_PIN_NO(27) | 3) -#define PINMUX_GPIO27__FUNC_DMIC_CLK (MTK_PIN_NO(27) | 4) -#define PINMUX_GPIO27__FUNC_IO_JTAG_TDO (MTK_PIN_NO(27) | 5) -#define PINMUX_GPIO27__FUNC_I2S1_LRCK (MTK_PIN_NO(27) | 6) -#define PINMUX_GPIO27__FUNC_DBG_MON_B9 (MTK_PIN_NO(27) | 7) - -#define PINMUX_GPIO28__FUNC_GPIO28 (MTK_PIN_NO(28) | 0) -#define PINMUX_GPIO28__FUNC_DBPI_CK (MTK_PIN_NO(28) | 1) -#define PINMUX_GPIO28__FUNC_DVFSRC_EXT_REQ (MTK_PIN_NO(28) | 2) -#define PINMUX_GPIO28__FUNC_SDA7 (MTK_PIN_NO(28) | 3) -#define PINMUX_GPIO28__FUNC_DMIC_DAT (MTK_PIN_NO(28) | 4) -#define PINMUX_GPIO28__FUNC_IO_JTAG_TRSTN (MTK_PIN_NO(28) | 5) -#define PINMUX_GPIO28__FUNC_I2S1_DO (MTK_PIN_NO(28) | 6) -#define PINMUX_GPIO28__FUNC_DBG_MON_B32 (MTK_PIN_NO(28) | 7) - -#define PINMUX_GPIO29__FUNC_GPIO29 (MTK_PIN_NO(29) | 0) -#define PINMUX_GPIO29__FUNC_MSDC1_CLK (MTK_PIN_NO(29) | 1) -#define PINMUX_GPIO29__FUNC_IO_JTAG_TCK (MTK_PIN_NO(29) | 2) -#define PINMUX_GPIO29__FUNC_UDI_TCK (MTK_PIN_NO(29) | 3) -#define PINMUX_GPIO29__FUNC_CONN_DSP_JCK (MTK_PIN_NO(29) | 4) -#define PINMUX_GPIO29__FUNC_SSPM_JTAG_TCK (MTK_PIN_NO(29) | 5) -#define PINMUX_GPIO29__FUNC_PCM1_CLK (MTK_PIN_NO(29) | 6) -#define PINMUX_GPIO29__FUNC_DBG_MON_A6 (MTK_PIN_NO(29) | 7) - -#define PINMUX_GPIO30__FUNC_GPIO30 (MTK_PIN_NO(30) | 0) -#define PINMUX_GPIO30__FUNC_MSDC1_DAT3 (MTK_PIN_NO(30) | 1) -#define PINMUX_GPIO30__FUNC_DAP_MD32_SWD (MTK_PIN_NO(30) | 2) -#define PINMUX_GPIO30__FUNC_CONN_MCU_AICE_TMSC (MTK_PIN_NO(30) | 3) -#define PINMUX_GPIO30__FUNC_CONN_DSP_JINTP (MTK_PIN_NO(30) | 4) -#define PINMUX_GPIO30__FUNC_SSPM_JTAG_TRSTN (MTK_PIN_NO(30) | 5) -#define PINMUX_GPIO30__FUNC_PCM1_DI (MTK_PIN_NO(30) | 6) -#define PINMUX_GPIO30__FUNC_DBG_MON_A7 (MTK_PIN_NO(30) | 7) - -#define PINMUX_GPIO31__FUNC_GPIO31 (MTK_PIN_NO(31) | 0) -#define PINMUX_GPIO31__FUNC_MSDC1_CMD (MTK_PIN_NO(31) | 1) -#define PINMUX_GPIO31__FUNC_IO_JTAG_TMS (MTK_PIN_NO(31) | 2) -#define PINMUX_GPIO31__FUNC_UDI_TMS (MTK_PIN_NO(31) | 3) -#define PINMUX_GPIO31__FUNC_CONN_DSP_JMS (MTK_PIN_NO(31) | 4) -#define PINMUX_GPIO31__FUNC_SSPM_JTAG_TMS (MTK_PIN_NO(31) | 5) -#define PINMUX_GPIO31__FUNC_PCM1_SYNC (MTK_PIN_NO(31) | 6) -#define PINMUX_GPIO31__FUNC_DBG_MON_A8 (MTK_PIN_NO(31) | 7) - -#define PINMUX_GPIO32__FUNC_GPIO32 (MTK_PIN_NO(32) | 0) -#define PINMUX_GPIO32__FUNC_MSDC1_DAT0 (MTK_PIN_NO(32) | 1) -#define PINMUX_GPIO32__FUNC_IO_JTAG_TDI (MTK_PIN_NO(32) | 2) -#define PINMUX_GPIO32__FUNC_UDI_TDI (MTK_PIN_NO(32) | 3) -#define PINMUX_GPIO32__FUNC_CONN_DSP_JDI (MTK_PIN_NO(32) | 4) -#define PINMUX_GPIO32__FUNC_SSPM_JTAG_TDI (MTK_PIN_NO(32) | 5) -#define PINMUX_GPIO32__FUNC_PCM1_DO0 (MTK_PIN_NO(32) | 6) -#define PINMUX_GPIO32__FUNC_DBG_MON_A9 (MTK_PIN_NO(32) | 7) - -#define PINMUX_GPIO33__FUNC_GPIO33 (MTK_PIN_NO(33) | 0) -#define PINMUX_GPIO33__FUNC_MSDC1_DAT2 (MTK_PIN_NO(33) | 1) -#define PINMUX_GPIO33__FUNC_IO_JTAG_TRSTN (MTK_PIN_NO(33) | 2) -#define PINMUX_GPIO33__FUNC_UDI_NTRST (MTK_PIN_NO(33) | 3) -#define PINMUX_GPIO33__FUNC_DAP_MD32_SWCK (MTK_PIN_NO(33) | 4) -#define 
PINMUX_GPIO33__FUNC_CONN_MCU_AICE_TCKC (MTK_PIN_NO(33) | 5) -#define PINMUX_GPIO33__FUNC_PCM1_DO2 (MTK_PIN_NO(33) | 6) -#define PINMUX_GPIO33__FUNC_DBG_MON_A10 (MTK_PIN_NO(33) | 7) - -#define PINMUX_GPIO34__FUNC_GPIO34 (MTK_PIN_NO(34) | 0) -#define PINMUX_GPIO34__FUNC_MSDC1_DAT1 (MTK_PIN_NO(34) | 1) -#define PINMUX_GPIO34__FUNC_IO_JTAG_TDO (MTK_PIN_NO(34) | 2) -#define PINMUX_GPIO34__FUNC_UDI_TDO (MTK_PIN_NO(34) | 3) -#define PINMUX_GPIO34__FUNC_CONN_DSP_JDO (MTK_PIN_NO(34) | 4) -#define PINMUX_GPIO34__FUNC_SSPM_JTAG_TDO (MTK_PIN_NO(34) | 5) -#define PINMUX_GPIO34__FUNC_PCM1_DO1 (MTK_PIN_NO(34) | 6) -#define PINMUX_GPIO34__FUNC_DBG_MON_A11 (MTK_PIN_NO(34) | 7) - -#define PINMUX_GPIO35__FUNC_GPIO35 (MTK_PIN_NO(35) | 0) -#define PINMUX_GPIO35__FUNC_MD1_SIM2_SIO (MTK_PIN_NO(35) | 1) -#define PINMUX_GPIO35__FUNC_CCU_JTAG_TDO (MTK_PIN_NO(35) | 2) -#define PINMUX_GPIO35__FUNC_MD1_SIM1_SIO (MTK_PIN_NO(35) | 3) -#define PINMUX_GPIO35__FUNC_SCP_JTAG_TDO (MTK_PIN_NO(35) | 5) -#define PINMUX_GPIO35__FUNC_CONN_DSP_JMS (MTK_PIN_NO(35) | 6) -#define PINMUX_GPIO35__FUNC_DBG_MON_A28 (MTK_PIN_NO(35) | 7) - -#define PINMUX_GPIO36__FUNC_GPIO36 (MTK_PIN_NO(36) | 0) -#define PINMUX_GPIO36__FUNC_MD1_SIM2_SRST (MTK_PIN_NO(36) | 1) -#define PINMUX_GPIO36__FUNC_CCU_JTAG_TMS (MTK_PIN_NO(36) | 2) -#define PINMUX_GPIO36__FUNC_MD1_SIM1_SRST (MTK_PIN_NO(36) | 3) -#define PINMUX_GPIO36__FUNC_CONN_MCU_AICE_TMSC (MTK_PIN_NO(36) | 4) -#define PINMUX_GPIO36__FUNC_SCP_JTAG_TMS (MTK_PIN_NO(36) | 5) -#define PINMUX_GPIO36__FUNC_CONN_DSP_JINTP (MTK_PIN_NO(36) | 6) -#define PINMUX_GPIO36__FUNC_DBG_MON_A29 (MTK_PIN_NO(36) | 7) - -#define PINMUX_GPIO37__FUNC_GPIO37 (MTK_PIN_NO(37) | 0) -#define PINMUX_GPIO37__FUNC_MD1_SIM2_SCLK (MTK_PIN_NO(37) | 1) -#define PINMUX_GPIO37__FUNC_CCU_JTAG_TDI (MTK_PIN_NO(37) | 2) -#define PINMUX_GPIO37__FUNC_MD1_SIM1_SCLK (MTK_PIN_NO(37) | 3) -#define PINMUX_GPIO37__FUNC_SCP_JTAG_TDI (MTK_PIN_NO(37) | 5) -#define PINMUX_GPIO37__FUNC_CONN_DSP_JDO (MTK_PIN_NO(37) | 6) -#define PINMUX_GPIO37__FUNC_DBG_MON_A30 (MTK_PIN_NO(37) | 7) - -#define PINMUX_GPIO38__FUNC_GPIO38 (MTK_PIN_NO(38) | 0) -#define PINMUX_GPIO38__FUNC_MD1_SIM1_SCLK (MTK_PIN_NO(38) | 1) -#define PINMUX_GPIO38__FUNC_MD1_SIM2_SCLK (MTK_PIN_NO(38) | 3) -#define PINMUX_GPIO38__FUNC_CONN_MCU_AICE_TCKC (MTK_PIN_NO(38) | 4) -#define PINMUX_GPIO38__FUNC_DBG_MON_A20 (MTK_PIN_NO(38) | 7) - -#define PINMUX_GPIO39__FUNC_GPIO39 (MTK_PIN_NO(39) | 0) -#define PINMUX_GPIO39__FUNC_MD1_SIM1_SRST (MTK_PIN_NO(39) | 1) -#define PINMUX_GPIO39__FUNC_CCU_JTAG_TCK (MTK_PIN_NO(39) | 2) -#define PINMUX_GPIO39__FUNC_MD1_SIM2_SRST (MTK_PIN_NO(39) | 3) -#define PINMUX_GPIO39__FUNC_SCP_JTAG_TCK (MTK_PIN_NO(39) | 5) -#define PINMUX_GPIO39__FUNC_CONN_DSP_JCK (MTK_PIN_NO(39) | 6) -#define PINMUX_GPIO39__FUNC_DBG_MON_A31 (MTK_PIN_NO(39) | 7) - -#define PINMUX_GPIO40__FUNC_GPIO40 (MTK_PIN_NO(40) | 0) -#define PINMUX_GPIO40__FUNC_MD1_SIM1_SIO (MTK_PIN_NO(40) | 1) -#define PINMUX_GPIO40__FUNC_CCU_JTAG_TRST (MTK_PIN_NO(40) | 2) -#define PINMUX_GPIO40__FUNC_MD1_SIM2_SIO (MTK_PIN_NO(40) | 3) -#define PINMUX_GPIO40__FUNC_SCP_JTAG_TRSTN (MTK_PIN_NO(40) | 5) -#define PINMUX_GPIO40__FUNC_CONN_DSP_JDI (MTK_PIN_NO(40) | 6) -#define PINMUX_GPIO40__FUNC_DBG_MON_A32 (MTK_PIN_NO(40) | 7) - -#define PINMUX_GPIO41__FUNC_GPIO41 (MTK_PIN_NO(41) | 0) -#define PINMUX_GPIO41__FUNC_IDDIG (MTK_PIN_NO(41) | 1) -#define PINMUX_GPIO41__FUNC_URXD1 (MTK_PIN_NO(41) | 2) -#define PINMUX_GPIO41__FUNC_UCTS0 (MTK_PIN_NO(41) | 3) -#define PINMUX_GPIO41__FUNC_SSPM_UTXD_AO (MTK_PIN_NO(41) | 4) -#define 
PINMUX_GPIO41__FUNC_EXT_FRAME_SYNC (MTK_PIN_NO(41) | 5) -#define PINMUX_GPIO41__FUNC_DMIC_CLK (MTK_PIN_NO(41) | 6) - -#define PINMUX_GPIO42__FUNC_GPIO42 (MTK_PIN_NO(42) | 0) -#define PINMUX_GPIO42__FUNC_USB_DRVVBUS (MTK_PIN_NO(42) | 1) -#define PINMUX_GPIO42__FUNC_UTXD1 (MTK_PIN_NO(42) | 2) -#define PINMUX_GPIO42__FUNC_URTS0 (MTK_PIN_NO(42) | 3) -#define PINMUX_GPIO42__FUNC_SSPM_URXD_AO (MTK_PIN_NO(42) | 4) -#define PINMUX_GPIO42__FUNC_EXT_FRAME_SYNC (MTK_PIN_NO(42) | 5) -#define PINMUX_GPIO42__FUNC_DMIC_DAT (MTK_PIN_NO(42) | 6) - -#define PINMUX_GPIO43__FUNC_GPIO43 (MTK_PIN_NO(43) | 0) -#define PINMUX_GPIO43__FUNC_DISP_PWM (MTK_PIN_NO(43) | 1) - -#define PINMUX_GPIO44__FUNC_GPIO44 (MTK_PIN_NO(44) | 0) -#define PINMUX_GPIO44__FUNC_DSI_TE (MTK_PIN_NO(44) | 1) - -#define PINMUX_GPIO45__FUNC_GPIO45 (MTK_PIN_NO(45) | 0) -#define PINMUX_GPIO45__FUNC_LCM_RST (MTK_PIN_NO(45) | 1) - -#define PINMUX_GPIO46__FUNC_GPIO46 (MTK_PIN_NO(46) | 0) -#define PINMUX_GPIO46__FUNC_MD_INT2_C2K_UIM1_HOT_PLUG (MTK_PIN_NO(46) | 1) -#define PINMUX_GPIO46__FUNC_URXD1 (MTK_PIN_NO(46) | 2) -#define PINMUX_GPIO46__FUNC_UCTS1 (MTK_PIN_NO(46) | 3) -#define PINMUX_GPIO46__FUNC_CCU_UTXD_AO (MTK_PIN_NO(46) | 4) -#define PINMUX_GPIO46__FUNC_TP_UCTS1_AO (MTK_PIN_NO(46) | 5) -#define PINMUX_GPIO46__FUNC_IDDIG (MTK_PIN_NO(46) | 6) -#define PINMUX_GPIO46__FUNC_I2S5_LRCK (MTK_PIN_NO(46) | 7) - -#define PINMUX_GPIO47__FUNC_GPIO47 (MTK_PIN_NO(47) | 0) -#define PINMUX_GPIO47__FUNC_MD_INT1_C2K_UIM0_HOT_PLUG (MTK_PIN_NO(47) | 1) -#define PINMUX_GPIO47__FUNC_UTXD1 (MTK_PIN_NO(47) | 2) -#define PINMUX_GPIO47__FUNC_URTS1 (MTK_PIN_NO(47) | 3) -#define PINMUX_GPIO47__FUNC_CCU_URXD_AO (MTK_PIN_NO(47) | 4) -#define PINMUX_GPIO47__FUNC_TP_URTS1_AO (MTK_PIN_NO(47) | 5) -#define PINMUX_GPIO47__FUNC_USB_DRVVBUS (MTK_PIN_NO(47) | 6) -#define PINMUX_GPIO47__FUNC_I2S5_DO (MTK_PIN_NO(47) | 7) - -#define PINMUX_GPIO48__FUNC_GPIO48 (MTK_PIN_NO(48) | 0) -#define PINMUX_GPIO48__FUNC_SCL5 (MTK_PIN_NO(48) | 1) - -#define PINMUX_GPIO49__FUNC_GPIO49 (MTK_PIN_NO(49) | 0) -#define PINMUX_GPIO49__FUNC_SDA5 (MTK_PIN_NO(49) | 1) - -#define PINMUX_GPIO50__FUNC_GPIO50 (MTK_PIN_NO(50) | 0) -#define PINMUX_GPIO50__FUNC_SCL3 (MTK_PIN_NO(50) | 1) - -#define PINMUX_GPIO51__FUNC_GPIO51 (MTK_PIN_NO(51) | 0) -#define PINMUX_GPIO51__FUNC_SDA3 (MTK_PIN_NO(51) | 1) - -#define PINMUX_GPIO52__FUNC_GPIO52 (MTK_PIN_NO(52) | 0) -#define PINMUX_GPIO52__FUNC_BPI_ANT2 (MTK_PIN_NO(52) | 1) - -#define PINMUX_GPIO53__FUNC_GPIO53 (MTK_PIN_NO(53) | 0) -#define PINMUX_GPIO53__FUNC_BPI_ANT0 (MTK_PIN_NO(53) | 1) - -#define PINMUX_GPIO54__FUNC_GPIO54 (MTK_PIN_NO(54) | 0) -#define PINMUX_GPIO54__FUNC_BPI_OLAT1 (MTK_PIN_NO(54) | 1) - -#define PINMUX_GPIO55__FUNC_GPIO55 (MTK_PIN_NO(55) | 0) -#define PINMUX_GPIO55__FUNC_BPI_BUS8 (MTK_PIN_NO(55) | 1) - -#define PINMUX_GPIO56__FUNC_GPIO56 (MTK_PIN_NO(56) | 0) -#define PINMUX_GPIO56__FUNC_BPI_BUS9 (MTK_PIN_NO(56) | 1) -#define PINMUX_GPIO56__FUNC_SCL_6306 (MTK_PIN_NO(56) | 2) - -#define PINMUX_GPIO57__FUNC_GPIO57 (MTK_PIN_NO(57) | 0) -#define PINMUX_GPIO57__FUNC_BPI_BUS10 (MTK_PIN_NO(57) | 1) -#define PINMUX_GPIO57__FUNC_SDA_6306 (MTK_PIN_NO(57) | 2) - -#define PINMUX_GPIO58__FUNC_GPIO58 (MTK_PIN_NO(58) | 0) -#define PINMUX_GPIO58__FUNC_RFIC0_BSI_D2 (MTK_PIN_NO(58) | 1) -#define PINMUX_GPIO58__FUNC_SPM_BSI_D2 (MTK_PIN_NO(58) | 2) -#define PINMUX_GPIO58__FUNC_PWM_B (MTK_PIN_NO(58) | 3) - -#define PINMUX_GPIO59__FUNC_GPIO59 (MTK_PIN_NO(59) | 0) -#define PINMUX_GPIO59__FUNC_RFIC0_BSI_D1 (MTK_PIN_NO(59) | 1) -#define PINMUX_GPIO59__FUNC_SPM_BSI_D1 
(MTK_PIN_NO(59) | 2) - -#define PINMUX_GPIO60__FUNC_GPIO60 (MTK_PIN_NO(60) | 0) -#define PINMUX_GPIO60__FUNC_RFIC0_BSI_D0 (MTK_PIN_NO(60) | 1) -#define PINMUX_GPIO60__FUNC_SPM_BSI_D0 (MTK_PIN_NO(60) | 2) - -#define PINMUX_GPIO61__FUNC_GPIO61 (MTK_PIN_NO(61) | 0) -#define PINMUX_GPIO61__FUNC_MIPI1_SDATA (MTK_PIN_NO(61) | 1) - -#define PINMUX_GPIO62__FUNC_GPIO62 (MTK_PIN_NO(62) | 0) -#define PINMUX_GPIO62__FUNC_MIPI1_SCLK (MTK_PIN_NO(62) | 1) - -#define PINMUX_GPIO63__FUNC_GPIO63 (MTK_PIN_NO(63) | 0) -#define PINMUX_GPIO63__FUNC_MIPI0_SDATA (MTK_PIN_NO(63) | 1) - -#define PINMUX_GPIO64__FUNC_GPIO64 (MTK_PIN_NO(64) | 0) -#define PINMUX_GPIO64__FUNC_MIPI0_SCLK (MTK_PIN_NO(64) | 1) - -#define PINMUX_GPIO65__FUNC_GPIO65 (MTK_PIN_NO(65) | 0) -#define PINMUX_GPIO65__FUNC_MIPI3_SDATA (MTK_PIN_NO(65) | 1) -#define PINMUX_GPIO65__FUNC_BPI_OLAT2 (MTK_PIN_NO(65) | 2) - -#define PINMUX_GPIO66__FUNC_GPIO66 (MTK_PIN_NO(66) | 0) -#define PINMUX_GPIO66__FUNC_MIPI3_SCLK (MTK_PIN_NO(66) | 1) -#define PINMUX_GPIO66__FUNC_BPI_OLAT3 (MTK_PIN_NO(66) | 2) - -#define PINMUX_GPIO67__FUNC_GPIO67 (MTK_PIN_NO(67) | 0) -#define PINMUX_GPIO67__FUNC_MIPI2_SDATA (MTK_PIN_NO(67) | 1) - -#define PINMUX_GPIO68__FUNC_GPIO68 (MTK_PIN_NO(68) | 0) -#define PINMUX_GPIO68__FUNC_MIPI2_SCLK (MTK_PIN_NO(68) | 1) - -#define PINMUX_GPIO69__FUNC_GPIO69 (MTK_PIN_NO(69) | 0) -#define PINMUX_GPIO69__FUNC_BPI_BUS7 (MTK_PIN_NO(69) | 1) - -#define PINMUX_GPIO70__FUNC_GPIO70 (MTK_PIN_NO(70) | 0) -#define PINMUX_GPIO70__FUNC_BPI_BUS6 (MTK_PIN_NO(70) | 1) - -#define PINMUX_GPIO71__FUNC_GPIO71 (MTK_PIN_NO(71) | 0) -#define PINMUX_GPIO71__FUNC_BPI_BUS5 (MTK_PIN_NO(71) | 1) - -#define PINMUX_GPIO72__FUNC_GPIO72 (MTK_PIN_NO(72) | 0) -#define PINMUX_GPIO72__FUNC_BPI_BUS4 (MTK_PIN_NO(72) | 1) - -#define PINMUX_GPIO73__FUNC_GPIO73 (MTK_PIN_NO(73) | 0) -#define PINMUX_GPIO73__FUNC_BPI_BUS3 (MTK_PIN_NO(73) | 1) - -#define PINMUX_GPIO74__FUNC_GPIO74 (MTK_PIN_NO(74) | 0) -#define PINMUX_GPIO74__FUNC_BPI_BUS2 (MTK_PIN_NO(74) | 1) - -#define PINMUX_GPIO75__FUNC_GPIO75 (MTK_PIN_NO(75) | 0) -#define PINMUX_GPIO75__FUNC_BPI_BUS1 (MTK_PIN_NO(75) | 1) - -#define PINMUX_GPIO76__FUNC_GPIO76 (MTK_PIN_NO(76) | 0) -#define PINMUX_GPIO76__FUNC_BPI_BUS0 (MTK_PIN_NO(76) | 1) - -#define PINMUX_GPIO77__FUNC_GPIO77 (MTK_PIN_NO(77) | 0) -#define PINMUX_GPIO77__FUNC_BPI_ANT1 (MTK_PIN_NO(77) | 1) - -#define PINMUX_GPIO78__FUNC_GPIO78 (MTK_PIN_NO(78) | 0) -#define PINMUX_GPIO78__FUNC_BPI_OLAT0 (MTK_PIN_NO(78) | 1) - -#define PINMUX_GPIO79__FUNC_GPIO79 (MTK_PIN_NO(79) | 0) -#define PINMUX_GPIO79__FUNC_BPI_PA_VM1 (MTK_PIN_NO(79) | 1) -#define PINMUX_GPIO79__FUNC_MIPI4_SDATA (MTK_PIN_NO(79) | 2) - -#define PINMUX_GPIO80__FUNC_GPIO80 (MTK_PIN_NO(80) | 0) -#define PINMUX_GPIO80__FUNC_BPI_PA_VM0 (MTK_PIN_NO(80) | 1) -#define PINMUX_GPIO80__FUNC_MIPI4_SCLK (MTK_PIN_NO(80) | 2) - -#define PINMUX_GPIO81__FUNC_GPIO81 (MTK_PIN_NO(81) | 0) -#define PINMUX_GPIO81__FUNC_SDA1 (MTK_PIN_NO(81) | 1) - -#define PINMUX_GPIO82__FUNC_GPIO82 (MTK_PIN_NO(82) | 0) -#define PINMUX_GPIO82__FUNC_SDA0 (MTK_PIN_NO(82) | 1) - -#define PINMUX_GPIO83__FUNC_GPIO83 (MTK_PIN_NO(83) | 0) -#define PINMUX_GPIO83__FUNC_SCL0 (MTK_PIN_NO(83) | 1) - -#define PINMUX_GPIO84__FUNC_GPIO84 (MTK_PIN_NO(84) | 0) -#define PINMUX_GPIO84__FUNC_SCL1 (MTK_PIN_NO(84) | 1) - -#define PINMUX_GPIO85__FUNC_GPIO85 (MTK_PIN_NO(85) | 0) -#define PINMUX_GPIO85__FUNC_SPI0_MI (MTK_PIN_NO(85) | 1) -#define PINMUX_GPIO85__FUNC_SCP_SPI0_MI (MTK_PIN_NO(85) | 2) -#define PINMUX_GPIO85__FUNC_CLKM3 (MTK_PIN_NO(85) | 3) -#define 
PINMUX_GPIO85__FUNC_I2S1_BCK (MTK_PIN_NO(85) | 4) -#define PINMUX_GPIO85__FUNC_MFG_DFD_JTAG_TDO (MTK_PIN_NO(85) | 5) -#define PINMUX_GPIO85__FUNC_DFD_TDO (MTK_PIN_NO(85) | 6) -#define PINMUX_GPIO85__FUNC_JTDO_SEL1 (MTK_PIN_NO(85) | 7) - -#define PINMUX_GPIO86__FUNC_GPIO86 (MTK_PIN_NO(86) | 0) -#define PINMUX_GPIO86__FUNC_SPI0_CSB (MTK_PIN_NO(86) | 1) -#define PINMUX_GPIO86__FUNC_SCP_SPI0_CS (MTK_PIN_NO(86) | 2) -#define PINMUX_GPIO86__FUNC_CLKM0 (MTK_PIN_NO(86) | 3) -#define PINMUX_GPIO86__FUNC_I2S1_LRCK (MTK_PIN_NO(86) | 4) -#define PINMUX_GPIO86__FUNC_MFG_DFD_JTAG_TMS (MTK_PIN_NO(86) | 5) -#define PINMUX_GPIO86__FUNC_DFD_TMS (MTK_PIN_NO(86) | 6) -#define PINMUX_GPIO86__FUNC_JTMS_SEL1 (MTK_PIN_NO(86) | 7) - -#define PINMUX_GPIO87__FUNC_GPIO87 (MTK_PIN_NO(87) | 0) -#define PINMUX_GPIO87__FUNC_SPI0_MO (MTK_PIN_NO(87) | 1) -#define PINMUX_GPIO87__FUNC_SCP_SPI0_MO (MTK_PIN_NO(87) | 2) -#define PINMUX_GPIO87__FUNC_SDA1 (MTK_PIN_NO(87) | 3) -#define PINMUX_GPIO87__FUNC_I2S1_DO (MTK_PIN_NO(87) | 4) -#define PINMUX_GPIO87__FUNC_MFG_DFD_JTAG_TDI (MTK_PIN_NO(87) | 5) -#define PINMUX_GPIO87__FUNC_DFD_TDI (MTK_PIN_NO(87) | 6) -#define PINMUX_GPIO87__FUNC_JTDI_SEL1 (MTK_PIN_NO(87) | 7) - -#define PINMUX_GPIO88__FUNC_GPIO88 (MTK_PIN_NO(88) | 0) -#define PINMUX_GPIO88__FUNC_SPI0_CLK (MTK_PIN_NO(88) | 1) -#define PINMUX_GPIO88__FUNC_SCP_SPI0_CK (MTK_PIN_NO(88) | 2) -#define PINMUX_GPIO88__FUNC_SCL1 (MTK_PIN_NO(88) | 3) -#define PINMUX_GPIO88__FUNC_I2S1_MCK (MTK_PIN_NO(88) | 4) -#define PINMUX_GPIO88__FUNC_MFG_DFD_JTAG_TCK (MTK_PIN_NO(88) | 5) -#define PINMUX_GPIO88__FUNC_DFD_TCK_XI (MTK_PIN_NO(88) | 6) -#define PINMUX_GPIO88__FUNC_JTCK_SEL1 (MTK_PIN_NO(88) | 7) - -#define PINMUX_GPIO89__FUNC_GPIO89 (MTK_PIN_NO(89) | 0) -#define PINMUX_GPIO89__FUNC_SRCLKENAI0 (MTK_PIN_NO(89) | 1) -#define PINMUX_GPIO89__FUNC_PWM_C (MTK_PIN_NO(89) | 2) -#define PINMUX_GPIO89__FUNC_I2S5_BCK (MTK_PIN_NO(89) | 3) -#define PINMUX_GPIO89__FUNC_ANT_SEL6 (MTK_PIN_NO(89) | 4) -#define PINMUX_GPIO89__FUNC_SDA8 (MTK_PIN_NO(89) | 5) -#define PINMUX_GPIO89__FUNC_CMVREF0 (MTK_PIN_NO(89) | 6) -#define PINMUX_GPIO89__FUNC_DBG_MON_A21 (MTK_PIN_NO(89) | 7) - -#define PINMUX_GPIO90__FUNC_GPIO90 (MTK_PIN_NO(90) | 0) -#define PINMUX_GPIO90__FUNC_PWM_A (MTK_PIN_NO(90) | 1) -#define PINMUX_GPIO90__FUNC_CMMCLK2 (MTK_PIN_NO(90) | 2) -#define PINMUX_GPIO90__FUNC_I2S5_LRCK (MTK_PIN_NO(90) | 3) -#define PINMUX_GPIO90__FUNC_SCP_VREQ_VAO (MTK_PIN_NO(90) | 4) -#define PINMUX_GPIO90__FUNC_SCL8 (MTK_PIN_NO(90) | 5) -#define PINMUX_GPIO90__FUNC_PTA_RXD (MTK_PIN_NO(90) | 6) -#define PINMUX_GPIO90__FUNC_DBG_MON_A22 (MTK_PIN_NO(90) | 7) - -#define PINMUX_GPIO91__FUNC_GPIO91 (MTK_PIN_NO(91) | 0) -#define PINMUX_GPIO91__FUNC_KPROW1 (MTK_PIN_NO(91) | 1) -#define PINMUX_GPIO91__FUNC_PWM_B (MTK_PIN_NO(91) | 2) -#define PINMUX_GPIO91__FUNC_I2S5_DO (MTK_PIN_NO(91) | 3) -#define PINMUX_GPIO91__FUNC_ANT_SEL7 (MTK_PIN_NO(91) | 4) -#define PINMUX_GPIO91__FUNC_CMMCLK3 (MTK_PIN_NO(91) | 5) -#define PINMUX_GPIO91__FUNC_PTA_TXD (MTK_PIN_NO(91) | 6) - -#define PINMUX_GPIO92__FUNC_GPIO92 (MTK_PIN_NO(92) | 0) -#define PINMUX_GPIO92__FUNC_KPROW0 (MTK_PIN_NO(92) | 1) - -#define PINMUX_GPIO93__FUNC_GPIO93 (MTK_PIN_NO(93) | 0) -#define PINMUX_GPIO93__FUNC_KPCOL0 (MTK_PIN_NO(93) | 1) -#define PINMUX_GPIO93__FUNC_DBG_MON_B27 (MTK_PIN_NO(93) | 7) - -#define PINMUX_GPIO94__FUNC_GPIO94 (MTK_PIN_NO(94) | 0) -#define PINMUX_GPIO94__FUNC_KPCOL1 (MTK_PIN_NO(94) | 1) -#define PINMUX_GPIO94__FUNC_I2S2_DI2 (MTK_PIN_NO(94) | 2) -#define PINMUX_GPIO94__FUNC_I2S5_MCK (MTK_PIN_NO(94) | 3) 
-#define PINMUX_GPIO94__FUNC_CMMCLK2 (MTK_PIN_NO(94) | 4) -#define PINMUX_GPIO94__FUNC_SCP_SPI2_MI (MTK_PIN_NO(94) | 5) -#define PINMUX_GPIO94__FUNC_SRCLKENAI1 (MTK_PIN_NO(94) | 6) -#define PINMUX_GPIO94__FUNC_SPI2_MI (MTK_PIN_NO(94) | 7) - -#define PINMUX_GPIO95__FUNC_GPIO95 (MTK_PIN_NO(95) | 0) -#define PINMUX_GPIO95__FUNC_URXD0 (MTK_PIN_NO(95) | 1) -#define PINMUX_GPIO95__FUNC_UTXD0 (MTK_PIN_NO(95) | 2) -#define PINMUX_GPIO95__FUNC_MD_URXD0 (MTK_PIN_NO(95) | 3) -#define PINMUX_GPIO95__FUNC_MD_URXD1 (MTK_PIN_NO(95) | 4) -#define PINMUX_GPIO95__FUNC_SSPM_URXD_AO (MTK_PIN_NO(95) | 5) -#define PINMUX_GPIO95__FUNC_CCU_URXD_AO (MTK_PIN_NO(95) | 6) - -#define PINMUX_GPIO96__FUNC_GPIO96 (MTK_PIN_NO(96) | 0) -#define PINMUX_GPIO96__FUNC_UTXD0 (MTK_PIN_NO(96) | 1) -#define PINMUX_GPIO96__FUNC_URXD0 (MTK_PIN_NO(96) | 2) -#define PINMUX_GPIO96__FUNC_MD_UTXD0 (MTK_PIN_NO(96) | 3) -#define PINMUX_GPIO96__FUNC_MD_UTXD1 (MTK_PIN_NO(96) | 4) -#define PINMUX_GPIO96__FUNC_SSPM_UTXD_AO (MTK_PIN_NO(96) | 5) -#define PINMUX_GPIO96__FUNC_CCU_UTXD_AO (MTK_PIN_NO(96) | 6) -#define PINMUX_GPIO96__FUNC_DBG_MON_B2 (MTK_PIN_NO(96) | 7) - -#define PINMUX_GPIO97__FUNC_GPIO97 (MTK_PIN_NO(97) | 0) -#define PINMUX_GPIO97__FUNC_UCTS0 (MTK_PIN_NO(97) | 1) -#define PINMUX_GPIO97__FUNC_I2S2_MCK (MTK_PIN_NO(97) | 2) -#define PINMUX_GPIO97__FUNC_IDDIG (MTK_PIN_NO(97) | 3) -#define PINMUX_GPIO97__FUNC_CONN_MCU_TDO (MTK_PIN_NO(97) | 4) -#define PINMUX_GPIO97__FUNC_SSPM_JTAG_TDO (MTK_PIN_NO(97) | 5) -#define PINMUX_GPIO97__FUNC_IO_JTAG_TDO (MTK_PIN_NO(97) | 6) -#define PINMUX_GPIO97__FUNC_DBG_MON_B3 (MTK_PIN_NO(97) | 7) - -#define PINMUX_GPIO98__FUNC_GPIO98 (MTK_PIN_NO(98) | 0) -#define PINMUX_GPIO98__FUNC_URTS0 (MTK_PIN_NO(98) | 1) -#define PINMUX_GPIO98__FUNC_I2S2_BCK (MTK_PIN_NO(98) | 2) -#define PINMUX_GPIO98__FUNC_USB_DRVVBUS (MTK_PIN_NO(98) | 3) -#define PINMUX_GPIO98__FUNC_CONN_MCU_TMS (MTK_PIN_NO(98) | 4) -#define PINMUX_GPIO98__FUNC_SSPM_JTAG_TMS (MTK_PIN_NO(98) | 5) -#define PINMUX_GPIO98__FUNC_IO_JTAG_TMS (MTK_PIN_NO(98) | 6) -#define PINMUX_GPIO98__FUNC_DBG_MON_B4 (MTK_PIN_NO(98) | 7) - -#define PINMUX_GPIO99__FUNC_GPIO99 (MTK_PIN_NO(99) | 0) -#define PINMUX_GPIO99__FUNC_CMMCLK0 (MTK_PIN_NO(99) | 1) -#define PINMUX_GPIO99__FUNC_CONN_MCU_AICE_TMSC (MTK_PIN_NO(99) | 4) -#define PINMUX_GPIO99__FUNC_DBG_MON_B28 (MTK_PIN_NO(99) | 7) - -#define PINMUX_GPIO100__FUNC_GPIO100 (MTK_PIN_NO(100) | 0) -#define PINMUX_GPIO100__FUNC_CMMCLK1 (MTK_PIN_NO(100) | 1) -#define PINMUX_GPIO100__FUNC_PWM_C (MTK_PIN_NO(100) | 2) -#define PINMUX_GPIO100__FUNC_MD_INT1_C2K_UIM0_HOT_PLUG (MTK_PIN_NO(100) | 3) -#define PINMUX_GPIO100__FUNC_CONN_MCU_AICE_TCKC (MTK_PIN_NO(100) | 4) -#define PINMUX_GPIO100__FUNC_DBG_MON_B29 (MTK_PIN_NO(100) | 7) - -#define PINMUX_GPIO101__FUNC_GPIO101 (MTK_PIN_NO(101) | 0) -#define PINMUX_GPIO101__FUNC_CLKM2 (MTK_PIN_NO(101) | 1) -#define PINMUX_GPIO101__FUNC_I2S2_LRCK (MTK_PIN_NO(101) | 2) -#define PINMUX_GPIO101__FUNC_CMVREF1 (MTK_PIN_NO(101) | 3) -#define PINMUX_GPIO101__FUNC_CONN_MCU_TCK (MTK_PIN_NO(101) | 4) -#define PINMUX_GPIO101__FUNC_SSPM_JTAG_TCK (MTK_PIN_NO(101) | 5) -#define PINMUX_GPIO101__FUNC_IO_JTAG_TCK (MTK_PIN_NO(101) | 6) - -#define PINMUX_GPIO102__FUNC_GPIO102 (MTK_PIN_NO(102) | 0) -#define PINMUX_GPIO102__FUNC_CLKM1 (MTK_PIN_NO(102) | 1) -#define PINMUX_GPIO102__FUNC_I2S2_DI (MTK_PIN_NO(102) | 2) -#define PINMUX_GPIO102__FUNC_DVFSRC_EXT_REQ (MTK_PIN_NO(102) | 3) -#define PINMUX_GPIO102__FUNC_CONN_MCU_TDI (MTK_PIN_NO(102) | 4) -#define PINMUX_GPIO102__FUNC_SSPM_JTAG_TDI (MTK_PIN_NO(102) | 5) 
-#define PINMUX_GPIO102__FUNC_IO_JTAG_TDI (MTK_PIN_NO(102) | 6) -#define PINMUX_GPIO102__FUNC_DBG_MON_B8 (MTK_PIN_NO(102) | 7) - -#define PINMUX_GPIO103__FUNC_GPIO103 (MTK_PIN_NO(103) | 0) -#define PINMUX_GPIO103__FUNC_SCL2 (MTK_PIN_NO(103) | 1) - -#define PINMUX_GPIO104__FUNC_GPIO104 (MTK_PIN_NO(104) | 0) -#define PINMUX_GPIO104__FUNC_SDA2 (MTK_PIN_NO(104) | 1) - -#define PINMUX_GPIO105__FUNC_GPIO105 (MTK_PIN_NO(105) | 0) -#define PINMUX_GPIO105__FUNC_SCL4 (MTK_PIN_NO(105) | 1) - -#define PINMUX_GPIO106__FUNC_GPIO106 (MTK_PIN_NO(106) | 0) -#define PINMUX_GPIO106__FUNC_SDA4 (MTK_PIN_NO(106) | 1) - -#define PINMUX_GPIO107__FUNC_GPIO107 (MTK_PIN_NO(107) | 0) -#define PINMUX_GPIO107__FUNC_DMIC_CLK (MTK_PIN_NO(107) | 1) -#define PINMUX_GPIO107__FUNC_ANT_SEL0 (MTK_PIN_NO(107) | 2) -#define PINMUX_GPIO107__FUNC_CLKM0 (MTK_PIN_NO(107) | 3) -#define PINMUX_GPIO107__FUNC_SDA7 (MTK_PIN_NO(107) | 4) -#define PINMUX_GPIO107__FUNC_EXT_FRAME_SYNC (MTK_PIN_NO(107) | 5) -#define PINMUX_GPIO107__FUNC_PWM_A (MTK_PIN_NO(107) | 6) -#define PINMUX_GPIO107__FUNC_DBG_MON_B12 (MTK_PIN_NO(107) | 7) - -#define PINMUX_GPIO108__FUNC_GPIO108 (MTK_PIN_NO(108) | 0) -#define PINMUX_GPIO108__FUNC_CMMCLK2 (MTK_PIN_NO(108) | 1) -#define PINMUX_GPIO108__FUNC_ANT_SEL1 (MTK_PIN_NO(108) | 2) -#define PINMUX_GPIO108__FUNC_CLKM1 (MTK_PIN_NO(108) | 3) -#define PINMUX_GPIO108__FUNC_SCL8 (MTK_PIN_NO(108) | 4) -#define PINMUX_GPIO108__FUNC_DAP_MD32_SWD (MTK_PIN_NO(108) | 5) -#define PINMUX_GPIO108__FUNC_PWM_B (MTK_PIN_NO(108) | 6) -#define PINMUX_GPIO108__FUNC_DBG_MON_B13 (MTK_PIN_NO(108) | 7) - -#define PINMUX_GPIO109__FUNC_GPIO109 (MTK_PIN_NO(109) | 0) -#define PINMUX_GPIO109__FUNC_DMIC_DAT (MTK_PIN_NO(109) | 1) -#define PINMUX_GPIO109__FUNC_ANT_SEL2 (MTK_PIN_NO(109) | 2) -#define PINMUX_GPIO109__FUNC_CLKM2 (MTK_PIN_NO(109) | 3) -#define PINMUX_GPIO109__FUNC_SDA8 (MTK_PIN_NO(109) | 4) -#define PINMUX_GPIO109__FUNC_DAP_MD32_SWCK (MTK_PIN_NO(109) | 5) -#define PINMUX_GPIO109__FUNC_PWM_C (MTK_PIN_NO(109) | 6) -#define PINMUX_GPIO109__FUNC_DBG_MON_B14 (MTK_PIN_NO(109) | 7) - -#define PINMUX_GPIO110__FUNC_GPIO110 (MTK_PIN_NO(110) | 0) -#define PINMUX_GPIO110__FUNC_SCL7 (MTK_PIN_NO(110) | 1) -#define PINMUX_GPIO110__FUNC_ANT_SEL0 (MTK_PIN_NO(110) | 2) -#define PINMUX_GPIO110__FUNC_TP_URXD1_AO (MTK_PIN_NO(110) | 3) -#define PINMUX_GPIO110__FUNC_USB_DRVVBUS (MTK_PIN_NO(110) | 4) -#define PINMUX_GPIO110__FUNC_SRCLKENAI1 (MTK_PIN_NO(110) | 5) -#define PINMUX_GPIO110__FUNC_KPCOL2 (MTK_PIN_NO(110) | 6) -#define PINMUX_GPIO110__FUNC_URXD1 (MTK_PIN_NO(110) | 7) - -#define PINMUX_GPIO111__FUNC_GPIO111 (MTK_PIN_NO(111) | 0) -#define PINMUX_GPIO111__FUNC_CMMCLK3 (MTK_PIN_NO(111) | 1) -#define PINMUX_GPIO111__FUNC_ANT_SEL1 (MTK_PIN_NO(111) | 2) -#define PINMUX_GPIO111__FUNC_SRCLKENAI0 (MTK_PIN_NO(111) | 3) -#define PINMUX_GPIO111__FUNC_SCP_VREQ_VAO (MTK_PIN_NO(111) | 4) -#define PINMUX_GPIO111__FUNC_MD_INT2_C2K_UIM1_HOT_PLUG (MTK_PIN_NO(111) | 5) -#define PINMUX_GPIO111__FUNC_DVFSRC_EXT_REQ (MTK_PIN_NO(111) | 7) - -#define PINMUX_GPIO112__FUNC_GPIO112 (MTK_PIN_NO(112) | 0) -#define PINMUX_GPIO112__FUNC_SDA7 (MTK_PIN_NO(112) | 1) -#define PINMUX_GPIO112__FUNC_ANT_SEL2 (MTK_PIN_NO(112) | 2) -#define PINMUX_GPIO112__FUNC_TP_UTXD1_AO (MTK_PIN_NO(112) | 3) -#define PINMUX_GPIO112__FUNC_IDDIG (MTK_PIN_NO(112) | 4) -#define PINMUX_GPIO112__FUNC_AGPS_SYNC (MTK_PIN_NO(112) | 5) -#define PINMUX_GPIO112__FUNC_KPROW2 (MTK_PIN_NO(112) | 6) -#define PINMUX_GPIO112__FUNC_UTXD1 (MTK_PIN_NO(112) | 7) - -#define PINMUX_GPIO113__FUNC_GPIO113 (MTK_PIN_NO(113) | 0) 
-#define PINMUX_GPIO113__FUNC_CONN_TOP_CLK (MTK_PIN_NO(113) | 1) -#define PINMUX_GPIO113__FUNC_SCL6 (MTK_PIN_NO(113) | 3) -#define PINMUX_GPIO113__FUNC_AUXIF_CLK0 (MTK_PIN_NO(113) | 4) -#define PINMUX_GPIO113__FUNC_TP_UCTS1_AO (MTK_PIN_NO(113) | 6) - -#define PINMUX_GPIO114__FUNC_GPIO114 (MTK_PIN_NO(114) | 0) -#define PINMUX_GPIO114__FUNC_CONN_TOP_DATA (MTK_PIN_NO(114) | 1) -#define PINMUX_GPIO114__FUNC_SDA6 (MTK_PIN_NO(114) | 3) -#define PINMUX_GPIO114__FUNC_AUXIF_ST0 (MTK_PIN_NO(114) | 4) -#define PINMUX_GPIO114__FUNC_TP_URTS1_AO (MTK_PIN_NO(114) | 6) - -#define PINMUX_GPIO115__FUNC_GPIO115 (MTK_PIN_NO(115) | 0) -#define PINMUX_GPIO115__FUNC_CONN_BT_CLK (MTK_PIN_NO(115) | 1) -#define PINMUX_GPIO115__FUNC_UTXD1 (MTK_PIN_NO(115) | 2) -#define PINMUX_GPIO115__FUNC_PTA_TXD (MTK_PIN_NO(115) | 3) -#define PINMUX_GPIO115__FUNC_AUXIF_CLK1 (MTK_PIN_NO(115) | 4) -#define PINMUX_GPIO115__FUNC_DAP_MD32_SWD (MTK_PIN_NO(115) | 5) -#define PINMUX_GPIO115__FUNC_TP_UTXD1_AO (MTK_PIN_NO(115) | 6) - -#define PINMUX_GPIO116__FUNC_GPIO116 (MTK_PIN_NO(116) | 0) -#define PINMUX_GPIO116__FUNC_CONN_BT_DATA (MTK_PIN_NO(116) | 1) -#define PINMUX_GPIO116__FUNC_IPU_JTAG_TRST (MTK_PIN_NO(116) | 2) -#define PINMUX_GPIO116__FUNC_AUXIF_ST1 (MTK_PIN_NO(116) | 4) -#define PINMUX_GPIO116__FUNC_DAP_MD32_SWCK (MTK_PIN_NO(116) | 5) -#define PINMUX_GPIO116__FUNC_TP_URXD2_AO (MTK_PIN_NO(116) | 6) -#define PINMUX_GPIO116__FUNC_DBG_MON_A0 (MTK_PIN_NO(116) | 7) - -#define PINMUX_GPIO117__FUNC_GPIO117 (MTK_PIN_NO(117) | 0) -#define PINMUX_GPIO117__FUNC_CONN_WF_HB0 (MTK_PIN_NO(117) | 1) -#define PINMUX_GPIO117__FUNC_IPU_JTAG_TDO (MTK_PIN_NO(117) | 2) -#define PINMUX_GPIO117__FUNC_TP_UTXD2_AO (MTK_PIN_NO(117) | 6) -#define PINMUX_GPIO117__FUNC_DBG_MON_A4 (MTK_PIN_NO(117) | 7) - -#define PINMUX_GPIO118__FUNC_GPIO118 (MTK_PIN_NO(118) | 0) -#define PINMUX_GPIO118__FUNC_CONN_WF_HB1 (MTK_PIN_NO(118) | 1) -#define PINMUX_GPIO118__FUNC_IPU_JTAG_TDI (MTK_PIN_NO(118) | 2) -#define PINMUX_GPIO118__FUNC_SSPM_URXD_AO (MTK_PIN_NO(118) | 5) -#define PINMUX_GPIO118__FUNC_TP_UCTS2_AO (MTK_PIN_NO(118) | 6) -#define PINMUX_GPIO118__FUNC_DBG_MON_A5 (MTK_PIN_NO(118) | 7) - -#define PINMUX_GPIO119__FUNC_GPIO119 (MTK_PIN_NO(119) | 0) -#define PINMUX_GPIO119__FUNC_CONN_WF_HB2 (MTK_PIN_NO(119) | 1) -#define PINMUX_GPIO119__FUNC_IPU_JTAG_TCK (MTK_PIN_NO(119) | 2) -#define PINMUX_GPIO119__FUNC_SSPM_UTXD_AO (MTK_PIN_NO(119) | 5) -#define PINMUX_GPIO119__FUNC_TP_URTS2_AO (MTK_PIN_NO(119) | 6) - -#define PINMUX_GPIO120__FUNC_GPIO120 (MTK_PIN_NO(120) | 0) -#define PINMUX_GPIO120__FUNC_CONN_WB_PTA (MTK_PIN_NO(120) | 1) -#define PINMUX_GPIO120__FUNC_IPU_JTAG_TMS (MTK_PIN_NO(120) | 2) -#define PINMUX_GPIO120__FUNC_CCU_URXD_AO (MTK_PIN_NO(120) | 5) - -#define PINMUX_GPIO121__FUNC_GPIO121 (MTK_PIN_NO(121) | 0) -#define PINMUX_GPIO121__FUNC_CONN_HRST_B (MTK_PIN_NO(121) | 1) -#define PINMUX_GPIO121__FUNC_URXD1 (MTK_PIN_NO(121) | 2) -#define PINMUX_GPIO121__FUNC_PTA_RXD (MTK_PIN_NO(121) | 3) -#define PINMUX_GPIO121__FUNC_CCU_UTXD_AO (MTK_PIN_NO(121) | 5) -#define PINMUX_GPIO121__FUNC_TP_URXD1_AO (MTK_PIN_NO(121) | 6) - -#define PINMUX_GPIO122__FUNC_GPIO122 (MTK_PIN_NO(122) | 0) -#define PINMUX_GPIO122__FUNC_MSDC0_CMD (MTK_PIN_NO(122) | 1) -#define PINMUX_GPIO122__FUNC_SSPM_URXD2_AO (MTK_PIN_NO(122) | 2) -#define PINMUX_GPIO122__FUNC_ANT_SEL1 (MTK_PIN_NO(122) | 3) -#define PINMUX_GPIO122__FUNC_DBG_MON_A12 (MTK_PIN_NO(122) | 7) - -#define PINMUX_GPIO123__FUNC_GPIO123 (MTK_PIN_NO(123) | 0) -#define PINMUX_GPIO123__FUNC_MSDC0_DAT0 (MTK_PIN_NO(123) | 1) -#define 
PINMUX_GPIO123__FUNC_ANT_SEL0 (MTK_PIN_NO(123) | 3) -#define PINMUX_GPIO123__FUNC_DBG_MON_A13 (MTK_PIN_NO(123) | 7) - -#define PINMUX_GPIO124__FUNC_GPIO124 (MTK_PIN_NO(124) | 0) -#define PINMUX_GPIO124__FUNC_MSDC0_CLK (MTK_PIN_NO(124) | 1) -#define PINMUX_GPIO124__FUNC_DBG_MON_A14 (MTK_PIN_NO(124) | 7) - -#define PINMUX_GPIO125__FUNC_GPIO125 (MTK_PIN_NO(125) | 0) -#define PINMUX_GPIO125__FUNC_MSDC0_DAT2 (MTK_PIN_NO(125) | 1) -#define PINMUX_GPIO125__FUNC_MRG_CLK (MTK_PIN_NO(125) | 3) -#define PINMUX_GPIO125__FUNC_DBG_MON_A15 (MTK_PIN_NO(125) | 7) - -#define PINMUX_GPIO126__FUNC_GPIO126 (MTK_PIN_NO(126) | 0) -#define PINMUX_GPIO126__FUNC_MSDC0_DAT4 (MTK_PIN_NO(126) | 1) -#define PINMUX_GPIO126__FUNC_ANT_SEL5 (MTK_PIN_NO(126) | 3) -#define PINMUX_GPIO126__FUNC_UFS_MPHY_SCL (MTK_PIN_NO(126) | 6) -#define PINMUX_GPIO126__FUNC_DBG_MON_A16 (MTK_PIN_NO(126) | 7) - -#define PINMUX_GPIO127__FUNC_GPIO127 (MTK_PIN_NO(127) | 0) -#define PINMUX_GPIO127__FUNC_MSDC0_DAT6 (MTK_PIN_NO(127) | 1) -#define PINMUX_GPIO127__FUNC_ANT_SEL4 (MTK_PIN_NO(127) | 3) -#define PINMUX_GPIO127__FUNC_UFS_MPHY_SDA (MTK_PIN_NO(127) | 6) -#define PINMUX_GPIO127__FUNC_DBG_MON_A17 (MTK_PIN_NO(127) | 7) - -#define PINMUX_GPIO128__FUNC_GPIO128 (MTK_PIN_NO(128) | 0) -#define PINMUX_GPIO128__FUNC_MSDC0_DAT1 (MTK_PIN_NO(128) | 1) -#define PINMUX_GPIO128__FUNC_ANT_SEL2 (MTK_PIN_NO(128) | 3) -#define PINMUX_GPIO128__FUNC_UFS_UNIPRO_SDA (MTK_PIN_NO(128) | 6) -#define PINMUX_GPIO128__FUNC_DBG_MON_A18 (MTK_PIN_NO(128) | 7) - -#define PINMUX_GPIO129__FUNC_GPIO129 (MTK_PIN_NO(129) | 0) -#define PINMUX_GPIO129__FUNC_MSDC0_DAT5 (MTK_PIN_NO(129) | 1) -#define PINMUX_GPIO129__FUNC_ANT_SEL3 (MTK_PIN_NO(129) | 3) -#define PINMUX_GPIO129__FUNC_UFS_UNIPRO_SCL (MTK_PIN_NO(129) | 6) -#define PINMUX_GPIO129__FUNC_DBG_MON_A23 (MTK_PIN_NO(129) | 7) - -#define PINMUX_GPIO130__FUNC_GPIO130 (MTK_PIN_NO(130) | 0) -#define PINMUX_GPIO130__FUNC_MSDC0_DAT7 (MTK_PIN_NO(130) | 1) -#define PINMUX_GPIO130__FUNC_MRG_DO (MTK_PIN_NO(130) | 3) -#define PINMUX_GPIO130__FUNC_DBG_MON_A24 (MTK_PIN_NO(130) | 7) - -#define PINMUX_GPIO131__FUNC_GPIO131 (MTK_PIN_NO(131) | 0) -#define PINMUX_GPIO131__FUNC_MSDC0_DSL (MTK_PIN_NO(131) | 1) -#define PINMUX_GPIO131__FUNC_MRG_SYNC (MTK_PIN_NO(131) | 3) -#define PINMUX_GPIO131__FUNC_DBG_MON_A25 (MTK_PIN_NO(131) | 7) - -#define PINMUX_GPIO132__FUNC_GPIO132 (MTK_PIN_NO(132) | 0) -#define PINMUX_GPIO132__FUNC_MSDC0_DAT3 (MTK_PIN_NO(132) | 1) -#define PINMUX_GPIO132__FUNC_MRG_DI (MTK_PIN_NO(132) | 3) -#define PINMUX_GPIO132__FUNC_DBG_MON_A26 (MTK_PIN_NO(132) | 7) - -#define PINMUX_GPIO133__FUNC_GPIO133 (MTK_PIN_NO(133) | 0) -#define PINMUX_GPIO133__FUNC_MSDC0_RSTB (MTK_PIN_NO(133) | 1) -#define PINMUX_GPIO133__FUNC_AGPS_SYNC (MTK_PIN_NO(133) | 3) -#define PINMUX_GPIO133__FUNC_DBG_MON_A27 (MTK_PIN_NO(133) | 7) - -#define PINMUX_GPIO134__FUNC_GPIO134 (MTK_PIN_NO(134) | 0) -#define PINMUX_GPIO134__FUNC_RTC32K_CK (MTK_PIN_NO(134) | 1) - -#define PINMUX_GPIO135__FUNC_GPIO135 (MTK_PIN_NO(135) | 0) -#define PINMUX_GPIO135__FUNC_WATCHDOG (MTK_PIN_NO(135) | 1) - -#define PINMUX_GPIO136__FUNC_GPIO136 (MTK_PIN_NO(136) | 0) -#define PINMUX_GPIO136__FUNC_AUD_CLK_MOSI (MTK_PIN_NO(136) | 1) -#define PINMUX_GPIO136__FUNC_AUD_CLK_MISO (MTK_PIN_NO(136) | 2) -#define PINMUX_GPIO136__FUNC_I2S1_MCK (MTK_PIN_NO(136) | 3) -#define PINMUX_GPIO136__FUNC_UFS_UNIPRO_SCL (MTK_PIN_NO(136) | 6) - -#define PINMUX_GPIO137__FUNC_GPIO137 (MTK_PIN_NO(137) | 0) -#define PINMUX_GPIO137__FUNC_AUD_SYNC_MOSI (MTK_PIN_NO(137) | 1) -#define PINMUX_GPIO137__FUNC_AUD_SYNC_MISO 
(MTK_PIN_NO(137) | 2) -#define PINMUX_GPIO137__FUNC_I2S1_BCK (MTK_PIN_NO(137) | 3) - -#define PINMUX_GPIO138__FUNC_GPIO138 (MTK_PIN_NO(138) | 0) -#define PINMUX_GPIO138__FUNC_AUD_DAT_MOSI0 (MTK_PIN_NO(138) | 1) -#define PINMUX_GPIO138__FUNC_AUD_DAT_MISO0 (MTK_PIN_NO(138) | 2) -#define PINMUX_GPIO138__FUNC_I2S1_LRCK (MTK_PIN_NO(138) | 3) -#define PINMUX_GPIO138__FUNC_DBG_MON_B24 (MTK_PIN_NO(138) | 7) - -#define PINMUX_GPIO139__FUNC_GPIO139 (MTK_PIN_NO(139) | 0) -#define PINMUX_GPIO139__FUNC_AUD_DAT_MOSI1 (MTK_PIN_NO(139) | 1) -#define PINMUX_GPIO139__FUNC_AUD_DAT_MISO1 (MTK_PIN_NO(139) | 2) -#define PINMUX_GPIO139__FUNC_I2S1_DO (MTK_PIN_NO(139) | 3) -#define PINMUX_GPIO139__FUNC_UFS_MPHY_SDA (MTK_PIN_NO(139) | 6) - -#define PINMUX_GPIO140__FUNC_GPIO140 (MTK_PIN_NO(140) | 0) -#define PINMUX_GPIO140__FUNC_AUD_CLK_MISO (MTK_PIN_NO(140) | 1) -#define PINMUX_GPIO140__FUNC_AUD_CLK_MOSI (MTK_PIN_NO(140) | 2) -#define PINMUX_GPIO140__FUNC_I2S0_MCK (MTK_PIN_NO(140) | 3) -#define PINMUX_GPIO140__FUNC_UFS_UNIPRO_SDA (MTK_PIN_NO(140) | 6) - -#define PINMUX_GPIO141__FUNC_GPIO141 (MTK_PIN_NO(141) | 0) -#define PINMUX_GPIO141__FUNC_AUD_SYNC_MISO (MTK_PIN_NO(141) | 1) -#define PINMUX_GPIO141__FUNC_AUD_SYNC_MOSI (MTK_PIN_NO(141) | 2) -#define PINMUX_GPIO141__FUNC_I2S0_BCK (MTK_PIN_NO(141) | 3) - -#define PINMUX_GPIO142__FUNC_GPIO142 (MTK_PIN_NO(142) | 0) -#define PINMUX_GPIO142__FUNC_AUD_DAT_MISO0 (MTK_PIN_NO(142) | 1) -#define PINMUX_GPIO142__FUNC_AUD_DAT_MOSI0 (MTK_PIN_NO(142) | 2) -#define PINMUX_GPIO142__FUNC_I2S0_LRCK (MTK_PIN_NO(142) | 3) -#define PINMUX_GPIO142__FUNC_VOW_DAT_MISO (MTK_PIN_NO(142) | 4) -#define PINMUX_GPIO142__FUNC_DBG_MON_B25 (MTK_PIN_NO(142) | 7) - -#define PINMUX_GPIO143__FUNC_GPIO143 (MTK_PIN_NO(143) | 0) -#define PINMUX_GPIO143__FUNC_AUD_DAT_MISO1 (MTK_PIN_NO(143) | 1) -#define PINMUX_GPIO143__FUNC_AUD_DAT_MOSI1 (MTK_PIN_NO(143) | 2) -#define PINMUX_GPIO143__FUNC_I2S0_DI (MTK_PIN_NO(143) | 3) -#define PINMUX_GPIO143__FUNC_VOW_CLK_MISO (MTK_PIN_NO(143) | 4) -#define PINMUX_GPIO143__FUNC_UFS_MPHY_SCL (MTK_PIN_NO(143) | 6) -#define PINMUX_GPIO143__FUNC_DBG_MON_B26 (MTK_PIN_NO(143) | 7) - -#define PINMUX_GPIO144__FUNC_GPIO144 (MTK_PIN_NO(144) | 0) -#define PINMUX_GPIO144__FUNC_PWRAP_SPI0_MI (MTK_PIN_NO(144) | 1) -#define PINMUX_GPIO144__FUNC_PWRAP_SPI0_MO (MTK_PIN_NO(144) | 2) - -#define PINMUX_GPIO145__FUNC_GPIO145 (MTK_PIN_NO(145) | 0) -#define PINMUX_GPIO145__FUNC_PWRAP_SPI0_CSN (MTK_PIN_NO(145) | 1) - -#define PINMUX_GPIO146__FUNC_GPIO146 (MTK_PIN_NO(146) | 0) -#define PINMUX_GPIO146__FUNC_PWRAP_SPI0_MO (MTK_PIN_NO(146) | 1) -#define PINMUX_GPIO146__FUNC_PWRAP_SPI0_MI (MTK_PIN_NO(146) | 2) - -#define PINMUX_GPIO147__FUNC_GPIO147 (MTK_PIN_NO(147) | 0) -#define PINMUX_GPIO147__FUNC_PWRAP_SPI0_CK (MTK_PIN_NO(147) | 1) - -#define PINMUX_GPIO148__FUNC_GPIO148 (MTK_PIN_NO(148) | 0) -#define PINMUX_GPIO148__FUNC_SRCLKENA0 (MTK_PIN_NO(148) | 1) - -#define PINMUX_GPIO149__FUNC_GPIO149 (MTK_PIN_NO(149) | 0) -#define PINMUX_GPIO149__FUNC_SRCLKENA1 (MTK_PIN_NO(149) | 1) - -#define PINMUX_GPIO150__FUNC_GPIO150 (MTK_PIN_NO(150) | 0) -#define PINMUX_GPIO150__FUNC_PWM_A (MTK_PIN_NO(150) | 1) -#define PINMUX_GPIO150__FUNC_CMFLASH (MTK_PIN_NO(150) | 2) -#define PINMUX_GPIO150__FUNC_CLKM0 (MTK_PIN_NO(150) | 3) -#define PINMUX_GPIO150__FUNC_DBG_MON_B30 (MTK_PIN_NO(150) | 7) - -#define PINMUX_GPIO151__FUNC_GPIO151 (MTK_PIN_NO(151) | 0) -#define PINMUX_GPIO151__FUNC_PWM_B (MTK_PIN_NO(151) | 1) -#define PINMUX_GPIO151__FUNC_CMVREF0 (MTK_PIN_NO(151) | 2) -#define PINMUX_GPIO151__FUNC_CLKM1 
(MTK_PIN_NO(151) | 3) -#define PINMUX_GPIO151__FUNC_DBG_MON_B20 (MTK_PIN_NO(151) | 7) - -#define PINMUX_GPIO152__FUNC_GPIO152 (MTK_PIN_NO(152) | 0) -#define PINMUX_GPIO152__FUNC_PWM_C (MTK_PIN_NO(152) | 1) -#define PINMUX_GPIO152__FUNC_CMFLASH (MTK_PIN_NO(152) | 2) -#define PINMUX_GPIO152__FUNC_CLKM2 (MTK_PIN_NO(152) | 3) -#define PINMUX_GPIO152__FUNC_DBG_MON_B21 (MTK_PIN_NO(152) | 7) - -#define PINMUX_GPIO153__FUNC_GPIO153 (MTK_PIN_NO(153) | 0) -#define PINMUX_GPIO153__FUNC_PWM_A (MTK_PIN_NO(153) | 1) -#define PINMUX_GPIO153__FUNC_CMVREF0 (MTK_PIN_NO(153) | 2) -#define PINMUX_GPIO153__FUNC_CLKM3 (MTK_PIN_NO(153) | 3) -#define PINMUX_GPIO153__FUNC_DBG_MON_B22 (MTK_PIN_NO(153) | 7) - -#define PINMUX_GPIO154__FUNC_GPIO154 (MTK_PIN_NO(154) | 0) -#define PINMUX_GPIO154__FUNC_SCP_VREQ_VAO (MTK_PIN_NO(154) | 1) -#define PINMUX_GPIO154__FUNC_DVFSRC_EXT_REQ (MTK_PIN_NO(154) | 2) -#define PINMUX_GPIO154__FUNC_DBG_MON_B18 (MTK_PIN_NO(154) | 7) - -#define PINMUX_GPIO155__FUNC_GPIO155 (MTK_PIN_NO(155) | 0) -#define PINMUX_GPIO155__FUNC_ANT_SEL0 (MTK_PIN_NO(155) | 1) -#define PINMUX_GPIO155__FUNC_DVFSRC_EXT_REQ (MTK_PIN_NO(155) | 2) -#define PINMUX_GPIO155__FUNC_CMVREF1 (MTK_PIN_NO(155) | 3) -#define PINMUX_GPIO155__FUNC_SCP_JTAG_TDI (MTK_PIN_NO(155) | 7) - -#define PINMUX_GPIO156__FUNC_GPIO156 (MTK_PIN_NO(156) | 0) -#define PINMUX_GPIO156__FUNC_ANT_SEL1 (MTK_PIN_NO(156) | 1) -#define PINMUX_GPIO156__FUNC_SRCLKENAI0 (MTK_PIN_NO(156) | 2) -#define PINMUX_GPIO156__FUNC_SCL6 (MTK_PIN_NO(156) | 3) -#define PINMUX_GPIO156__FUNC_KPCOL2 (MTK_PIN_NO(156) | 4) -#define PINMUX_GPIO156__FUNC_IDDIG (MTK_PIN_NO(156) | 5) -#define PINMUX_GPIO156__FUNC_SCP_JTAG_TCK (MTK_PIN_NO(156) | 7) - -#define PINMUX_GPIO157__FUNC_GPIO157 (MTK_PIN_NO(157) | 0) -#define PINMUX_GPIO157__FUNC_ANT_SEL2 (MTK_PIN_NO(157) | 1) -#define PINMUX_GPIO157__FUNC_SRCLKENAI1 (MTK_PIN_NO(157) | 2) -#define PINMUX_GPIO157__FUNC_SDA6 (MTK_PIN_NO(157) | 3) -#define PINMUX_GPIO157__FUNC_KPROW2 (MTK_PIN_NO(157) | 4) -#define PINMUX_GPIO157__FUNC_USB_DRVVBUS (MTK_PIN_NO(157) | 5) -#define PINMUX_GPIO157__FUNC_SCP_JTAG_TRSTN (MTK_PIN_NO(157) | 7) - -#define PINMUX_GPIO158__FUNC_GPIO158 (MTK_PIN_NO(158) | 0) -#define PINMUX_GPIO158__FUNC_ANT_SEL3 (MTK_PIN_NO(158) | 1) - -#define PINMUX_GPIO159__FUNC_GPIO159 (MTK_PIN_NO(159) | 0) -#define PINMUX_GPIO159__FUNC_ANT_SEL4 (MTK_PIN_NO(159) | 1) - -#define PINMUX_GPIO160__FUNC_GPIO160 (MTK_PIN_NO(160) | 0) -#define PINMUX_GPIO160__FUNC_ANT_SEL5 (MTK_PIN_NO(160) | 1) - -#define PINMUX_GPIO161__FUNC_GPIO161 (MTK_PIN_NO(161) | 0) -#define PINMUX_GPIO161__FUNC_SPI1_A_MI (MTK_PIN_NO(161) | 1) -#define PINMUX_GPIO161__FUNC_SCP_SPI1_MI (MTK_PIN_NO(161) | 2) -#define PINMUX_GPIO161__FUNC_IDDIG (MTK_PIN_NO(161) | 3) -#define PINMUX_GPIO161__FUNC_ANT_SEL6 (MTK_PIN_NO(161) | 4) -#define PINMUX_GPIO161__FUNC_KPCOL2 (MTK_PIN_NO(161) | 5) -#define PINMUX_GPIO161__FUNC_PTA_RXD (MTK_PIN_NO(161) | 6) -#define PINMUX_GPIO161__FUNC_DBG_MON_B19 (MTK_PIN_NO(161) | 7) - -#define PINMUX_GPIO162__FUNC_GPIO162 (MTK_PIN_NO(162) | 0) -#define PINMUX_GPIO162__FUNC_SPI1_A_CSB (MTK_PIN_NO(162) | 1) -#define PINMUX_GPIO162__FUNC_SCP_SPI1_CS (MTK_PIN_NO(162) | 2) -#define PINMUX_GPIO162__FUNC_USB_DRVVBUS (MTK_PIN_NO(162) | 3) -#define PINMUX_GPIO162__FUNC_ANT_SEL5 (MTK_PIN_NO(162) | 4) -#define PINMUX_GPIO162__FUNC_KPROW2 (MTK_PIN_NO(162) | 5) -#define PINMUX_GPIO162__FUNC_PTA_TXD (MTK_PIN_NO(162) | 6) - -#define PINMUX_GPIO163__FUNC_GPIO163 (MTK_PIN_NO(163) | 0) -#define PINMUX_GPIO163__FUNC_SPI1_A_MO (MTK_PIN_NO(163) | 1) -#define 
PINMUX_GPIO163__FUNC_SCP_SPI1_MO (MTK_PIN_NO(163) | 2) -#define PINMUX_GPIO163__FUNC_SDA1 (MTK_PIN_NO(163) | 3) -#define PINMUX_GPIO163__FUNC_ANT_SEL4 (MTK_PIN_NO(163) | 4) -#define PINMUX_GPIO163__FUNC_CMMCLK2 (MTK_PIN_NO(163) | 5) -#define PINMUX_GPIO163__FUNC_DMIC_CLK (MTK_PIN_NO(163) | 6) - -#define PINMUX_GPIO164__FUNC_GPIO164 (MTK_PIN_NO(164) | 0) -#define PINMUX_GPIO164__FUNC_SPI1_A_CLK (MTK_PIN_NO(164) | 1) -#define PINMUX_GPIO164__FUNC_SCP_SPI1_CK (MTK_PIN_NO(164) | 2) -#define PINMUX_GPIO164__FUNC_SCL1 (MTK_PIN_NO(164) | 3) -#define PINMUX_GPIO164__FUNC_ANT_SEL3 (MTK_PIN_NO(164) | 4) -#define PINMUX_GPIO164__FUNC_CMMCLK3 (MTK_PIN_NO(164) | 5) -#define PINMUX_GPIO164__FUNC_DMIC_DAT (MTK_PIN_NO(164) | 6) - -#define PINMUX_GPIO165__FUNC_GPIO165 (MTK_PIN_NO(165) | 0) -#define PINMUX_GPIO165__FUNC_PWM_B (MTK_PIN_NO(165) | 1) -#define PINMUX_GPIO165__FUNC_CMMCLK2 (MTK_PIN_NO(165) | 2) -#define PINMUX_GPIO165__FUNC_SCP_VREQ_VAO (MTK_PIN_NO(165) | 3) -#define PINMUX_GPIO165__FUNC_TDM_MCK_2ND (MTK_PIN_NO(165) | 6) -#define PINMUX_GPIO165__FUNC_SCP_JTAG_TDO (MTK_PIN_NO(165) | 7) - -#define PINMUX_GPIO166__FUNC_GPIO166 (MTK_PIN_NO(166) | 0) -#define PINMUX_GPIO166__FUNC_ANT_SEL6 (MTK_PIN_NO(166) | 1) - -#define PINMUX_GPIO167__FUNC_GPIO167 (MTK_PIN_NO(167) | 0) -#define PINMUX_GPIO167__FUNC_RFIC0_BSI_EN (MTK_PIN_NO(167) | 1) -#define PINMUX_GPIO167__FUNC_SPM_BSI_EN (MTK_PIN_NO(167) | 2) - -#define PINMUX_GPIO168__FUNC_GPIO168 (MTK_PIN_NO(168) | 0) -#define PINMUX_GPIO168__FUNC_RFIC0_BSI_CK (MTK_PIN_NO(168) | 1) -#define PINMUX_GPIO168__FUNC_SPM_BSI_CK (MTK_PIN_NO(168) | 2) - -#define PINMUX_GPIO169__FUNC_GPIO169 (MTK_PIN_NO(169) | 0) -#define PINMUX_GPIO169__FUNC_PWM_C (MTK_PIN_NO(169) | 1) -#define PINMUX_GPIO169__FUNC_CMMCLK3 (MTK_PIN_NO(169) | 2) -#define PINMUX_GPIO169__FUNC_CMVREF1 (MTK_PIN_NO(169) | 3) -#define PINMUX_GPIO169__FUNC_ANT_SEL7 (MTK_PIN_NO(169) | 4) -#define PINMUX_GPIO169__FUNC_AGPS_SYNC (MTK_PIN_NO(169) | 5) -#define PINMUX_GPIO169__FUNC_TDM_BCK_2ND (MTK_PIN_NO(169) | 6) -#define PINMUX_GPIO169__FUNC_SCP_JTAG_TMS (MTK_PIN_NO(169) | 7) - -#define PINMUX_GPIO170__FUNC_GPIO170 (MTK_PIN_NO(170) | 0) -#define PINMUX_GPIO170__FUNC_I2S1_BCK (MTK_PIN_NO(170) | 1) -#define PINMUX_GPIO170__FUNC_I2S3_BCK (MTK_PIN_NO(170) | 2) -#define PINMUX_GPIO170__FUNC_SCL7 (MTK_PIN_NO(170) | 3) -#define PINMUX_GPIO170__FUNC_I2S5_BCK (MTK_PIN_NO(170) | 4) -#define PINMUX_GPIO170__FUNC_EXT_FRAME_SYNC (MTK_PIN_NO(170) | 5) -#define PINMUX_GPIO170__FUNC_TDM_LRCK_2ND (MTK_PIN_NO(170) | 6) -#define PINMUX_GPIO170__FUNC_ANT_SEL3 (MTK_PIN_NO(170) | 7) - -#define PINMUX_GPIO171__FUNC_GPIO171 (MTK_PIN_NO(171) | 0) -#define PINMUX_GPIO171__FUNC_I2S1_LRCK (MTK_PIN_NO(171) | 1) -#define PINMUX_GPIO171__FUNC_I2S3_LRCK (MTK_PIN_NO(171) | 2) -#define PINMUX_GPIO171__FUNC_SDA7 (MTK_PIN_NO(171) | 3) -#define PINMUX_GPIO171__FUNC_I2S5_LRCK (MTK_PIN_NO(171) | 4) -#define PINMUX_GPIO171__FUNC_URXD1 (MTK_PIN_NO(171) | 5) -#define PINMUX_GPIO171__FUNC_TDM_DATA0_2ND (MTK_PIN_NO(171) | 6) -#define PINMUX_GPIO171__FUNC_ANT_SEL4 (MTK_PIN_NO(171) | 7) - -#define PINMUX_GPIO172__FUNC_GPIO172 (MTK_PIN_NO(172) | 0) -#define PINMUX_GPIO172__FUNC_I2S1_DO (MTK_PIN_NO(172) | 1) -#define PINMUX_GPIO172__FUNC_I2S3_DO (MTK_PIN_NO(172) | 2) -#define PINMUX_GPIO172__FUNC_SCL8 (MTK_PIN_NO(172) | 3) -#define PINMUX_GPIO172__FUNC_I2S5_DO (MTK_PIN_NO(172) | 4) -#define PINMUX_GPIO172__FUNC_UTXD1 (MTK_PIN_NO(172) | 5) -#define PINMUX_GPIO172__FUNC_TDM_DATA1_2ND (MTK_PIN_NO(172) | 6) -#define PINMUX_GPIO172__FUNC_ANT_SEL5 (MTK_PIN_NO(172) 
-
-#endif /* __MT8183-PINFUNC_H */
diff --git a/arch/arm64/boot/dts/mediatek/mt8183.dtsi b/arch/arm64/boot/dts/mediatek/mt8183.dtsi
index f90df6439c08..1933045da95d 100644
--- a/arch/arm64/boot/dts/mediatek/mt8183.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8183.dtsi
@@ -14,7 +14,7 @@
 #include
 #include
 #include
-#include "mt8183-pinfunc.h"
+#include <dt-bindings/pinctrl/mt8183-pinfunc.h>

 / {
 	compatible = "mediatek,mt8183";
-- cgit

From d21292f13f1f0721d60e8122e2db46bea8cf6950 Mon Sep 17 00:00:00 2001
From: Will Deacon
Date: Mon, 9 Aug 2021 16:24:28 +0100
Subject: KVM: arm64: Add hyp_spin_is_locked() for basic locking assertions at EL2

Introduce hyp_spin_is_locked() so that functions can easily assert that
a given lock is held (albeit possibly by another CPU!) without having
to drag full lockdep support up to EL2.

Signed-off-by: Will Deacon
Signed-off-by: Quentin Perret
Signed-off-by: Marc Zyngier
Link: https://lore.kernel.org/r/20210809152448.1810400-2-qperret@google.com
---
 arch/arm64/kvm/hyp/include/nvhe/spinlock.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'arch')

diff --git a/arch/arm64/kvm/hyp/include/nvhe/spinlock.h b/arch/arm64/kvm/hyp/include/nvhe/spinlock.h
index 76b537f8d1c6..04f65b655fcf 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/spinlock.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/spinlock.h
@@ -15,6 +15,7 @@

 #include
 #include
+#include

 typedef union hyp_spinlock {
 	u32 __val;
@@ -89,4 +90,11 @@ static inline void hyp_spin_unlock(hyp_spinlock_t *lock)
 	: "memory");
 }

+static inline bool hyp_spin_is_locked(hyp_spinlock_t *lock)
+{
+	hyp_spinlock_t lockval = READ_ONCE(*lock);
+
+	return lockval.owner != lockval.next;
+}
+
 #endif /* __ARM64_KVM_NVHE_SPINLOCK_H__ */
-- cgit
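For context, a minimal sketch of how hyp_spin_is_locked() is meant to be
consumed; this is illustrative only and not part of the series, and
update_protected_state() is a hypothetical function name. The ticket
encoding means the lock is held exactly when owner != next:

/*
 * Sketch: assert that a hyp spinlock is held (possibly by another CPU)
 * before touching the data it protects.
 */
static void update_protected_state(hyp_spinlock_t *lock, u64 *state)
{
	BUG_ON(!hyp_spin_is_locked(lock));	/* caller must hold the lock */
	*state |= BIT(0);			/* protected update */
}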
From 8e049e0daf23aa380c264e5e15e4c64ea5497ed7 Mon Sep 17 00:00:00 2001
From: Quentin Perret
Date: Mon, 9 Aug 2021 16:24:29 +0100
Subject: KVM: arm64: Introduce hyp_assert_lock_held()

Introduce a poor man's lockdep implementation at EL2 which allows us to
BUG() whenever a hyp spinlock is not held when it should be. Hide this
feature behind a new Kconfig option that targets the EL2 object
specifically, instead of piggybacking on the existing CONFIG_LOCKDEP.

EL2 cannot WARN() cleanly to report locking issues, hence BUG() is the
only option, and it is not clear whether we want this widely enabled.
This is most likely going to be useful for local testing until the EL2
WARN() situation has improved.

Signed-off-by: Quentin Perret
Signed-off-by: Marc Zyngier
Link: https://lore.kernel.org/r/20210809152448.1810400-3-qperret@google.com
---
 arch/arm64/kvm/Kconfig                     |  9 +++++++++
 arch/arm64/kvm/hyp/include/nvhe/spinlock.h | 17 +++++++++++++++++
 2 files changed, 26 insertions(+)

(limited to 'arch')

diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index a4eba0908bfa..9b9721895e5c 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -46,6 +46,15 @@ if KVM

 source "virt/kvm/Kconfig"

+config NVHE_EL2_DEBUG
+	bool "Debug mode for non-VHE EL2 object"
+	help
+	  Say Y here to enable the debug mode for the non-VHE KVM EL2 object.
+	  Failure reports will BUG() in the hypervisor. This is intended for
+	  local EL2 hypervisor development.
+
+	  If unsure, say N.
+
 endif # KVM

 endif # VIRTUALIZATION
diff --git a/arch/arm64/kvm/hyp/include/nvhe/spinlock.h b/arch/arm64/kvm/hyp/include/nvhe/spinlock.h
index 04f65b655fcf..4652fd04bdbe 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/spinlock.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/spinlock.h
@@ -97,4 +97,21 @@ static inline bool hyp_spin_is_locked(hyp_spinlock_t *lock)
 	return lockval.owner != lockval.next;
 }

+#ifdef CONFIG_NVHE_EL2_DEBUG
+static inline void hyp_assert_lock_held(hyp_spinlock_t *lock)
+{
+	/*
+	 * The __pkvm_init() path accesses protected data-structures without
+	 * holding locks as the other CPUs are guaranteed to not enter EL2
+	 * concurrently at this point in time. The point by which EL2 is
+	 * initialized on all CPUs is reflected in the pkvm static key, so
+	 * wait until it is set before checking the lock state.
+	 */
+	if (static_branch_likely(&kvm_protected_mode_initialized))
+		BUG_ON(!hyp_spin_is_locked(lock));
+}
+#else
+static inline void hyp_assert_lock_held(hyp_spinlock_t *lock) { }
+#endif
+
 #endif /* __ARM64_KVM_NVHE_SPINLOCK_H__ */
-- cgit
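As a usage sketch (hypothetical names, not from the series), a function
whose contract requires a lock can document that contract with
hyp_assert_lock_held(); with CONFIG_NVHE_EL2_DEBUG=n the assertion
compiles away to an empty inline:

/*
 * Sketch only: __recompute_ownership() and "host_lock" are hypothetical.
 * Under CONFIG_NVHE_EL2_DEBUG this BUG()s if the lock is not held.
 */
static void __recompute_ownership(hyp_spinlock_t *host_lock)
{
	hyp_assert_lock_held(host_lock);
	/* ... manipulate state protected by host_lock ... */
}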
Reviewed-by: Fuad Tabba Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210809152448.1810400-4-qperret@google.com --- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 40 +++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index d938ce95d3bd..74280a753efb 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -208,6 +208,25 @@ static inline int __host_stage2_idmap(u64 start, u64 end, prot, &host_s2_pool); } +/* + * The pool has been provided with enough pages to cover all of memory with + * page granularity, but it is difficult to know how much of the MMIO range + * we will need to cover upfront, so we may need to 'recycle' the pages if we + * run out. + */ +#define host_stage2_try(fn, ...) \ + ({ \ + int __ret; \ + hyp_assert_lock_held(&host_kvm.lock); \ + __ret = fn(__VA_ARGS__); \ + if (__ret == -ENOMEM) { \ + __ret = host_stage2_unmap_dev_all(); \ + if (!__ret) \ + __ret = fn(__VA_ARGS__); \ + } \ + __ret; \ + }) + static int host_stage2_idmap(u64 addr) { enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W; @@ -223,22 +242,7 @@ static int host_stage2_idmap(u64 addr) if (ret) goto unlock; - ret = __host_stage2_idmap(range.start, range.end, prot); - if (ret != -ENOMEM) - goto unlock; - - /* - * The pool has been provided with enough pages to cover all of memory - * with page granularity, but it is difficult to know how much of the - * MMIO range we will need to cover upfront, so we may need to 'recycle' - * the pages if we run out. - */ - ret = host_stage2_unmap_dev_all(); - if (ret) - goto unlock; - - ret = __host_stage2_idmap(range.start, range.end, prot); - + ret = host_stage2_try(__host_stage2_idmap, range.start, range.end, prot); unlock: hyp_spin_unlock(&host_kvm.lock); @@ -257,8 +261,8 @@ int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end) return -EINVAL; hyp_spin_lock(&host_kvm.lock); - ret = kvm_pgtable_stage2_set_owner(&host_kvm.pgt, start, end - start, - &host_s2_pool, pkvm_hyp_id); + ret = host_stage2_try(kvm_pgtable_stage2_set_owner, &host_kvm.pgt, + start, end - start, &host_s2_pool, pkvm_hyp_id); hyp_spin_unlock(&host_kvm.lock); return ret != -EAGAIN ? ret : 0; -- cgit From 51add457733bbc4a442fc280d73d14bfe262e4a0 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 9 Aug 2021 16:24:32 +0100 Subject: KVM: arm64: Expose page-table helpers The KVM pgtable API exposes the kvm_pgtable_walk() function to allow the definition of walkers outside of pgtable.c. However, it is not easy to implement any of those walkers without some of the low-level helpers. Move some of them to the header file to allow re-use from other places. 
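(A sketch of the intended kind of re-use, assuming the helpers exposed below and the existing kvm_pgtable_walk() API; the walker itself is hypothetical and merely counts valid leaf entries.)

static int count_valid_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			      enum kvm_pgtable_walk_flags flag, void * const arg)
{
	if (kvm_pte_valid(*ptep))
		(*(u64 *)arg)++;

	return 0;
}

static u64 count_valid_leaves(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	u64 nr = 0;
	struct kvm_pgtable_walker walker = {
		.cb	= count_valid_walker,
		.flags	= KVM_PGTABLE_WALK_LEAF,
		.arg	= &nr,
	};

	kvm_pgtable_walk(pgt, addr, size, &walker);

	return nr;
}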
Signed-off-by: Quentin Perret Reviewed-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210809152448.1810400-6-qperret@google.com --- arch/arm64/include/asm/kvm_pgtable.h | 40 ++++++++++++++++++++++++++++++++++++ arch/arm64/kvm/hyp/pgtable.c | 39 ----------------------------------- 2 files changed, 40 insertions(+), 39 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index e42b55bd50a2..83c21d35be10 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -25,6 +25,46 @@ static inline u64 kvm_get_parange(u64 mmfr0) typedef u64 kvm_pte_t; +#define KVM_PTE_VALID BIT(0) + +#define KVM_PTE_ADDR_MASK GENMASK(47, PAGE_SHIFT) +#define KVM_PTE_ADDR_51_48 GENMASK(15, 12) + +static inline bool kvm_pte_valid(kvm_pte_t pte) +{ + return pte & KVM_PTE_VALID; +} + +static inline u64 kvm_pte_to_phys(kvm_pte_t pte) +{ + u64 pa = pte & KVM_PTE_ADDR_MASK; + + if (PAGE_SHIFT == 16) + pa |= FIELD_GET(KVM_PTE_ADDR_51_48, pte) << 48; + + return pa; +} + +static inline u64 kvm_granule_shift(u32 level) +{ + /* Assumes KVM_PGTABLE_MAX_LEVELS is 4 */ + return ARM64_HW_PGTABLE_LEVEL_SHIFT(level); +} + +static inline u64 kvm_granule_size(u32 level) +{ + return BIT(kvm_granule_shift(level)); +} + +static inline bool kvm_level_supports_block_mapping(u32 level) +{ + /* + * Reject invalid block mappings and don't bother with 4TB mappings for + * 52-bit PAs. + */ + return !(level == 0 || (PAGE_SIZE != SZ_4K && level == 1)); +} + /** * struct kvm_pgtable_mm_ops - Memory management callbacks. * @zalloc_page: Allocate a single zeroed memory page. diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 78f36bd5df6c..49d768b92997 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -11,16 +11,12 @@ #include #include -#define KVM_PTE_VALID BIT(0) #define KVM_PTE_TYPE BIT(1) #define KVM_PTE_TYPE_BLOCK 0 #define KVM_PTE_TYPE_PAGE 1 #define KVM_PTE_TYPE_TABLE 1 -#define KVM_PTE_ADDR_MASK GENMASK(47, PAGE_SHIFT) -#define KVM_PTE_ADDR_51_48 GENMASK(15, 12) - #define KVM_PTE_LEAF_ATTR_LO GENMASK(11, 2) #define KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX GENMASK(4, 2) @@ -61,17 +57,6 @@ struct kvm_pgtable_walk_data { u64 end; }; -static u64 kvm_granule_shift(u32 level) -{ - /* Assumes KVM_PGTABLE_MAX_LEVELS is 4 */ - return ARM64_HW_PGTABLE_LEVEL_SHIFT(level); -} - -static u64 kvm_granule_size(u32 level) -{ - return BIT(kvm_granule_shift(level)); -} - #define KVM_PHYS_INVALID (-1ULL) static bool kvm_phys_is_valid(u64 phys) @@ -79,15 +64,6 @@ static bool kvm_phys_is_valid(u64 phys) return phys < BIT(id_aa64mmfr0_parange_to_phys_shift(ID_AA64MMFR0_PARANGE_MAX)); } -static bool kvm_level_supports_block_mapping(u32 level) -{ - /* - * Reject invalid block mappings and don't bother with 4TB mappings for - * 52-bit PAs. 
- */ - return !(level == 0 || (PAGE_SIZE != SZ_4K && level == 1)); -} - static bool kvm_block_mapping_supported(u64 addr, u64 end, u64 phys, u32 level) { u64 granule = kvm_granule_size(level); @@ -135,11 +111,6 @@ static u32 kvm_pgd_pages(u32 ia_bits, u32 start_level) return __kvm_pgd_page_idx(&pgt, -1ULL) + 1; } -static bool kvm_pte_valid(kvm_pte_t pte) -{ - return pte & KVM_PTE_VALID; -} - static bool kvm_pte_table(kvm_pte_t pte, u32 level) { if (level == KVM_PGTABLE_MAX_LEVELS - 1) @@ -151,16 +122,6 @@ static bool kvm_pte_table(kvm_pte_t pte, u32 level) return FIELD_GET(KVM_PTE_TYPE, pte) == KVM_PTE_TYPE_TABLE; } -static u64 kvm_pte_to_phys(kvm_pte_t pte) -{ - u64 pa = pte & KVM_PTE_ADDR_MASK; - - if (PAGE_SHIFT == 16) - pa |= FIELD_GET(KVM_PTE_ADDR_51_48, pte) << 48; - - return pa; -} - static kvm_pte_t kvm_phys_to_pte(u64 pa) { kvm_pte_t pte = pa & KVM_PTE_ADDR_MASK; -- cgit From c4f0935e4d957bfcea25ad76860445660a60f3fd Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 9 Aug 2021 16:24:33 +0100 Subject: KVM: arm64: Optimize host memory aborts The kvm_pgtable_stage2_find_range() function is used in the host memory abort path to try and look for the largest block mapping that can be used to map the faulting address. In order to do so, the function currently walks the stage-2 page-table and looks for existing incompatible mappings within the range of the largest possible block. If incompatible mappings are found, it tries the same procedure again, but using a smaller block range, and repeats until a matching range is found (potentially up to page granularity). While this approach has benefits (mostly in the fact that it proactively coalesces host stage-2 mappings), it can be slow if the ranges are fragmented, and it isn't optimized to deal with CPUs faulting on the same IPA as all of them will do all the work every time. To avoid these issues, remove kvm_pgtable_stage2_find_range(), and walk the page-table only once in the host_mem_abort() path to find the closest leaf to the input address. With this, use the corresponding range if it is invalid and not owned by another entity. If a valid leaf is found, return -EAGAIN similar to what is done in the kvm_pgtable_stage2_map() path to optimize concurrent faults. Reviewed-by: Fuad Tabba Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210809152448.1810400-7-qperret@google.com --- arch/arm64/include/asm/kvm_pgtable.h | 30 -------------- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 45 ++++++++++++++++++++- arch/arm64/kvm/hyp/pgtable.c | 74 ----------------------------------- 3 files changed, 44 insertions(+), 105 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index 83c21d35be10..8d6c710c1996 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -154,16 +154,6 @@ enum kvm_pgtable_prot { #define PAGE_HYP_RO (KVM_PGTABLE_PROT_R) #define PAGE_HYP_DEVICE (PAGE_HYP | KVM_PGTABLE_PROT_DEVICE) -/** - * struct kvm_mem_range - Range of Intermediate Physical Addresses - * @start: Start of the range. - * @end: End of the range. - */ -struct kvm_mem_range { - u64 start; - u64 end; -}; - /** * enum kvm_pgtable_walk_flags - Flags to control a depth-first page-table walk. 
* @KVM_PGTABLE_WALK_LEAF: Visit leaf entries, including invalid @@ -491,24 +481,4 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size, */ int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr, kvm_pte_t *ptep, u32 *level); - -/** - * kvm_pgtable_stage2_find_range() - Find a range of Intermediate Physical - * Addresses with compatible permission - * attributes. - * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*(). - * @addr: Address that must be covered by the range. - * @prot: Protection attributes that the range must be compatible with. - * @range: Range structure used to limit the search space at call time and - * that will hold the result. - * - * The offset of @addr within a page is ignored. An IPA is compatible with @prot - * iff its corresponding stage-2 page-table entry has default ownership and, if - * valid, is mapped with protection attributes identical to @prot. - * - * Return: 0 on success, negative error code on failure. - */ -int kvm_pgtable_stage2_find_range(struct kvm_pgtable *pgt, u64 addr, - enum kvm_pgtable_prot prot, - struct kvm_mem_range *range); #endif /* __ARM64_KVM_PGTABLE_H__ */ diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 74280a753efb..2148d3968aa5 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -159,6 +159,11 @@ static int host_stage2_unmap_dev_all(void) return kvm_pgtable_stage2_unmap(pgt, addr, BIT(pgt->ia_bits) - addr); } +struct kvm_mem_range { + u64 start; + u64 end; +}; + static bool find_mem_range(phys_addr_t addr, struct kvm_mem_range *range) { int cur, left = 0, right = hyp_memblock_nr; @@ -227,6 +232,44 @@ static inline int __host_stage2_idmap(u64 start, u64 end, __ret; \ }) +static inline bool range_included(struct kvm_mem_range *child, + struct kvm_mem_range *parent) +{ + return parent->start <= child->start && child->end <= parent->end; +} + +static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range) +{ + struct kvm_mem_range cur; + kvm_pte_t pte; + u32 level; + int ret; + + hyp_assert_lock_held(&host_kvm.lock); + ret = kvm_pgtable_get_leaf(&host_kvm.pgt, addr, &pte, &level); + if (ret) + return ret; + + if (kvm_pte_valid(pte)) + return -EAGAIN; + + if (pte) + return -EPERM; + + do { + u64 granule = kvm_granule_size(level); + cur.start = ALIGN_DOWN(addr, granule); + cur.end = cur.start + granule; + level++; + } while ((level < KVM_PGTABLE_MAX_LEVELS) && + !(kvm_level_supports_block_mapping(level) && + range_included(&cur, range))); + + *range = cur; + + return 0; +} + static int host_stage2_idmap(u64 addr) { enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W; @@ -238,7 +281,7 @@ static int host_stage2_idmap(u64 addr) prot |= KVM_PGTABLE_PROT_X; hyp_spin_lock(&host_kvm.lock); - ret = kvm_pgtable_stage2_find_range(&host_kvm.pgt, addr, prot, &range); + ret = host_stage2_adjust_range(addr, &range); if (ret) goto unlock; diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 49d768b92997..4dff2ad39ee4 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -1102,77 +1102,3 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt) pgt->mm_ops->free_pages_exact(pgt->pgd, pgd_sz); pgt->pgd = NULL; } - -#define KVM_PTE_LEAF_S2_COMPAT_MASK (KVM_PTE_LEAF_ATTR_S2_PERMS | \ - KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR | \ - KVM_PTE_LEAF_ATTR_S2_IGNORED) - -static int stage2_check_permission_walker(u64 addr, u64 end, u32 level, - kvm_pte_t *ptep, - 
enum kvm_pgtable_walk_flags flag, - void * const arg) -{ - kvm_pte_t old_attr, pte = *ptep, *new_attr = arg; - - /* - * Compatible mappings are either invalid and owned by the page-table - * owner (whose id is 0), or valid with matching permission attributes. - */ - if (kvm_pte_valid(pte)) { - old_attr = pte & KVM_PTE_LEAF_S2_COMPAT_MASK; - if (old_attr != *new_attr) - return -EEXIST; - } else if (pte) { - return -EEXIST; - } - - return 0; -} - -int kvm_pgtable_stage2_find_range(struct kvm_pgtable *pgt, u64 addr, - enum kvm_pgtable_prot prot, - struct kvm_mem_range *range) -{ - kvm_pte_t attr; - struct kvm_pgtable_walker check_perm_walker = { - .cb = stage2_check_permission_walker, - .flags = KVM_PGTABLE_WALK_LEAF, - .arg = &attr, - }; - u64 granule, start, end; - u32 level; - int ret; - - ret = stage2_set_prot_attr(pgt, prot, &attr); - if (ret) - return ret; - attr &= KVM_PTE_LEAF_S2_COMPAT_MASK; - - for (level = pgt->start_level; level < KVM_PGTABLE_MAX_LEVELS; level++) { - granule = kvm_granule_size(level); - start = ALIGN_DOWN(addr, granule); - end = start + granule; - - if (!kvm_level_supports_block_mapping(level)) - continue; - - if (start < range->start || range->end < end) - continue; - - /* - * Check the presence of existing mappings with incompatible - * permissions within the current block range, and try one level - * deeper if one is found. - */ - ret = kvm_pgtable_walk(pgt, start, granule, &check_perm_walker); - if (ret != -EEXIST) - break; - } - - if (!ret) { - range->start = start; - range->end = end; - } - - return ret; -} -- cgit From 178cac08d588e7406a09351a992f57892d8d9cc9 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 9 Aug 2021 16:24:34 +0100 Subject: KVM: arm64: Rename KVM_PTE_LEAF_ATTR_S2_IGNORED The ignored bits for both stage-1 and stage-2 page and block descriptors are in [55:58], so rename KVM_PTE_LEAF_ATTR_S2_IGNORED to make it applicable to both. And while at it, since these bits are more commonly known as 'software' bits, rename accordingly. Reviewed-by: Fuad Tabba Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210809152448.1810400-8-qperret@google.com --- arch/arm64/kvm/hyp/pgtable.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 4dff2ad39ee4..59a394d82de3 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -36,6 +36,8 @@ #define KVM_PTE_LEAF_ATTR_HI GENMASK(63, 51) +#define KVM_PTE_LEAF_ATTR_HI_SW GENMASK(58, 55) + #define KVM_PTE_LEAF_ATTR_HI_S1_XN BIT(54) #define KVM_PTE_LEAF_ATTR_HI_S2_XN BIT(54) @@ -44,8 +46,6 @@ KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \ KVM_PTE_LEAF_ATTR_HI_S2_XN) -#define KVM_PTE_LEAF_ATTR_S2_IGNORED GENMASK(58, 55) - #define KVM_INVALID_PTE_OWNER_MASK GENMASK(63, 56) #define KVM_MAX_OWNER_ID 1 -- cgit From 8a0282c68121e53ab17413283cfed408a47e1a2a Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 9 Aug 2021 16:24:35 +0100 Subject: KVM: arm64: Don't overwrite software bits with owner id We will soon start annotating page-tables with new flags to track shared pages and such, and we will do so in valid mappings using software bits in the PTEs, as provided by the architecture. However, it is possible that we will need to use those flags to annotate invalid mappings as well in the future, similar to what we do to track page ownership in the host stage-2. 
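(For context, a sketch of how the owner id is encoded into an invalid PTE, with the masks as they stand before this patch; note the overlap with the software bits of valid mappings in bits [58:56].)

#define KVM_PTE_LEAF_ATTR_HI_SW		GENMASK(58, 55)	/* valid PTEs */
#define KVM_INVALID_PTE_OWNER_MASK	GENMASK(63, 56)	/* invalid PTEs */

static kvm_pte_t kvm_init_invalid_leaf_owner(u8 owner_id)
{
	return FIELD_PREP(KVM_INVALID_PTE_OWNER_MASK, owner_id);
}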
In order to facilitate the annotation of invalid mappings with such flags, it would be preferable to re-use the same bits as for valid mappings (bits [58-55]), but these are currently used for ownership encoding. Since we have plenty of bits left to use in invalid mappings, move the ownership bits further down the PTE to avoid the conflict. Reviewed-by: Fuad Tabba Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210809152448.1810400-9-qperret@google.com --- arch/arm64/kvm/hyp/pgtable.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 59a394d82de3..1ee1168ac32d 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -46,7 +46,7 @@ KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \ KVM_PTE_LEAF_ATTR_HI_S2_XN) -#define KVM_INVALID_PTE_OWNER_MASK GENMASK(63, 56) +#define KVM_INVALID_PTE_OWNER_MASK GENMASK(9, 2) #define KVM_MAX_OWNER_ID 1 struct kvm_pgtable_walk_data { -- cgit From b53846c5f279cb5329b82f19a7d313f02cb9d21c Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 9 Aug 2021 16:24:36 +0100 Subject: KVM: arm64: Tolerate re-creating hyp mappings to set software bits The current hypervisor stage-1 mapping code doesn't allow changing an existing valid mapping. Relax this condition by allowing changes that only target software bits, as that will soon be needed to annotate shared pages. Reviewed-by: Fuad Tabba Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210809152448.1810400-10-qperret@google.com --- arch/arm64/kvm/hyp/pgtable.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 1ee1168ac32d..2689fcb7901d 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -362,6 +362,21 @@ static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep) return 0; } +static bool hyp_pte_needs_update(kvm_pte_t old, kvm_pte_t new) +{ + /* + * Tolerate KVM recreating the exact same mapping, or changing software + * bits if the existing mapping was valid. + */ + if (old == new) + return false; + + if (!kvm_pte_valid(old)) + return true; + + return !WARN_ON((old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW); +} + static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, struct hyp_map_data *data) { @@ -371,9 +386,8 @@ static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level, if (!kvm_block_mapping_supported(addr, end, phys, level)) return false; - /* Tolerate KVM recreating the exact same mapping */ new = kvm_init_valid_leaf_pte(phys, data->attr, level); - if (old != new && !WARN_ON(kvm_pte_valid(old))) + if (hyp_pte_needs_update(old, new)) smp_store_release(ptep, new); data->phys += granule; -- cgit From 5651311941105ca077d3ab74dd4a92e646ecf7fb Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 9 Aug 2021 16:24:37 +0100 Subject: KVM: arm64: Enable forcing page-level stage-2 mappings Much of the stage-2 manipulation logic relies on being able to destroy block mappings if e.g. installing a smaller mapping in the range. The rationale for this behaviour is that stage-2 mappings can always be re-created lazily. However, this gets more complicated when the stage-2 page-table is used to store metadata about the underlying pages. 
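(The callback introduced below has the following shape; the always-true policy shown here is purely hypothetical.)

/* Matches the new kvm_pgtable_force_pte_cb_t type. */
static bool always_force_pte(u64 addr, u64 end, enum kvm_pgtable_prot prot)
{
	return true;	/* never install block mappings in this range */
}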
In such cases, destroying a block mapping may lead to losing part of the state, and confuse the user of those metadata (such as the hypervisor in nVHE protected mode). To avoid this, introduce a callback function in the pgtable struct which is called during all map operations to determine whether the mappings can use blocks, or should be forced to page granularity. This is used by the hypervisor when creating the host stage-2 to force page-level mappings when using non-default protection attributes. Signed-off-by: Quentin Perret Reviewed-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210809152448.1810400-11-qperret@google.com --- arch/arm64/include/asm/kvm_pgtable.h | 66 ++++++++++++++++++++++------------- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 34 +++++++++++++++--- arch/arm64/kvm/hyp/pgtable.c | 29 ++++++++++++--- 3 files changed, 94 insertions(+), 35 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index 8d6c710c1996..ca0c039547b5 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -115,25 +115,6 @@ enum kvm_pgtable_stage2_flags { KVM_PGTABLE_S2_IDMAP = BIT(1), }; -/** - * struct kvm_pgtable - KVM page-table. - * @ia_bits: Maximum input address size, in bits. - * @start_level: Level at which the page-table walk starts. - * @pgd: Pointer to the first top-level entry of the page-table. - * @mm_ops: Memory management callbacks. - * @mmu: Stage-2 KVM MMU struct. Unused for stage-1 page-tables. - */ -struct kvm_pgtable { - u32 ia_bits; - u32 start_level; - kvm_pte_t *pgd; - struct kvm_pgtable_mm_ops *mm_ops; - - /* Stage-2 only */ - struct kvm_s2_mmu *mmu; - enum kvm_pgtable_stage2_flags flags; -}; - /** * enum kvm_pgtable_prot - Page-table permissions and attributes. * @KVM_PGTABLE_PROT_X: Execute permission. @@ -149,11 +130,43 @@ enum kvm_pgtable_prot { KVM_PGTABLE_PROT_DEVICE = BIT(3), }; -#define PAGE_HYP (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W) +#define KVM_PGTABLE_PROT_RW (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W) +#define KVM_PGTABLE_PROT_RWX (KVM_PGTABLE_PROT_RW | KVM_PGTABLE_PROT_X) + +#define PKVM_HOST_MEM_PROT KVM_PGTABLE_PROT_RWX +#define PKVM_HOST_MMIO_PROT KVM_PGTABLE_PROT_RW + +#define PAGE_HYP KVM_PGTABLE_PROT_RW #define PAGE_HYP_EXEC (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_X) #define PAGE_HYP_RO (KVM_PGTABLE_PROT_R) #define PAGE_HYP_DEVICE (PAGE_HYP | KVM_PGTABLE_PROT_DEVICE) +typedef bool (*kvm_pgtable_force_pte_cb_t)(u64 addr, u64 end, + enum kvm_pgtable_prot prot); + +/** + * struct kvm_pgtable - KVM page-table. + * @ia_bits: Maximum input address size, in bits. + * @start_level: Level at which the page-table walk starts. + * @pgd: Pointer to the first top-level entry of the page-table. + * @mm_ops: Memory management callbacks. + * @mmu: Stage-2 KVM MMU struct. Unused for stage-1 page-tables. + * @flags: Stage-2 page-table flags. + * @force_pte_cb: Function that returns true if page level mappings must + * be used instead of block mappings. + */ +struct kvm_pgtable { + u32 ia_bits; + u32 start_level; + kvm_pte_t *pgd; + struct kvm_pgtable_mm_ops *mm_ops; + + /* Stage-2 only */ + struct kvm_s2_mmu *mmu; + enum kvm_pgtable_stage2_flags flags; + kvm_pgtable_force_pte_cb_t force_pte_cb; +}; + /** * enum kvm_pgtable_walk_flags - Flags to control a depth-first page-table walk. 
* @KVM_PGTABLE_WALK_LEAF: Visit leaf entries, including invalid @@ -246,21 +259,24 @@ int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift); /** - * kvm_pgtable_stage2_init_flags() - Initialise a guest stage-2 page-table. + * __kvm_pgtable_stage2_init() - Initialise a guest stage-2 page-table. * @pgt: Uninitialised page-table structure to initialise. * @arch: Arch-specific KVM structure representing the guest virtual * machine. * @mm_ops: Memory management callbacks. * @flags: Stage-2 configuration flags. + * @force_pte_cb: Function that returns true if page level mappings must + * be used instead of block mappings. * * Return: 0 on success, negative error code on failure. */ -int kvm_pgtable_stage2_init_flags(struct kvm_pgtable *pgt, struct kvm_arch *arch, - struct kvm_pgtable_mm_ops *mm_ops, - enum kvm_pgtable_stage2_flags flags); +int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch, + struct kvm_pgtable_mm_ops *mm_ops, + enum kvm_pgtable_stage2_flags flags, + kvm_pgtable_force_pte_cb_t force_pte_cb); #define kvm_pgtable_stage2_init(pgt, arch, mm_ops) \ - kvm_pgtable_stage2_init_flags(pgt, arch, mm_ops, 0) + __kvm_pgtable_stage2_init(pgt, arch, mm_ops, 0, NULL) /** * kvm_pgtable_stage2_destroy() - Destroy an unused guest stage-2 page-table. diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 2148d3968aa5..6fed6772c673 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -89,6 +89,8 @@ static void prepare_host_vtcr(void) id_aa64mmfr1_el1_sys_val, phys_shift); } +static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot); + int kvm_host_prepare_stage2(void *pgt_pool_base) { struct kvm_s2_mmu *mmu = &host_kvm.arch.mmu; @@ -101,8 +103,9 @@ int kvm_host_prepare_stage2(void *pgt_pool_base) if (ret) return ret; - ret = kvm_pgtable_stage2_init_flags(&host_kvm.pgt, &host_kvm.arch, - &host_kvm.mm_ops, KVM_HOST_S2_FLAGS); + ret = __kvm_pgtable_stage2_init(&host_kvm.pgt, &host_kvm.arch, + &host_kvm.mm_ops, KVM_HOST_S2_FLAGS, + host_stage2_force_pte_cb); if (ret) return ret; @@ -270,15 +273,36 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range) return 0; } +static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot) +{ + /* + * Block mappings must be used with care in the host stage-2 as a + * kvm_pgtable_stage2_map() operation targeting a page in the range of + * an existing block will delete the block under the assumption that + * mappings in the rest of the block range can always be rebuilt lazily. + * That assumption is correct for the host stage-2 with RWX mappings + * targeting memory or RW mappings targeting MMIO ranges (see + * host_stage2_idmap() below which implements some of the host memory + * abort logic). However, this is not safe for any other mappings where + * the host stage-2 page-table is in fact the only place where this + * state is stored. In all those cases, it is safer to use page-level + * mappings, hence avoiding to lose the state because of side-effects in + * kvm_pgtable_stage2_map(). 
+ */ + if (range_is_memory(addr, end)) + return prot != PKVM_HOST_MEM_PROT; + else + return prot != PKVM_HOST_MMIO_PROT; +} + static int host_stage2_idmap(u64 addr) { - enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W; struct kvm_mem_range range; bool is_memory = find_mem_range(addr, &range); + enum kvm_pgtable_prot prot; int ret; - if (is_memory) - prot |= KVM_PGTABLE_PROT_X; + prot = is_memory ? PKVM_HOST_MEM_PROT : PKVM_HOST_MMIO_PROT; hyp_spin_lock(&host_kvm.lock); ret = host_stage2_adjust_range(addr, &range); diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 2689fcb7901d..e25d829587b9 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -452,6 +452,8 @@ int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits, pgt->start_level = KVM_PGTABLE_MAX_LEVELS - levels; pgt->mm_ops = mm_ops; pgt->mmu = NULL; + pgt->force_pte_cb = NULL; + return 0; } @@ -489,6 +491,9 @@ struct stage2_map_data { void *memcache; struct kvm_pgtable_mm_ops *mm_ops; + + /* Force mappings to page granularity */ + bool force_pte; }; u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift) @@ -602,6 +607,15 @@ static bool stage2_pte_executable(kvm_pte_t pte) return !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN); } +static bool stage2_leaf_mapping_allowed(u64 addr, u64 end, u32 level, + struct stage2_map_data *data) +{ + if (data->force_pte && (level < (KVM_PGTABLE_MAX_LEVELS - 1))) + return false; + + return kvm_block_mapping_supported(addr, end, data->phys, level); +} + static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, struct stage2_map_data *data) @@ -611,7 +625,7 @@ static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level, struct kvm_pgtable *pgt = data->mmu->pgt; struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; - if (!kvm_block_mapping_supported(addr, end, phys, level)) + if (!stage2_leaf_mapping_allowed(addr, end, level, data)) return -E2BIG; if (kvm_phys_is_valid(phys)) @@ -655,7 +669,7 @@ static int stage2_map_walk_table_pre(u64 addr, u64 end, u32 level, if (data->anchor) return 0; - if (!kvm_block_mapping_supported(addr, end, data->phys, level)) + if (!stage2_leaf_mapping_allowed(addr, end, level, data)) return 0; data->childp = kvm_pte_follow(*ptep, data->mm_ops); @@ -785,6 +799,7 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, .mmu = pgt->mmu, .memcache = mc, .mm_ops = pgt->mm_ops, + .force_pte = pgt->force_pte_cb && pgt->force_pte_cb(addr, addr + size, prot), }; struct kvm_pgtable_walker walker = { .cb = stage2_map_walker, @@ -816,6 +831,7 @@ int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size, .memcache = mc, .mm_ops = pgt->mm_ops, .owner_id = owner_id, + .force_pte = true, }; struct kvm_pgtable_walker walker = { .cb = stage2_map_walker, @@ -1057,9 +1073,11 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size) return kvm_pgtable_walk(pgt, addr, size, &walker); } -int kvm_pgtable_stage2_init_flags(struct kvm_pgtable *pgt, struct kvm_arch *arch, - struct kvm_pgtable_mm_ops *mm_ops, - enum kvm_pgtable_stage2_flags flags) + +int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch, + struct kvm_pgtable_mm_ops *mm_ops, + enum kvm_pgtable_stage2_flags flags, + kvm_pgtable_force_pte_cb_t force_pte_cb) { size_t pgd_sz; u64 vtcr = arch->vtcr; @@ -1077,6 +1095,7 @@ int kvm_pgtable_stage2_init_flags(struct kvm_pgtable *pgt, struct kvm_arch *arch pgt->mm_ops = mm_ops; pgt->mmu = &arch->mmu; pgt->flags = flags; + 
pgt->force_pte_cb = force_pte_cb; /* Ensure zeroed PGD pages are visible to the hardware walker */ dsb(ishst); -- cgit From 4505e9b624cefafa4b75d8a28e72f32076c33375 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 9 Aug 2021 16:24:38 +0100 Subject: KVM: arm64: Allow populating software bits Introduce infrastructure allowing to manipulate software bits in stage-1 and stage-2 page-tables using additional entries in the kvm_pgtable_prot enum. This is heavily inspired by Marc's implementation of a similar feature in the NV patch series, but adapted to allow stage-1 changes as well: https://lore.kernel.org/kvmarm/20210510165920.1913477-56-maz@kernel.org/ Suggested-by: Marc Zyngier Signed-off-by: Quentin Perret Reviewed-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210809152448.1810400-12-qperret@google.com --- arch/arm64/include/asm/kvm_pgtable.h | 12 +++++++++++- arch/arm64/kvm/hyp/pgtable.c | 5 +++++ 2 files changed, 16 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index ca0c039547b5..bfea573703d7 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -121,6 +121,10 @@ enum kvm_pgtable_stage2_flags { * @KVM_PGTABLE_PROT_W: Write permission. * @KVM_PGTABLE_PROT_R: Read permission. * @KVM_PGTABLE_PROT_DEVICE: Device attributes. + * @KVM_PGTABLE_PROT_SW0: Software bit 0. + * @KVM_PGTABLE_PROT_SW1: Software bit 1. + * @KVM_PGTABLE_PROT_SW2: Software bit 2. + * @KVM_PGTABLE_PROT_SW3: Software bit 3. */ enum kvm_pgtable_prot { KVM_PGTABLE_PROT_X = BIT(0), @@ -128,6 +132,11 @@ enum kvm_pgtable_prot { KVM_PGTABLE_PROT_R = BIT(2), KVM_PGTABLE_PROT_DEVICE = BIT(3), + + KVM_PGTABLE_PROT_SW0 = BIT(55), + KVM_PGTABLE_PROT_SW1 = BIT(56), + KVM_PGTABLE_PROT_SW2 = BIT(57), + KVM_PGTABLE_PROT_SW3 = BIT(58), }; #define KVM_PGTABLE_PROT_RW (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W) @@ -420,7 +429,8 @@ kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr); * If there is a valid, leaf page-table entry used to translate @addr, then * relax the permissions in that entry according to the read, write and * execute permissions specified by @prot. No permissions are removed, and - * TLB invalidation is performed after updating the entry. + * TLB invalidation is performed after updating the entry. Software bits cannot + * be set or cleared using kvm_pgtable_stage2_relax_perms(). * * Return: 0 on success, negative error code on failure. 
*/ diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index e25d829587b9..cff744136044 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -357,6 +357,7 @@ static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep) attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_AP, ap); attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_SH, sh); attr |= KVM_PTE_LEAF_ATTR_LO_S1_AF; + attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW; *ptep = attr; return 0; @@ -558,6 +559,7 @@ static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot p attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh); attr |= KVM_PTE_LEAF_ATTR_LO_S2_AF; + attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW; *ptep = attr; return 0; @@ -1025,6 +1027,9 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, u32 level; kvm_pte_t set = 0, clr = 0; + if (prot & KVM_PTE_LEAF_ATTR_HI_SW) + return -EINVAL; + if (prot & KVM_PGTABLE_PROT_R) set |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R; -- cgit From ec250a67ea8db6209918a389554cf3aec0395b1f Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 9 Aug 2021 16:24:39 +0100 Subject: KVM: arm64: Add helpers to tag shared pages in SW bits We will soon start annotating shared pages in page-tables in nVHE protected mode. Define all the states in which a page can be (owned, shared and owned, shared and borrowed), and provide helpers allowing to convert this into SW bits annotations using the matching prot attributes. Reviewed-by: Fuad Tabba Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210809152448.1810400-13-qperret@google.com --- arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index 9c227d87c36d..87b1690c439f 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -12,6 +12,32 @@ #include #include +/* + * SW bits 0-1 are reserved to track the memory ownership state of each page: + * 00: The page is owned exclusively by the page-table owner. + * 01: The page is owned by the page-table owner, but is shared + * with another entity. + * 10: The page is shared with, but not owned by the page-table owner. + * 11: Reserved for future use (lending). + */ +enum pkvm_page_state { + PKVM_PAGE_OWNED = 0ULL, + PKVM_PAGE_SHARED_OWNED = KVM_PGTABLE_PROT_SW0, + PKVM_PAGE_SHARED_BORROWED = KVM_PGTABLE_PROT_SW1, +}; + +#define PKVM_PAGE_STATE_PROT_MASK (KVM_PGTABLE_PROT_SW0 | KVM_PGTABLE_PROT_SW1) +static inline enum kvm_pgtable_prot pkvm_mkstate(enum kvm_pgtable_prot prot, + enum pkvm_page_state state) +{ + return (prot & ~PKVM_PAGE_STATE_PROT_MASK) | state; +} + +static inline enum pkvm_page_state pkvm_getstate(enum kvm_pgtable_prot prot) +{ + return prot & PKVM_PAGE_STATE_PROT_MASK; +} + struct host_kvm { struct kvm_arch arch; struct kvm_pgtable pgt; -- cgit From 39257da0e04e5cdb1e4a3ca715dc3d949fe8b059 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 9 Aug 2021 16:24:40 +0100 Subject: KVM: arm64: Expose host stage-2 manipulation helpers We will need to manipulate the host stage-2 page-table from outside mem_protect.c soon. Introduce two functions allowing this, and make them usable to users of mem_protect.h. 
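(A usage sketch: the wrapper is hypothetical, the two locked helpers are introduced below, and pkvm_hyp_id only becomes accessible outside mem_protect.c in the next patch.)

static int hyp_take_page_ownership(phys_addr_t phys)
{
	int ret;

	hyp_spin_lock(&host_kvm.lock);
	ret = host_stage2_set_owner_locked(phys, PAGE_SIZE, pkvm_hyp_id);
	hyp_spin_unlock(&host_kvm.lock);

	return ret;
}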
Signed-off-by: Quentin Perret Reviewed-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210809152448.1810400-14-qperret@google.com --- arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 2 ++ arch/arm64/kvm/hyp/nvhe/mem_protect.c | 18 +++++++++++++++++- 2 files changed, 19 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index 87b1690c439f..0849ee8fa260 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -49,6 +49,8 @@ extern struct host_kvm host_kvm; int __pkvm_prot_finalize(void); int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end); +int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot); +int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id); int kvm_host_prepare_stage2(void *pgt_pool_base); void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt); diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 6fed6772c673..f95a5a4aa09c 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -273,6 +273,22 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range) return 0; } +int host_stage2_idmap_locked(phys_addr_t addr, u64 size, + enum kvm_pgtable_prot prot) +{ + hyp_assert_lock_held(&host_kvm.lock); + + return host_stage2_try(__host_stage2_idmap, addr, addr + size, prot); +} + +int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id) +{ + hyp_assert_lock_held(&host_kvm.lock); + + return host_stage2_try(kvm_pgtable_stage2_set_owner, &host_kvm.pgt, + addr, size, &host_s2_pool, owner_id); +} + static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot) { /* @@ -309,7 +325,7 @@ static int host_stage2_idmap(u64 addr) if (ret) goto unlock; - ret = host_stage2_try(__host_stage2_idmap, range.start, range.end, prot); + ret = host_stage2_idmap_locked(range.start, range.end - range.start, prot); unlock: hyp_spin_unlock(&host_kvm.lock); -- cgit From 2d77e238badb022adb364332b7d6a1d627f77145 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 9 Aug 2021 16:24:41 +0100 Subject: KVM: arm64: Expose pkvm_hyp_id Allow references to the hypervisor's owner id from outside mem_protect.c. 
Signed-off-by: Quentin Perret Reviewed-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210809152448.1810400-15-qperret@google.com --- arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 2 ++ arch/arm64/kvm/hyp/nvhe/mem_protect.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index 0849ee8fa260..23316a021880 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -46,6 +46,8 @@ struct host_kvm { }; extern struct host_kvm host_kvm; +extern const u8 pkvm_hyp_id; + int __pkvm_prot_finalize(void); int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end); diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index f95a5a4aa09c..ee255171945c 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -31,7 +31,7 @@ static struct hyp_pool host_s2_pool; u64 id_aa64mmfr0_el1_sys_val; u64 id_aa64mmfr1_el1_sys_val; -static const u8 pkvm_hyp_id = 1; +const u8 pkvm_hyp_id = 1; static void *host_s2_zalloc_pages_exact(size_t size) { -- cgit From e009dce1292c37cf8ee7c33e0887ad3c642f980f Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 9 Aug 2021 16:24:42 +0100 Subject: KVM: arm64: Introduce addr_is_memory() Introduce a helper usable in nVHE protected mode to check whether a physical address is in a RAM region or not. Signed-off-by: Quentin Perret Reviewed-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210809152448.1810400-16-qperret@google.com --- arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 1 + arch/arm64/kvm/hyp/nvhe/mem_protect.c | 7 +++++++ 2 files changed, 8 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index 23316a021880..49db0ec5a606 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -51,6 +51,7 @@ extern const u8 pkvm_hyp_id; int __pkvm_prot_finalize(void); int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end); +bool addr_is_memory(phys_addr_t phys); int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot); int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id); int kvm_host_prepare_stage2(void *pgt_pool_base); diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index ee255171945c..cb023d31666e 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -197,6 +197,13 @@ static bool find_mem_range(phys_addr_t addr, struct kvm_mem_range *range) return false; } +bool addr_is_memory(phys_addr_t phys) +{ + struct kvm_mem_range range; + + return find_mem_range(phys, &range); +} + static bool range_is_memory(u64 start, u64 end) { struct kvm_mem_range r1, r2; -- cgit From 9024b3d0069ab4b8ef70cf55f0ee09e61f3a0747 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 9 Aug 2021 16:24:43 +0100 Subject: KVM: arm64: Enable retrieving protections attributes of PTEs Introduce helper functions in the KVM stage-2 and stage-1 page-table manipulation library allowing to retrieve the enum kvm_pgtable_prot of a PTE. This will be useful to implement custom walkers outside of pgtable.c. 
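(A sketch of a prospective user, combining the existing kvm_pgtable_get_leaf() with the new stage-2 helper; the function itself is hypothetical.)

static bool stage2_addr_is_writable(struct kvm_pgtable *pgt, u64 addr)
{
	kvm_pte_t pte;
	u32 level;

	if (kvm_pgtable_get_leaf(pgt, addr, &pte, &level))
		return false;

	return kvm_pgtable_stage2_pte_prot(pte) & KVM_PGTABLE_PROT_W;
}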
Signed-off-by: Quentin Perret Reviewed-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210809152448.1810400-17-qperret@google.com --- arch/arm64/include/asm/kvm_pgtable.h | 20 +++++++++++++++++++ arch/arm64/kvm/hyp/pgtable.c | 37 ++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index bfea573703d7..027783829584 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -507,4 +507,24 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size, */ int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr, kvm_pte_t *ptep, u32 *level); + +/** + * kvm_pgtable_stage2_pte_prot() - Retrieve the protection attributes of a + * stage-2 Page-Table Entry. + * @pte: Page-table entry + * + * Return: protection attributes of the page-table entry in the enum + * kvm_pgtable_prot format. + */ +enum kvm_pgtable_prot kvm_pgtable_stage2_pte_prot(kvm_pte_t pte); + +/** + * kvm_pgtable_hyp_pte_prot() - Retrieve the protection attributes of a stage-1 + * Page-Table Entry. + * @pte: Page-table entry + * + * Return: protection attributes of the page-table entry in the enum + * kvm_pgtable_prot format. + */ +enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte); #endif /* __ARM64_KVM_PGTABLE_H__ */ diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index cff744136044..f8ceebe4982e 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -363,6 +363,26 @@ static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep) return 0; } +enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte) +{ + enum kvm_pgtable_prot prot = pte & KVM_PTE_LEAF_ATTR_HI_SW; + u32 ap; + + if (!kvm_pte_valid(pte)) + return prot; + + if (!(pte & KVM_PTE_LEAF_ATTR_HI_S1_XN)) + prot |= KVM_PGTABLE_PROT_X; + + ap = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S1_AP, pte); + if (ap == KVM_PTE_LEAF_ATTR_LO_S1_AP_RO) + prot |= KVM_PGTABLE_PROT_R; + else if (ap == KVM_PTE_LEAF_ATTR_LO_S1_AP_RW) + prot |= KVM_PGTABLE_PROT_RW; + + return prot; +} + static bool hyp_pte_needs_update(kvm_pte_t old, kvm_pte_t new) { /* @@ -565,6 +585,23 @@ static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot p return 0; } +enum kvm_pgtable_prot kvm_pgtable_stage2_pte_prot(kvm_pte_t pte) +{ + enum kvm_pgtable_prot prot = pte & KVM_PTE_LEAF_ATTR_HI_SW; + + if (!kvm_pte_valid(pte)) + return prot; + + if (pte & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R) + prot |= KVM_PGTABLE_PROT_R; + if (pte & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W) + prot |= KVM_PGTABLE_PROT_W; + if (!(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN)) + prot |= KVM_PGTABLE_PROT_X; + + return prot; +} + static bool stage2_pte_needs_update(kvm_pte_t old, kvm_pte_t new) { if (!kvm_pte_valid(old) || !kvm_pte_valid(new)) -- cgit From 2c50166c62ba7f3c23c1bbdbb9324db462ddc97b Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 9 Aug 2021 16:24:44 +0100 Subject: KVM: arm64: Mark host bss and rodata section as shared As the hypervisor maps the host's .bss and .rodata sections in its stage-1, make sure to tag them as shared in hyp and host page-tables. But since the hypervisor relies on the presence of these mappings, we cannot let the host in complete control of the memory regions -- it must not unshare or donate them to another entity for example. 
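(For illustration, the page-state helpers from the earlier patch in this series compose with a prot as follows; this is the annotation the diff below applies to the .rodata and .bss mappings.)

/* RO in the hyp stage-1, tagged "owned by hyp, shared with the host". */
enum kvm_pgtable_prot prot = pkvm_mkstate(PAGE_HYP_RO, PKVM_PAGE_SHARED_OWNED);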
To prevent this, let's transfer the ownership of those ranges to the hypervisor itself, and share the pages back with the host. Signed-off-by: Quentin Perret Reviewed-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210809152448.1810400-18-qperret@google.com --- arch/arm64/kvm/hyp/nvhe/setup.c | 82 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 74 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index 0b574d106519..57c27846320f 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -58,6 +58,7 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size, { void *start, *end, *virt = hyp_phys_to_virt(phys); unsigned long pgt_size = hyp_s1_pgtable_pages() << PAGE_SHIFT; + enum kvm_pgtable_prot prot; int ret, i; /* Recreate the hyp page-table using the early page allocator */ @@ -83,10 +84,6 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size, if (ret) return ret; - ret = pkvm_create_mappings(__start_rodata, __end_rodata, PAGE_HYP_RO); - if (ret) - return ret; - ret = pkvm_create_mappings(__hyp_rodata_start, __hyp_rodata_end, PAGE_HYP_RO); if (ret) return ret; @@ -95,10 +92,6 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size, if (ret) return ret; - ret = pkvm_create_mappings(__hyp_bss_end, __bss_stop, PAGE_HYP_RO); - if (ret) - return ret; - ret = pkvm_create_mappings(virt, virt + size, PAGE_HYP); if (ret) return ret; @@ -117,6 +110,24 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size, return ret; } + /* + * Map the host's .bss and .rodata sections RO in the hypervisor, but + * transfer the ownership from the host to the hypervisor itself to + * make sure it can't be donated or shared with another entity. + * + * The ownership transition requires matching changes in the host + * stage-2. This will be done later (see finalize_host_mappings()) once + * the hyp_vmemmap is addressable. + */ + prot = pkvm_mkstate(PAGE_HYP_RO, PKVM_PAGE_SHARED_OWNED); + ret = pkvm_create_mappings(__start_rodata, __end_rodata, prot); + if (ret) + return ret; + + ret = pkvm_create_mappings(__hyp_bss_end, __bss_stop, prot); + if (ret) + return ret; + return 0; } @@ -148,6 +159,57 @@ static void hpool_put_page(void *addr) hyp_put_page(&hpool, addr); } +static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level, + kvm_pte_t *ptep, + enum kvm_pgtable_walk_flags flag, + void * const arg) +{ + enum kvm_pgtable_prot prot; + enum pkvm_page_state state; + kvm_pte_t pte = *ptep; + phys_addr_t phys; + + if (!kvm_pte_valid(pte)) + return 0; + + if (level != (KVM_PGTABLE_MAX_LEVELS - 1)) + return -EINVAL; + + phys = kvm_pte_to_phys(pte); + if (!addr_is_memory(phys)) + return 0; + + /* + * Adjust the host stage-2 mappings to match the ownership attributes + * configured in the hypervisor stage-1. 
+ */ + state = pkvm_getstate(kvm_pgtable_hyp_pte_prot(pte)); + switch (state) { + case PKVM_PAGE_OWNED: + return host_stage2_set_owner_locked(phys, PAGE_SIZE, pkvm_hyp_id); + case PKVM_PAGE_SHARED_OWNED: + prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, PKVM_PAGE_SHARED_BORROWED); + break; + case PKVM_PAGE_SHARED_BORROWED: + prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, PKVM_PAGE_SHARED_OWNED); + break; + default: + return -EINVAL; + } + + return host_stage2_idmap_locked(phys, PAGE_SIZE, prot); +} + +static int finalize_host_mappings(void) +{ + struct kvm_pgtable_walker walker = { + .cb = finalize_host_mappings_walker, + .flags = KVM_PGTABLE_WALK_LEAF, + }; + + return kvm_pgtable_walk(&pkvm_pgtable, 0, BIT(pkvm_pgtable.ia_bits), &walker); +} + void __noreturn __pkvm_init_finalise(void) { struct kvm_host_data *host_data = this_cpu_ptr(&kvm_host_data); @@ -167,6 +229,10 @@ void __noreturn __pkvm_init_finalise(void) if (ret) goto out; + ret = finalize_host_mappings(); + if (ret) + goto out; + pkvm_pgtable_mm_ops = (struct kvm_pgtable_mm_ops) { .zalloc_page = hyp_zalloc_hyp_page, .phys_to_virt = hyp_phys_to_virt, -- cgit From ad0e0139a8e163245d8f44ab4f6ec3bc9b08034d Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 9 Aug 2021 16:24:45 +0100 Subject: KVM: arm64: Remove __pkvm_mark_hyp Now that we mark memory owned by the hypervisor in the host stage-2 during __pkvm_init(), we no longer need to rely on the host to explicitly mark the hyp sections later on. Remove the __pkvm_mark_hyp() hypercall altogether. Signed-off-by: Quentin Perret Reviewed-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210809152448.1810400-19-qperret@google.com --- arch/arm64/include/asm/kvm_asm.h | 3 +- arch/arm64/kvm/arm.c | 46 --------------------------- arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 1 - arch/arm64/kvm/hyp/nvhe/hyp-main.c | 9 ------ arch/arm64/kvm/hyp/nvhe/mem_protect.c | 19 ----------- 5 files changed, 1 insertion(+), 77 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 9f0bf2109be7..432a9ea1f02e 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -63,8 +63,7 @@ #define __KVM_HOST_SMCCC_FUNC___pkvm_create_private_mapping 17 #define __KVM_HOST_SMCCC_FUNC___pkvm_cpu_set_vector 18 #define __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize 19 -#define __KVM_HOST_SMCCC_FUNC___pkvm_mark_hyp 20 -#define __KVM_HOST_SMCCC_FUNC___kvm_adjust_pc 21 +#define __KVM_HOST_SMCCC_FUNC___kvm_adjust_pc 20 #ifndef __ASSEMBLY__ diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index e9a2b8f27792..2f378482471b 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1954,57 +1954,11 @@ static void _kvm_host_prot_finalize(void *discard) WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize)); } -static inline int pkvm_mark_hyp(phys_addr_t start, phys_addr_t end) -{ - return kvm_call_hyp_nvhe(__pkvm_mark_hyp, start, end); -} - -#define pkvm_mark_hyp_section(__section) \ - pkvm_mark_hyp(__pa_symbol(__section##_start), \ - __pa_symbol(__section##_end)) - static int finalize_hyp_mode(void) { - int cpu, ret; - if (!is_protected_kvm_enabled()) return 0; - ret = pkvm_mark_hyp_section(__hyp_idmap_text); - if (ret) - return ret; - - ret = pkvm_mark_hyp_section(__hyp_text); - if (ret) - return ret; - - ret = pkvm_mark_hyp_section(__hyp_rodata); - if (ret) - return ret; - - ret = pkvm_mark_hyp_section(__hyp_bss); - if (ret) - return ret; - - ret = pkvm_mark_hyp(hyp_mem_base, hyp_mem_base + hyp_mem_size); - if 
(ret) - return ret; - - for_each_possible_cpu(cpu) { - phys_addr_t start = virt_to_phys((void *)kvm_arm_hyp_percpu_base[cpu]); - phys_addr_t end = start + (PAGE_SIZE << nvhe_percpu_order()); - - ret = pkvm_mark_hyp(start, end); - if (ret) - return ret; - - start = virt_to_phys((void *)per_cpu(kvm_arm_hyp_stack_page, cpu)); - end = start + PAGE_SIZE; - ret = pkvm_mark_hyp(start, end); - if (ret) - return ret; - } - /* * Flip the static key upfront as that may no longer be possible * once the host stage 2 is installed. diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index 49db0ec5a606..0118527b07b0 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -49,7 +49,6 @@ extern struct host_kvm host_kvm; extern const u8 pkvm_hyp_id; int __pkvm_prot_finalize(void); -int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end); bool addr_is_memory(phys_addr_t phys); int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot); diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 1632f001f4ed..7900d5b66ba3 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -163,14 +163,6 @@ static void handle___pkvm_prot_finalize(struct kvm_cpu_context *host_ctxt) { cpu_reg(host_ctxt, 1) = __pkvm_prot_finalize(); } - -static void handle___pkvm_mark_hyp(struct kvm_cpu_context *host_ctxt) -{ - DECLARE_REG(phys_addr_t, start, host_ctxt, 1); - DECLARE_REG(phys_addr_t, end, host_ctxt, 2); - - cpu_reg(host_ctxt, 1) = __pkvm_mark_hyp(start, end); -} typedef void (*hcall_t)(struct kvm_cpu_context *); #define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = (hcall_t)handle_##x @@ -196,7 +188,6 @@ static const hcall_t host_hcall[] = { HANDLE_FUNC(__pkvm_create_mappings), HANDLE_FUNC(__pkvm_create_private_mapping), HANDLE_FUNC(__pkvm_prot_finalize), - HANDLE_FUNC(__pkvm_mark_hyp), }; static void handle_host_hcall(struct kvm_cpu_context *host_ctxt) diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index cb023d31666e..2991dc6996b9 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -339,25 +339,6 @@ unlock: return ret; } -int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end) -{ - int ret; - - /* - * host_stage2_unmap_dev_all() currently relies on MMIO mappings being - * non-persistent, so don't allow changing page ownership in MMIO range. - */ - if (!range_is_memory(start, end)) - return -EINVAL; - - hyp_spin_lock(&host_kvm.lock); - ret = host_stage2_try(kvm_pgtable_stage2_set_owner, &host_kvm.pgt, - start, end - start, &host_s2_pool, pkvm_hyp_id); - hyp_spin_unlock(&host_kvm.lock); - - return ret != -EAGAIN ? ret : 0; -} - void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt) { struct kvm_vcpu_fault_info fault; -- cgit From f9370010e92638f66473baf342e19de940403362 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 9 Aug 2021 16:24:46 +0100 Subject: KVM: arm64: Refactor protected nVHE stage-1 locking Refactor the hypervisor stage-1 locking in nVHE protected mode to expose a new pkvm_create_mappings_locked() function. This will be used in later patches to allow walking and changing the hypervisor stage-1 without releasing the lock. 
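(A usage sketch with a hypothetical caller; the point is that both mappings are created under a single critical section on pkvm_pgd_lock, which the diff below makes possible.)

static int map_two_ranges(void *a_from, void *a_to, void *b_from, void *b_to)
{
	int ret;

	hyp_spin_lock(&pkvm_pgd_lock);
	ret = pkvm_create_mappings_locked(a_from, a_to, PAGE_HYP);
	if (!ret)
		ret = pkvm_create_mappings_locked(b_from, b_to, PAGE_HYP_RO);
	hyp_spin_unlock(&pkvm_pgd_lock);

	return ret;
}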
Signed-off-by: Quentin Perret Reviewed-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210809152448.1810400-20-qperret@google.com --- arch/arm64/kvm/hyp/include/nvhe/mm.h | 1 + arch/arm64/kvm/hyp/nvhe/mm.c | 18 ++++++++++++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h index 8ec3a5a7744b..c76d7136ed9b 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mm.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h @@ -23,6 +23,7 @@ int hyp_map_vectors(void); int hyp_back_vmemmap(phys_addr_t phys, unsigned long size, phys_addr_t back); int pkvm_cpu_set_vector(enum arm64_hyp_spectre_vector slot); int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot); +int pkvm_create_mappings_locked(void *from, void *to, enum kvm_pgtable_prot prot); int __pkvm_create_mappings(unsigned long start, unsigned long size, unsigned long phys, enum kvm_pgtable_prot prot); unsigned long __pkvm_create_private_mapping(phys_addr_t phys, size_t size, diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c index a8efdf0f9003..6fbe8e8030f6 100644 --- a/arch/arm64/kvm/hyp/nvhe/mm.c +++ b/arch/arm64/kvm/hyp/nvhe/mm.c @@ -67,13 +67,15 @@ out: return addr; } -int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot) +int pkvm_create_mappings_locked(void *from, void *to, enum kvm_pgtable_prot prot) { unsigned long start = (unsigned long)from; unsigned long end = (unsigned long)to; unsigned long virt_addr; phys_addr_t phys; + hyp_assert_lock_held(&pkvm_pgd_lock); + start = start & PAGE_MASK; end = PAGE_ALIGN(end); @@ -81,7 +83,8 @@ int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot) int err; phys = hyp_virt_to_phys((void *)virt_addr); - err = __pkvm_create_mappings(virt_addr, PAGE_SIZE, phys, prot); + err = kvm_pgtable_hyp_map(&pkvm_pgtable, virt_addr, PAGE_SIZE, + phys, prot); if (err) return err; } @@ -89,6 +92,17 @@ int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot) return 0; } +int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot) +{ + int ret; + + hyp_spin_lock(&pkvm_pgd_lock); + ret = pkvm_create_mappings_locked(from, to, prot); + hyp_spin_unlock(&pkvm_pgd_lock); + + return ret; +} + int hyp_back_vmemmap(phys_addr_t phys, unsigned long size, phys_addr_t back) { unsigned long start, end; -- cgit From 66c57edd3bc79e3527daaae8123f72ecd1e3fa25 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 9 Aug 2021 16:24:47 +0100 Subject: KVM: arm64: Restrict EL2 stage-1 changes in protected mode The host kernel is currently able to change EL2 stage-1 mappings without restrictions thanks to the __pkvm_create_mappings() hypercall. But in a world where the host is no longer part of the TCB, this clearly poses a problem. To fix this, introduce a new hypercall to allow the host to share a physical memory page with the hypervisor, and remove the __pkvm_create_mappings() variant. The new hypercall implements ownership and permission checks before allowing the sharing operation, and it annotates the shared page in the hypervisor stage-1 and host stage-2 page-tables. 
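(On the host side the operation reduces to a single hypercall; the wrapper name is hypothetical, while the hypercall and kvm_call_hyp_nvhe() are from the series.)

static int share_page_with_hyp(phys_addr_t phys)
{
	return kvm_call_hyp_nvhe(__pkvm_host_share_hyp, phys >> PAGE_SHIFT);
}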
Signed-off-by: Quentin Perret Reviewed-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210809152448.1810400-21-qperret@google.com --- arch/arm64/include/asm/kvm_asm.h | 2 +- arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 1 + arch/arm64/kvm/hyp/nvhe/hyp-main.c | 11 ++-- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 88 +++++++++++++++++++++++++++ arch/arm64/kvm/mmu.c | 28 +++++++-- 5 files changed, 118 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 432a9ea1f02e..aed2aa61766a 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -59,7 +59,7 @@ #define __KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs 13 #define __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_aprs 14 #define __KVM_HOST_SMCCC_FUNC___pkvm_init 15 -#define __KVM_HOST_SMCCC_FUNC___pkvm_create_mappings 16 +#define __KVM_HOST_SMCCC_FUNC___pkvm_host_share_hyp 16 #define __KVM_HOST_SMCCC_FUNC___pkvm_create_private_mapping 17 #define __KVM_HOST_SMCCC_FUNC___pkvm_cpu_set_vector 18 #define __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize 19 diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index 0118527b07b0..03e604f842e2 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -49,6 +49,7 @@ extern struct host_kvm host_kvm; extern const u8 pkvm_hyp_id; int __pkvm_prot_finalize(void); +int __pkvm_host_share_hyp(u64 pfn); bool addr_is_memory(phys_addr_t phys); int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot); diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 7900d5b66ba3..2da6aa8da868 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -140,14 +140,11 @@ static void handle___pkvm_cpu_set_vector(struct kvm_cpu_context *host_ctxt) cpu_reg(host_ctxt, 1) = pkvm_cpu_set_vector(slot); } -static void handle___pkvm_create_mappings(struct kvm_cpu_context *host_ctxt) +static void handle___pkvm_host_share_hyp(struct kvm_cpu_context *host_ctxt) { - DECLARE_REG(unsigned long, start, host_ctxt, 1); - DECLARE_REG(unsigned long, size, host_ctxt, 2); - DECLARE_REG(unsigned long, phys, host_ctxt, 3); - DECLARE_REG(enum kvm_pgtable_prot, prot, host_ctxt, 4); + DECLARE_REG(u64, pfn, host_ctxt, 1); - cpu_reg(host_ctxt, 1) = __pkvm_create_mappings(start, size, phys, prot); + cpu_reg(host_ctxt, 1) = __pkvm_host_share_hyp(pfn); } static void handle___pkvm_create_private_mapping(struct kvm_cpu_context *host_ctxt) @@ -185,7 +182,7 @@ static const hcall_t host_hcall[] = { HANDLE_FUNC(__vgic_v3_restore_aprs), HANDLE_FUNC(__pkvm_init), HANDLE_FUNC(__pkvm_cpu_set_vector), - HANDLE_FUNC(__pkvm_create_mappings), + HANDLE_FUNC(__pkvm_host_share_hyp), HANDLE_FUNC(__pkvm_create_private_mapping), HANDLE_FUNC(__pkvm_prot_finalize), }; diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 2991dc6996b9..8165390d3ec9 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -339,6 +339,94 @@ unlock: return ret; } +static inline bool check_prot(enum kvm_pgtable_prot prot, + enum kvm_pgtable_prot required, + enum kvm_pgtable_prot denied) +{ + return (prot & (required | denied)) == required; +} + +int __pkvm_host_share_hyp(u64 pfn) +{ + phys_addr_t addr = hyp_pfn_to_phys(pfn); + enum kvm_pgtable_prot prot, cur; + void *virt = __hyp_va(addr); + enum 
pkvm_page_state state; + kvm_pte_t pte; + int ret; + + if (!addr_is_memory(addr)) + return -EINVAL; + + hyp_spin_lock(&host_kvm.lock); + hyp_spin_lock(&pkvm_pgd_lock); + + ret = kvm_pgtable_get_leaf(&host_kvm.pgt, addr, &pte, NULL); + if (ret) + goto unlock; + if (!pte) + goto map_shared; + + /* + * Check attributes in the host stage-2 PTE. We need the page to be: + * - mapped RWX as we're sharing memory; + * - not borrowed, as that implies absence of ownership. + * Otherwise, we can't let it got through + */ + cur = kvm_pgtable_stage2_pte_prot(pte); + prot = pkvm_mkstate(0, PKVM_PAGE_SHARED_BORROWED); + if (!check_prot(cur, PKVM_HOST_MEM_PROT, prot)) { + ret = -EPERM; + goto unlock; + } + + state = pkvm_getstate(cur); + if (state == PKVM_PAGE_OWNED) + goto map_shared; + + /* + * Tolerate double-sharing the same page, but this requires + * cross-checking the hypervisor stage-1. + */ + if (state != PKVM_PAGE_SHARED_OWNED) { + ret = -EPERM; + goto unlock; + } + + ret = kvm_pgtable_get_leaf(&pkvm_pgtable, (u64)virt, &pte, NULL); + if (ret) + goto unlock; + + /* + * If the page has been shared with the hypervisor, it must be + * already mapped as SHARED_BORROWED in its stage-1. + */ + cur = kvm_pgtable_hyp_pte_prot(pte); + prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED); + if (!check_prot(cur, prot, ~prot)) + ret = EPERM; + goto unlock; + +map_shared: + /* + * If the page is not yet shared, adjust mappings in both page-tables + * while both locks are held. + */ + prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED); + ret = pkvm_create_mappings_locked(virt, virt + PAGE_SIZE, prot); + BUG_ON(ret); + + prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, PKVM_PAGE_SHARED_OWNED); + ret = host_stage2_idmap_locked(addr, PAGE_SIZE, prot); + BUG_ON(ret); + +unlock: + hyp_spin_unlock(&pkvm_pgd_lock); + hyp_spin_unlock(&host_kvm.lock); + + return ret; +} + void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt) { struct kvm_vcpu_fault_info fault; diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 0625bf2353c2..cbab146cda6a 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -259,10 +259,8 @@ static int __create_hyp_mappings(unsigned long start, unsigned long size, { int err; - if (!kvm_host_owns_hyp_mappings()) { - return kvm_call_hyp_nvhe(__pkvm_create_mappings, - start, size, phys, prot); - } + if (WARN_ON(!kvm_host_owns_hyp_mappings())) + return -EINVAL; mutex_lock(&kvm_hyp_pgd_mutex); err = kvm_pgtable_hyp_map(hyp_pgtable, start, size, phys, prot); @@ -282,6 +280,21 @@ static phys_addr_t kvm_kaddr_to_phys(void *kaddr) } } +static int pkvm_share_hyp(phys_addr_t start, phys_addr_t end) +{ + phys_addr_t addr; + int ret; + + for (addr = ALIGN_DOWN(start, PAGE_SIZE); addr < end; addr += PAGE_SIZE) { + ret = kvm_call_hyp_nvhe(__pkvm_host_share_hyp, + __phys_to_pfn(addr)); + if (ret) + return ret; + } + + return 0; +} + /** * create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode * @from: The virtual kernel start address of the range @@ -302,6 +315,13 @@ int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot) if (is_kernel_in_hyp_mode()) return 0; + if (!kvm_host_owns_hyp_mappings()) { + if (WARN_ON(prot != PAGE_HYP)) + return -EPERM; + return pkvm_share_hyp(kvm_kaddr_to_phys(from), + kvm_kaddr_to_phys(to)); + } + start = start & PAGE_MASK; end = PAGE_ALIGN(end); -- cgit From 64a80fb766f9a91e26930bfc56d8e7c12425df12 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 9 Aug 2021 16:24:48 +0100 Subject: KVM: arm64: Make 
__pkvm_create_mappings static The __pkvm_create_mappings() function is no longer used outside of nvhe/mm.c, so make it static. Signed-off-by: Quentin Perret Reviewed-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210809152448.1810400-22-qperret@google.com --- arch/arm64/kvm/hyp/include/nvhe/mm.h | 2 -- arch/arm64/kvm/hyp/nvhe/mm.c | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h index c76d7136ed9b..c9a8f535212e 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mm.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h @@ -24,8 +24,6 @@ int hyp_back_vmemmap(phys_addr_t phys, unsigned long size, phys_addr_t back); int pkvm_cpu_set_vector(enum arm64_hyp_spectre_vector slot); int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot); int pkvm_create_mappings_locked(void *from, void *to, enum kvm_pgtable_prot prot); -int __pkvm_create_mappings(unsigned long start, unsigned long size, - unsigned long phys, enum kvm_pgtable_prot prot); unsigned long __pkvm_create_private_mapping(phys_addr_t phys, size_t size, enum kvm_pgtable_prot prot); diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c index 6fbe8e8030f6..2fabeceb889a 100644 --- a/arch/arm64/kvm/hyp/nvhe/mm.c +++ b/arch/arm64/kvm/hyp/nvhe/mm.c @@ -23,8 +23,8 @@ u64 __io_map_base; struct memblock_region hyp_memory[HYP_MEMBLOCK_REGIONS]; unsigned int hyp_memblock_nr; -int __pkvm_create_mappings(unsigned long start, unsigned long size, - unsigned long phys, enum kvm_pgtable_prot prot) +static int __pkvm_create_mappings(unsigned long start, unsigned long size, + unsigned long phys, enum kvm_pgtable_prot prot) { int err; -- cgit From 6fadc1241c33fe0228c94bc6a1aa6c1da8872e8b Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Wed, 11 Aug 2021 08:57:06 +0530 Subject: KVM: arm64: perf: Replace '0xf' instances with ID_AA64DFR0_PMUVER_IMP_DEF ID_AA64DFR0_PMUVER_IMP_DEF, which indicates an implementation-defined PMU, never actually gets used even though there are '0xf' instances scattered all around. Just do the macro replacement to improve readability.
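For context, PMUVer is a four-bit field of ID_AA64DFR0_EL1; the relevant encodings, quoted from the Arm ARM only as a reminder of why 0xf is special:

    /*
     * ID_AA64DFR0_EL1.PMUVer:
     *   0x0  no PMU extension implemented
     *   0x1  PMUv3 (later architecture revisions use 0x4, 0x5, 0x6, ...)
     *   0xf  IMPLEMENTATION DEFINED PMU, not architected, which is why
     *        KVM treats it the same way as "no usable PMU"
     */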
Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Rutland Cc: Peter Zijlstra Cc: Marc Zyngier Cc: linux-perf-users@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: kvmarm@lists.cs.columbia.edu Cc: linux-kernel@vger.kernel.org Signed-off-by: Anshuman Khandual Signed-off-by: Marc Zyngier --- arch/arm64/kvm/perf.c | 2 +- arch/arm64/kvm/pmu-emul.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/perf.c b/arch/arm64/kvm/perf.c index 151c31fb9860..f9bb3b14130e 100644 --- a/arch/arm64/kvm/perf.c +++ b/arch/arm64/kvm/perf.c @@ -50,7 +50,7 @@ static struct perf_guest_info_callbacks kvm_guest_cbs = { int kvm_perf_init(void) { - if (kvm_pmu_probe_pmuver() != 0xf && !is_protected_kvm_enabled()) + if (kvm_pmu_probe_pmuver() != ID_AA64DFR0_PMUVER_IMP_DEF && !is_protected_kvm_enabled()) static_branch_enable(&kvm_arm_pmu_available); return perf_register_guest_info_callbacks(&kvm_guest_cbs); diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index f33825c995cb..60f89bdbeebb 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -745,7 +745,7 @@ int kvm_pmu_probe_pmuver(void) struct perf_event_attr attr = { }; struct perf_event *event; struct arm_pmu *pmu; - int pmuver = 0xf; + int pmuver = ID_AA64DFR0_PMUVER_IMP_DEF; /* * Create a dummy event that only counts user cycles. As we'll never @@ -770,7 +770,7 @@ int kvm_pmu_probe_pmuver(void) if (IS_ERR(event)) { pr_err_once("kvm: pmu event creation failed %ld\n", PTR_ERR(event)); - return 0xf; + return ID_AA64DFR0_PMUVER_IMP_DEF; } if (event->pmu) { @@ -923,7 +923,7 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) if (!vcpu->kvm->arch.pmuver) vcpu->kvm->arch.pmuver = kvm_pmu_probe_pmuver(); - if (vcpu->kvm->arch.pmuver == 0xf) + if (vcpu->kvm->arch.pmuver == ID_AA64DFR0_PMUVER_IMP_DEF) return -ENODEV; switch (attr->attr) { -- cgit From b31578f627177bda5c16894e3170a7a6a1236136 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Tue, 10 Aug 2021 09:59:42 +0530 Subject: arm64/mm: Define ID_AA64MMFR0_TGRAN_2_SHIFT Streamline the Stage-2 TGRAN value extraction from ID_AA64MMFR0 register by adding a page size agnostic ID_AA64MMFR0_TGRAN_2_SHIFT. This is similar to the existing Stage-1 TGRAN shift i.e ID_AA64MMFR0_TGRAN_SHIFT. 
Cc: Catalin Marinas Cc: Will Deacon Cc: Marc Zyngier Cc: linux-arm-kernel@lists.infradead.org Cc: kvmarm@lists.cs.columbia.edu Cc: linux-kernel@vger.kernel.org Signed-off-by: Anshuman Khandual Acked-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/1628569782-30213-1-git-send-email-anshuman.khandual@arm.com --- arch/arm64/include/asm/sysreg.h | 3 +++ arch/arm64/kvm/reset.c | 17 ++--------------- 2 files changed, 5 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 7b9c3acba684..943d31d92b5b 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -1028,14 +1028,17 @@ #if defined(CONFIG_ARM64_4K_PAGES) #define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN4_SHIFT +#define ID_AA64MMFR0_TGRAN_2_SHIFT ID_AA64MMFR0_TGRAN4_2_SHIFT #define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN4_SUPPORTED #define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX 0x7 #elif defined(CONFIG_ARM64_16K_PAGES) #define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN16_SHIFT +#define ID_AA64MMFR0_TGRAN_2_SHIFT ID_AA64MMFR0_TGRAN16_2_SHIFT #define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN16_SUPPORTED #define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX 0xF #elif defined(CONFIG_ARM64_64K_PAGES) #define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN64_SHIFT +#define ID_AA64MMFR0_TGRAN_2_SHIFT ID_AA64MMFR0_TGRAN64_2_SHIFT #define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_TGRAN64_SUPPORTED #define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX 0x7 #endif diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index cba7872d69a8..20588220fe66 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -311,7 +311,7 @@ u32 get_kvm_ipa_limit(void) int kvm_set_ipa_limit(void) { - unsigned int parange, tgran_2; + unsigned int parange; u64 mmfr0; mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); @@ -322,20 +322,7 @@ int kvm_set_ipa_limit(void) * Check with ARMv8.5-GTG that our PAGE_SIZE is supported at * Stage-2. If not, things will stop very quickly. */ - switch (PAGE_SIZE) { - default: - case SZ_4K: - tgran_2 = ID_AA64MMFR0_TGRAN4_2_SHIFT; - break; - case SZ_16K: - tgran_2 = ID_AA64MMFR0_TGRAN16_2_SHIFT; - break; - case SZ_64K: - tgran_2 = ID_AA64MMFR0_TGRAN64_2_SHIFT; - break; - } - - switch (cpuid_feature_extract_unsigned_field(mmfr0, tgran_2)) { + switch (cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_TGRAN_2_SHIFT)) { case ID_AA64MMFR0_TGRAN_2_SUPPORTED_NONE: kvm_err("PAGE_SIZE not supported at Stage-2, giving up\n"); return -EINVAL; -- cgit From 5e5df9571c319fb107d7a523cc96fcc99961ee70 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Wed, 11 Aug 2021 16:41:15 +0530 Subject: KVM: arm64: Restrict IPA size to maximum 48 bits on 4K and 16K page size Even though ID_AA64MMFR0.PARANGE reports 52 bit PA size support, it cannot be enabled as guest IPA size on 4K or 16K page size configurations. Hence kvm_ipa_limit must be restricted to 48 bits. This change achieves required IPA capping. Before the commit c9b69a0cf0b4 ("KVM: arm64: Don't constrain maximum IPA size based on host configuration"), the problem here would have been just latent via PHYS_MASK_SHIFT (which earlier in turn capped kvm_ipa_limit), which remains capped at 48 bits on 4K and 16K configs. 
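To make the clamp concrete, here is a worked example assuming a 4K-page host whose ID_AA64MMFR0_EL1.PARANGE reads 0x6; the encodings are from the Arm ARM and the fragment simply mirrors the hunk below:

    /*
     * PARANGE encodings: 0x0 = 32, 0x1 = 36, 0x2 = 40, 0x3 = 42,
     * 0x4 = 44, 0x5 = 48, 0x6 = 52 bits of physical address.
     */
    parange = 0x6;                  /* hardware reports 52 bits */
    if (PAGE_SIZE != SZ_64K)        /* 4K (or 16K) page kernel */
            parange = min(parange, (unsigned int)ID_AA64MMFR0_PARANGE_48);
    /* parange is now 0x5, so the IPA limit derived from it is 48 bits. */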
Cc: Marc Zyngier Cc: James Morse Cc: Alexandru Elisei Cc: Suzuki K Poulose Cc: Catalin Marinas Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: kvmarm@lists.cs.columbia.edu Cc: linux-kernel@vger.kernel.org Fixes: c9b69a0cf0b4 ("KVM: arm64: Don't constrain maximum IPA size based on host configuration") Signed-off-by: Anshuman Khandual Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/1628680275-16578-1-git-send-email-anshuman.khandual@arm.com --- arch/arm64/kvm/reset.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 20588220fe66..18ffc6ad67b8 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -317,6 +317,14 @@ int kvm_set_ipa_limit(void) mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); parange = cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_PARANGE_SHIFT); + /* + * IPA size beyond 48 bits could not be supported + * on either 4K or 16K page size. Hence let's cap + * it to 48 bits, in case it's reported as larger + * on the system. + */ + if (PAGE_SIZE != SZ_64K) + parange = min(parange, (unsigned int)ID_AA64MMFR0_PARANGE_48); /* * Check with ARMv8.5-GTG that our PAGE_SIZE is supported at -- cgit From ed5aacc81cd41efc4d561e14af408d1003f7b855 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 26 May 2021 00:03:37 -0700 Subject: xtensa: fix kconfig unmet dependency warning for HAVE_FUTEX_CMPXCHG XTENSA should only select HAVE_FUTEX_CMPXCHG when FUTEX is set/enabled. This prevents a kconfig warning. WARNING: unmet direct dependencies detected for HAVE_FUTEX_CMPXCHG Depends on [n]: FUTEX [=n] Selected by [y]: - XTENSA [=y] && !MMU [=n] Fixes: d951ba21b959 ("xtensa: nommu: select HAVE_FUTEX_CMPXCHG") Signed-off-by: Randy Dunlap Cc: Max Filippov Cc: Chris Zankel Cc: linux-xtensa@linux-xtensa.org Message-Id: <20210526070337.28130-1-rdunlap@infradead.org> Signed-off-by: Max Filippov --- arch/xtensa/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index 3878880469d1..b843902ad9fd 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -30,7 +30,7 @@ config XTENSA select HAVE_DMA_CONTIGUOUS select HAVE_EXIT_THREAD select HAVE_FUNCTION_TRACER - select HAVE_FUTEX_CMPXCHG if !MMU + select HAVE_FUTEX_CMPXCHG if !MMU && FUTEX select HAVE_HW_BREAKPOINT if PERF_EVENTS select HAVE_IRQ_TIME_ACCOUNTING select HAVE_PCI -- cgit From 43ba2237281a59b40ea2393da9e89ea3a68de2a5 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Fri, 9 Jul 2021 04:13:23 -0700 Subject: xtensa: add fairness to IRQ handling Track which IRQs have been served at each level to make sure that no IRQ is served more than once while other IRQs at the same level are pending. 
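The effect of the mask is easier to see in isolation. Below is a small user-space toy model of the idea (an illustration written for this document, not code from the patch): with bits 3 and 5 pending at one level, each gets serviced once per round instead of the lowest bit being served repeatedly while the other starves.

    #include <stdio.h>

    int main(void)
    {
            unsigned int pending = 0x28;    /* IRQs 3 and 5 pending */
            unsigned int unhandled = ~0u;   /* nobody served this round */

            while (pending) {
                    unsigned int candidates = pending & unhandled;

                    if (!candidates) {      /* all pending IRQs served once */
                            unhandled = ~0u;        /* start a new round */
                            continue;
                    }
                    /* serve the lowest candidate, exclude it for this round */
                    unhandled ^= candidates & -candidates;
                    printf("servicing IRQ %d\n", __builtin_ctz(candidates));
                    pending &= ~(candidates & -candidates); /* toy: it deasserts */
            }
            return 0;
    }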
Signed-off-by: Max Filippov --- arch/xtensa/kernel/traps.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch') diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c index efc3a29cde80..874b6efc6fb3 100644 --- a/arch/xtensa/kernel/traps.c +++ b/arch/xtensa/kernel/traps.c @@ -268,6 +268,7 @@ void do_interrupt(struct pt_regs *regs) XCHAL_INTLEVEL7_MASK, }; struct pt_regs *old_regs; + unsigned unhandled = ~0u; trace_hardirqs_off(); @@ -283,6 +284,10 @@ void do_interrupt(struct pt_regs *regs) for (level = LOCKLEVEL; level > 0; --level) { if (int_at_level & int_level_mask[level]) { int_at_level &= int_level_mask[level]; + if (int_at_level & unhandled) + int_at_level &= unhandled; + else + unhandled |= int_level_mask[level]; break; } } @@ -290,6 +295,8 @@ void do_interrupt(struct pt_regs *regs) if (level == 0) break; + /* clear lowest pending irq in the unhandled mask */ + unhandled ^= (int_at_level & -int_at_level); do_IRQ(__ffs(int_at_level), regs); } -- cgit From 13066c303769b5c6bc67c0b990a4eb80058fb1b4 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Tue, 10 Aug 2021 16:08:06 -0700 Subject: xtensa: ISS: don't use string pointer before NULL check Move strlen call inside the if block that checks string pointer for NULL. While at it also fix the following coccicheck warning: ./arch/xtensa/platforms/iss/console.c:204:10-11: WARNING comparing pointer to 0. Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: Max Filippov --- arch/xtensa/platforms/iss/console.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/xtensa/platforms/iss/console.c b/arch/xtensa/platforms/iss/console.c index 21184488c277..f6e5fc5579be 100644 --- a/arch/xtensa/platforms/iss/console.c +++ b/arch/xtensa/platforms/iss/console.c @@ -186,10 +186,10 @@ late_initcall(rs_init); static void iss_console_write(struct console *co, const char *s, unsigned count) { - int len = strlen(s); - - if (s != 0 && *s != 0) + if (s && *s != 0) { + int len = strlen(s); simc_write(1, s, count < len ? count : len); + } } static struct tty_driver* iss_console_device(struct console *c, int *index) -- cgit From ef71db4845c02fe9f898e09046857ab1a9ff4be8 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 12 Aug 2021 01:37:28 +0900 Subject: xtensa: remove unneeded exports These are not used in any of subdirectories. Signed-off-by: Masahiro Yamada Message-Id: <20210811163731.186125-1-masahiroy@kernel.org> Signed-off-by: Max Filippov --- arch/xtensa/Makefile | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/xtensa/Makefile b/arch/xtensa/Makefile index e9c8f064c44d..6fab7fc87579 100644 --- a/arch/xtensa/Makefile +++ b/arch/xtensa/Makefile @@ -17,7 +17,6 @@ variant-y := $(patsubst "%",%,$(CONFIG_XTENSA_VARIANT_NAME)) VARIANT = $(variant-y) -export VARIANT ifneq ($(VARIANT),) ifdef cross_compiling @@ -34,7 +33,6 @@ platform-$(CONFIG_XTENSA_PLATFORM_ISS) := iss platform-$(CONFIG_XTENSA_PLATFORM_XTFPGA) := xtfpga PLATFORM = $(platform-y) -export PLATFORM # temporarily until string.h is fixed KBUILD_CFLAGS += -ffreestanding -D__linux__ -- cgit From c548584438d1a447900797a40571d95fb36a605f Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 12 Aug 2021 01:37:29 +0900 Subject: xtensa: do not build variants directory None of arch/xtensa/variants/*/ has Makefile, so 'buildvar' is always empty. 
Perhaps, downstream variant code might be dropped in, but given the fact that none of upstream variants builds anything in their variant directory, I doubt this is needed. Signed-off-by: Masahiro Yamada Message-Id: <20210811163731.186125-2-masahiroy@kernel.org> Signed-off-by: Max Filippov --- arch/xtensa/Makefile | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/xtensa/Makefile b/arch/xtensa/Makefile index 6fab7fc87579..3c0573fe6761 100644 --- a/arch/xtensa/Makefile +++ b/arch/xtensa/Makefile @@ -55,9 +55,7 @@ KBUILD_CPPFLAGS += $(patsubst %,-I$(srctree)/%include,$(vardirs) $(plfdirs)) KBUILD_DEFCONFIG := iss_defconfig -# Only build variant and/or platform if it includes a Makefile - -buildvar := $(shell test -e $(srctree)/arch/xtensa/variants/$(VARIANT)/Makefile && echo arch/xtensa/variants/$(VARIANT)/) +# Only build platform if it includes a Makefile buildplf := $(shell test -e $(srctree)/arch/xtensa/platforms/$(PLATFORM)/Makefile && echo arch/xtensa/platforms/$(PLATFORM)/) # Find libgcc.a @@ -66,7 +64,7 @@ LIBGCC := $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name) head-y := arch/xtensa/kernel/head.o core-y += arch/xtensa/kernel/ arch/xtensa/mm/ -core-y += $(buildvar) $(buildplf) +core-y += $(buildplf) core-y += arch/xtensa/boot/dts/ libs-y += arch/xtensa/lib/ $(LIBGCC) -- cgit From 59210499a02a58e5b2c6da763d08adecb760d74b Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 12 Aug 2021 01:37:30 +0900 Subject: xtensa: build platform directories unconditionally All of arch/xtensa/platforms/*/ have Makefile. You do not need to check the presence of Makefile. Signed-off-by: Masahiro Yamada Message-Id: <20210811163731.186125-3-masahiroy@kernel.org> Signed-off-by: Max Filippov --- arch/xtensa/Makefile | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/xtensa/Makefile b/arch/xtensa/Makefile index 3c0573fe6761..093e87b889be 100644 --- a/arch/xtensa/Makefile +++ b/arch/xtensa/Makefile @@ -32,8 +32,6 @@ platform-$(CONFIG_XTENSA_PLATFORM_XT2000) := xt2000 platform-$(CONFIG_XTENSA_PLATFORM_ISS) := iss platform-$(CONFIG_XTENSA_PLATFORM_XTFPGA) := xtfpga -PLATFORM = $(platform-y) - # temporarily until string.h is fixed KBUILD_CFLAGS += -ffreestanding -D__linux__ KBUILD_CFLAGS += -pipe -mlongcalls -mtext-section-literals @@ -55,16 +53,13 @@ KBUILD_CPPFLAGS += $(patsubst %,-I$(srctree)/%include,$(vardirs) $(plfdirs)) KBUILD_DEFCONFIG := iss_defconfig -# Only build platform if it includes a Makefile -buildplf := $(shell test -e $(srctree)/arch/xtensa/platforms/$(PLATFORM)/Makefile && echo arch/xtensa/platforms/$(PLATFORM)/) - # Find libgcc.a LIBGCC := $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name) head-y := arch/xtensa/kernel/head.o core-y += arch/xtensa/kernel/ arch/xtensa/mm/ -core-y += $(buildplf) +core-y += arch/xtensa/platforms/$(platform-y)/ core-y += arch/xtensa/boot/dts/ libs-y += arch/xtensa/lib/ $(LIBGCC) -- cgit From 7b7cec477fc3cd42ce565dfc3e53f144504fc95c Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 12 Aug 2021 01:37:31 +0900 Subject: xtensa: move core-y in arch/xtensa/Makefile to arch/xtensa/Kbuild Use obj-y to clean up Makefile. 
Signed-off-by: Masahiro Yamada Message-Id: <20210811163731.186125-4-masahiroy@kernel.org> Signed-off-by: Max Filippov --- arch/xtensa/Kbuild | 1 + arch/xtensa/Makefile | 3 --- arch/xtensa/platforms/Makefile | 4 ++++ 3 files changed, 5 insertions(+), 3 deletions(-) create mode 100644 arch/xtensa/platforms/Makefile (limited to 'arch') diff --git a/arch/xtensa/Kbuild b/arch/xtensa/Kbuild index a4e40e534e6a..fd12f61745ba 100644 --- a/arch/xtensa/Kbuild +++ b/arch/xtensa/Kbuild @@ -1 +1,2 @@ # SPDX-License-Identifier: GPL-2.0-only +obj-y += kernel/ mm/ platforms/ boot/dts/ diff --git a/arch/xtensa/Makefile b/arch/xtensa/Makefile index 093e87b889be..96714ef7c89e 100644 --- a/arch/xtensa/Makefile +++ b/arch/xtensa/Makefile @@ -58,9 +58,6 @@ KBUILD_DEFCONFIG := iss_defconfig LIBGCC := $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name) head-y := arch/xtensa/kernel/head.o -core-y += arch/xtensa/kernel/ arch/xtensa/mm/ -core-y += arch/xtensa/platforms/$(platform-y)/ -core-y += arch/xtensa/boot/dts/ libs-y += arch/xtensa/lib/ $(LIBGCC) diff --git a/arch/xtensa/platforms/Makefile b/arch/xtensa/platforms/Makefile new file mode 100644 index 000000000000..e2e7e0726979 --- /dev/null +++ b/arch/xtensa/platforms/Makefile @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_XTENSA_PLATFORM_XT2000) += xt2000/ +obj-$(CONFIG_XTENSA_PLATFORM_ISS) += iss/ +obj-$(CONFIG_XTENSA_PLATFORM_XTFPGA) += xtfpga/ -- cgit From 12593568d7319c34c72038ea799ab1bd0f0eb01c Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Wed, 11 Aug 2021 18:36:25 +0100 Subject: KVM: arm64: Return -EPERM from __pkvm_host_share_hyp() Fix the error code returned by __pkvm_host_share_hyp() when the host attempts to share with EL2 a page that has already been shared with another entity. Reported-by: Will Deacon Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210811173630.2536721-1-qperret@google.com --- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 8165390d3ec9..6ec695311498 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -404,7 +404,7 @@ int __pkvm_host_share_hyp(u64 pfn) cur = kvm_pgtable_hyp_pte_prot(pte); prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED); if (!check_prot(cur, prot, ~prot)) - ret = EPERM; + ret = -EPERM; goto unlock; map_shared: -- cgit From 59a27e1122133831111f9e2e40fec2307d742487 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Wed, 21 Jul 2021 09:59:37 +0200 Subject: riscv: Optimize kernel virtual address conversion macro The current test in kernel_mapping_va_to_pa only applies when CONFIG_XIP_KERNEL is set, so use IS_ENABLED to optimize this macro at compile-time in standard kernels that do not require this test. 
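For background, IS_ENABLED(CONFIG_FOO) expands to a compile-time constant 0 or 1, so a "constant && runtime test" condition folds away completely when the option is off. A sketch of the idea with made-up symbols (xip_boundary, xip_offset and linear_offset are placeholders, not the real kernel variables):

    /*
     * With CONFIG_XIP_KERNEL=n, IS_ENABLED(CONFIG_XIP_KERNEL) is the
     * constant 0, the && short-circuits at compile time, and only the
     * plain linear-offset subtraction is emitted.
     */
    #define example_va_to_pa(va)                                        \
            ((IS_ENABLED(CONFIG_XIP_KERNEL) && (va) < xip_boundary) ?   \
                    ((va) - xip_offset) :                               \
                    ((va) - linear_offset))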
Signed-off-by: Alexandre Ghiti Tested-by: Emil Renner Berthing Reviewed-by: Jisheng Zhang Reviewed-By: Vitaly Wool Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/page.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index cca8764aed83..d40d77d76d82 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -122,7 +122,7 @@ extern struct kernel_mapping kernel_map; #define linear_mapping_va_to_pa(x) ((unsigned long)(x) - kernel_map.va_pa_offset) #define kernel_mapping_va_to_pa(y) ({ \ unsigned long _y = y; \ - (_y < kernel_map.virt_addr + XIP_OFFSET) ? \ + (IS_ENABLED(CONFIG_XIP_KERNEL) && _y < kernel_map.virt_addr + XIP_OFFSET) ? \ ((unsigned long)(_y) - kernel_map.va_kernel_xip_pa_offset) : \ ((unsigned long)(_y) - kernel_map.va_kernel_pa_offset - XIP_OFFSET); \ }) -- cgit From 0aba691a7443a7541c1dc56423e0c92cc6ea7164 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Fri, 23 Jul 2021 15:01:24 +0200 Subject: riscv: Introduce va_kernel_pa_offset for 32-bit kernel va_kernel_pa_offset was only used for 64-bit as the kernel mapping lies in the linear mapping for 32-bit kernel and then only the offset between the PAGE_OFFSET and the kernel load address is needed. But this distinction complexifies the code with #ifdefs and especially with a separate definition of the address conversions macros. Simplify the code by defining this variable for both 32-bit and 64-bit. Signed-off-by: Alexandre Ghiti Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/page.h | 15 ++------------- arch/riscv/mm/init.c | 3 --- 2 files changed, 2 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index d40d77d76d82..4da92cf28a19 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -91,10 +91,8 @@ struct kernel_mapping { uintptr_t size; /* Offset between linear mapping virtual address and kernel load address */ unsigned long va_pa_offset; -#ifdef CONFIG_64BIT /* Offset between kernel mapping virtual address and kernel load address */ unsigned long va_kernel_pa_offset; -#endif unsigned long va_kernel_xip_pa_offset; #ifdef CONFIG_XIP_KERNEL uintptr_t xiprom; @@ -104,11 +102,11 @@ struct kernel_mapping { extern struct kernel_mapping kernel_map; -#ifdef CONFIG_64BIT #define is_kernel_mapping(x) \ ((x) >= kernel_map.virt_addr && (x) < (kernel_map.virt_addr + kernel_map.size)) + #define is_linear_mapping(x) \ - ((x) >= PAGE_OFFSET && (x) < kernel_map.virt_addr) + ((x) >= PAGE_OFFSET && (!IS_ENABLED(CONFIG_64BIT) || (x) < kernel_map.virt_addr)) #define linear_mapping_pa_to_va(x) ((void *)((unsigned long)(x) + kernel_map.va_pa_offset)) #define kernel_mapping_pa_to_va(y) ({ \ @@ -132,15 +130,6 @@ extern struct kernel_mapping kernel_map; is_linear_mapping(_x) ? 
\ linear_mapping_va_to_pa(_x) : kernel_mapping_va_to_pa(_x); \ }) -#else -#define is_kernel_mapping(x) \ - ((x) >= kernel_map.virt_addr && (x) < (kernel_map.virt_addr + kernel_map.size)) -#define is_linear_mapping(x) \ - ((x) >= PAGE_OFFSET) - -#define __pa_to_va_nodebug(x) ((void *)((unsigned long) (x) + kernel_map.va_pa_offset)) -#define __va_to_pa_nodebug(x) ((unsigned long)(x) - kernel_map.va_pa_offset) -#endif /* CONFIG_64BIT */ #ifdef CONFIG_DEBUG_VIRTUAL extern phys_addr_t __virt_to_phys(unsigned long x); diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 269fc648ef3d..263ae055e81a 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -552,11 +552,8 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) kernel_map.phys_addr = (uintptr_t)(&_start); kernel_map.size = (uintptr_t)(&_end) - kernel_map.phys_addr; #endif - kernel_map.va_pa_offset = PAGE_OFFSET - kernel_map.phys_addr; -#ifdef CONFIG_64BIT kernel_map.va_kernel_pa_offset = kernel_map.virt_addr - kernel_map.phys_addr; -#endif pfn_base = PFN_DOWN(kernel_map.phys_addr); -- cgit From 526f83df1d83b9c95e571ea5d4dff12be1b215ec Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Fri, 23 Jul 2021 15:01:25 +0200 Subject: riscv: Get rid of map_size parameter to create_kernel_page_table The kernel must always be mapped using PMD_SIZE, and this is already the case, this just simplifies create_kernel_page_table. Signed-off-by: Alexandre Ghiti Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/init.c | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) (limited to 'arch') diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 263ae055e81a..83d0ed915914 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -495,36 +495,35 @@ static __init pgprot_t pgprot_from_va(uintptr_t va) #endif #ifdef CONFIG_XIP_KERNEL -static void __init create_kernel_page_table(pgd_t *pgdir, uintptr_t map_size, +static void __init create_kernel_page_table(pgd_t *pgdir, __always_unused bool early) { uintptr_t va, end_va; /* Map the flash resident part */ end_va = kernel_map.virt_addr + kernel_map.xiprom_sz; - for (va = kernel_map.virt_addr; va < end_va; va += map_size) + for (va = kernel_map.virt_addr; va < end_va; va += PMD_SIZE) create_pgd_mapping(pgdir, va, kernel_map.xiprom + (va - kernel_map.virt_addr), - map_size, PAGE_KERNEL_EXEC); + PMD_SIZE, PAGE_KERNEL_EXEC); /* Map the data in RAM */ end_va = kernel_map.virt_addr + XIP_OFFSET + kernel_map.size; - for (va = kernel_map.virt_addr + XIP_OFFSET; va < end_va; va += map_size) + for (va = kernel_map.virt_addr + XIP_OFFSET; va < end_va; va += PMD_SIZE) create_pgd_mapping(pgdir, va, kernel_map.phys_addr + (va - (kernel_map.virt_addr + XIP_OFFSET)), - map_size, PAGE_KERNEL); + PMD_SIZE, PAGE_KERNEL); } #else -static void __init create_kernel_page_table(pgd_t *pgdir, uintptr_t map_size, - bool early) +static void __init create_kernel_page_table(pgd_t *pgdir, bool early) { uintptr_t va, end_va; end_va = kernel_map.virt_addr + kernel_map.size; - for (va = kernel_map.virt_addr; va < end_va; va += map_size) + for (va = kernel_map.virt_addr; va < end_va; va += PMD_SIZE) create_pgd_mapping(pgdir, va, kernel_map.phys_addr + (va - kernel_map.virt_addr), - map_size, + PMD_SIZE, early ? 
PAGE_KERNEL_EXEC : pgprot_from_va(va)); } @@ -533,7 +532,6 @@ static void __init create_kernel_page_table(pgd_t *pgdir, uintptr_t map_size, asmlinkage void __init setup_vm(uintptr_t dtb_pa) { uintptr_t __maybe_unused pa; - uintptr_t map_size; #ifndef __PAGETABLE_PMD_FOLDED pmd_t fix_bmap_spmd, fix_bmap_epmd; #endif @@ -557,15 +555,9 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) pfn_base = PFN_DOWN(kernel_map.phys_addr); - /* - * Enforce boot alignment requirements of RV32 and - * RV64 by only allowing PMD or PGD mappings. - */ - map_size = PMD_SIZE; - /* Sanity check alignment and size */ BUG_ON((PAGE_OFFSET % PGDIR_SIZE) != 0); - BUG_ON((kernel_map.phys_addr % map_size) != 0); + BUG_ON((kernel_map.phys_addr % PMD_SIZE) != 0); pt_ops.alloc_pte = alloc_pte_early; pt_ops.get_pte_virt = get_pte_virt_early; @@ -602,7 +594,7 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) * us to reach paging_init(). We map all memory banks later * in setup_vm_final() below. */ - create_kernel_page_table(early_pg_dir, map_size, true); + create_kernel_page_table(early_pg_dir, true); #ifndef __PAGETABLE_PMD_FOLDED /* Setup early PMD for DTB */ @@ -718,7 +710,7 @@ static void __init setup_vm_final(void) #ifdef CONFIG_64BIT /* Map the kernel */ - create_kernel_page_table(swapper_pg_dir, PMD_SIZE, false); + create_kernel_page_table(swapper_pg_dir, false); #endif /* Clear fixmap PTE and PMD mappings */ -- cgit From 6f3e5fd241c33d2407f7aaa930b141af6ad7c35b Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Fri, 23 Jul 2021 15:01:26 +0200 Subject: riscv: Use __maybe_unused instead of #ifdefs around variable declarations This allows to simplify the code and make it more readable. Signed-off-by: Alexandre Ghiti Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/init.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 83d0ed915914..a456b5a68d99 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -532,9 +532,7 @@ static void __init create_kernel_page_table(pgd_t *pgdir, bool early) asmlinkage void __init setup_vm(uintptr_t dtb_pa) { uintptr_t __maybe_unused pa; -#ifndef __PAGETABLE_PMD_FOLDED - pmd_t fix_bmap_spmd, fix_bmap_epmd; -#endif + pmd_t __maybe_unused fix_bmap_spmd, fix_bmap_epmd; kernel_map.virt_addr = KERNEL_LINK_ADDR; -- cgit From 977765ce319b98939205cf07aa1d76150713c69b Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Fri, 23 Jul 2021 15:01:27 +0200 Subject: riscv: Simplify BUILTIN_DTB device tree mapping handling __PAGETABLE_PMD_FOLDED defines a 2-level page table that is only used in 32-bit kernel, so there is no need to check for CONFIG_64BIT in #ifndef __PAGETABLE_PMD_FOLDED and vice-versa. Signed-off-by: Alexandre Ghiti Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/init.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index a456b5a68d99..a93822df9d81 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -607,18 +607,14 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) pa + PMD_SIZE, PMD_SIZE, PAGE_KERNEL); dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PMD_SIZE - 1)); #else /* CONFIG_BUILTIN_DTB */ -#ifdef CONFIG_64BIT /* * __va can't be used since it would return a linear mapping address * whereas dtb_early_va will be used before setup_vm_final installs * the linear mapping. 
*/ dtb_early_va = kernel_mapping_pa_to_va(XIP_FIXUP(dtb_pa)); #endif /* CONFIG_BUILTIN_DTB */ #else /* __PAGETABLE_PMD_FOLDED */ #ifndef CONFIG_BUILTIN_DTB /* Create two consecutive PGD mappings for FDT early scan */ pa = dtb_pa & ~(PGDIR_SIZE - 1); @@ -628,13 +624,9 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) pa + PGDIR_SIZE, PGDIR_SIZE, PAGE_KERNEL); dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PGDIR_SIZE - 1)); #else /* CONFIG_BUILTIN_DTB */ -#ifdef CONFIG_64BIT - dtb_early_va = kernel_mapping_pa_to_va(XIP_FIXUP(dtb_pa)); -#else dtb_early_va = __va(dtb_pa); -#endif /* CONFIG_64BIT */ #endif /* CONFIG_BUILTIN_DTB */ -#endif +#endif /* __PAGETABLE_PMD_FOLDED */ dtb_early_pa = dtb_pa; /* -- cgit From fe45ffa4c505783637233609b677446020738b87 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Fri, 23 Jul 2021 15:01:28 +0200 Subject: riscv: Move early fdt mapping creation in its own function The code that handles the early fdt mapping is hard to read, and the mapping size it creates depends on the kernel: - for 64-bit, 2 PMD entries are used, which amounts to a 4MB mapping - for 32-bit, 2 PGDIR entries are used, which amounts to an 8MB mapping So keep using 2 PMD entries for 64-bit and use only one PGD entry for 32-bit, which is enough to cover 4MB. Move that into a new function called create_fdt_early_page_table, which follows the same naming as create_kernel_page_table. Signed-off-by: Alexandre Ghiti Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/init.c | 76 +++++++++++++++++++++++++++------------------------- 1 file changed, 40 insertions(+), 36 deletions(-) (limited to 'arch') diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index a93822df9d81..fdf093d01c6f 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -222,6 +222,7 @@ pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss; static pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss; pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE); +static pmd_t __maybe_unused early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE); #ifdef CONFIG_XIP_KERNEL #define trampoline_pg_dir ((pgd_t *)XIP_FIXUP(trampoline_pg_dir)) @@ -302,7 +303,6 @@ static void __init create_pte_mapping(pte_t *ptep, static pmd_t trampoline_pmd[PTRS_PER_PMD] __page_aligned_bss; static pmd_t fixmap_pmd[PTRS_PER_PMD] __page_aligned_bss; static pmd_t early_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE); -static pmd_t early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE); #ifdef CONFIG_XIP_KERNEL #define trampoline_pmd ((pmd_t *)XIP_FIXUP(trampoline_pmd)) @@ -388,6 +388,7 @@ static void __init create_pmd_mapping(pmd_t *pmdp, #define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \ create_pte_mapping(__nextp, __va, __pa, __sz, __prot) #define fixmap_pgd_next fixmap_pte +#define create_pmd_mapping(__pmdp, __va, __pa, __sz, __prot) #endif void __init create_pgd_mapping(pgd_t *pgdp, @@ -529,9 +530,44 @@ static void __init create_kernel_page_table(pgd_t *pgdir, bool early) } #endif +/* + * Setup a 4MB mapping that encompasses the device tree: for 64-bit kernel, + * this means 2 PMD entries whereas for 32-bit kernel, this is only 1 PGDIR + * entry. + */ +static void __init create_fdt_early_page_table(pgd_t *pgdir, uintptr_t dtb_pa) +{ +#ifndef CONFIG_BUILTIN_DTB + uintptr_t pa = dtb_pa & ~(PMD_SIZE - 1); + + create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA, + IS_ENABLED(CONFIG_64BIT) ?
(uintptr_t)early_dtb_pmd : pa, + PGDIR_SIZE, + IS_ENABLED(CONFIG_64BIT) ? PAGE_TABLE : PAGE_KERNEL); + + if (IS_ENABLED(CONFIG_64BIT)) { + create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA, + pa, PMD_SIZE, PAGE_KERNEL); + create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA + PMD_SIZE, + pa + PMD_SIZE, PMD_SIZE, PAGE_KERNEL); + } + + dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PMD_SIZE - 1)); +#else + /* + * For 64-bit kernel, __va can't be used since it would return a linear + * mapping address whereas dtb_early_va will be used before + * setup_vm_final installs the linear mapping. For 32-bit kernel, as the + * kernel is mapped in the linear mapping, that makes no difference. + */ + dtb_early_va = kernel_mapping_pa_to_va(XIP_FIXUP(dtb_pa)); +#endif + + dtb_early_pa = dtb_pa; +} + asmlinkage void __init setup_vm(uintptr_t dtb_pa) { - uintptr_t __maybe_unused pa; pmd_t __maybe_unused fix_bmap_spmd, fix_bmap_epmd; kernel_map.virt_addr = KERNEL_LINK_ADDR; @@ -594,40 +630,8 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) */ create_kernel_page_table(early_pg_dir, true); -#ifndef __PAGETABLE_PMD_FOLDED - /* Setup early PMD for DTB */ - create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA, - (uintptr_t)early_dtb_pmd, PGDIR_SIZE, PAGE_TABLE); -#ifndef CONFIG_BUILTIN_DTB - /* Create two consecutive PMD mappings for FDT early scan */ - pa = dtb_pa & ~(PMD_SIZE - 1); - create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA, - pa, PMD_SIZE, PAGE_KERNEL); - create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA + PMD_SIZE, - pa + PMD_SIZE, PMD_SIZE, PAGE_KERNEL); - dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PMD_SIZE - 1)); -#else /* CONFIG_BUILTIN_DTB */ - /* - * __va can't be used since it would return a linear mapping address - * whereas dtb_early_va will be used before setup_vm_final installs - * the linear mapping. - */ - dtb_early_va = kernel_mapping_pa_to_va(XIP_FIXUP(dtb_pa)); -#endif /* CONFIG_BUILTIN_DTB */ -#else /* __PAGETABLE_PMD_FOLDED */ -#ifndef CONFIG_BUILTIN_DTB - /* Create two consecutive PGD mappings for FDT early scan */ - pa = dtb_pa & ~(PGDIR_SIZE - 1); - create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA, - pa, PGDIR_SIZE, PAGE_KERNEL); - create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA + PGDIR_SIZE, - pa + PGDIR_SIZE, PGDIR_SIZE, PAGE_KERNEL); - dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PGDIR_SIZE - 1)); -#else /* CONFIG_BUILTIN_DTB */ - dtb_early_va = __va(dtb_pa); -#endif /* CONFIG_BUILTIN_DTB */ -#endif /* __PAGETABLE_PMD_FOLDED */ - dtb_early_pa = dtb_pa; + /* Setup early mapping for FDT early scan */ + create_fdt_early_page_table(early_pg_dir, dtb_pa); /* * Bootime fixmap only can handle PMD_SIZE mapping. Thus, boot-ioremap -- cgit From 666173ee32e2d6f2853bd54b11b8127ac97eb019 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Fri, 23 Jul 2021 10:25:35 +0800 Subject: MIPS: generic: Allow generating FIT image for Marduk board Marduk is based on the IMG Pistachio SoC. The platform uses the MIPS UHI boot protocol and has a proper devicetree implementation, so it can be part of the generic kernel.
Signed-off-by: Jiaxun Yang Signed-off-by: Thomas Bogendoerfer --- arch/mips/boot/dts/Makefile | 1 + arch/mips/boot/dts/img/Makefile | 2 ++ arch/mips/generic/Kconfig | 6 ++++++ arch/mips/generic/Platform | 1 + arch/mips/generic/board-marduk.its.S | 22 ++++++++++++++++++++++ 5 files changed, 32 insertions(+) create mode 100644 arch/mips/generic/board-marduk.its.S (limited to 'arch') diff --git a/arch/mips/boot/dts/Makefile b/arch/mips/boot/dts/Makefile index 60bd7d2a9ad8..188301164d9e 100644 --- a/arch/mips/boot/dts/Makefile +++ b/arch/mips/boot/dts/Makefile @@ -2,6 +2,7 @@ subdir-$(CONFIG_BMIPS_GENERIC) += brcm subdir-$(CONFIG_CAVIUM_OCTEON_SOC) += cavium-octeon subdir-$(CONFIG_MACH_PISTACHIO) += img +subdir-$(CONFIG_FIT_IMAGE_FDT_MARDUK) += img subdir-$(CONFIG_FIT_IMAGE_FDT_BOSTON) += img subdir-$(CONFIG_MACH_INGENIC) += ingenic subdir-$(CONFIG_LANTIQ) += lantiq diff --git a/arch/mips/boot/dts/img/Makefile b/arch/mips/boot/dts/img/Makefile index 441a3c16efb0..24f6bbeadd48 100644 --- a/arch/mips/boot/dts/img/Makefile +++ b/arch/mips/boot/dts/img/Makefile @@ -1,5 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 dtb-$(CONFIG_FIT_IMAGE_FDT_BOSTON) += boston.dtb +dtb-$(CONFIG_FIT_IMAGE_FDT_MARDUK) += pistachio_marduk.dtb + dtb-$(CONFIG_MACH_PISTACHIO) += pistachio_marduk.dtb obj-$(CONFIG_MACH_PISTACHIO) += pistachio_marduk.dtb.o diff --git a/arch/mips/generic/Kconfig b/arch/mips/generic/Kconfig index 657dd93c5e76..7dc5b3821cc6 100644 --- a/arch/mips/generic/Kconfig +++ b/arch/mips/generic/Kconfig @@ -58,6 +58,12 @@ config FIT_IMAGE_FDT_BOSTON enable this if you wish to boot on a MIPS Boston board, as it is expected by the bootloader. +config FIT_IMAGE_FDT_MARDUK + bool "Include FDT for IMG Pistachio Marduk (CI40) boards" + help + Enable this to include the FDT for the IMG Pistachio Marduk (CI40) + from Imagination Technologies in the FIT kernel image. + config FIT_IMAGE_FDT_NI169445 bool "Include FDT for NI 169445" help diff --git a/arch/mips/generic/Platform b/arch/mips/generic/Platform index b871af16b5b6..e1abc113b409 100644 --- a/arch/mips/generic/Platform +++ b/arch/mips/generic/Platform @@ -24,3 +24,4 @@ its-$(CONFIG_FIT_IMAGE_FDT_LUTON) += board-luton.its.S its-$(CONFIG_FIT_IMAGE_FDT_JAGUAR2) += board-jaguar2.its.S its-$(CONFIG_FIT_IMAGE_FDT_SERVAL) += board-serval.its.S its-$(CONFIG_FIT_IMAGE_FDT_XILFPGA) += board-xilfpga.its.S +its-$(CONFIG_FIT_IMAGE_FDT_MARDUK) += board-marduk.its.S diff --git a/arch/mips/generic/board-marduk.its.S b/arch/mips/generic/board-marduk.its.S new file mode 100644 index 000000000000..4f633794db90 --- /dev/null +++ b/arch/mips/generic/board-marduk.its.S @@ -0,0 +1,22 @@ +/ { + images { + fdt-marduk { + description = "img,pistachio-marduk Device Tree"; + data = /incbin/("boot/dts/img/pistachio_marduk.dtb"); + type = "flat_dt"; + arch = "mips"; + compression = "none"; + hash { + algo = "sha1"; + }; + }; + }; + + configurations { + conf-marduk { + description = "Marduk Linux kernel"; + kernel = "kernel"; + fdt = "fdt-marduk"; + }; + }; +}; -- cgit From d32524a2d05791181756bb4ab4f6e0628471fde2 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Fri, 23 Jul 2021 10:25:36 +0800 Subject: MIPS: DTS: Pistachio add missing cpc and cdmm CPC and CDMM addresses are adjustable, and we should tell the kernel how to place them in the devicetree. Note that the MACH_PISTACHIO code hardcoded the CDMM base to 0x1bdd0000; however, that would collide with the GIC address range. As we don't have any CDMM device on this platform, it won't be a problem.
I found another spare range, 0x1bdf0000~0x1be00000 to place CDMM instead. Signed-off-by: Jiaxun Yang Signed-off-by: Thomas Bogendoerfer --- arch/mips/boot/dts/img/pistachio.dtsi | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch') diff --git a/arch/mips/boot/dts/img/pistachio.dtsi b/arch/mips/boot/dts/img/pistachio.dtsi index dc3b7909de73..b1db8b8f446f 100644 --- a/arch/mips/boot/dts/img/pistachio.dtsi +++ b/arch/mips/boot/dts/img/pistachio.dtsi @@ -900,6 +900,16 @@ }; }; + cpc: cpc@1bde0000 { + compatible = "mti,mips-cpc"; + reg = <0x1bde0000 0x10000>; + }; + + cdmm: cdmm@1bdf0000 { + compatible = "mti,mips-cdmm"; + reg = <0x1bdf0000 0x10000>; + }; + usb_phy: usb-phy { compatible = "img,pistachio-usb-phy"; clocks = <&clk_core CLK_USB_PHY>; -- cgit From 917b64f1df2b9cec0a0859bd8d96b24679038f78 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Fri, 23 Jul 2021 10:25:41 +0800 Subject: MIPS: config: generic: Add config for Marduk board The config is minimal config to allow it boot from SD Card. Signed-off-by: Jiaxun Yang Signed-off-by: Thomas Bogendoerfer --- arch/mips/configs/generic/board-marduk.config | 53 +++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 arch/mips/configs/generic/board-marduk.config (limited to 'arch') diff --git a/arch/mips/configs/generic/board-marduk.config b/arch/mips/configs/generic/board-marduk.config new file mode 100644 index 000000000000..05ca34cd5a73 --- /dev/null +++ b/arch/mips/configs/generic/board-marduk.config @@ -0,0 +1,53 @@ +CONFIG_FIT_IMAGE_FDT_MARDUK=y + +CONFIG_SCSI=y +CONFIG_BLK_DEV_SD=y + +CONFIG_CLKSRC_PISTACHIO=y + +CONFIG_COMMON_CLK_PISTACHIO=y + +CONFIG_DMADEVICES=y +CONFIG_IMG_MDC_DMA=y + +CONFIG_GPIOLIB=y +CONFIG_GPIO_SYSFS=y +CONFIG_GPIO_PCH=y + +CONFIG_I2C=y +CONFIG_I2C_IMG=y + +CONFIG_MMC=y +CONFIG_MMC_SDHCI=y +CONFIG_MMC_DW=y +CONFIG_MMC_DW_PLTFM=y + +CONFIG_NETDEVICES=y +CONFIG_STMMAC_ETH=y +CONFIG_STMMAC_PLATFORM=y + +CONFIG_PHY_PISTACHIO_USB=y + +CONFIG_PINCTRL=y +CONFIG_PINCTRL_PISTACHIO=y + +CONFIG_RESET_PISTACHIO=y + +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_OF_PLATFORM=y +CONFIG_SERIAL_8250_DW=y + +CONFIG_SPI=y +CONFIG_SRAM=y + +CONFIG_USB=y +CONFIG_USB_EHCI_HCD=y +CONFIG_USB_OHCI_HCD=y +CONFIG_USB_DWC2=y + +CONFIG_CRYPTO_DEV_IMGTEC_HASH=y +CONFIG_IMGPDC_WDT=y +CONFIG_IR_IMG=y +CONFIG_CC10001_ADC=y +CONFIG_SND_SOC_IMG=y -- cgit From 104f942b2832ab1340dab34ae2dad8b31b772788 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Fri, 23 Jul 2021 10:25:42 +0800 Subject: MIPS: Retire MACH_PISTACHIO Now it can be replaced by generic kernel. 
Signed-off-by: Jiaxun Yang Signed-off-by: Thomas Bogendoerfer --- arch/mips/Kbuild.platforms | 1 - arch/mips/Kconfig | 30 ---- arch/mips/boot/dts/Makefile | 1 - arch/mips/boot/dts/img/Makefile | 3 - arch/mips/configs/pistachio_defconfig | 316 ---------------------------------- arch/mips/pistachio/Kconfig | 14 -- arch/mips/pistachio/Makefile | 2 - arch/mips/pistachio/Platform | 6 - arch/mips/pistachio/init.c | 125 -------------- arch/mips/pistachio/irq.c | 24 --- arch/mips/pistachio/time.c | 55 ------ 11 files changed, 577 deletions(-) delete mode 100644 arch/mips/configs/pistachio_defconfig delete mode 100644 arch/mips/pistachio/Kconfig delete mode 100644 arch/mips/pistachio/Makefile delete mode 100644 arch/mips/pistachio/Platform delete mode 100644 arch/mips/pistachio/init.c delete mode 100644 arch/mips/pistachio/irq.c delete mode 100644 arch/mips/pistachio/time.c (limited to 'arch') diff --git a/arch/mips/Kbuild.platforms b/arch/mips/Kbuild.platforms index e4f6e49417a9..584081df89c2 100644 --- a/arch/mips/Kbuild.platforms +++ b/arch/mips/Kbuild.platforms @@ -21,7 +21,6 @@ platform-$(CONFIG_MIPS_MALTA) += mti-malta/ platform-$(CONFIG_MACH_NINTENDO64) += n64/ platform-$(CONFIG_NLM_COMMON) += netlogic/ platform-$(CONFIG_PIC32MZDA) += pic32/ -platform-$(CONFIG_MACH_PISTACHIO) += pistachio/ platform-$(CONFIG_RALINK) += ralink/ platform-$(CONFIG_MIKROTIK_RB532) += rb532/ platform-$(CONFIG_SGI_IP22) += sgi-ip22/ diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index cee6087cd686..b783264243e4 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -513,35 +513,6 @@ config MACH_LOONGSON64 and Loongson-2F which will be removed), developed by the Institute of Computing Technology (ICT), Chinese Academy of Sciences (CAS). -config MACH_PISTACHIO - bool "IMG Pistachio SoC based boards" - select BOOT_ELF32 - select BOOT_RAW - select CEVT_R4K - select CLKSRC_MIPS_GIC - select COMMON_CLK - select CSRC_R4K - select DMA_NONCOHERENT - select GPIOLIB - select IRQ_MIPS_CPU - select MFD_SYSCON - select MIPS_CPU_SCACHE - select MIPS_GIC - select PINCTRL - select REGULATOR - select SYS_HAS_CPU_MIPS32_R2 - select SYS_SUPPORTS_32BIT_KERNEL - select SYS_SUPPORTS_LITTLE_ENDIAN - select SYS_SUPPORTS_MIPS_CPS - select SYS_SUPPORTS_MULTITHREADING - select SYS_SUPPORTS_RELOCATABLE - select SYS_SUPPORTS_ZBOOT - select SYS_HAS_EARLY_PRINTK - select USE_GENERIC_EARLY_PRINTK_8250 - select USE_OF - help - This enables support for the IMG Pistachio SoC platform. 
- config MIPS_MALTA bool "MIPS Malta board" select ARCH_MAY_HAVE_PC_FDC @@ -1088,7 +1059,6 @@ source "arch/mips/ingenic/Kconfig" source "arch/mips/jazz/Kconfig" source "arch/mips/lantiq/Kconfig" source "arch/mips/pic32/Kconfig" -source "arch/mips/pistachio/Kconfig" source "arch/mips/ralink/Kconfig" source "arch/mips/sgi-ip27/Kconfig" source "arch/mips/sibyte/Kconfig" diff --git a/arch/mips/boot/dts/Makefile b/arch/mips/boot/dts/Makefile index 188301164d9e..be96d35eb582 100644 --- a/arch/mips/boot/dts/Makefile +++ b/arch/mips/boot/dts/Makefile @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 subdir-$(CONFIG_BMIPS_GENERIC) += brcm subdir-$(CONFIG_CAVIUM_OCTEON_SOC) += cavium-octeon -subdir-$(CONFIG_MACH_PISTACHIO) += img subdir-$(CONFIG_FIT_IMAGE_FDT_MARDUK) += img subdir-$(CONFIG_FIT_IMAGE_FDT_BOSTON) += img subdir-$(CONFIG_MACH_INGENIC) += ingenic diff --git a/arch/mips/boot/dts/img/Makefile b/arch/mips/boot/dts/img/Makefile index 24f6bbeadd48..ebb47490b04b 100644 --- a/arch/mips/boot/dts/img/Makefile +++ b/arch/mips/boot/dts/img/Makefile @@ -2,6 +2,3 @@ dtb-$(CONFIG_FIT_IMAGE_FDT_BOSTON) += boston.dtb dtb-$(CONFIG_FIT_IMAGE_FDT_MARDUK) += pistachio_marduk.dtb - -dtb-$(CONFIG_MACH_PISTACHIO) += pistachio_marduk.dtb -obj-$(CONFIG_MACH_PISTACHIO) += pistachio_marduk.dtb.o diff --git a/arch/mips/configs/pistachio_defconfig b/arch/mips/configs/pistachio_defconfig deleted file mode 100644 index b9adf15ebbec..000000000000 --- a/arch/mips/configs/pistachio_defconfig +++ /dev/null @@ -1,316 +0,0 @@ -# CONFIG_LOCALVERSION_AUTO is not set -CONFIG_DEFAULT_HOSTNAME="localhost" -CONFIG_SYSVIPC=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y -CONFIG_PREEMPT_VOLUNTARY=y -CONFIG_IKCONFIG=m -CONFIG_IKCONFIG_PROC=y -CONFIG_LOG_BUF_SHIFT=18 -CONFIG_CGROUPS=y -CONFIG_CGROUP_SCHED=y -CONFIG_CFS_BANDWIDTH=y -CONFIG_CGROUP_FREEZER=y -CONFIG_NAMESPACES=y -CONFIG_USER_NS=y -CONFIG_BLK_DEV_INITRD=y -# CONFIG_RD_BZIP2 is not set -# CONFIG_RD_LZMA is not set -# CONFIG_RD_LZO is not set -# CONFIG_RD_LZ4 is not set -CONFIG_CC_OPTIMIZE_FOR_SIZE=y -CONFIG_EMBEDDED=y -# CONFIG_COMPAT_BRK is not set -CONFIG_PROFILING=y -CONFIG_MACH_PISTACHIO=y -CONFIG_MIPS_CPS=y -CONFIG_NR_CPUS=4 -CONFIG_PM_DEBUG=y -CONFIG_PM_ADVANCED_DEBUG=y -CONFIG_CPU_IDLE=y -# CONFIG_MIPS_CPS_CPUIDLE is not set -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -CONFIG_MODULE_FORCE_UNLOAD=y -CONFIG_PARTITION_ADVANCED=y -# CONFIG_COMPACTION is not set -CONFIG_DEFAULT_MMAP_MIN_ADDR=32768 -CONFIG_ZSMALLOC=y -CONFIG_NET=y -CONFIG_PACKET=y -CONFIG_UNIX=y -CONFIG_NET_KEY=m -CONFIG_INET=y -CONFIG_IP_MULTICAST=y -CONFIG_IP_ADVANCED_ROUTER=y -CONFIG_IP_MULTIPLE_TABLES=y -CONFIG_IP_ROUTE_MULTIPATH=y -CONFIG_IP_ROUTE_VERBOSE=y -CONFIG_IP_PNP=y -CONFIG_IP_PNP_DHCP=y -CONFIG_IP_MROUTE=y -CONFIG_IP_PIMSM_V1=y -CONFIG_IP_PIMSM_V2=y -CONFIG_SYN_COOKIES=y -CONFIG_INET_AH=m -CONFIG_INET_ESP=m -CONFIG_INET_IPCOMP=m -CONFIG_INET_XFRM_MODE_TRANSPORT=m -CONFIG_INET_XFRM_MODE_TUNNEL=m -CONFIG_INET_XFRM_MODE_BEET=m -# CONFIG_INET_DIAG is not set -CONFIG_TCP_CONG_ADVANCED=y -# CONFIG_TCP_CONG_BIC is not set -# CONFIG_TCP_CONG_WESTWOOD is not set -# CONFIG_TCP_CONG_HTCP is not set -CONFIG_TCP_CONG_LP=m -CONFIG_TCP_MD5SIG=y -CONFIG_INET6_AH=m -CONFIG_INET6_ESP=m -CONFIG_INET6_XFRM_MODE_TRANSPORT=m -CONFIG_INET6_XFRM_MODE_TUNNEL=m -CONFIG_INET6_XFRM_MODE_BEET=m -CONFIG_IPV6_SIT=m -CONFIG_NETWORK_SECMARK=y -CONFIG_NETFILTER=y -# CONFIG_BRIDGE_NETFILTER is not set -CONFIG_NF_CONNTRACK=y -CONFIG_NF_CT_NETLINK=y -CONFIG_NETFILTER_XT_MARK=m -CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y 
-CONFIG_NETFILTER_XT_TARGET_DSCP=y -CONFIG_NETFILTER_XT_TARGET_NFLOG=y -CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y -CONFIG_NETFILTER_XT_TARGET_SECMARK=y -CONFIG_NETFILTER_XT_TARGET_TCPMSS=m -CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y -CONFIG_NETFILTER_XT_MATCH_DSCP=y -CONFIG_NETFILTER_XT_MATCH_POLICY=y -CONFIG_NETFILTER_XT_MATCH_STATE=y -CONFIG_NF_NAT_IPV4=m -CONFIG_IP_NF_IPTABLES=y -CONFIG_IP_NF_FILTER=y -CONFIG_IP_NF_TARGET_REJECT=y -CONFIG_IP_NF_MANGLE=y -CONFIG_NF_NAT_IPV6=m -CONFIG_IP6_NF_IPTABLES=m -CONFIG_IP6_NF_MATCH_IPV6HEADER=m -CONFIG_IP6_NF_FILTER=m -CONFIG_IP6_NF_TARGET_REJECT=m -CONFIG_IP6_NF_MANGLE=m -CONFIG_BRIDGE=m -CONFIG_VLAN_8021Q=m -CONFIG_NET_SCHED=y -CONFIG_NET_SCH_HTB=m -CONFIG_NET_SCH_CODEL=m -CONFIG_NET_SCH_FQ_CODEL=m -CONFIG_NET_CLS_U32=m -CONFIG_CLS_U32_MARK=y -CONFIG_BT=m -CONFIG_BT_RFCOMM=m -CONFIG_BT_HCIBTUSB=m -CONFIG_BT_HCIBFUSB=m -CONFIG_BT_HCIVHCI=m -CONFIG_CFG80211=m -CONFIG_NL80211_TESTMODE=y -CONFIG_CFG80211_DEBUGFS=y -CONFIG_CFG80211_WEXT=y -CONFIG_MAC80211=m -CONFIG_MAC80211_LEDS=y -CONFIG_MAC80211_DEBUGFS=y -CONFIG_MAC80211_DEBUG_MENU=y -CONFIG_MAC80211_VERBOSE_DEBUG=y -CONFIG_RFKILL=y -CONFIG_DEVTMPFS=y -CONFIG_DEVTMPFS_MOUNT=y -CONFIG_DEBUG_DEVRES=y -CONFIG_CONNECTOR=y -CONFIG_MTD=y -CONFIG_MTD_BLOCK=y -CONFIG_MTD_SPI_NOR=y -CONFIG_MTD_UBI=y -CONFIG_MTD_UBI_BLOCK=y -CONFIG_ZRAM=m -CONFIG_BLK_DEV_LOOP=y -CONFIG_SCSI=y -CONFIG_BLK_DEV_SD=y -CONFIG_BLK_DEV_SR=m -CONFIG_SCSI_SPI_ATTRS=y -CONFIG_MD=y -CONFIG_BLK_DEV_DM=y -CONFIG_DM_CRYPT=y -CONFIG_DM_VERITY=y -CONFIG_NETDEVICES=y -CONFIG_TUN=m -CONFIG_VETH=m -# CONFIG_NET_VENDOR_MARVELL is not set -# CONFIG_NET_VENDOR_MICREL is not set -# CONFIG_NET_VENDOR_MICROCHIP is not set -# CONFIG_NET_VENDOR_NATSEMI is not set -# CONFIG_NET_VENDOR_SEEQ is not set -# CONFIG_NET_VENDOR_SMSC is not set -CONFIG_STMMAC_ETH=y -# CONFIG_NET_VENDOR_VIA is not set -CONFIG_PPP=m -CONFIG_PPP_ASYNC=m -CONFIG_USB_PEGASUS=m -CONFIG_USB_RTL8150=m -CONFIG_USB_RTL8152=m -CONFIG_USB_NET_DM9601=m -CONFIG_USB_NET_SMSC75XX=m -CONFIG_USB_NET_SMSC95XX=m -CONFIG_USB_NET_MCS7830=m -# CONFIG_USB_NET_CDC_SUBSET is not set -# CONFIG_USB_NET_ZAURUS is not set -CONFIG_HOSTAP=m -CONFIG_HOSTAP_FIRMWARE=y -CONFIG_HOSTAP_FIRMWARE_NVRAM=y -CONFIG_LIBERTAS_THINFIRM=m -CONFIG_RT2X00=m -CONFIG_RT2800USB=m -CONFIG_MAC80211_HWSIM=m -CONFIG_USB_NET_RNDIS_WLAN=m -CONFIG_INPUT_EVDEV=y -# CONFIG_KEYBOARD_ATKBD is not set -CONFIG_KEYBOARD_GPIO=y -# CONFIG_INPUT_MOUSE is not set -# CONFIG_SERIO is not set -# CONFIG_VT is not set -# CONFIG_LEGACY_PTYS is not set -CONFIG_SERIAL_8250=y -# CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set -CONFIG_SERIAL_8250_CONSOLE=y -CONFIG_SERIAL_8250_DW=y -CONFIG_SERIAL_OF_PLATFORM=y -CONFIG_HW_RANDOM=y -CONFIG_TCG_TPM=y -CONFIG_I2C=y -CONFIG_I2C_CHARDEV=m -CONFIG_I2C_IMG=y -CONFIG_I2C_STUB=m -CONFIG_SPI=y -CONFIG_SPI_BITBANG=m -CONFIG_SPI_IMG_SPFI=y -CONFIG_SPI_SPIDEV=y -CONFIG_DEBUG_GPIO=y -CONFIG_GPIO_SYSFS=y -CONFIG_POWER_SUPPLY=y -CONFIG_THERMAL=y -CONFIG_WATCHDOG=y -CONFIG_IMGPDC_WDT=y -CONFIG_REGULATOR_FIXED_VOLTAGE=y -CONFIG_REGULATOR_GPIO=y -CONFIG_RC_CORE=y -CONFIG_RC_DEVICES=y -CONFIG_IR_IMG=y -CONFIG_IR_IMG_NEC=y -CONFIG_IR_IMG_JVC=y -CONFIG_IR_IMG_SONY=y -CONFIG_IR_IMG_SHARP=y -CONFIG_IR_IMG_SANYO=y -CONFIG_IR_IMG_RC5=y -CONFIG_IR_IMG_RC6=y -CONFIG_MEDIA_SUPPORT=y -CONFIG_FB=y -CONFIG_FB_MODE_HELPERS=y -# CONFIG_LCD_CLASS_DEVICE is not set -CONFIG_BACKLIGHT_CLASS_DEVICE=y -CONFIG_SOUND=y -CONFIG_SND=y -CONFIG_SND_HRTIMER=m -CONFIG_SND_DYNAMIC_MINORS=y -CONFIG_SND_SEQUENCER=m -CONFIG_SND_SEQ_DUMMY=m -# CONFIG_SND_SPI is not 
set -CONFIG_SND_USB_AUDIO=m -CONFIG_USB=y -CONFIG_USB_ANNOUNCE_NEW_DEVICES=y -# CONFIG_USB_DEFAULT_PERSIST is not set -CONFIG_USB_MON=y -CONFIG_USB_EHCI_HCD=y -CONFIG_USB_EHCI_ROOT_HUB_TT=y -CONFIG_USB_ACM=y -CONFIG_USB_STORAGE=y -CONFIG_USB_DWC2=y -CONFIG_USB_SERIAL=y -CONFIG_USB_SERIAL_GENERIC=y -CONFIG_USB_SERIAL_CP210X=m -CONFIG_USB_SERIAL_FTDI_SIO=m -CONFIG_USB_SERIAL_KEYSPAN=m -CONFIG_USB_SERIAL_PL2303=m -CONFIG_USB_SERIAL_OTI6858=m -CONFIG_USB_SERIAL_QUALCOMM=m -CONFIG_USB_SERIAL_SIERRAWIRELESS=m -CONFIG_USB_SERIAL_OPTION=m -CONFIG_MMC=y -CONFIG_MMC_BLOCK_MINORS=16 -CONFIG_MMC_TEST=m -CONFIG_MMC_DW=y -CONFIG_NEW_LEDS=y -CONFIG_LEDS_CLASS=y -CONFIG_RTC_CLASS=y -CONFIG_DMADEVICES=y -CONFIG_IMG_MDC_DMA=y -CONFIG_STAGING=y -CONFIG_ASHMEM=y -# CONFIG_IOMMU_SUPPORT is not set -CONFIG_MEMORY=y -CONFIG_IIO=y -CONFIG_CC10001_ADC=y -CONFIG_PWM=y -CONFIG_PWM_IMG=y -CONFIG_PHY_PISTACHIO_USB=y -CONFIG_ANDROID=y -CONFIG_EXT4_FS=y -CONFIG_EXT4_FS_POSIX_ACL=y -CONFIG_EXT4_FS_SECURITY=y -# CONFIG_DNOTIFY is not set -CONFIG_FUSE_FS=m -CONFIG_ISO9660_FS=m -CONFIG_JOLIET=y -CONFIG_ZISOFS=y -CONFIG_UDF_FS=m -CONFIG_VFAT_FS=m -CONFIG_TMPFS=y -CONFIG_TMPFS_POSIX_ACL=y -CONFIG_ECRYPT_FS=y -CONFIG_HFSPLUS_FS=m -CONFIG_UBIFS_FS=y -CONFIG_SQUASHFS=y -CONFIG_SQUASHFS_FILE_DIRECT=y -CONFIG_SQUASHFS_LZO=y -CONFIG_PSTORE=y -CONFIG_PSTORE_CONSOLE=y -CONFIG_PSTORE_RAM=y -CONFIG_NFS_FS=y -CONFIG_ROOT_NFS=y -CONFIG_NLS_DEFAULT="utf8" -CONFIG_NLS_CODEPAGE_437=m -CONFIG_NLS_ASCII=m -CONFIG_NLS_ISO8859_1=m -CONFIG_SECURITY=y -CONFIG_SECURITY_NETWORK=y -CONFIG_SECURITY_YAMA=y -CONFIG_CRYPTO_AUTHENC=y -CONFIG_CRYPTO_HMAC=y -CONFIG_CRYPTO_SHA1=y -CONFIG_CRYPTO_SHA256=y -CONFIG_CRYPTO_SHA512=m -CONFIG_CRYPTO_ARC4=y -CONFIG_CRYPTO_DES=y -CONFIG_CRC_CCITT=y -CONFIG_CRC_T10DIF=m -CONFIG_CRC7=m -# CONFIG_XZ_DEC_X86 is not set -CONFIG_PRINTK_TIME=y -CONFIG_DEBUG_INFO=y -CONFIG_MAGIC_SYSRQ=y -CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE=0 -# CONFIG_SCHED_DEBUG is not set -CONFIG_SCHEDSTATS=y -CONFIG_DEBUG_SPINLOCK=y -CONFIG_DEBUG_CREDENTIALS=y -CONFIG_FUNCTION_TRACER=y -CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_LKDTM=y -CONFIG_TEST_UDELAY=m diff --git a/arch/mips/pistachio/Kconfig b/arch/mips/pistachio/Kconfig deleted file mode 100644 index 9a0e06c95184..000000000000 --- a/arch/mips/pistachio/Kconfig +++ /dev/null @@ -1,14 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -config PISTACHIO_GPTIMER_CLKSRC - bool "Enable General Purpose Timer based clocksource" - depends on MACH_PISTACHIO - select CLKSRC_PISTACHIO - select MIPS_EXTERNAL_TIMER - help - This option enables a clocksource driver based on a Pistachio - SoC General Purpose external timer. - - If you want to enable the CPUFreq, you need to enable - this option. - - If you don't want to enable CPUFreq, you can leave this disabled. 
diff --git a/arch/mips/pistachio/Makefile b/arch/mips/pistachio/Makefile deleted file mode 100644 index 66f4af17fb66..000000000000 --- a/arch/mips/pistachio/Makefile +++ /dev/null @@ -1,2 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -obj-y += init.o irq.o time.o diff --git a/arch/mips/pistachio/Platform b/arch/mips/pistachio/Platform deleted file mode 100644 index c59de86dbddf..000000000000 --- a/arch/mips/pistachio/Platform +++ /dev/null @@ -1,6 +0,0 @@ -# -# IMG Pistachio SoC -# -load-$(CONFIG_MACH_PISTACHIO) += 0xffffffff80400000 -zload-$(CONFIG_MACH_PISTACHIO) += 0xffffffff81000000 -all-$(CONFIG_MACH_PISTACHIO) := uImage.gz diff --git a/arch/mips/pistachio/init.c b/arch/mips/pistachio/init.c deleted file mode 100644 index e0bacfc3c6b4..000000000000 --- a/arch/mips/pistachio/init.c +++ /dev/null @@ -1,125 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Pistachio platform setup - * - * Copyright (C) 2014 Google, Inc. - * Copyright (C) 2016 Imagination Technologies - */ - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -/* - * Core revision register decoding - * Bits 23 to 20: Major rev - * Bits 15 to 8: Minor rev - * Bits 7 to 0: Maintenance rev - */ -#define PISTACHIO_CORE_REV_REG 0xB81483D0 -#define PISTACHIO_CORE_REV_A1 0x00100006 -#define PISTACHIO_CORE_REV_B0 0x00100106 - -const char *get_system_type(void) -{ - u32 core_rev; - const char *sys_type; - - core_rev = __raw_readl((const void *)PISTACHIO_CORE_REV_REG); - - switch (core_rev) { - case PISTACHIO_CORE_REV_B0: - sys_type = "IMG Pistachio SoC (B0)"; - break; - - case PISTACHIO_CORE_REV_A1: - sys_type = "IMG Pistachio SoC (A1)"; - break; - - default: - sys_type = "IMG Pistachio SoC"; - break; - } - - return sys_type; -} - -void __init *plat_get_fdt(void) -{ - if (fw_arg0 != -2) - panic("Device-tree not present"); - return (void *)fw_arg1; -} - -void __init plat_mem_setup(void) -{ - __dt_setup_arch(plat_get_fdt()); -} - -#define DEFAULT_CPC_BASE_ADDR 0x1bde0000 -#define DEFAULT_CDMM_BASE_ADDR 0x1bdd0000 - -phys_addr_t mips_cpc_default_phys_base(void) -{ - return DEFAULT_CPC_BASE_ADDR; -} - -phys_addr_t mips_cdmm_phys_base(void) -{ - return DEFAULT_CDMM_BASE_ADDR; -} - -static void __init mips_nmi_setup(void) -{ - void *base; - - base = cpu_has_veic ? - (void *)(CAC_BASE + 0xa80) : - (void *)(CAC_BASE + 0x380); - memcpy(base, except_vec_nmi, 0x80); - flush_icache_range((unsigned long)base, - (unsigned long)base + 0x80); -} - -static void __init mips_ejtag_setup(void) -{ - void *base; - extern char except_vec_ejtag_debug[]; - - base = cpu_has_veic ? - (void *)(CAC_BASE + 0xa00) : - (void *)(CAC_BASE + 0x300); - memcpy(base, except_vec_ejtag_debug, 0x80); - flush_icache_range((unsigned long)base, - (unsigned long)base + 0x80); -} - -void __init prom_init(void) -{ - board_nmi_handler_setup = mips_nmi_setup; - board_ejtag_handler_setup = mips_ejtag_setup; - - mips_cm_probe(); - mips_cpc_probe(); - register_cps_smp_ops(); - - pr_info("SoC Type: %s\n", get_system_type()); -} - -void __init device_tree_init(void) -{ - if (!initial_boot_params) - return; - - unflatten_and_copy_device_tree(); -} diff --git a/arch/mips/pistachio/irq.c b/arch/mips/pistachio/irq.c deleted file mode 100644 index 437c3101ac45..000000000000 --- a/arch/mips/pistachio/irq.c +++ /dev/null @@ -1,24 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Pistachio IRQ setup - * - * Copyright (C) 2014 Google, Inc. 
- */ - -#include -#include -#include - -#include -#include - -void __init arch_init_irq(void) -{ - pr_info("EIC is %s\n", cpu_has_veic ? "on" : "off"); - pr_info("VINT is %s\n", cpu_has_vint ? "on" : "off"); - - if (!cpu_has_veic) - mips_cpu_irq_init(); - - irqchip_init(); -} diff --git a/arch/mips/pistachio/time.c b/arch/mips/pistachio/time.c deleted file mode 100644 index de64751dec40..000000000000 --- a/arch/mips/pistachio/time.c +++ /dev/null @@ -1,55 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Pistachio clocksource/timer setup - * - * Copyright (C) 2014 Google, Inc. - */ - -#include -#include -#include -#include -#include - -#include -#include - -unsigned int get_c0_compare_int(void) -{ - return gic_get_c0_compare_int(); -} - -int get_c0_perfcount_int(void) -{ - return gic_get_c0_perfcount_int(); -} -EXPORT_SYMBOL_GPL(get_c0_perfcount_int); - -int get_c0_fdc_int(void) -{ - return gic_get_c0_fdc_int(); -} - -void __init plat_time_init(void) -{ - struct device_node *np; - struct clk *clk; - - of_clk_init(NULL); - timer_probe(); - - np = of_get_cpu_node(0, NULL); - if (!np) { - pr_err("Failed to get CPU node\n"); - return; - } - - clk = of_clk_get(np, 0); - if (IS_ERR(clk)) { - pr_err("Failed to get CPU clock: %ld\n", PTR_ERR(clk)); - return; - } - - mips_hpt_frequency = clk_get_rate(clk) / 2; - clk_put(clk); -} -- cgit From 3f66601ef3f3e336c1f8181f183001b58be62067 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Fri, 23 Jul 2021 10:25:43 +0800 Subject: MIPS: Make an alias for pistachio_defconfig For those who miss the old defconfig, make an alias to the generic kernel. Signed-off-by: Jiaxun Yang Signed-off-by: Thomas Bogendoerfer --- arch/mips/Makefile | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/mips/Makefile b/arch/mips/Makefile index 4e942b7ef022..6eaeee3fb46c 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -560,6 +560,9 @@ sead3micro_defconfig-y := micro32r2el_defconfig BOARDS=sead-3 legacy_defconfigs += xilfpga_defconfig xilfpga_defconfig-y := 32r2el_defconfig BOARDS=xilfpga +legacy_defconfigs += pistachio_defconfig +pistachio_defconfig-y := 32r2el_defconfig BOARDS=marduk + .PHONY: $(legacy_defconfigs) $(legacy_defconfigs): $(Q)$(MAKE) -f $(srctree)/Makefile $($@-y) -- cgit From 4d2ee1be4c2a5552f083b6d7db4be224f96313b5 Mon Sep 17 00:00:00 2001 From: Huilong Deng Date: Wed, 11 Aug 2021 12:36:15 +0800 Subject: MIPS: generic: Return true/false (not 1/0) from bool functions ./arch/mips/generic/board-ocelot.c:29:9-10: WARNING: return of 0/1 in function 'ocelot_detect' with return type bool Signed-off-by: Huilong Deng Signed-off-by: Thomas Bogendoerfer --- arch/mips/generic/board-ocelot.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/mips/generic/board-ocelot.c b/arch/mips/generic/board-ocelot.c index c238e95190ac..7115410acb4f 100644 --- a/arch/mips/generic/board-ocelot.c +++ b/arch/mips/generic/board-ocelot.c @@ -26,13 +26,13 @@ static __init bool ocelot_detect(void) tlb_probe_hazard(); idx = read_c0_index(); if (idx < 0) - return 0; + return false; /* A TLB entry exists, lets assume its usable and check the CHIP ID */ rev = __raw_readl((void __iomem *)DEVCPU_GCB_CHIP_REGS_CHIP_ID); if ((rev & CHIP_ID_PART_ID) != OCELOT_PART_ID) - return 0; + return false; /* Copy command line from bootloader early for Initrd detection */ if (fw_arg0 < 10 && (fw_arg1 & 0xFFF00000) == 0x80000000) { @@ -44,7 +44,7 @@ static __init bool ocelot_detect(void) strcpy(arcs_cmdline, prom_argv[1]); } - return 
1; + return true; } static void __init ocelot_earlyprintk_init(void) -- cgit From e15ac2080ec2fde6ebc59f1c8afdf7c52374035e Mon Sep 17 00:00:00 2001 From: Krzysztof Wilczyński Date: Thu, 12 Aug 2021 17:17:17 +0000 Subject: x86/PCI: Add pci_numachip_init() declaration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit numachip.c defines pci_numachip_init(), but neglected to include its declaration, causing the following sparse and compile time warnings: arch/x86/pci/numachip.c:108:12: warning: no previous prototype for function 'pci_numachip_init' [-Wmissing-prototypes] arch/x86/pci/numachip.c:108:12: warning: symbol 'pci_numachip_init' was not declared. Should it be static? Include asm/numachip/numachip.h, which includes the missing declaration. Link: https://lore.kernel.org/r/20210812171717.1471243-1-kw@linux.com Signed-off-by: Krzysztof Wilczyński Signed-off-by: Bjorn Helgaas --- arch/x86/pci/numachip.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/pci/numachip.c b/arch/x86/pci/numachip.c index 01a085d9135a..4f0147d4e225 100644 --- a/arch/x86/pci/numachip.c +++ b/arch/x86/pci/numachip.c @@ -12,6 +12,7 @@ #include #include +#include static u8 limit __read_mostly; -- cgit From 34e9f860071f717965f1816171a11eaf2d378ee0 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 10 Aug 2021 01:43:05 +0800 Subject: KVM: X86: Remove unneeded KVM_DEBUGREG_RELOAD Commit ae561edeb421 ("KVM: x86: DR0-DR3 are not clear on reset") added code to ensure eff_db are updated when they're modified through non-standard paths. But there is no reason to also update hardware DRs unless hardware breakpoints are active or DR exiting is disabled, and in those cases updating hardware is handled by KVM_DEBUGREG_WONT_EXIT and KVM_DEBUGREG_BP_ENABLED. KVM_DEBUGREG_RELOAD just causes unnecessary loads of hardware DRs and is better removed.
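For reference, the guest-entry gating this change relies on reduces to roughly the following (abridged from the vcpu_enter_guest() hunk in the diff below; not a complete listing):

        if (unlikely(vcpu->arch.switch_db_regs)) {
                /* Breakpoints are active or DR exiting is disabled, so the
                 * effective debug registers must actually reach hardware. */
                set_debugreg(vcpu->arch.eff_db[0], 0);
                set_debugreg(vcpu->arch.eff_db[1], 1);
                set_debugreg(vcpu->arch.eff_db[2], 2);
                set_debugreg(vcpu->arch.eff_db[3], 3);
                set_debugreg(vcpu->arch.dr6, 6);
        } else if (unlikely(hw_breakpoint_active())) {
                /* Only the host uses breakpoints: keep DR7 clear for the guest. */
                set_debugreg(0, 7);
        }

With KVM_DEBUGREG_RELOAD gone, kvm_update_dr0123() merely refreshes the eff_db[] cache, and the two surviving flags decide when the loads above actually happen.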
Suggested-by: Sean Christopherson Signed-off-by: Lai Jiangshan Message-Id: <20210809174307.145263-1-jiangshanlai@gmail.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 - arch/x86/kvm/x86.c | 3 --- 2 files changed, 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 07d2652c6d61..5e35334980a6 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -523,7 +523,6 @@ struct kvm_pmu_ops; enum { KVM_DEBUGREG_BP_ENABLED = 1, KVM_DEBUGREG_WONT_EXIT = 2, - KVM_DEBUGREG_RELOAD = 4, }; struct kvm_mtrr_range { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3cedc7cc132a..eca5f005a7fc 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1181,7 +1181,6 @@ static void kvm_update_dr0123(struct kvm_vcpu *vcpu) if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { for (i = 0; i < KVM_NR_DB_REGS; i++) vcpu->arch.eff_db[i] = vcpu->arch.db[i]; - vcpu->arch.switch_db_regs |= KVM_DEBUGREG_RELOAD; } } @@ -9604,7 +9603,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) set_debugreg(vcpu->arch.eff_db[2], 2); set_debugreg(vcpu->arch.eff_db[3], 3); set_debugreg(vcpu->arch.dr6, 6); - vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD; } else if (unlikely(hw_breakpoint_active())) { set_debugreg(0, 7); } @@ -9634,7 +9632,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) static_call(kvm_x86_sync_dirty_debug_regs)(vcpu); kvm_update_dr0123(vcpu); kvm_update_dr7(vcpu); - vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD; } /* -- cgit From 375e28ffc0cf4fc48862c03994ec4a93254cf1c6 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 10 Aug 2021 06:07:06 -0400 Subject: KVM: X86: Set host DR6 only on VMX and for KVM_DEBUGREG_WONT_EXIT Commit c77fb5fe6f03 ("KVM: x86: Allow the guest to run with dirty debug registers") allows the guest to access DRs without exiting when KVM_DEBUGREG_WONT_EXIT, and we need to ensure that they are synchronized on entry to the guest---including DR6, which was not synced before the commit. But the commit sets the hardware DR6 not only when KVM_DEBUGREG_WONT_EXIT, but also when KVM_DEBUGREG_BP_ENABLED. The second case is unnecessary and just adds one more case that leaks stale DR6 to the host, which then has to be resolved by unconditionally resetting DR6 in kvm_arch_vcpu_put(). Even with KVM_DEBUGREG_WONT_EXIT, however, setting the host DR6 only matters on VMX because SVM always uses the DR6 value from the VMCB. So move this line to vmx.c and make it conditional on KVM_DEBUGREG_WONT_EXIT. Reported-by: Lai Jiangshan Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 4 ++++ arch/x86/kvm/x86.c | 1 - 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index ae8e62df16dd..7c1c8ef1a286 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6625,6 +6625,10 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu) vmx->loaded_vmcs->host_state.cr4 = cr4; } + /* When KVM_DEBUGREG_WONT_EXIT, dr6 is accessible in guest. */ + if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) + set_debugreg(vcpu->arch.dr6, 6); + /* When single-stepping over STI and MOV SS, we must clear the * corresponding interruptibility bits in the guest state. 
Otherwise * vmentry fails as it then expects bit 14 (BS) in pending debug diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index eca5f005a7fc..15ac8645e532 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9602,7 +9602,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) set_debugreg(vcpu->arch.eff_db[1], 1); set_debugreg(vcpu->arch.eff_db[2], 2); set_debugreg(vcpu->arch.eff_db[3], 3); - set_debugreg(vcpu->arch.dr6, 6); } else if (unlikely(hw_breakpoint_active())) { set_debugreg(0, 7); } -- cgit From 1ccb6f983a063e794daeb03f90b3517f87dfae8f Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 10 Aug 2021 06:11:35 -0400 Subject: KVM: VMX: Reset DR6 only when KVM_DEBUGREG_WONT_EXIT The commit efdab992813fb ("KVM: x86: fix escape of guest dr6 to the host") fixed a bug by resetting DR6 unconditionally when the vcpu is being scheduled out. But writing to debug registers is slow, and it can be visible in perf results sometimes, even if neither the host nor the guest activate breakpoints. Since KVM_DEBUGREG_WONT_EXIT on Intel processors is the only case where DR6 gets the guest value, and it never happens at all on SVM, the register can be cleared in vmx.c right after reading it. Reported-by: Lai Jiangshan Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 6 ++++++ arch/x86/kvm/x86.c | 6 ------ 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 7c1c8ef1a286..123ecca91c27 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -5110,6 +5110,12 @@ static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu) vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT; exec_controls_setbit(to_vmx(vcpu), CPU_BASED_MOV_DR_EXITING); + + /* + * exc_debug expects dr6 to be cleared after it runs, avoid that it sees + * a stale dr6 from the guest. + */ + set_debugreg(DR6_RESERVED, 6); } static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 15ac8645e532..70d42b50199a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4310,12 +4310,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) static_call(kvm_x86_vcpu_put)(vcpu); vcpu->arch.last_host_tsc = rdtsc(); - /* - * If userspace has set any breakpoints or watchpoints, dr6 is restored - * on every vmexit, but if not, we might have a stale dr6 from the - * guest. do_debug expects dr6 to be cleared after it runs, do the same. - */ - set_debugreg(0, 6); } static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, -- cgit From 389ab25216c9d09e0d335e764eeeb84c2089614f Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 10 Aug 2021 10:19:50 -0700 Subject: KVM: nVMX: Pull KVM L0's desired controls directly from vmcs01 When preparing controls for vmcs02, grab KVM's desired controls from vmcs01's shadow state instead of recalculating the controls from scratch, or, in the secondary execution controls, instead of using the dedicated cache. Calculating secondary exec controls is eye-poppingly expensive due to the guest CPUID checks, hence the dedicated cache, but the other calculations aren't exactly free either. Explicitly clear several bits (x2APIC, DESC exiting, and load EFER on exit) as appropriate as they may be set in vmcs01, whereas the previous implementation relied on dynamic bits being cleared in the calculator. Intentionally propagate VM_{ENTRY,EXIT}_LOAD_IA32_PERF_GLOBAL_CTRL from vmcs01 to vmcs02. 
Whether or not PERF_GLOBAL_CTRL is loaded depends on whether or not perf itself is active, so unless perf stops between the exit from L1 and entry to L2, vmcs01 will hold the desired value. This is purely an optimization as atomic_switch_perf_msrs() will set/clear the control as needed at VM-Enter, i.e. it avoids two extra VMWRITEs in the case where perf is active (versus starting with the bits clear in vmcs02, which was the previous behavior). Cc: Zeng Guang Signed-off-by: Sean Christopherson Message-Id: <20210810171952.2758100-3-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 25 ++++++++++++++++--------- arch/x86/kvm/vmx/vmx.h | 6 +++++- 2 files changed, 21 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 0d0dd6580cfd..264a9f4c9179 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2170,7 +2170,8 @@ static void prepare_vmcs02_early_rare(struct vcpu_vmx *vmx, } } -static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) +static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs01, + struct vmcs12 *vmcs12) { u32 exec_control; u64 guest_efer = nested_vmx_calc_efer(vmx, vmcs12); @@ -2181,7 +2182,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) /* * PIN CONTROLS */ - exec_control = vmx_pin_based_exec_ctrl(vmx); + exec_control = __pin_controls_get(vmcs01); exec_control |= (vmcs12->pin_based_vm_exec_control & ~PIN_BASED_VMX_PREEMPTION_TIMER); @@ -2197,7 +2198,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) /* * EXEC CONTROLS */ - exec_control = vmx_exec_control(vmx); /* L0's desires */ + exec_control = __exec_controls_get(vmcs01); /* L0's desires */ exec_control &= ~CPU_BASED_INTR_WINDOW_EXITING; exec_control &= ~CPU_BASED_NMI_WINDOW_EXITING; exec_control &= ~CPU_BASED_TPR_SHADOW; @@ -2234,10 +2235,11 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) * SECONDARY EXEC CONTROLS */ if (cpu_has_secondary_exec_ctrls()) { - exec_control = vmx->secondary_exec_control; + exec_control = __secondary_exec_controls_get(vmcs01); /* Take the following fields only from vmcs12 */ exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | + SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | SECONDARY_EXEC_ENABLE_INVPCID | SECONDARY_EXEC_ENABLE_RDTSCP | SECONDARY_EXEC_XSAVES | @@ -2245,7 +2247,9 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | SECONDARY_EXEC_APIC_REGISTER_VIRT | SECONDARY_EXEC_ENABLE_VMFUNC | - SECONDARY_EXEC_TSC_SCALING); + SECONDARY_EXEC_TSC_SCALING | + SECONDARY_EXEC_DESC); + if (nested_cpu_has(vmcs12, CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) exec_control |= vmcs12->secondary_vm_exec_control; @@ -2285,8 +2289,9 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) * on the related bits (if supported by the CPU) in the hope that * we can avoid VMWrites during vmx_set_efer(). 
*/ - exec_control = (vmcs12->vm_entry_controls | vmx_vmentry_ctrl()) & - ~VM_ENTRY_IA32E_MODE & ~VM_ENTRY_LOAD_IA32_EFER; + exec_control = __vm_entry_controls_get(vmcs01); + exec_control |= vmcs12->vm_entry_controls; + exec_control &= ~(VM_ENTRY_IA32E_MODE | VM_ENTRY_LOAD_IA32_EFER); if (cpu_has_load_ia32_efer()) { if (guest_efer & EFER_LMA) exec_control |= VM_ENTRY_IA32E_MODE; @@ -2302,9 +2307,11 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER * bits may be modified by vmx_set_efer() in prepare_vmcs02(). */ - exec_control = vmx_vmexit_ctrl(); + exec_control = __vm_exit_controls_get(vmcs01); if (cpu_has_load_ia32_efer() && guest_efer != host_efer) exec_control |= VM_EXIT_LOAD_IA32_EFER; + else + exec_control &= ~VM_EXIT_LOAD_IA32_EFER; vm_exit_controls_set(vmx, exec_control); /* @@ -3347,7 +3354,7 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02); - prepare_vmcs02_early(vmx, vmcs12); + prepare_vmcs02_early(vmx, &vmx->vmcs01, vmcs12); if (from_vmentry) { if (unlikely(!nested_get_vmcs12_pages(vcpu))) { diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 4f151175d45a..414b440de9ac 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -418,9 +418,13 @@ static inline void lname##_controls_set(struct vcpu_vmx *vmx, u32 val) \ vmx->loaded_vmcs->controls_shadow.lname = val; \ } \ } \ +static inline u32 __##lname##_controls_get(struct loaded_vmcs *vmcs) \ +{ \ + return vmcs->controls_shadow.lname; \ +} \ static inline u32 lname##_controls_get(struct vcpu_vmx *vmx) \ { \ - return vmx->loaded_vmcs->controls_shadow.lname; \ + return __##lname##_controls_get(vmx->loaded_vmcs); \ } \ static inline void lname##_controls_setbit(struct vcpu_vmx *vmx, u32 val) \ { \ -- cgit From b6247686b7571003eca2305b2096f59e1e1ce976 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 10 Aug 2021 10:19:51 -0700 Subject: KVM: VMX: Drop caching of KVM's desired sec exec controls for vmcs01 Remove the secondary execution controls cache now that it's effectively dead code; it is only read immediately after it is written. No functional change intended. 
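The __##lname##_controls_get() accessors added to vmx.h by the previous patch are what let callers read another VMCS's shadow state. A minimal, self-contained userspace sketch of that token-pasting pattern (struct layout, field names, and values here are illustrative, not KVM's actual definitions):

#include <stdio.h>

struct controls_shadow { unsigned int pin; unsigned int exec; };
struct loaded_vmcs { struct controls_shadow controls_shadow; };

/* Generates __pin_controls_get(), __exec_controls_get(), ... */
#define BUILD_CONTROLS_GET(lname)                                       \
static inline unsigned int                                              \
__##lname##_controls_get(struct loaded_vmcs *vmcs)                      \
{                                                                       \
        return vmcs->controls_shadow.lname;                             \
}

BUILD_CONTROLS_GET(pin)
BUILD_CONTROLS_GET(exec)

int main(void)
{
        struct loaded_vmcs vmcs01 = { .controls_shadow = { .pin = 0x16, .exec = 0x8401e172 } };

        /* Nested setup can read L0's desired controls straight from the shadow. */
        printf("pin=%#x exec=%#x\n",
               __pin_controls_get(&vmcs01), __exec_controls_get(&vmcs01));
        return 0;
}

Because prepare_vmcs02_early() now takes a vmcs01 pointer and reads these cached shadows directly, vmx->secondary_exec_control has no reader left beyond the write that immediately precedes it, hence its removal here.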
Signed-off-by: Sean Christopherson Message-Id: <20210810171952.2758100-4-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 20 ++++++++------------ arch/x86/kvm/vmx/vmx.h | 3 +-- 2 files changed, 9 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 123ecca91c27..f2622dd335ce 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -4212,7 +4212,7 @@ vmx_adjust_secondary_exec_control(struct vcpu_vmx *vmx, u32 *exec_control, #define vmx_adjust_sec_exec_exiting(vmx, exec_control, lname, uname) \ vmx_adjust_sec_exec_control(vmx, exec_control, lname, uname, uname##_EXITING, true) -static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx) +u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) { struct kvm_vcpu *vcpu = &vmx->vcpu; @@ -4298,7 +4298,7 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx) if (!vcpu->kvm->arch.bus_lock_detection_enabled) exec_control &= ~SECONDARY_EXEC_BUS_LOCK_DETECTION; - vmx->secondary_exec_control = exec_control; + return exec_control; } #define VMX_XSS_EXIT_BITMAP 0 @@ -4322,10 +4322,8 @@ static void init_vmcs(struct vcpu_vmx *vmx) exec_controls_set(vmx, vmx_exec_control(vmx)); - if (cpu_has_secondary_exec_ctrls()) { - vmx_compute_secondary_exec_control(vmx); - secondary_exec_controls_set(vmx, vmx->secondary_exec_control); - } + if (cpu_has_secondary_exec_ctrls()) + secondary_exec_controls_set(vmx, vmx_secondary_exec_control(vmx)); if (kvm_vcpu_apicv_active(&vmx->vcpu)) { vmcs_write64(EOI_EXIT_BITMAP0, 0); @@ -6992,7 +6990,7 @@ exit: return (cache << VMX_EPT_MT_EPTE_SHIFT) | ipat; } -static void vmcs_set_secondary_exec_control(struct vcpu_vmx *vmx) +static void vmcs_set_secondary_exec_control(struct vcpu_vmx *vmx, u32 new_ctl) { /* * These bits in the secondary execution controls field @@ -7006,7 +7004,6 @@ static void vmcs_set_secondary_exec_control(struct vcpu_vmx *vmx) SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | SECONDARY_EXEC_DESC; - u32 new_ctl = vmx->secondary_exec_control; u32 cur_ctl = secondary_exec_controls_get(vmx); secondary_exec_controls_set(vmx, (new_ctl & ~mask) | (cur_ctl & mask)); @@ -7151,10 +7148,9 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) vmx_setup_uret_msrs(vmx); - if (cpu_has_secondary_exec_ctrls()) { - vmx_compute_secondary_exec_control(vmx); - vmcs_set_secondary_exec_control(vmx); - } + if (cpu_has_secondary_exec_ctrls()) + vmcs_set_secondary_exec_control(vmx, + vmx_secondary_exec_control(vmx)); if (nested_vmx_allowed(vcpu)) to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |= diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 414b440de9ac..2bd07867e9da 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -263,8 +263,6 @@ struct vcpu_vmx { u64 spec_ctrl; u32 msr_ia32_umwait_control; - u32 secondary_exec_control; - /* * loaded_vmcs points to the VMCS currently used in this vcpu. For a * non-nested (L1) guest, it always points to vmcs01. 
For a nested @@ -477,6 +475,7 @@ static inline u32 vmx_vmexit_ctrl(void) } u32 vmx_exec_control(struct vcpu_vmx *vmx); +u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx); u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx); static inline struct kvm_vmx *to_kvm_vmx(struct kvm *kvm) { return container_of(kvm, struct kvm_vmx, kvm); -- cgit From 2fba4fc155280727b4997c6ee86f24c260dd9155 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 10 Aug 2021 10:19:52 -0700 Subject: KVM: VMX: Hide VMCS control calculators in vmx.c Now that nested VMX pulls KVM's desired VMCS controls from vmcs01 instead of re-calculating on the fly, bury the helpers that do the calculations in vmx.c. No functional change intended. Signed-off-by: Sean Christopherson Message-Id: <20210810171952.2758100-5-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 30 +++++++++++++++++++++++++++--- arch/x86/kvm/vmx/vmx.h | 26 -------------------------- 2 files changed, 27 insertions(+), 29 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index f2622dd335ce..cd913100b300 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -4095,7 +4095,7 @@ void set_cr4_guest_host_mask(struct vcpu_vmx *vmx) vmcs_writel(CR4_GUEST_HOST_MASK, ~vcpu->arch.cr4_guest_owned_bits); } -u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx) +static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx) { u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl; @@ -4111,6 +4111,30 @@ u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx) return pin_based_exec_ctrl; } +static u32 vmx_vmentry_ctrl(void) +{ + u32 vmentry_ctrl = vmcs_config.vmentry_ctrl; + + if (vmx_pt_mode_is_system()) + vmentry_ctrl &= ~(VM_ENTRY_PT_CONCEAL_PIP | + VM_ENTRY_LOAD_IA32_RTIT_CTL); + /* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */ + return vmentry_ctrl & + ~(VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VM_ENTRY_LOAD_IA32_EFER); +} + +static u32 vmx_vmexit_ctrl(void) +{ + u32 vmexit_ctrl = vmcs_config.vmexit_ctrl; + + if (vmx_pt_mode_is_system()) + vmexit_ctrl &= ~(VM_EXIT_PT_CONCEAL_PIP | + VM_EXIT_CLEAR_IA32_RTIT_CTL); + /* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */ + return vmexit_ctrl & + ~(VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | VM_EXIT_LOAD_IA32_EFER); +} + static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -4130,7 +4154,7 @@ static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) vmx_update_msr_bitmap_x2apic(vcpu); } -u32 vmx_exec_control(struct vcpu_vmx *vmx) +static u32 vmx_exec_control(struct vcpu_vmx *vmx) { u32 exec_control = vmcs_config.cpu_based_exec_ctrl; @@ -4212,7 +4236,7 @@ vmx_adjust_secondary_exec_control(struct vcpu_vmx *vmx, u32 *exec_control, #define vmx_adjust_sec_exec_exiting(vmx, exec_control, lname, uname) \ vmx_adjust_sec_exec_control(vmx, exec_control, lname, uname, uname##_EXITING, true) -u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) +static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) { struct kvm_vcpu *vcpu = &vmx->vcpu; diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 2bd07867e9da..4858c5fd95f2 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -452,32 +452,6 @@ static inline void vmx_register_cache_reset(struct kvm_vcpu *vcpu) vcpu->arch.regs_dirty = 0; } -static inline u32 vmx_vmentry_ctrl(void) -{ - u32 vmentry_ctrl = vmcs_config.vmentry_ctrl; - if (vmx_pt_mode_is_system()) - vmentry_ctrl &= ~(VM_ENTRY_PT_CONCEAL_PIP | - VM_ENTRY_LOAD_IA32_RTIT_CTL); - /* Loading of EFER and 
PERF_GLOBAL_CTRL are toggled dynamically */ - return vmentry_ctrl & - ~(VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VM_ENTRY_LOAD_IA32_EFER); -} - -static inline u32 vmx_vmexit_ctrl(void) -{ - u32 vmexit_ctrl = vmcs_config.vmexit_ctrl; - if (vmx_pt_mode_is_system()) - vmexit_ctrl &= ~(VM_EXIT_PT_CONCEAL_PIP | - VM_EXIT_CLEAR_IA32_RTIT_CTL); - /* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */ - return vmexit_ctrl & - ~(VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | VM_EXIT_LOAD_IA32_EFER); -} - -u32 vmx_exec_control(struct vcpu_vmx *vmx); -u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx); -u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx); - static inline struct kvm_vmx *to_kvm_vmx(struct kvm *kvm) { return container_of(kvm, struct kvm_vmx, kvm); -- cgit From ad0577c375299a2cc426913c141086c0e9033c78 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 9 Aug 2021 10:39:54 -0700 Subject: KVM: x86: Kill off __ex() and __kvm_handle_fault_on_reboot() Remove the __kvm_handle_fault_on_reboot() and __ex() macros now that all VMX and SVM instructions use asm goto to handle the fault (or in the case of VMREAD, completely custom logic). Drop kvm_spurious_fault()'s asmlinkage annotation as __kvm_handle_fault_on_reboot() was the only flow that invoked it from assembly code. Cc: Uros Bizjak Cc: Like Xu Signed-off-by: Sean Christopherson Message-Id: <20210809173955.1710866-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 25 +------------------------ arch/x86/kvm/svm/sev.c | 2 -- arch/x86/kvm/svm/svm.c | 2 -- arch/x86/kvm/vmx/vmx_ops.h | 2 -- arch/x86/kvm/x86.c | 9 ++++++++- 5 files changed, 9 insertions(+), 31 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 5e35334980a6..8c8aa8ab5f0b 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1811,30 +1811,7 @@ enum { #define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0) #define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm) -asmlinkage void kvm_spurious_fault(void); - -/* - * Hardware virtualization extension instructions may fault if a - * reboot turns off virtualization while processes are running. - * Usually after catching the fault we just panic; during reboot - * instead the instruction is ignored. - */ -#define __kvm_handle_fault_on_reboot(insn) \ - "666: \n\t" \ - insn "\n\t" \ - "jmp 668f \n\t" \ - "667: \n\t" \ - "1: \n\t" \ - ".pushsection .discard.instr_begin \n\t" \ - ".long 1b - . \n\t" \ - ".popsection \n\t" \ - "call kvm_spurious_fault \n\t" \ - "1: \n\t" \ - ".pushsection .discard.instr_end \n\t" \ - ".long 1b - . 
\n\t" \ - ".popsection \n\t" \ - "668: \n\t" \ - _ASM_EXTABLE(666b, 667b) +void kvm_spurious_fault(void); #define KVM_ARCH_WANT_MMU_NOTIFIER diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 81601c31f549..75e0b21ad07c 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -28,8 +28,6 @@ #include "cpuid.h" #include "trace.h" -#define __ex(x) __kvm_handle_fault_on_reboot(x) - #ifndef CONFIG_KVM_AMD_SEV /* * When this config is not defined, SEV feature is not supported and APIs in diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 9d72b1df426e..2b6632d4c76f 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -46,8 +46,6 @@ #include "kvm_onhyperv.h" #include "svm_onhyperv.h" -#define __ex(x) __kvm_handle_fault_on_reboot(x) - MODULE_AUTHOR("Qumranet"); MODULE_LICENSE("GPL"); diff --git a/arch/x86/kvm/vmx/vmx_ops.h b/arch/x86/kvm/vmx/vmx_ops.h index 164b64f65a8f..c0d74b994b56 100644 --- a/arch/x86/kvm/vmx/vmx_ops.h +++ b/arch/x86/kvm/vmx/vmx_ops.h @@ -10,8 +10,6 @@ #include "evmcs.h" #include "vmcs.h" -#define __ex(x) __kvm_handle_fault_on_reboot(x) - asmlinkage void vmread_error(unsigned long field, bool fault); __attribute__((regparm(0))) void vmread_error_trampoline(unsigned long field, bool fault); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 70d42b50199a..58a72c7d3330 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -486,7 +486,14 @@ int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info) } EXPORT_SYMBOL_GPL(kvm_set_apic_base); -asmlinkage __visible noinstr void kvm_spurious_fault(void) +/* + * Handle a fault on a hardware virtualization (VMX or SVM) instruction. + * + * Hardware virtualization extension instructions may fault if a reboot turns + * off virtualization while processes are running. Usually after catching the + * fault we just panic; during reboot instead the instruction is ignored. + */ +noinstr void kvm_spurious_fault(void) { /* Fault while not rebooting. We want the trace. */ BUG_ON(!kvm_rebooting); -- cgit From 65297341d8e15b04cc9e206597a3d7c407c346f6 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Mon, 9 Aug 2021 10:39:55 -0700 Subject: KVM: x86: Move declaration of kvm_spurious_fault() to x86.h Move the declaration of kvm_spurious_fault() to KVM's "private" x86.h, it should never be called by anything other than low level KVM code. Cc: Paolo Bonzini Cc: Sean Christopherson Signed-off-by: Uros Bizjak [sean: rebased to a series without __ex()/__kvm_handle_fault_on_reboot()] Signed-off-by: Sean Christopherson Message-Id: <20210809173955.1710866-3-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 -- arch/x86/kvm/svm/svm_ops.h | 2 +- arch/x86/kvm/vmx/vmx_ops.h | 2 +- arch/x86/kvm/x86.h | 2 ++ 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 8c8aa8ab5f0b..a612d213be5c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1811,8 +1811,6 @@ enum { #define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 
1 : 0) #define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm) -void kvm_spurious_fault(void); - #define KVM_ARCH_WANT_MMU_NOTIFIER int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v); diff --git a/arch/x86/kvm/svm/svm_ops.h b/arch/x86/kvm/svm/svm_ops.h index 8170f2a5a16f..22e2b019de37 100644 --- a/arch/x86/kvm/svm/svm_ops.h +++ b/arch/x86/kvm/svm/svm_ops.h @@ -4,7 +4,7 @@ #include -#include +#include "x86.h" #define svm_asm(insn, clobber...) \ do { \ diff --git a/arch/x86/kvm/vmx/vmx_ops.h b/arch/x86/kvm/vmx/vmx_ops.h index c0d74b994b56..9e9ef47e988c 100644 --- a/arch/x86/kvm/vmx/vmx_ops.h +++ b/arch/x86/kvm/vmx/vmx_ops.h @@ -4,11 +4,11 @@ #include -#include #include #include "evmcs.h" #include "vmcs.h" +#include "x86.h" asmlinkage void vmread_error(unsigned long field, bool fault); __attribute__((regparm(0))) void vmread_error_trampoline(unsigned long field, diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 44ae10312740..7d66d63dc55a 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -8,6 +8,8 @@ #include "kvm_cache_regs.h" #include "kvm_emulate.h" +void kvm_spurious_fault(void); + static __always_inline void kvm_guest_enter_irqoff(void) { /* -- cgit From c1a527a1de46ad6f0f9d5907b29fc98e50267f8e Mon Sep 17 00:00:00 2001 From: Like Xu Date: Mon, 9 Aug 2021 17:34:08 +0800 Subject: KVM: x86: Clean up redundant ROL16(val, n) macro definition The ROL16(val, n) macro is repeatedly defined in several vmcs-related files, and it has never been used outside the KVM context. Let's move it to vmcs.h without any intended functional changes. Signed-off-by: Like Xu Message-Id: <20210809093410.59304-4-likexu@tencent.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/evmcs.c | 1 - arch/x86/kvm/vmx/evmcs.h | 4 ---- arch/x86/kvm/vmx/vmcs.h | 2 ++ arch/x86/kvm/vmx/vmcs12.c | 1 - arch/x86/kvm/vmx/vmcs12.h | 4 ---- 5 files changed, 2 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/evmcs.c b/arch/x86/kvm/vmx/evmcs.c index 896b2a50b4aa..0dab1b7b529f 100644 --- a/arch/x86/kvm/vmx/evmcs.c +++ b/arch/x86/kvm/vmx/evmcs.c @@ -14,7 +14,6 @@ DEFINE_STATIC_KEY_FALSE(enable_evmcs); #if IS_ENABLED(CONFIG_HYPERV) -#define ROL16(val, n) ((u16)(((u16)(val) << (n)) | ((u16)(val) >> (16 - (n))))) #define EVMCS1_OFFSET(x) offsetof(struct hv_enlightened_vmcs, x) #define EVMCS1_FIELD(number, name, clean_field)[ROL16(number, 6)] = \ {EVMCS1_OFFSET(name), clean_field} diff --git a/arch/x86/kvm/vmx/evmcs.h b/arch/x86/kvm/vmx/evmcs.h index 2ec9b46f0d0c..152ab0aa82cf 100644 --- a/arch/x86/kvm/vmx/evmcs.h +++ b/arch/x86/kvm/vmx/evmcs.h @@ -73,8 +73,6 @@ struct evmcs_field { extern const struct evmcs_field vmcs_field_to_evmcs_1[]; extern const unsigned int nr_evmcs_1_fields; -#define ROL16(val, n) ((u16)(((u16)(val) << (n)) | ((u16)(val) >> (16 - (n))))) - static __always_inline int get_evmcs_offset(unsigned long field, u16 *clean_field) { @@ -95,8 +93,6 @@ static __always_inline int get_evmcs_offset(unsigned long field, return evmcs_field->offset; } -#undef ROL16 - static inline void evmcs_write64(unsigned long field, u64 value) { u16 clean_field; diff --git a/arch/x86/kvm/vmx/vmcs.h b/arch/x86/kvm/vmx/vmcs.h index 4b9957e2bf5b..6e5de2e2b0da 100644 --- a/arch/x86/kvm/vmx/vmcs.h +++ b/arch/x86/kvm/vmx/vmcs.h @@ -11,6 +11,8 @@ #include "capabilities.h" +#define ROL16(val, n) ((u16)(((u16)(val) << (n)) | ((u16)(val) >> (16 - (n))))) + struct vmcs_hdr { u32 revision_id:31; u32 shadow_vmcs:1; diff --git a/arch/x86/kvm/vmx/vmcs12.c b/arch/x86/kvm/vmx/vmcs12.c index 
d9f5d7c56ae3..cab6ba7a5005 100644 --- a/arch/x86/kvm/vmx/vmcs12.c +++ b/arch/x86/kvm/vmx/vmcs12.c @@ -2,7 +2,6 @@ #include "vmcs12.h" -#define ROL16(val, n) ((u16)(((u16)(val) << (n)) | ((u16)(val) >> (16 - (n))))) #define VMCS12_OFFSET(x) offsetof(struct vmcs12, x) #define FIELD(number, name) [ROL16(number, 6)] = VMCS12_OFFSET(name) #define FIELD64(number, name) \ diff --git a/arch/x86/kvm/vmx/vmcs12.h b/arch/x86/kvm/vmx/vmcs12.h index 5e0e1b39f495..2a45f026ee11 100644 --- a/arch/x86/kvm/vmx/vmcs12.h +++ b/arch/x86/kvm/vmx/vmcs12.h @@ -364,8 +364,6 @@ static inline void vmx_check_vmcs12_offsets(void) extern const unsigned short vmcs_field_to_offset_table[]; extern const unsigned int nr_vmcs12_fields; -#define ROL16(val, n) ((u16)(((u16)(val) << (n)) | ((u16)(val) >> (16 - (n))))) - static inline short vmcs_field_to_offset(unsigned long field) { unsigned short offset; @@ -385,8 +383,6 @@ static inline short vmcs_field_to_offset(unsigned long field) return offset; } -#undef ROL16 - static inline u64 vmcs12_read_any(struct vmcs12 *vmcs12, unsigned long field, u16 offset) { -- cgit From f7782bb8d818d8f47c26b22079db10599922787a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 10 Aug 2021 07:45:26 -0700 Subject: KVM: nVMX: Unconditionally clear nested.pi_pending on nested VM-Enter Clear nested.pi_pending on nested VM-Enter even if L2 will run without posted interrupts enabled. If nested.pi_pending is left set from a previous L2, vmx_complete_nested_posted_interrupt() will pick up the stale flag and exit to userspace with an "internal emulation error" due to the new L2 not having a valid nested.pi_desc. Arguably, vmx_complete_nested_posted_interrupt() should first check for posted interrupts being enabled, but it's also completely reasonable that KVM wouldn't screw up a fundamental flag. Not to mention that the mere existence of nested.pi_pending is a long-standing bug as KVM shouldn't move the posted interrupt out of the IRR until it's actually processed, e.g. KVM effectively drops an interrupt when it performs a nested VM-Exit with a "pending" posted interrupt. Fixing the mess is a future problem. Prior to vmx_complete_nested_posted_interrupt() interpreting a null PI descriptor as an error, this was a benign bug as the null PI descriptor effectively served as a check on PI not being enabled. Even then, the new flow did not become problematic until KVM started checking the result of kvm_check_nested_events(). Fixes: 705699a13994 ("KVM: nVMX: Enable nested posted interrupt processing") Fixes: 966eefb89657 ("KVM: nVMX: Disable vmcs02 posted interrupts if vmcs12 PID isn't mappable") Fixes: 47d3530f86c0 ("KVM: x86: Exit to userspace when kvm_check_nested_events fails") Cc: stable@vger.kernel.org Cc: Jim Mattson Signed-off-by: Sean Christopherson Message-Id: <20210810144526.2662272-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 264a9f4c9179..bc6327950657 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2187,12 +2187,11 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs0 ~PIN_BASED_VMX_PREEMPTION_TIMER); /* Posted interrupts setting is only taken from vmcs12. 
*/ - if (nested_cpu_has_posted_intr(vmcs12)) { + vmx->nested.pi_pending = false; + if (nested_cpu_has_posted_intr(vmcs12)) vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv; - vmx->nested.pi_pending = false; - } else { + else exec_control &= ~PIN_BASED_POSTED_INTR; - } pin_controls_set(vmx, exec_control); /* -- cgit From b11748e693166679acc13c8a4328a71efe1d4a89 Mon Sep 17 00:00:00 2001 From: Emmanuel Gil Peyrot Date: Sun, 1 Aug 2021 09:38:20 +0200 Subject: powerpc: wii.dts: Reduce the size of the control area This is wrong, but needed in order to avoid overlapping ranges with the OTP area added in the next commit. A refactor of this part of the device tree is needed: according to Wiibrew[1], this area starts at 0x0d800000 and spans 0x400 bytes (that is, 0x100 32-bit registers), encompassing PIC and GPIO registers, amongst the ones already exposed in this device tree, which should become children of the control@d800000 node. [1] https://wiibrew.org/wiki/Hardware/Hollywood_Registers Signed-off-by: Emmanuel Gil Peyrot Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210801073822.12452-4-linkmauve@linkmauve.fr --- arch/powerpc/boot/dts/wii.dts | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/boot/dts/wii.dts b/arch/powerpc/boot/dts/wii.dts index aaa381da1906..c5fb54f8cc02 100644 --- a/arch/powerpc/boot/dts/wii.dts +++ b/arch/powerpc/boot/dts/wii.dts @@ -216,7 +216,13 @@ control@d800100 { compatible = "nintendo,hollywood-control"; - reg = <0x0d800100 0x300>; + /* + * Both the address and length are wrong, according to + * Wiibrew this should be <0x0d800000 0x400>, but it + * requires refactoring the PIC1 and GPIO nodes before + * changing that. + */ + reg = <0x0d800100 0xa0>; }; disk@d806000 { -- cgit From 562a610b4c5119034aed300f6ae212ec7a20c4b4 Mon Sep 17 00:00:00 2001 From: Emmanuel Gil Peyrot Date: Sun, 1 Aug 2021 09:38:21 +0200 Subject: powerpc: wii.dts: Expose the OTP on this platform This can be used by the newly-added nintendo-otp nvmem module. Signed-off-by: Emmanuel Gil Peyrot Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210801073822.12452-5-linkmauve@linkmauve.fr --- arch/powerpc/boot/dts/wii.dts | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/boot/dts/wii.dts b/arch/powerpc/boot/dts/wii.dts index c5fb54f8cc02..e9c945b123c6 100644 --- a/arch/powerpc/boot/dts/wii.dts +++ b/arch/powerpc/boot/dts/wii.dts @@ -219,12 +219,17 @@ /* * Both the address and length are wrong, according to * Wiibrew this should be <0x0d800000 0x400>, but it - * requires refactoring the PIC1 and GPIO nodes before - * changing that. + * requires refactoring the PIC1, GPIO and OTP nodes + * before changing that. */ reg = <0x0d800100 0xa0>; }; + otp@d8001ec { + compatible = "nintendo,hollywood-otp"; + reg = <0x0d8001ec 0x8>; + }; + disk@d806000 { compatible = "nintendo,hollywood-di"; reg = <0x0d806000 0x40>; -- cgit From 140a89b7bfe65e9649c4a3678f74c32556834ec1 Mon Sep 17 00:00:00 2001 From: Emmanuel Gil Peyrot Date: Sun, 1 Aug 2021 09:38:22 +0200 Subject: powerpc: wii_defconfig: Enable OTP by default This selects the nintendo-otp module when building for this platform. 
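As a consumer-side illustration only: once the OTP is registered as an nvmem provider, a hypothetical driver could read it through the generic nvmem API. This sketch is an assumption for illustration (the device lookup, offset, and length are invented), not code from the nintendo-otp driver:

#include <linux/nvmem-consumer.h>
#include <linux/device.h>
#include <linux/err.h>

/* Hypothetical consumer: pull the first 32 bytes out of the Hollywood OTP. */
static int wii_otp_sample_read(struct device *dev)
{
        struct nvmem_device *otp;
        u8 buf[32];
        int ret;

        otp = devm_nvmem_device_get(dev, NULL); /* assumes the lookup is wired up */
        if (IS_ERR(otp))
                return PTR_ERR(otp);

        ret = nvmem_device_read(otp, 0, sizeof(buf), buf);
        return ret < 0 ? ret : 0;
}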
Signed-off-by: Emmanuel Gil Peyrot Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210801073822.12452-6-linkmauve@linkmauve.fr --- arch/powerpc/configs/wii_defconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/powerpc/configs/wii_defconfig b/arch/powerpc/configs/wii_defconfig index 379c171f3ddd..a0c45bf2bfb1 100644 --- a/arch/powerpc/configs/wii_defconfig +++ b/arch/powerpc/configs/wii_defconfig @@ -99,6 +99,7 @@ CONFIG_LEDS_TRIGGER_HEARTBEAT=y CONFIG_LEDS_TRIGGER_PANIC=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_GENERIC=y +CONFIG_NVMEM_NINTENDO_OTP=y CONFIG_EXT2_FS=y CONFIG_EXT4_FS=y CONFIG_FUSE_FS=m -- cgit From 3e188b1ae8807f26cc5a530a9d55f3f643fe050a Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 12 Aug 2021 18:58:30 +0530 Subject: powerpc/book3s64/radix: make tlb_single_page_flush_ceiling a debugfs entry Similar to x86/s390 add a debugfs file to tune tlb_single_page_flush_ceiling. Also add a debugfs entry for tlb_local_single_page_flush_ceiling. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210812132831.233794-1-aneesh.kumar@linux.ibm.com --- arch/powerpc/mm/book3s64/radix_tlb.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index aefc100d79a7..1fa2bc6a969e 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "internal.h" @@ -1106,8 +1107,8 @@ EXPORT_SYMBOL(radix__flush_tlb_kernel_range); * invalidating a full PID, so it has a far lower threshold to change from * individual page flushes to full-pid flushes. */ -static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33; -static unsigned long tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2; +static u32 tlb_single_page_flush_ceiling __read_mostly = 33; +static u32 tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2; static inline void __radix__flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long end) @@ -1524,3 +1525,14 @@ void do_h_rpt_invalidate_prt(unsigned long pid, unsigned long lpid, EXPORT_SYMBOL_GPL(do_h_rpt_invalidate_prt); #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ + +static int __init create_tlb_single_page_flush_ceiling(void) +{ + debugfs_create_u32("tlb_single_page_flush_ceiling", 0600, + powerpc_debugfs_root, &tlb_single_page_flush_ceiling); + debugfs_create_u32("tlb_local_single_page_flush_ceiling", 0600, + powerpc_debugfs_root, &tlb_local_single_page_flush_ceiling); + return 0; +} +late_initcall(create_tlb_single_page_flush_ceiling); + -- cgit From dbf77fed8b302e87561c7c2fc06050c88f4d3120 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 12 Aug 2021 18:58:31 +0530 Subject: powerpc: rename powerpc_debugfs_root to arch_debugfs_dir No functional change in this patch. arch_debugfs_dir is the generic kernel name declared in linux/debugfs.h for arch-specific debugfs directory. Architectures like x86/s390 already use the name. Rename powerpc specific powerpc_debugfs_root to arch_debugfs_dir. 
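The knob-creation pattern is unchanged by the rename; only the parent dentry's symbol name differs. A minimal sketch of how an arch file exposes a tunable under the shared directory, mirroring create_tlb_single_page_flush_ceiling() above (the knob name and variable are made up for illustration):

#include <linux/debugfs.h>
#include <linux/init.h>
#include <linux/cache.h>

static u32 example_flush_ceiling __read_mostly = 33;    /* hypothetical tunable */

static int __init create_example_debugfs(void)
{
        /* Shows up as /sys/kernel/debug/powerpc/example_flush_ceiling. */
        debugfs_create_u32("example_flush_ceiling", 0600,
                           arch_debugfs_dir, &example_flush_ceiling);
        return 0;
}
late_initcall(create_example_debugfs);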
Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210812132831.233794-2-aneesh.kumar@linux.ibm.com --- arch/powerpc/include/asm/debugfs.h | 13 ------------- arch/powerpc/kernel/Makefile | 3 ++- arch/powerpc/kernel/dawr.c | 3 +-- arch/powerpc/kernel/eeh.c | 16 ++++++++-------- arch/powerpc/kernel/eeh_cache.c | 4 ++-- arch/powerpc/kernel/fadump.c | 4 ++-- arch/powerpc/kernel/hw_breakpoint.c | 1 - arch/powerpc/kernel/kdebugfs.c | 14 ++++++++++++++ arch/powerpc/kernel/security.c | 16 ++++++++-------- arch/powerpc/kernel/setup-common.c | 13 ------------- arch/powerpc/kernel/setup_64.c | 1 - arch/powerpc/kernel/traps.c | 4 ++-- arch/powerpc/kvm/book3s_xics.c | 6 +++--- arch/powerpc/kvm/book3s_xive.c | 3 +-- arch/powerpc/kvm/book3s_xive_native.c | 3 +-- arch/powerpc/mm/book3s64/hash_utils.c | 4 ++-- arch/powerpc/mm/book3s64/pgtable.c | 4 ++-- arch/powerpc/mm/book3s64/radix_tlb.c | 6 +++--- arch/powerpc/mm/ptdump/bats.c | 4 ++-- arch/powerpc/mm/ptdump/segment_regs.c | 4 ++-- arch/powerpc/platforms/cell/axon_msi.c | 4 ++-- arch/powerpc/platforms/powernv/memtrace.c | 3 +-- arch/powerpc/platforms/powernv/opal-imc.c | 4 ++-- arch/powerpc/platforms/powernv/opal-lpc.c | 4 ++-- arch/powerpc/platforms/powernv/opal-xscom.c | 4 ++-- arch/powerpc/platforms/powernv/pci-ioda.c | 4 ++-- arch/powerpc/platforms/pseries/dtl.c | 4 ++-- arch/powerpc/platforms/pseries/lpar.c | 5 +++-- arch/powerpc/sysdev/xive/common.c | 3 +-- arch/powerpc/xmon/xmon.c | 6 +++--- 30 files changed, 75 insertions(+), 92 deletions(-) delete mode 100644 arch/powerpc/include/asm/debugfs.h create mode 100644 arch/powerpc/kernel/kdebugfs.c (limited to 'arch') diff --git a/arch/powerpc/include/asm/debugfs.h b/arch/powerpc/include/asm/debugfs.h deleted file mode 100644 index 2c5c48571d75..000000000000 --- a/arch/powerpc/include/asm/debugfs.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef _ASM_POWERPC_DEBUGFS_H -#define _ASM_POWERPC_DEBUGFS_H - -/* - * Copyright 2017, Michael Ellerman, IBM Corporation. 
- */ - -#include - -extern struct dentry *powerpc_debugfs_root; - -#endif /* _ASM_POWERPC_DEBUGFS_H */ diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index f66b63e81c3b..7be36c1e1db6 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -46,7 +46,8 @@ obj-y := cputable.o syscalls.o \ prom.o traps.o setup-common.o \ udbg.o misc.o io.o misc_$(BITS).o \ of_platform.o prom_parse.o firmware.o \ - hw_breakpoint_constraints.o interrupt.o + hw_breakpoint_constraints.o interrupt.o \ + kdebugfs.o obj-y += ptrace/ obj-$(CONFIG_PPC64) += setup_64.o \ paca.o nvram_64.o note.o diff --git a/arch/powerpc/kernel/dawr.c b/arch/powerpc/kernel/dawr.c index cdc2dccb987d..64e423d2fe0f 100644 --- a/arch/powerpc/kernel/dawr.c +++ b/arch/powerpc/kernel/dawr.c @@ -9,7 +9,6 @@ #include #include #include -#include #include #include @@ -101,7 +100,7 @@ static int __init dawr_force_setup(void) if (PVR_VER(mfspr(SPRN_PVR)) == PVR_POWER9) { /* Turn DAWR off by default, but allow admin to turn it on */ debugfs_create_file_unsafe("dawr_enable_dangerous", 0600, - powerpc_debugfs_root, + arch_debugfs_dir, &dawr_force_enable, &dawr_enable_fops); } diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 3bbdcc86d01b..e9b597ed423c 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -21,9 +21,9 @@ #include #include #include +#include #include -#include #include #include #include @@ -1901,24 +1901,24 @@ static int __init eeh_init_proc(void) proc_create_single("powerpc/eeh", 0, NULL, proc_eeh_show); #ifdef CONFIG_DEBUG_FS debugfs_create_file_unsafe("eeh_enable", 0600, - powerpc_debugfs_root, NULL, + arch_debugfs_dir, NULL, &eeh_enable_dbgfs_ops); debugfs_create_u32("eeh_max_freezes", 0600, - powerpc_debugfs_root, &eeh_max_freezes); + arch_debugfs_dir, &eeh_max_freezes); debugfs_create_bool("eeh_disable_recovery", 0600, - powerpc_debugfs_root, + arch_debugfs_dir, &eeh_debugfs_no_recover); debugfs_create_file_unsafe("eeh_dev_check", 0600, - powerpc_debugfs_root, NULL, + arch_debugfs_dir, NULL, &eeh_dev_check_fops); debugfs_create_file_unsafe("eeh_dev_break", 0600, - powerpc_debugfs_root, NULL, + arch_debugfs_dir, NULL, &eeh_dev_break_fops); debugfs_create_file_unsafe("eeh_force_recover", 0600, - powerpc_debugfs_root, NULL, + arch_debugfs_dir, NULL, &eeh_force_recover_fops); debugfs_create_file_unsafe("eeh_dev_can_recover", 0600, - powerpc_debugfs_root, NULL, + arch_debugfs_dir, NULL, &eeh_dev_can_recover_fops); eeh_cache_debugfs_init(); #endif diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c index bf3270426d82..9bdaaf7fddc9 100644 --- a/arch/powerpc/kernel/eeh_cache.c +++ b/arch/powerpc/kernel/eeh_cache.c @@ -12,8 +12,8 @@ #include #include #include +#include #include -#include #include @@ -283,6 +283,6 @@ DEFINE_SHOW_ATTRIBUTE(eeh_addr_cache); void eeh_cache_debugfs_init(void) { debugfs_create_file_unsafe("eeh_address_cache", 0400, - powerpc_debugfs_root, NULL, + arch_debugfs_dir, NULL, &eeh_addr_cache_fops); } diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index b990075285f5..b7ceb041743c 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -24,8 +24,8 @@ #include #include #include +#include -#include #include #include #include @@ -1557,7 +1557,7 @@ static void fadump_init_files(void) return; } - debugfs_create_file("fadump_region", 0444, powerpc_debugfs_root, NULL, + debugfs_create_file("fadump_region", 0444, arch_debugfs_dir, NULL, &fadump_region_fops); if 
(fw_dump.dump_active) { diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 21a638aff72f..91a3be14808b 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/kernel/kdebugfs.c b/arch/powerpc/kernel/kdebugfs.c new file mode 100644 index 000000000000..36d3124d5a8b --- /dev/null +++ b/arch/powerpc/kernel/kdebugfs.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include + +struct dentry *arch_debugfs_dir; +EXPORT_SYMBOL(arch_debugfs_dir); + +static int __init arch_kdebugfs_init(void) +{ + arch_debugfs_dir = debugfs_create_dir("powerpc", NULL); + return 0; +} +arch_initcall(arch_kdebugfs_init); diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index cc51fa52e783..1a998490fe60 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -11,10 +11,10 @@ #include #include #include +#include #include #include -#include #include #include #include @@ -106,7 +106,7 @@ DEFINE_DEBUGFS_ATTRIBUTE(fops_barrier_nospec, barrier_nospec_get, static __init int barrier_nospec_debugfs_init(void) { debugfs_create_file_unsafe("barrier_nospec", 0600, - powerpc_debugfs_root, NULL, + arch_debugfs_dir, NULL, &fops_barrier_nospec); return 0; } @@ -114,7 +114,7 @@ device_initcall(barrier_nospec_debugfs_init); static __init int security_feature_debugfs_init(void) { - debugfs_create_x64("security_features", 0400, powerpc_debugfs_root, + debugfs_create_x64("security_features", 0400, arch_debugfs_dir, &powerpc_security_features); return 0; } @@ -420,7 +420,7 @@ DEFINE_DEBUGFS_ATTRIBUTE(fops_stf_barrier, stf_barrier_get, stf_barrier_set, static __init int stf_barrier_debugfs_init(void) { - debugfs_create_file_unsafe("stf_barrier", 0600, powerpc_debugfs_root, + debugfs_create_file_unsafe("stf_barrier", 0600, arch_debugfs_dir, NULL, &fops_stf_barrier); return 0; } @@ -748,7 +748,7 @@ DEFINE_DEBUGFS_ATTRIBUTE(fops_count_cache_flush, count_cache_flush_get, static __init int count_cache_flush_debugfs_init(void) { debugfs_create_file_unsafe("count_cache_flush", 0600, - powerpc_debugfs_root, NULL, + arch_debugfs_dir, NULL, &fops_count_cache_flush); return 0; } @@ -834,9 +834,9 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_uaccess_flush, uaccess_flush_get, uaccess_flush_set static __init int rfi_flush_debugfs_init(void) { - debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush); - debugfs_create_file("entry_flush", 0600, powerpc_debugfs_root, NULL, &fops_entry_flush); - debugfs_create_file("uaccess_flush", 0600, powerpc_debugfs_root, NULL, &fops_uaccess_flush); + debugfs_create_file("rfi_flush", 0600, arch_debugfs_dir, NULL, &fops_rfi_flush); + debugfs_create_file("entry_flush", 0600, arch_debugfs_dir, NULL, &fops_entry_flush); + debugfs_create_file("uaccess_flush", 0600, arch_debugfs_dir, NULL, &fops_uaccess_flush); return 0; } device_initcall(rfi_flush_debugfs_init); diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index aa9c2d01424a..b1e43b69a559 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -33,7 +33,6 @@ #include #include #include -#include #include #include #include @@ -773,18 +772,6 @@ static int __init check_cache_coherency(void) late_initcall(check_cache_coherency); #endif /* CONFIG_CHECK_CACHE_COHERENCY */ -#ifdef CONFIG_DEBUG_FS -struct dentry *powerpc_debugfs_root; 
-EXPORT_SYMBOL(powerpc_debugfs_root); - -static int powerpc_debugfs_init(void) -{ - powerpc_debugfs_root = debugfs_create_dir("powerpc", NULL); - return 0; -} -arch_initcall(powerpc_debugfs_init); -#endif - void ppc_printk_progress(char *s, unsigned short hex) { pr_info("%s\n", s); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 1ff258f6c76c..eaa79a0996d1 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -32,7 +32,6 @@ #include #include -#include #include #include #include diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index dfbce527c98e..c8f648727d36 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -37,10 +37,10 @@ #include #include #include +#include #include #include -#include #include #include #include @@ -2267,7 +2267,7 @@ static int __init ppc_warn_emulated_init(void) struct ppc_emulated_entry *entries = (void *)&ppc_emulated; dir = debugfs_create_dir("emulated_instructions", - powerpc_debugfs_root); + arch_debugfs_dir); debugfs_create_u32("do_warn", 0644, dir, &ppc_warn_emulated); diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index 303e3cb096db..ebd5d920de8c 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -10,13 +10,13 @@ #include #include #include - +#include #include + #include #include #include #include -#include #include #include @@ -1024,7 +1024,7 @@ static void xics_debugfs_init(struct kvmppc_xics *xics) return; } - xics->dentry = debugfs_create_file(name, 0444, powerpc_debugfs_root, + xics->dentry = debugfs_create_file(name, 0444, arch_debugfs_dir, xics, &xics_debug_fops); pr_debug("%s: created %s\n", __func__, name); diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index 912c1e9eef6b..a18db9e16ea4 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include @@ -2360,7 +2359,7 @@ static void xive_debugfs_init(struct kvmppc_xive *xive) return; } - xive->dentry = debugfs_create_file(name, S_IRUGO, powerpc_debugfs_root, + xive->dentry = debugfs_create_file(name, S_IRUGO, arch_debugfs_dir, xive, &xive_debug_fops); pr_debug("%s: created %s\n", __func__, name); diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index af65ea21bde7..99db9ac49901 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include @@ -1268,7 +1267,7 @@ static void xive_native_debugfs_init(struct kvmppc_xive *xive) return; } - xive->dentry = debugfs_create_file(name, 0444, powerpc_debugfs_root, + xive->dentry = debugfs_create_file(name, 0444, arch_debugfs_dir, xive, &xive_native_debug_fops); pr_debug("%s: created %s\n", __func__, name); diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index ac5720371c0d..c145776d3ae5 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -36,8 +36,8 @@ #include #include #include +#include -#include #include #include #include @@ -2072,7 +2072,7 @@ DEFINE_DEBUGFS_ATTRIBUTE(fops_hpt_order, hpt_order_get, hpt_order_set, "%llu\n") static int __init hash64_debugfs(void) { - debugfs_create_file("hpt_order", 0600, powerpc_debugfs_root, NULL, + debugfs_create_file("hpt_order", 0600, arch_debugfs_dir, NULL, &fops_hpt_order); return 0; } diff --git 
a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index 300099de553b..9e16c7b1a6c5 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -6,9 +6,9 @@ #include #include #include +#include #include -#include #include #include #include @@ -520,7 +520,7 @@ static int __init pgtable_debugfs_setup(void) * invalidated as expected. */ debugfs_create_bool("tlbie_enabled", 0600, - powerpc_debugfs_root, + arch_debugfs_dir, &tlbie_enabled); return 0; diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index 1fa2bc6a969e..7724af19ed7e 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -17,7 +18,6 @@ #include #include #include -#include #include "internal.h" @@ -1529,9 +1529,9 @@ EXPORT_SYMBOL_GPL(do_h_rpt_invalidate_prt); static int __init create_tlb_single_page_flush_ceiling(void) { debugfs_create_u32("tlb_single_page_flush_ceiling", 0600, - powerpc_debugfs_root, &tlb_single_page_flush_ceiling); + arch_debugfs_dir, &tlb_single_page_flush_ceiling); debugfs_create_u32("tlb_local_single_page_flush_ceiling", 0600, - powerpc_debugfs_root, &tlb_local_single_page_flush_ceiling); + arch_debugfs_dir, &tlb_local_single_page_flush_ceiling); return 0; } late_initcall(create_tlb_single_page_flush_ceiling); diff --git a/arch/powerpc/mm/ptdump/bats.c b/arch/powerpc/mm/ptdump/bats.c index c4c628b03cf8..8bf7383fb26c 100644 --- a/arch/powerpc/mm/ptdump/bats.c +++ b/arch/powerpc/mm/ptdump/bats.c @@ -7,7 +7,7 @@ */ #include -#include +#include #include #include "ptdump.h" @@ -103,7 +103,7 @@ static const struct file_operations bats_fops = { static int __init bats_init(void) { debugfs_create_file("block_address_translation", 0400, - powerpc_debugfs_root, NULL, &bats_fops); + arch_debugfs_dir, NULL, &bats_fops); return 0; } device_initcall(bats_init); diff --git a/arch/powerpc/mm/ptdump/segment_regs.c b/arch/powerpc/mm/ptdump/segment_regs.c index 565048a0c9be..9223dfb85c51 100644 --- a/arch/powerpc/mm/ptdump/segment_regs.c +++ b/arch/powerpc/mm/ptdump/segment_regs.c @@ -6,7 +6,7 @@ * This dumps the content of Segment Registers */ -#include +#include static void seg_show(struct seq_file *m, int i) { @@ -55,7 +55,7 @@ static const struct file_operations sr_fops = { static int __init sr_init(void) { - debugfs_create_file("segment_registers", 0400, powerpc_debugfs_root, + debugfs_create_file("segment_registers", 0400, arch_debugfs_dir, NULL, &sr_fops); return 0; } diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c index ca2555b8a0c2..82335e364c44 100644 --- a/arch/powerpc/platforms/cell/axon_msi.c +++ b/arch/powerpc/platforms/cell/axon_msi.c @@ -12,8 +12,8 @@ #include #include #include +#include -#include #include #include #include @@ -480,6 +480,6 @@ void axon_msi_debug_setup(struct device_node *dn, struct axon_msic *msic) snprintf(name, sizeof(name), "msic_%d", of_node_to_nid(dn)); - debugfs_create_file(name, 0600, powerpc_debugfs_root, msic, &fops_msic); + debugfs_create_file(name, 0600, arch_debugfs_dir, msic, &fops_msic); } #endif /* DEBUG */ diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c index 537a4daed614..877720c64515 100644 --- a/arch/powerpc/platforms/powernv/memtrace.c +++ b/arch/powerpc/platforms/powernv/memtrace.c @@ -18,7 +18,6 @@ #include #include #include -#include #include /* This enables us to keep track 
of the memory removed from each node. */ @@ -330,7 +329,7 @@ DEFINE_SIMPLE_ATTRIBUTE(memtrace_init_fops, memtrace_enable_get, static int memtrace_init(void) { memtrace_debugfs_dir = debugfs_create_dir("memtrace", - powerpc_debugfs_root); + arch_debugfs_dir); debugfs_create_file("enable", 0600, memtrace_debugfs_dir, NULL, &memtrace_init_fops); diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c index ba02a75c1410..05d3832019b9 100644 --- a/arch/powerpc/platforms/powernv/opal-imc.c +++ b/arch/powerpc/platforms/powernv/opal-imc.c @@ -13,11 +13,11 @@ #include #include #include +#include #include #include #include #include -#include static struct dentry *imc_debugfs_parent; @@ -56,7 +56,7 @@ static void export_imc_mode_and_cmd(struct device_node *node, u32 cb_offset; struct imc_mem_info *ptr = pmu_ptr->mem_info; - imc_debugfs_parent = debugfs_create_dir("imc", powerpc_debugfs_root); + imc_debugfs_parent = debugfs_create_dir("imc", arch_debugfs_dir); if (of_property_read_u32(node, "cb_offset", &cb_offset)) cb_offset = IMC_CNTL_BLK_OFFSET; diff --git a/arch/powerpc/platforms/powernv/opal-lpc.c b/arch/powerpc/platforms/powernv/opal-lpc.c index 608569082ba0..1e5d51db40f8 100644 --- a/arch/powerpc/platforms/powernv/opal-lpc.c +++ b/arch/powerpc/platforms/powernv/opal-lpc.c @@ -10,13 +10,13 @@ #include #include #include +#include #include #include #include #include #include -#include #include static int opal_lpc_chip_id = -1; @@ -371,7 +371,7 @@ static int opal_lpc_init_debugfs(void) if (opal_lpc_chip_id < 0) return -ENODEV; - root = debugfs_create_dir("lpc", powerpc_debugfs_root); + root = debugfs_create_dir("lpc", arch_debugfs_dir); rc |= opal_lpc_debugfs_create_type(root, "io", OPAL_LPC_IO); rc |= opal_lpc_debugfs_create_type(root, "mem", OPAL_LPC_MEM); diff --git a/arch/powerpc/platforms/powernv/opal-xscom.c b/arch/powerpc/platforms/powernv/opal-xscom.c index fd510d961b8c..6b4eed2ef4fa 100644 --- a/arch/powerpc/platforms/powernv/opal-xscom.c +++ b/arch/powerpc/platforms/powernv/opal-xscom.c @@ -14,11 +14,11 @@ #include #include #include +#include #include #include #include -#include #include static u64 opal_scom_unmangle(u64 addr) @@ -189,7 +189,7 @@ static int scom_debug_init(void) if (!firmware_has_feature(FW_FEATURE_OPAL)) return 0; - root = debugfs_create_dir("scom", powerpc_debugfs_root); + root = debugfs_create_dir("scom", arch_debugfs_dir); if (!root) return -1; diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 2389cd79c3c8..3dd35c327d1c 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -32,7 +33,6 @@ #include #include #include -#include #include #include #include @@ -2475,7 +2475,7 @@ static void pnv_pci_ioda_create_dbgfs(void) phb = hose->private_data; sprintf(name, "PCI%04x", hose->global_number); - phb->dbgfs = debugfs_create_dir(name, powerpc_debugfs_root); + phb->dbgfs = debugfs_create_dir(name, arch_debugfs_dir); debugfs_create_file_unsafe("dump_diag_regs", 0200, phb->dbgfs, phb, &pnv_pci_diag_data_fops); diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c index 982f069e4c31..352af5b14a0f 100644 --- a/arch/powerpc/platforms/pseries/dtl.c +++ b/arch/powerpc/platforms/pseries/dtl.c @@ -11,10 +11,10 @@ #include #include #include +#include #include #include #include -#include #include #include @@ -338,7 +338,7 @@ static int 
dtl_init(void) /* set up common debugfs structure */ - dtl_dir = debugfs_create_dir("dtl", powerpc_debugfs_root); + dtl_dir = debugfs_create_dir("dtl", arch_debugfs_dir); debugfs_create_x8("dtl_event_mask", 0600, dtl_dir, &dtl_event_mask); debugfs_create_u32("dtl_buf_entries", 0400, dtl_dir, &dtl_buf_entries); diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 869ef638698a..bd1fcb0881ea 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -22,6 +22,8 @@ #include #include #include +#include + #include #include #include @@ -39,7 +41,6 @@ #include #include #include -#include #include #include "pseries.h" @@ -2019,7 +2020,7 @@ static int __init vpa_debugfs_init(void) if (!firmware_has_feature(FW_FEATURE_SPLPAR)) return 0; - vpa_dir = debugfs_create_dir("vpa", powerpc_debugfs_root); + vpa_dir = debugfs_create_dir("vpa", arch_debugfs_dir); /* set up the per-cpu vpa file*/ for_each_possible_cpu(i) { diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 4018964bbd69..458645c7a72b 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -21,7 +21,6 @@ #include #include -#include #include #include #include @@ -1769,7 +1768,7 @@ DEFINE_SHOW_ATTRIBUTE(xive_core_debug); int xive_core_debug_init(void) { if (xive_enabled()) - debugfs_create_file("xive", 0400, powerpc_debugfs_root, + debugfs_create_file("xive", 0400, arch_debugfs_dir, NULL, &xive_core_debug_fops); return 0; } diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index da4d7f225a40..ead460b80905 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -26,8 +26,8 @@ #include #include #include +#include -#include #include #include #include @@ -4077,8 +4077,8 @@ DEFINE_SIMPLE_ATTRIBUTE(xmon_dbgfs_ops, xmon_dbgfs_get, static int __init setup_xmon_dbgfs(void) { - debugfs_create_file("xmon", 0600, powerpc_debugfs_root, NULL, - &xmon_dbgfs_ops); + debugfs_create_file("xmon", 0600, arch_debugfs_dir, NULL, + &xmon_dbgfs_ops); return 0; } device_initcall(setup_xmon_dbgfs); -- cgit From 7e35ef662ca05c42dbc2f401bb76d9219dd7fd02 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 12 Aug 2021 18:52:19 +0530 Subject: powerpc/pseries: rename min_common_depth to primary_domain_index No functional change in this patch. 
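For context, primary_domain_index is the 1-based position within an ibm,associativity device tree property at which the kernel reads the NUMA node id, so the new name describes what the value indexes rather than how it is found. A minimal sketch of that lookup (hypothetical helper name and property values, not code from this patch; assumes of_read_number() from linux/of.h and NUMA_NO_NODE from linux/numa.h):

/*
 * An ibm,associativity property is laid out as { length, domain1, domain2, ... }.
 * With a primary domain index of 4 and a property of { 4, 2, 1, 0, 7 },
 * the node id read out below is 7.
 */
static int example_associativity_to_nid(const __be32 *prop, int primary_index)
{
	int len = of_read_number(prop, 1);	/* first cell: number of domain entries */

	if (len < primary_index)
		return NUMA_NO_NODE;

	return of_read_number(&prop[primary_index], 1);	/* 1-based entry */
}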
Signed-off-by: Aneesh Kumar K.V Reviewed-by: David Gibson Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210812132223.225214-2-aneesh.kumar@linux.ibm.com --- arch/powerpc/mm/numa.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 094a1076fd1f..79132744b728 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -51,7 +51,7 @@ EXPORT_SYMBOL(numa_cpu_lookup_table); EXPORT_SYMBOL(node_to_cpumask_map); EXPORT_SYMBOL(node_data); -static int min_common_depth; +static int primary_domain_index; static int n_mem_addr_cells, n_mem_size_cells; static int form1_affinity; @@ -232,8 +232,8 @@ static int associativity_to_nid(const __be32 *associativity) if (!numa_enabled) goto out; - if (of_read_number(associativity, 1) >= min_common_depth) - nid = of_read_number(&associativity[min_common_depth], 1); + if (of_read_number(associativity, 1) >= primary_domain_index) + nid = of_read_number(&associativity[primary_domain_index], 1); /* POWER4 LPAR uses 0xffff as invalid node */ if (nid == 0xffff || nid >= nr_node_ids) @@ -284,9 +284,9 @@ int of_node_to_nid(struct device_node *device) } EXPORT_SYMBOL(of_node_to_nid); -static int __init find_min_common_depth(void) +static int __init find_primary_domain_index(void) { - int depth; + int index; struct device_node *root; if (firmware_has_feature(FW_FEATURE_OPAL)) @@ -326,7 +326,7 @@ static int __init find_min_common_depth(void) } if (form1_affinity) { - depth = of_read_number(distance_ref_points, 1); + index = of_read_number(distance_ref_points, 1); } else { if (distance_ref_points_depth < 2) { printk(KERN_WARNING "NUMA: " @@ -334,7 +334,7 @@ static int __init find_min_common_depth(void) goto err; } - depth = of_read_number(&distance_ref_points[1], 1); + index = of_read_number(&distance_ref_points[1], 1); } /* @@ -348,7 +348,7 @@ static int __init find_min_common_depth(void) } of_node_put(root); - return depth; + return index; err: of_node_put(root); @@ -437,16 +437,16 @@ int of_drconf_to_nid_single(struct drmem_lmb *lmb) int nid = default_nid; int rc, index; - if ((min_common_depth < 0) || !numa_enabled) + if ((primary_domain_index < 0) || !numa_enabled) return default_nid; rc = of_get_assoc_arrays(&aa); if (rc) return default_nid; - if (min_common_depth <= aa.array_sz && + if (primary_domain_index <= aa.array_sz && !(lmb->flags & DRCONF_MEM_AI_INVALID) && lmb->aa_index < aa.n_arrays) { - index = lmb->aa_index * aa.array_sz + min_common_depth - 1; + index = lmb->aa_index * aa.array_sz + primary_domain_index - 1; nid = of_read_number(&aa.arrays[index], 1); if (nid == 0xffff || nid >= nr_node_ids) @@ -708,18 +708,18 @@ static int __init parse_numa_properties(void) return -1; } - min_common_depth = find_min_common_depth(); + primary_domain_index = find_primary_domain_index(); - if (min_common_depth < 0) { + if (primary_domain_index < 0) { /* - * if we fail to parse min_common_depth from device tree + * if we fail to parse primary_domain_index from device tree * mark the numa disabled, boot with numa disabled. 
*/ numa_enabled = false; - return min_common_depth; + return primary_domain_index; } - dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth); + dbg("NUMA associativity depth for CPU/Memory: %d\n", primary_domain_index); /* * Even though we connect cpus to numa domains later in SMP @@ -924,7 +924,7 @@ static void __init find_possible_nodes(void) goto out; } - max_nodes = of_read_number(&domains[min_common_depth], 1); + max_nodes = of_read_number(&domains[primary_domain_index], 1); pr_info("Partition configured for %d NUMA nodes.\n", max_nodes); for (i = 0; i < max_nodes; i++) { @@ -933,7 +933,7 @@ static void __init find_possible_nodes(void) } prop_length /= sizeof(int); - if (prop_length > min_common_depth + 2) + if (prop_length > primary_domain_index + 2) coregroup_enabled = 1; out: @@ -1266,7 +1266,7 @@ int cpu_to_coregroup_id(int cpu) goto out; index = of_read_number(associativity, 1); - if (index > min_common_depth + 1) + if (index > primary_domain_index + 1) return of_read_number(&associativity[index - 1], 1); out: -- cgit From 0eacd06bb8adea8dd9edb0a30144166d9f227e64 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 12 Aug 2021 18:52:20 +0530 Subject: powerpc/pseries: Rename TYPE1_AFFINITY to FORM1_AFFINITY Also make related code cleanups that will allow adding FORM2_AFFINITY in later patches. No functional change in this patch. Signed-off-by: Aneesh Kumar K.V Reviewed-by: David Gibson Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210812132223.225214-3-aneesh.kumar@linux.ibm.com --- arch/powerpc/include/asm/firmware.h | 4 ++-- arch/powerpc/include/asm/prom.h | 2 +- arch/powerpc/kernel/prom_init.c | 2 +- arch/powerpc/mm/numa.c | 35 ++++++++++++++++++------------- arch/powerpc/platforms/pseries/firmware.c | 2 +- 5 files changed, 26 insertions(+), 19 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h index 7604673787d6..60b631161360 100644 --- a/arch/powerpc/include/asm/firmware.h +++ b/arch/powerpc/include/asm/firmware.h @@ -44,7 +44,7 @@ #define FW_FEATURE_OPAL ASM_CONST(0x0000000010000000) #define FW_FEATURE_SET_MODE ASM_CONST(0x0000000040000000) #define FW_FEATURE_BEST_ENERGY ASM_CONST(0x0000000080000000) -#define FW_FEATURE_TYPE1_AFFINITY ASM_CONST(0x0000000100000000) +#define FW_FEATURE_FORM1_AFFINITY ASM_CONST(0x0000000100000000) #define FW_FEATURE_PRRN ASM_CONST(0x0000000200000000) #define FW_FEATURE_DRMEM_V2 ASM_CONST(0x0000000400000000) #define FW_FEATURE_DRC_INFO ASM_CONST(0x0000000800000000) @@ -69,7 +69,7 @@ enum { FW_FEATURE_SPLPAR | FW_FEATURE_LPAR | FW_FEATURE_CMO | FW_FEATURE_VPHN | FW_FEATURE_XCMO | FW_FEATURE_SET_MODE | FW_FEATURE_BEST_ENERGY | - FW_FEATURE_TYPE1_AFFINITY | FW_FEATURE_PRRN | + FW_FEATURE_FORM1_AFFINITY | FW_FEATURE_PRRN | FW_FEATURE_HPT_RESIZE | FW_FEATURE_DRMEM_V2 | FW_FEATURE_DRC_INFO | FW_FEATURE_BLOCK_REMOVE | FW_FEATURE_PAPR_SCM | FW_FEATURE_ULTRAVISOR | diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h index 324a13351749..df9fec9d232c 100644 --- a/arch/powerpc/include/asm/prom.h +++ b/arch/powerpc/include/asm/prom.h @@ -147,7 +147,7 @@ extern int of_read_drc_info_cell(struct property **prop, #define OV5_MSI 0x0201 /* PCIe/MSI support */ #define OV5_CMO 0x0480 /* Cooperative Memory Overcommitment */ #define OV5_XCMO 0x0440 /* Page Coalescing */ -#define OV5_TYPE1_AFFINITY 0x0580 /* Type 1 NUMA affinity */ +#define OV5_FORM1_AFFINITY 0x0580 /* FORM1 NUMA affinity */ #define OV5_PRRN 0x0540 /* Platform
Resource Reassignment */ #define OV5_HP_EVT 0x0604 /* Hot Plug Event support */ #define OV5_RESIZE_HPT 0x0601 /* Hash Page Table resizing */ diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index a5bf355ce1d6..57db605ad33a 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -1096,7 +1096,7 @@ static const struct ibm_arch_vec ibm_architecture_vec_template __initconst = { #else 0, #endif - .associativity = OV5_FEAT(OV5_TYPE1_AFFINITY) | OV5_FEAT(OV5_PRRN), + .associativity = OV5_FEAT(OV5_FORM1_AFFINITY) | OV5_FEAT(OV5_PRRN), .bin_opts = OV5_FEAT(OV5_RESIZE_HPT) | OV5_FEAT(OV5_HP_EVT), .micro_checkpoint = 0, .reserved0 = 0, diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 79132744b728..0bad11b3e929 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -53,7 +53,10 @@ EXPORT_SYMBOL(node_data); static int primary_domain_index; static int n_mem_addr_cells, n_mem_size_cells; -static int form1_affinity; + +#define FORM0_AFFINITY 0 +#define FORM1_AFFINITY 1 +static int affinity_form; #define MAX_DISTANCE_REF_POINTS 4 static int distance_ref_points_depth; @@ -190,7 +193,7 @@ int __node_distance(int a, int b) int i; int distance = LOCAL_DISTANCE; - if (!form1_affinity) + if (affinity_form == FORM0_AFFINITY) return ((a == b) ? LOCAL_DISTANCE : REMOTE_DISTANCE); for (i = 0; i < distance_ref_points_depth; i++) { @@ -210,7 +213,7 @@ static void initialize_distance_lookup_table(int nid, { int i; - if (!form1_affinity) + if (affinity_form != FORM1_AFFINITY) return; for (i = 0; i < distance_ref_points_depth; i++) { @@ -289,6 +292,17 @@ static int __init find_primary_domain_index(void) int index; struct device_node *root; + /* + * Check for which form of affinity. + */ + if (firmware_has_feature(FW_FEATURE_OPAL)) { + affinity_form = FORM1_AFFINITY; + } else if (firmware_has_feature(FW_FEATURE_FORM1_AFFINITY)) { + dbg("Using form 1 affinity\n"); + affinity_form = FORM1_AFFINITY; + } else + affinity_form = FORM0_AFFINITY; + if (firmware_has_feature(FW_FEATURE_OPAL)) root = of_find_node_by_path("/ibm,opal"); else @@ -318,23 +332,16 @@ static int __init find_primary_domain_index(void) } distance_ref_points_depth /= sizeof(int); - - if (firmware_has_feature(FW_FEATURE_OPAL) || - firmware_has_feature(FW_FEATURE_TYPE1_AFFINITY)) { - dbg("Using form 1 affinity\n"); - form1_affinity = 1; - } - - if (form1_affinity) { - index = of_read_number(distance_ref_points, 1); - } else { + if (affinity_form == FORM0_AFFINITY) { if (distance_ref_points_depth < 2) { printk(KERN_WARNING "NUMA: " - "short ibm,associativity-reference-points\n"); + "short ibm,associativity-reference-points\n"); goto err; } index = of_read_number(&distance_ref_points[1], 1); + } else { + index = of_read_number(distance_ref_points, 1); } /* diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c index 4c7b7f5a2ebc..5d4c2bc20bba 100644 --- a/arch/powerpc/platforms/pseries/firmware.c +++ b/arch/powerpc/platforms/pseries/firmware.c @@ -119,7 +119,7 @@ struct vec5_fw_feature { static __initdata struct vec5_fw_feature vec5_fw_features_table[] = { - {FW_FEATURE_TYPE1_AFFINITY, OV5_TYPE1_AFFINITY}, + {FW_FEATURE_FORM1_AFFINITY, OV5_FORM1_AFFINITY}, {FW_FEATURE_PRRN, OV5_PRRN}, {FW_FEATURE_DRMEM_V2, OV5_DRMEM_V2}, {FW_FEATURE_DRC_INFO, OV5_DRC_INFO}, -- cgit From 8ddc6448ec5a5ef50eaa581a7dec0e12a02850ff Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 12 Aug 2021 18:52:21 +0530 Subject: powerpc/pseries: 
Consolidate different NUMA distance update code paths The associativity details of the newly added resources are collected from the hypervisor via the "ibm,configure-connector" RTAS call. Update the NUMA distance details of the newly added NUMA node after the above call. Instead of updating the NUMA distance every time we look up a node id from the associativity property, add helpers that can be used during boot which do this only once. Also remove the distance update from the node id lookup helpers. Currently, we duplicate parsing code for ibm,associativity and ibm,associativity-lookup-arrays in the kernel. The associativity arrays provided by these device tree properties are very similar, and hence a helper can be used to parse the node id and NUMA distance details. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210812132223.225214-4-aneesh.kumar@linux.ibm.com --- arch/powerpc/include/asm/topology.h | 2 + arch/powerpc/mm/numa.c | 212 +++++++++++++++++------- arch/powerpc/platforms/pseries/hotplug-cpu.c | 2 + arch/powerpc/platforms/pseries/hotplug-memory.c | 2 + 4 files changed, 161 insertions(+), 57 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index e4db64c0e184..a6425a70c37b 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -64,6 +64,7 @@ static inline int early_cpu_to_node(int cpu) } int of_drconf_to_nid_single(struct drmem_lmb *lmb); +void update_numa_distance(struct device_node *node); #else @@ -93,6 +94,7 @@ static inline int of_drconf_to_nid_single(struct drmem_lmb *lmb) return first_online_node; } +static inline void update_numa_distance(struct device_node *node) {} #endif /* CONFIG_NUMA */ #if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR) diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 0bad11b3e929..3cda37c52f26 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -208,50 +208,35 @@ int __node_distance(int a, int b) } EXPORT_SYMBOL(__node_distance); -static void initialize_distance_lookup_table(int nid, - const __be32 *associativity) +static int __associativity_to_nid(const __be32 *associativity, + int max_array_sz) { - int i; + int nid; + /* + * primary_domain_index is 1 based array index. + */ + int index = primary_domain_index - 1; - if (affinity_form != FORM1_AFFINITY) - return; + if (!numa_enabled || index >= max_array_sz) + return NUMA_NO_NODE; - for (i = 0; i < distance_ref_points_depth; i++) { - const __be32 *entry; + nid = of_read_number(&associativity[index], 1); - entry = &associativity[be32_to_cpu(distance_ref_points[i]) - 1]; - distance_lookup_table[nid][i] = of_read_number(entry, 1); - } + /* POWER4 LPAR uses 0xffff as invalid node */ + if (nid == 0xffff || nid >= nr_node_ids) + nid = NUMA_NO_NODE; + return nid; } - /* * Returns nid in the range [0..nr_node_ids], or -1 if no useful NUMA * info is found.
*/ static int associativity_to_nid(const __be32 *associativity) { - int nid = NUMA_NO_NODE; - - if (!numa_enabled) - goto out; - - if (of_read_number(associativity, 1) >= primary_domain_index) - nid = of_read_number(&associativity[primary_domain_index], 1); - - /* POWER4 LPAR uses 0xffff as invalid node */ - if (nid == 0xffff || nid >= nr_node_ids) - nid = NUMA_NO_NODE; - - if (nid > 0 && - of_read_number(associativity, 1) >= distance_ref_points_depth) { - /* - * Skip the length field and send start of associativity array - */ - initialize_distance_lookup_table(nid, associativity + 1); - } + int array_sz = of_read_number(associativity, 1); -out: - return nid; + /* Skip the first element in the associativity array */ + return __associativity_to_nid((associativity + 1), array_sz); } /* Returns the nid associated with the given device tree node, @@ -287,6 +272,60 @@ int of_node_to_nid(struct device_node *device) } EXPORT_SYMBOL(of_node_to_nid); +static void __initialize_form1_numa_distance(const __be32 *associativity, + int max_array_sz) +{ + int i, nid; + + if (affinity_form != FORM1_AFFINITY) + return; + + nid = __associativity_to_nid(associativity, max_array_sz); + if (nid != NUMA_NO_NODE) { + for (i = 0; i < distance_ref_points_depth; i++) { + const __be32 *entry; + int index = be32_to_cpu(distance_ref_points[i]) - 1; + + /* + * broken hierarchy, return with broken distance table + */ + if (WARN(index >= max_array_sz, "Broken ibm,associativity property")) + return; + + entry = &associativity[index]; + distance_lookup_table[nid][i] = of_read_number(entry, 1); + } + } +} + +static void initialize_form1_numa_distance(const __be32 *associativity) +{ + int array_sz; + + array_sz = of_read_number(associativity, 1); + /* Skip the first element in the associativity array */ + __initialize_form1_numa_distance(associativity + 1, array_sz); +} + +/* + * Used to update distance information w.r.t newly added node. + */ +void update_numa_distance(struct device_node *node) +{ + if (affinity_form == FORM0_AFFINITY) + return; + else if (affinity_form == FORM1_AFFINITY) { + const __be32 *associativity; + + associativity = of_get_associativity(node); + if (!associativity) + return; + + initialize_form1_numa_distance(associativity); + return; + } +} + static int __init find_primary_domain_index(void) { int index; @@ -433,6 +472,38 @@ static int of_get_assoc_arrays(struct assoc_arrays *aa) return 0; } +static int get_nid_and_numa_distance(struct drmem_lmb *lmb) +{ + struct assoc_arrays aa = { .arrays = NULL }; + int default_nid = NUMA_NO_NODE; + int nid = default_nid; + int rc, index; + + if ((primary_domain_index < 0) || !numa_enabled) + return default_nid; + + rc = of_get_assoc_arrays(&aa); + if (rc) + return default_nid; + + if (primary_domain_index <= aa.array_sz && + !(lmb->flags & DRCONF_MEM_AI_INVALID) && lmb->aa_index < aa.n_arrays) { + const __be32 *associativity; + + index = lmb->aa_index * aa.array_sz; + associativity = &aa.arrays[index]; + nid = __associativity_to_nid(associativity, aa.array_sz); + if (nid > 0 && affinity_form == FORM1_AFFINITY) { + /* + * lookup array associativity entries have + * no length of the array as the first element. + */ + __initialize_form1_numa_distance(associativity, aa.array_sz); + } + } + return nid; +} + /* * This is like of_node_to_nid_single() for memory represented in the * ibm,dynamic-reconfiguration-memory node. 
@@ -453,26 +524,19 @@ int of_drconf_to_nid_single(struct drmem_lmb *lmb) if (primary_domain_index <= aa.array_sz && !(lmb->flags & DRCONF_MEM_AI_INVALID) && lmb->aa_index < aa.n_arrays) { - index = lmb->aa_index * aa.array_sz + primary_domain_index - 1; - nid = of_read_number(&aa.arrays[index], 1); - - if (nid == 0xffff || nid >= nr_node_ids) - nid = default_nid; + const __be32 *associativity; - if (nid > 0) { - index = lmb->aa_index * aa.array_sz; - initialize_distance_lookup_table(nid, - &aa.arrays[index]); - } + index = lmb->aa_index * aa.array_sz; + associativity = &aa.arrays[index]; + nid = __associativity_to_nid(associativity, aa.array_sz); } - return nid; } #ifdef CONFIG_PPC_SPLPAR -static int vphn_get_nid(long lcpu) + +static int __vphn_get_associativity(long lcpu, __be32 *associativity) { - __be32 associativity[VPHN_ASSOC_BUFSIZE] = {0}; long rc, hwid; /* @@ -492,12 +556,30 @@ static int vphn_get_nid(long lcpu) rc = hcall_vphn(hwid, VPHN_FLAG_VCPU, associativity); if (rc == H_SUCCESS) - return associativity_to_nid(associativity); + return 0; } + return -1; +} + +static int vphn_get_nid(long lcpu) +{ + __be32 associativity[VPHN_ASSOC_BUFSIZE] = {0}; + + + if (!__vphn_get_associativity(lcpu, associativity)) + return associativity_to_nid(associativity); + return NUMA_NO_NODE; + } #else + +static int __vphn_get_associativity(long lcpu, __be32 *associativity) +{ + return -1; +} + static int vphn_get_nid(long unused) { return NUMA_NO_NODE; @@ -692,7 +774,7 @@ static int __init numa_setup_drmem_lmb(struct drmem_lmb *lmb, size = read_n_cells(n_mem_size_cells, usm); } - nid = of_drconf_to_nid_single(lmb); + nid = get_nid_and_numa_distance(lmb); fake_numa_create_new_node(((base + size) >> PAGE_SHIFT), &nid); node_set_online(nid); @@ -709,6 +791,7 @@ static int __init parse_numa_properties(void) struct device_node *memory; int default_nid = 0; unsigned long i; + const __be32 *associativity; if (numa_enabled == 0) { printk(KERN_WARNING "NUMA disabled by user\n"); @@ -734,18 +817,30 @@ static int __init parse_numa_properties(void) * each node to be onlined must have NODE_DATA etc backing it. */ for_each_present_cpu(i) { + __be32 vphn_assoc[VPHN_ASSOC_BUFSIZE]; struct device_node *cpu; - int nid = vphn_get_nid(i); + int nid = NUMA_NO_NODE; - /* - * Don't fall back to default_nid yet -- we will plug - * cpus into nodes once the memory scan has discovered - * the topology. - */ - if (nid == NUMA_NO_NODE) { + memset(vphn_assoc, 0, VPHN_ASSOC_BUFSIZE * sizeof(__be32)); + + if (__vphn_get_associativity(i, vphn_assoc) == 0) { + nid = associativity_to_nid(vphn_assoc); + initialize_form1_numa_distance(vphn_assoc); + } else { + + /* + * Don't fall back to default_nid yet -- we will plug + * cpus into nodes once the memory scan has discovered + * the topology. + */ cpu = of_get_cpu_node(i, NULL); BUG_ON(!cpu); - nid = of_node_to_nid_single(cpu); + + associativity = of_get_associativity(cpu); + if (associativity) { + nid = associativity_to_nid(associativity); + initialize_form1_numa_distance(associativity); + } of_node_put(cpu); } @@ -781,8 +876,11 @@ new_range: * have associativity properties. If none, then * everything goes to default_nid. 
*/ - nid = of_node_to_nid_single(memory); - if (nid < 0) + associativity = of_get_associativity(memory); + if (associativity) { + nid = associativity_to_nid(associativity); + initialize_form1_numa_distance(associativity); + } else nid = default_nid; fake_numa_create_new_node(((start + size) >> PAGE_SHIFT), &nid); diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index e1f224320102..1ef40ef699a6 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -580,6 +580,8 @@ static ssize_t dlpar_cpu_add(u32 drc_index) return saved_rc; } + update_numa_distance(dn); + rc = dlpar_online_cpu(dn); if (rc) { saved_rc = rc; diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 0beb3ca2b549..14fccd7b9c99 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -180,6 +180,8 @@ static int update_lmb_associativity_index(struct drmem_lmb *lmb) return -ENODEV; } + update_numa_distance(lmb_node); + dr_node = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); if (!dr_node) { dlpar_free_cc_nodes(lmb_node); -- cgit From ef31cb83d19c4c589d650747cd5a7e502be9f665 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 12 Aug 2021 18:52:22 +0530 Subject: powerpc/pseries: Add a helper for form1 cpu distance This helper is only used with the dispatch trace log collection. A later patch will add Form2 affinity support, and this change helps keep that simpler. Also add a comment explaining that we don't expect the code to be called with FORM0. Signed-off-by: Aneesh Kumar K.V Reviewed-by: David Gibson Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210812132223.225214-5-aneesh.kumar@linux.ibm.com --- arch/powerpc/include/asm/topology.h | 4 ++-- arch/powerpc/mm/numa.c | 10 +++++++++- arch/powerpc/platforms/pseries/lpar.c | 4 ++-- 3 files changed, 13 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index a6425a70c37b..a4712ecad3e9 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -36,7 +36,7 @@ static inline int pcibus_to_node(struct pci_bus *bus) cpu_all_mask : \ cpumask_of_node(pcibus_to_node(bus))) -extern int cpu_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc); +int cpu_relative_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc); extern int __node_distance(int, int); #define node_distance(a, b) __node_distance(a, b) @@ -84,7 +84,7 @@ static inline void sysfs_remove_device_from_node(struct device *dev, static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node) {} -static inline int cpu_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc) +static inline int cpu_relative_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc) { return 0; } diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 3cda37c52f26..fe3a9a38eddf 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -166,7 +166,7 @@ static void unmap_cpu_from_node(unsigned long cpu) } #endif /* CONFIG_HOTPLUG_CPU || CONFIG_PPC_SPLPAR */ -int cpu_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc) +static int __cpu_form1_relative_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc) { int dist = 0; @@ -182,6 +182,14 @@ int cpu_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc) return dist; } +int cpu_relative_distance(__be32 *cpu1_assoc, __be32
*cpu2_assoc) +{ + /* We should not get called with FORM0 */ + VM_WARN_ON(affinity_form == FORM0_AFFINITY); + + return __cpu_form1_relative_distance(cpu1_assoc, cpu2_assoc); +} + /* must hold reference to node during call */ static const __be32 *of_get_associativity(struct device_node *dev) { diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index bd1fcb0881ea..3df6bdfea475 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -262,7 +262,7 @@ static int cpu_relative_dispatch_distance(int last_disp_cpu, int cur_disp_cpu) if (!last_disp_cpu_assoc || !cur_disp_cpu_assoc) return -EIO; - return cpu_distance(last_disp_cpu_assoc, cur_disp_cpu_assoc); + return cpu_relative_distance(last_disp_cpu_assoc, cur_disp_cpu_assoc); } static int cpu_home_node_dispatch_distance(int disp_cpu) @@ -282,7 +282,7 @@ static int cpu_home_node_dispatch_distance(int disp_cpu) if (!disp_cpu_assoc || !vcpu_assoc) return -EIO; - return cpu_distance(disp_cpu_assoc, vcpu_assoc); + return cpu_relative_distance(disp_cpu_assoc, vcpu_assoc); } static void update_vcpu_disp_stat(int disp_cpu) -- cgit From 1c6b5a7e74052768977855f95d6b8812f6e7772c Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 12 Aug 2021 18:52:23 +0530 Subject: powerpc/pseries: Add support for FORM2 associativity The PAPR interface currently supports two different ways of communicating resource grouping details to the OS. These are referred to as Form 0 and Form 1 associativity grouping. Form 0 is the older format and is now considered deprecated. This patch adds another resource grouping named FORM2. Signed-off-by: Daniel Henrique Barboza Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210812132223.225214-6-aneesh.kumar@linux.ibm.com --- arch/powerpc/include/asm/firmware.h | 3 +- arch/powerpc/include/asm/prom.h | 1 + arch/powerpc/kernel/prom_init.c | 3 +- arch/powerpc/mm/numa.c | 187 ++++++++++++++++++------ arch/powerpc/platforms/pseries/firmware.c | 1 + 5 files changed, 158 insertions(+), 37 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h index 60b631161360..97a3bd9ffeb9 100644 --- a/arch/powerpc/include/asm/firmware.h +++ b/arch/powerpc/include/asm/firmware.h @@ -53,6 +53,7 @@ #define FW_FEATURE_ULTRAVISOR ASM_CONST(0x0000004000000000) #define FW_FEATURE_STUFF_TCE ASM_CONST(0x0000008000000000) #define FW_FEATURE_RPT_INVALIDATE ASM_CONST(0x0000010000000000) +#define FW_FEATURE_FORM2_AFFINITY ASM_CONST(0x0000020000000000) #ifndef __ASSEMBLY__ @@ -73,7 +74,7 @@ enum { FW_FEATURE_HPT_RESIZE | FW_FEATURE_DRMEM_V2 | FW_FEATURE_DRC_INFO | FW_FEATURE_BLOCK_REMOVE | FW_FEATURE_PAPR_SCM | FW_FEATURE_ULTRAVISOR | - FW_FEATURE_RPT_INVALIDATE, + FW_FEATURE_RPT_INVALIDATE | FW_FEATURE_FORM2_AFFINITY, FW_FEATURE_PSERIES_ALWAYS = 0, FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL | FW_FEATURE_ULTRAVISOR, FW_FEATURE_POWERNV_ALWAYS = 0, diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h index df9fec9d232c..5c80152e8f18 100644 --- a/arch/powerpc/include/asm/prom.h +++ b/arch/powerpc/include/asm/prom.h @@ -149,6 +149,7 @@ extern int of_read_drc_info_cell(struct property **prop, #define OV5_XCMO 0x0440 /* Page Coalescing */ #define OV5_FORM1_AFFINITY 0x0580 /* FORM1 NUMA affinity */ #define OV5_PRRN 0x0540 /* Platform Resource Reassignment */ #define OV5_FORM2_AFFINITY 0x0520 /* Form2 NUMA affinity */ #define OV5_HP_EVT 0x0604 /* Hot
Plug Event support */ #define OV5_RESIZE_HPT 0x0601 /* Hash Page Table resizing */ #define OV5_PFO_HW_RNG 0x1180 /* PFO Random Number Generator */ diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 57db605ad33a..95a42d49e291 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -1096,7 +1096,8 @@ static const struct ibm_arch_vec ibm_architecture_vec_template __initconst = { #else 0, #endif - .associativity = OV5_FEAT(OV5_FORM1_AFFINITY) | OV5_FEAT(OV5_PRRN), + .associativity = OV5_FEAT(OV5_FORM1_AFFINITY) | OV5_FEAT(OV5_PRRN) | + OV5_FEAT(OV5_FORM2_AFFINITY), .bin_opts = OV5_FEAT(OV5_RESIZE_HPT) | OV5_FEAT(OV5_HP_EVT), .micro_checkpoint = 0, .reserved0 = 0, diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index fe3a9a38eddf..fe9c6ced40b2 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -56,12 +56,17 @@ static int n_mem_addr_cells, n_mem_size_cells; #define FORM0_AFFINITY 0 #define FORM1_AFFINITY 1 +#define FORM2_AFFINITY 2 static int affinity_form; #define MAX_DISTANCE_REF_POINTS 4 static int distance_ref_points_depth; static const __be32 *distance_ref_points; static int distance_lookup_table[MAX_NUMNODES][MAX_DISTANCE_REF_POINTS]; +static int numa_distance_table[MAX_NUMNODES][MAX_NUMNODES] = { + [0 ... MAX_NUMNODES - 1] = { [0 ... MAX_NUMNODES - 1] = -1 } +}; +static int numa_id_index_table[MAX_NUMNODES] = { [0 ... MAX_NUMNODES - 1] = NUMA_NO_NODE }; /* * Allocate node_to_cpumask_map based on number of available nodes @@ -166,6 +171,54 @@ static void unmap_cpu_from_node(unsigned long cpu) } #endif /* CONFIG_HOTPLUG_CPU || CONFIG_PPC_SPLPAR */ +static int __associativity_to_nid(const __be32 *associativity, + int max_array_sz) +{ + int nid; + /* + * primary_domain_index is 1 based array index. + */ + int index = primary_domain_index - 1; + + if (!numa_enabled || index >= max_array_sz) + return NUMA_NO_NODE; + + nid = of_read_number(&associativity[index], 1); + + /* POWER4 LPAR uses 0xffff as invalid node */ + if (nid == 0xffff || nid >= nr_node_ids) + nid = NUMA_NO_NODE; + return nid; +} +/* + * Returns nid in the range [0..nr_node_ids], or -1 if no useful NUMA + * info is found. 
+ */ +static int associativity_to_nid(const __be32 *associativity) +{ + int array_sz = of_read_number(associativity, 1); + + /* Skip the first element in the associativity array */ + return __associativity_to_nid((associativity + 1), array_sz); +} + +static int __cpu_form2_relative_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc) +{ + int dist; + int node1, node2; + + node1 = associativity_to_nid(cpu1_assoc); + node2 = associativity_to_nid(cpu2_assoc); + + dist = numa_distance_table[node1][node2]; + if (dist <= LOCAL_DISTANCE) + return 0; + else if (dist <= REMOTE_DISTANCE) + return 1; + else + return 2; +} + static int __cpu_form1_relative_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc) { int dist = 0; @@ -186,8 +239,9 @@ int cpu_relative_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc) { /* We should not get called with FORM0 */ VM_WARN_ON(affinity_form == FORM0_AFFINITY); - - return __cpu_form1_relative_distance(cpu1_assoc, cpu2_assoc); + if (affinity_form == FORM1_AFFINITY) + return __cpu_form1_relative_distance(cpu1_assoc, cpu2_assoc); + return __cpu_form2_relative_distance(cpu1_assoc, cpu2_assoc); } /* must hold reference to node during call */ @@ -201,7 +255,9 @@ int __node_distance(int a, int b) int i; int distance = LOCAL_DISTANCE; - if (affinity_form == FORM0_AFFINITY) + if (affinity_form == FORM2_AFFINITY) + return numa_distance_table[a][b]; + else if (affinity_form == FORM0_AFFINITY) return ((a == b) ? LOCAL_DISTANCE : REMOTE_DISTANCE); for (i = 0; i < distance_ref_points_depth; i++) { @@ -216,37 +272,6 @@ int __node_distance(int a, int b) } EXPORT_SYMBOL(__node_distance); -static int __associativity_to_nid(const __be32 *associativity, - int max_array_sz) -{ - int nid; - /* - * primary_domain_index is 1 based array index. - */ - int index = primary_domain_index - 1; - - if (!numa_enabled || index >= max_array_sz) - return NUMA_NO_NODE; - - nid = of_read_number(&associativity[index], 1); - - /* POWER4 LPAR uses 0xffff as invalid node */ - if (nid == 0xffff || nid >= nr_node_ids) - nid = NUMA_NO_NODE; - return nid; -} -/* - * Returns nid in the range [0..nr_node_ids], or -1 if no useful NUMA - * info is found. - */ -static int associativity_to_nid(const __be32 *associativity) -{ - int array_sz = of_read_number(associativity, 1); - - /* Skip the first element in the associativity array */ - return __associativity_to_nid((associativity + 1), array_sz); -} - /* Returns the nid associated with the given device tree node, * or -1 if not found. */ @@ -320,6 +345,8 @@ static void initialize_form1_numa_distance(const __be32 *associativity) */ void update_numa_distance(struct device_node *node) { + int nid; + if (affinity_form == FORM0_AFFINITY) return; else if (affinity_form == FORM1_AFFINITY) { @@ -332,6 +359,84 @@ void update_numa_distance(struct device_node *node) initialize_form1_numa_distance(associativity); return; } + + /* FORM2 affinity */ + nid = of_node_to_nid_single(node); + if (nid == NUMA_NO_NODE) + return; + + /* + * With FORM2 we expect NUMA distance of all possible NUMA + * nodes to be provided during boot. + */ + WARN(numa_distance_table[nid][nid] == -1, + "NUMA distance details for node %d not provided\n", nid); +} + +/* + * ibm,numa-lookup-index-table= {N, domainid1, domainid2, ..... domainidN} + * ibm,numa-distance-table = { N, 1, 2, 4, 5, 1, 6, .... 
N elements} + */ +static void initialize_form2_numa_distance_lookup_table(void) +{ + int i, j; + struct device_node *root; + const __u8 *numa_dist_table; + const __be32 *numa_lookup_index; + int numa_dist_table_length; + int max_numa_index, distance_index; + + if (firmware_has_feature(FW_FEATURE_OPAL)) + root = of_find_node_by_path("/ibm,opal"); + else + root = of_find_node_by_path("/rtas"); + if (!root) + root = of_find_node_by_path("/"); + + numa_lookup_index = of_get_property(root, "ibm,numa-lookup-index-table", NULL); + max_numa_index = of_read_number(&numa_lookup_index[0], 1); + + /* first element of the array is the size and is encode-int */ + numa_dist_table = of_get_property(root, "ibm,numa-distance-table", NULL); + numa_dist_table_length = of_read_number((const __be32 *)&numa_dist_table[0], 1); + /* Skip the size which is encoded int */ + numa_dist_table += sizeof(__be32); + + pr_debug("numa_dist_table_len = %d, numa_dist_indexes_len = %d\n", + numa_dist_table_length, max_numa_index); + + for (i = 0; i < max_numa_index; i++) + /* +1 skip the max_numa_index in the property */ + numa_id_index_table[i] = of_read_number(&numa_lookup_index[i + 1], 1); + + + if (numa_dist_table_length != max_numa_index * max_numa_index) { + WARN(1, "Wrong NUMA distance information\n"); + /* consider everybody else just remote. */ + for (i = 0; i < max_numa_index; i++) { + for (j = 0; j < max_numa_index; j++) { + int nodeA = numa_id_index_table[i]; + int nodeB = numa_id_index_table[j]; + + if (nodeA == nodeB) + numa_distance_table[nodeA][nodeB] = LOCAL_DISTANCE; + else + numa_distance_table[nodeA][nodeB] = REMOTE_DISTANCE; + } + } + } + + distance_index = 0; + for (i = 0; i < max_numa_index; i++) { + for (j = 0; j < max_numa_index; j++) { + int nodeA = numa_id_index_table[i]; + int nodeB = numa_id_index_table[j]; + + numa_distance_table[nodeA][nodeB] = numa_dist_table[distance_index++]; + pr_debug("dist[%d][%d]=%d ", nodeA, nodeB, numa_distance_table[nodeA][nodeB]); + } + } + of_node_put(root); } static int __init find_primary_domain_index(void) @@ -344,6 +449,9 @@ static int __init find_primary_domain_index(void) */ if (firmware_has_feature(FW_FEATURE_OPAL)) { affinity_form = FORM1_AFFINITY; + } else if (firmware_has_feature(FW_FEATURE_FORM2_AFFINITY)) { + dbg("Using form 2 affinity\n"); + affinity_form = FORM2_AFFINITY; } else if (firmware_has_feature(FW_FEATURE_FORM1_AFFINITY)) { dbg("Using form 1 affinity\n"); affinity_form = FORM1_AFFINITY; @@ -388,9 +496,12 @@ static int __init find_primary_domain_index(void) index = of_read_number(&distance_ref_points[1], 1); } else { + /* + * Both FORM1 and FORM2 affinity find the primary domain details + * at the same offset. + */ index = of_read_number(distance_ref_points, 1); } - /* * Warn and cap if the hardware supports more than * MAX_DISTANCE_REF_POINTS domains. @@ -819,6 +930,12 @@ static int __init parse_numa_properties(void) dbg("NUMA associativity depth for CPU/Memory: %d\n", primary_domain_index); + /* + * If it is FORM2 initialize the distance table here. + */ + if (affinity_form == FORM2_AFFINITY) + initialize_form2_numa_distance_lookup_table(); + /* * Even though we connect cpus to numa domains later in SMP * init, we need to know the node ids now. 
This is because diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c index 5d4c2bc20bba..f162156b7b68 100644 --- a/arch/powerpc/platforms/pseries/firmware.c +++ b/arch/powerpc/platforms/pseries/firmware.c @@ -123,6 +123,7 @@ vec5_fw_features_table[] = { {FW_FEATURE_PRRN, OV5_PRRN}, {FW_FEATURE_DRMEM_V2, OV5_DRMEM_V2}, {FW_FEATURE_DRC_INFO, OV5_DRC_INFO}, + {FW_FEATURE_FORM2_AFFINITY, OV5_FORM2_AFFINITY}, }; static void __init fw_vec5_feature_init(const char *vec5, unsigned long len) -- cgit From fb31f0a499332a053477ed57312b214e42476e6d Mon Sep 17 00:00:00 2001 From: Kenneth Lee Date: Wed, 28 Jul 2021 15:15:57 +0800 Subject: riscv: fix the global name pfn_base conflict RISC-V uses a global variable pfn_base for page/pfn translation. But this is a common name and will be used elsewhere. In those cases, the page-pfn macros which refer to this name will refer to the local/input variable instead (such as in vfio_pin_pages_remote). This makes everything wrong. This patch changes the name from pfn_base to riscv_pfn_base to fix this problem. Signed-off-by: Kenneth Lee Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/page.h | 4 ++-- arch/riscv/mm/init.c | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index 4da92cf28a19..d34327be7574 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -79,8 +79,8 @@ typedef struct page *pgtable_t; #endif #ifdef CONFIG_MMU -extern unsigned long pfn_base; -#define ARCH_PFN_OFFSET (pfn_base) +extern unsigned long riscv_pfn_base; +#define ARCH_PFN_OFFSET (riscv_pfn_base) #else #define ARCH_PFN_OFFSET (PAGE_OFFSET >> PAGE_SHIFT) #endif /* CONFIG_MMU */ diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index fdf093d01c6f..888dff9530b8 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -214,8 +214,8 @@ static struct pt_alloc_ops _pt_ops __initdata; #define pt_ops _pt_ops #endif -unsigned long pfn_base __ro_after_init; -EXPORT_SYMBOL(pfn_base); +unsigned long riscv_pfn_base __ro_after_init; +EXPORT_SYMBOL(riscv_pfn_base); pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss; pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss; @@ -587,7 +587,7 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) kernel_map.va_pa_offset = PAGE_OFFSET - kernel_map.phys_addr; kernel_map.va_kernel_pa_offset = kernel_map.virt_addr - kernel_map.phys_addr; - pfn_base = PFN_DOWN(kernel_map.phys_addr); + riscv_pfn_base = PFN_DOWN(kernel_map.phys_addr); /* Sanity check alignment and size */ BUG_ON((PAGE_OFFSET % PGDIR_SIZE) != 0); -- cgit From 8ba1a8b77ba1eb3aef441ed2caf28ab2b1261f5f Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Fri, 30 Jul 2021 20:48:41 +0800 Subject: riscv: Support allocating gigantic hugepages using CMA This patch adds support for allocating gigantic hugepages using CMA by specifying the hugetlb_cma= kernel parameter. This is only supported on RV64.
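As a usage sketch (figures assume RV64 with Sv39 translation and 4 KiB base pages; they are illustrative, not taken from this patch): gigantic pages are PUD-sized, so the order passed to hugetlb_cma_reserve() is PUD_SHIFT - PAGE_SHIFT = 30 - 12 = 18, i.e. each gigantic page covers 2^18 * 4 KiB = 1 GiB. Booting with "hugetlb_cma=4G" then sets aside a 4 GiB CMA area, from which up to four such pages can be allocated after boot:

/* From setup_bootmem(): reserve the CMA area sized by the hugetlb_cma= parameter */
if (IS_ENABLED(CONFIG_64BIT))
	hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT);

/*
 * Later, from userspace (hypothetical session):
 *   echo 4 > /sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages
 */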
Reviewed-by: Alexandre Ghiti Signed-off-by: Kefeng Wang Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/init.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 888dff9530b8..248f9ec393c1 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -202,6 +203,8 @@ static void __init setup_bootmem(void) early_init_fdt_scan_reserved_mem(); dma_contiguous_reserve(dma32_phys_limit); + if (IS_ENABLED(CONFIG_64BIT)) + hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT); memblock_allow_resize(); } -- cgit From db87a7199229b75c9996bf78117eceb81854fce2 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 13 Apr 2021 16:38:09 +0000 Subject: powerpc/bug: Remove specific powerpc BUG_ON() and WARN_ON() on PPC32 powerpc BUG_ON() and WARN_ON() are based on using the twnei instruction. For catching simple conditions like a variable having value 0, this is efficient because it does the test and the trap at the same time. But most conditions used with BUG_ON or WARN_ON are more complex and force GCC to format the condition into a 0 or 1 value in a register. This will usually require 2 to 3 instructions. The most efficient solution would be to use __builtin_trap() because GCC is able to optimise the use of the different trap instructions based on the requested condition, but this is complex, if not impossible, for the following reasons: - __builtin_trap() is a non-recoverable instruction, so it can't be used for WARN_ON - Knowing which line of code generated the trap would require the analysis of DWARF information. This is not a feature we have today. As mentioned in commit 8d4fbcfbe0a4 ("Fix WARN_ON() on bitfield ops") the way WARN_ON() is implemented is suboptimal. That commit also mentions an issue with 'long long' conditions. It fixed it for WARN_ON() but the same problem still exists today with BUG_ON() on PPC32. It will be fixed by using the generic implementation. By using the generic implementation, gcc will naturally generate a branch to the unconditional trap generated by BUG(). As modern powerpc implementations have zero-cycle branches, that's even more efficient. And for the functions using WARN_ON() and its return, the test on return from WARN_ON() is now also used for the WARN_ON() itself. On PPC64 we don't want it because we want to be able to use the CFAR register to track how we entered the code that trapped. The CFAR register would be clobbered by the branch. A simple test function: unsigned long test9w(unsigned long a, unsigned long b) { if (WARN_ON(!b)) return 0; return a / b; } Before the patch: 0000046c <test9w>: 46c: 7c 89 00 34 cntlzw r9,r4 470: 55 29 d9 7e rlwinm r9,r9,27,5,31 474: 0f 09 00 00 twnei r9,0 478: 2c 04 00 00 cmpwi r4,0 47c: 41 82 00 0c beq 488 480: 7c 63 23 96 divwu r3,r3,r4 484: 4e 80 00 20 blr 488: 38 60 00 00 li r3,0 48c: 4e 80 00 20 blr After the patch: 00000468 <test9w>: 468: 2c 04 00 00 cmpwi r4,0 46c: 41 82 00 0c beq 478 470: 7c 63 23 96 divwu r3,r3,r4 474: 4e 80 00 20 blr 478: 0f e0 00 00 twui r0,0 47c: 38 60 00 00 li r3,0 480: 4e 80 00 20 blr So we see before the patch we need 3 instructions on the likely path to handle the WARN_ON(). With the patch the trap goes on the unlikely path. See below the difference at the entry of system_call_exception, where we have several BUG_ON(), although less impressive. With the patch: 00000000 <system_call_exception>: 0: 81 6a 00 84 lwz r11,132(r10) 4: 90 6a 00 88 stw r3,136(r10) 8: 71 60 00 02 andi.
r0,r11,2 c: 41 82 00 70 beq 7c 10: 71 60 40 00 andi. r0,r11,16384 14: 41 82 00 6c beq 80 18: 71 6b 80 00 andi. r11,r11,32768 1c: 41 82 00 68 beq 84 20: 94 21 ff e0 stwu r1,-32(r1) 24: 93 e1 00 1c stw r31,28(r1) 28: 7d 8c 42 e6 mftb r12 ... 7c: 0f e0 00 00 twui r0,0 80: 0f e0 00 00 twui r0,0 84: 0f e0 00 00 twui r0,0 Without the patch: 00000000 <system_call_exception>: 0: 94 21 ff e0 stwu r1,-32(r1) 4: 93 e1 00 1c stw r31,28(r1) 8: 90 6a 00 88 stw r3,136(r10) c: 81 6a 00 84 lwz r11,132(r10) 10: 69 60 00 02 xori r0,r11,2 14: 54 00 ff fe rlwinm r0,r0,31,31,31 18: 0f 00 00 00 twnei r0,0 1c: 69 60 40 00 xori r0,r11,16384 20: 54 00 97 fe rlwinm r0,r0,18,31,31 24: 0f 00 00 00 twnei r0,0 28: 69 6b 80 00 xori r11,r11,32768 2c: 55 6b 8f fe rlwinm r11,r11,17,31,31 30: 0f 0b 00 00 twnei r11,0 34: 7d 8c 42 e6 mftb r12 Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/b286e07fb771a664b631cd07a40b09c06f26e64b.1618331881.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/bug.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h index 0b2162890d8b..d844de5adfcb 100644 --- a/arch/powerpc/include/asm/bug.h +++ b/arch/powerpc/include/asm/bug.h @@ -68,7 +68,11 @@ BUG_ENTRY("twi 31, 0, 0", 0); \ unreachable(); \ } while (0) +#define HAVE_ARCH_BUG + +#define __WARN_FLAGS(flags) BUG_ENTRY("twi 31, 0, 0", BUGFLAG_WARNING | (flags)) +#ifdef CONFIG_PPC64 #define BUG_ON(x) do { \ if (__builtin_constant_p(x)) { \ if (x) \ @@ -78,8 +82,6 @@ } \ } while (0) -#define __WARN_FLAGS(flags) BUG_ENTRY("twi 31, 0, 0", BUGFLAG_WARNING | (flags)) - #define WARN_ON(x) ({ \ int __ret_warn_on = !!(x); \ if (__builtin_constant_p(__ret_warn_on)) { \ if (__ret_warn_on) \ @@ -93,9 +95,10 @@ unlikely(__ret_warn_on); \ }) -#define HAVE_ARCH_BUG #define HAVE_ARCH_BUG_ON #define HAVE_ARCH_WARN_ON +#endif + #endif /* __ASSEMBLY__ */ #else #ifdef __ASSEMBLY__ -- cgit From 1e688dd2a3d6759d416616ff07afc4bb836c4213 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 13 Apr 2021 16:38:10 +0000 Subject: powerpc/bug: Provide better flexibility to WARN_ON/__WARN_FLAGS() with asm goto Using asm goto in __WARN_FLAGS() and WARN_ON() allows more flexibility to GCC. For that, add an entry to the exception table so that program_check_exception() knows where to resume execution after a WARNING. Here are two examples. The first one is done on PPC32 (which benefits from the previous patch), the second is on PPC64. unsigned long test(struct pt_regs *regs) { int ret; WARN_ON(regs->msr & MSR_PR); return regs->gpr[3]; } unsigned long test9w(unsigned long a, unsigned long b) { if (WARN_ON(!b)) return 0; return a / b; } Before the patch: 000003a8 <test>: 3a8: 81 23 00 84 lwz r9,132(r3) 3ac: 71 29 40 00 andi.
r9,r9,16384 3b0: 40 82 00 0c bne 3bc <test+0x14> 3b4: 80 63 00 0c lwz r3,12(r3) 3b8: 4e 80 00 20 blr 3bc: 0f e0 00 00 twui r0,0 3c0: 80 63 00 0c lwz r3,12(r3) 3c4: 4e 80 00 20 blr 0000000000000bf0 <.test9w>: bf0: 7c 89 00 74 cntlzd r9,r4 bf4: 79 29 d1 82 rldicl r9,r9,58,6 bf8: 0b 09 00 00 tdnei r9,0 bfc: 2c 24 00 00 cmpdi r4,0 c00: 41 82 00 0c beq c0c <.test9w+0x1c> c04: 7c 63 23 92 divdu r3,r3,r4 c08: 4e 80 00 20 blr c0c: 38 60 00 00 li r3,0 c10: 4e 80 00 20 blr After the patch: 000003a8 <test>: 3a8: 81 23 00 84 lwz r9,132(r3) 3ac: 71 29 40 00 andi. r9,r9,16384 3b0: 40 82 00 0c bne 3bc <test+0x14> 3b4: 80 63 00 0c lwz r3,12(r3) 3b8: 4e 80 00 20 blr 3bc: 0f e0 00 00 twui r0,0 0000000000000c50 <.test9w>: c50: 7c 89 00 74 cntlzd r9,r4 c54: 79 29 d1 82 rldicl r9,r9,58,6 c58: 0b 09 00 00 tdnei r9,0 c5c: 7c 63 23 92 divdu r3,r3,r4 c60: 4e 80 00 20 blr c70: 38 60 00 00 li r3,0 c74: 4e 80 00 20 blr In the first example, we see GCC doesn't need to duplicate what happens after the trap. In the second example, we see that GCC doesn't need to emit a test and a branch in the likely path in addition to the trap. We've got some WARN_ON() in the .softirqentry.text section, so it needs to be added to OTHER_TEXT_SECTIONS in modpost.c. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/389962b1b702e3c78d169e59bcfac56282889173.1618331882.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/64/kup.h | 2 +- arch/powerpc/include/asm/bug.h | 54 ++++++++++++++++++++++++++------ arch/powerpc/include/asm/extable.h | 14 +++++++++ arch/powerpc/include/asm/ppc_asm.h | 11 +------ arch/powerpc/kernel/entry_64.S | 2 +- arch/powerpc/kernel/misc_32.S | 2 +- arch/powerpc/kernel/traps.c | 9 ++++-- 7 files changed, 70 insertions(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/book3s/64/kup.h b/arch/powerpc/include/asm/book3s/64/kup.h index a1cc73a88710..170339969b7c 100644 --- a/arch/powerpc/include/asm/book3s/64/kup.h +++ b/arch/powerpc/include/asm/book3s/64/kup.h @@ -90,7 +90,7 @@ /* Prevent access to userspace using any key values */ LOAD_REG_IMMEDIATE(\gpr2, AMR_KUAP_BLOCKED) 999: tdne \gpr1, \gpr2 - EMIT_BUG_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE) + EMIT_WARN_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE) END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_BOOK3S_KUAP, 67) #endif .endm diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h index d844de5adfcb..1ee0f22313ee 100644 --- a/arch/powerpc/include/asm/bug.h +++ b/arch/powerpc/include/asm/bug.h @@ -4,6 +4,7 @@ #ifdef __KERNEL__ #include +#include #ifdef CONFIG_BUG @@ -30,6 +31,11 @@ .endm #endif /* verbose */ +.macro EMIT_WARN_ENTRY addr,file,line,flags + EX_TABLE(\addr,\addr+4) + EMIT_BUG_ENTRY \addr,\file,\line,\flags +.endm + #else /* !__ASSEMBLY__ */ /* _EMIT_BUG_ENTRY expects args %0,%1,%2,%3 to be FILE, LINE, flags and sizeof(struct bug_entry), respectively */ @@ -58,6 +64,16 @@ "i" (sizeof(struct bug_entry)), \ ##__VA_ARGS__) +#define WARN_ENTRY(insn, flags, label, ...) \ + asm_volatile_goto( \ + "1: " insn "\n" \ + EX_TABLE(1b, %l[label]) \ + _EMIT_BUG_ENTRY \ + : : "i" (__FILE__), "i" (__LINE__), \ + "i" (flags), \ + "i" (sizeof(struct bug_entry)), \ + ##__VA_ARGS__ : : label) + /* * BUG_ON() and WARN_ON() do their best to cooperate with compile-time * optimisations.
However depending on the complexity of the condition @@ -70,7 +86,15 @@ } while (0) #define HAVE_ARCH_BUG -#define __WARN_FLAGS(flags) BUG_ENTRY("twi 31, 0, 0", BUGFLAG_WARNING | (flags)) +#define __WARN_FLAGS(flags) do { \ + __label__ __label_warn_on; \ + \ + WARN_ENTRY("twi 31, 0, 0", BUGFLAG_WARNING | (flags), __label_warn_on); \ + unreachable(); \ + \ +__label_warn_on: \ + break; \ +} while (0) #ifdef CONFIG_PPC64 #define BUG_ON(x) do { \ @@ -83,15 +107,24 @@ } while (0) #define WARN_ON(x) ({ \ - int __ret_warn_on = !!(x); \ - if (__builtin_constant_p(__ret_warn_on)) { \ - if (__ret_warn_on) \ + bool __ret_warn_on = false; \ + do { \ + if (__builtin_constant_p((x))) { \ + if (!(x)) \ + break; \ __WARN(); \ - } else { \ - BUG_ENTRY(PPC_TLNEI " %4, 0", \ - BUGFLAG_WARNING | BUGFLAG_TAINT(TAINT_WARN), \ - "r" (__ret_warn_on)); \ - } \ + __ret_warn_on = true; \ + } else { \ + __label__ __label_warn_on; \ + \ + WARN_ENTRY(PPC_TLNEI " %4, 0", \ + BUGFLAG_WARNING | BUGFLAG_TAINT(TAINT_WARN), \ + __label_warn_on, "r" (x)); \ + break; \ +__label_warn_on: \ + __ret_warn_on = true; \ + } \ + } while (0); \ unlikely(__ret_warn_on); \ }) @@ -104,8 +137,11 @@ #ifdef __ASSEMBLY__ .macro EMIT_BUG_ENTRY addr,file,line,flags .endm +.macro EMIT_WARN_ENTRY addr,file,line,flags +.endm #else /* !__ASSEMBLY__ */ #define _EMIT_BUG_ENTRY +#define _EMIT_WARN_ENTRY #endif #endif /* CONFIG_BUG */ diff --git a/arch/powerpc/include/asm/extable.h b/arch/powerpc/include/asm/extable.h index eb91b2d2935a..26ce2e5c0fa8 100644 --- a/arch/powerpc/include/asm/extable.h +++ b/arch/powerpc/include/asm/extable.h @@ -17,6 +17,8 @@ #define ARCH_HAS_RELATIVE_EXTABLE +#ifndef __ASSEMBLY__ + struct exception_table_entry { int insn; int fixup; @@ -28,3 +30,15 @@ static inline unsigned long extable_fixup(const struct exception_table_entry *x) } #endif + +/* + * Helper macro for exception table entries + */ +#define EX_TABLE(_fault, _target) \ + stringify_in_c(.section __ex_table,"a";)\ + stringify_in_c(.balign 4;) \ + stringify_in_c(.long (_fault) - . ;) \ + stringify_in_c(.long (_target) - . ;) \ + stringify_in_c(.previous) + +#endif diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 116c1519728a..ffe712307e11 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -10,6 +10,7 @@ #include #include #include +#include #ifdef __ASSEMBLY__ @@ -752,16 +753,6 @@ END_FTR_SECTION_NESTED(CPU_FTR_CELL_TB_BUG, CPU_FTR_CELL_TB_BUG, 96) #endif /* __ASSEMBLY__ */ -/* - * Helper macro for exception table entries - */ -#define EX_TABLE(_fault, _target) \ - stringify_in_c(.section __ex_table,"a";)\ - stringify_in_c(.balign 4;) \ - stringify_in_c(.long (_fault) - . ;) \ - stringify_in_c(.long (_target) - . 
;) \ - stringify_in_c(.previous) - #define SOFT_MASK_TABLE(_start, _end) \ stringify_in_c(.section __soft_mask_table,"a";)\ stringify_in_c(.balign 8;) \ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 15720f8661a1..70cff7b49e17 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -309,7 +309,7 @@ _GLOBAL(enter_rtas) */ lbz r0,PACAIRQSOFTMASK(r13) 1: tdeqi r0,IRQS_ENABLED - EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING + EMIT_WARN_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING #endif /* Hard-disable interrupts */ diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 39ab15419592..d8645efff902 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -237,7 +237,7 @@ _GLOBAL(copy_page) addi r3,r3,-4 0: twnei r5, 0 /* WARN if r3 is not cache aligned */ - EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING + EMIT_WARN_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING addi r4,r4,-4 diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index c8f648727d36..51d4f5faf425 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1477,8 +1477,13 @@ static void do_program_check(struct pt_regs *regs) if (!(regs->msr & MSR_PR) && /* not user-mode */ report_bug(bugaddr, regs) == BUG_TRAP_TYPE_WARN) { - regs_add_return_ip(regs, 4); - return; + const struct exception_table_entry *entry; + + entry = search_exception_tables(bugaddr); + if (entry) { + regs_set_return_ip(regs, extable_fixup(entry) + regs->nip - bugaddr); + return; + } } _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); return; -- cgit From 0355785313e2191be4e1108cdbda94ddb0238c48 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Fri, 13 Aug 2021 13:05:11 -0700 Subject: powerpc: Add "-z notext" flag to disable diagnostic Object files used to link .tmp_vmlinux.kallsyms1 have many R_PPC64_ADDR64 relocations in non-SHF_WRITE sections. A -pie link contains many text relocations (e.g. in the .rela___ksymtab_gpl+* and .rela__mcount_loc sections), which are disallowed by LLD: ld.lld: error: can't create dynamic relocation R_PPC64_ADDR64 against local symbol in readonly segment; recompile object files with -fPIC or pass '-Wl,-z,notext' to allow text relocations in the output >>> defined in arch/powerpc/kernel/head_64.o >>> referenced by arch/powerpc/kernel/head_64.o:(__restart_table+0x10) Newer GNU ld configured with "--enable-textrel-check=error" will report an error as well: $ ld-new -EL -m elf64lppc -pie ... -o .tmp_vmlinux.kallsyms1 ... ld-new: read-only segment has dynamic relocations Add "-z notext" to suppress the errors. Non-CONFIG_RELOCATABLE builds use the default -no-pie mode, and thus R_PPC64_ADDR64 relocations can be resolved at link-time.
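As an aside, the class of relocation LLD rejects here can be illustrated with a small C sketch; the symbol and section names below are invented for illustration and are not taken from the kernel:

	/* A pointer constant in a read-only section needs an absolute
	 * relocation (R_PPC64_ADDR64 on ppc64le). When the object is not
	 * built with -fPIC but is linked with -pie, this becomes a dynamic
	 * relocation against a read-only segment - a "text relocation" -
	 * which LLD refuses unless -z notext is passed.
	 */
	extern void some_symbol(void);

	const void *const table_entry
		__attribute__((section("__example_table"))) = &some_symbol;

"-z notext" only relaxes the linker diagnostic; that is presumably acceptable for a relocatable vmlinux because the kernel applies its own relocations at early boot rather than relying on a dynamic loader.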
Reported-by: Itaru Kitayama Co-developed-by: Bill Wendling Signed-off-by: Fangrui Song Signed-off-by: Bill Wendling Reviewed-by: Nick Desaulniers Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210813200511.1905703-1-morbo@google.com --- arch/powerpc/Makefile | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 9aaf1abbc641..aa6808e70647 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -122,6 +122,7 @@ endif LDFLAGS_vmlinux-y := -Bstatic LDFLAGS_vmlinux-$(CONFIG_RELOCATABLE) := -pie +LDFLAGS_vmlinux-$(CONFIG_RELOCATABLE) += -z notext LDFLAGS_vmlinux := $(LDFLAGS_vmlinux-y) ifdef CONFIG_PPC64 -- cgit From 8b893ef190b0c440877de04f767efca4bf4d6af8 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 16 Aug 2021 12:30:11 +1000 Subject: powerpc/pseries: Fix build error when NUMA=n As reported by lkp, if NUMA=n we see a build error: arch/powerpc/platforms/pseries/hotplug-cpu.c: In function 'pseries_cpu_hotplug_init': arch/powerpc/platforms/pseries/hotplug-cpu.c:1022:8: error: 'node_to_cpumask_map' undeclared 1022 | node_to_cpumask_map[node]); Use cpumask_of_node() which has an empty stub for NUMA=n, and when NUMA=y does a lookup from node_to_cpumask_map[]. Fixes: bd1dd4c5f528 ("powerpc/pseries: Prevent free CPU ids being reused on another node") Reported-by: kernel test robot Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210816041032.2839343-1-mpe@ellerman.id.au --- arch/powerpc/platforms/pseries/hotplug-cpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index 1ef40ef699a6..d646c22e94ab 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -1021,7 +1021,7 @@ static int __init pseries_cpu_hotplug_init(void) /* Record ids of CPU added at boot time */ cpumask_or(node_recorded_ids_map[node], node_recorded_ids_map[node], - node_to_cpumask_map[node]); + cpumask_of_node(node)); } of_reconfig_notifier_register(&pseries_smp_nb); -- cgit From 47c258d71ebfc832a760a1dc6540cf3c33968023 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 15 Aug 2021 15:23:34 -0700 Subject: powerpc/head_check: use stderr for error messages Prefer stderr instead of stdout for error messages. This is good practice and can help CI error detection and reporting (0day in this case).
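The same convention carries over to C tools; a minimal sketch (an editor's illustration, not part of the patch) of keeping diagnostics on stderr so that a CI harness capturing stdout still sees them:

	#include <stdio.h>
	#include <stdlib.h>

	int main(void)
	{
		/* stdout stays clean for machine-readable output; errors go
		 * to stderr so "tool > out.log" still surfaces the failure. */
		fprintf(stderr, "ERROR: unexpected head code address\n");
		return EXIT_FAILURE;
	}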
Signed-off-by: Randy Dunlap Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210815222334.9575-1-rdunlap@infradead.org --- arch/powerpc/tools/head_check.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/tools/head_check.sh b/arch/powerpc/tools/head_check.sh index e32d3162e5ed..e477837fdc58 100644 --- a/arch/powerpc/tools/head_check.sh +++ b/arch/powerpc/tools/head_check.sh @@ -56,9 +56,9 @@ expected_start_head_addr=$vma start_head_addr=$(cat .tmp_symbols.txt | grep " t start_first_256B$" | cut -d' ' -f1) if [ "$start_head_addr" != "$expected_start_head_addr" ]; then - echo "ERROR: head code starts at $start_head_addr, should be $expected_start_head_addr" - echo "ERROR: try to enable LD_HEAD_STUB_CATCH config option" - echo "ERROR: see comments in arch/powerpc/tools/head_check.sh" + echo "ERROR: head code starts at $start_head_addr, should be $expected_start_head_addr" 1>&2 + echo "ERROR: try to enable LD_HEAD_STUB_CATCH config option" 1>&2 + echo "ERROR: see comments in arch/powerpc/tools/head_check.sh" 1>&2 exit 1 fi @@ -70,9 +70,9 @@ expected_start_text_addr=$(cat .tmp_symbols.txt | grep " a text_start$" | cut -d start_text_addr=$(cat .tmp_symbols.txt | grep " t start_text$" | cut -d' ' -f1) if [ "$start_text_addr" != "$expected_start_text_addr" ]; then - echo "ERROR: start_text address is $start_text_addr, should be $expected_start_text_addr" - echo "ERROR: try to enable LD_HEAD_STUB_CATCH config option" - echo "ERROR: see comments in arch/powerpc/tools/head_check.sh" + echo "ERROR: start_text address is $start_text_addr, should be $expected_start_text_addr" 1>&2 + echo "ERROR: try to enable LD_HEAD_STUB_CATCH config option" 1>&2 + echo "ERROR: see comments in arch/powerpc/tools/head_check.sh" 1>&2 exit 1 fi -- cgit From 4aae683f132777a92b80d6971d56174d5f4cb3f3 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 31 Jul 2021 14:22:32 +0900 Subject: tracing: Refactor TRACE_IRQFLAGS_SUPPORT in Kconfig MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make architectures select TRACE_IRQFLAGS_SUPPORT instead of having many defines. 
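The pattern being consolidated is the usual one for architecture capability flags: a single shared definition lives in arch/Kconfig and each architecture opts in via select, as the diffs below show. A generic sketch, where MYARCH is a placeholder symbol rather than a real one:

	# arch/Kconfig: the one shared definition
	config TRACE_IRQFLAGS_SUPPORT
		bool

	# arch/myarch/Kconfig: opt in with select
	config MYARCH
		bool
		select TRACE_IRQFLAGS_SUPPORT

This replaces the per-architecture "config TRACE_IRQFLAGS_SUPPORT / def_bool y" stanzas that the patch removes.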
Link: https://lkml.kernel.org/r/20210731052233.4703-2-masahiroy@kernel.org Acked-by: Heiko Carstens Acked-by: Vineet Gupta    #arch/arc Acked-by: Michael Ellerman (powerpc) Acked-by: Catalin Marinas Acked-by: Max Filippov Signed-off-by: Masahiro Yamada Signed-off-by: Steven Rostedt (VMware) --- arch/Kconfig | 3 +++ arch/arc/Kconfig | 4 +--- arch/arm/Kconfig | 5 +---- arch/arm64/Kconfig | 4 +--- arch/csky/Kconfig | 4 +--- arch/hexagon/Kconfig | 4 +--- arch/microblaze/Kconfig | 1 + arch/microblaze/Kconfig.debug | 5 ----- arch/mips/Kconfig | 1 + arch/mips/Kconfig.debug | 4 ---- arch/nds32/Kconfig | 4 +--- arch/nios2/Kconfig | 3 --- arch/openrisc/Kconfig | 4 +--- arch/parisc/Kconfig | 1 + arch/parisc/Kconfig.debug | 3 --- arch/powerpc/Kconfig | 5 +---- arch/riscv/Kconfig | 4 +--- arch/s390/Kconfig | 1 + arch/s390/Kconfig.debug | 3 --- arch/sh/Kconfig | 1 + arch/sh/Kconfig.debug | 3 --- arch/sparc/Kconfig | 1 + arch/sparc/Kconfig.debug | 4 ---- arch/um/Kconfig | 5 +---- arch/x86/Kconfig | 1 + arch/x86/Kconfig.debug | 3 --- arch/xtensa/Kconfig | 4 +--- 27 files changed, 21 insertions(+), 64 deletions(-) (limited to 'arch') diff --git a/arch/Kconfig b/arch/Kconfig index 129df498a8e1..9471a0feecaf 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -197,6 +197,9 @@ config HAVE_FUNCTION_ERROR_INJECTION config HAVE_NMI bool +config TRACE_IRQFLAGS_SUPPORT + bool + # # An arch should select this if it provides all these things: # diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index d8f51eb8963b..0c81df3a5c7a 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -49,9 +49,7 @@ config ARC select PERF_USE_VMALLOC if ARC_CACHE_VIPT_ALIASING select HAVE_ARCH_JUMP_LABEL if ISA_ARCV2 && !CPU_ENDIAN_BE32 select SET_FS - -config TRACE_IRQFLAGS_SUPPORT - def_bool y + select TRACE_IRQFLAGS_SUPPORT config LOCKDEP_SUPPORT def_bool y diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 82f908fa5676..3564647283e1 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -128,6 +128,7 @@ config ARM select RTC_LIB select SET_FS select SYS_SUPPORTS_APM_EMULATION + select TRACE_IRQFLAGS_SUPPORT if !CPU_V7M # Above selects are sorted alphabetically; please add new ones # according to that. Thanks. help @@ -191,10 +192,6 @@ config LOCKDEP_SUPPORT bool default y -config TRACE_IRQFLAGS_SUPPORT - bool - default !CPU_V7M - config ARCH_HAS_ILOG2_U32 bool diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index b5b13a932561..67b04ae5d010 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -221,6 +221,7 @@ config ARM64 select SYSCTL_EXCEPTION_TRACE select THREAD_INFO_IN_TASK select HAVE_ARCH_USERFAULTFD_MINOR if USERFAULTFD + select TRACE_IRQFLAGS_SUPPORT help ARM 64-bit (AArch64) Linux support. 
@@ -288,9 +289,6 @@ config ILLEGAL_POINTER_VALUE config LOCKDEP_SUPPORT def_bool y -config TRACE_IRQFLAGS_SUPPORT - def_bool y - config GENERIC_BUG def_bool y depends on BUG diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig index 2716f6395ba7..9d4d898df76b 100644 --- a/arch/csky/Kconfig +++ b/arch/csky/Kconfig @@ -82,6 +82,7 @@ config CSKY select PCI_SYSCALL if PCI select PCI_MSI if PCI select SET_FS + select TRACE_IRQFLAGS_SUPPORT config LOCKDEP_SUPPORT def_bool y @@ -139,9 +140,6 @@ config STACKTRACE_SUPPORT config TIME_LOW_RES def_bool y -config TRACE_IRQFLAGS_SUPPORT - def_bool y - config CPU_TLB_SIZE int default "128" if (CPU_CK610 || CPU_CK807 || CPU_CK810) diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig index e5a852080730..f993c4deaf23 100644 --- a/arch/hexagon/Kconfig +++ b/arch/hexagon/Kconfig @@ -31,6 +31,7 @@ config HEXAGON select GENERIC_CPU_DEVICES select SET_FS select ARCH_WANT_LD_ORPHAN_WARN + select TRACE_IRQFLAGS_SUPPORT help Qualcomm Hexagon is a processor architecture designed for high performance and low power across a wide variety of applications. @@ -52,9 +53,6 @@ config EARLY_PRINTK config MMU def_bool y -config TRACE_IRQFLAGS_SUPPORT - def_bool y - config GENERIC_CSUM def_bool y diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index 14a67a42fcae..59798e43cdb0 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -44,6 +44,7 @@ config MICROBLAZE select SPARSE_IRQ select SET_FS select ZONE_DMA + select TRACE_IRQFLAGS_SUPPORT # Endianness selection choice diff --git a/arch/microblaze/Kconfig.debug b/arch/microblaze/Kconfig.debug index 865527ac332a..a4e40e534e6a 100644 --- a/arch/microblaze/Kconfig.debug +++ b/arch/microblaze/Kconfig.debug @@ -1,6 +1 @@ # SPDX-License-Identifier: GPL-2.0-only -# For a description of the syntax of this configuration file, -# see Documentation/kbuild/kconfig-language.rst. - -config TRACE_IRQFLAGS_SUPPORT - def_bool y diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index cee6087cd686..3017c9e8bb07 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -97,6 +97,7 @@ config MIPS select PCI_MSI_ARCH_FALLBACKS if PCI_MSI select RTC_LIB select SYSCTL_EXCEPTION_TRACE + select TRACE_IRQFLAGS_SUPPORT select VIRT_TO_BUS select ARCH_HAS_ELFCORE_COMPAT diff --git a/arch/mips/Kconfig.debug b/arch/mips/Kconfig.debug index 43dbf5930796..f4ae7900fcd3 100644 --- a/arch/mips/Kconfig.debug +++ b/arch/mips/Kconfig.debug @@ -1,9 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -config TRACE_IRQFLAGS_SUPPORT - bool - default y - config EARLY_PRINTK bool "Early printk" if EXPERT depends on SYS_HAS_EARLY_PRINTK diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig index 62313902d75d..dd50123335d8 100644 --- a/arch/nds32/Kconfig +++ b/arch/nds32/Kconfig @@ -48,6 +48,7 @@ config NDS32 select HAVE_FTRACE_MCOUNT_RECORD select HAVE_DYNAMIC_FTRACE select SET_FS + select TRACE_IRQFLAGS_SUPPORT help Andes(nds32) Linux support. 
@@ -64,9 +65,6 @@ config GENERIC_LOCKBREAK def_bool y depends on PREEMPTION -config TRACE_IRQFLAGS_SUPPORT - def_bool y - config STACKTRACE_SUPPORT def_bool y diff --git a/arch/nios2/Kconfig b/arch/nios2/Kconfig index c24955c81c92..eabdc41f67f6 100644 --- a/arch/nios2/Kconfig +++ b/arch/nios2/Kconfig @@ -43,9 +43,6 @@ config NO_IOPORT_MAP config FPU def_bool n -config TRACE_IRQFLAGS_SUPPORT - def_bool n - menu "Kernel features" source "kernel/Kconfig.hz" diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig index 591acc5990dc..ebc5e7eafc5c 100644 --- a/arch/openrisc/Kconfig +++ b/arch/openrisc/Kconfig @@ -39,6 +39,7 @@ config OPENRISC select GENERIC_IRQ_MULTI_HANDLER select MMU_GATHER_NO_RANGE if MMU select SET_FS + select TRACE_IRQFLAGS_SUPPORT config CPU_BIG_ENDIAN def_bool y @@ -52,9 +53,6 @@ config GENERIC_HWEIGHT config NO_IOPORT_MAP def_bool y -config TRACE_IRQFLAGS_SUPPORT - def_bool y - # For now, use generic checksum functions #These can be reimplemented in assembly later if so inclined config GENERIC_CSUM diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index bde9907bc5b2..57a0d0896ef6 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -67,6 +67,7 @@ config PARISC select HAVE_DYNAMIC_FTRACE_WITH_REGS select HAVE_SOFTIRQ_ON_OWN_STACK if IRQSTACKS select SET_FS + select TRACE_IRQFLAGS_SUPPORT help The PA-RISC microprocessor is designed by Hewlett-Packard and used diff --git a/arch/parisc/Kconfig.debug b/arch/parisc/Kconfig.debug index 1478ded0e247..f66554cd5c45 100644 --- a/arch/parisc/Kconfig.debug +++ b/arch/parisc/Kconfig.debug @@ -1,4 +1 @@ # SPDX-License-Identifier: GPL-2.0 - -config TRACE_IRQFLAGS_SUPPORT - def_bool y diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index d01e3401581d..76a28452c042 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -94,10 +94,6 @@ config STACKTRACE_SUPPORT bool default y -config TRACE_IRQFLAGS_SUPPORT - bool - default y - config LOCKDEP_SUPPORT bool default y @@ -271,6 +267,7 @@ config PPC select STRICT_KERNEL_RWX if STRICT_MODULE_RWX select SYSCTL_EXCEPTION_TRACE select THREAD_INFO_IN_TASK + select TRACE_IRQFLAGS_SUPPORT select VIRT_TO_BUS if !PPC64 # # Please keep this list sorted alphabetically. 
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 8fcceb8eda07..15f012b9c6d7 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -109,6 +109,7 @@ config RISCV select SPARSE_IRQ select SYSCTL_EXCEPTION_TRACE select THREAD_INFO_IN_TASK + select TRACE_IRQFLAGS_SUPPORT select UACCESS_MEMCPY if !MMU select ZONE_DMA32 if 64BIT @@ -178,9 +179,6 @@ config ARCH_SUPPORTS_UPROBES config STACKTRACE_SUPPORT def_bool y -config TRACE_IRQFLAGS_SUPPORT - def_bool y - config GENERIC_BUG def_bool y depends on BUG diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index a0e2130f0100..8bce206e93a3 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -207,6 +207,7 @@ config S390 select SWIOTLB select SYSCTL_EXCEPTION_TRACE select THREAD_INFO_IN_TASK + select TRACE_IRQFLAGS_SUPPORT select TTY select VIRT_CPU_ACCOUNTING select ZONE_DMA diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug index 9ea6e61d5858..e94a2a7f6bf4 100644 --- a/arch/s390/Kconfig.debug +++ b/arch/s390/Kconfig.debug @@ -1,8 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -config TRACE_IRQFLAGS_SUPPORT - def_bool y - config EARLY_PRINTK def_bool y diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 45a0549421cd..8efeffcdf4cf 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -72,6 +72,7 @@ config SUPERH select RTC_LIB select SET_FS select SPARSE_IRQ + select TRACE_IRQFLAGS_SUPPORT help The SuperH is a RISC processor targeted for use in embedded systems and consumer electronics; it was also used in the Sega Dreamcast diff --git a/arch/sh/Kconfig.debug b/arch/sh/Kconfig.debug index 28a43d63bde1..958f790273ab 100644 --- a/arch/sh/Kconfig.debug +++ b/arch/sh/Kconfig.debug @@ -1,8 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -config TRACE_IRQFLAGS_SUPPORT - def_bool y - config SH_STANDARD_BIOS bool "Use LinuxSH standard BIOS" help diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index c5fa7932b550..fb8bdd40c72a 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -50,6 +50,7 @@ config SPARC select NEED_DMA_MAP_STATE select NEED_SG_DMA_LENGTH select SET_FS + select TRACE_IRQFLAGS_SUPPORT config SPARC32 def_bool !64BIT diff --git a/arch/sparc/Kconfig.debug b/arch/sparc/Kconfig.debug index 50a918d496c8..6b2bec1888b3 100644 --- a/arch/sparc/Kconfig.debug +++ b/arch/sparc/Kconfig.debug @@ -1,9 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -config TRACE_IRQFLAGS_SUPPORT - bool - default y - config DEBUG_DCFLUSH bool "D-cache flush debugging" depends on SPARC64 && DEBUG_KERNEL diff --git a/arch/um/Kconfig b/arch/um/Kconfig index 0561b73cfd9a..0a699440c4c5 100644 --- a/arch/um/Kconfig +++ b/arch/um/Kconfig @@ -20,6 +20,7 @@ config UML select GENERIC_CPU_DEVICES select HAVE_GCC_PLUGINS select SET_FS + select TRACE_IRQFLAGS_SUPPORT select TTY # Needed for line.c config MMU @@ -50,10 +51,6 @@ config ISA config SBUS bool -config TRACE_IRQFLAGS_SUPPORT - bool - default y - config LOCKDEP_SUPPORT bool default y diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 49270655e827..171faee05035 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -261,6 +261,7 @@ config X86 select STACK_VALIDATION if HAVE_STACK_VALIDATION && (HAVE_STATIC_CALL_INLINE || RETPOLINE) select SYSCTL_EXCEPTION_TRACE select THREAD_INFO_IN_TASK + select TRACE_IRQFLAGS_SUPPORT select USER_STACKTRACE_SUPPORT select VIRT_TO_BUS select HAVE_ARCH_KCSAN if X86_64 diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 80b57e7f4947..d3a6f74a94bd 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -1,8 +1,5 @@ # 
SPDX-License-Identifier: GPL-2.0 -config TRACE_IRQFLAGS_SUPPORT - def_bool y - config TRACE_IRQFLAGS_NMI_SUPPORT def_bool y diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index 2332b2156993..90aabb27f7d7 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -41,6 +41,7 @@ config XTENSA select MODULES_USE_ELF_RELA select PERF_USE_VMALLOC select SET_FS + select TRACE_IRQFLAGS_SUPPORT select VIRT_TO_BUS help Xtensa processors are 32-bit RISC machines designed by Tensilica @@ -72,9 +73,6 @@ config LOCKDEP_SUPPORT config STACKTRACE_SUPPORT def_bool y -config TRACE_IRQFLAGS_SUPPORT - def_bool y - config MMU def_bool n -- cgit From e95ad5f21693a37c0d318b5988c5a0de324eb3e3 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 16 Aug 2021 16:36:02 +1000 Subject: powerpc/head_check: Fix shellcheck errors Replace "cat file | grep pattern" with "grep pattern file", and quote a few variables. Together that fixes all shellcheck errors. Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210817125154.3369884-1-mpe@ellerman.id.au --- arch/powerpc/tools/head_check.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/tools/head_check.sh b/arch/powerpc/tools/head_check.sh index e477837fdc58..689907cda996 100644 --- a/arch/powerpc/tools/head_check.sh +++ b/arch/powerpc/tools/head_check.sh @@ -49,11 +49,11 @@ vmlinux="$2" $nm "$vmlinux" | grep -e " [TA] _stext$" -e " t start_first_256B$" -e " a text_start$" -e " t start_text$" > .tmp_symbols.txt -vma=$(cat .tmp_symbols.txt | grep -e " [TA] _stext$" | cut -d' ' -f1) +vma=$(grep -e " [TA] _stext$" .tmp_symbols.txt | cut -d' ' -f1) -expected_start_head_addr=$vma +expected_start_head_addr="$vma" -start_head_addr=$(cat .tmp_symbols.txt | grep " t start_first_256B$" | cut -d' ' -f1) +start_head_addr=$(grep " t start_first_256B$" .tmp_symbols.txt | cut -d' ' -f1) if [ "$start_head_addr" != "$expected_start_head_addr" ]; then echo "ERROR: head code starts at $start_head_addr, should be $expected_start_head_addr" 1>&2 @@ -63,11 +63,11 @@ if [ "$start_head_addr" != "$expected_start_head_addr" ]; then exit 1 fi -top_vma=$(echo $vma | cut -d'0' -f1) +top_vma=$(echo "$vma" | cut -d'0' -f1) -expected_start_text_addr=$(cat .tmp_symbols.txt | grep " a text_start$" | cut -d' ' -f1 | sed "s/^0/$top_vma/") +expected_start_text_addr=$(grep " a text_start$" .tmp_symbols.txt | cut -d' ' -f1 | sed "s/^0/$top_vma/") -start_text_addr=$(cat .tmp_symbols.txt | grep " t start_text$" | cut -d' ' -f1) +start_text_addr=$(grep " t start_text$" .tmp_symbols.txt | cut -d' ' -f1) if [ "$start_text_addr" != "$expected_start_text_addr" ]; then echo "ERROR: start_text address is $start_text_addr, should be $expected_start_text_addr" 1>&2 -- cgit From c5ac55b6cbc604ad4144c380feae20f69453df91 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Tue, 17 Aug 2021 14:24:05 +0930 Subject: powerpc/config: Fix IPV6 warning in mpc885_ads When building this config there's a warning: 79:warning: override: reassigning to symbol IPV6 Commit 9a1762a4a4ff ("powerpc/8xx: Update mpc885_ads_defconfig to improve CI") added CONFIG_IPV6=y, but left '# CONFIG_IPV6 is not set' in. IPV6 is default y, so remove both to clean up the build.
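For clarity, the colliding defconfig lines looked like this (reconstructed from the description above; the later assignment, at line 79, is what triggers kconfig's override warning):

	# CONFIG_IPV6 is not set
	...
	CONFIG_IPV6=y

Since IPV6 defaults to y anyway, removing both lines leaves the resulting configuration unchanged.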
Fixes: 9a1762a4a4ff ("powerpc/8xx: Update mpc885_ads_defconfig to improve CI") Signed-off-by: Joel Stanley Acked-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210817045407.2445664-2-joel@jms.id.au --- arch/powerpc/configs/mpc885_ads_defconfig | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/configs/mpc885_ads_defconfig b/arch/powerpc/configs/mpc885_ads_defconfig index d21f266cea9a..5cd17adf903f 100644 --- a/arch/powerpc/configs/mpc885_ads_defconfig +++ b/arch/powerpc/configs/mpc885_ads_defconfig @@ -21,7 +21,6 @@ CONFIG_INET=y CONFIG_IP_MULTICAST=y CONFIG_IP_PNP=y CONFIG_SYN_COOKIES=y -# CONFIG_IPV6 is not set # CONFIG_FW_LOADER is not set CONFIG_MTD=y CONFIG_MTD_BLOCK=y @@ -76,7 +75,6 @@ CONFIG_PERF_EVENTS=y CONFIG_MATH_EMULATION=y CONFIG_VIRT_CPU_ACCOUNTING_NATIVE=y CONFIG_STRICT_KERNEL_RWX=y -CONFIG_IPV6=y CONFIG_BPF_JIT=y CONFIG_DEBUG_VM_PGTABLE=y CONFIG_BDI_SWITCH=y -- cgit From d0e28a6145c3455b69991245e7f6147eb914b34a Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Tue, 17 Aug 2021 14:24:06 +0930 Subject: powerpc/config: Re-enable MTD_PHYSMAP_OF CONFIG_MTD_PHYSMAP_OF is no longer enabled as it depends on MTD_PHYSMAP which is not enabled. This is a regression from commit 642b1e8dbed7 ("mtd: maps: Merge physmap_of.c into physmap-core.c"), which added the extra dependency. Add CONFIG_MTD_PHYSMAP=y so this stays in the config, as Christophe said it is useful for build coverage. Fixes: 642b1e8dbed7 ("mtd: maps: Merge physmap_of.c into physmap-core.c") Signed-off-by: Joel Stanley Acked-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210817045407.2445664-3-joel@jms.id.au --- arch/powerpc/configs/mpc885_ads_defconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/powerpc/configs/mpc885_ads_defconfig b/arch/powerpc/configs/mpc885_ads_defconfig index 5cd17adf903f..cd08f9ed2c8d 100644 --- a/arch/powerpc/configs/mpc885_ads_defconfig +++ b/arch/powerpc/configs/mpc885_ads_defconfig @@ -33,6 +33,7 @@ CONFIG_MTD_CFI_GEOMETRY=y # CONFIG_MTD_CFI_I2 is not set CONFIG_MTD_CFI_I4=y CONFIG_MTD_CFI_AMDSTD=y +CONFIG_MTD_PHYSMAP=y CONFIG_MTD_PHYSMAP_OF=y # CONFIG_BLK_DEV is not set CONFIG_NETDEVICES=y -- cgit From 87e0d46bf68913f4c87dba94aadc00da658a874b Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Tue, 17 Aug 2021 14:24:07 +0930 Subject: powerpc/configs: Regenerate mpc885_ads_defconfig Regenerate atop v5.14-rc6 by doing a make savedefconfig.
The changes are a re-ordering, except for the following (which are still set indirectly): - CONFIG_DEBUG_KERNEL=y selected by EXPERT - CONFIG_PPC_EARLY_DEBUG_CPM_ADDR=0xff002008 which is the default setting Signed-off-by: Joel Stanley Acked-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210817045407.2445664-4-joel@jms.id.au --- arch/powerpc/configs/mpc885_ads_defconfig | 46 +++++++++++++++---------------- 1 file changed, 22 insertions(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/configs/mpc885_ads_defconfig b/arch/powerpc/configs/mpc885_ads_defconfig index cd08f9ed2c8d..c74dc76b1d0d 100644 --- a/arch/powerpc/configs/mpc885_ads_defconfig +++ b/arch/powerpc/configs/mpc885_ads_defconfig @@ -1,19 +1,30 @@ -CONFIG_PPC_8xx=y # CONFIG_SWAP is not set CONFIG_SYSVIPC=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y +CONFIG_BPF_JIT=y +CONFIG_VIRT_CPU_ACCOUNTING_NATIVE=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_EXPERT=y # CONFIG_ELF_CORE is not set # CONFIG_BASE_FULL is not set # CONFIG_FUTEX is not set +CONFIG_PERF_EVENTS=y # CONFIG_VM_EVENT_COUNTERS is not set -# CONFIG_BLK_DEV_BSG is not set -CONFIG_PARTITION_ADVANCED=y +CONFIG_PPC_8xx=y +CONFIG_8xx_GPIO=y +CONFIG_SMC_UCODE_PATCH=y +CONFIG_PIN_TLB=y CONFIG_GEN_RTC=y CONFIG_HZ_100=y +CONFIG_MATH_EMULATION=y +CONFIG_PPC_16K_PAGES=y +CONFIG_ADVANCED_OPTIONS=y # CONFIG_SECCOMP is not set +CONFIG_STRICT_KERNEL_RWX=y +CONFIG_MODULES=y +# CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -46,38 +57,25 @@ CONFIG_DAVICOM_PHY=y # CONFIG_LEGACY_PTYS is not set CONFIG_SERIAL_CPM=y CONFIG_SERIAL_CPM_CONSOLE=y +CONFIG_SPI=y +CONFIG_SPI_FSL_SPI=y # CONFIG_HWMON is not set +CONFIG_WATCHDOG=y +CONFIG_8xxx_WDT=y # CONFIG_USB_SUPPORT is not set # CONFIG_DNOTIFY is not set CONFIG_TMPFS=y CONFIG_CRAMFS=y CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y +CONFIG_CRYPTO=y +CONFIG_CRYPTO_DEV_TALITOS=y CONFIG_CRC32_SLICEBY4=y CONFIG_DEBUG_INFO=y CONFIG_MAGIC_SYSRQ=y -CONFIG_DETECT_HUNG_TASK=y -CONFIG_PPC_16K_PAGES=y -CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_FS=y -CONFIG_PPC_PTDUMP=y -CONFIG_MODULES=y -CONFIG_SPI=y -CONFIG_SPI_FSL_SPI=y -CONFIG_CRYPTO=y -CONFIG_CRYPTO_DEV_TALITOS=y -CONFIG_8xx_GPIO=y -CONFIG_WATCHDOG=y -CONFIG_8xxx_WDT=y -CONFIG_SMC_UCODE_PATCH=y -CONFIG_ADVANCED_OPTIONS=y -CONFIG_PIN_TLB=y -CONFIG_PERF_EVENTS=y -CONFIG_MATH_EMULATION=y -CONFIG_VIRT_CPU_ACCOUNTING_NATIVE=y -CONFIG_STRICT_KERNEL_RWX=y -CONFIG_BPF_JIT=y CONFIG_DEBUG_VM_PGTABLE=y +CONFIG_DETECT_HUNG_TASK=y CONFIG_BDI_SWITCH=y CONFIG_PPC_EARLY_DEBUG=y -CONFIG_PPC_EARLY_DEBUG_CPM_ADDR=0xff002008 +CONFIG_PPC_PTDUMP=y -- cgit From e225c4d6bc389701f2f63fc246420a1da3465ab5 Mon Sep 17 00:00:00 2001 From: Wan Jiabing Date: Tue, 23 Mar 2021 14:29:05 +0800 Subject: powerpc: Remove duplicate includes interrupt.c: asm/interrupt.h has been included at line 12, so remove the duplicate one at line 10. time.c: linux/sched/clock.h has been included at line 33, so remove the duplicate one at line 56 and move sched/cputime.h under the sched include segment.
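The shape of the cleanup, as a generic C sketch (only asm/interrupt.h is taken from the commit above; the other header name is illustrative):

	/* Before: the same header pulled in twice. The include guard makes
	 * the duplicate harmless, but it is pure noise. */
	#include <asm/interrupt.h>
	#include <linux/context_tracking.h>
	#include <asm/interrupt.h>	/* duplicate - dropped */

	/* After: each header appears exactly once. */
	#include <asm/interrupt.h>
	#include <linux/context_tracking.h>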
Signed-off-by: Wan Jiabing Reviewed-by: Daniel Axtens Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210323062916.295346-1-wanjiabing@vivo.com --- arch/powerpc/kernel/interrupt.c | 1 - arch/powerpc/kernel/time.c | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index 21bbd615ca41..abc2b3dc3f20 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index e45ce427bffb..77bae85e2fae 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -52,8 +53,6 @@ #include #include #include -#include -#include #include #include -- cgit From 504c6295b998effa682089747a96d7bb5933d4db Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 12 Aug 2021 10:39:50 +0530 Subject: arm64/mm: Add remaining ID_AA64MMFR0_PARANGE_ macros Currently there are macros only for the 48 and 52 bit parange values extracted from the ID_AA64MMFR0.PARANGE field. This change completes the enumeration and updates the helper id_aa64mmfr0_parange_to_phys_shift(). While here it also defines ARM64_MIN_PARANGE_BITS as the absolute minimum PA range shift value that could be supported on a given platform. Cc: Marc Zyngier Cc: Catalin Marinas Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: kvmarm@lists.cs.columbia.edu Cc: linux-kernel@vger.kernel.org Signed-off-by: Anshuman Khandual Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/1628744994-16623-2-git-send-email-anshuman.khandual@arm.com --- arch/arm64/include/asm/cpufeature.h | 14 +++++++------- arch/arm64/include/asm/sysreg.h | 7 +++++++ 2 files changed, 14 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 9bb9d11750d7..8633bdb21f33 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -781,13 +781,13 @@ extern int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt); static inline u32 id_aa64mmfr0_parange_to_phys_shift(int parange) { switch (parange) { - case 0: return 32; - case 1: return 36; - case 2: return 40; - case 3: return 42; - case 4: return 44; - case 5: return 48; - case 6: return 52; + case ID_AA64MMFR0_PARANGE_32: return 32; + case ID_AA64MMFR0_PARANGE_36: return 36; + case ID_AA64MMFR0_PARANGE_40: return 40; + case ID_AA64MMFR0_PARANGE_42: return 42; + case ID_AA64MMFR0_PARANGE_44: return 44; + case ID_AA64MMFR0_PARANGE_48: return 48; + case ID_AA64MMFR0_PARANGE_52: return 52; /* * A future PE could use a value unknown to the kernel.
* However, by the "D10.1.4 Principles of the ID scheme diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 943d31d92b5b..1972e4b9be5c 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -853,9 +853,16 @@ #define ID_AA64MMFR0_TGRAN64_SUPPORTED 0x0 #define ID_AA64MMFR0_TGRAN16_NI 0x0 #define ID_AA64MMFR0_TGRAN16_SUPPORTED 0x1 +#define ID_AA64MMFR0_PARANGE_32 0x0 +#define ID_AA64MMFR0_PARANGE_36 0x1 +#define ID_AA64MMFR0_PARANGE_40 0x2 +#define ID_AA64MMFR0_PARANGE_42 0x3 +#define ID_AA64MMFR0_PARANGE_44 0x4 #define ID_AA64MMFR0_PARANGE_48 0x5 #define ID_AA64MMFR0_PARANGE_52 0x6 +#define ARM64_MIN_PARANGE_BITS 32 + #define ID_AA64MMFR0_TGRAN_2_SUPPORTED_DEFAULT 0x0 #define ID_AA64MMFR0_TGRAN_2_SUPPORTED_NONE 0x1 #define ID_AA64MMFR0_TGRAN_2_SUPPORTED_MIN 0x2 -- cgit From 9788c14060f3c179c376b2a87af1a430d4d84973 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 12 Aug 2021 10:39:51 +0530 Subject: KVM: arm64: Use ARM64_MIN_PARANGE_BITS as the minimum supported IPA Drop the hard-coded value for the minimum supported IPA range bits (i.e. 32). Instead, use ARM64_MIN_PARANGE_BITS, which improves code readability. Cc: Marc Zyngier Cc: James Morse Cc: Alexandru Elisei Cc: Suzuki K Poulose Cc: Catalin Marinas Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: kvmarm@lists.cs.columbia.edu Cc: linux-kernel@vger.kernel.org Signed-off-by: Anshuman Khandual Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/1628744994-16623-3-git-send-email-anshuman.khandual@arm.com --- arch/arm64/kvm/reset.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 18ffc6ad67b8..08fd487d5f95 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -364,7 +364,7 @@ int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type) phys_shift = KVM_VM_TYPE_ARM_IPA_SIZE(type); if (phys_shift) { if (phys_shift > kvm_ipa_limit || - phys_shift < 32) + phys_shift < ARM64_MIN_PARANGE_BITS) return -EINVAL; } else { phys_shift = KVM_PHYS_SHIFT; -- cgit From bf249d9e362f1011a839d57e771b4b1a7eed9656 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 12 Aug 2021 10:39:52 +0530 Subject: KVM: arm64: Drop init_common_resources() We can do without this additional indirection via init_common_resources() by just calling kvm_set_ipa_limit() directly instead.
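Tying the parange pieces of this series together, a call site using the new named constants could look like the following sketch (based on the helpers shown above; error handling elided):

	/* Read ID_AA64MMFR0_EL1.PARange and convert it to a physical
	 * address width via the helper updated earlier in the series. */
	static int example_phys_shift(void)
	{
		u64 mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
		int parange = cpuid_feature_extract_unsigned_field(mmfr0,
					ID_AA64MMFR0_PARANGE_SHIFT);

		return id_aa64mmfr0_parange_to_phys_shift(parange);
	}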
Cc: Marc Zyngier Cc: James Morse Cc: Alexandru Elisei Cc: Suzuki K Poulose Cc: Catalin Marinas Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: kvmarm@lists.cs.columbia.edu Cc: linux-kernel@vger.kernel.org Signed-off-by: Anshuman Khandual Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/1628744994-16623-4-git-send-email-anshuman.khandual@arm.com --- arch/arm64/kvm/arm.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index e9a2b8f27792..19560e457c11 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1696,11 +1696,6 @@ static bool init_psci_relay(void) return true; } -static int init_common_resources(void) -{ - return kvm_set_ipa_limit(); -} - static int init_subsystems(void) { int err = 0; @@ -2102,7 +2097,7 @@ int kvm_arch_init(void *opaque) } } - err = init_common_resources(); + err = kvm_set_ipa_limit(); if (err) return err; -- cgit From 6b7982fefc1fdcaa31b712f5fbc2e993cc99ad23 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 12 Aug 2021 10:39:53 +0530 Subject: KVM: arm64: Drop check_kvm_target_cpu() based percpu probe kvm_target_cpu() never returns a negative error code, so check_kvm_target_cpu() would never have 'ret' filled with a negative error code. Hence the percpu probe via check_kvm_target_cpu() does not make sense as it's never going to find an unsupported CPU, forcing kvm_arch_init() to exit early. Hence let's just drop this percpu probe (and also check_kvm_target_cpu()) altogether. While here, this also changes kvm_target_cpu() return type to a u32, making it explicit that an error code will not be returned from this function. Cc: Marc Zyngier Cc: James Morse Cc: Alexandru Elisei Cc: Suzuki K Poulose Cc: Catalin Marinas Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: kvmarm@lists.cs.columbia.edu Cc: linux-kernel@vger.kernel.org Acked-by: Will Deacon Signed-off-by: Anshuman Khandual Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/1628744994-16623-5-git-send-email-anshuman.khandual@arm.com --- arch/arm64/include/asm/kvm_host.h | 2 +- arch/arm64/kvm/arm.c | 16 +--------------- arch/arm64/kvm/guest.c | 4 ++-- 3 files changed, 4 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 41911585ae0c..26bbd84a02de 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -66,7 +66,7 @@ DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use); extern unsigned int kvm_sve_max_vl; int kvm_arm_init_sve(void); -int __attribute_const__ kvm_target_cpu(void); +u32 __attribute_const__ kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 19560e457c11..ee9b1166f330 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1035,7 +1035,7 @@ static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, const struct kvm_vcpu_init *init) { unsigned int i, ret; - int phys_target = kvm_target_cpu(); + u32 phys_target = kvm_target_cpu(); if (init->target != phys_target) return -EINVAL; @@ -2010,11 +2010,6 @@ static int finalize_hyp_mode(void) return 0; } -static void check_kvm_target_cpu(void *ret) -{ - *(int *)ret = kvm_target_cpu(); -} - struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr) { struct kvm_vcpu *vcpu; @@ -2074,7 +2069,6 @@ void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *cons)
int kvm_arch_init(void *opaque) { int err; - int ret, cpu; bool in_hyp_mode; if (!is_hyp_mode_available()) { @@ -2089,14 +2083,6 @@ int kvm_arch_init(void *opaque) kvm_info("Guests without required CPU erratum workarounds can deadlock system!\n" \ "Only trusted guests should be used on this system.\n"); - for_each_online_cpu(cpu) { - smp_call_function_single(cpu, check_kvm_target_cpu, &ret, 1); - if (ret < 0) { - kvm_err("Error, CPU %d not supported!\n", cpu); - return -ENODEV; - } - } - err = kvm_set_ipa_limit(); if (err) return err; diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 1dfb83578277..8ce850fa7b49 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -842,7 +842,7 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, return 0; } -int __attribute_const__ kvm_target_cpu(void) +u32 __attribute_const__ kvm_target_cpu(void) { unsigned long implementor = read_cpuid_implementor(); unsigned long part_number = read_cpuid_part_number(); @@ -874,7 +874,7 @@ int __attribute_const__ kvm_target_cpu(void) int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init) { - int target = kvm_target_cpu(); + u32 target = kvm_target_cpu(); if (target < 0) return -ENODEV; -- cgit From 9329752bc8659e3934e2b13434b2fddb0df0bb13 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 12 Aug 2021 10:39:54 +0530 Subject: KVM: arm64: Drop unused REQUIRES_VIRT This seems like a residue from the past. REQUIRES_VIRT is no longer available. Hence it can just be dropped along with the related code section. Cc: Marc Zyngier Cc: James Morse Cc: Alexandru Elisei Cc: Suzuki K Poulose Cc: Catalin Marinas Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: kvmarm@lists.cs.columbia.edu Cc: linux-kernel@vger.kernel.org Signed-off-by: Anshuman Khandual Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/1628744994-16623-6-git-send-email-anshuman.khandual@arm.com --- arch/arm64/kvm/arm.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index ee9b1166f330..d779a29c5607 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -42,10 +42,6 @@ #include #include -#ifdef REQUIRES_VIRT -__asm__(".arch_extension virt"); -#endif - static enum kvm_mode kvm_mode = KVM_MODE_DEFAULT; DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized); -- cgit From ccac96977243d7916053550f62e6489760ad0adc Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 13 Aug 2021 14:03:36 +0100 Subject: KVM: arm64: Make hyp_panic() more robust when protected mode is enabled When protected mode is enabled, the host is unable to access most parts of the EL2 hypervisor image, including 'hyp_physvirt_offset' and the contents of the hypervisor's '.rodata.str' section. Unfortunately, nvhe_hyp_panic_handler() tries to read from both of these locations when handling a BUG() triggered at EL2; the former for converting the ELR to a physical address and the latter for displaying the name of the source file where the BUG() occurred. Hack the EL2 panic asm to pass both physical and virtual ELR values to the host and utilise the newly introduced CONFIG_NVHE_EL2_DEBUG so that we disable stage-2 protection for the host before returning to the EL1 panic handler. If the debug option is not enabled, display the address instead of the source file:line information.
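The crux is where the virtual-to-physical conversion of the ELR happens; condensed from the diff below, with the old line kept for contrast:

	/* Before: the host converts the ELR itself, but __hyp_pa() reads
	 * hyp_physvirt_offset, which a protected-mode host cannot access. */
	u64 elr_in_kimg = __phys_to_kimg(__hyp_pa(elr));

	/* After: EL2 passes elr_phys alongside the virtual value, so the
	 * host only performs the phys-to-kimage translation. */
	u64 elr_in_kimg = __phys_to_kimg(elr_phys);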
Cc: Andrew Scull Cc: Quentin Perret Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210813130336.8139-1-will@kernel.org --- arch/arm64/kvm/handle_exit.c | 23 ++++++++++++++--------- arch/arm64/kvm/hyp/nvhe/host.S | 21 +++++++++++++++++---- 2 files changed, 31 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 6f48336b1d86..04ebab299aa4 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -292,11 +292,12 @@ void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index) kvm_handle_guest_serror(vcpu, kvm_vcpu_get_esr(vcpu)); } -void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, u64 elr, +void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, + u64 elr_virt, u64 elr_phys, u64 par, uintptr_t vcpu, u64 far, u64 hpfar) { - u64 elr_in_kimg = __phys_to_kimg(__hyp_pa(elr)); - u64 hyp_offset = elr_in_kimg - kaslr_offset() - elr; + u64 elr_in_kimg = __phys_to_kimg(elr_phys); + u64 hyp_offset = elr_in_kimg - kaslr_offset() - elr_virt; u64 mode = spsr & PSR_MODE_MASK; /* @@ -309,20 +310,24 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, u64 elr, kvm_err("Invalid host exception to nVHE hyp!\n"); } else if (ESR_ELx_EC(esr) == ESR_ELx_EC_BRK64 && (esr & ESR_ELx_BRK64_ISS_COMMENT_MASK) == BUG_BRK_IMM) { - struct bug_entry *bug = find_bug(elr_in_kimg); const char *file = NULL; unsigned int line = 0; /* All hyp bugs, including warnings, are treated as fatal. */ - if (bug) - bug_get_file_line(bug, &file, &line); + if (!is_protected_kvm_enabled() || + IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) { + struct bug_entry *bug = find_bug(elr_in_kimg); + + if (bug) + bug_get_file_line(bug, &file, &line); + } if (file) kvm_err("nVHE hyp BUG at: %s:%u!\n", file, line); else - kvm_err("nVHE hyp BUG at: %016llx!\n", elr + hyp_offset); + kvm_err("nVHE hyp BUG at: %016llx!\n", elr_virt + hyp_offset); } else { - kvm_err("nVHE hyp panic at: %016llx!\n", elr + hyp_offset); + kvm_err("nVHE hyp panic at: %016llx!\n", elr_virt + hyp_offset); } /* @@ -334,5 +339,5 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, u64 elr, kvm_err("Hyp Offset: 0x%llx\n", hyp_offset); panic("HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%016lx\n", - spsr, elr, esr, far, hpfar, par, vcpu); + spsr, elr_virt, esr, far, hpfar, par, vcpu); } diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S index 2b23400e0fb3..4b652ffb591d 100644 --- a/arch/arm64/kvm/hyp/nvhe/host.S +++ b/arch/arm64/kvm/hyp/nvhe/host.S @@ -7,6 +7,7 @@ #include #include +#include #include #include @@ -85,12 +86,24 @@ SYM_FUNC_START(__hyp_do_panic) mov x29, x0 +#ifdef CONFIG_NVHE_EL2_DEBUG + /* Ensure host stage-2 is disabled */ + mrs x0, hcr_el2 + bic x0, x0, #HCR_VM + msr hcr_el2, x0 + isb + tlbi vmalls12e1 + dsb nsh +#endif + /* Load the panic arguments into x0-7 */ mrs x0, esr_el2 - get_vcpu_ptr x4, x5 - mrs x5, far_el2 - mrs x6, hpfar_el2 - mov x7, xzr // Unused argument + mov x4, x3 + mov x3, x2 + hyp_pa x3, x6 + get_vcpu_ptr x5, x6 + mrs x6, far_el2 + mrs x7, hpfar_el2 /* Enter the host, conditionally restoring the host context. 
*/ cbz x29, __host_enter_without_restoring -- cgit From 51ed00e71f0130e0f3534b8e7d78facd16829426 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 18 Aug 2021 08:47:28 +0000 Subject: powerpc/32: Remove unnecessary calculations in load_up_{fpu/altivec} No need to re-read SPRN_SPRG_THREAD; we can calculate the thread address from current (r2). And remove a reload of value 1 into r4 as r4 is already 1. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c04cce578b97a76a9e69a096698b1d89f721768a.1629276437.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/fpu.S | 3 +-- arch/powerpc/kernel/vector.S | 4 +--- 2 files changed, 2 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index 6010adcee16e..ba4afe3b5a9c 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -91,8 +91,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) isync /* enable use of FP after return */ #ifdef CONFIG_PPC32 - mfspr r5,SPRN_SPRG_THREAD /* current task's THREAD (phys) */ - tovirt(r5, r5) + addi r5,r2,THREAD lwz r4,THREAD_FPEXC_MODE(r5) ori r9,r9,MSR_FP /* enable FP for current */ or r9,r9,r4 diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index fc120fac1910..ba03eedfdcd8 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -65,9 +65,8 @@ _GLOBAL(load_up_altivec) 1: /* enable use of VMX after return */ #ifdef CONFIG_PPC32 - mfspr r5,SPRN_SPRG_THREAD /* current task's THREAD (phys) */ + addi r5,r2,THREAD oris r9,r9,MSR_VEC@h - tovirt(r5, r5) #else ld r4,PACACURRENT(r13) addi r5,r4,THREAD /* Get THREAD */ @@ -81,7 +80,6 @@ _GLOBAL(load_up_altivec) li r4,1 stb r4,THREAD_LOAD_VEC(r5) addi r6,r5,THREAD_VRSTATE - li r4,1 li r10,VRSTATE_VSCR stw r4,THREAD_USED_VR(r5) lvx v0,r10,r6 -- cgit From 31b089bbc15a53ca7c90c77ef51390d87f612088 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 23 Jun 2021 14:04:48 +0200 Subject: ARM/nommu: use the generic dma-direct code for non-coherent devices Select the right options to just use the generic dma-direct code instead of reimplementing it.
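With the generic dma-direct code in charge, a non-coherent device's streaming DMA funnels into the two arch_sync_dma_* hooks this patch keeps. A hedged sketch of a driver-side caller (the device handling is invented for illustration):

	#include <linux/dma-mapping.h>

	/* dma_map_single() on a non-coherent device ends up calling
	 * arch_sync_dma_for_device(); no arm_nommu_dma_ops indirection
	 * is involved any more. */
	static int example_start_tx(struct device *dev, void *buf, size_t len)
	{
		dma_addr_t dma = dma_map_single(dev, buf, len, DMA_TO_DEVICE);

		if (dma_mapping_error(dev, dma))
			return -ENOMEM;

		/* ... program the device with 'dma' and start the transfer ... */

		dma_unmap_single(dev, dma, len, DMA_TO_DEVICE);
		return 0;
	}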
Signed-off-by: Christoph Hellwig Tested-by: Dillon Min --- arch/arm/Kconfig | 5 +- arch/arm/mm/dma-mapping-nommu.c | 173 ++-------------------------------------- 2 files changed, 9 insertions(+), 169 deletions(-) (limited to 'arch') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 2fb7012c3246..4faa11d02521 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -18,8 +18,8 @@ config ARM select ARCH_HAS_SET_MEMORY select ARCH_HAS_STRICT_KERNEL_RWX if MMU && !XIP_KERNEL select ARCH_HAS_STRICT_MODULE_RWX if MMU - select ARCH_HAS_SYNC_DMA_FOR_DEVICE if SWIOTLB - select ARCH_HAS_SYNC_DMA_FOR_CPU if SWIOTLB + select ARCH_HAS_SYNC_DMA_FOR_DEVICE if SWIOTLB || !MMU + select ARCH_HAS_SYNC_DMA_FOR_CPU if SWIOTLB || !MMU select ARCH_HAS_TEARDOWN_DMA_OPS if MMU select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAVE_CUSTOM_GPIO_H @@ -44,6 +44,7 @@ config ARM select CPU_PM if SUSPEND || CPU_IDLE select DCACHE_WORD_ACCESS if HAVE_EFFICIENT_UNALIGNED_ACCESS select DMA_DECLARE_COHERENT + select DMA_GLOBAL_POOL if !MMU select DMA_OPS select DMA_REMAP if MMU select EDAC_SUPPORT diff --git a/arch/arm/mm/dma-mapping-nommu.c b/arch/arm/mm/dma-mapping-nommu.c index 6bfd2b884505..cfd9c933d2f0 100644 --- a/arch/arm/mm/dma-mapping-nommu.c +++ b/arch/arm/mm/dma-mapping-nommu.c @@ -5,12 +5,7 @@ * Copyright (C) 2000-2004 Russell King */ -#include -#include -#include #include -#include - #include #include #include @@ -18,65 +13,8 @@ #include "dma.h" -/* - * The generic direct mapping code is used if - * - MMU/MPU is off - * - cpu is v7m w/o cache support - * - device is coherent - * otherwise arm_nommu_dma_ops is used. - * - * arm_nommu_dma_ops rely on consistent DMA memory (please, refer to - * [1] on how to declare such memory). - * - * [1] Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt - */ - -static void *arm_nommu_dma_alloc(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp, - unsigned long attrs) - -{ - void *ret = dma_alloc_from_global_coherent(dev, size, dma_handle); - - /* - * dma_alloc_from_global_coherent() may fail because: - * - * - no consistent DMA region has been defined, so we can't - * continue. - * - there is no space left in consistent DMA region, so we - * only can fallback to generic allocator if we are - * advertised that consistency is not required. 
- */ - - WARN_ON_ONCE(ret == NULL); - return ret; -} - -static void arm_nommu_dma_free(struct device *dev, size_t size, - void *cpu_addr, dma_addr_t dma_addr, - unsigned long attrs) -{ - int ret = dma_release_from_global_coherent(get_order(size), cpu_addr); - - WARN_ON_ONCE(ret == 0); -} - -static int arm_nommu_dma_mmap(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size, - unsigned long attrs) -{ - int ret; - - if (dma_mmap_from_global_coherent(vma, cpu_addr, size, &ret)) - return ret; - if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) - return ret; - return -ENXIO; -} - - -static void __dma_page_cpu_to_dev(phys_addr_t paddr, size_t size, - enum dma_data_direction dir) +void arch_sync_dma_for_device(phys_addr_t paddr, size_t size, + enum dma_data_direction dir) { dmac_map_area(__va(paddr), size, dir); @@ -86,8 +24,8 @@ static void __dma_page_cpu_to_dev(phys_addr_t paddr, size_t size, outer_clean_range(paddr, paddr + size); } -static void __dma_page_dev_to_cpu(phys_addr_t paddr, size_t size, - enum dma_data_direction dir) +void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, + enum dma_data_direction dir) { if (dir != DMA_TO_DEVICE) { outer_inv_range(paddr, paddr + size); @@ -95,102 +33,6 @@ static void __dma_page_dev_to_cpu(phys_addr_t paddr, size_t size, } } -static dma_addr_t arm_nommu_dma_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, - unsigned long attrs) -{ - dma_addr_t handle = page_to_phys(page) + offset; - - __dma_page_cpu_to_dev(handle, size, dir); - - return handle; -} - -static void arm_nommu_dma_unmap_page(struct device *dev, dma_addr_t handle, - size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ - __dma_page_dev_to_cpu(handle, size, dir); -} - - -static int arm_nommu_dma_map_sg(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir, - unsigned long attrs) -{ - int i; - struct scatterlist *sg; - - for_each_sg(sgl, sg, nents, i) { - sg_dma_address(sg) = sg_phys(sg); - sg_dma_len(sg) = sg->length; - __dma_page_cpu_to_dev(sg_dma_address(sg), sg_dma_len(sg), dir); - } - - return nents; -} - -static void arm_nommu_dma_unmap_sg(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir, - unsigned long attrs) -{ - struct scatterlist *sg; - int i; - - for_each_sg(sgl, sg, nents, i) - __dma_page_dev_to_cpu(sg_dma_address(sg), sg_dma_len(sg), dir); -} - -static void arm_nommu_dma_sync_single_for_device(struct device *dev, - dma_addr_t handle, size_t size, enum dma_data_direction dir) -{ - __dma_page_cpu_to_dev(handle, size, dir); -} - -static void arm_nommu_dma_sync_single_for_cpu(struct device *dev, - dma_addr_t handle, size_t size, enum dma_data_direction dir) -{ - __dma_page_cpu_to_dev(handle, size, dir); -} - -static void arm_nommu_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir) -{ - struct scatterlist *sg; - int i; - - for_each_sg(sgl, sg, nents, i) - __dma_page_cpu_to_dev(sg_dma_address(sg), sg_dma_len(sg), dir); -} - -static void arm_nommu_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir) -{ - struct scatterlist *sg; - int i; - - for_each_sg(sgl, sg, nents, i) - __dma_page_dev_to_cpu(sg_dma_address(sg), sg_dma_len(sg), dir); -} - -const struct dma_map_ops arm_nommu_dma_ops = { - .alloc = arm_nommu_dma_alloc, - .free = arm_nommu_dma_free, - .alloc_pages 
= dma_direct_alloc_pages, - .free_pages = dma_direct_free_pages, - .mmap = arm_nommu_dma_mmap, - .map_page = arm_nommu_dma_map_page, - .unmap_page = arm_nommu_dma_unmap_page, - .map_sg = arm_nommu_dma_map_sg, - .unmap_sg = arm_nommu_dma_unmap_sg, - .sync_single_for_device = arm_nommu_dma_sync_single_for_device, - .sync_single_for_cpu = arm_nommu_dma_sync_single_for_cpu, - .sync_sg_for_device = arm_nommu_dma_sync_sg_for_device, - .sync_sg_for_cpu = arm_nommu_dma_sync_sg_for_cpu, -}; -EXPORT_SYMBOL(arm_nommu_dma_ops); - void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, const struct iommu_ops *iommu, bool coherent) { @@ -201,14 +43,11 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, * enough to check if MPU is in use or not since in absense of * MPU system memory map is used. */ - dev->archdata.dma_coherent = (cacheid) ? coherent : true; + dev->dma_coherent = cacheid ? coherent : true; } else { /* * Assume coherent DMA in case MMU/MPU has not been set up. */ - dev->archdata.dma_coherent = (get_cr() & CR_M) ? coherent : true; + dev->dma_coherent = (get_cr() & CR_M) ? coherent : true; } - - if (!dev->archdata.dma_coherent) - set_dma_ops(dev, &arm_nommu_dma_ops); } -- cgit From 39f75da7bcc829ddc4d40bb60d0e95520de7898b Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 2 Aug 2021 23:40:31 +0300 Subject: isystem: trim/fixup stdarg.h and other headers Delete/fixup few includes in anticipation of global -isystem compile option removal. Note: crypto/aegis128-neon-inner.c keeps due to redefinition of uintptr_t error (one definition comes from , another from ). Signed-off-by: Alexey Dobriyan Signed-off-by: Masahiro Yamada --- arch/arm/kernel/process.c | 2 -- arch/arm/mach-bcm/bcm_kona_smc.c | 2 -- arch/arm64/kernel/process.c | 3 --- arch/openrisc/kernel/process.c | 2 -- arch/parisc/kernel/process.c | 3 --- arch/powerpc/kernel/prom.c | 1 - arch/sparc/kernel/process_32.c | 3 --- arch/sparc/kernel/process_64.c | 3 --- arch/um/drivers/rtc_user.c | 1 + arch/um/drivers/vector_user.c | 1 + arch/um/include/shared/irq_user.h | 1 - arch/um/include/shared/os.h | 1 - arch/um/os-Linux/signal.c | 2 +- arch/um/os-Linux/util.c | 1 + 14 files changed, 4 insertions(+), 22 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index fc9e8b37eaa8..bb5ad8a6a4c3 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -5,8 +5,6 @@ * Copyright (C) 1996-2000 Russell King - Converted to ARM. * Original Copyright (C) 1995 Linus Torvalds */ -#include - #include #include #include diff --git a/arch/arm/mach-bcm/bcm_kona_smc.c b/arch/arm/mach-bcm/bcm_kona_smc.c index 43a16f922b53..43829e49ad93 100644 --- a/arch/arm/mach-bcm/bcm_kona_smc.c +++ b/arch/arm/mach-bcm/bcm_kona_smc.c @@ -10,8 +10,6 @@ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. */ - -#include #include #include #include diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index c8989b999250..5f7ac9a0f9a3 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -6,9 +6,6 @@ * Copyright (C) 1996-2000 Russell King - Converted to ARM. * Copyright (C) 2012 ARM Ltd. 
*/ - -#include - #include #include #include diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c index eb62429681fc..b0698d9ce14f 100644 --- a/arch/openrisc/kernel/process.c +++ b/arch/openrisc/kernel/process.c @@ -14,8 +14,6 @@ */ #define __KERNEL_SYSCALLS__ -#include - #include #include #include diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 184ec3c1eae4..38ec4ae81239 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -17,9 +17,6 @@ * Copyright (C) 2001-2014 Helge Deller * Copyright (C) 2002 Randolph Chung */ - -#include - #include #include #include diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index f620e04dc9bf..a1e7ba0fad09 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -11,7 +11,6 @@ #undef DEBUG -#include #include #include #include diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c index 93983d6d431d..bbbe0cfef746 100644 --- a/arch/sparc/kernel/process_32.c +++ b/arch/sparc/kernel/process_32.c @@ -8,9 +8,6 @@ /* * This file handles the architecture-dependent parts of process handling.. */ - -#include - #include #include #include diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index d33c58a58d4f..0cabcdfb23fd 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -9,9 +9,6 @@ /* * This file handles the architecture-dependent parts of process handling.. */ - -#include - #include #include #include diff --git a/arch/um/drivers/rtc_user.c b/arch/um/drivers/rtc_user.c index 4016bc1d577e..7c3cec4c68cf 100644 --- a/arch/um/drivers/rtc_user.c +++ b/arch/um/drivers/rtc_user.c @@ -3,6 +3,7 @@ * Copyright (C) 2020 Intel Corporation * Author: Johannes Berg */ +#include #include #include #include diff --git a/arch/um/drivers/vector_user.c b/arch/um/drivers/vector_user.c index bae53220ce26..e4ffeb9a1fa4 100644 --- a/arch/um/drivers/vector_user.c +++ b/arch/um/drivers/vector_user.c @@ -3,6 +3,7 @@ * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) */ +#include #include #include #include diff --git a/arch/um/include/shared/irq_user.h b/arch/um/include/shared/irq_user.h index 065829f443ae..86a8a573b65c 100644 --- a/arch/um/include/shared/irq_user.h +++ b/arch/um/include/shared/irq_user.h @@ -7,7 +7,6 @@ #define __IRQ_USER_H__ #include -#include enum um_irq_type { IRQ_READ, diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h index 60b84edc8a68..96d400387c93 100644 --- a/arch/um/include/shared/os.h +++ b/arch/um/include/shared/os.h @@ -8,7 +8,6 @@ #ifndef __OS_H__ #define __OS_H__ -#include #include #include #include diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c index 6de99bb16113..6cf098c23a39 100644 --- a/arch/um/os-Linux/signal.c +++ b/arch/um/os-Linux/signal.c @@ -67,7 +67,7 @@ int signals_enabled; #ifdef UML_CONFIG_UML_TIME_TRAVEL_SUPPORT static int signals_blocked; #else -#define signals_blocked false +#define signals_blocked 0 #endif static unsigned int signals_pending; static unsigned int signals_active = 0; diff --git a/arch/um/os-Linux/util.c b/arch/um/os-Linux/util.c index 07327425d06e..41297ec404bf 100644 --- a/arch/um/os-Linux/util.c +++ b/arch/um/os-Linux/util.c @@ -3,6 +3,7 @@ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) */ +#include #include #include #include -- cgit From c0891ac15f0428ffa81b2e818d416bdf3cb74ab6 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 2 Aug 2021 
23:40:32 +0300 Subject: isystem: ship and use stdarg.h Ship minimal stdarg.h (1 type, 4 macros) as <linux/stdarg.h>. stdarg.h is the only userspace header commonly used in the kernel. GPL 2 version of <stdarg.h> can be extracted from http://archive.debian.org/debian/pool/main/g/gcc-4.2/gcc-4.2_4.2.4.orig.tar.gz Signed-off-by: Alexey Dobriyan Acked-by: Rafael J. Wysocki Acked-by: Ard Biesheuvel Signed-off-by: Masahiro Yamada --- arch/parisc/kernel/firmware.c | 2 +- arch/powerpc/kernel/prom_init.c | 2 +- arch/powerpc/kernel/rtas.c | 2 +- arch/powerpc/kernel/udbg.c | 2 +- arch/s390/boot/pgm_check_info.c | 2 +- arch/x86/boot/boot.h | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c index 665b70086685..7034227dbdf3 100644 --- a/arch/parisc/kernel/firmware.c +++ b/arch/parisc/kernel/firmware.c @@ -51,7 +51,7 @@ * prumpf 991016 */ -#include <stdarg.h> +#include <linux/stdarg.h> #include #include diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index a5bf355ce1d6..10664633f7e3 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -14,7 +14,7 @@ /* we cannot use FORTIFY as it brings in new symbols */ #define __NO_FORTIFY -#include <stdarg.h> +#include <linux/stdarg.h> #include #include #include diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 99f2cce635fb..ff80bbad22a5 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -7,7 +7,7 @@ * Copyright (C) 2001 IBM. */ -#include <stdarg.h> +#include <linux/stdarg.h> #include #include #include diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c index 01595e8cafe7..b1544b2f6321 100644 --- a/arch/powerpc/kernel/udbg.c +++ b/arch/powerpc/kernel/udbg.c @@ -5,7 +5,7 @@ * c 2001 PPC 64 Team, IBM Corp */ -#include <stdarg.h> +#include <linux/stdarg.h> #include #include #include diff --git a/arch/s390/boot/pgm_check_info.c b/arch/s390/boot/pgm_check_info.c index 3a46abed2549..b7d8dd88bbf2 100644 --- a/arch/s390/boot/pgm_check_info.c +++ b/arch/s390/boot/pgm_check_info.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include <linux/stdarg.h> #include #include #include @@ -8,7 +9,6 @@ #include #include #include -#include <stdarg.h> #include "boot.h" const char hex_asc[] = "0123456789abcdef"; diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h index ca866f1cca2e..34c9dbb6a47d 100644 --- a/arch/x86/boot/boot.h +++ b/arch/x86/boot/boot.h @@ -18,7 +18,7 @@ #ifndef __ASSEMBLY__ -#include <stdarg.h> +#include <linux/stdarg.h> #include #include #include -- cgit From c1dec343d7abdf8e71aab2a289ab45ce8b1afb7e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 23 Jun 2021 16:11:21 +0200 Subject: hexagon: use the generic global coherent pool Switch hexagon to use the generic code for dma_alloc_coherent from a global pre-filled pool. Signed-off-by: Christoph Hellwig Reviewed-by: Brian Cain --- arch/hexagon/Kconfig | 1 + arch/hexagon/kernel/dma.c | 57 +++++++++-------------------------------- 2 files changed, 12 insertions(+), 46 deletions(-) (limited to 'arch') diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig index e5a852080730..aab1a40eb653 100644 --- a/arch/hexagon/Kconfig +++ b/arch/hexagon/Kconfig @@ -7,6 +7,7 @@ config HEXAGON select ARCH_32BIT_OFF_T select ARCH_HAS_SYNC_DMA_FOR_DEVICE select ARCH_NO_PREEMPT + select DMA_GLOBAL_POOL # Other pending projects/to-do items. 
# select HAVE_REGS_AND_STACK_ACCESS_API # select HAVE_HW_BREAKPOINT if PERF_EVENTS diff --git a/arch/hexagon/kernel/dma.c b/arch/hexagon/kernel/dma.c index 00b9a81075dd..882680e81a30 100644 --- a/arch/hexagon/kernel/dma.c +++ b/arch/hexagon/kernel/dma.c @@ -7,54 +7,8 @@ #include #include -#include -#include #include -static struct gen_pool *coherent_pool; - - -/* Allocates from a pool of uncached memory that was reserved at boot time */ - -void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_addr, - gfp_t flag, unsigned long attrs) -{ - void *ret; - - /* - * Our max_low_pfn should have been backed off by 16MB in - * mm/init.c to create DMA coherent space. Use that as the VA - * for the pool. - */ - - if (coherent_pool == NULL) { - coherent_pool = gen_pool_create(PAGE_SHIFT, -1); - - if (coherent_pool == NULL) - panic("Can't create %s() memory pool!", __func__); - else - gen_pool_add(coherent_pool, - (unsigned long)pfn_to_virt(max_low_pfn), - hexagon_coherent_pool_size, -1); - } - - ret = (void *) gen_pool_alloc(coherent_pool, size); - - if (ret) { - memset(ret, 0, size); - *dma_addr = (dma_addr_t) virt_to_phys(ret); - } else - *dma_addr = ~0; - - return ret; -} - -void arch_dma_free(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_addr, unsigned long attrs) -{ - gen_pool_free(coherent_pool, (unsigned long) vaddr, size); -} - void arch_sync_dma_for_device(phys_addr_t paddr, size_t size, enum dma_data_direction dir) { @@ -77,3 +31,14 @@ void arch_sync_dma_for_device(phys_addr_t paddr, size_t size, BUG(); } } + +/* + * Our max_low_pfn should have been backed off by 16MB in mm/init.c to create + * DMA coherent space. Use that for the pool. + */ +static int __init hexagon_dma_init(void) +{ + return dma_init_global_coherent(PFN_PHYS(max_low_pfn), + hexagon_coherent_pool_size); +} +core_initcall(hexagon_dma_init); -- cgit From 6654f9dfcb88fea3b9affc180dc3c04333d0f306 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 18 Aug 2021 20:21:30 +0000 Subject: KVM: arm64: Fix read-side race on updates to vcpu reset state KVM correctly serializes writes to a vCPU's reset state, however since we do not take the KVM lock on the read side it is entirely possible to read state from two different reset requests. Cure the race for now by taking the KVM lock when reading the reset_state structure. Fixes: 358b28f09f0a ("arm/arm64: KVM: Allow a VCPU to fully reset itself") Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210818202133.1106786-2-oupton@google.com --- arch/arm64/kvm/reset.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index cba7872d69a8..d862441b03b1 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -210,10 +210,16 @@ static bool vcpu_allowed_register_width(struct kvm_vcpu *vcpu) */ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) { + struct vcpu_reset_state reset_state; int ret; bool loaded; u32 pstate; + mutex_lock(&vcpu->kvm->lock); + reset_state = vcpu->arch.reset_state; + WRITE_ONCE(vcpu->arch.reset_state.reset, false); + mutex_unlock(&vcpu->kvm->lock); + /* Reset PMU outside of the non-preemptible section */ kvm_pmu_vcpu_reset(vcpu); @@ -276,8 +282,8 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) * Additional reset state handling that PSCI may have imposed on us. * Must be done after all the sys_reg reset. 
*/ - if (vcpu->arch.reset_state.reset) { - unsigned long target_pc = vcpu->arch.reset_state.pc; + if (reset_state.reset) { + unsigned long target_pc = reset_state.pc; /* Gracefully handle Thumb2 entry point */ if (vcpu_mode_is_32bit(vcpu) && (target_pc & 1)) { @@ -286,13 +292,11 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) } /* Propagate caller endianness */ - if (vcpu->arch.reset_state.be) + if (reset_state.be) kvm_vcpu_set_be(vcpu); *vcpu_pc(vcpu) = target_pc; - vcpu_set_reg(vcpu, 0, vcpu->arch.reset_state.r0); - - vcpu->arch.reset_state.reset = false; + vcpu_set_reg(vcpu, 0, reset_state.r0); } /* Reset timer */ -- cgit From 6826c6849b46aaa91300201213701eb861af4ba0 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 18 Aug 2021 20:21:31 +0000 Subject: KVM: arm64: Handle PSCI resets before userspace touches vCPU state The CPU_ON PSCI call takes a payload that KVM uses to configure a destination vCPU to run. This payload is non-architectural state and not exposed through any existing UAPI. Effectively, we have a race between CPU_ON and userspace saving/restoring a guest: if the target vCPU isn't run again before the VMM saves its state, the requested PC and context ID are lost. When restored, the target vCPU will be runnable and start executing at its old PC. We can avoid this race by making sure the reset payload is serviced before userspace can access a vCPU's state. Fixes: 358b28f09f0a ("arm/arm64: KVM: Allow a VCPU to fully reset itself") Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210818202133.1106786-3-oupton@google.com --- arch/arm64/kvm/arm.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index e9a2b8f27792..8ce9996bcbfd 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1215,6 +1215,14 @@ long kvm_arch_vcpu_ioctl(struct file *filp, if (copy_from_user(&reg, argp, sizeof(reg))) break; + /* + * We could owe a reset due to PSCI. Handle the pending reset + * here to ensure userspace register accesses are ordered after + * the reset. + */ + if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu)) + kvm_reset_vcpu(vcpu); + if (ioctl == KVM_SET_ONE_REG) r = kvm_arm_set_reg(vcpu, &reg); else -- cgit From e10ecb4d6c0761ca545b3946df1707a41f9f845e Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 18 Aug 2021 20:21:32 +0000 Subject: KVM: arm64: Enforce reserved bits for PSCI target affinities According to the PSCI specification, ARM DEN 0022D, 5.1.4 "CPU_ON", the CPU_ON function takes a target_cpu argument that is bit-compatible with the affinity fields in MPIDR_EL1. All other bits in the argument are RES0. Note that the same constraints apply to the target_affinity argument for the AFFINITY_INFO call. Enforce the spec by returning INVALID_PARAMS if a guest incorrectly sets a RES0 bit. 
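As a standalone illustration of the rule being enforced (an editor's sketch, not part of the patch; the helper name is invented): on arm64, MPIDR_HWID_BITMASK is 0xff00ffffff, covering Aff0/Aff1/Aff2 in bits [23:0] and Aff3 in bits [39:32], so a conforming target_cpu or target_affinity value must have every other bit clear.

#include <stdbool.h>
#include <stdint.h>

#define MPIDR_HWID_BITMASK 0xff00ffffffULL /* Aff3 [39:32], Aff2/Aff1/Aff0 [23:0] */

/* Mirrors the kvm_psci_valid_affinity() check added by the diff below. */
static bool psci_affinity_is_valid(uint64_t target)
{
	return !(target & ~MPIDR_HWID_BITMASK);
}

Under this rule, 0x100 (Aff1 == 1) remains a valid target, while 0x80000000 (bit 31, outside every affinity field) now gets PSCI_RET_INVALID_PARAMS.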
Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210818202133.1106786-4-oupton@google.com --- arch/arm64/kvm/psci.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/psci.c b/arch/arm64/kvm/psci.c index db4056ecccfd..74c47d420253 100644 --- a/arch/arm64/kvm/psci.c +++ b/arch/arm64/kvm/psci.c @@ -59,6 +59,12 @@ static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu) kvm_vcpu_kick(vcpu); } +static inline bool kvm_psci_valid_affinity(struct kvm_vcpu *vcpu, + unsigned long affinity) +{ + return !(affinity & ~MPIDR_HWID_BITMASK); +} + static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) { struct vcpu_reset_state *reset_state; @@ -66,9 +72,9 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) struct kvm_vcpu *vcpu = NULL; unsigned long cpu_id; - cpu_id = smccc_get_arg1(source_vcpu) & MPIDR_HWID_BITMASK; - if (vcpu_mode_is_32bit(source_vcpu)) - cpu_id &= ~((u32) 0); + cpu_id = smccc_get_arg1(source_vcpu); + if (!kvm_psci_valid_affinity(source_vcpu, cpu_id)) + return PSCI_RET_INVALID_PARAMS; vcpu = kvm_mpidr_to_vcpu(kvm, cpu_id); @@ -126,6 +132,9 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu) target_affinity = smccc_get_arg1(vcpu); lowest_affinity_level = smccc_get_arg2(vcpu); + if (!kvm_psci_valid_affinity(vcpu, target_affinity)) + return PSCI_RET_INVALID_PARAMS; + /* Determine target affinity mask */ target_affinity_mask = psci_affinity_mask(lowest_affinity_level); if (!target_affinity_mask) -- cgit From fe5161d2c39b8c2801f0e786631460c6e8a1cae4 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 2 Aug 2021 19:28:07 +0000 Subject: KVM: arm64: Record number of signal exits as a vCPU stat Most other architectures that implement KVM record a statistic indicating the number of times a vCPU has exited due to a pending signal. Add support for that stat to arm64. 
Reviewed-by: Jing Zhang Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210802192809.1851010-2-oupton@google.com --- arch/arm64/include/asm/kvm_host.h | 1 + arch/arm64/kvm/arm.c | 1 + arch/arm64/kvm/guest.c | 1 + 3 files changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 41911585ae0c..70e129f2b574 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -576,6 +576,7 @@ struct kvm_vcpu_stat { u64 wfi_exit_stat; u64 mmio_exit_user; u64 mmio_exit_kernel; + u64 signal_exits; u64 exits; }; diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index e9a2b8f27792..60d0a546d7fd 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -783,6 +783,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) if (signal_pending(current)) { ret = -EINTR; run->exit_reason = KVM_EXIT_INTR; + ++vcpu->stat.signal_exits; } /* diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 1dfb83578277..853d1e8d2e73 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -50,6 +50,7 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { STATS_DESC_COUNTER(VCPU, wfi_exit_stat), STATS_DESC_COUNTER(VCPU, mmio_exit_user), STATS_DESC_COUNTER(VCPU, mmio_exit_kernel), + STATS_DESC_COUNTER(VCPU, signal_exits), STATS_DESC_COUNTER(VCPU, exits) }; static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) == -- cgit From 6caa5812e2d126a0aa8a17816c1ba6f0a0c2b309 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 2 Aug 2021 19:28:09 +0000 Subject: KVM: arm64: Use generic KVM xfer to guest work function Clean up handling of checks for pending work by switching to the generic infrastructure to do so. We pick up handling for TIF_NOTIFY_RESUME from this switch, meaning that task work will be correctly handled. Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210802192809.1851010-4-oupton@google.com --- arch/arm64/kvm/Kconfig | 1 + arch/arm64/kvm/arm.c | 72 ++++++++++++++++++++++++++++++-------------------- 2 files changed, 45 insertions(+), 28 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index a4eba0908bfa..8bc1fac5fa26 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -26,6 +26,7 @@ menuconfig KVM select HAVE_KVM_ARCH_TLB_FLUSH_ALL select KVM_MMIO select KVM_GENERIC_DIRTYLOG_READ_PROTECT + select KVM_XFER_TO_GUEST_WORK select SRCU select KVM_VFIO select HAVE_KVM_EVENTFD diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 60d0a546d7fd..8245efc6e88f 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -6,6 +6,7 @@ #include #include +#include <linux/entry-kvm.h> #include #include #include @@ -714,6 +715,45 @@ static bool vcpu_mode_is_bad_32bit(struct kvm_vcpu *vcpu) static_branch_unlikely(&arm64_mismatched_32bit_el0); } +/** + * kvm_vcpu_exit_request - returns true if the VCPU should *not* enter the guest + * @vcpu: The VCPU pointer + * @ret: Pointer to write optional return code + * + * Returns: true if the VCPU needs to return to a preemptible + interruptible + * kernel context and skip guest entry. + * + * This function disambiguates between two different types of exits: exits to a + * preemptible + interruptible kernel context and exits to userspace. For an + * exit to userspace, this function will write the return code to ret and return + * true. For an exit to preemptible + interruptible kernel context (i.e. 
check + * for pending work and re-enter), return true without writing to ret. + */ +static bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu, int *ret) +{ + struct kvm_run *run = vcpu->run; + + /* + * If we're using a userspace irqchip, then check if we need + * to tell a userspace irqchip about timer or PMU level + * changes and if so, exit to userspace (the actual level + * state gets updated in kvm_timer_update_run and + * kvm_pmu_update_run below). + */ + if (static_branch_unlikely(&userspace_irqchip_in_use)) { + if (kvm_timer_should_notify_user(vcpu) || + kvm_pmu_should_notify_user(vcpu)) { + *ret = -EINTR; + run->exit_reason = KVM_EXIT_INTR; + return true; + } + } + + return kvm_request_pending(vcpu) || + need_new_vmid_gen(&vcpu->arch.hw_mmu->vmid) || + xfer_to_guest_mode_work_pending(); +} + /** * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code * @vcpu: The VCPU pointer @@ -757,7 +797,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) /* * Check conditions before entering the guest */ - cond_resched(); + ret = xfer_to_guest_mode_handle_work(vcpu); + if (!ret) + ret = 1; update_vmid(&vcpu->arch.hw_mmu->vmid); @@ -776,31 +818,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) kvm_vgic_flush_hwstate(vcpu); - /* - * Exit if we have a signal pending so that we can deliver the - * signal to user space. - */ - if (signal_pending(current)) { - ret = -EINTR; - run->exit_reason = KVM_EXIT_INTR; - ++vcpu->stat.signal_exits; - } - - /* - * If we're using a userspace irqchip, then check if we need - * to tell a userspace irqchip about timer or PMU level - * changes and if so, exit to userspace (the actual level - * state gets updated in kvm_timer_update_run and - * kvm_pmu_update_run below). - */ - if (static_branch_unlikely(&userspace_irqchip_in_use)) { - if (kvm_timer_should_notify_user(vcpu) || - kvm_pmu_should_notify_user(vcpu)) { - ret = -EINTR; - run->exit_reason = KVM_EXIT_INTR; - } - } - /* * Ensure we set mode to IN_GUEST_MODE after we disable * interrupts and before the final VCPU requests check. @@ -809,8 +826,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) */ smp_store_mb(vcpu->mode, IN_GUEST_MODE); - if (ret <= 0 || need_new_vmid_gen(&vcpu->arch.hw_mmu->vmid) || - kvm_request_pending(vcpu)) { + if (ret <= 0 || kvm_vcpu_exit_request(vcpu, &ret)) { vcpu->mode = OUTSIDE_GUEST_MODE; isb(); /* Ensure work in x_flush_hwstate is committed */ kvm_pmu_sync_hwstate(vcpu); -- cgit From b9a51949cebcd57bfb9385d9da62ace52564898c Mon Sep 17 00:00:00 2001 From: Ricardo Koller Date: Wed, 18 Aug 2021 14:32:05 -0700 Subject: KVM: arm64: vgic: Drop WARN from vgic_get_irq vgic_get_irq(intid) is used all over the vgic code in order to get a reference to a struct irq. It warns whenever intid is not a valid number (like when it's a reserved IRQ number). The issue is that this warning can be triggered from userspace (e.g., KVM_IRQ_LINE for intid 1020). Drop the WARN call from vgic_get_irq. 
Signed-off-by: Ricardo Koller Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210818213205.598471-1-ricarkol@google.com --- arch/arm64/kvm/vgic/vgic.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index 111bff47e471..81cec508d413 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -106,7 +106,6 @@ struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu, if (intid >= VGIC_MIN_LPI) return vgic_get_lpi(kvm, intid); - WARN(1, "Looking up struct vgic_irq for reserved INTID"); return NULL; } -- cgit From c26d4c5d4f0df7207da3975458261927f9305465 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Thu, 19 Aug 2021 13:39:53 +0200 Subject: powerpc/kvm: Remove obsolete and unneeded select Commit a278e7ea608b ("powerpc: Fix compile issue with force DAWR") selects the non-existing config PPC_DAWR_FORCE_ENABLE for config KVM_BOOK3S_64_HANDLER. As this commit also introduces a config PPC_DAWR and this config PPC_DAWR is selected with PPC if PPC64, there is no need for any further select in the KVM_BOOK3S_64_HANDLER. Remove an obsolete and unneeded select in config KVM_BOOK3S_64_HANDLER. The issue was identified with ./scripts/checkkconfigsymbols.py. Fixes: a278e7ea608b ("powerpc: Fix compile issue with force DAWR") Signed-off-by: Lukas Bulwahn Reviewed-by: Daniel Axtens Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210819113954.17515-2-lukas.bulwahn@gmail.com --- arch/powerpc/kvm/Kconfig | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index e45644657d49..ff581d70f20c 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -38,7 +38,6 @@ config KVM_BOOK3S_32_HANDLER config KVM_BOOK3S_64_HANDLER bool select KVM_BOOK3S_HANDLER - select PPC_DAWR_FORCE_ENABLE config KVM_BOOK3S_PR_POSSIBLE bool -- cgit From 6cd717fe9b3a787f8e8f9d0bc9b7634a9c104b3e Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Thu, 19 Aug 2021 10:46:54 +1000 Subject: powerpc/tau: Add 'static' storage qualifier to 'tau_work' definition This patch prevents the following sparse warning. arch/powerpc/kernel/tau_6xx.c:199:1: sparse: sparse: symbol 'tau_work' was not declared. Should it be static? Reported-by: kernel test robot Signed-off-by: Finn Thain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/44ab381741916a51e783c4a50d0b186abdd8f280.1629334014.git.fthain@linux-m68k.org --- arch/powerpc/kernel/tau_6xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c index b9a047d92ec0..8e83d19fe8fa 100644 --- a/arch/powerpc/kernel/tau_6xx.c +++ b/arch/powerpc/kernel/tau_6xx.c @@ -164,7 +164,7 @@ static void tau_work_func(struct work_struct *work) queue_work(tau_workq, work); } -DECLARE_WORK(tau_work, tau_work_func); +static DECLARE_WORK(tau_work, tau_work_func); /* * setup the TAU -- cgit From f9addd85fbfacf0d155e83dbee8696d6df5ed0c7 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Fri, 13 Aug 2021 13:51:58 +0530 Subject: powerpc/perf/hv-gpci: Fix counter value parsing H_GetPerformanceCounterInfo (0xF080) hcall returns the counter data in the result buffer. The result buffer has a specific format defined in the PAPR specification. Among its fields are the offset and width of the counter data returned. Counter data are returned in an unsigned char array in big endian byte order. 
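Concretely (an editor's sketch, not from the patch: a counter of 'length' bytes starting at 'offset' in the returned array, with byte 0 of the field the most significant), assembling the value looks like this:

#include <stdint.h>

static uint64_t gpci_read_counter(const uint8_t *bytes, unsigned int offset,
				  unsigned int length)
{
	uint64_t count = 0;
	unsigned int i;

	/* Big endian: the first byte of the field lands in the top byte. */
	for (i = offset; i < offset + length; i++)
		count |= (uint64_t)bytes[i] << ((length - 1 - (i - offset)) * 8);

	return count;
}

For a two-byte field { 0x40, 0x40 } this yields 0x4040, where the pre-fix logic computed 0x40 | (0x40 << 1) = 0xc0.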
To get the final counter data, the values must be left-shifted a byte at a time. But commit 220a0c609ad17 ("powerpc/perf: Add support for the hv gpci (get performance counter info) interface") made the shifting bitwise and also assumed little endian order. Because of that, hcall counter values are reported incorrectly. In particular this can lead to counters going backwards, which messes up the counter prev vs now calculation and leads to huge counter values being reported:

#: perf stat -e hv_gpci/system_tlbie_count_and_time_tlbie_instructions_issued/ -C 0 -I 1000
time counts unit events
1.000078854 18,446,744,073,709,535,232 hv_gpci/system_tlbie_count_and_time_tlbie_instructions_issued/
2.000213293 0 hv_gpci/system_tlbie_count_and_time_tlbie_instructions_issued/
3.000320107 0 hv_gpci/system_tlbie_count_and_time_tlbie_instructions_issued/
4.000428392 0 hv_gpci/system_tlbie_count_and_time_tlbie_instructions_issued/
5.000537864 0 hv_gpci/system_tlbie_count_and_time_tlbie_instructions_issued/
6.000649087 0 hv_gpci/system_tlbie_count_and_time_tlbie_instructions_issued/
7.000760312 0 hv_gpci/system_tlbie_count_and_time_tlbie_instructions_issued/
8.000865218 16,448 hv_gpci/system_tlbie_count_and_time_tlbie_instructions_issued/
9.000978985 18,446,744,073,709,535,232 hv_gpci/system_tlbie_count_and_time_tlbie_instructions_issued/
10.001088891 16,384 hv_gpci/system_tlbie_count_and_time_tlbie_instructions_issued/
11.001201435 0 hv_gpci/system_tlbie_count_and_time_tlbie_instructions_issued/
12.001307937 18,446,744,073,709,535,232 hv_gpci/system_tlbie_count_and_time_tlbie_instructions_issued/

Fix the shifting logic to correctly match the format, i.e. read bytes in big endian order. Fixes: e4f226b1580b ("powerpc/perf/hv-gpci: Increase request buffer size") Cc: stable@vger.kernel.org # v4.6+ Reported-by: Nageswara R Sastry Signed-off-by: Kajol Jain Tested-by: Nageswara R Sastry Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210813082158.429023-1-kjain@linux.ibm.com --- arch/powerpc/perf/hv-gpci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c index d48413e28c39..c756228a081f 100644 --- a/arch/powerpc/perf/hv-gpci.c +++ b/arch/powerpc/perf/hv-gpci.c @@ -175,7 +175,7 @@ static unsigned long single_gpci_request(u32 req, u32 starting_index, */ count = 0; for (i = offset; i < offset + length; i++) - count |= arg->bytes[i] << (i - offset); + count |= (u64)(arg->bytes[i]) << ((length - 1 - (i - offset)) * 8); *value = count; out: -- cgit From 3134cc8beb69d0db9de651081707c4651c011621 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 19 Aug 2021 19:03:05 +0100 Subject: KVM: arm64: vgic: Resample HW pending state on deactivation When a mapped level interrupt (a timer, for example) is deactivated by the guest, the corresponding host interrupt is equally deactivated. However, the fate of the pending state still needs to be dealt with in SW. This is especially true when the interrupt was in the active+pending state in the virtual distributor at the point where the guest was entered. On exit, the pending state is potentially stale (the guest may have put the interrupt in a non-pending state). If we don't do anything, the interrupt will be spuriously injected in the guest. Although this shouldn't have any ill effect (spurious interrupts are always possible), we can improve the emulation by detecting the deactivation-while-pending case and resampling the interrupt. 
While we're at it, move the logic into a common helper that can be shared between the two GIC implementations. Fixes: e40cc57bac79 ("KVM: arm/arm64: vgic: Support level-triggered mapped interrupts") Reported-by: Raghavendra Rao Ananta Tested-by: Raghavendra Rao Ananta Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20210819180305.1670525-1-maz@kernel.org --- arch/arm64/kvm/vgic/vgic-v2.c | 36 +++++------------------------------- arch/arm64/kvm/vgic/vgic-v3.c | 36 +++++------------------------------- arch/arm64/kvm/vgic/vgic.c | 38 ++++++++++++++++++++++++++++++++++++++ arch/arm64/kvm/vgic/vgic.h | 2 ++ 4 files changed, 50 insertions(+), 62 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/vgic/vgic-v2.c b/arch/arm64/kvm/vgic/vgic-v2.c index 2c580204f1dc..95a18cec14a3 100644 --- a/arch/arm64/kvm/vgic/vgic-v2.c +++ b/arch/arm64/kvm/vgic/vgic-v2.c @@ -60,6 +60,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) u32 val = cpuif->vgic_lr[lr]; u32 cpuid, intid = val & GICH_LR_VIRTUALID; struct vgic_irq *irq; + bool deactivated; /* Extract the source vCPU id from the LR */ cpuid = val & GICH_LR_PHYSID_CPUID; @@ -75,7 +76,8 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) raw_spin_lock(&irq->irq_lock); - /* Always preserve the active bit */ + /* Always preserve the active bit, note deactivation */ + deactivated = irq->active && !(val & GICH_LR_ACTIVE_BIT); irq->active = !!(val & GICH_LR_ACTIVE_BIT); if (irq->active && vgic_irq_is_sgi(intid)) @@ -96,36 +98,8 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) if (irq->config == VGIC_CONFIG_LEVEL && !(val & GICH_LR_STATE)) irq->pending_latch = false; - /* - * Level-triggered mapped IRQs are special because we only - * observe rising edges as input to the VGIC. - * - * If the guest never acked the interrupt we have to sample - * the physical line and set the line level, because the - * device state could have changed or we simply need to - * process the still pending interrupt later. - * - * If this causes us to lower the level, we have to also clear - * the physical active state, since we will otherwise never be - * told when the interrupt becomes asserted again. - * - * Another case is when the interrupt requires a helping hand - * on deactivation (no HW deactivation, for example). 
- */ - if (vgic_irq_is_mapped_level(irq)) { - bool resample = false; - - if (val & GICH_LR_PENDING_BIT) { - irq->line_level = vgic_get_phys_line_level(irq); - resample = !irq->line_level; - } else if (vgic_irq_needs_resampling(irq) && - !(irq->active || irq->pending_latch)) { - resample = true; - } - - if (resample) - vgic_irq_set_phys_active(irq, false); - } + /* Handle resampling for mapped interrupts if required */ + vgic_irq_handle_resampling(irq, deactivated, val & GICH_LR_PENDING_BIT); raw_spin_unlock(&irq->irq_lock); vgic_put_irq(vcpu->kvm, irq); diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index 66004f61cd83..21a6207fb2ee 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -46,6 +46,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) u32 intid, cpuid; struct vgic_irq *irq; bool is_v2_sgi = false; + bool deactivated; cpuid = val & GICH_LR_PHYSID_CPUID; cpuid >>= GICH_LR_PHYSID_CPUID_SHIFT; @@ -68,7 +69,8 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) raw_spin_lock(&irq->irq_lock); - /* Always preserve the active bit */ + /* Always preserve the active bit, note deactivation */ + deactivated = irq->active && !(val & ICH_LR_ACTIVE_BIT); irq->active = !!(val & ICH_LR_ACTIVE_BIT); if (irq->active && is_v2_sgi) @@ -89,36 +91,8 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) if (irq->config == VGIC_CONFIG_LEVEL && !(val & ICH_LR_STATE)) irq->pending_latch = false; - /* - * Level-triggered mapped IRQs are special because we only - * observe rising edges as input to the VGIC. - * - * If the guest never acked the interrupt we have to sample - * the physical line and set the line level, because the - * device state could have changed or we simply need to - * process the still pending interrupt later. - * - * If this causes us to lower the level, we have to also clear - * the physical active state, since we will otherwise never be - * told when the interrupt becomes asserted again. - * - * Another case is when the interrupt requires a helping hand - * on deactivation (no HW deactivation, for example). - */ - if (vgic_irq_is_mapped_level(irq)) { - bool resample = false; - - if (val & ICH_LR_PENDING_BIT) { - irq->line_level = vgic_get_phys_line_level(irq); - resample = !irq->line_level; - } else if (vgic_irq_needs_resampling(irq) && - !(irq->active || irq->pending_latch)) { - resample = true; - } - - if (resample) - vgic_irq_set_phys_active(irq, false); - } + /* Handle resampling for mapped interrupts if required */ + vgic_irq_handle_resampling(irq, deactivated, val & ICH_LR_PENDING_BIT); raw_spin_unlock(&irq->irq_lock); vgic_put_irq(vcpu->kvm, irq); diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index 81cec508d413..5dad4996cfb2 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -1021,3 +1021,41 @@ bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid) return map_is_active; } + +/* + * Level-triggered mapped IRQs are special because we only observe rising + * edges as input to the VGIC. + * + * If the guest never acked the interrupt we have to sample the physical + * line and set the line level, because the device state could have changed + * or we simply need to process the still pending interrupt later. + * + * We could also have entered the guest with the interrupt active+pending. + * On the next exit, we need to re-evaluate the pending state, as it could + * otherwise result in a spurious interrupt by injecting a now potentially + * stale pending state. 
+ * + * If this causes us to lower the level, we have to also clear the physical + * active state, since we will otherwise never be told when the interrupt + * becomes asserted again. + * + * Another case is when the interrupt requires a helping hand on + * deactivation (no HW deactivation, for example). + */ +void vgic_irq_handle_resampling(struct vgic_irq *irq, + bool lr_deactivated, bool lr_pending) +{ + if (vgic_irq_is_mapped_level(irq)) { + bool resample = false; + + if (unlikely(vgic_irq_needs_resampling(irq))) { + resample = !(irq->active || irq->pending_latch); + } else if (lr_pending || (lr_deactivated && irq->line_level)) { + irq->line_level = vgic_get_phys_line_level(irq); + resample = !irq->line_level; + } + + if (resample) + vgic_irq_set_phys_active(irq, false); + } +} diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h index dc1f3d1657ee..14a9218641f5 100644 --- a/arch/arm64/kvm/vgic/vgic.h +++ b/arch/arm64/kvm/vgic/vgic.h @@ -169,6 +169,8 @@ void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active); bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq, unsigned long flags); void vgic_kick_vcpus(struct kvm *kvm); +void vgic_irq_handle_resampling(struct vgic_irq *irq, + bool lr_deactivated, bool lr_pending); int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr, phys_addr_t addr, phys_addr_t alignment); -- cgit From 923a547d71b967c808a596968cf8022102f8b5b2 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 6 Aug 2021 12:31:06 +0100 Subject: KVM: arm64: Move kern_hyp_va() usage in __load_guest_stage2() into the callers It is a bit awkward to use kern_hyp_va() in __load_guest_stage2(), especially as the helper is shared between VHE and nVHE. Instead, move the use of kern_hyp_va() into the nVHE code, and pass a pointer to the kvm->arch structure instead. Although this may look a bit awkward, it allows for some further simplification. 
Cc: Catalin Marinas Cc: Jade Alglave Cc: Shameer Kolothum Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon Link: https://lore.kernel.org/r/20210806113109.2475-4-will@kernel.org --- arch/arm64/include/asm/kvm_mmu.h | 5 +++-- arch/arm64/kvm/hyp/nvhe/switch.c | 4 +++- arch/arm64/kvm/hyp/nvhe/tlb.c | 2 +- arch/arm64/kvm/hyp/vhe/switch.c | 2 +- arch/arm64/kvm/hyp/vhe/tlb.c | 2 +- 5 files changed, 9 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index b52c5c4b9a3d..05e089653a1a 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -280,9 +280,10 @@ static __always_inline void __load_stage2(struct kvm_s2_mmu *mmu, unsigned long asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT)); } -static __always_inline void __load_guest_stage2(struct kvm_s2_mmu *mmu) +static __always_inline void __load_guest_stage2(struct kvm_s2_mmu *mmu, + struct kvm_arch *arch) { - __load_stage2(mmu, kern_hyp_va(mmu->arch)->vtcr); + __load_stage2(mmu, arch->vtcr); } static inline struct kvm *kvm_s2_mmu_to_kvm(struct kvm_s2_mmu *mmu) diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index f7af9688c1f7..e50a49082923 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -170,6 +170,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) { struct kvm_cpu_context *host_ctxt; struct kvm_cpu_context *guest_ctxt; + struct kvm_s2_mmu *mmu; bool pmu_switch_needed; u64 exit_code; @@ -213,7 +214,8 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) __sysreg32_restore_state(vcpu); __sysreg_restore_state_nvhe(guest_ctxt); - __load_guest_stage2(kern_hyp_va(vcpu->arch.hw_mmu)); + mmu = kern_hyp_va(vcpu->arch.hw_mmu); + __load_guest_stage2(mmu, kern_hyp_va(mmu->arch)); __activate_traps(vcpu); __hyp_vgic_restore_state(vcpu); diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c index 38ed0f6f2703..76229407d8f0 100644 --- a/arch/arm64/kvm/hyp/nvhe/tlb.c +++ b/arch/arm64/kvm/hyp/nvhe/tlb.c @@ -39,7 +39,7 @@ static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu, * ensuring that we always have an ISB, but not two ISBs back * to back. */ - __load_guest_stage2(mmu); + __load_guest_stage2(mmu, kern_hyp_va(mmu->arch)); asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT)); } diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index b3229924d243..0cb7523a501a 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -128,7 +128,7 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) * __load_guest_stage2 configures stage 2 translation, and * __activate_traps clear HCR_EL2.TGE (among other things). */ - __load_guest_stage2(vcpu->arch.hw_mmu); + __load_guest_stage2(vcpu->arch.hw_mmu, vcpu->arch.hw_mmu->arch); __activate_traps(vcpu); __kvm_adjust_pc(vcpu); diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c index 66f17349f0c3..5e9fb3989e0b 100644 --- a/arch/arm64/kvm/hyp/vhe/tlb.c +++ b/arch/arm64/kvm/hyp/vhe/tlb.c @@ -53,7 +53,7 @@ static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu, * place before clearing TGE. __load_guest_stage2() already * has an ISB in order to deal with this. 
*/ - __load_guest_stage2(mmu); + __load_guest_stage2(mmu, mmu->arch); val = read_sysreg(hcr_el2); val &= ~HCR_TGE; write_sysreg(val, hcr_el2); -- cgit From 4efc0ede4f31d7ec25c3dee0c8f07f93735cee6d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 6 Aug 2021 12:31:07 +0100 Subject: KVM: arm64: Unify stage-2 programming behind __load_stage2() The protected mode relies on a separate helper to load the S2 context. Move over to the __load_guest_stage2() helper instead, and rename it to __load_stage2() to present a unified interface. Cc: Catalin Marinas Cc: Jade Alglave Cc: Shameer Kolothum Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon Link: https://lore.kernel.org/r/20210806113109.2475-5-will@kernel.org --- arch/arm64/include/asm/kvm_mmu.h | 11 +++-------- arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 2 +- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 2 +- arch/arm64/kvm/hyp/nvhe/switch.c | 2 +- arch/arm64/kvm/hyp/nvhe/tlb.c | 4 ++-- arch/arm64/kvm/hyp/vhe/switch.c | 6 +++--- arch/arm64/kvm/hyp/vhe/tlb.c | 4 ++-- 7 files changed, 13 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 05e089653a1a..08bc81f6944b 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -267,9 +267,10 @@ static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu) * Must be called from hyp code running at EL2 with an updated VTTBR * and interrupts disabled. */ -static __always_inline void __load_stage2(struct kvm_s2_mmu *mmu, unsigned long vtcr) +static __always_inline void __load_stage2(struct kvm_s2_mmu *mmu, + struct kvm_arch *arch) { - write_sysreg(vtcr, vtcr_el2); + write_sysreg(arch->vtcr, vtcr_el2); write_sysreg(kvm_get_vttbr(mmu), vttbr_el2); /* @@ -280,12 +281,6 @@ static __always_inline void __load_stage2(struct kvm_s2_mmu *mmu, unsigned long asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT)); } -static __always_inline void __load_guest_stage2(struct kvm_s2_mmu *mmu, - struct kvm_arch *arch) -{ - __load_stage2(mmu, arch->vtcr); -} - static inline struct kvm *kvm_s2_mmu_to_kvm(struct kvm_s2_mmu *mmu) { return container_of(mmu->arch, struct kvm, arch); diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index 9c227d87c36d..8901dc95d7de 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -29,7 +29,7 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt); static __always_inline void __load_host_stage2(void) { if (static_branch_likely(&kvm_protected_mode_initialized)) - __load_stage2(&host_kvm.arch.mmu, host_kvm.arch.vtcr); + __load_stage2(&host_kvm.arch.mmu, &host_kvm.arch); else write_sysreg(0, vttbr_el2); } diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index d938ce95d3bd..36aea13c9e5a 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -126,7 +126,7 @@ int __pkvm_prot_finalize(void) kvm_flush_dcache_to_poc(params, sizeof(*params)); write_sysreg(params->hcr_el2, hcr_el2); - __load_stage2(&host_kvm.arch.mmu, host_kvm.arch.vtcr); + __load_stage2(&host_kvm.arch.mmu, &host_kvm.arch); /* * Make sure to have an ISB before the TLB maintenance below but only diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index e50a49082923..3e7ad32b3f0d 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ 
-215,7 +215,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) __sysreg_restore_state_nvhe(guest_ctxt); mmu = kern_hyp_va(vcpu->arch.hw_mmu); - __load_guest_stage2(mmu, kern_hyp_va(mmu->arch)); + __load_stage2(mmu, kern_hyp_va(mmu->arch)); __activate_traps(vcpu); __hyp_vgic_restore_state(vcpu); diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c index 76229407d8f0..d296d617f589 100644 --- a/arch/arm64/kvm/hyp/nvhe/tlb.c +++ b/arch/arm64/kvm/hyp/nvhe/tlb.c @@ -34,12 +34,12 @@ static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu, } /* - * __load_guest_stage2() includes an ISB only when the AT + * __load_stage2() includes an ISB only when the AT * workaround is applied. Take care of the opposite condition, * ensuring that we always have an ISB, but not two ISBs back * to back. */ - __load_guest_stage2(mmu, kern_hyp_va(mmu->arch)); + __load_stage2(mmu, kern_hyp_va(mmu->arch)); asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT)); } diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 0cb7523a501a..709f6438283e 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -124,11 +124,11 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) * * We have already configured the guest's stage 1 translation in * kvm_vcpu_load_sysregs_vhe above. We must now call - * __load_guest_stage2 before __activate_traps, because - * __load_guest_stage2 configures stage 2 translation, and + * __load_stage2 before __activate_traps, because + * __load_stage2 configures stage 2 translation, and * __activate_traps clear HCR_EL2.TGE (among other things). */ - __load_guest_stage2(vcpu->arch.hw_mmu, vcpu->arch.hw_mmu->arch); + __load_stage2(vcpu->arch.hw_mmu, vcpu->arch.hw_mmu->arch); __activate_traps(vcpu); __kvm_adjust_pc(vcpu); diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c index 5e9fb3989e0b..24cef9b87f9e 100644 --- a/arch/arm64/kvm/hyp/vhe/tlb.c +++ b/arch/arm64/kvm/hyp/vhe/tlb.c @@ -50,10 +50,10 @@ static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu, * * ARM erratum 1165522 requires some special handling (again), * as we need to make sure both stages of translation are in - * place before clearing TGE. __load_guest_stage2() already + * place before clearing TGE. __load_stage2() already * has an ISB in order to deal with this. */ - __load_guest_stage2(mmu, mmu->arch); + __load_stage2(mmu, mmu->arch); val = read_sysreg(hcr_el2); val &= ~HCR_TGE; write_sysreg(val, hcr_el2); -- cgit From cf364e08ea1c5dd217afb658d510aaef7d0cc6f4 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 6 Aug 2021 12:31:08 +0100 Subject: KVM: arm64: Upgrade VMID accesses to {READ,WRITE}_ONCE Since TLB invalidation can run in parallel with VMID allocation, we need to be careful and avoid any sort of load/store tearing. Use {READ,WRITE}_ONCE consistently to avoid any surprise. 
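The pattern being imposed, as a kernel-style sketch (struct and helper names invented for illustration): plain concurrent loads and stores may be split, merged, or refetched by the compiler, while the _ONCE accessors force one full-width access.

#include <linux/compiler.h>
#include <linux/types.h>

struct vmid_info {
	u32 vmid; /* read locklessly, e.g. on a TLB invalidation path */
};

static void publish_vmid(struct vmid_info *v, u32 next)
{
	WRITE_ONCE(v->vmid, next); /* single full-width store, no tearing */
}

static u32 snapshot_vmid(struct vmid_info *v)
{
	return READ_ONCE(v->vmid); /* single full-width load, no refetch */
}

Note the accessors only rule out tearing; ordering against page-table updates still comes from a prior DSB, as the comment added to kvm_get_vttbr() below spells out.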
Cc: Catalin Marinas Cc: Jade Alglave Cc: Shameer Kolothum Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon Reviewed-by: Quentin Perret Link: https://lore.kernel.org/r/20210806113109.2475-6-will@kernel.org --- arch/arm64/include/asm/kvm_mmu.h | 7 ++++++- arch/arm64/kvm/arm.c | 2 +- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 4 ++-- arch/arm64/kvm/mmu.c | 2 +- 4 files changed, 10 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 08bc81f6944b..02d378887743 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -252,6 +252,11 @@ static inline int kvm_write_guest_lock(struct kvm *kvm, gpa_t gpa, #define kvm_phys_to_vttbr(addr) phys_to_ttbr(addr) +/* + * When this is (directly or indirectly) used on the TLB invalidation + * path, we rely on a previously issued DSB so that page table updates + * and VMID reads are correctly ordered. + */ static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu) { struct kvm_vmid *vmid = &mmu->vmid; @@ -259,7 +264,7 @@ static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu) u64 cnp = system_supports_cnp() ? VTTBR_CNP_BIT : 0; baddr = mmu->pgd_phys; - vmid_field = (u64)vmid->vmid << VTTBR_VMID_SHIFT; + vmid_field = (u64)READ_ONCE(vmid->vmid) << VTTBR_VMID_SHIFT; return kvm_phys_to_vttbr(baddr) | vmid_field | cnp; } diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index e9a2b8f27792..658f76067f46 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -571,7 +571,7 @@ static void update_vmid(struct kvm_vmid *vmid) kvm_call_hyp(__kvm_flush_vm_context); } - vmid->vmid = kvm_next_vmid; + WRITE_ONCE(vmid->vmid, kvm_next_vmid); kvm_next_vmid++; kvm_next_vmid &= (1 << kvm_get_vmid_bits()) - 1; diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 36aea13c9e5a..7a0c4af186a0 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -109,8 +109,8 @@ int kvm_host_prepare_stage2(void *pgt_pool_base) mmu->pgd_phys = __hyp_pa(host_kvm.pgt.pgd); mmu->arch = &host_kvm.arch; mmu->pgt = &host_kvm.pgt; - mmu->vmid.vmid_gen = 0; - mmu->vmid.vmid = 0; + WRITE_ONCE(mmu->vmid.vmid_gen, 0); + WRITE_ONCE(mmu->vmid.vmid, 0); return 0; } diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 0625bf2353c2..d9152717cbd9 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -485,7 +485,7 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu) mmu->arch = &kvm->arch; mmu->pgt = pgt; mmu->pgd_phys = __pa(pgt->pgd); - mmu->vmid.vmid_gen = 0; + WRITE_ONCE(mmu->vmid.vmid_gen, 0); return 0; out_destroy_pgtable: -- cgit From 2ea7f655800b00b109951f22539fe2025add210b Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 17 Aug 2021 09:11:20 +0100 Subject: KVM: arm64: placeholder to check if VM is protected Add a function to check whether a VM is protected (under pKVM). Since the creation of protected VMs isn't enabled yet, this is a placeholder that always returns false. The intention is for this to become a check for protected VMs in the future (see Will's RFC). No functional change intended. 
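A hypothetical call site, to show the usage pattern the placeholder enables (illustrative only; neither this function nor the policy is part of the patch):

static int example_read_vcpu_state(struct kvm *kvm)
{
	/*
	 * Once protected VMs can actually be created, checks like this
	 * would fence the host out of protected-guest state.
	 */
	if (kvm_vm_is_protected(kvm))
		return -EPERM;

	return 0; /* ... normal, non-protected access path ... */
}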
Acked-by: Will Deacon Signed-off-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/kvmarm/20210603183347.1695-1-will@kernel.org/ Link: https://lore.kernel.org/r/20210817081134.2918285-2-tabba@google.com --- arch/arm64/include/asm/kvm_host.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 41911585ae0c..347781f99b6a 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -771,6 +771,11 @@ void kvm_arch_free_vm(struct kvm *kvm); int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type); +static inline bool kvm_vm_is_protected(struct kvm *kvm) +{ + return false; +} + int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu, int feature); bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu); -- cgit From e6bc555c96990046d680ff92c8e2e7b6b43b509f Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 17 Aug 2021 09:11:21 +0100 Subject: KVM: arm64: Remove trailing whitespace in comment Remove trailing whitespace from comment in trap_dbgauthstatus_el1(). No functional change intended. Acked-by: Will Deacon Signed-off-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210817081134.2918285-3-tabba@google.com --- arch/arm64/kvm/sys_regs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index f6f126eb6ac1..80a6e41cadad 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -318,14 +318,14 @@ static bool trap_dbgauthstatus_el1(struct kvm_vcpu *vcpu, /* * We want to avoid world-switching all the DBG registers all the * time: - * + * * - If we've touched any debug register, it is likely that we're * going to touch more of them. It then makes sense to disable the * traps and start doing the save/restore dance * - If debug is active (DBG_MDSCR_KDE or DBG_MDSCR_MDE set), it is * then mandatory to save/restore the registers, as the guest * depends on them. - * + * * For this, we use a DIRTY bit, indicating the guest has modified the * debug registers, used as follow: * -- cgit From d6c850dd6ce9ce4b410142a600d8c34dc041d860 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 17 Aug 2021 09:11:22 +0100 Subject: KVM: arm64: MDCR_EL2 is a 64-bit register Fix the places in KVM that treat MDCR_EL2 as a 32-bit register. More recent features (e.g., FEAT_SPEv1p2) use bits above 31. No functional change intended. 
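The bug class being fixed, as a self-contained sketch (bit 36 is only an illustrative position for a field above bit 31):

#include <stdint.h>
#include <stdio.h>

#define FIELD_LOW  (UINT64_C(1) << 19) /* reachable even with int-typed math */
#define FIELD_HIGH (UINT64_C(1) << 36) /* needs 64-bit constants and storage */

int main(void)
{
	uint64_t reg = FIELD_LOW | FIELD_HIGH;
	uint32_t narrow = (uint32_t)reg; /* the old u32 mdcr_el2 storage */

	/* prints full=1000080000 narrow=80000: bit 36 is silently lost */
	printf("full=%llx narrow=%x\n", (unsigned long long)reg, narrow);
	return 0;
}

With an int-typed (1 << n), the high field is not merely lost: a shift by 36 on a 32-bit int is undefined behaviour, which is why the hunk below upgrades every constant to UL(1).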
Acked-by: Will Deacon Signed-off-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210817081134.2918285-4-tabba@google.com --- arch/arm64/include/asm/kvm_arm.h | 20 ++++++++++---------- arch/arm64/include/asm/kvm_asm.h | 2 +- arch/arm64/include/asm/kvm_host.h | 2 +- arch/arm64/kvm/debug.c | 2 +- arch/arm64/kvm/hyp/nvhe/debug-sr.c | 2 +- arch/arm64/kvm/hyp/vhe/debug-sr.c | 2 +- 6 files changed, 15 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index d436831dd706..6a523ec83415 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -281,18 +281,18 @@ /* Hyp Debug Configuration Register bits */ #define MDCR_EL2_E2TB_MASK (UL(0x3)) #define MDCR_EL2_E2TB_SHIFT (UL(24)) -#define MDCR_EL2_TTRF (1 << 19) -#define MDCR_EL2_TPMS (1 << 14) +#define MDCR_EL2_TTRF (UL(1) << 19) +#define MDCR_EL2_TPMS (UL(1) << 14) #define MDCR_EL2_E2PB_MASK (UL(0x3)) #define MDCR_EL2_E2PB_SHIFT (UL(12)) -#define MDCR_EL2_TDRA (1 << 11) -#define MDCR_EL2_TDOSA (1 << 10) -#define MDCR_EL2_TDA (1 << 9) -#define MDCR_EL2_TDE (1 << 8) -#define MDCR_EL2_HPME (1 << 7) -#define MDCR_EL2_TPM (1 << 6) -#define MDCR_EL2_TPMCR (1 << 5) -#define MDCR_EL2_HPMN_MASK (0x1F) +#define MDCR_EL2_TDRA (UL(1) << 11) +#define MDCR_EL2_TDOSA (UL(1) << 10) +#define MDCR_EL2_TDA (UL(1) << 9) +#define MDCR_EL2_TDE (UL(1) << 8) +#define MDCR_EL2_HPME (UL(1) << 7) +#define MDCR_EL2_TPM (UL(1) << 6) +#define MDCR_EL2_TPMCR (UL(1) << 5) +#define MDCR_EL2_HPMN_MASK (UL(0x1F)) /* For compatibility with fault code shared with 32-bit */ #define FSC_FAULT ESR_ELx_FSC_FAULT diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 9f0bf2109be7..63ead9060ab5 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -210,7 +210,7 @@ extern u64 __vgic_v3_read_vmcr(void); extern void __vgic_v3_write_vmcr(u32 vmcr); extern void __vgic_v3_init_lrs(void); -extern u32 __kvm_get_mdcr_el2(void); +extern u64 __kvm_get_mdcr_el2(void); #define __KVM_EXTABLE(from, to) \ " .pushsection __kvm_ex_table, \"a\"\n" \ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 347781f99b6a..4d2d974c1522 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -289,7 +289,7 @@ struct kvm_vcpu_arch { /* HYP configuration */ u64 hcr_el2; - u32 mdcr_el2; + u64 mdcr_el2; /* Exception Information */ struct kvm_vcpu_fault_info fault; diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c index d5e79d7ee6e9..db9361338b2a 100644 --- a/arch/arm64/kvm/debug.c +++ b/arch/arm64/kvm/debug.c @@ -21,7 +21,7 @@ DBG_MDSCR_KDE | \ DBG_MDSCR_MDE) -static DEFINE_PER_CPU(u32, mdcr_el2); +static DEFINE_PER_CPU(u64, mdcr_el2); /** * save/restore_guest_debug_regs diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c index 7d3f25868cae..df361d839902 100644 --- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c +++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c @@ -109,7 +109,7 @@ void __debug_switch_to_host(struct kvm_vcpu *vcpu) __debug_switch_to_host_common(vcpu); } -u32 __kvm_get_mdcr_el2(void) +u64 __kvm_get_mdcr_el2(void) { return read_sysreg(mdcr_el2); } diff --git a/arch/arm64/kvm/hyp/vhe/debug-sr.c b/arch/arm64/kvm/hyp/vhe/debug-sr.c index f1e2e5a00933..289689b2682d 100644 --- a/arch/arm64/kvm/hyp/vhe/debug-sr.c +++ b/arch/arm64/kvm/hyp/vhe/debug-sr.c @@ -20,7 +20,7 @@ void __debug_switch_to_host(struct kvm_vcpu *vcpu) 
__debug_switch_to_host_common(vcpu); } -u32 __kvm_get_mdcr_el2(void) +u64 __kvm_get_mdcr_el2(void) { return read_sysreg(mdcr_el2); } -- cgit From dabb1667d8573302712a75530cccfee8f3ffff84 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 17 Aug 2021 09:11:23 +0100 Subject: KVM: arm64: Fix names of config register fields Change the names of hcr_el2 register fields to match the Arm Architecture Reference Manual. Easier for cross-referencing and for grepping. Also, change the name of CPTR_EL2_RES1 to CPTR_NVHE_EL2_RES1, because res1 bits are different for VHE. No functional change intended. Acked-by: Will Deacon Signed-off-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210817081134.2918285-5-tabba@google.com --- arch/arm64/include/asm/kvm_arm.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 6a523ec83415..a928b2dc0b0f 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -32,9 +32,9 @@ #define HCR_TVM (UL(1) << 26) #define HCR_TTLB (UL(1) << 25) #define HCR_TPU (UL(1) << 24) -#define HCR_TPC (UL(1) << 23) +#define HCR_TPC (UL(1) << 23) /* HCR_TPCP if FEAT_DPB */ #define HCR_TSW (UL(1) << 22) -#define HCR_TAC (UL(1) << 21) +#define HCR_TACR (UL(1) << 21) #define HCR_TIDCP (UL(1) << 20) #define HCR_TSC (UL(1) << 19) #define HCR_TID3 (UL(1) << 18) @@ -61,7 +61,7 @@ * The bits we set in HCR: * TLOR: Trap LORegion register accesses * RW: 64bit by default, can be overridden for 32bit VMs - * TAC: Trap ACTLR + * TACR: Trap ACTLR * TSC: Trap SMC * TSW: Trap cache operations by set/way * TWE: Trap WFE @@ -76,7 +76,7 @@ * PTW: Take a stage2 fault if a stage1 walk steps in device memory */ #define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \ - HCR_BSU_IS | HCR_FB | HCR_TAC | \ + HCR_BSU_IS | HCR_FB | HCR_TACR | \ HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \ HCR_FMO | HCR_IMO | HCR_PTW ) #define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF) @@ -275,8 +275,8 @@ #define CPTR_EL2_TTA (1 << 20) #define CPTR_EL2_TFP (1 << CPTR_EL2_TFP_SHIFT) #define CPTR_EL2_TZ (1 << 8) -#define CPTR_EL2_RES1 0x000032ff /* known RES1 bits in CPTR_EL2 */ -#define CPTR_EL2_DEFAULT CPTR_EL2_RES1 +#define CPTR_NVHE_EL2_RES1 0x000032ff /* known RES1 bits in CPTR_EL2 (nVHE) */ +#define CPTR_EL2_DEFAULT CPTR_NVHE_EL2_RES1 /* Hyp Debug Configuration Register bits */ #define MDCR_EL2_E2TB_MASK (UL(0x3)) -- cgit From f76f89e2f73d93720cfcad7fb7b24d022b2846bf Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 17 Aug 2021 09:11:24 +0100 Subject: KVM: arm64: Refactor sys_regs.h,c for nVHE reuse Refactor sys_regs.h and sys_regs.c to make it easier to reuse common code. It will be used in nVHE in a later patch. Note that the refactored code uses __inline_bsearch for find_reg instead of bsearch to avoid copying the bsearch code for nVHE. No functional change intended. 
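For context, the lookup that moves into the header works because the descriptor tables are kept sorted by their (Op0, Op1, CRn, CRm, Op2) encoding, so finding a register is a plain binary search. A hedged user-space model of the same idea, with toy types and libc bsearch() standing in for __inline_bsearch():

    #include <stdint.h>
    #include <stdlib.h>

    struct desc {
            uint32_t encoding;      /* packed (Op0,Op1,CRn,CRm,Op2) */
            const char *name;
    };

    /* Compare the searched-for encoding against one table entry. */
    static int cmp_desc(const void *key, const void *elt)
    {
            uint32_t k = (uint32_t)(uintptr_t)key;
            const struct desc *d = elt;

            return (k > d->encoding) - (k < d->encoding);
    }

    /* O(log n) lookup over a table sorted by ascending encoding. */
    static const struct desc *find_desc(uint32_t enc,
                                        const struct desc *table, size_t num)
    {
            return bsearch((void *)(uintptr_t)enc, table, num,
                           sizeof(table[0]), cmp_desc);
    }

Keeping the search and its comparator inline is what lets the nVHE hypervisor object, which cannot call the kernel's out-of-line bsearch(), reuse the code unchanged.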
Acked-by: Will Deacon Signed-off-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210817081134.2918285-6-tabba@google.com --- arch/arm64/include/asm/sysreg.h | 5 ++++ arch/arm64/kvm/sys_regs.c | 60 +++++++++++------------------------------ arch/arm64/kvm/sys_regs.h | 31 +++++++++++++++++++++ 3 files changed, 52 insertions(+), 44 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 7b9c3acba684..53a93a9c5253 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -1153,6 +1153,11 @@ #define ICH_VTR_A3V_SHIFT 21 #define ICH_VTR_A3V_MASK (1 << ICH_VTR_A3V_SHIFT) +#define ARM64_FEATURE_FIELD_BITS 4 + +/* Create a mask for the feature bits of the specified feature. */ +#define ARM64_FEATURE_MASK(x) (GENMASK_ULL(x##_SHIFT + ARM64_FEATURE_FIELD_BITS - 1, x##_SHIFT)) + #ifdef __ASSEMBLY__ .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30 diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 80a6e41cadad..b6a2f8e890db 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -44,10 +44,6 @@ * 64bit interface. */ -#define reg_to_encoding(x) \ - sys_reg((u32)(x)->Op0, (u32)(x)->Op1, \ - (u32)(x)->CRn, (u32)(x)->CRm, (u32)(x)->Op2) - static bool read_from_write_only(struct kvm_vcpu *vcpu, struct sys_reg_params *params, const struct sys_reg_desc *r) @@ -1026,8 +1022,6 @@ static bool access_arch_timer(struct kvm_vcpu *vcpu, return true; } -#define FEATURE(x) (GENMASK_ULL(x##_SHIFT + 3, x##_SHIFT)) - /* Read a sanitised cpufeature ID register by sys_reg_desc */ static u64 read_id_reg(const struct kvm_vcpu *vcpu, struct sys_reg_desc const *r, bool raz) @@ -1038,40 +1032,40 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, switch (id) { case SYS_ID_AA64PFR0_EL1: if (!vcpu_has_sve(vcpu)) - val &= ~FEATURE(ID_AA64PFR0_SVE); - val &= ~FEATURE(ID_AA64PFR0_AMU); - val &= ~FEATURE(ID_AA64PFR0_CSV2); - val |= FIELD_PREP(FEATURE(ID_AA64PFR0_CSV2), (u64)vcpu->kvm->arch.pfr0_csv2); - val &= ~FEATURE(ID_AA64PFR0_CSV3); - val |= FIELD_PREP(FEATURE(ID_AA64PFR0_CSV3), (u64)vcpu->kvm->arch.pfr0_csv3); + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_SVE); + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_AMU); + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_CSV2); + val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV2), (u64)vcpu->kvm->arch.pfr0_csv2); + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_CSV3); + val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV3), (u64)vcpu->kvm->arch.pfr0_csv3); break; case SYS_ID_AA64PFR1_EL1: - val &= ~FEATURE(ID_AA64PFR1_MTE); + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_MTE); if (kvm_has_mte(vcpu->kvm)) { u64 pfr, mte; pfr = read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1); mte = cpuid_feature_extract_unsigned_field(pfr, ID_AA64PFR1_MTE_SHIFT); - val |= FIELD_PREP(FEATURE(ID_AA64PFR1_MTE), mte); + val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR1_MTE), mte); } break; case SYS_ID_AA64ISAR1_EL1: if (!vcpu_has_ptrauth(vcpu)) - val &= ~(FEATURE(ID_AA64ISAR1_APA) | - FEATURE(ID_AA64ISAR1_API) | - FEATURE(ID_AA64ISAR1_GPA) | - FEATURE(ID_AA64ISAR1_GPI)); + val &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR1_APA) | + ARM64_FEATURE_MASK(ID_AA64ISAR1_API) | + ARM64_FEATURE_MASK(ID_AA64ISAR1_GPA) | + ARM64_FEATURE_MASK(ID_AA64ISAR1_GPI)); break; case SYS_ID_AA64DFR0_EL1: /* Limit debug to ARMv8.0 */ - val &= ~FEATURE(ID_AA64DFR0_DEBUGVER); - val |= FIELD_PREP(FEATURE(ID_AA64DFR0_DEBUGVER), 6); + val &= 
~ARM64_FEATURE_MASK(ID_AA64DFR0_DEBUGVER); val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64DFR0_DEBUGVER), 6); /* Limit guests to PMUv3 for ARMv8.4 */ val = cpuid_feature_cap_perfmon_field(val, ID_AA64DFR0_PMUVER_SHIFT, kvm_vcpu_has_pmu(vcpu) ? ID_AA64DFR0_PMUVER_8_4 : 0); /* Hide SPE from guests */ val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_PMSVER); break; case SYS_ID_DFR0_EL1: /* Limit guests to PMUv3 for ARMv8.4 */ @@ -2106,23 +2100,6 @@ static int check_sysreg_table(const struct sys_reg_desc *table, unsigned int n, return 0; } -static int match_sys_reg(const void *key, const void *elt) -{ - const unsigned long pval = (unsigned long)key; - const struct sys_reg_desc *r = elt; - - return pval - reg_to_encoding(r); -} - -static const struct sys_reg_desc *find_reg(const struct sys_reg_params *params, - const struct sys_reg_desc table[], - unsigned int num) -{ - unsigned long pval = reg_to_encoding(params); - - return bsearch((void *)pval, table, num, sizeof(table[0]), match_sys_reg); -} - int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu) { kvm_inject_undefined(vcpu); @@ -2365,13 +2342,8 @@ int kvm_handle_sys_reg(struct kvm_vcpu *vcpu) trace_kvm_handle_sys_reg(esr); - params.Op0 = (esr >> 20) & 3; - params.Op1 = (esr >> 14) & 0x7; - params.CRn = (esr >> 10) & 0xf; - params.CRm = (esr >> 1) & 0xf; - params.Op2 = (esr >> 17) & 0x7; + params = esr_sys64_to_params(esr); params.regval = vcpu_get_reg(vcpu, Rt); - params.is_write = !(esr & 1); ret = emulate_sys_reg(vcpu, &params); diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h index 9d0621417c2a..cc0cc95a0280 100644 --- a/arch/arm64/kvm/sys_regs.h +++ b/arch/arm64/kvm/sys_regs.h @@ -11,6 +11,12 @@ #ifndef __ARM64_KVM_SYS_REGS_LOCAL_H__ #define __ARM64_KVM_SYS_REGS_LOCAL_H__ +#include + +#define reg_to_encoding(x) \ + sys_reg((u32)(x)->Op0, (u32)(x)->Op1, \ + (u32)(x)->CRn, (u32)(x)->CRm, (u32)(x)->Op2) + struct sys_reg_params { u8 Op0; u8 Op1; @@ -21,6 +27,14 @@ struct sys_reg_params { bool is_write; }; +#define esr_sys64_to_params(esr) \ + ((struct sys_reg_params){ .Op0 = ((esr) >> 20) & 3, \ + .Op1 = ((esr) >> 14) & 0x7, \ + .CRn = ((esr) >> 10) & 0xf, \ + .CRm = ((esr) >> 1) & 0xf, \ + .Op2 = ((esr) >> 17) & 0x7, \ + .is_write = !((esr) & 1) }) + struct sys_reg_desc { /* Sysreg string for debug */ const char *name; @@ -152,6 +166,23 @@ static inline int cmp_sys_reg(const struct sys_reg_desc *i1, return i1->Op2 - i2->Op2; } +static inline int match_sys_reg(const void *key, const void *elt) +{ + const unsigned long pval = (unsigned long)key; + const struct sys_reg_desc *r = elt; + + return pval - reg_to_encoding(r); +} + +static inline const struct sys_reg_desc * +find_reg(const struct sys_reg_params *params, const struct sys_reg_desc table[], + unsigned int num) +{ + unsigned long pval = reg_to_encoding(params); + + return __inline_bsearch((void *)pval, table, num, sizeof(table[0]), match_sys_reg); +} + const struct sys_reg_desc *find_reg_by_id(u64 id, struct sys_reg_params *params, const struct sys_reg_desc table[], -- cgit From 1460b4b25fde52cbee746c11a4b1d3185f2e2847 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 17 Aug 2021 09:11:25 +0100 Subject: KVM: arm64: Restore mdcr_el2 from vcpu On deactivating traps, restore the value of mdcr_el2 from the newly created and preserved host value in the vcpu context, rather than directly reading the hardware register. Up until and including this patch the two values are the same, i.e., the hardware register and the vcpu one.
A future patch will be changing the value of mdcr_el2 on activating traps, and this ensures that its value will be restored. No functional change intended. Signed-off-by: Fuad Tabba Acked-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210817081134.2918285-7-tabba@google.com --- arch/arm64/include/asm/kvm_host.h | 5 ++++- arch/arm64/include/asm/kvm_hyp.h | 2 +- arch/arm64/kvm/hyp/include/hyp/switch.h | 6 +++++- arch/arm64/kvm/hyp/nvhe/switch.c | 13 +++++-------- arch/arm64/kvm/hyp/vhe/switch.c | 14 +++++--------- arch/arm64/kvm/hyp/vhe/sysreg-sr.c | 2 +- 6 files changed, 21 insertions(+), 21 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 4d2d974c1522..76462c6a91ee 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -287,10 +287,13 @@ struct kvm_vcpu_arch { /* Stage 2 paging state used by the hardware on next switch */ struct kvm_s2_mmu *hw_mmu; - /* HYP configuration */ + /* Values of trap registers for the guest. */ u64 hcr_el2; u64 mdcr_el2; + /* Values of trap registers for the host before guest entry. */ + u64 mdcr_el2_host; + /* Exception Information */ struct kvm_vcpu_fault_info fault; diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 9d60b3006efc..657d0c94cf82 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -95,7 +95,7 @@ void __sve_restore_state(void *sve_pffr, u32 *fpsr); #ifndef __KVM_NVHE_HYPERVISOR__ void activate_traps_vhe_load(struct kvm_vcpu *vcpu); -void deactivate_traps_vhe_put(void); +void deactivate_traps_vhe_put(struct kvm_vcpu *vcpu); #endif u64 __guest_enter(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index e4a2f295a394..a0e78a6027be 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -92,11 +92,15 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu) write_sysreg(0, pmselr_el0); write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0); } + + vcpu->arch.mdcr_el2_host = read_sysreg(mdcr_el2); write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); } -static inline void __deactivate_traps_common(void) +static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu) { + write_sysreg(vcpu->arch.mdcr_el2_host, mdcr_el2); + write_sysreg(0, hstr_el2); if (kvm_arm_support_pmu_v3()) write_sysreg(0, pmuserenr_el0); diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index f7af9688c1f7..2ea764a48958 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -69,12 +69,10 @@ static void __activate_traps(struct kvm_vcpu *vcpu) static void __deactivate_traps(struct kvm_vcpu *vcpu) { extern char __kvm_hyp_host_vector[]; - u64 mdcr_el2, cptr; + u64 cptr; ___deactivate_traps(vcpu); - mdcr_el2 = read_sysreg(mdcr_el2); - if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { u64 val; @@ -92,13 +90,12 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu) isb(); } - __deactivate_traps_common(); + vcpu->arch.mdcr_el2_host &= MDCR_EL2_HPMN_MASK | + MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT | + MDCR_EL2_E2TB_MASK << MDCR_EL2_E2TB_SHIFT; - mdcr_el2 &= MDCR_EL2_HPMN_MASK; - mdcr_el2 |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT; - mdcr_el2 |= MDCR_EL2_E2TB_MASK << MDCR_EL2_E2TB_SHIFT; + __deactivate_traps_common(vcpu); - write_sysreg(mdcr_el2, mdcr_el2); 
write_sysreg(this_cpu_ptr(&kvm_init_params)->hcr_el2, hcr_el2); cptr = CPTR_EL2_DEFAULT; diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index b3229924d243..ec158fa41ae6 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -91,17 +91,13 @@ void activate_traps_vhe_load(struct kvm_vcpu *vcpu) __activate_traps_common(vcpu); } -void deactivate_traps_vhe_put(void) +void deactivate_traps_vhe_put(struct kvm_vcpu *vcpu) { - u64 mdcr_el2 = read_sysreg(mdcr_el2); + vcpu->arch.mdcr_el2_host &= MDCR_EL2_HPMN_MASK | + MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT | + MDCR_EL2_TPMS; - mdcr_el2 &= MDCR_EL2_HPMN_MASK | - MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT | - MDCR_EL2_TPMS; - - write_sysreg(mdcr_el2, mdcr_el2); - - __deactivate_traps_common(); + __deactivate_traps_common(vcpu); } /* Switch to the guest for VHE systems running in EL2 */ diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c index 2a0b8c88d74f..007a12dd4351 100644 --- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c @@ -101,7 +101,7 @@ void kvm_vcpu_put_sysregs_vhe(struct kvm_vcpu *vcpu) struct kvm_cpu_context *host_ctxt; host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; - deactivate_traps_vhe_put(); + deactivate_traps_vhe_put(vcpu); __sysreg_save_el1_state(guest_ctxt); __sysreg_save_user_state(guest_ctxt); -- cgit From 12849badc6d2456f15f8f2c93037628d5176810b Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 17 Aug 2021 09:11:26 +0100 Subject: KVM: arm64: Keep mdcr_el2's value as set by __init_el2_debug __init_el2_debug configures mdcr_el2 at initialization based on, among other things, available hardware support. Trap deactivation doesn't check that, so keep the initial value. No functional change intended. Signed-off-by: Fuad Tabba Acked-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210817081134.2918285-8-tabba@google.com --- arch/arm64/kvm/hyp/nvhe/switch.c | 4 ---- arch/arm64/kvm/hyp/vhe/switch.c | 4 ---- 2 files changed, 8 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 2ea764a48958..1778593a08a9 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -90,10 +90,6 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu) isb(); } - vcpu->arch.mdcr_el2_host &= MDCR_EL2_HPMN_MASK | - MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT | - MDCR_EL2_E2TB_MASK << MDCR_EL2_E2TB_SHIFT; - __deactivate_traps_common(vcpu); write_sysreg(this_cpu_ptr(&kvm_init_params)->hcr_el2, hcr_el2); diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index ec158fa41ae6..0d0c9550fb08 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -93,10 +93,6 @@ void activate_traps_vhe_load(struct kvm_vcpu *vcpu) void deactivate_traps_vhe_put(struct kvm_vcpu *vcpu) { - vcpu->arch.mdcr_el2_host &= MDCR_EL2_HPMN_MASK | - MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT | - MDCR_EL2_TPMS; - __deactivate_traps_common(vcpu); } -- cgit From cd496228fd8de2e82b6636d3d89105631ea2b69c Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 17 Aug 2021 09:11:27 +0100 Subject: KVM: arm64: Track value of cptr_el2 in struct kvm_vcpu_arch Track the baseline guest value for cptr_el2 in struct kvm_vcpu_arch, similar to the other registers that control traps. Use this value when setting cptr_el2 for the guest. 
Currently this value is unchanged (CPTR_EL2_DEFAULT), but future patches will set trapping bits based on features supported for the guest. No functional change intended. Acked-by: Will Deacon Signed-off-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210817081134.2918285-9-tabba@google.com --- arch/arm64/include/asm/kvm_host.h | 1 + arch/arm64/kvm/arm.c | 1 + arch/arm64/kvm/hyp/nvhe/switch.c | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 76462c6a91ee..ac67d5699c68 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -290,6 +290,7 @@ struct kvm_vcpu_arch { /* Values of trap registers for the guest. */ u64 hcr_el2; u64 mdcr_el2; + u64 cptr_el2; /* Values of trap registers for the host before guest entry. */ u64 mdcr_el2_host; diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index e9a2b8f27792..14b12f2c08c0 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1104,6 +1104,7 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu, } vcpu_reset_hcr(vcpu); + vcpu->arch.cptr_el2 = CPTR_EL2_DEFAULT; /* * Handle the "start in power-off" case. diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 1778593a08a9..86f3d6482935 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -41,7 +41,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu) ___activate_traps(vcpu); __activate_traps_common(vcpu); - val = CPTR_EL2_DEFAULT; + val = vcpu->arch.cptr_el2; val |= CPTR_EL2_TTA | CPTR_EL2_TAM; if (!update_fp_enabled(vcpu)) { val |= CPTR_EL2_TFP | CPTR_EL2_TZ; -- cgit From 95b54c3e4c92b9185b15c83e8baab9ba312195f6 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 17 Aug 2021 09:11:28 +0100 Subject: KVM: arm64: Add feature register flag definitions Add feature register flag definitions to clarify which features might be supported. Consolidate the various ID_AA64PFR0_ELx flags for all ELs. No functional change intended. 
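For context, every ID register field involved here is a uniform 4-bit slice (ARM64_FEATURE_FIELD_BITS == 4), which is what makes one generic mask macro workable. A stand-alone sketch of the mask-and-extract pattern, simplified from the kernel's (which also handles signed fields):

    #include <stdint.h>

    #define FEATURE_FIELD_BITS  4
    #define FEATURE_MASK(shift) \
            (((UINT64_C(1) << FEATURE_FIELD_BITS) - 1) << (shift))

    /* Read one unsigned 4-bit ID register field. */
    static inline unsigned int feature_field(uint64_t reg, unsigned int shift)
    {
            return (unsigned int)((reg & FEATURE_MASK(shift)) >> shift);
    }

With the consolidated values, a field reading 0x1 (ELx_64BIT_ONLY) or 0x2 (ELx_32BIT_64BIT) then means the same thing for EL0 and EL1; only the shift differs.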
Signed-off-by: Fuad Tabba Acked-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210817081134.2918285-10-tabba@google.com --- arch/arm64/include/asm/cpufeature.h | 4 ++-- arch/arm64/include/asm/sysreg.h | 12 ++++++++---- arch/arm64/kernel/cpufeature.c | 8 ++++---- 3 files changed, 14 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 9bb9d11750d7..b7d9bb17908d 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -602,14 +602,14 @@ static inline bool id_aa64pfr0_32bit_el1(u64 pfr0) { u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL1_SHIFT); - return val == ID_AA64PFR0_EL1_32BIT_64BIT; + return val == ID_AA64PFR0_ELx_32BIT_64BIT; } static inline bool id_aa64pfr0_32bit_el0(u64 pfr0) { u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL0_SHIFT); - return val == ID_AA64PFR0_EL0_32BIT_64BIT; + return val == ID_AA64PFR0_ELx_32BIT_64BIT; } static inline bool id_aa64pfr0_sve(u64 pfr0) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 53a93a9c5253..f84a00f5874d 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -784,14 +784,13 @@ #define ID_AA64PFR0_AMU 0x1 #define ID_AA64PFR0_SVE 0x1 #define ID_AA64PFR0_RAS_V1 0x1 +#define ID_AA64PFR0_RAS_V1P1 0x2 #define ID_AA64PFR0_FP_NI 0xf #define ID_AA64PFR0_FP_SUPPORTED 0x0 #define ID_AA64PFR0_ASIMD_NI 0xf #define ID_AA64PFR0_ASIMD_SUPPORTED 0x0 -#define ID_AA64PFR0_EL1_64BIT_ONLY 0x1 -#define ID_AA64PFR0_EL1_32BIT_64BIT 0x2 -#define ID_AA64PFR0_EL0_64BIT_ONLY 0x1 -#define ID_AA64PFR0_EL0_32BIT_64BIT 0x2 +#define ID_AA64PFR0_ELx_64BIT_ONLY 0x1 +#define ID_AA64PFR0_ELx_32BIT_64BIT 0x2 /* id_aa64pfr1 */ #define ID_AA64PFR1_MPAMFRAC_SHIFT 16 @@ -847,12 +846,16 @@ #define ID_AA64MMFR0_ASID_SHIFT 4 #define ID_AA64MMFR0_PARANGE_SHIFT 0 +#define ID_AA64MMFR0_ASID_8 0x0 +#define ID_AA64MMFR0_ASID_16 0x2 + #define ID_AA64MMFR0_TGRAN4_NI 0xf #define ID_AA64MMFR0_TGRAN4_SUPPORTED 0x0 #define ID_AA64MMFR0_TGRAN64_NI 0xf #define ID_AA64MMFR0_TGRAN64_SUPPORTED 0x0 #define ID_AA64MMFR0_TGRAN16_NI 0x0 #define ID_AA64MMFR0_TGRAN16_SUPPORTED 0x1 +#define ID_AA64MMFR0_PARANGE_40 0x2 #define ID_AA64MMFR0_PARANGE_48 0x5 #define ID_AA64MMFR0_PARANGE_52 0x6 @@ -900,6 +903,7 @@ #define ID_AA64MMFR2_CNP_SHIFT 0 /* id_aa64dfr0 */ +#define ID_AA64DFR0_MTPMU_SHIFT 48 #define ID_AA64DFR0_TRBE_SHIFT 44 #define ID_AA64DFR0_TRACE_FILT_SHIFT 40 #define ID_AA64DFR0_DOUBLELOCK_SHIFT 36 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 0ead8bfedf20..5b59fe5e26e4 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -239,8 +239,8 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL3_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL2_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_SHIFT, 4, ID_AA64PFR0_EL1_64BIT_ONLY), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL0_SHIFT, 4, ID_AA64PFR0_EL0_64BIT_ONLY), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_SHIFT, 4, ID_AA64PFR0_ELx_64BIT_ONLY), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, 
ID_AA64PFR0_EL0_SHIFT, 4, ID_AA64PFR0_ELx_64BIT_ONLY), ARM64_FTR_END, }; @@ -1956,7 +1956,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64PFR0_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64PFR0_EL0_SHIFT, - .min_field_value = ID_AA64PFR0_EL0_32BIT_64BIT, + .min_field_value = ID_AA64PFR0_ELx_32BIT_64BIT, }, #ifdef CONFIG_KVM { @@ -1967,7 +1967,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64PFR0_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64PFR0_EL1_SHIFT, - .min_field_value = ID_AA64PFR0_EL1_32BIT_64BIT, + .min_field_value = ID_AA64PFR0_ELx_32BIT_64BIT, }, { .desc = "Protected KVM", -- cgit From 2d701243b9f231b5d7f9a8cb81870650d3eb32bc Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 17 Aug 2021 09:11:29 +0100 Subject: KVM: arm64: Add config register bit definitions Add hardware configuration register bit definitions for HCR_EL2 and MDCR_EL2. Future patches toggle these hyp configuration register bits to trap on certain accesses. No functional change intended. Acked-by: Will Deacon Signed-off-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210817081134.2918285-11-tabba@google.com --- arch/arm64/include/asm/kvm_arm.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index a928b2dc0b0f..327120c0089f 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -12,8 +12,13 @@ #include /* Hyp Configuration Register (HCR) bits */ + +#define HCR_TID5 (UL(1) << 58) +#define HCR_DCT (UL(1) << 57) #define HCR_ATA_SHIFT 56 #define HCR_ATA (UL(1) << HCR_ATA_SHIFT) +#define HCR_AMVOFFEN (UL(1) << 51) +#define HCR_FIEN (UL(1) << 47) #define HCR_FWB (UL(1) << 46) #define HCR_API (UL(1) << 41) #define HCR_APK (UL(1) << 40) @@ -56,6 +61,7 @@ #define HCR_PTW (UL(1) << 2) #define HCR_SWIO (UL(1) << 1) #define HCR_VM (UL(1) << 0) +#define HCR_RES0 ((UL(1) << 48) | (UL(1) << 39)) /* * The bits we set in HCR: @@ -277,11 +283,21 @@ #define CPTR_EL2_TZ (1 << 8) #define CPTR_NVHE_EL2_RES1 0x000032ff /* known RES1 bits in CPTR_EL2 (nVHE) */ #define CPTR_EL2_DEFAULT CPTR_NVHE_EL2_RES1 +#define CPTR_NVHE_EL2_RES0 (GENMASK(63, 32) | \ + GENMASK(29, 21) | \ + GENMASK(19, 14) | \ + BIT(11)) /* Hyp Debug Configuration Register bits */ #define MDCR_EL2_E2TB_MASK (UL(0x3)) #define MDCR_EL2_E2TB_SHIFT (UL(24)) +#define MDCR_EL2_HPMFZS (UL(1) << 36) +#define MDCR_EL2_HPMFZO (UL(1) << 29) +#define MDCR_EL2_MTPME (UL(1) << 28) +#define MDCR_EL2_TDCC (UL(1) << 27) +#define MDCR_EL2_HCCD (UL(1) << 23) #define MDCR_EL2_TTRF (UL(1) << 19) +#define MDCR_EL2_HPMD (UL(1) << 17) #define MDCR_EL2_TPMS (UL(1) << 14) #define MDCR_EL2_E2PB_MASK (UL(0x3)) #define MDCR_EL2_E2PB_SHIFT (UL(12)) @@ -293,6 +309,12 @@ #define MDCR_EL2_TPM (UL(1) << 6) #define MDCR_EL2_TPMCR (UL(1) << 5) #define MDCR_EL2_HPMN_MASK (UL(0x1F)) +#define MDCR_EL2_RES0 (GENMASK(63, 37) | \ + GENMASK(35, 30) | \ + GENMASK(25, 24) | \ + GENMASK(22, 20) | \ + BIT(18) | \ + GENMASK(16, 15)) /* For compatibility with fault code shared with 32-bit */ #define FSC_FAULT ESR_ELx_FSC_FAULT -- cgit From 411d63d8c64c2f3b0c497fe4658f13b3bca951e2 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 20 Aug 2021 10:46:42 +0100 Subject: KVM: arm64: Upgrade trace_kvm_arm_set_dreg32() to 64bit A number of registers passed to trace_kvm_arm_set_dreg32() are actually 64bit.
Upgrade the tracepoint to take a 64bit value, despite the name... Signed-off-by: Marc Zyngier --- arch/arm64/kvm/trace_handle_exit.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/trace_handle_exit.h b/arch/arm64/kvm/trace_handle_exit.h index 8d78acc4fba7..064a58c19f48 100644 --- a/arch/arm64/kvm/trace_handle_exit.h +++ b/arch/arm64/kvm/trace_handle_exit.h @@ -78,13 +78,17 @@ TRACE_EVENT(kvm_arm_clear_debug, TP_printk("flags: 0x%08x", __entry->guest_debug) ); +/* + * The dreg32 name is a leftover from a distant past. This will really + * output a 64bit value... + */ TRACE_EVENT(kvm_arm_set_dreg32, - TP_PROTO(const char *name, __u32 value), + TP_PROTO(const char *name, __u64 value), TP_ARGS(name, value), TP_STRUCT__entry( __field(const char *, name) - __field(__u32, value) + __field(__u64, value) ), TP_fast_assign( @@ -92,7 +96,7 @@ TRACE_EVENT(kvm_arm_set_dreg32, __entry->value = value; ), - TP_printk("%s: 0x%08x", __entry->name, __entry->value) + TP_printk("%s: 0x%llx", __entry->name, __entry->value) ); TRACE_DEFINE_SIZEOF(__u64); -- cgit From 344179fc7ef427910de438affbf3703fed51fe5a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 11 Aug 2021 08:30:19 +0100 Subject: ARM: 9106/1: traps: use get_kernel_nofault instead of set_fs() ARM uses set_fs() and __get_user() to allow the stack dumping code to access possibly invalid pointers carefully. These can be changed to the simpler get_kernel_nofault(), and allow the eventual removal of set_fs(). dump_instr() will print either kernel or user space pointers, depending on how it was called. For dump_mem(), I assume we are only interested in kernel pointers, and the only time that this is called with user_mode(regs)==true is when the regs themselves are unreliable as a result of the condition that caused the trap. Reviewed-by: Christoph Hellwig Reviewed-by: Linus Walleij Signed-off-by: Arnd Bergmann Signed-off-by: Russell King (Oracle) --- arch/arm/kernel/traps.c | 47 ++++++++++++++++------------------------------- 1 file changed, 16 insertions(+), 31 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 64308e3a5d0c..10dd3ef1f398 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -122,17 +122,8 @@ static void dump_mem(const char *lvl, const char *str, unsigned long bottom, unsigned long top) { unsigned long first; - mm_segment_t fs; int i; - /* - * We need to switch to kernel mode so that we can use __get_user - * to safely read from kernel space. Note that we now dump the - * code first, just in case the backtrace kills us. 
- */ - fs = get_fs(); - set_fs(KERNEL_DS); - printk("%s%s(0x%08lx to 0x%08lx)\n", lvl, str, bottom, top); for (first = bottom & ~31; first < top; first += 32) { @@ -145,7 +136,7 @@ static void dump_mem(const char *lvl, const char *str, unsigned long bottom, for (p = first, i = 0; i < 8 && p < top; i++, p += 4) { if (p >= bottom && p < top) { unsigned long val; - if (__get_user(val, (unsigned long *)p) == 0) + if (get_kernel_nofault(val, (unsigned long *)p)) sprintf(str + i * 9, " %08lx", val); else sprintf(str + i * 9, " ????????"); @@ -153,11 +144,9 @@ static void dump_mem(const char *lvl, const char *str, unsigned long bottom, } printk("%s%04lx:%s\n", lvl, first & 0xffff, str); } - - set_fs(fs); } -static void __dump_instr(const char *lvl, struct pt_regs *regs) +static void dump_instr(const char *lvl, struct pt_regs *regs) { unsigned long addr = instruction_pointer(regs); const int thumb = thumb_mode(regs); @@ -173,10 +162,20 @@ static void __dump_instr(const char *lvl, struct pt_regs *regs) for (i = -4; i < 1 + !!thumb; i++) { unsigned int val, bad; - if (thumb) - bad = get_user(val, &((u16 *)addr)[i]); - else - bad = get_user(val, &((u32 *)addr)[i]); + if (!user_mode(regs)) { + if (thumb) { + u16 val16; + bad = get_kernel_nofault(val16, &((u16 *)addr)[i]); + val = val16; + } else { + bad = get_kernel_nofault(val, &((u32 *)addr)[i]); + } + } else { + if (thumb) + bad = get_user(val, &((u16 *)addr)[i]); + else + bad = get_user(val, &((u32 *)addr)[i]); + } if (!bad) p += sprintf(p, i == 0 ? "(%0*x) " : "%0*x ", @@ -189,20 +188,6 @@ static void __dump_instr(const char *lvl, struct pt_regs *regs) printk("%sCode: %s\n", lvl, str); } -static void dump_instr(const char *lvl, struct pt_regs *regs) -{ - mm_segment_t fs; - - if (!user_mode(regs)) { - fs = get_fs(); - set_fs(KERNEL_DS); - __dump_instr(lvl, regs); - set_fs(fs); - } else { - __dump_instr(lvl, regs); - } -} - #ifdef CONFIG_ARM_UNWIND static inline void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk, const char *loglvl) -- cgit From b6e47f3c11c17965acb2a12001af3b1cd5658f37 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 11 Aug 2021 08:30:20 +0100 Subject: ARM: 9109/1: oabi-compat: add epoll_pwait handler The epoll_wait() syscall has a special version for OABI compat mode to convert the arguments to the EABI structure layout of the kernel. However, the later epoll_pwait() syscall was added in arch/arm in linux-2.6.32 without this conversion. Use the same kind of handler for both. 
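The underlying problem, reduced to a compilable sketch: EABI aligns a 64-bit member to 8 bytes while OABI aligned it to 4, so the two ABIs disagree on both the size of the event structure and the offset of its data field. The struct names below are illustrative; the attribute mirrors how the kernel declares its OABI compat structs:

    #include <stdint.h>
    #include <stdio.h>
    #include <stddef.h>

    struct eabi_event {             /* natural layout: size 16, data at 8 */
            uint32_t events;
            uint64_t data;
    };

    struct oabi_event {             /* legacy layout: size 12, data at 4 */
            uint32_t events;
            uint64_t data;
    } __attribute__((packed, aligned(4)));

    int main(void)
    {
            printf("eabi: size %zu, data at %zu\n",
                   sizeof(struct eabi_event),
                   offsetof(struct eabi_event, data));
            printf("oabi: size %zu, data at %zu\n",
                   sizeof(struct oabi_event),
                   offsetof(struct oabi_event, data));
            return 0;
    }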
Fixes: 369842658a36 ("ARM: 5677/1: ARM support for TIF_RESTORE_SIGMASK/pselect6/ppoll/epoll_pwait") Cc: stable@vger.kernel.org Reviewed-by: Christoph Hellwig Signed-off-by: Arnd Bergmann Signed-off-by: Russell King (Oracle) --- arch/arm/kernel/sys_oabi-compat.c | 38 +++++++++++++++++++++++++++++++++++--- arch/arm/tools/syscall.tbl | 2 +- 2 files changed, 36 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c index 075a2e0ed2c1..443203fafb6b 100644 --- a/arch/arm/kernel/sys_oabi-compat.c +++ b/arch/arm/kernel/sys_oabi-compat.c @@ -265,9 +265,8 @@ asmlinkage long sys_oabi_epoll_ctl(int epfd, int op, int fd, return do_epoll_ctl(epfd, op, fd, &kernel, false); } -asmlinkage long sys_oabi_epoll_wait(int epfd, - struct oabi_epoll_event __user *events, - int maxevents, int timeout) +static long do_oabi_epoll_wait(int epfd, struct oabi_epoll_event __user *events, + int maxevents, int timeout) { struct epoll_event *kbuf; struct oabi_epoll_event e; @@ -314,6 +313,39 @@ asmlinkage long sys_oabi_epoll_wait, } #endif +SYSCALL_DEFINE4(oabi_epoll_wait, int, epfd, + struct oabi_epoll_event __user *, events, + int, maxevents, int, timeout) +{ + return do_oabi_epoll_wait(epfd, events, maxevents, timeout); +} + +/* + * Implement the event wait interface for the eventpoll file. It is the kernel + * part of the user space epoll_pwait(2). + */ +SYSCALL_DEFINE6(oabi_epoll_pwait, int, epfd, + struct oabi_epoll_event __user *, events, int, maxevents, + int, timeout, const sigset_t __user *, sigmask, + size_t, sigsetsize) +{ + int error; + + /* + * If the caller wants a certain signal mask to be set during the wait, + * we apply it here. + */ + error = set_user_sigmask(sigmask, sigsetsize); + if (error) + return error; + + error = do_oabi_epoll_wait(epfd, events, maxevents, timeout); + restore_saved_sigmask_unless(error == -EINTR); + + return error; +} +#endif + struct oabi_sembuf { unsigned short sem_num; short sem_op; diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index c5df1179fc5d..11d0b960b2c2 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -360,7 +360,7 @@ 343 common vmsplice sys_vmsplice 344 common move_pages sys_move_pages 345 common getcpu sys_getcpu -346 common epoll_pwait sys_epoll_pwait +346 common epoll_pwait sys_epoll_pwait sys_oabi_epoll_pwait 347 common kexec_load sys_kexec_load 348 common utimensat sys_utimensat_time32 349 common signalfd sys_signalfd -- cgit From 4e57a4ddf6b0d9cce1cf2ffd153df1ad3c2c9cc2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 11 Aug 2021 08:30:21 +0100 Subject: ARM: 9107/1: syscall: always store thread_info->abi_syscall The system call number is used in a couple of places, in particular ptrace, seccomp and /proc//syscall. The last one apparently never worked reliably on ARM for tasks that are not currently getting traced. Storing the syscall number in the normal entry path makes it work, as well as allowing us to see if the current system call is for OABI compat mode, which is the next thing I want to hook into. Since the thread_info->syscall field is not just the number any more, it is now renamed to abi_syscall. In kernels that enable both OABI and EABI, the upper bits of this field encode 0x900000 (__NR_OABI_SYSCALL_BASE) for OABI tasks, while normal EABI tasks do not set the upper bits. This makes it possible to implement the in_oabi_syscall() helper later.
All other users of thread_info->syscall go through the syscall_get_nr() helper, which in turn filters out the ABI bits. Note that the ABI information is lost with PTRACE_SET_SYSCALL, so one cannot set the internal number to a particular version, but this was already the case. We could change it to let gdb encode the ABI type along with the syscall in a CONFIG_OABI_COMPAT-enabled kernel, but that itself would be a (backwards-compatible) ABI change, so I don't do it here. Acked-by: Christoph Hellwig Signed-off-by: Arnd Bergmann Signed-off-by: Russell King (Oracle) --- arch/arm/include/asm/syscall.h | 5 ++++- arch/arm/include/asm/thread_info.h | 2 +- arch/arm/include/uapi/asm/unistd.h | 1 + arch/arm/kernel/asm-offsets.c | 1 + arch/arm/kernel/entry-common.S | 8 ++++++-- arch/arm/kernel/ptrace.c | 14 ++++++++------ 6 files changed, 21 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/syscall.h b/arch/arm/include/asm/syscall.h index fd02761ba06c..f055e846a5cc 100644 --- a/arch/arm/include/asm/syscall.h +++ b/arch/arm/include/asm/syscall.h @@ -22,7 +22,10 @@ extern const unsigned long sys_call_table[]; static inline int syscall_get_nr(struct task_struct *task, struct pt_regs *regs) { - return task_thread_info(task)->syscall; + if (IS_ENABLED(CONFIG_AEABI) && !IS_ENABLED(CONFIG_OABI_COMPAT)) + return task_thread_info(task)->abi_syscall; + + return task_thread_info(task)->abi_syscall & __NR_SYSCALL_MASK; } static inline void syscall_rollback(struct task_struct *task, diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index 70d4cbc49ae1..17c56051747b 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -62,7 +62,7 @@ struct thread_info { unsigned long stack_canary; #endif struct cpu_context_save cpu_context; /* cpu context */ - __u32 syscall; /* syscall number */ + __u32 abi_syscall; /* ABI type and syscall nr */ __u8 used_cp[16]; /* thread used copro */ unsigned long tp_value[2]; /* TLS registers */ #ifdef CONFIG_CRUNCH diff --git a/arch/arm/include/uapi/asm/unistd.h b/arch/arm/include/uapi/asm/unistd.h index ae7749e15726..a1149911464c 100644 --- a/arch/arm/include/uapi/asm/unistd.h +++ b/arch/arm/include/uapi/asm/unistd.h @@ -15,6 +15,7 @@ #define _UAPI__ASM_ARM_UNISTD_H #define __NR_OABI_SYSCALL_BASE 0x900000 +#define __NR_SYSCALL_MASK 0x0fffff #if defined(__thumb__) || defined(__ARM_EABI__) #define __NR_SYSCALL_BASE 0 diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index 70993af22d80..a0945b898ca3 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -48,6 +48,7 @@ int main(void) DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); DEFINE(TI_CPU_DOMAIN, offsetof(struct thread_info, cpu_domain)); DEFINE(TI_CPU_SAVE, offsetof(struct thread_info, cpu_context)); + DEFINE(TI_ABI_SYSCALL, offsetof(struct thread_info, abi_syscall)); DEFINE(TI_USED_CP, offsetof(struct thread_info, used_cp)); DEFINE(TI_TP_VALUE, offsetof(struct thread_info, tp_value)); DEFINE(TI_FPSTATE, offsetof(struct thread_info, fpstate)); diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 7f0b7aba1498..e837af90cd44 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -226,6 +226,7 @@ ENTRY(vector_swi) /* saved_psr and saved_pc are now dead */ uaccess_disable tbl + get_thread_info tsk adr tbl, sys_call_table @ load syscall table pointer @@ -237,13 +238,17 @@ ENTRY(vector_swi) * get the old ABI syscall table 
address. */ bics r10, r10, #0xff000000 + strne r10, [tsk, #TI_ABI_SYSCALL] + streq scno, [tsk, #TI_ABI_SYSCALL] eorne scno, r10, #__NR_OABI_SYSCALL_BASE ldrne tbl, =sys_oabi_call_table #elif !defined(CONFIG_AEABI) bic scno, scno, #0xff000000 @ mask off SWI op-code + str scno, [tsk, #TI_ABI_SYSCALL] eor scno, scno, #__NR_SYSCALL_BASE @ check OS number +#else + str scno, [tsk, #TI_ABI_SYSCALL] #endif - get_thread_info tsk /* * Reload the registers that may have been corrupted on entry to * the syscall assembly (by tracing or context tracking.) @@ -288,7 +293,6 @@ ENDPROC(vector_swi) * context switches, and waiting for our parent to respond. */ __sys_trace: - mov r1, scno add r0, sp, #S_OFF bl syscall_trace_enter mov scno, r0 diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index 2771e682220b..d886ea8910cb 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -25,6 +25,7 @@ #include #include +#include #include #define CREATE_TRACE_POINTS @@ -811,7 +812,8 @@ long arch_ptrace(struct task_struct *child, long request, break; case PTRACE_SET_SYSCALL: - task_thread_info(child)->syscall = data; + task_thread_info(child)->abi_syscall = data & + __NR_SYSCALL_MASK; ret = 0; break; @@ -880,14 +882,14 @@ static void tracehook_report_syscall(struct pt_regs *regs, if (dir == PTRACE_SYSCALL_EXIT) tracehook_report_syscall_exit(regs, 0); else if (tracehook_report_syscall_entry(regs)) - current_thread_info()->syscall = -1; + current_thread_info()->abi_syscall = -1; regs->ARM_ip = ip; } -asmlinkage int syscall_trace_enter(struct pt_regs *regs, int scno) +asmlinkage int syscall_trace_enter(struct pt_regs *regs) { - current_thread_info()->syscall = scno; + int scno; if (test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER); @@ -898,11 +900,11 @@ asmlinkage int syscall_trace_enter(struct pt_regs *regs, int scno) return -1; #else /* XXX: remove this once OABI gets fixed */ - secure_computing_strict(current_thread_info()->syscall); + secure_computing_strict(syscall_get_nr(current, regs)); #endif /* Tracer or seccomp may have changed syscall. */ - scno = current_thread_info()->syscall; + scno = syscall_get_nr(current, regs); if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) trace_sys_enter(regs, scno); -- cgit From 249dbe74d3c4b568a623fb55c56cddf19fdf0b89 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 11 Aug 2021 08:30:22 +0100 Subject: ARM: 9108/1: oabi-compat: rework epoll_wait/epoll_pwait emulation The epoll_wait() system call wrapper is one of the remaining users of the set_fs() infrastructure for Arm. Changing it to not require set_fs() is rather complex, unfortunately. The approach I'm taking here is to allow architectures to override the code that copies the output to user space, and let the oabi-compat implementation check whether it is getting called from an EABI or OABI system call based on the thread_info->syscall value. The in_oabi_syscall() check here mirrors the in_compat_syscall() and in_x32_syscall() helpers for 32-bit compat implementations on other architectures. Overall, the amount of code goes down, at least with the newly added sys_oabi_epoll_pwait() helper getting removed again. The downside is added complexity in the source code for the native implementation. There should be no difference in runtime performance except for Arm kernels with CONFIG_OABI_COMPAT enabled that now have to go through an external function call to check which of the two variants to use.
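The ABI check this relies on is a single mask test against the value the previous patch started storing. A plain-C model of the two helpers (the constants are copied from the patches; the helper names are illustrative):

    #include <stdbool.h>
    #include <stdint.h>

    #define NR_OABI_SYSCALL_BASE 0x900000   /* __NR_OABI_SYSCALL_BASE */
    #define NR_SYSCALL_MASK      0x0fffff   /* __NR_SYSCALL_MASK */

    /* OABI entry stores a 0x900000-based number; EABI stores the raw one. */
    static inline bool in_oabi(uint32_t abi_syscall)
    {
            return (abi_syscall & NR_OABI_SYSCALL_BASE) != 0;
    }

    /* Strip the ABI bits to recover the plain syscall number. */
    static inline uint32_t syscall_nr(uint32_t abi_syscall)
    {
            return abi_syscall & NR_SYSCALL_MASK;
    }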
Acked-by: Christoph Hellwig Signed-off-by: Arnd Bergmann Signed-off-by: Russell King (Oracle) --- arch/arm/include/asm/syscall.h | 11 ++++++ arch/arm/kernel/sys_oabi-compat.c | 83 ++++++++------------------------------- arch/arm/tools/syscall.tbl | 4 +- 3 files changed, 29 insertions(+), 69 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/syscall.h b/arch/arm/include/asm/syscall.h index f055e846a5cc..24c19d63ff0a 100644 --- a/arch/arm/include/asm/syscall.h +++ b/arch/arm/include/asm/syscall.h @@ -28,6 +28,17 @@ static inline int syscall_get_nr(struct task_struct *task, return task_thread_info(task)->abi_syscall & __NR_SYSCALL_MASK; } +static inline bool __in_oabi_syscall(struct task_struct *task) +{ + return IS_ENABLED(CONFIG_OABI_COMPAT) && + (task_thread_info(task)->abi_syscall & __NR_OABI_SYSCALL_BASE); +} + +static inline bool in_oabi_syscall(void) +{ + return __in_oabi_syscall(current); +} + static inline void syscall_rollback(struct task_struct *task, struct pt_regs *regs) { diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c index 443203fafb6b..1f6a433200f1 100644 --- a/arch/arm/kernel/sys_oabi-compat.c +++ b/arch/arm/kernel/sys_oabi-compat.c @@ -83,6 +83,8 @@ #include #include +#include + struct oldabi_stat64 { unsigned long long st_dev; unsigned int __pad1; @@ -264,87 +266,34 @@ asmlinkage long sys_oabi_epoll_ctl(int epfd, int op, int fd, return do_epoll_ctl(epfd, op, fd, &kernel, false); } - -static long do_oabi_epoll_wait(int epfd, struct oabi_epoll_event __user *events, - int maxevents, int timeout) -{ - struct epoll_event *kbuf; - struct oabi_epoll_event e; - mm_segment_t fs; - long ret, err, i; - - if (maxevents <= 0 || - maxevents > (INT_MAX/sizeof(*kbuf)) || - maxevents > (INT_MAX/sizeof(*events))) - return -EINVAL; - if (!access_ok(events, sizeof(*events) * maxevents)) - return -EFAULT; - kbuf = kmalloc_array(maxevents, sizeof(*kbuf), GFP_KERNEL); - if (!kbuf) - return -ENOMEM; - fs = get_fs(); - set_fs(KERNEL_DS); - ret = sys_epoll_wait(epfd, kbuf, maxevents, timeout); - set_fs(fs); - err = 0; - for (i = 0; i < ret; i++) { - e.events = kbuf[i].events; - e.data = kbuf[i].data; - err = __copy_to_user(events, &e, sizeof(e)); - if (err) - break; - events++; - } - kfree(kbuf); - return err ? -EFAULT : ret; -} #else asmlinkage long sys_oabi_epoll_ctl(int epfd, int op, int fd, struct oabi_epoll_event __user *event) { return -EINVAL; } - -asmlinkage long sys_oabi_epoll_wait(int epfd, - struct oabi_epoll_event __user *events, - int maxevents, int timeout) -{ - return -EINVAL; -} #endif -SYSCALL_DEFINE4(oabi_epoll_wait, int, epfd, - struct oabi_epoll_event __user *, events, - int, maxevents, int, timeout) +struct epoll_event __user * +epoll_put_uevent(__poll_t revents, __u64 data, + struct epoll_event __user *uevent) { - return do_oabi_epoll_wait(epfd, events, maxevents, timeout); -} + if (in_oabi_syscall()) { + struct oabi_epoll_event __user *oevent = (void __user *)uevent; -/* - * Implement the event wait interface for the eventpoll file. It is the kernel - * part of the user space epoll_pwait(2). - */ -SYSCALL_DEFINE6(oabi_epoll_pwait, int, epfd, - struct oabi_epoll_event __user *, events, int, maxevents, - int, timeout, const sigset_t __user *, sigmask, - size_t, sigsetsize) -{ - int error; + if (__put_user(revents, &oevent->events) || + __put_user(data, &oevent->data)) + return NULL; - /* - * If the caller wants a certain signal mask to be set during the wait, - * we apply it here. 
- */ - error = set_user_sigmask(sigmask, sigsetsize); - if (error) - return error; + return (void __user *)(oevent+1); + } - error = do_oabi_epoll_wait(epfd, events, maxevents, timeout); - restore_saved_sigmask_unless(error == -EINTR); + if (__put_user(revents, &uevent->events) || + __put_user(data, &uevent->data)) + return NULL; - return error; + return uevent+1; } -#endif struct oabi_sembuf { unsigned short sem_num; diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index 11d0b960b2c2..344424a9611f 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -266,7 +266,7 @@ 249 common lookup_dcookie sys_lookup_dcookie 250 common epoll_create sys_epoll_create 251 common epoll_ctl sys_epoll_ctl sys_oabi_epoll_ctl -252 common epoll_wait sys_epoll_wait sys_oabi_epoll_wait +252 common epoll_wait sys_epoll_wait 253 common remap_file_pages sys_remap_file_pages # 254 for set_thread_area # 255 for get_thread_area @@ -360,7 +360,7 @@ 343 common vmsplice sys_vmsplice 344 common move_pages sys_move_pages 345 common getcpu sys_getcpu -346 common epoll_pwait sys_epoll_pwait sys_oabi_epoll_pwait +346 common epoll_pwait sys_epoll_pwait 347 common kexec_load sys_kexec_load 348 common utimensat sys_utimensat_time32 349 common signalfd sys_signalfd -- cgit From bdec0145286f7e6be9b3134aa35f0f335fa27c38 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 11 Aug 2021 08:30:23 +0100 Subject: ARM: 9114/1: oabi-compat: rework sys_semtimedop emulation sys_oabi_semtimedop() is one of the last users of set_fs() on Arm. To remove this one, expose the internal code of the actual implementation that operates on a kernel pointer and call it directly after copying. There should be no measurable impact on the normal execution of this function, and it makes the overly long function a little shorter, which may help readability. While reworking the oabi version, make it behave a little more like the native one, using kvmalloc_array() and restructure the code flow in a similar way. The naming of __do_semtimedop() is not very good, I hope someone can come up with a better name. One regression was spotted by kernel test robot and fixed before the first mailing list submission. 
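Why the array still has to be converted element by element rather than copied wholesale, as a compilable sketch: the legacy OABI layout carries a trailing pad word the native struct lacks, so the element strides differ (8 bytes vs 6). The structs mirror the ones in the patch and the uapi sembuf; the loop is illustrative:

    #include <stddef.h>

    struct oabi_sembuf {            /* legacy ABI: sizeof == 8 */
            unsigned short sem_num;
            short          sem_op;
            short          sem_flg;
            unsigned short __pad;
    };

    struct native_sembuf {          /* like struct sembuf: sizeof == 6 */
            unsigned short sem_num;
            short          sem_op;
            short          sem_flg;
    };

    static void convert_sembufs(struct native_sembuf *dst,
                                const struct oabi_sembuf *src, size_t n)
    {
            for (size_t i = 0; i < n; i++) {
                    dst[i].sem_num = src[i].sem_num;
                    dst[i].sem_op  = src[i].sem_op;
                    dst[i].sem_flg = src[i].sem_flg;
            }
    }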
Acked-by: Christoph Hellwig Signed-off-by: Arnd Bergmann Signed-off-by: Russell King (Oracle) --- arch/arm/kernel/sys_oabi-compat.c | 60 ++++++++++++++++++++++++++++----------- 1 file changed, 44 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c index 1f6a433200f1..5ea365c35ca5 100644 --- a/arch/arm/kernel/sys_oabi-compat.c +++ b/arch/arm/kernel/sys_oabi-compat.c @@ -80,6 +80,7 @@ #include #include #include +#include #include #include @@ -302,46 +303,52 @@ struct oabi_sembuf { unsigned short __pad; }; +#define sc_semopm sem_ctls[2] + +#ifdef CONFIG_SYSVIPC asmlinkage long sys_oabi_semtimedop(int semid, struct oabi_sembuf __user *tsops, unsigned nsops, const struct old_timespec32 __user *timeout) { + struct ipc_namespace *ns; struct sembuf *sops; - struct old_timespec32 local_timeout; long err; int i; + ns = current->nsproxy->ipc_ns; + if (nsops > ns->sc_semopm) + return -E2BIG; if (nsops < 1 || nsops > SEMOPM) return -EINVAL; - if (!access_ok(tsops, sizeof(*tsops) * nsops)) - return -EFAULT; - sops = kmalloc_array(nsops, sizeof(*sops), GFP_KERNEL); + sops = kvmalloc_array(nsops, sizeof(*sops), GFP_KERNEL); if (!sops) return -ENOMEM; err = 0; for (i = 0; i < nsops; i++) { struct oabi_sembuf osb; - err |= __copy_from_user(&osb, tsops, sizeof(osb)); + err |= copy_from_user(&osb, tsops, sizeof(osb)); sops[i].sem_num = osb.sem_num; sops[i].sem_op = osb.sem_op; sops[i].sem_flg = osb.sem_flg; tsops++; } - if (timeout) { - /* copy this as well before changing domain protection */ - err |= copy_from_user(&local_timeout, timeout, sizeof(*timeout)); - timeout = &local_timeout; - } if (err) { err = -EFAULT; - } else { - mm_segment_t fs = get_fs(); - set_fs(KERNEL_DS); - err = sys_semtimedop_time32(semid, sops, nsops, timeout); - set_fs(fs); + goto out; + } + + if (timeout) { + struct timespec64 ts; + err = get_old_timespec32(&ts, timeout); + if (err) + goto out; + err = __do_semtimedop(semid, sops, nsops, &ts, ns); + goto out; } - kfree(sops); + err = __do_semtimedop(semid, sops, nsops, NULL, ns); +out: + kvfree(sops); return err; } @@ -368,6 +375,27 @@ asmlinkage int sys_oabi_ipc(uint call, int first, int second, int third, return sys_ipc(call, first, second, third, ptr, fifth); } } +#else +asmlinkage long sys_oabi_semtimedop(int semid, + struct oabi_sembuf __user *tsops, + unsigned nsops, + const struct old_timespec32 __user *timeout) +{ + return -ENOSYS; +} + +asmlinkage long sys_oabi_semop(int semid, struct oabi_sembuf __user *tsops, + unsigned nsops) +{ + return -ENOSYS; +} + +asmlinkage int sys_oabi_ipc(uint call, int first, int second, int third, + void __user *ptr, long fifth) +{ + return -ENOSYS; +} +#endif asmlinkage long sys_oabi_bind(int fd, struct sockaddr __user *addr, int addrlen) { -- cgit From 7e2d8c29ecdd86afbcedb9d9a977bab8af527add Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 11 Aug 2021 08:30:24 +0100 Subject: ARM: 9111/1: oabi-compat: rework fcntl64() emulation This is one of the last users of get_fs(), and this is fairly easy to change, since the infrastructure for it is already there. The replacement here is essentially a copy of the existing fcntl64() syscall entry function. 
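The shape of the rework, as a hedged user-space sketch: convert the foreign record into a native-layout kernel copy, run the native operation, and convert back only for the "get" style commands, leaving no step that needs a set_fs()/KERNEL_DS window. All names are illustrative stand-ins for get_oabi_flock()/fcntl_getlk64()/put_oabi_flock():

    struct oabi_rec {               /* foreign, packed OABI layout */
            short     type;
            long long start;
    } __attribute__((packed, aligned(4)));

    struct native_rec {             /* natural kernel layout */
            short     type;
            long long start;
    };

    static void get_rec(struct native_rec *k, const struct oabi_rec *u)
    {
            k->type  = u->type;     /* field-by-field copy-in */
            k->start = u->start;
    }

    static void put_rec(const struct native_rec *k, struct oabi_rec *u)
    {
            u->type  = k->type;     /* copy-out, only for "get" commands */
            u->start = k->start;
    }

    static int do_native_getlk(struct native_rec *k)
    {
            k->start = 0;           /* stand-in for the real lock query */
            return 0;
    }

    static int compat_getlk(struct oabi_rec *u)
    {
            struct native_rec k;
            int err;

            get_rec(&k, u);              /* convert in */
            err = do_native_getlk(&k);   /* operate on kernel memory only */
            if (!err)
                    put_rec(&k, u);      /* convert out */
            return err;
    }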
Acked-by: Christoph Hellwig Signed-off-by: Arnd Bergmann Signed-off-by: Russell King (Oracle) --- arch/arm/kernel/sys_oabi-compat.c | 93 +++++++++++++++++++++++++-------------- 1 file changed, 60 insertions(+), 33 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c index 5ea365c35ca5..223ee46b6e75 100644 --- a/arch/arm/kernel/sys_oabi-compat.c +++ b/arch/arm/kernel/sys_oabi-compat.c @@ -194,56 +194,83 @@ struct oabi_flock64 { pid_t l_pid; } __attribute__ ((packed,aligned(4))); -static long do_locks(unsigned int fd, unsigned int cmd, - unsigned long arg) +static int get_oabi_flock(struct flock64 *kernel, struct oabi_flock64 __user *arg) { - struct flock64 kernel; struct oabi_flock64 user; - mm_segment_t fs; - long ret; if (copy_from_user(&user, (struct oabi_flock64 __user *)arg, sizeof(user))) return -EFAULT; - kernel.l_type = user.l_type; - kernel.l_whence = user.l_whence; - kernel.l_start = user.l_start; - kernel.l_len = user.l_len; - kernel.l_pid = user.l_pid; - - fs = get_fs(); - set_fs(KERNEL_DS); - ret = sys_fcntl64(fd, cmd, (unsigned long)&kernel); - set_fs(fs); - - if (!ret && (cmd == F_GETLK64 || cmd == F_OFD_GETLK)) { - user.l_type = kernel.l_type; - user.l_whence = kernel.l_whence; - user.l_start = kernel.l_start; - user.l_len = kernel.l_len; - user.l_pid = kernel.l_pid; - if (copy_to_user((struct oabi_flock64 __user *)arg, - &user, sizeof(user))) - ret = -EFAULT; - } - return ret; + + kernel->l_type = user.l_type; + kernel->l_whence = user.l_whence; + kernel->l_start = user.l_start; + kernel->l_len = user.l_len; + kernel->l_pid = user.l_pid; + + return 0; +} + +static int put_oabi_flock(struct flock64 *kernel, struct oabi_flock64 __user *arg) +{ + struct oabi_flock64 user; + + user.l_type = kernel->l_type; + user.l_whence = kernel->l_whence; + user.l_start = kernel->l_start; + user.l_len = kernel->l_len; + user.l_pid = kernel->l_pid; + + if (copy_to_user((struct oabi_flock64 __user *)arg, + &user, sizeof(user))) + return -EFAULT; + + return 0; } asmlinkage long sys_oabi_fcntl64(unsigned int fd, unsigned int cmd, unsigned long arg) { + void __user *argp = (void __user *)arg; + struct fd f = fdget_raw(fd); + struct flock64 flock; + long err = -EBADF; + + if (!f.file) + goto out; + switch (cmd) { - case F_OFD_GETLK: - case F_OFD_SETLK: - case F_OFD_SETLKW: case F_GETLK64: + case F_OFD_GETLK: + err = security_file_fcntl(f.file, cmd, arg); + if (err) + break; + err = get_oabi_flock(&flock, argp); + if (err) + break; + err = fcntl_getlk64(f.file, cmd, &flock); + if (!err) + err = put_oabi_flock(&flock, argp); + break; case F_SETLK64: case F_SETLKW64: - return do_locks(fd, cmd, arg); - + case F_OFD_SETLK: + case F_OFD_SETLKW: + err = security_file_fcntl(f.file, cmd, arg); + if (err) + break; + err = get_oabi_flock(&flock, argp); + if (err) + break; + err = fcntl_setlk64(fd, f.file, cmd, &flock); + break; default: - return sys_fcntl64(fd, cmd, arg); + err = sys_fcntl64(fd, cmd, arg); + break; } + fdput(f); +out: + return err; } struct oabi_epoll_event { -- cgit From 2df4c9a741a0b5e2883cc356c1b724d1f739fb55 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 11 Aug 2021 08:30:25 +0100 Subject: ARM: 9112/1: uaccess: add __{get,put}_kernel_nofault These mimic the behavior of get_user and put_user, except for domain switching, address limit checking and handling of mismatched sizes, none of which are relevant here. 
To work with pre-Armv6 kernels, this has to avoid TUSER() inside of the new macros, the new approach passes the "t" string along with the opcode, which is a bit uglier but avoids duplicating more code. As there is no __get_user_asm_dword(), I work around it by copying 32 bit at a time, which is possible because the output size is known. Acked-by: Christoph Hellwig Signed-off-by: Arnd Bergmann Signed-off-by: Russell King (Oracle) --- arch/arm/include/asm/uaccess.h | 123 +++++++++++++++++++++++++++-------------- 1 file changed, 83 insertions(+), 40 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index a13d90206472..4f60638755c4 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -308,11 +308,11 @@ static inline void set_fs(mm_segment_t fs) #define __get_user(x, ptr) \ ({ \ long __gu_err = 0; \ - __get_user_err((x), (ptr), __gu_err); \ + __get_user_err((x), (ptr), __gu_err, TUSER()); \ __gu_err; \ }) -#define __get_user_err(x, ptr, err) \ +#define __get_user_err(x, ptr, err, __t) \ do { \ unsigned long __gu_addr = (unsigned long)(ptr); \ unsigned long __gu_val; \ @@ -321,18 +321,19 @@ do { \ might_fault(); \ __ua_flags = uaccess_save_and_enable(); \ switch (sizeof(*(ptr))) { \ - case 1: __get_user_asm_byte(__gu_val, __gu_addr, err); break; \ - case 2: __get_user_asm_half(__gu_val, __gu_addr, err); break; \ - case 4: __get_user_asm_word(__gu_val, __gu_addr, err); break; \ + case 1: __get_user_asm_byte(__gu_val, __gu_addr, err, __t); break; \ + case 2: __get_user_asm_half(__gu_val, __gu_addr, err, __t); break; \ + case 4: __get_user_asm_word(__gu_val, __gu_addr, err, __t); break; \ default: (__gu_val) = __get_user_bad(); \ } \ uaccess_restore(__ua_flags); \ (x) = (__typeof__(*(ptr)))__gu_val; \ } while (0) +#endif #define __get_user_asm(x, addr, err, instr) \ __asm__ __volatile__( \ - "1: " TUSER(instr) " %1, [%2], #0\n" \ + "1: " instr " %1, [%2], #0\n" \ "2:\n" \ " .pushsection .text.fixup,\"ax\"\n" \ " .align 2\n" \ @@ -348,40 +349,38 @@ do { \ : "r" (addr), "i" (-EFAULT) \ : "cc") -#define __get_user_asm_byte(x, addr, err) \ - __get_user_asm(x, addr, err, ldrb) +#define __get_user_asm_byte(x, addr, err, __t) \ + __get_user_asm(x, addr, err, "ldrb" __t) #if __LINUX_ARM_ARCH__ >= 6 -#define __get_user_asm_half(x, addr, err) \ - __get_user_asm(x, addr, err, ldrh) +#define __get_user_asm_half(x, addr, err, __t) \ + __get_user_asm(x, addr, err, "ldrh" __t) #else #ifndef __ARMEB__ -#define __get_user_asm_half(x, __gu_addr, err) \ +#define __get_user_asm_half(x, __gu_addr, err, __t) \ ({ \ unsigned long __b1, __b2; \ - __get_user_asm_byte(__b1, __gu_addr, err); \ - __get_user_asm_byte(__b2, __gu_addr + 1, err); \ + __get_user_asm_byte(__b1, __gu_addr, err, __t); \ + __get_user_asm_byte(__b2, __gu_addr + 1, err, __t); \ (x) = __b1 | (__b2 << 8); \ }) #else -#define __get_user_asm_half(x, __gu_addr, err) \ +#define __get_user_asm_half(x, __gu_addr, err, __t) \ ({ \ unsigned long __b1, __b2; \ - __get_user_asm_byte(__b1, __gu_addr, err); \ - __get_user_asm_byte(__b2, __gu_addr + 1, err); \ + __get_user_asm_byte(__b1, __gu_addr, err, __t); \ + __get_user_asm_byte(__b2, __gu_addr + 1, err, __t); \ (x) = (__b1 << 8) | __b2; \ }) #endif #endif /* __LINUX_ARM_ARCH__ >= 6 */ -#define __get_user_asm_word(x, addr, err) \ - __get_user_asm(x, addr, err, ldr) -#endif - +#define __get_user_asm_word(x, addr, err, __t) \ + __get_user_asm(x, addr, err, "ldr" __t) #define __put_user_switch(x, ptr, __err, __fn) \ do { \ @@ 
-425,7 +424,7 @@ do { \ #define __put_user_nocheck(x, __pu_ptr, __err, __size) \ do { \ unsigned long __pu_addr = (unsigned long)__pu_ptr; \ - __put_user_nocheck_##__size(x, __pu_addr, __err); \ + __put_user_nocheck_##__size(x, __pu_addr, __err, TUSER());\ } while (0) #define __put_user_nocheck_1 __put_user_asm_byte @@ -433,9 +432,11 @@ do { \ #define __put_user_nocheck_4 __put_user_asm_word #define __put_user_nocheck_8 __put_user_asm_dword +#endif /* !CONFIG_CPU_SPECTRE */ + #define __put_user_asm(x, __pu_addr, err, instr) \ __asm__ __volatile__( \ - "1: " TUSER(instr) " %1, [%2], #0\n" \ + "1: " instr " %1, [%2], #0\n" \ "2:\n" \ " .pushsection .text.fixup,\"ax\"\n" \ " .align 2\n" \ @@ -450,36 +451,36 @@ do { \ : "r" (x), "r" (__pu_addr), "i" (-EFAULT) \ : "cc") -#define __put_user_asm_byte(x, __pu_addr, err) \ - __put_user_asm(x, __pu_addr, err, strb) +#define __put_user_asm_byte(x, __pu_addr, err, __t) \ + __put_user_asm(x, __pu_addr, err, "strb" __t) #if __LINUX_ARM_ARCH__ >= 6 -#define __put_user_asm_half(x, __pu_addr, err) \ - __put_user_asm(x, __pu_addr, err, strh) +#define __put_user_asm_half(x, __pu_addr, err, __t) \ + __put_user_asm(x, __pu_addr, err, "strh" __t) #else #ifndef __ARMEB__ -#define __put_user_asm_half(x, __pu_addr, err) \ +#define __put_user_asm_half(x, __pu_addr, err, __t) \ ({ \ unsigned long __temp = (__force unsigned long)(x); \ - __put_user_asm_byte(__temp, __pu_addr, err); \ - __put_user_asm_byte(__temp >> 8, __pu_addr + 1, err); \ + __put_user_asm_byte(__temp, __pu_addr, err, __t); \ + __put_user_asm_byte(__temp >> 8, __pu_addr + 1, err, __t);\ }) #else -#define __put_user_asm_half(x, __pu_addr, err) \ +#define __put_user_asm_half(x, __pu_addr, err, __t) \ ({ \ unsigned long __temp = (__force unsigned long)(x); \ - __put_user_asm_byte(__temp >> 8, __pu_addr, err); \ - __put_user_asm_byte(__temp, __pu_addr + 1, err); \ + __put_user_asm_byte(__temp >> 8, __pu_addr, err, __t); \ + __put_user_asm_byte(__temp, __pu_addr + 1, err, __t); \ }) #endif #endif /* __LINUX_ARM_ARCH__ >= 6 */ -#define __put_user_asm_word(x, __pu_addr, err) \ - __put_user_asm(x, __pu_addr, err, str) +#define __put_user_asm_word(x, __pu_addr, err, __t) \ + __put_user_asm(x, __pu_addr, err, "str" __t) #ifndef __ARMEB__ #define __reg_oper0 "%R2" @@ -489,12 +490,12 @@ do { \ #define __reg_oper1 "%R2" #endif -#define __put_user_asm_dword(x, __pu_addr, err) \ +#define __put_user_asm_dword(x, __pu_addr, err, __t) \ __asm__ __volatile__( \ - ARM( "1: " TUSER(str) " " __reg_oper1 ", [%1], #4\n" ) \ - ARM( "2: " TUSER(str) " " __reg_oper0 ", [%1]\n" ) \ - THUMB( "1: " TUSER(str) " " __reg_oper1 ", [%1]\n" ) \ - THUMB( "2: " TUSER(str) " " __reg_oper0 ", [%1, #4]\n" ) \ + ARM( "1: str" __t " " __reg_oper1 ", [%1], #4\n" ) \ + ARM( "2: str" __t " " __reg_oper0 ", [%1]\n" ) \ + THUMB( "1: str" __t " " __reg_oper1 ", [%1]\n" ) \ + THUMB( "2: str" __t " " __reg_oper0 ", [%1, #4]\n" ) \ "3:\n" \ " .pushsection .text.fixup,\"ax\"\n" \ " .align 2\n" \ @@ -510,7 +511,49 @@ do { \ : "r" (x), "i" (-EFAULT) \ : "cc") -#endif /* !CONFIG_CPU_SPECTRE */ +#define HAVE_GET_KERNEL_NOFAULT + +#define __get_kernel_nofault(dst, src, type, err_label) \ +do { \ + const type *__pk_ptr = (src); \ + unsigned long __src = (unsigned long)(__pk_ptr); \ + type __val; \ + int __err = 0; \ + switch (sizeof(type)) { \ + case 1: __get_user_asm_byte(__val, __src, __err, ""); break; \ + case 2: __get_user_asm_half(__val, __src, __err, ""); break; \ + case 4: __get_user_asm_word(__val, __src, __err, ""); break; \ + case 8: { \ + u32 
*__v32 = (u32*)&__val; \ + __get_user_asm_word(__v32[0], __src, __err, ""); \ + if (__err) \ + break; \ + __get_user_asm_word(__v32[1], __src+4, __err, ""); \ + break; \ + } \ + default: __err = __get_user_bad(); break; \ + } \ + *(type *)(dst) = __val; \ + if (__err) \ + goto err_label; \ +} while (0) + +#define __put_kernel_nofault(dst, src, type, err_label) \ +do { \ + const type *__pk_ptr = (dst); \ + unsigned long __dst = (unsigned long)__pk_ptr; \ + int __err = 0; \ + type __val = *(type *)src; \ + switch (sizeof(type)) { \ + case 1: __put_user_asm_byte(__val, __dst, __err, ""); break; \ + case 2: __put_user_asm_half(__val, __dst, __err, ""); break; \ + case 4: __put_user_asm_word(__val, __dst, __err, ""); break; \ + case 8: __put_user_asm_dword(__val, __dst, __err, ""); break; \ + default: __err = __put_user_bad(); break; \ + } \ + if (__err) \ + goto err_label; \ +} while (0) #ifdef CONFIG_MMU extern unsigned long __must_check -- cgit From 8ac6f5d7f84bf362e67591708bcb9788cdc42c50 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 11 Aug 2021 08:30:26 +0100 Subject: ARM: 9113/1: uaccess: remove set_fs() implementation There are no remaining callers of set_fs(), so just remove it along with all associated code that operates on thread_info->addr_limit. There are still further optimizations that can be done: - In get_user(), the address check could be moved entirely into the out of line code, rather than passing a constant as an argument, - I assume the DACR handling can be simplified as we now only change it during user access when CONFIG_CPU_SW_DOMAIN_PAN is set, but not during set_fs(). Acked-by: Christoph Hellwig Signed-off-by: Arnd Bergmann Signed-off-by: Russell King (Oracle) --- arch/arm/Kconfig | 1 - arch/arm/include/asm/ptrace.h | 1 - arch/arm/include/asm/thread_info.h | 4 ---- arch/arm/include/asm/uaccess-asm.h | 6 ----- arch/arm/include/asm/uaccess.h | 46 ++++---------------------------------- arch/arm/kernel/asm-offsets.c | 2 -- arch/arm/kernel/entry-common.S | 12 ---------- arch/arm/kernel/process.c | 7 +----- arch/arm/kernel/signal.c | 8 ------- arch/arm/lib/copy_from_user.S | 3 +-- arch/arm/lib/copy_to_user.S | 3 +-- 11 files changed, 7 insertions(+), 86 deletions(-) (limited to 'arch') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 3ea1c417339f..51897e4cc413 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -126,7 +126,6 @@ config ARM select PCI_SYSCALL if PCI select PERF_USE_VMALLOC select RTC_LIB - select SET_FS select SYS_SUPPORTS_APM_EMULATION # Above selects are sorted alphabetically; please add new ones # according to that. Thanks. 
diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h index 91d6b7856be4..93051e2f402c 100644 --- a/arch/arm/include/asm/ptrace.h +++ b/arch/arm/include/asm/ptrace.h @@ -19,7 +19,6 @@ struct pt_regs { struct svc_pt_regs { struct pt_regs regs; u32 dacr; - u32 addr_limit; }; #define to_svc_pt_regs(r) container_of(r, struct svc_pt_regs, regs) diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index 17c56051747b..d89931aed59f 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -31,8 +31,6 @@ struct task_struct; #include -typedef unsigned long mm_segment_t; - struct cpu_context_save { __u32 r4; __u32 r5; @@ -54,7 +52,6 @@ struct cpu_context_save { struct thread_info { unsigned long flags; /* low level flags */ int preempt_count; /* 0 => preemptable, <0 => bug */ - mm_segment_t addr_limit; /* address limit */ struct task_struct *task; /* main task structure */ __u32 cpu; /* cpu */ __u32 cpu_domain; /* cpu domain */ @@ -80,7 +77,6 @@ struct thread_info { .task = &tsk, \ .flags = 0, \ .preempt_count = INIT_PREEMPT_COUNT, \ - .addr_limit = KERNEL_DS, \ } /* diff --git a/arch/arm/include/asm/uaccess-asm.h b/arch/arm/include/asm/uaccess-asm.h index e6eb7a2aaf1e..6451a433912c 100644 --- a/arch/arm/include/asm/uaccess-asm.h +++ b/arch/arm/include/asm/uaccess-asm.h @@ -84,12 +84,8 @@ * if \disable is set. */ .macro uaccess_entry, tsk, tmp0, tmp1, tmp2, disable - ldr \tmp1, [\tsk, #TI_ADDR_LIMIT] - ldr \tmp2, =TASK_SIZE - str \tmp2, [\tsk, #TI_ADDR_LIMIT] DACR( mrc p15, 0, \tmp0, c3, c0, 0) DACR( str \tmp0, [sp, #SVC_DACR]) - str \tmp1, [sp, #SVC_ADDR_LIMIT] .if \disable && IS_ENABLED(CONFIG_CPU_SW_DOMAIN_PAN) /* kernel=client, user=no access */ mov \tmp2, #DACR_UACCESS_DISABLE @@ -106,9 +102,7 @@ /* Restore the user access state previously saved by uaccess_entry */ .macro uaccess_exit, tsk, tmp0, tmp1 - ldr \tmp1, [sp, #SVC_ADDR_LIMIT] DACR( ldr \tmp0, [sp, #SVC_DACR]) - str \tmp1, [\tsk, #TI_ADDR_LIMIT] DACR( mcr p15, 0, \tmp0, c3, c0, 0) .endm diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 4f60638755c4..084d1c07c2d0 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -52,32 +52,8 @@ static __always_inline void uaccess_restore(unsigned int flags) extern int __get_user_bad(void); extern int __put_user_bad(void); -/* - * Note that this is actually 0x1,0000,0000 - */ -#define KERNEL_DS 0x00000000 - #ifdef CONFIG_MMU -#define USER_DS TASK_SIZE -#define get_fs() (current_thread_info()->addr_limit) - -static inline void set_fs(mm_segment_t fs) -{ - current_thread_info()->addr_limit = fs; - - /* - * Prevent a mispredicted conditional call to set_fs from forwarding - * the wrong address limit to access_ok under speculation. - */ - dsb(nsh); - isb(); - - modify_domain(DOMAIN_KERNEL, fs ? DOMAIN_CLIENT : DOMAIN_MANAGER); -} - -#define uaccess_kernel() (get_fs() == KERNEL_DS) - /* * We use 33-bit arithmetic here. Success returns zero, failure returns * addr_limit. 
We take advantage that addr_limit will be zero for KERNEL_DS, @@ -89,7 +65,7 @@ static inline void set_fs(mm_segment_t fs) __asm__(".syntax unified\n" \ "adds %1, %2, %3; sbcscc %1, %1, %0; movcc %0, #0" \ : "=&r" (flag), "=&r" (roksum) \ - : "r" (addr), "Ir" (size), "0" (current_thread_info()->addr_limit) \ + : "r" (addr), "Ir" (size), "0" (TASK_SIZE) \ : "cc"); \ flag; }) @@ -120,7 +96,7 @@ static inline void __user *__uaccess_mask_range_ptr(const void __user *ptr, " subshs %1, %1, %2\n" " movlo %0, #0\n" : "+r" (safe_ptr), "=&r" (tmp) - : "r" (size), "r" (current_thread_info()->addr_limit) + : "r" (size), "r" (TASK_SIZE) : "cc"); csdb(); @@ -194,7 +170,7 @@ extern int __get_user_64t_4(void *); #define __get_user_check(x, p) \ ({ \ - unsigned long __limit = current_thread_info()->addr_limit - 1; \ + unsigned long __limit = TASK_SIZE - 1; \ register typeof(*(p)) __user *__p asm("r0") = (p); \ register __inttype(x) __r2 asm("r2"); \ register unsigned long __l asm("r1") = __limit; \ @@ -245,7 +221,7 @@ extern int __put_user_8(void *, unsigned long long); #define __put_user_check(__pu_val, __ptr, __err, __s) \ ({ \ - unsigned long __limit = current_thread_info()->addr_limit - 1; \ + unsigned long __limit = TASK_SIZE - 1; \ register typeof(__pu_val) __r2 asm("r2") = __pu_val; \ register const void __user *__p asm("r0") = __ptr; \ register unsigned long __l asm("r1") = __limit; \ @@ -262,19 +238,8 @@ extern int __put_user_8(void *, unsigned long long); #else /* CONFIG_MMU */ -/* - * uClinux has only one addr space, so has simplified address limits. - */ -#define USER_DS KERNEL_DS - -#define uaccess_kernel() (true) #define __addr_ok(addr) ((void)(addr), 1) #define __range_ok(addr, size) ((void)(addr), 0) -#define get_fs() (KERNEL_DS) - -static inline void set_fs(mm_segment_t fs) -{ -} #define get_user(x, p) __get_user(x, p) #define __put_user_check __put_user_nocheck @@ -283,9 +248,6 @@ static inline void set_fs(mm_segment_t fs) #define access_ok(addr, size) (__range_ok(addr, size) == 0) -#define user_addr_max() \ - (uaccess_kernel() ? 
~0UL : get_fs()) - #ifdef CONFIG_CPU_SPECTRE /* * When mitigating Spectre variant 1, it is not worth fixing the non- diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index a0945b898ca3..c60fefabc868 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -43,7 +43,6 @@ int main(void) BLANK(); DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); - DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit)); DEFINE(TI_TASK, offsetof(struct thread_info, task)); DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); DEFINE(TI_CPU_DOMAIN, offsetof(struct thread_info, cpu_domain)); @@ -92,7 +91,6 @@ int main(void) DEFINE(S_OLD_R0, offsetof(struct pt_regs, ARM_ORIG_r0)); DEFINE(PT_REGS_SIZE, sizeof(struct pt_regs)); DEFINE(SVC_DACR, offsetof(struct svc_pt_regs, dacr)); - DEFINE(SVC_ADDR_LIMIT, offsetof(struct svc_pt_regs, addr_limit)); DEFINE(SVC_REGS_SIZE, sizeof(struct svc_pt_regs)); BLANK(); DEFINE(SIGFRAME_RC3_OFFSET, offsetof(struct sigframe, retcode[3])); diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index e837af90cd44..d9c99db50243 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -49,10 +49,6 @@ __ret_fast_syscall: UNWIND(.fnstart ) UNWIND(.cantunwind ) disable_irq_notrace @ disable interrupts - ldr r2, [tsk, #TI_ADDR_LIMIT] - ldr r1, =TASK_SIZE - cmp r2, r1 - blne addr_limit_check_failed ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing movs r1, r1, lsl #16 bne fast_work_pending @@ -87,10 +83,6 @@ __ret_fast_syscall: bl do_rseq_syscall #endif disable_irq_notrace @ disable interrupts - ldr r2, [tsk, #TI_ADDR_LIMIT] - ldr r1, =TASK_SIZE - cmp r2, r1 - blne addr_limit_check_failed ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing movs r1, r1, lsl #16 beq no_work_pending @@ -129,10 +121,6 @@ ret_slow_syscall: #endif disable_irq_notrace @ disable interrupts ENTRY(ret_to_user_from_irq) - ldr r2, [tsk, #TI_ADDR_LIMIT] - ldr r1, =TASK_SIZE - cmp r2, r1 - blne addr_limit_check_failed ldr r1, [tsk, #TI_FLAGS] movs r1, r1, lsl #16 bne slow_work_pending diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index fc9e8b37eaa8..581542fbf5bf 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -108,7 +108,7 @@ void __show_regs(struct pt_regs *regs) unsigned long flags; char buf[64]; #ifndef CONFIG_CPU_V7M - unsigned int domain, fs; + unsigned int domain; #ifdef CONFIG_CPU_SW_DOMAIN_PAN /* * Get the domain register for the parent context. 
In user
@@ -117,14 +117,11 @@
 */
 if (user_mode(regs)) {
 domain = DACR_UACCESS_ENABLE;
- fs = get_fs();
 } else {
 domain = to_svc_pt_regs(regs)->dacr;
- fs = to_svc_pt_regs(regs)->addr_limit;
 }
 #else
 domain = get_domain();
- fs = get_fs();
 #endif
 #endif

@@ -160,8 +157,6 @@ void __show_regs(struct pt_regs *regs)
 if ((domain & domain_mask(DOMAIN_USER)) ==
 domain_val(DOMAIN_USER, DOMAIN_NOACCESS))
 segment = "none";
- else if (fs == KERNEL_DS)
- segment = "kernel";
 else
 segment = "user";

diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index a3a38d0a4c85..3e7ddac086c2 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -711,14 +711,6 @@ struct page *get_signal_page(void)
 return page;
 }

-/* Defer to generic check */
-asmlinkage void addr_limit_check_failed(void)
-{
-#ifdef CONFIG_MMU
- addr_limit_user_check();
-#endif
-}
-
 #ifdef CONFIG_DEBUG_RSEQ
 asmlinkage void do_rseq_syscall(struct pt_regs *regs)
 {

diff --git a/arch/arm/lib/copy_from_user.S b/arch/arm/lib/copy_from_user.S
index f8016e3db65d..480a20766137 100644
--- a/arch/arm/lib/copy_from_user.S
+++ b/arch/arm/lib/copy_from_user.S
@@ -109,8 +109,7 @@ ENTRY(arm_copy_from_user)

 #ifdef CONFIG_CPU_SPECTRE
- get_thread_info r3
- ldr r3, [r3, #TI_ADDR_LIMIT]
+ ldr r3, =TASK_SIZE
 uaccess_mask_range_ptr r1, r2, r3, ip
 #endif

diff --git a/arch/arm/lib/copy_to_user.S b/arch/arm/lib/copy_to_user.S
index ebfe4cb3d912..842ea5ede485 100644
--- a/arch/arm/lib/copy_to_user.S
+++ b/arch/arm/lib/copy_to_user.S
@@ -109,8 +109,7 @@ ENTRY(__copy_to_user_std)
 WEAK(arm_copy_to_user)

 #ifdef CONFIG_CPU_SPECTRE
- get_thread_info r3
- ldr r3, [r3, #TI_ADDR_LIMIT]
+ ldr r3, =TASK_SIZE
 uaccess_mask_range_ptr r0, r2, r3, ip
 #endif
-- cgit

From da0b9ee43c152401f711e522c19b1468c84907bf Mon Sep 17 00:00:00 2001
From: Arnd Bergmann
Date: Wed, 11 Aug 2021 08:30:27 +0100
Subject: ARM: 9110/1: oabi-compat: fix oabi epoll sparse warning

As my patches change the oabi epoll definition, I received a report
from the kernel test robot about a pre-existing issue with a
mismatched __poll_t type.

The OABI code was correct when it was initially added in linux-2.6.16,
but a later (also correct) change to the generic __poll_t triggered a
type mismatch warning from sparse.

As __poll_t is always 32 bits wide and otherwise compatible, using
this instead of __u32 in the oabi_epoll_event definition is a valid
workaround.
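For reference, the definition that sparse checks against is the
__bitwise typedef from include/uapi/linux/types.h:

    typedef unsigned __bitwise __poll_t;

so storing these values in a plain __u32 field mixes a bitwise type
with a plain integer, which is exactly what sparse warns about.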
Reported-by: kernel test robot Fixes: 8ced390c2b18 ("define __poll_t, annotate constants") Fixes: ee219b946e4b ("uapi: turn __poll_t sparse checks on by default") Fixes: 687ad0191488 ("[ARM] 3109/1: old ABI compat: syscall wrappers for ABI impedance matching") Acked-by: Christoph Hellwig Signed-off-by: Arnd Bergmann Signed-off-by: Russell King (Oracle) --- arch/arm/kernel/sys_oabi-compat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c index 223ee46b6e75..68112c172025 100644 --- a/arch/arm/kernel/sys_oabi-compat.c +++ b/arch/arm/kernel/sys_oabi-compat.c @@ -274,7 +274,7 @@ out: } struct oabi_epoll_event { - __u32 events; + __poll_t events; __u64 data; } __attribute__ ((packed,aligned(4))); -- cgit From 88210317eec69d8c06cac6d6751528160e153ffd Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 11 Aug 2021 10:27:03 +0100 Subject: ARM: 9116/1: unified: Remove check for gcc < 4 Since commit cafa0010cd51fb71 ("Raise the minimum required gcc version to 4.6"), the kernel can no longer be compiled using gcc-3. Hence this condition is never true, and the check can thus be removed. Signed-off-by: Geert Uytterhoeven Signed-off-by: Russell King (Oracle) --- arch/arm/include/asm/unified.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/unified.h b/arch/arm/include/asm/unified.h index 1e2c3eb04353..ce9689118dbb 100644 --- a/arch/arm/include/asm/unified.h +++ b/arch/arm/include/asm/unified.h @@ -24,10 +24,6 @@ __asm__(".syntax unified"); #ifdef CONFIG_THUMB2_KERNEL -#if __GNUC__ < 4 -#error Thumb-2 kernel requires gcc >= 4 -#endif - /* The CPSR bit describing the instruction set (Thumb) */ #define PSR_ISETSTATE PSR_T_BIT -- cgit From 6c974e79d37608bc8fd5cf6b61b4233b82b60428 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 11 Aug 2021 10:27:02 +0100 Subject: ARM: 9118/1: div64: Remove always-true __div64_const32_is_OK() duplicate Since commit cafa0010cd51fb71 ("Raise the minimum required gcc version to 4.6"), the kernel can no longer be compiled using gcc-3. Hence __div64_const32_is_OK() is always true. Moreover, __div64_const32_is_OK() is defined in the same way in include/asm-generic/div64.h, so the ARM-specific definition can be removed regardless. Signed-off-by: Geert Uytterhoeven Signed-off-by: Russell King (Oracle) --- arch/arm/include/asm/div64.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/div64.h b/arch/arm/include/asm/div64.h index 595e538f5bfb..4b69cf850451 100644 --- a/arch/arm/include/asm/div64.h +++ b/arch/arm/include/asm/div64.h @@ -52,17 +52,6 @@ static inline uint32_t __div64_32(uint64_t *n, uint32_t base) #else -/* - * gcc versions earlier than 4.0 are simply too problematic for the - * __div64_const32() code in asm-generic/div64.h. First there is - * gcc PR 15089 that tend to trig on more complex constructs, spurious - * .global __udivsi3 are inserted even if none of those symbols are - * referenced in the generated code, and those gcc versions are not able - * to do constant propagation on long long values anyway. 
- */ - -#define __div64_const32_is_OK (__GNUC__ >= 4) - static inline uint64_t __arch_xprod_64(uint64_t m, uint64_t n, bool bias) { unsigned long long res; -- cgit From 14ecf075fe5be01860927fdf3aa11d7b18023ab2 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Wed, 28 Jul 2021 15:32:32 +0000 Subject: KVM: arm64: Minor optimization of range_is_memory Currently range_is_memory finds the corresponding struct memblock_region for both the lower and upper bounds of the given address range with two rounds of binary search, and then checks that the two memblocks are the same. Simplify this by only doing binary search on the lower bound and then checking that the upper bound is in the same memblock. Signed-off-by: David Brazdil Reviewed-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210728153232.1018911-3-dbrazdil@google.com --- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index c11b50dd0050..5af2e28b9cd7 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -204,16 +204,19 @@ bool addr_is_memory(phys_addr_t phys) return find_mem_range(phys, &range); } +static bool is_in_mem_range(u64 addr, struct kvm_mem_range *range) +{ + return range->start <= addr && addr < range->end; +} + static bool range_is_memory(u64 start, u64 end) { - struct kvm_mem_range r1, r2; + struct kvm_mem_range r; - if (!find_mem_range(start, &r1) || !find_mem_range(end - 1, &r2)) - return false; - if (r1.start != r2.start) + if (!find_mem_range(start, &r)) return false; - return true; + return is_in_mem_range(end - 1, &r); } static inline int __host_stage2_idmap(u64 start, u64 end, -- cgit From 4cb266074aa17e9cafed3a92e9f43b161516569f Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 19 Aug 2021 14:56:52 +0200 Subject: powerpc/pseries/vas: Declare pseries_vas_fault_thread_fn() as static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes a compile error with W=1. Fixes: 6d0aaf5e0de0 ("powerpc/pseries/vas: Setup IRQ and fault handling") Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210819125656.14498-3-clg@kaod.org --- arch/powerpc/platforms/pseries/vas.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c index b5c1cf1bc64d..b043e3936d21 100644 --- a/arch/powerpc/platforms/pseries/vas.c +++ b/arch/powerpc/platforms/pseries/vas.c @@ -184,7 +184,7 @@ static int h_get_nx_fault(u32 winid, u64 buffer) * Note: The hypervisor forwards an interrupt for each fault request. * So one fault CRB to process for each H_GET_NX_FAULT hcall. */ -irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data) +static irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data) { struct pseries_vas_window *txwin = data; struct coprocessor_request_block crb; -- cgit From cb53a93e33e108bfec00a13cd12696e1c0ccc8b6 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Thu, 19 Aug 2021 14:56:53 +0200 Subject: KVM: PPC: Book3S PR: Declare kvmppc_handle_exit_pr() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes a compile error with W=1. 
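For context, W=1 enables -Wmissing-prototypes among other diagnostics:
a function defined with external linkage but no prior declaration trips
it, which is what adding the prototype (or making the function static)
silences. A minimal illustration, with made-up names:

    int foo(void) { return 0; }         /* warning: no previous prototype for 'foo' */
    static int bar(void) { return 0; }  /* ok: internal linkage */
    int baz(void);                      /* prototype precedes definition... */
    int baz(void) { return 0; }         /* ...so this is ok too */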
Signed-off-by: Cédric Le Goater
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20210819125656.14498-4-clg@kaod.org
---
 arch/powerpc/kvm/book3s.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/book3s.h b/arch/powerpc/kvm/book3s.h
index 740e51def5a5..58391b4b32ed 100644
--- a/arch/powerpc/kvm/book3s.h
+++ b/arch/powerpc/kvm/book3s.h
@@ -23,7 +23,8 @@ extern int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu,
 extern int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn,
 ulong *spr_val);
 extern int kvmppc_book3s_init_pr(void);
-extern void kvmppc_book3s_exit_pr(void);
+void kvmppc_book3s_exit_pr(void);
+extern int kvmppc_handle_exit_pr(struct kvm_vcpu *vcpu, unsigned int exit_nr);

 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 extern void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val);
-- cgit

From b352ddae7b2ccd2cb29f495ca0a7c9b6848b623f Mon Sep 17 00:00:00 2001
From: Cédric Le Goater
Date: Thu, 19 Aug 2021 14:56:54 +0200
Subject: KVM: PPC: Book3S PR: Remove unused variable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This fixes a compile error with W=1.

Signed-off-by: Cédric Le Goater
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20210819125656.14498-5-clg@kaod.org
---
 arch/powerpc/kvm/book3s_64_mmu.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index 26b8b27a3755..feee40cb2ba1 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -196,7 +196,7 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
 hva_t ptegp;
 u64 pteg[16];
 u64 avpn = 0;
- u64 v, r;
+ u64 r;
 u64 v_val, v_mask;
 u64 eaddr_mask;
 int i;
@@ -285,7 +285,6 @@ do_second:
 goto do_second;
 }

- v = be64_to_cpu(pteg[i]);
 r = be64_to_cpu(pteg[i+1]);
 pp = (r & HPTE_R_PP) | key;
 if (r & HPTE_R_PP0)
-- cgit

From 898a1ef06ad4a2a8e3c9490ce62624fcd1a7b1f8 Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Fri, 20 Aug 2021 09:28:19 +0000
Subject: powerpc/audit: Avoid unnecessary #ifdef in syscall_get_arguments()

Use is_32bit_task() which already handles CONFIG_COMPAT.

Signed-off-by: Christophe Leroy
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/ba49cdd574558a0363300c3f6b5b062b397cb071.1629451483.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/include/asm/syscall.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h
index ba0f88f3a30d..7ea3c4044186 100644
--- a/arch/powerpc/include/asm/syscall.h
+++ b/arch/powerpc/include/asm/syscall.h
@@ -90,10 +90,9 @@ static inline void syscall_get_arguments(struct task_struct *task,
 unsigned long val, mask = -1UL;
 unsigned int n = 6;

-#ifdef CONFIG_COMPAT
- if (test_tsk_thread_flag(task, TIF_32BIT))
+ if (is_32bit_task())
 mask = 0xffffffff;
-#endif
+
 while (n--) {
 if (n == 0)
 val = regs->orig_gpr3;
-- cgit

From 770cec16cdc9d15555e67896dd2214a4fec9a3fa Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Fri, 20 Aug 2021 09:39:14 +0000
Subject: powerpc/audit: Simplify syscall_get_arch()

Make use of is_32bit_task() and CONFIG_CPU_LITTLE_ENDIAN to simplify
syscall_get_arch().
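This relies on the audit arch constants already encoding endianness;
for reference, include/uapi/linux/audit.h defines:

    #define AUDIT_ARCH_PPC64   (EM_PPC64|__AUDIT_ARCH_64BIT)
    #define AUDIT_ARCH_PPC64LE (EM_PPC64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)

so returning AUDIT_ARCH_PPC64LE directly folds in the old
arch |= __AUDIT_ARCH_LE step.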
Signed-off-by: Christophe Leroy
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/4be53b9187a4d8c163968f4d224267e41a7fcc33.1629451479.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/include/asm/syscall.h | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h
index 7ea3c4044186..c60ebd04b2ed 100644
--- a/arch/powerpc/include/asm/syscall.h
+++ b/arch/powerpc/include/asm/syscall.h
@@ -115,16 +115,11 @@ static inline void syscall_set_arguments(struct task_struct *task,

 static inline int syscall_get_arch(struct task_struct *task)
 {
- int arch;
-
- if (IS_ENABLED(CONFIG_PPC64) && !test_tsk_thread_flag(task, TIF_32BIT))
- arch = AUDIT_ARCH_PPC64;
+ if (is_32bit_task())
+ return AUDIT_ARCH_PPC;
+ else if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN))
+ return AUDIT_ARCH_PPC64LE;
 else
- arch = AUDIT_ARCH_PPC;
-
-#ifdef __LITTLE_ENDIAN__
- arch |= __AUDIT_ARCH_LE;
-#endif
- return arch;
+ return AUDIT_ARCH_PPC64;
 }
 #endif /* _ASM_SYSCALL_H */
-- cgit

From 4139b1972af281e0293c2414a0f1cd59fa5b2980 Mon Sep 17 00:00:00 2001
From: Peter Xu
Date: Fri, 30 Jul 2021 18:04:51 -0400
Subject: KVM: X86: Introduce kvm_mmu_slot_lpages() helpers

Introduce kvm_mmu_slot_lpages() to calculate lpage_info and rmap array
size. The other __kvm_mmu_slot_lpages() can take an extra parameter of
npages rather than fetching from the memslot pointer. Start to use the
latter one in kvm_alloc_memslot_metadata().

Signed-off-by: Peter Xu
Message-Id: <20210730220455.26054-4-peterx@redhat.com>
Signed-off-by: Paolo Bonzini
---
 arch/x86/include/asm/kvm_host.h | 7 -------
 arch/x86/kvm/mmu.h | 21 +++++++++++++++++++++
 arch/x86/kvm/mmu/page_track.c | 1 +
 arch/x86/kvm/x86.c | 6 ++----
 4 files changed, 24 insertions(+), 11 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index a612d213be5c..1881f4c92de9 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -124,13 +124,6 @@
 #define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1))
 #define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE)

-static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
-{
- /* KVM_HPAGE_GFN_SHIFT(PG_LEVEL_4K) must be 0. */
- return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) -
- (base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
-}
-
 #define KVM_PERMILLE_MMU_PAGES 20
 #define KVM_MIN_ALLOC_MMU_PAGES 64UL
 #define KVM_MMU_HASH_SHIFT 12
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 83e6c6965f1e..59e831a8ab9d 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -240,4 +240,25 @@ static inline bool kvm_memslots_have_rmaps(struct kvm *kvm)
 return smp_load_acquire(&kvm->arch.memslots_have_rmaps);
 }

+static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
+{
+ /* KVM_HPAGE_GFN_SHIFT(PG_LEVEL_4K) must be 0.
*/ + return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) - + (base_gfn >> KVM_HPAGE_GFN_SHIFT(level)); +} + +static inline unsigned long +__kvm_mmu_slot_lpages(struct kvm_memory_slot *slot, unsigned long npages, + int level) +{ + return gfn_to_index(slot->base_gfn + npages - 1, + slot->base_gfn, level) + 1; +} + +static inline unsigned long +kvm_mmu_slot_lpages(struct kvm_memory_slot *slot, int level) +{ + return __kvm_mmu_slot_lpages(slot, slot->npages, level); +} + #endif diff --git a/arch/x86/kvm/mmu/page_track.c b/arch/x86/kvm/mmu/page_track.c index 91a9f7e0fd91..269f11f92fd0 100644 --- a/arch/x86/kvm/mmu/page_track.c +++ b/arch/x86/kvm/mmu/page_track.c @@ -16,6 +16,7 @@ #include +#include "mmu.h" #include "mmu_internal.h" void kvm_page_track_free_memslot(struct kvm_memory_slot *slot) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 58a72c7d3330..4e97f7cd412e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -11349,8 +11349,7 @@ static int memslot_rmap_alloc(struct kvm_memory_slot *slot, for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) { int level = i + 1; - int lpages = gfn_to_index(slot->base_gfn + npages - 1, - slot->base_gfn, level) + 1; + int lpages = __kvm_mmu_slot_lpages(slot, npages, level); WARN_ON(slot->arch.rmap[i]); @@ -11433,8 +11432,7 @@ static int kvm_alloc_memslot_metadata(struct kvm *kvm, int lpages; int level = i + 1; - lpages = gfn_to_index(slot->base_gfn + npages - 1, - slot->base_gfn, level) + 1; + lpages = __kvm_mmu_slot_lpages(slot, npages, level); linfo = kvcalloc(lpages, sizeof(*linfo), GFP_KERNEL_ACCOUNT); if (!linfo) -- cgit From 3bcd0662d66fd07e596d2a7445e6b3215631b901 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 30 Jul 2021 18:04:52 -0400 Subject: KVM: X86: Introduce mmu_rmaps_stat per-vm debugfs file Use this file to dump rmap statistic information. The statistic is done by calculating the rmap count and the result is log-2-based. An example output of this looks like (idle 6GB guest, right after boot linux): Rmap_Count: 0 1 2-3 4-7 8-15 16-31 32-63 64-127 128-255 256-511 512-1023 Level=4K: 3086676 53045 12330 1272 502 121 76 2 0 0 0 Level=2M: 5947 231 0 0 0 0 0 0 0 0 0 Level=1G: 32 0 0 0 0 0 0 0 0 0 0 Signed-off-by: Peter Xu Message-Id: <20210730220455.26054-5-peterx@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/debugfs.c | 111 ++++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/mmu/mmu.c | 20 ++++++++ arch/x86/kvm/mmu/mmu_internal.h | 1 + 3 files changed, 132 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/debugfs.c b/arch/x86/kvm/debugfs.c index 95a98413dc32..54a83a744538 100644 --- a/arch/x86/kvm/debugfs.c +++ b/arch/x86/kvm/debugfs.c @@ -7,6 +7,8 @@ #include #include #include "lapic.h" +#include "mmu.h" +#include "mmu/mmu_internal.h" static int vcpu_get_timer_advance_ns(void *data, u64 *val) { @@ -73,3 +75,112 @@ void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu, struct dentry *debugfs_ &vcpu_tsc_scaling_frac_fops); } } + +/* + * This covers statistics <1024 (11=log(1024)+1), which should be enough to + * cover RMAP_RECYCLE_THRESHOLD. 
+ */ +#define RMAP_LOG_SIZE 11 + +static const char *kvm_lpage_str[KVM_NR_PAGE_SIZES] = { "4K", "2M", "1G" }; + +static int kvm_mmu_rmaps_stat_show(struct seq_file *m, void *v) +{ + struct kvm_rmap_head *rmap; + struct kvm *kvm = m->private; + struct kvm_memory_slot *slot; + struct kvm_memslots *slots; + unsigned int lpage_size, index; + /* Still small enough to be on the stack */ + unsigned int *log[KVM_NR_PAGE_SIZES], *cur; + int i, j, k, l, ret; + + ret = -ENOMEM; + memset(log, 0, sizeof(log)); + for (i = 0; i < KVM_NR_PAGE_SIZES; i++) { + log[i] = kcalloc(RMAP_LOG_SIZE, sizeof(unsigned int), GFP_KERNEL); + if (!log[i]) + goto out; + } + + mutex_lock(&kvm->slots_lock); + write_lock(&kvm->mmu_lock); + + for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { + slots = __kvm_memslots(kvm, i); + for (j = 0; j < slots->used_slots; j++) { + slot = &slots->memslots[j]; + for (k = 0; k < KVM_NR_PAGE_SIZES; k++) { + rmap = slot->arch.rmap[k]; + lpage_size = kvm_mmu_slot_lpages(slot, k + 1); + cur = log[k]; + for (l = 0; l < lpage_size; l++) { + index = ffs(pte_list_count(&rmap[l])); + if (WARN_ON_ONCE(index >= RMAP_LOG_SIZE)) + index = RMAP_LOG_SIZE - 1; + cur[index]++; + } + } + } + } + + write_unlock(&kvm->mmu_lock); + mutex_unlock(&kvm->slots_lock); + + /* index=0 counts no rmap; index=1 counts 1 rmap */ + seq_printf(m, "Rmap_Count:\t0\t1\t"); + for (i = 2; i < RMAP_LOG_SIZE; i++) { + j = 1 << (i - 1); + k = (1 << i) - 1; + seq_printf(m, "%d-%d\t", j, k); + } + seq_printf(m, "\n"); + + for (i = 0; i < KVM_NR_PAGE_SIZES; i++) { + seq_printf(m, "Level=%s:\t", kvm_lpage_str[i]); + cur = log[i]; + for (j = 0; j < RMAP_LOG_SIZE; j++) + seq_printf(m, "%d\t", cur[j]); + seq_printf(m, "\n"); + } + + ret = 0; +out: + for (i = 0; i < KVM_NR_PAGE_SIZES; i++) + kfree(log[i]); + + return ret; +} + +static int kvm_mmu_rmaps_stat_open(struct inode *inode, struct file *file) +{ + struct kvm *kvm = inode->i_private; + + if (!kvm_get_kvm_safe(kvm)) + return -ENOENT; + + return single_open(file, kvm_mmu_rmaps_stat_show, kvm); +} + +static int kvm_mmu_rmaps_stat_release(struct inode *inode, struct file *file) +{ + struct kvm *kvm = inode->i_private; + + kvm_put_kvm(kvm); + + return single_release(inode, file); +} + +static const struct file_operations mmu_rmaps_stat_fops = { + .open = kvm_mmu_rmaps_stat_open, + .read = seq_read, + .llseek = seq_lseek, + .release = kvm_mmu_rmaps_stat_release, +}; + +int kvm_arch_create_vm_debugfs(struct kvm *kvm) +{ + debugfs_create_file("mmu_rmaps_stat", 0644, kvm->debugfs_dentry, kvm, + &mmu_rmaps_stat_fops); + return 0; +} diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index d282ccf5f0c5..5daa8a910e8b 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -1035,6 +1035,26 @@ out: return true; } +unsigned int pte_list_count(struct kvm_rmap_head *rmap_head) +{ + struct pte_list_desc *desc; + unsigned int count = 0; + + if (!rmap_head->val) + return 0; + else if (!(rmap_head->val & 1)) + return 1; + + desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); + + while (desc) { + count += desc->spte_count; + desc = desc->more; + } + + return count; +} + static struct kvm_rmap_head *gfn_to_rmap(gfn_t gfn, int level, const struct kvm_memory_slot *slot) { diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h index ca7b7595bbfc..62bb8f758b3f 100644 --- a/arch/x86/kvm/mmu/mmu_internal.h +++ b/arch/x86/kvm/mmu/mmu_internal.h @@ -131,6 +131,7 @@ bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm, int min_level); void 
kvm_flush_remote_tlbs_with_address(struct kvm *kvm, u64 start_gfn, u64 pages); +unsigned int pte_list_count(struct kvm_rmap_head *rmap_head); /* * Return values of handle_mmio_page_fault, mmu.page_fault, and fast_page_fault(). -- cgit From 5a324c24b638d0f3194e1dc8f0cebd28a0745238 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 10 Aug 2021 23:52:36 +0300 Subject: Revert "KVM: x86/mmu: Allow zap gfn range to operate under the mmu read lock" This together with the next patch will fix a future race between kvm_zap_gfn_range and the page fault handler, which will happen when AVIC memslot is going to be only partially disabled. The performance impact is minimal since kvm_zap_gfn_range is only called by users, update_mtrr() and kvm_post_set_cr0(). Both only use it if the guest has non-coherent DMA, in order to honor the guest's UC memtype. MTRR and CD setup only happens at boot, and generally in an area where the page tables should be small (for CD) or should not include the affected GFNs at all (for MTRRs). This is based on a patch suggested by Sean Christopherson: https://lkml.org/lkml/2021/7/22/1025 Signed-off-by: Sean Christopherson Signed-off-by: Maxim Levitsky Message-Id: <20210810205251.424103-2-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 19 ++++++++----------- arch/x86/kvm/mmu/tdp_mmu.c | 15 ++++----------- arch/x86/kvm/mmu/tdp_mmu.h | 11 ++++------- 3 files changed, 16 insertions(+), 29 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 5daa8a910e8b..9a7199679f62 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -5682,8 +5682,9 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) int i; bool flush = false; + write_lock(&kvm->mmu_lock); + if (kvm_memslots_have_rmaps(kvm)) { - write_lock(&kvm->mmu_lock); for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { slots = __kvm_memslots(kvm, i); kvm_for_each_memslot(memslot, slots) { @@ -5703,22 +5704,18 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) } if (flush) kvm_flush_remote_tlbs_with_address(kvm, gfn_start, gfn_end); - write_unlock(&kvm->mmu_lock); } if (is_tdp_mmu_enabled(kvm)) { - flush = false; - - read_lock(&kvm->mmu_lock); for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) flush = kvm_tdp_mmu_zap_gfn_range(kvm, i, gfn_start, - gfn_end, flush, true); - if (flush) - kvm_flush_remote_tlbs_with_address(kvm, gfn_start, - gfn_end); - - read_unlock(&kvm->mmu_lock); + gfn_end, flush); } + + if (flush) + kvm_flush_remote_tlbs_with_address(kvm, gfn_start, gfn_end); + + write_unlock(&kvm->mmu_lock); } static bool slot_rmap_write_protect(struct kvm *kvm, diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index a3146434c965..fb1b2dc7a6d1 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -818,21 +818,15 @@ retry: * non-root pages mapping GFNs strictly within that range. Returns true if * SPTEs have been cleared and a TLB flush is needed before releasing the * MMU lock. - * - * If shared is true, this thread holds the MMU lock in read mode and must - * account for the possibility that other threads are modifying the paging - * structures concurrently. If shared is false, this thread should hold the - * MMU in write mode. 
*/ bool __kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, int as_id, gfn_t start, - gfn_t end, bool can_yield, bool flush, - bool shared) + gfn_t end, bool can_yield, bool flush) { struct kvm_mmu_page *root; - for_each_tdp_mmu_root_yield_safe(kvm, root, as_id, shared) + for_each_tdp_mmu_root_yield_safe(kvm, root, as_id, false) flush = zap_gfn_range(kvm, root, start, end, can_yield, flush, - shared); + false); return flush; } @@ -843,8 +837,7 @@ void kvm_tdp_mmu_zap_all(struct kvm *kvm) int i; for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) - flush = kvm_tdp_mmu_zap_gfn_range(kvm, i, 0, -1ull, - flush, false); + flush = kvm_tdp_mmu_zap_gfn_range(kvm, i, 0, -1ull, flush); if (flush) kvm_flush_remote_tlbs(kvm); diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h index b224d126adf9..358f447d4012 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.h +++ b/arch/x86/kvm/mmu/tdp_mmu.h @@ -20,14 +20,11 @@ void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root, bool shared); bool __kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, int as_id, gfn_t start, - gfn_t end, bool can_yield, bool flush, - bool shared); + gfn_t end, bool can_yield, bool flush); static inline bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, int as_id, - gfn_t start, gfn_t end, bool flush, - bool shared) + gfn_t start, gfn_t end, bool flush) { - return __kvm_tdp_mmu_zap_gfn_range(kvm, as_id, start, end, true, flush, - shared); + return __kvm_tdp_mmu_zap_gfn_range(kvm, as_id, start, end, true, flush); } static inline bool kvm_tdp_mmu_zap_sp(struct kvm *kvm, struct kvm_mmu_page *sp) { @@ -44,7 +41,7 @@ static inline bool kvm_tdp_mmu_zap_sp(struct kvm *kvm, struct kvm_mmu_page *sp) */ lockdep_assert_held_write(&kvm->mmu_lock); return __kvm_tdp_mmu_zap_gfn_range(kvm, kvm_mmu_page_as_id(sp), - sp->gfn, end, false, false, false); + sp->gfn, end, false, false); } void kvm_tdp_mmu_zap_all(struct kvm *kvm); -- cgit From 2822da446640d82b7bf65800314ef2a825e8df13 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Tue, 10 Aug 2021 23:52:37 +0300 Subject: KVM: x86/mmu: fix parameters to kvm_flush_remote_tlbs_with_address kvm_flush_remote_tlbs_with_address expects (start gfn, number of pages), and not (start gfn, end gfn) Signed-off-by: Maxim Levitsky Message-Id: <20210810205251.424103-3-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 9a7199679f62..3cb2808e1e5c 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -5703,13 +5703,17 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) } } if (flush) - kvm_flush_remote_tlbs_with_address(kvm, gfn_start, gfn_end); + kvm_flush_remote_tlbs_with_address(kvm, gfn_start, + gfn_end - gfn_start); } if (is_tdp_mmu_enabled(kvm)) { for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) flush = kvm_tdp_mmu_zap_gfn_range(kvm, i, gfn_start, gfn_end, flush); + if (flush) + kvm_flush_remote_tlbs_with_address(kvm, gfn_start, + gfn_end - gfn_start); } if (flush) -- cgit From 88f585358b5e6aec8586425bdbaaa2157112ffc2 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Tue, 10 Aug 2021 23:52:38 +0300 Subject: KVM: x86/mmu: add comment explaining arguments to kvm_zap_gfn_range This comment makes it clear that the range of gfns that this function receives is non inclusive. 
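With the end exclusive, zapping a single gfn becomes (usage sketch,
mirroring a caller added later in this series):

    kvm_zap_gfn_range(kvm, gfn, gfn + 1);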
Signed-off-by: Maxim Levitsky Message-Id: <20210810205251.424103-4-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 3cb2808e1e5c..e53d09534113 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -5675,6 +5675,10 @@ void kvm_mmu_uninit_vm(struct kvm *kvm) kvm_mmu_uninit_tdp_mmu(kvm); } +/* + * Invalidate (zap) SPTEs that cover GFNs from gfn_start and up to gfn_end + * (not including it) + */ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) { struct kvm_memslots *slots; -- cgit From edb298c663fccad65fe99fcec6a4f96cc344520d Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Tue, 10 Aug 2021 23:52:39 +0300 Subject: KVM: x86/mmu: bump mmu notifier count in kvm_zap_gfn_range This together with previous patch, ensures that kvm_zap_gfn_range doesn't race with page fault running on another vcpu, and will make this page fault code retry instead. This is based on a patch suggested by Sean Christopherson: https://lkml.org/lkml/2021/7/22/1025 Suggested-by: Sean Christopherson Signed-off-by: Maxim Levitsky Message-Id: <20210810205251.424103-5-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index e53d09534113..916083eb4036 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -5688,6 +5688,8 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) write_lock(&kvm->mmu_lock); + kvm_inc_notifier_count(kvm, gfn_start, gfn_end); + if (kvm_memslots_have_rmaps(kvm)) { for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { slots = __kvm_memslots(kvm, i); @@ -5723,6 +5725,8 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) if (flush) kvm_flush_remote_tlbs_with_address(kvm, gfn_start, gfn_end); + kvm_dec_notifier_count(kvm, gfn_start, gfn_end); + write_unlock(&kvm->mmu_lock); } -- cgit From 33a5c0009d14e18ca2fbf610efd6cf2e7e34489a Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Tue, 10 Aug 2021 23:52:40 +0300 Subject: KVM: x86/mmu: rename try_async_pf to kvm_faultin_pfn try_async_pf is a wrong name for this function, since this code is used when asynchronous page fault is not enabled as well. 
This code is based on a patch from Sean Christopherson: https://lkml.org/lkml/2021/7/19/2970 Suggested-by: Sean Christopherson Signed-off-by: Maxim Levitsky Reviewed-by: Paolo Bonzini Message-Id: <20210810205251.424103-6-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 4 ++-- arch/x86/kvm/mmu/paging_tmpl.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 916083eb4036..e85ec37a8468 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3884,7 +3884,7 @@ static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch); } -static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, +static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, gpa_t cr2_or_gpa, kvm_pfn_t *pfn, hva_t *hva, bool write, bool *writable) { @@ -3954,7 +3954,7 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, mmu_seq = vcpu->kvm->mmu_notifier_seq; smp_rmb(); - if (try_async_pf(vcpu, prefault, gfn, gpa, &pfn, &hva, + if (kvm_faultin_pfn(vcpu, prefault, gfn, gpa, &pfn, &hva, write, &map_writable)) return RET_PF_RETRY; diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index ee044d357b5f..f349eae69bf3 100644 --- a/arch/x86/kvm/mmu/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -881,7 +881,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t addr, u32 error_code, mmu_seq = vcpu->kvm->mmu_notifier_seq; smp_rmb(); - if (try_async_pf(vcpu, prefault, walker.gfn, addr, &pfn, &hva, + if (kvm_faultin_pfn(vcpu, prefault, walker.gfn, addr, &pfn, &hva, write_fault, &map_writable)) return RET_PF_RETRY; -- cgit From 8f32d5e563cbf0507756aa929134b1a110b47a62 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Tue, 10 Aug 2021 23:52:41 +0300 Subject: KVM: x86/mmu: allow kvm_faultin_pfn to return page fault handling code This will allow it to return RET_PF_EMULATE for APIC mmio emulation. This code is based on a patch from Sean Christopherson: https://lkml.org/lkml/2021/7/19/2970 Suggested-by: Sean Christopherson Signed-off-by: Maxim Levitsky Reviewed-by: Paolo Bonzini Message-Id: <20210810205251.424103-7-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 17 ++++++++++------- arch/x86/kvm/mmu/paging_tmpl.h | 4 ++-- 2 files changed, 12 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index e85ec37a8468..38e36cff82af 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3886,7 +3886,7 @@ static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, gpa_t cr2_or_gpa, kvm_pfn_t *pfn, hva_t *hva, - bool write, bool *writable) + bool write, bool *writable, int *r) { struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); bool async; @@ -3897,7 +3897,7 @@ static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, * be zapped before KVM inserts a new MMIO SPTE for the gfn. */ if (slot && (slot->flags & KVM_MEMSLOT_INVALID)) - return true; + goto out_retry; /* Don't expose private memslots to L2. 
*/
 if (is_guest_mode(vcpu) && !kvm_is_visible_memslot(slot)) {
 *pfn = KVM_PFN_NOSLOT;
 *writable = false;
@@ -3917,14 +3917,17 @@ static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
 if (kvm_find_async_pf_gfn(vcpu, gfn)) {
 trace_kvm_async_pf_doublefault(cr2_or_gpa, gfn);
 kvm_make_request(KVM_REQ_APF_HALT, vcpu);
- return true;
+ goto out_retry;
 } else if (kvm_arch_setup_async_pf(vcpu, cr2_or_gpa, gfn))
- return true;
+ goto out_retry;
 }

 *pfn = __gfn_to_pfn_memslot(slot, gfn, false, NULL,
 write, writable, hva);
- return false;
+
+out_retry:
+ *r = RET_PF_RETRY;
+ return true;
 }

 static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
@@ -3955,8 +3958,8 @@
 smp_rmb();

 if (kvm_faultin_pfn(vcpu, prefault, gfn, gpa, &pfn, &hva,
- write, &map_writable))
- return RET_PF_RETRY;
+ write, &map_writable, &r))
+ return r;

 if (handle_abnormal_pfn(vcpu, is_tdp ? 0 : gpa, gfn, pfn, ACC_ALL, &r))
 return r;
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index f349eae69bf3..7d03e9b7ccfa 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -882,8 +882,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t addr, u32 error_code,
 smp_rmb();

 if (kvm_faultin_pfn(vcpu, prefault, walker.gfn, addr, &pfn, &hva,
- write_fault, &map_writable))
- return RET_PF_RETRY;
+ write_fault, &map_writable, &r))
+ return r;

 if (handle_abnormal_pfn(vcpu, addr, walker.gfn, pfn, walker.pte_access, &r))
 return r;
-- cgit

From 9cc13d60ba6b9976e01ec6f66fa1ec4a06992929 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky
Date: Tue, 10 Aug 2021 23:52:42 +0300
Subject: KVM: x86/mmu: allow APICv memslot to be enabled but invisible

on AMD, APIC virtualization needs to dynamically inhibit the AVIC in
response to some events, and this is problematic and not efficient to
do by enabling/disabling the memslot that covers APIC's mmio range.
Plus due to SRCU locking, it makes it more complex to request AVIC
inhibition.

Instead, the APIC memslot will be always enabled, but be invisible to
the guest, such that, when it is inhibited, the MMU code will not
install a SPTE for it and will instead jump straight to emulating the
access.

When inhibiting the AVIC, this SPTE will be zapped.

This code is based on a suggestion from Sean Christopherson:
https://lkml.org/lkml/2021/7/19/2970

Suggested-by: Sean Christopherson
Signed-off-by: Maxim Levitsky
Message-Id: <20210810205251.424103-8-mlevitsk@redhat.com>
Signed-off-by: Paolo Bonzini
---
 arch/x86/kvm/mmu/mmu.c | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 38e36cff82af..8568ae42e867 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3899,11 +3899,24 @@ static bool kvm_faultin_pfn(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
 if (slot && (slot->flags & KVM_MEMSLOT_INVALID))
 goto out_retry;

- /* Don't expose private memslots to L2. */
- if (is_guest_mode(vcpu) && !kvm_is_visible_memslot(slot)) {
- *pfn = KVM_PFN_NOSLOT;
- *writable = false;
- return false;
+ if (!kvm_is_visible_memslot(slot)) {
+ /* Don't expose private memslots to L2. */
+ if (is_guest_mode(vcpu)) {
+ *pfn = KVM_PFN_NOSLOT;
+ *writable = false;
+ return false;
+ }
+ /*
+ * If the APIC access page exists but is disabled, go directly
+ * to emulation without caching the MMIO access or creating a
+ * MMIO SPTE.
That way the cache doesn't need to be purged + * when the AVIC is re-enabled. + */ + if (slot && slot->id == APIC_ACCESS_PAGE_PRIVATE_MEMSLOT && + !kvm_apicv_activated(vcpu->kvm)) { + *r = RET_PF_EMULATE; + return true; + } } async = false; -- cgit From 36222b117e36d487172c36bec187628085b92575 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Tue, 10 Aug 2021 23:52:43 +0300 Subject: KVM: x86: don't disable APICv memslot when inhibited Thanks to the former patches, it is now possible to keep the APICv memslot always enabled, and it will be invisible to the guest when it is inhibited This code is based on a suggestion from Sean Christopherson: https://lkml.org/lkml/2021/7/19/2970 Suggested-by: Sean Christopherson Signed-off-by: Maxim Levitsky Message-Id: <20210810205251.424103-9-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm-x86-ops.h | 1 - arch/x86/include/asm/kvm_host.h | 1 - arch/x86/kvm/svm/avic.c | 21 ++++++--------------- arch/x86/kvm/svm/svm.c | 1 - arch/x86/kvm/svm/svm.h | 1 - arch/x86/kvm/x86.c | 21 ++++++++------------- 6 files changed, 14 insertions(+), 32 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h index a12a4987154e..cefe1d81e2e8 100644 --- a/arch/x86/include/asm/kvm-x86-ops.h +++ b/arch/x86/include/asm/kvm-x86-ops.h @@ -72,7 +72,6 @@ KVM_X86_OP(enable_nmi_window) KVM_X86_OP(enable_irq_window) KVM_X86_OP(update_cr8_intercept) KVM_X86_OP(check_apicv_inhibit_reasons) -KVM_X86_OP_NULL(pre_update_apicv_exec_ctrl) KVM_X86_OP(refresh_apicv_exec_ctrl) KVM_X86_OP(hwapic_irr_update) KVM_X86_OP(hwapic_isr_update) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 1881f4c92de9..15eefd9498b4 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1352,7 +1352,6 @@ struct kvm_x86_ops { void (*enable_irq_window)(struct kvm_vcpu *vcpu); void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); bool (*check_apicv_inhibit_reasons)(ulong bit); - void (*pre_update_apicv_exec_ctrl)(struct kvm *kvm, bool activate); void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu); void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr); void (*hwapic_isr_update)(struct kvm_vcpu *vcpu, int isr); diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index a8ad78a2faa1..d0acbeeab3d6 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -225,31 +225,26 @@ static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu, * field of the VMCB. Therefore, we set up the * APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (4KB) here. */ -static int avic_update_access_page(struct kvm *kvm, bool activate) +static int avic_alloc_access_page(struct kvm *kvm) { void __user *ret; int r = 0; mutex_lock(&kvm->slots_lock); - /* - * During kvm_destroy_vm(), kvm_pit_set_reinject() could trigger - * APICv mode change, which update APIC_ACCESS_PAGE_PRIVATE_MEMSLOT - * memory region. So, we need to ensure that kvm->mm == current->mm. - */ - if ((kvm->arch.apic_access_memslot_enabled == activate) || - (kvm->mm != current->mm)) + + if (kvm->arch.apic_access_memslot_enabled) goto out; ret = __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, APIC_DEFAULT_PHYS_BASE, - activate ? 
PAGE_SIZE : 0); + PAGE_SIZE); if (IS_ERR(ret)) { r = PTR_ERR(ret); goto out; } - kvm->arch.apic_access_memslot_enabled = activate; + kvm->arch.apic_access_memslot_enabled = true; out: mutex_unlock(&kvm->slots_lock); return r; @@ -270,7 +265,7 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu) if (kvm_apicv_activated(vcpu->kvm)) { int ret; - ret = avic_update_access_page(vcpu->kvm, true); + ret = avic_alloc_access_page(vcpu->kvm); if (ret) return ret; } @@ -918,10 +913,6 @@ bool svm_check_apicv_inhibit_reasons(ulong bit) return supported & BIT(bit); } -void svm_pre_update_apicv_exec_ctrl(struct kvm *kvm, bool activate) -{ - avic_update_access_page(kvm, activate); -} static inline int avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 2b6632d4c76f..b1662d6baa71 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4581,7 +4581,6 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .set_virtual_apic_mode = svm_set_virtual_apic_mode, .refresh_apicv_exec_ctrl = svm_refresh_apicv_exec_ctrl, .check_apicv_inhibit_reasons = svm_check_apicv_inhibit_reasons, - .pre_update_apicv_exec_ctrl = svm_pre_update_apicv_exec_ctrl, .load_eoi_exitmap = svm_load_eoi_exitmap, .hwapic_irr_update = svm_hwapic_irr_update, .hwapic_isr_update = svm_hwapic_isr_update, diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index bd0fe94c2920..bd41f2a32838 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -534,7 +534,6 @@ void avic_post_state_restore(struct kvm_vcpu *vcpu); void svm_set_virtual_apic_mode(struct kvm_vcpu *vcpu); void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu); bool svm_check_apicv_inhibit_reasons(ulong bit); -void svm_pre_update_apicv_exec_ctrl(struct kvm *kvm, bool activate); void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); void svm_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr); void svm_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4e97f7cd412e..4d720a0cdd80 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9255,13 +9255,6 @@ void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_vcpu_update_apicv); -/* - * NOTE: Do not hold any lock prior to calling this. - * - * In particular, kvm_request_apicv_update() expects kvm->srcu not to be - * locked, because it calls __x86_set_memory_region() which does - * synchronize_srcu(&kvm->srcu). 
- */ void kvm_request_apicv_update(struct kvm *kvm, bool activate, ulong bit) { unsigned long old, new, expected; @@ -9282,14 +9275,16 @@ void kvm_request_apicv_update(struct kvm *kvm, bool activate, ulong bit) old = cmpxchg(&kvm->arch.apicv_inhibit_reasons, expected, new); } while (old != expected); - if (!!old == !!new) - return; + if (!!old != !!new) { + trace_kvm_apicv_update_request(activate, bit); + kvm_make_all_cpus_request(kvm, KVM_REQ_APICV_UPDATE); + if (new) { + unsigned long gfn = gpa_to_gfn(APIC_DEFAULT_PHYS_BASE); - trace_kvm_apicv_update_request(activate, bit); - if (kvm_x86_ops.pre_update_apicv_exec_ctrl) - static_call(kvm_x86_pre_update_apicv_exec_ctrl)(kvm, activate); + kvm_zap_gfn_range(kvm, gfn, gfn+1); + } + } - kvm_make_all_cpus_request(kvm, KVM_REQ_APICV_UPDATE); } EXPORT_SYMBOL_GPL(kvm_request_apicv_update); -- cgit From b0a1637f64b06586752cc507b94e4aeff02588d6 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Tue, 10 Aug 2021 23:52:44 +0300 Subject: KVM: x86: APICv: fix race in kvm_request_apicv_update on SVM Currently on SVM, the kvm_request_apicv_update toggles the APICv memslot without doing any synchronization. If there is a mismatch between that memslot state and the AVIC state, on one of the vCPUs, an APIC mmio access can be lost: For example: VCPU0: enable the APIC_ACCESS_PAGE_PRIVATE_MEMSLOT VCPU1: access an APIC mmio register. Since AVIC is still disabled on VCPU1, the access will not be intercepted by it, and neither will it cause MMIO fault, but rather it will just be read/written from/to the dummy page mapped into the APIC_ACCESS_PAGE_PRIVATE_MEMSLOT. Fix that by adding a lock guarding the AVIC state changes, and carefully order the operations of kvm_request_apicv_update to avoid this race: 1. Take the lock 2. Send KVM_REQ_APICV_UPDATE 3. Update the apic inhibit reason 4. Release the lock This ensures that at (2) all vCPUs are kicked out of the guest mode, but don't yet see the new avic state. Then only after (4) all other vCPUs can update their AVIC state and resume. 
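The ordering distills to the sketch below (a simplified view of the patch that follows, not compilable on its own; it assumes the kernel-internal kvm structures and the new arch.apicv_update_lock field):

/*
 * Race-free APICv inhibit update: every vCPU is kicked out of guest
 * mode before the new state is published, and can only re-enter after
 * the new state is visible.
 */
static void apicv_update_ordered(struct kvm *kvm, unsigned long new)
{
	mutex_lock(&kvm->arch.apicv_update_lock);             /* 1. take the lock */
	kvm_make_all_cpus_request(kvm, KVM_REQ_APICV_UPDATE); /* 2. kick vCPUs out of guest mode */
	kvm->arch.apicv_inhibit_reasons = new;                /* 3. update the inhibit reasons */
	mutex_unlock(&kvm->arch.apicv_update_lock);           /* 4. release; vCPUs refresh and resume */
}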
Signed-off-by: Maxim Levitsky Message-Id: <20210810205251.424103-10-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 6 ++++++ arch/x86/kvm/x86.c | 39 ++++++++++++++++++++++++--------------- 2 files changed, 30 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 15eefd9498b4..20a3ffe14ff2 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1053,6 +1053,9 @@ struct kvm_arch { struct kvm_apic_map __rcu *apic_map; atomic_t apic_map_dirty; + /* Protects apic_access_memslot_enabled and apicv_inhibit_reasons */ + struct mutex apicv_update_lock; + bool apic_access_memslot_enabled; unsigned long apicv_inhibit_reasons; @@ -1736,6 +1739,9 @@ void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu); void kvm_request_apicv_update(struct kvm *kvm, bool activate, unsigned long bit); +void __kvm_request_apicv_update(struct kvm *kvm, bool activate, + unsigned long bit); + int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4d720a0cdd80..89e666e5a707 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8579,6 +8579,8 @@ EXPORT_SYMBOL_GPL(kvm_apicv_activated); static void kvm_apicv_init(struct kvm *kvm) { + mutex_init(&kvm->arch.apicv_update_lock); + if (enable_apicv) clear_bit(APICV_INHIBIT_REASON_DISABLE, &kvm->arch.apicv_inhibit_reasons); @@ -9240,6 +9242,8 @@ void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu) if (!lapic_in_kernel(vcpu)) return; + mutex_lock(&vcpu->kvm->arch.apicv_update_lock); + vcpu->arch.apicv_active = kvm_apicv_activated(vcpu->kvm); kvm_apic_update_apicv(vcpu); static_call(kvm_x86_refresh_apicv_exec_ctrl)(vcpu); @@ -9252,39 +9256,44 @@ void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu) */ if (!vcpu->arch.apicv_active) kvm_make_request(KVM_REQ_EVENT, vcpu); + + mutex_unlock(&vcpu->kvm->arch.apicv_update_lock); } EXPORT_SYMBOL_GPL(kvm_vcpu_update_apicv); -void kvm_request_apicv_update(struct kvm *kvm, bool activate, ulong bit) +void __kvm_request_apicv_update(struct kvm *kvm, bool activate, ulong bit) { - unsigned long old, new, expected; + unsigned long old, new; if (!kvm_x86_ops.check_apicv_inhibit_reasons || !static_call(kvm_x86_check_apicv_inhibit_reasons)(bit)) return; - old = READ_ONCE(kvm->arch.apicv_inhibit_reasons); - do { - expected = new = old; - if (activate) - __clear_bit(bit, &new); - else - __set_bit(bit, &new); - if (new == old) - break; - old = cmpxchg(&kvm->arch.apicv_inhibit_reasons, expected, new); - } while (old != expected); + old = new = kvm->arch.apicv_inhibit_reasons; + + if (activate) + __clear_bit(bit, &new); + else + __set_bit(bit, &new); if (!!old != !!new) { trace_kvm_apicv_update_request(activate, bit); kvm_make_all_cpus_request(kvm, KVM_REQ_APICV_UPDATE); + kvm->arch.apicv_inhibit_reasons = new; if (new) { unsigned long gfn = gpa_to_gfn(APIC_DEFAULT_PHYS_BASE); - kvm_zap_gfn_range(kvm, gfn, gfn+1); } - } + } else + kvm->arch.apicv_inhibit_reasons = new; +} +EXPORT_SYMBOL_GPL(__kvm_request_apicv_update); +void kvm_request_apicv_update(struct kvm *kvm, bool activate, ulong bit) +{ + mutex_lock(&kvm->arch.apicv_update_lock); + __kvm_request_apicv_update(kvm, activate, bit); + mutex_unlock(&kvm->arch.apicv_update_lock); } EXPORT_SYMBOL_GPL(kvm_request_apicv_update); -- cgit From 4628efcd4e8963a41e877318cd10346dea9a6e00 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: 
Tue, 10 Aug 2021 23:52:45 +0300 Subject: KVM: SVM: add warning for mismatch between AVIC vcpu state and AVIC inhibition It is never a good idea to enter a guest on a vCPU when the AVIC inhibition state doesn't match the enablement of the AVIC on the vCPU. Signed-off-by: Maxim Levitsky Message-Id: <20210810205251.424103-11-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index b1662d6baa71..22376fdd94f8 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3779,6 +3779,8 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) pre_svm_run(vcpu); + WARN_ON_ONCE(kvm_apicv_activated(vcpu->kvm) != kvm_vcpu_apicv_active(vcpu)); + sync_lapic_to_cr8(vcpu); if (unlikely(svm->asid != svm->vmcb->control.asid)) { -- cgit From 0f250a646382e017725001a552624be0c86527bf Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 10 Aug 2021 23:52:46 +0300 Subject: KVM: x86: hyper-v: Deactivate APICv only when AutoEOI feature is in use APICV_INHIBIT_REASON_HYPERV is currently unconditionally forced upon SynIC activation as SynIC's AutoEOI is incompatible with APICv/AVIC. It is, however, possible to track whether the feature was actually used by the guest and only inhibit APICv/AVIC when needed. TLFS suggests a dedicated 'HV_DEPRECATING_AEOI_RECOMMENDED' flag to let Windows know that the AutoEOI feature should be avoided. While it's up to KVM userspace to set the flag, KVM can help a bit by exposing global APICv/AVIC enablement. Maxim: - always set HV_DEPRECATING_AEOI_RECOMMENDED in kvm_get_hv_cpuid, since this feature can be used regardless of AVIC Paolo: - use arch.apicv_update_lock to protect the hv->synic_auto_eoi_used instead of atomic ops Signed-off-by: Vitaly Kuznetsov Signed-off-by: Maxim Levitsky Message-Id: <20210810205251.424103-12-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 6 ++++++ arch/x86/kvm/hyperv.c | 32 ++++++++++++++++++++++++++------ 2 files changed, 32 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 20a3ffe14ff2..28c85c80e831 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -981,6 +981,12 @@ struct kvm_hv { /* How many vCPUs have VP index != vCPU index */ atomic_t num_mismatched_vp_indexes; + /* + * How many SynICs use 'AutoEOI' feature + * (protected by arch.apicv_update_lock) + */ + unsigned int synic_auto_eoi_used; + struct hv_partition_assist_pg *hv_pa_pg; struct kvm_hv_syndbg hv_syndbg; }; diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 0b38f944c6b6..fe4a02715266 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -88,6 +88,10 @@ static bool synic_has_vector_auto_eoi(struct kvm_vcpu_hv_synic *synic, static void synic_update_vector(struct kvm_vcpu_hv_synic *synic, int vector) { + struct kvm_vcpu *vcpu = hv_synic_to_vcpu(synic); + struct kvm_hv *hv = to_kvm_hv(vcpu->kvm); + int auto_eoi_old, auto_eoi_new; + if (vector < HV_SYNIC_FIRST_VALID_VECTOR) return; @@ -96,10 +100,30 @@ static void synic_update_vector(struct kvm_vcpu_hv_synic *synic, else __clear_bit(vector, synic->vec_bitmap); + auto_eoi_old = bitmap_weight(synic->auto_eoi_bitmap, 256); + if (synic_has_vector_auto_eoi(synic, vector)) __set_bit(vector, synic->auto_eoi_bitmap); else __clear_bit(vector, synic->auto_eoi_bitmap); + + auto_eoi_new = 
bitmap_weight(synic->auto_eoi_bitmap, 256); + + if (!!auto_eoi_old == !!auto_eoi_new) + return; + + mutex_lock(&vcpu->kvm->arch.apicv_update_lock); + + if (auto_eoi_new) + hv->synic_auto_eoi_used++; + else + hv->synic_auto_eoi_used--; + + __kvm_request_apicv_update(vcpu->kvm, + !hv->synic_auto_eoi_used, + APICV_INHIBIT_REASON_HYPERV); + + mutex_unlock(&vcpu->kvm->arch.apicv_update_lock); } static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint, @@ -933,12 +957,6 @@ int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages) synic = to_hv_synic(vcpu); - /* - * Hyper-V SynIC auto EOI SINT's are - * not compatible with APICV, so request - * to deactivate APICV permanently. - */ - kvm_request_apicv_update(vcpu->kvm, false, APICV_INHIBIT_REASON_HYPERV); synic->active = true; synic->dont_zero_synic_pages = dont_zero_synic_pages; synic->control = HV_SYNIC_CONTROL_ENABLE; @@ -2476,6 +2494,8 @@ int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, ent->eax |= HV_X64_ENLIGHTENED_VMCS_RECOMMENDED; if (!cpu_smt_possible()) ent->eax |= HV_X64_NO_NONARCH_CORESHARING; + + ent->eax |= HV_DEPRECATING_AEOI_RECOMMENDED; /* * Default number of spinlock retry attempts, matches * HyperV 2016. -- cgit From 30eed56a7e1cbefe933a33d661827e5c72cd136f Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Tue, 10 Aug 2021 23:52:47 +0300 Subject: KVM: SVM: remove svm_toggle_avic_for_irq_window Now that kvm_request_apicv_update doesn't need to drop the kvm->srcu lock, we can call kvm_request_apicv_update directly. Signed-off-by: Maxim Levitsky Reviewed-by: Paolo Bonzini Message-Id: <20210810205251.424103-13-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/avic.c | 11 ----------- arch/x86/kvm/svm/svm.c | 4 ++-- arch/x86/kvm/svm/svm.h | 1 - 3 files changed, 2 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index d0acbeeab3d6..1def54c26259 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -582,17 +582,6 @@ void avic_post_state_restore(struct kvm_vcpu *vcpu) avic_handle_ldr_update(vcpu); } -void svm_toggle_avic_for_irq_window(struct kvm_vcpu *vcpu, bool activate) -{ - if (!enable_apicv || !lapic_in_kernel(vcpu)) - return; - - srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); - kvm_request_apicv_update(vcpu->kvm, activate, - APICV_INHIBIT_REASON_IRQWIN); - vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); -} - void svm_set_virtual_apic_mode(struct kvm_vcpu *vcpu) { return; diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 22376fdd94f8..4e76e78c98ec 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2992,7 +2992,7 @@ static int interrupt_window_interception(struct kvm_vcpu *vcpu) * In this case AVIC was temporarily disabled for * requesting the IRQ window and we have to re-enable it. */ - svm_toggle_avic_for_irq_window(vcpu, true); + kvm_request_apicv_update(vcpu->kvm, true, APICV_INHIBIT_REASON_IRQWIN); ++vcpu->stat.irq_window_exits; return 1; @@ -3544,7 +3544,7 @@ static void svm_enable_irq_window(struct kvm_vcpu *vcpu) * via AVIC. In such case, we need to temporarily disable AVIC, * and fallback to injecting IRQ via V_IRQ. 
*/ - svm_toggle_avic_for_irq_window(vcpu, false); + kvm_request_apicv_update(vcpu->kvm, false, APICV_INHIBIT_REASON_IRQWIN); svm_set_vintr(svm); } } diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index bd41f2a32838..aae851762b59 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -524,7 +524,6 @@ int avic_ga_log_notifier(u32 ga_tag); void avic_vm_destroy(struct kvm *kvm); int avic_vm_init(struct kvm *kvm); void avic_init_vmcb(struct vcpu_svm *svm); -void svm_toggle_avic_for_irq_window(struct kvm_vcpu *vcpu, bool activate); int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu); int avic_unaccelerated_access_interception(struct kvm_vcpu *vcpu); int avic_init_vcpu(struct vcpu_svm *svm); -- cgit From 06ef813466c63ff1a61b5f99592e58d049c2c1ac Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Tue, 10 Aug 2021 23:52:48 +0300 Subject: KVM: SVM: avoid refreshing avic if its state didn't change Since AVIC can be inhibited and uninhibited rapidly it is possible that we have nothing to do by the time the svm_refresh_apicv_exec_ctrl is called. Detect and avoid this, which will be useful when we will start calling avic_vcpu_load/avic_vcpu_put when the avic inhibition state changes. Signed-off-by: Maxim Levitsky Message-Id: <20210810205251.424103-14-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 89e666e5a707..bf8cb1021d11 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9239,12 +9239,18 @@ void kvm_make_scan_ioapic_request(struct kvm *kvm) void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu) { + bool activate; + if (!lapic_in_kernel(vcpu)) return; mutex_lock(&vcpu->kvm->arch.apicv_update_lock); - vcpu->arch.apicv_active = kvm_apicv_activated(vcpu->kvm); + activate = kvm_apicv_activated(vcpu->kvm); + if (vcpu->arch.apicv_active == activate) + goto out; + + vcpu->arch.apicv_active = activate; kvm_apic_update_apicv(vcpu); static_call(kvm_x86_refresh_apicv_exec_ctrl)(vcpu); @@ -9257,6 +9263,7 @@ void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu) if (!vcpu->arch.apicv_active) kvm_make_request(KVM_REQ_EVENT, vcpu); +out: mutex_unlock(&vcpu->kvm->arch.apicv_update_lock); } EXPORT_SYMBOL_GPL(kvm_vcpu_update_apicv); -- cgit From bf5f6b9d7ad6b88df15d691d9759f9a397488c7e Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Tue, 10 Aug 2021 23:52:49 +0300 Subject: KVM: SVM: move check for kvm_vcpu_apicv_active outside of avic_vcpu_{put|load} No functional change intended. Signed-off-by: Maxim Levitsky Message-Id: <20210810205251.424103-15-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/avic.c | 10 ++++------ arch/x86/kvm/svm/svm.c | 7 +++++-- 2 files changed, 9 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index 1def54c26259..e7728b16a46f 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -940,9 +940,6 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu) int h_physical_id = kvm_cpu_get_apicid(cpu); struct vcpu_svm *svm = to_svm(vcpu); - if (!kvm_vcpu_apicv_active(vcpu)) - return; - /* * Since the host physical APIC id is 8 bits, * we can support host APIC ID upto 255. 
@@ -970,9 +967,6 @@ void avic_vcpu_put(struct kvm_vcpu *vcpu) u64 entry; struct vcpu_svm *svm = to_svm(vcpu); - if (!kvm_vcpu_apicv_active(vcpu)) - return; - entry = READ_ONCE(*(svm->avic_physical_id_cache)); if (entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK) avic_update_iommu_vcpu_affinity(vcpu, -1, 0); @@ -989,6 +983,10 @@ static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run) struct vcpu_svm *svm = to_svm(vcpu); svm->avic_is_running = is_run; + + if (!kvm_vcpu_apicv_active(vcpu)) + return; + if (is_run) avic_vcpu_load(vcpu, vcpu->cpu); else diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 4e76e78c98ec..114c7e29467b 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1481,12 +1481,15 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) sd->current_vmcb = svm->vmcb; indirect_branch_prediction_barrier(); } - avic_vcpu_load(vcpu, cpu); + if (kvm_vcpu_apicv_active(vcpu)) + avic_vcpu_load(vcpu, cpu); } static void svm_vcpu_put(struct kvm_vcpu *vcpu) { - avic_vcpu_put(vcpu); + if (kvm_vcpu_apicv_active(vcpu)) + avic_vcpu_put(vcpu); + svm_prepare_host_switch(vcpu); ++vcpu->stat.host_state_reload; -- cgit From df7e4827c5490a6a0cc41341497f5267712511cf Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Tue, 10 Aug 2021 23:52:50 +0300 Subject: KVM: SVM: call avic_vcpu_load/avic_vcpu_put when enabling/disabling AVIC Currently it is possible to have the following scenario: 1. AVIC is disabled by svm_refresh_apicv_exec_ctrl 2. svm_vcpu_blocking calls avic_vcpu_put which does nothing 3. svm_vcpu_unblocking enables the AVIC (due to KVM_REQ_APICV_UPDATE) and then calls avic_vcpu_load 4. warning is triggered in avic_vcpu_load since AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK was never cleared. While it is possible to just remove the warning, it seems to be more robust to fully disable/enable AVIC in svm_refresh_apicv_exec_ctrl by calling avic_vcpu_load/avic_vcpu_put. Signed-off-by: Maxim Levitsky Message-Id: <20210810205251.424103-16-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/avic.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index e7728b16a46f..01c0e83e1b71 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -651,6 +651,11 @@ void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) } vmcb_mark_dirty(vmcb, VMCB_AVIC); + if (activated) + avic_vcpu_load(vcpu, vcpu->cpu); + else + avic_vcpu_put(vcpu); + svm_set_pi_irte_mode(vcpu, activated); } -- cgit From 73143035c214f3b0ac5cc2393197f828adeefc1e Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Tue, 10 Aug 2021 23:52:51 +0300 Subject: KVM: SVM: AVIC: drop unsupported AVIC base relocation code APIC base relocation is not supported anyway and won't work correctly, so just drop the code that handles it and keep the AVIC MMIO bar at the default APIC base. 
Signed-off-by: Maxim Levitsky Message-Id: <20210810205251.424103-17-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/avic.c | 2 ++ arch/x86/kvm/svm/svm.c | 7 ------- arch/x86/kvm/svm/svm.h | 6 ------ 3 files changed, 2 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index 01c0e83e1b71..8052d92069e0 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -197,6 +197,8 @@ void avic_init_vmcb(struct vcpu_svm *svm) vmcb->control.avic_logical_id = lpa & AVIC_HPA_MASK; vmcb->control.avic_physical_id = ppa & AVIC_HPA_MASK; vmcb->control.avic_physical_id |= AVIC_MAX_PHYSICAL_ID_COUNT; + vmcb->control.avic_vapic_bar = APIC_DEFAULT_PHYS_BASE & VMCB_AVIC_APIC_BAR_MASK; + if (kvm_apicv_activated(svm->vcpu.kvm)) vmcb->control.int_ctl |= AVIC_ENABLE_MASK; else diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 114c7e29467b..7b58e445a967 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1314,9 +1314,6 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) svm->virt_spec_ctrl = 0; init_vmcb(vcpu); - - if (kvm_vcpu_apicv_active(vcpu) && !init_event) - avic_update_vapic_bar(svm, APIC_DEFAULT_PHYS_BASE); } void svm_switch_vmcb(struct vcpu_svm *svm, struct kvm_vmcb_info *target_vmcb) @@ -2967,10 +2964,6 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) svm->msr_decfg = data; break; } - case MSR_IA32_APICBASE: - if (kvm_vcpu_apicv_active(vcpu)) - avic_update_vapic_bar(to_svm(vcpu), data); - fallthrough; default: return kvm_set_msr_common(vcpu, msr); } diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index aae851762b59..524d943f3efc 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -503,12 +503,6 @@ extern struct kvm_x86_nested_ops svm_nested_ops; #define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL -static inline void avic_update_vapic_bar(struct vcpu_svm *svm, u64 data) -{ - svm->vmcb->control.avic_vapic_bar = data & VMCB_AVIC_APIC_BAR_MASK; - vmcb_mark_dirty(svm->vmcb, VMCB_AVIC); -} - static inline bool avic_vcpu_is_running(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); -- cgit From f95937ccf5bd5e0a6bbac2b8e65a87982ffae403 Mon Sep 17 00:00:00 2001 From: Jing Zhang Date: Mon, 2 Aug 2021 16:56:29 +0000 Subject: KVM: stats: Support linear and logarithmic histogram statistics Add new types of KVM stats: linear and logarithmic histograms. Histograms are very useful for observing the value distribution of time- or size-related stats. 
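To see what a logarithmic histogram records, consider how a sample is bucketed: bucket 0 counts zero values and bucket i counts values in [2^(i-1), 2^i). A standalone sketch of that bucketing (an editor's illustration built on the same fls64-style computation; the names here are hypothetical, not the kernel's macro):

#include <stdint.h>
#include <stdio.h>

#define HIST_BUCKETS 64

static void log_hist_update(uint64_t *hist, size_t nbuckets, uint64_t value)
{
	/* index = position of the highest set bit, 1-based; 0 for value 0 */
	size_t index = value ? 64 - __builtin_clzll(value) : 0;

	if (index >= nbuckets)
		index = nbuckets - 1;	/* clamp overflows into the last bucket */
	hist[index]++;
}

int main(void)
{
	uint64_t hist[HIST_BUCKETS] = { 0 };

	log_hist_update(hist, HIST_BUCKETS, 1500);	/* lands in bucket 11: [1024, 2048) */
	printf("bucket 11 = %llu\n", (unsigned long long)hist[11]);
	return 0;
}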
Signed-off-by: Jing Zhang Message-Id: <20210802165633.1866976-2-jingzhangos@google.com> Signed-off-by: Paolo Bonzini --- arch/arm64/kvm/guest.c | 4 ---- arch/mips/kvm/mips.c | 4 ---- arch/powerpc/kvm/book3s.c | 4 ---- arch/powerpc/kvm/booke.c | 4 ---- arch/s390/kvm/kvm-s390.c | 4 ---- arch/x86/kvm/x86.c | 4 ---- 6 files changed, 24 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 1dfb83578277..5188184d25d0 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -31,8 +31,6 @@ const struct _kvm_stats_desc kvm_vm_stats_desc[] = { KVM_GENERIC_VM_STATS() }; -static_assert(ARRAY_SIZE(kvm_vm_stats_desc) == - sizeof(struct kvm_vm_stat) / sizeof(u64)); const struct kvm_stats_header kvm_vm_stats_header = { .name_size = KVM_STATS_NAME_SIZE, @@ -52,8 +50,6 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { STATS_DESC_COUNTER(VCPU, mmio_exit_kernel), STATS_DESC_COUNTER(VCPU, exits) }; -static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) == - sizeof(struct kvm_vcpu_stat) / sizeof(u64)); const struct kvm_stats_header kvm_vcpu_stats_header = { .name_size = KVM_STATS_NAME_SIZE, diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index af9dd029a4e1..75c6f264c626 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -41,8 +41,6 @@ const struct _kvm_stats_desc kvm_vm_stats_desc[] = { KVM_GENERIC_VM_STATS() }; -static_assert(ARRAY_SIZE(kvm_vm_stats_desc) == - sizeof(struct kvm_vm_stat) / sizeof(u64)); const struct kvm_stats_header kvm_vm_stats_header = { .name_size = KVM_STATS_NAME_SIZE, @@ -85,8 +83,6 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { STATS_DESC_COUNTER(VCPU, vz_cpucfg_exits), #endif }; -static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) == - sizeof(struct kvm_vcpu_stat) / sizeof(u64)); const struct kvm_stats_header kvm_vcpu_stats_header = { .name_size = KVM_STATS_NAME_SIZE, diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 79833f78d1da..5cc6e90095b0 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -43,8 +43,6 @@ const struct _kvm_stats_desc kvm_vm_stats_desc[] = { STATS_DESC_ICOUNTER(VM, num_2M_pages), STATS_DESC_ICOUNTER(VM, num_1G_pages) }; -static_assert(ARRAY_SIZE(kvm_vm_stats_desc) == - sizeof(struct kvm_vm_stat) / sizeof(u64)); const struct kvm_stats_header kvm_vm_stats_header = { .name_size = KVM_STATS_NAME_SIZE, @@ -88,8 +86,6 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { STATS_DESC_COUNTER(VCPU, pthru_host), STATS_DESC_COUNTER(VCPU, pthru_bad_aff) }; -static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) == - sizeof(struct kvm_vcpu_stat) / sizeof(u64)); const struct kvm_stats_header kvm_vcpu_stats_header = { .name_size = KVM_STATS_NAME_SIZE, diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 551b30d84aee..5ed6c235e059 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -41,8 +41,6 @@ const struct _kvm_stats_desc kvm_vm_stats_desc[] = { STATS_DESC_ICOUNTER(VM, num_2M_pages), STATS_DESC_ICOUNTER(VM, num_1G_pages) }; -static_assert(ARRAY_SIZE(kvm_vm_stats_desc) == - sizeof(struct kvm_vm_stat) / sizeof(u64)); const struct kvm_stats_header kvm_vm_stats_header = { .name_size = KVM_STATS_NAME_SIZE, @@ -79,8 +77,6 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { STATS_DESC_COUNTER(VCPU, pthru_host), STATS_DESC_COUNTER(VCPU, pthru_bad_aff) }; -static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) == - sizeof(struct kvm_vcpu_stat) / sizeof(u64)); const struct kvm_stats_header kvm_vcpu_stats_header = { .name_size = 
KVM_STATS_NAME_SIZE, diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 02574d7b3612..4dc7e966a720 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -66,8 +66,6 @@ const struct _kvm_stats_desc kvm_vm_stats_desc[] = { STATS_DESC_COUNTER(VM, inject_service_signal), STATS_DESC_COUNTER(VM, inject_virtio) }; -static_assert(ARRAY_SIZE(kvm_vm_stats_desc) == - sizeof(struct kvm_vm_stat) / sizeof(u64)); const struct kvm_stats_header kvm_vm_stats_header = { .name_size = KVM_STATS_NAME_SIZE, @@ -174,8 +172,6 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { STATS_DESC_COUNTER(VCPU, instruction_diagnose_other), STATS_DESC_COUNTER(VCPU, pfault_sync) }; -static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) == - sizeof(struct kvm_vcpu_stat) / sizeof(u64)); const struct kvm_stats_header kvm_vcpu_stats_header = { .name_size = KVM_STATS_NAME_SIZE, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index bf8cb1021d11..9425589f34ca 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -238,8 +238,6 @@ const struct _kvm_stats_desc kvm_vm_stats_desc[] = { STATS_DESC_PCOUNTER(VM, max_mmu_rmap_size), STATS_DESC_PCOUNTER(VM, max_mmu_page_hash_collisions) }; -static_assert(ARRAY_SIZE(kvm_vm_stats_desc) == - sizeof(struct kvm_vm_stat) / sizeof(u64)); const struct kvm_stats_header kvm_vm_stats_header = { .name_size = KVM_STATS_NAME_SIZE, @@ -279,8 +277,6 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { STATS_DESC_COUNTER(VCPU, directed_yield_successful), STATS_DESC_ICOUNTER(VCPU, guest_mode) }; -static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) == - sizeof(struct kvm_vcpu_stat) / sizeof(u64)); const struct kvm_stats_header kvm_vcpu_stats_header = { .name_size = KVM_STATS_NAME_SIZE, -- cgit From 87bcc5fa092f82a9890f9e73e4f4c7016ef64049 Mon Sep 17 00:00:00 2001 From: Jing Zhang Date: Mon, 2 Aug 2021 16:56:32 +0000 Subject: KVM: stats: Add halt_wait_ns stats for all architectures Add a simple stat, halt_wait_ns, to record the time a VCPU has spent waiting, for all architectures (not just powerpc). 
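The per-architecture change then reduces to the accounting pattern sketched here (kernel-internal APIs assumed; it mirrors the book3s_hv.c hunk below):

ktime_t start_wait = ktime_get();

/* ... the vCPU blocks until a wakeup event arrives ... */

vcpu->stat.generic.halt_wait_ns +=
	ktime_to_ns(ktime_get()) - ktime_to_ns(start_wait);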
Signed-off-by: Jing Zhang Message-Id: <20210802165633.1866976-5-jingzhangos@google.com> Signed-off-by: Paolo Bonzini --- arch/powerpc/include/asm/kvm_host.h | 1 - arch/powerpc/kvm/book3s.c | 1 - arch/powerpc/kvm/book3s_hv.c | 2 +- arch/powerpc/kvm/booke.c | 1 - 4 files changed, 1 insertion(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 9f52f282b1aa..4931d03e5799 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -103,7 +103,6 @@ struct kvm_vcpu_stat { u64 emulated_inst_exits; u64 dec_exits; u64 ext_intr_exits; - u64 halt_wait_ns; u64 halt_successful_wait; u64 dbell_exits; u64 gdbell_exits; diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 5cc6e90095b0..b785f6772391 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -69,7 +69,6 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { STATS_DESC_COUNTER(VCPU, emulated_inst_exits), STATS_DESC_COUNTER(VCPU, dec_exits), STATS_DESC_COUNTER(VCPU, ext_intr_exits), - STATS_DESC_TIME_NSEC(VCPU, halt_wait_ns), STATS_DESC_COUNTER(VCPU, halt_successful_wait), STATS_DESC_COUNTER(VCPU, dbell_exits), STATS_DESC_COUNTER(VCPU, gdbell_exits), diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 1d1fcc290fca..813ca155561b 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -4144,7 +4144,7 @@ out: /* Attribute wait time */ if (do_sleep) { - vc->runner->stat.halt_wait_ns += + vc->runner->stat.generic.halt_wait_ns += ktime_to_ns(cur) - ktime_to_ns(start_wait); /* Attribute failed poll time */ if (vc->halt_poll_ns) diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 5ed6c235e059..977801c83aff 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -67,7 +67,6 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { STATS_DESC_COUNTER(VCPU, emulated_inst_exits), STATS_DESC_COUNTER(VCPU, dec_exits), STATS_DESC_COUNTER(VCPU, ext_intr_exits), - STATS_DESC_TIME_NSEC(VCPU, halt_wait_ns), STATS_DESC_COUNTER(VCPU, halt_successful_wait), STATS_DESC_COUNTER(VCPU, dbell_exits), STATS_DESC_COUNTER(VCPU, gdbell_exits), -- cgit From 8ccba534a1a5c6565220c81113d6157571f380cb Mon Sep 17 00:00:00 2001 From: Jing Zhang Date: Mon, 2 Aug 2021 16:56:33 +0000 Subject: KVM: stats: Add halt polling related histogram stats Add three log histogram stats to record the distribution of time spent on successful polling, failed polling and VCPU wait. halt_poll_success_hist: Distribution of time spent on a successful poll. halt_poll_fail_hist: Distribution of time spent on a failed poll. halt_wait_hist: Distribution of time a VCPU has spent waiting. 
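The attribution logic, condensed from the book3s_hv.c hunk that follows (a sketch; kernel-internal stats structures assumed, and poll_ns/wait_ns stand for the ktime deltas computed in the real code):

if (do_sleep) {
	/* We really blocked: account the wait, plus the poll that failed. */
	KVM_STATS_LOG_HIST_UPDATE(stat->generic.halt_wait_hist, wait_ns);
	if (polled)
		KVM_STATS_LOG_HIST_UPDATE(stat->generic.halt_poll_fail_hist, poll_ns);
} else if (polled) {
	/* The poll succeeded, so no wait happened at all. */
	KVM_STATS_LOG_HIST_UPDATE(stat->generic.halt_poll_success_hist, poll_ns);
}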
Signed-off-by: Jing Zhang Message-Id: <20210802165633.1866976-6-jingzhangos@google.com> Signed-off-by: Paolo Bonzini --- arch/powerpc/kvm/book3s_hv.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 813ca155561b..6d63c8e6d4f0 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -4146,17 +4146,29 @@ out: if (do_sleep) { vc->runner->stat.generic.halt_wait_ns += ktime_to_ns(cur) - ktime_to_ns(start_wait); + KVM_STATS_LOG_HIST_UPDATE( + vc->runner->stat.generic.halt_wait_hist, + ktime_to_ns(cur) - ktime_to_ns(start_wait)); /* Attribute failed poll time */ - if (vc->halt_poll_ns) + if (vc->halt_poll_ns) { vc->runner->stat.generic.halt_poll_fail_ns += ktime_to_ns(start_wait) - ktime_to_ns(start_poll); + KVM_STATS_LOG_HIST_UPDATE( + vc->runner->stat.generic.halt_poll_fail_hist, + ktime_to_ns(start_wait) - + ktime_to_ns(start_poll)); + } } else { /* Attribute successful poll time */ - if (vc->halt_poll_ns) + if (vc->halt_poll_ns) { vc->runner->stat.generic.halt_poll_success_ns += ktime_to_ns(cur) - ktime_to_ns(start_poll); + KVM_STATS_LOG_HIST_UPDATE( + vc->runner->stat.generic.halt_poll_success_hist, + ktime_to_ns(cur) - ktime_to_ns(start_poll)); + } } /* Adjust poll time */ -- cgit From 4293ddb788c1a98bdfa6479bcfd63ad5ce0a5ce6 Mon Sep 17 00:00:00 2001 From: Mingwei Zhang Date: Mon, 2 Aug 2021 21:46:05 -0700 Subject: KVM: x86/mmu: Remove redundant spte present check in mmu_set_spte Drop an unnecessary is_shadow_present_pte() check when updating the rmaps after installing a non-MMIO SPTE. set_spte() is used only to create shadow-present SPTEs, e.g. MMIO SPTEs are handled early on, mmu_set_spte() runs with mmu_lock held for write, i.e. the SPTE can't be zapped between writing the SPTE and updating the rmaps. Opportunistically combine the "new SPTE" logic for large pages and rmaps. No functional change intended. 
Suggested-by: Ben Gardon Reviewed-by: David Matlack Reviewed-by: Ben Gardon Reviewed-by: Sean Christopherson Signed-off-by: Mingwei Zhang Message-Id: <20210803044607.599629-2-mizhang@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 8568ae42e867..2f6458bca65d 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -2776,17 +2776,13 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, pgprintk("%s: setting spte %llx\n", __func__, *sptep); trace_kvm_mmu_set_spte(level, gfn, sptep); - if (!was_rmapped && is_large_pte(*sptep)) - ++vcpu->kvm->stat.lpages; - if (is_shadow_present_pte(*sptep)) { - if (!was_rmapped) { - rmap_count = rmap_add(vcpu, sptep, gfn); - if (rmap_count > vcpu->kvm->stat.max_mmu_rmap_size) - vcpu->kvm->stat.max_mmu_rmap_size = rmap_count; - if (rmap_count > RMAP_RECYCLE_THRESHOLD) - rmap_recycle(vcpu, sptep, gfn); - } + if (!was_rmapped) { + if (is_large_pte(*sptep)) + ++vcpu->kvm->stat.lpages; + rmap_count = rmap_add(vcpu, sptep, gfn); + if (rmap_count > RMAP_RECYCLE_THRESHOLD) + rmap_recycle(vcpu, sptep, gfn); } return ret; -- cgit From 088acd23526647844aec1c39db4ad02552c86c7b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 2 Aug 2021 21:46:06 -0700 Subject: KVM: x86/mmu: Avoid collision with !PRESENT SPTEs in TDP MMU lpage stats Factor in whether or not the old/new SPTEs are shadow-present when adjusting the large page stats in the TDP MMU. A modified MMIO SPTE can toggle the page size bit, as bit 7 is used to store the MMIO generation, i.e. is_large_pte() can get a false positive when called on a MMIO SPTE. Ditto for nuking SPTEs with REMOVED_SPTE, which sets bit 7 in its magic value. Opportunistically move the logic below the check to verify at least one of the old/new SPTEs is shadow present. Use is/was_leaf even though is/was_present would suffice. The code generation is roughly equivalent since all flags need to be computed prior to the code in question, and using the *_leaf flags will minimize the diff in a future enhancement to account all pages, i.e. will change the check to "is_leaf != was_leaf". 
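The collision is easy to reproduce in isolation, since bit 7 is both the page-size (PS) bit of a leaf paging entry and part of the MMIO-generation encoding; a standalone illustration (the SPTE value here is hypothetical, chosen only to set bit 7, and is not KVM's exact MMIO encoding):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PT_PAGE_SIZE_MASK (1ULL << 7)	/* PS bit of a leaf paging entry */

static bool is_large_pte(uint64_t pte)
{
	return pte & PT_PAGE_SIZE_MASK;
}

int main(void)
{
	/* A non-present MMIO SPTE whose generation bits happen to include bit 7. */
	uint64_t mmio_spte = 1ULL << 7;

	/* No page is mapped, yet a naive large-page test fires. */
	printf("is_large_pte(mmio_spte) = %d\n", is_large_pte(mmio_spte));
	return 0;
}

Gating the stat update on was_leaf/is_leaf (i.e. shadow-present and last-level) filters these false positives out.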
Reviewed-by: David Matlack Reviewed-by: Ben Gardon Fixes: 1699f65c8b65 ("kvm/x86: Fix 'lpages' kvm stat for TDM MMU") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Signed-off-by: Mingwei Zhang Message-Id: <20210803044607.599629-3-mizhang@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/tdp_mmu.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index fb1b2dc7a6d1..85f69558b490 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -412,6 +412,7 @@ static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn, bool was_leaf = was_present && is_last_spte(old_spte, level); bool is_leaf = is_present && is_last_spte(new_spte, level); bool pfn_changed = spte_to_pfn(old_spte) != spte_to_pfn(new_spte); + bool was_large, is_large; WARN_ON(level > PT64_ROOT_MAX_LEVEL); WARN_ON(level < PG_LEVEL_4K); @@ -445,13 +446,6 @@ static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn, trace_kvm_tdp_mmu_spte_changed(as_id, gfn, level, old_spte, new_spte); - if (is_large_pte(old_spte) != is_large_pte(new_spte)) { - if (is_large_pte(old_spte)) - atomic64_sub(1, (atomic64_t*)&kvm->stat.lpages); - else - atomic64_add(1, (atomic64_t*)&kvm->stat.lpages); - } - /* * The only times a SPTE should be changed from a non-present to * non-present state is when an MMIO entry is installed/modified/ @@ -477,6 +471,18 @@ static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn, return; } + /* + * Update large page stats if a large page is being zapped, created, or + * is replacing an existing shadow page. + */ + was_large = was_leaf && is_large_pte(old_spte); + is_large = is_leaf && is_large_pte(new_spte); + if (was_large != is_large) { + if (was_large) - atomic64_sub(1, (atomic64_t *)&kvm->stat.lpages); + else + atomic64_add(1, (atomic64_t *)&kvm->stat.lpages); + } if (was_leaf && is_dirty_spte(old_spte) && (!is_present || !is_dirty_spte(new_spte) || pfn_changed)) -- cgit From 71f51d2c3253645ccff69d6fa3a870f47005f0b3 Mon Sep 17 00:00:00 2001 From: Mingwei Zhang Date: Mon, 2 Aug 2021 21:46:07 -0700 Subject: KVM: x86/mmu: Add detailed page size stats Existing KVM code tracks the number of large pages regardless of their sizes. Therefore, when a large page of 1GB (or larger) is adopted, the information becomes less useful because lpages counts a mix of 1G and 2M pages. So remove lpages, since it is easy for user space to aggregate the info. Instead, provide comprehensive page stats for all sizes from 4K to 512G. 
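Aggregating the new per-size counters back into the old mixed view, or into total mapped bytes, is indeed a few lines of userspace arithmetic; a sketch with made-up sample values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Hypothetical snapshot of the new per-size stats. */
	uint64_t pages_4k = 123456, pages_2m = 789, pages_1g = 2;

	uint64_t lpages = pages_2m + pages_1g;	/* the old mixed counter */
	uint64_t bytes = pages_4k * (4ULL << 10) +
			 pages_2m * (2ULL << 20) +
			 pages_1g * (1ULL << 30);

	printf("lpages=%llu mapped=%llu bytes\n",
	       (unsigned long long)lpages, (unsigned long long)bytes);
	return 0;
}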
Suggested-by: Ben Gardon Reviewed-by: David Matlack Reviewed-by: Ben Gardon Signed-off-by: Mingwei Zhang Cc: Jing Zhang Cc: David Matlack Cc: Sean Christopherson Message-Id: <20210803044607.599629-4-mizhang@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 9 ++++++++- arch/x86/kvm/mmu.h | 4 ++++ arch/x86/kvm/mmu/mmu.c | 32 ++++++++++++++++---------------- arch/x86/kvm/mmu/tdp_mmu.c | 15 ++------------- arch/x86/kvm/x86.c | 4 +++- 5 files changed, 33 insertions(+), 31 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 28c85c80e831..4c4983a9378c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1214,7 +1214,14 @@ struct kvm_vm_stat { u64 mmu_recycled; u64 mmu_cache_miss; u64 mmu_unsync; - u64 lpages; + union { + struct { + atomic64_t pages_4k; + atomic64_t pages_2m; + atomic64_t pages_1g; + }; + atomic64_t pages[KVM_NR_PAGE_SIZES]; + }; u64 nx_lpage_splits; u64 max_mmu_page_hash_collisions; u64 max_mmu_rmap_size; diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 59e831a8ab9d..e9688a9f7b57 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -261,4 +261,8 @@ kvm_mmu_slot_lpages(struct kvm_memory_slot *slot, int level) return __kvm_mmu_slot_lpages(slot, slot->npages, level); } +static inline void kvm_update_page_stats(struct kvm *kvm, int level, int count) +{ + atomic64_add(count, &kvm->stat.pages[level - 1]); +} #endif diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 2f6458bca65d..54cb15e4b550 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -604,10 +604,11 @@ static bool mmu_spte_update(u64 *sptep, u64 new_spte) * state bits, it is used to clear the last level sptep. * Returns the old PTE. 
*/ -static u64 mmu_spte_clear_track_bits(u64 *sptep) +static int mmu_spte_clear_track_bits(struct kvm *kvm, u64 *sptep) { kvm_pfn_t pfn; u64 old_spte = *sptep; + int level = sptep_to_sp(sptep)->role.level; if (!spte_has_volatile_bits(old_spte)) __update_clear_spte_fast(sptep, 0ull); @@ -617,6 +618,8 @@ static u64 mmu_spte_clear_track_bits(u64 *sptep) if (!is_shadow_present_pte(old_spte)) return old_spte; + kvm_update_page_stats(kvm, level, -1); + pfn = spte_to_pfn(old_spte); /* @@ -1001,14 +1004,15 @@ static void __pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head) } } -static void pte_list_remove(struct kvm_rmap_head *rmap_head, u64 *sptep) +static void pte_list_remove(struct kvm *kvm, struct kvm_rmap_head *rmap_head, + u64 *sptep) { - mmu_spte_clear_track_bits(sptep); + mmu_spte_clear_track_bits(kvm, sptep); __pte_list_remove(sptep, rmap_head); } /* Return true if rmap existed, false otherwise */ -static bool pte_list_destroy(struct kvm_rmap_head *rmap_head) +static bool pte_list_destroy(struct kvm *kvm, struct kvm_rmap_head *rmap_head) { struct pte_list_desc *desc, *next; int i; @@ -1017,7 +1021,7 @@ static bool pte_list_destroy(struct kvm_rmap_head *rmap_head) return false; if (!(rmap_head->val & 1)) { - mmu_spte_clear_track_bits((u64 *)rmap_head->val); + mmu_spte_clear_track_bits(kvm, (u64 *)rmap_head->val); goto out; } @@ -1025,7 +1029,7 @@ static bool pte_list_destroy(struct kvm_rmap_head *rmap_head) for (; desc; desc = next) { for (i = 0; i < desc->spte_count; i++) - mmu_spte_clear_track_bits(desc->sptes[i]); + mmu_spte_clear_track_bits(kvm, desc->sptes[i]); next = desc->more; mmu_free_pte_list_desc(desc); } @@ -1188,7 +1192,7 @@ out: static void drop_spte(struct kvm *kvm, u64 *sptep) { - u64 old_spte = mmu_spte_clear_track_bits(sptep); + u64 old_spte = mmu_spte_clear_track_bits(kvm, sptep); if (is_shadow_present_pte(old_spte)) rmap_remove(kvm, sptep); @@ -1200,7 +1204,6 @@ static bool __drop_large_spte(struct kvm *kvm, u64 *sptep) if (is_large_pte(*sptep)) { WARN_ON(sptep_to_sp(sptep)->role.level == PG_LEVEL_4K); drop_spte(kvm, sptep); - --kvm->stat.lpages; return true; } @@ -1450,7 +1453,7 @@ static bool rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn) static bool kvm_zap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head, const struct kvm_memory_slot *slot) { - return pte_list_destroy(rmap_head); + return pte_list_destroy(kvm, rmap_head); } static bool kvm_unmap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head, @@ -1481,13 +1484,13 @@ restart: need_flush = 1; if (pte_write(pte)) { - pte_list_remove(rmap_head, sptep); + pte_list_remove(kvm, rmap_head, sptep); goto restart; } else { new_spte = kvm_mmu_changed_pte_notifier_make_spte( *sptep, new_pfn); - mmu_spte_clear_track_bits(sptep); + mmu_spte_clear_track_bits(kvm, sptep); mmu_spte_set(sptep, new_spte); } } @@ -2292,8 +2295,6 @@ static int mmu_page_zap_pte(struct kvm *kvm, struct kvm_mmu_page *sp, if (is_shadow_present_pte(pte)) { if (is_last_spte(pte, sp->role.level)) { drop_spte(kvm, spte); - if (is_large_pte(pte)) - --kvm->stat.lpages; } else { child = to_shadow_page(pte & PT64_BASE_ADDR_MASK); drop_parent_pte(child, spte); @@ -2778,8 +2779,7 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, trace_kvm_mmu_set_spte(level, gfn, sptep); if (!was_rmapped) { - if (is_large_pte(*sptep)) - ++vcpu->kvm->stat.lpages; + kvm_update_page_stats(vcpu->kvm, level, 1); rmap_count = rmap_add(vcpu, sptep, gfn); if (rmap_count > RMAP_RECYCLE_THRESHOLD) rmap_recycle(vcpu, sptep, gfn); @@ -5809,7 +5809,7 @@ 
restart: if (sp->role.direct && !kvm_is_reserved_pfn(pfn) && sp->role.level < kvm_mmu_max_mapping_level(kvm, slot, sp->gfn, pfn, PG_LEVEL_NUM)) { - pte_list_remove(rmap_head, sptep); + pte_list_remove(kvm, rmap_head, sptep); if (kvm_available_flush_tlb_with_range()) kvm_flush_remote_tlbs_with_address(kvm, sp->gfn, diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 85f69558b490..db636250972a 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -412,7 +412,6 @@ static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn, bool was_leaf = was_present && is_last_spte(old_spte, level); bool is_leaf = is_present && is_last_spte(new_spte, level); bool pfn_changed = spte_to_pfn(old_spte) != spte_to_pfn(new_spte); - bool was_large, is_large; WARN_ON(level > PT64_ROOT_MAX_LEVEL); WARN_ON(level < PG_LEVEL_4K); @@ -471,18 +470,8 @@ static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn, return; } - /* - * Update large page stats if a large page is being zapped, created, or - * is replacing an existing shadow page. - */ - was_large = was_leaf && is_large_pte(old_spte); - is_large = is_leaf && is_large_pte(new_spte); - if (was_large != is_large) { - if (was_large) - atomic64_sub(1, (atomic64_t *)&kvm->stat.lpages); - else - atomic64_add(1, (atomic64_t *)&kvm->stat.lpages); - } + if (is_leaf != was_leaf) + kvm_update_page_stats(kvm, level, is_leaf ? 1 : -1); if (was_leaf && is_dirty_spte(old_spte) && (!is_present || !is_dirty_spte(new_spte) || pfn_changed)) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9425589f34ca..4e07cae56636 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -233,7 +233,9 @@ const struct _kvm_stats_desc kvm_vm_stats_desc[] = { STATS_DESC_COUNTER(VM, mmu_recycled), STATS_DESC_COUNTER(VM, mmu_cache_miss), STATS_DESC_ICOUNTER(VM, mmu_unsync), - STATS_DESC_ICOUNTER(VM, lpages), + STATS_DESC_ICOUNTER(VM, pages_4k), + STATS_DESC_ICOUNTER(VM, pages_2m), + STATS_DESC_ICOUNTER(VM, pages_1g), STATS_DESC_ICOUNTER(VM, nx_lpage_splits), STATS_DESC_PCOUNTER(VM, max_mmu_rmap_size), STATS_DESC_PCOUNTER(VM, max_mmu_page_hash_collisions) -- cgit From 9653f2da7522c5e762e2edd2beb53170669d0a2b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 10 Aug 2021 15:45:54 -0700 Subject: KVM: x86/mmu: Drop 'shared' param from tdp_mmu_link_page() Drop @shared from tdp_mmu_link_page() and hardcode it to work for mmu_lock being held for read. The helper has exactly one caller and in all likelihood will only ever have exactly one caller. Even if KVM adds a path to install translations without an initiating page fault, odds are very, very good that the path will just be a wrapper to the "page fault" handler (both SNP and TDX RFCs propose patches to do exactly that). No functional change intended. 
Cc: Ben Gardon Signed-off-by: Sean Christopherson Message-Id: <20210810224554.2978735-3-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/tdp_mmu.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index db636250972a..64ccfc1fa553 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -255,26 +255,17 @@ static void handle_changed_spte_dirty_log(struct kvm *kvm, int as_id, gfn_t gfn, * * @kvm: kvm instance * @sp: the new page - * @shared: This operation may not be running under the exclusive use of - * the MMU lock and the operation must synchronize with other - * threads that might be adding or removing pages. * @account_nx: This page replaces a NX large page and should be marked for * eventual reclaim. */ static void tdp_mmu_link_page(struct kvm *kvm, struct kvm_mmu_page *sp, - bool shared, bool account_nx) + bool account_nx) { - if (shared) - spin_lock(&kvm->arch.tdp_mmu_pages_lock); - else - lockdep_assert_held_write(&kvm->mmu_lock); - + spin_lock(&kvm->arch.tdp_mmu_pages_lock); list_add(&sp->link, &kvm->arch.tdp_mmu_pages); if (account_nx) account_huge_nx_page(kvm, sp); - - if (shared) - spin_unlock(&kvm->arch.tdp_mmu_pages_lock); + spin_unlock(&kvm->arch.tdp_mmu_pages_lock); } /** @@ -1062,7 +1053,7 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, !shadow_accessed_mask); if (tdp_mmu_set_spte_atomic_no_dirty_log(vcpu->kvm, &iter, new_spte)) { - tdp_mmu_link_page(vcpu->kvm, sp, true, + tdp_mmu_link_page(vcpu->kvm, sp, huge_page_disallowed && req_level >= iter.level); -- cgit From 7a4bca85b23f7a573da61f161dfbf8b00e9e2955 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Wed, 11 Aug 2021 15:29:22 +0300 Subject: KVM: SVM: split svm_handle_invalid_exit Split the check for having a vmexit handler to svm_check_exit_valid, and make svm_handle_invalid_exit only handle a vmexit that is already not valid. 
Signed-off-by: Maxim Levitsky Message-Id: <20210811122927.900604-2-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 7b58e445a967..e45259177009 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3236,12 +3236,14 @@ static void dump_vmcb(struct kvm_vcpu *vcpu) "excp_to:", save->last_excp_to); } -static int svm_handle_invalid_exit(struct kvm_vcpu *vcpu, u64 exit_code) +static bool svm_check_exit_valid(struct kvm_vcpu *vcpu, u64 exit_code) { - if (exit_code < ARRAY_SIZE(svm_exit_handlers) && - svm_exit_handlers[exit_code]) - return 0; + return (exit_code < ARRAY_SIZE(svm_exit_handlers) && + svm_exit_handlers[exit_code]); +} +static int svm_handle_invalid_exit(struct kvm_vcpu *vcpu, u64 exit_code) +{ vcpu_unimpl(vcpu, "svm: unexpected exit reason 0x%llx\n", exit_code); dump_vmcb(vcpu); vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; @@ -3249,14 +3251,13 @@ static int svm_handle_invalid_exit(struct kvm_vcpu *vcpu, u64 exit_code) vcpu->run->internal.ndata = 2; vcpu->run->internal.data[0] = exit_code; vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu; - - return -EINVAL; + return 0; } int svm_invoke_exit_handler(struct kvm_vcpu *vcpu, u64 exit_code) { - if (svm_handle_invalid_exit(vcpu, exit_code)) - return 0; + if (!svm_check_exit_valid(vcpu, exit_code)) + return svm_handle_invalid_exit(vcpu, exit_code); #ifdef CONFIG_RETPOLINE if (exit_code == SVM_EXIT_MSR) -- cgit From 61e5f69ef08379cdc74e8f15d3770976ed48480a Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Wed, 11 Aug 2021 15:29:26 +0300 Subject: KVM: x86: implement KVM_GUESTDBG_BLOCKIRQ KVM_GUESTDBG_BLOCKIRQ will allow KVM to block all interrupts while running. This change is mostly intended for more robust single stepping of the guest and it has the following benefits when enabled: * Resuming from a breakpoint is much more reliable. When resuming execution from a breakpoint, with interrupts enabled, more often than not, KVM would inject an interrupt and make the CPU jump immediately to the interrupt handler and eventually return to the breakpoint, to trigger it again. From the user point of view it looks like the CPU never executed a single instruction and in some cases that can even prevent forward progress, for example, when the breakpoint is placed by an automated script (e.g lx-symbols), which does something in response to the breakpoint and then continues the guest automatically. If the script execution takes enough time for another interrupt to arrive, the guest will be stuck on the same breakpoint RIP forever. * Normal single stepping is much more predictable, since it won't land the debugger into an interrupt handler. * RFLAGS.TF has less chance to be leaked to the guest: We set that flag behind the guest's back to do single stepping but if single step lands us into an interrupt/exception handler it will be leaked to the guest in the form of being pushed to the stack. This doesn't completely eliminate this problem as exceptions can still happen, but at least this reduces the chances of this happening. 
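From userspace, the new flag is simply ORed into the KVM_SET_GUEST_DEBUG control word; a minimal usage sketch (assumes vcpu_fd is an already-created vCPU file descriptor; error handling elided):

#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>

/* Single-step a vCPU without interrupt injection perturbing the flow. */
static int enable_quiet_single_step(int vcpu_fd)
{
	struct kvm_guest_debug dbg;

	memset(&dbg, 0, sizeof(dbg));
	dbg.control = KVM_GUESTDBG_ENABLE |
		      KVM_GUESTDBG_SINGLESTEP |
		      KVM_GUESTDBG_BLOCKIRQ;	/* the new flag */

	return ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
}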
Signed-off-by: Maxim Levitsky Message-Id: <20210811122927.900604-6-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 3 ++- arch/x86/include/uapi/asm/kvm.h | 1 + arch/x86/kvm/x86.c | 4 ++++ 3 files changed, 7 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4c4983a9378c..7723865077fd 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -222,7 +222,8 @@ enum x86_intercept_stage; KVM_GUESTDBG_USE_HW_BP | \ KVM_GUESTDBG_USE_SW_BP | \ KVM_GUESTDBG_INJECT_BP | \ - KVM_GUESTDBG_INJECT_DB) + KVM_GUESTDBG_INJECT_DB | \ + KVM_GUESTDBG_BLOCKIRQ) #define PFERR_PRESENT_BIT 0 diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index a6c327f8ad9e..2ef1f6513c68 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -295,6 +295,7 @@ struct kvm_debug_exit_arch { #define KVM_GUESTDBG_USE_HW_BP 0x00020000 #define KVM_GUESTDBG_INJECT_DB 0x00040000 #define KVM_GUESTDBG_INJECT_BP 0x00080000 +#define KVM_GUESTDBG_BLOCKIRQ 0x00100000 /* for KVM_SET_GUEST_DEBUG */ struct kvm_guest_debug_arch { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4e07cae56636..1a00af1b076b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8892,6 +8892,10 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit) can_inject = false; } + /* Don't inject interrupts if the user asked to avoid doing so */ + if (vcpu->guest_debug & KVM_GUESTDBG_BLOCKIRQ) + return 0; + /* * Finally, inject interrupt events. If an event cannot be injected * due to architectural conditions (e.g. IF=0) a window-open exit -- cgit From ec607a564f70519b340f7eb4cfc0f4a6b55285ac Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 6 Aug 2021 07:05:58 -0400 Subject: KVM: x86: clamp host mapping level to max_level in kvm_mmu_max_mapping_level This change started as a way to make kvm_mmu_hugepage_adjust a bit simpler, but it does fix two bugs as well. One bug is in zapping collapsible PTEs. If one large page size is disallowed but not all of them, kvm_mmu_max_mapping_level will return the host mapping level and the small PTEs will be zapped up to that level. However, if e.g. 1GB pages are prohibited, we can still zap 4KB mappings and preserve the 2MB ones. This can happen for example when NX huge pages are in use. The second would happen when userspace backs guest memory with a 1GB hugepage but only assigns a subset of the page to the guest. 1GB pages would be disallowed by the memslot, but not 2MB pages. kvm_mmu_max_mapping_level() would fall through to the host_pfn_mapping_level() logic, see the 1GB hugepage, and map the whole thing into the guest. 
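Concretely, the fix clamps the host level against the memslot-derived maximum; a sketch of the scenario with KVM's level numbering (1 = 4KB, 2 = 2MB, 3 = 1GB):

/* Editor's sketch of the fixed logic, not the exact kernel code. */
static int max_mapping_level(int memslot_max_level, int host_level)
{
	/* memslot disallows 1GB:              memslot_max_level == 2 (2MB) */
	/* host backs the gfn with a 1GB page: host_level == 3        (1GB) */
	return host_level < memslot_max_level ? host_level
					      : memslot_max_level;	/* -> 2 (2MB) */
}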
Fixes: 2f57b7051fe8 ("KVM: x86/mmu: Persist gfn_lpage_is_disallowed() to max_level") Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 54cb15e4b550..bfd2705a7291 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -2910,6 +2910,7 @@ int kvm_mmu_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn, int max_level) { struct kvm_lpage_info *linfo; + int host_level; max_level = min(max_level, max_huge_page_level); for ( ; max_level > PG_LEVEL_4K; max_level--) { @@ -2921,7 +2922,8 @@ int kvm_mmu_max_mapping_level(struct kvm *kvm, if (max_level == PG_LEVEL_4K) return PG_LEVEL_4K; - return host_pfn_mapping_level(kvm, gfn, pfn, slot); + host_level = host_pfn_mapping_level(kvm, gfn, pfn, slot); + return min(host_level, max_level); } int kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, gfn_t gfn, @@ -2945,17 +2947,12 @@ int kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, gfn_t gfn, if (!slot) return PG_LEVEL_4K; - level = kvm_mmu_max_mapping_level(vcpu->kvm, slot, gfn, pfn, max_level); - if (level == PG_LEVEL_4K) - return level; - - *req_level = level = min(level, max_level); - /* * Enforce the iTLB multihit workaround after capturing the requested * level, which will be used to do precise, accurate accounting. */ - if (huge_page_disallowed) + *req_level = level = kvm_mmu_max_mapping_level(vcpu->kvm, slot, gfn, pfn, max_level); + if (level == PG_LEVEL_4K || huge_page_disallowed) return PG_LEVEL_4K; /* -- cgit From 746700d21fd52399c97aeb7791584bbf5426983c Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 18 Aug 2021 11:55:47 -0500 Subject: KVM: x86: Allow CPU to force vendor-specific TDP level AMD future CPUs will require a 5-level NPT if host CR4.LA57 is set. To prevent kvm_mmu_get_tdp_level() from incorrectly changing NPT level on behalf of CPUs, add a new parameter in kvm_configure_mmu() to force a fixed TDP level. 
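The two backends then differ only in the new second argument, where 0 means "no forced level" (consolidated here from the SVM and VMX hunks below for comparison):

/* SVM: host CR4.LA57 dictates the NPT level, so force it. */
kvm_configure_mmu(npt_enabled, get_max_npt_level(),
		  get_max_npt_level(), PG_LEVEL_1G);

/* VMX: no such constraint; 0 lets KVM pick the TDP level itself. */
kvm_configure_mmu(enable_ept, 0, vmx_get_max_tdp_level(),
		  ept_lpage_level);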
Signed-off-by: Wei Huang Message-Id: <20210818165549.3771014-2-wei.huang2@amd.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 5 ++--- arch/x86/kvm/mmu/mmu.c | 10 ++++++++-- arch/x86/kvm/svm/svm.c | 4 +++- arch/x86/kvm/vmx/vmx.c | 3 ++- 4 files changed, 15 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 7723865077fd..24dac52b7bde 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -717,7 +717,6 @@ struct kvm_vcpu_arch { u64 reserved_gpa_bits; int maxphyaddr; - int max_tdp_level; /* emulate context */ @@ -1766,8 +1765,8 @@ void kvm_mmu_invalidate_gva(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid); void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd); -void kvm_configure_mmu(bool enable_tdp, int tdp_max_root_level, - int tdp_huge_page_level); +void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level, + int tdp_max_root_level, int tdp_huge_page_level); static inline u16 kvm_read_ldt(void) { diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index bfd2705a7291..b2e68ce8b722 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -97,6 +97,7 @@ module_param_named(flush_on_reuse, force_flush_and_sync_on_reuse, bool, 0644); bool tdp_enabled = false; static int max_huge_page_level __read_mostly; +static int tdp_root_level __read_mostly; static int max_tdp_level __read_mostly; enum { @@ -4684,6 +4685,10 @@ static union kvm_mmu_role kvm_calc_mmu_role_common(struct kvm_vcpu *vcpu, static inline int kvm_mmu_get_tdp_level(struct kvm_vcpu *vcpu) { + /* tdp_root_level is architecture forced level, use it if nonzero */ + if (tdp_root_level) + return tdp_root_level; + /* Use 5-level TDP if and only if it's useful/necessary. */ if (max_tdp_level == 5 && cpuid_maxphyaddr(vcpu) <= 48) return 4; @@ -5375,10 +5380,11 @@ void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid) */ } -void kvm_configure_mmu(bool enable_tdp, int tdp_max_root_level, - int tdp_huge_page_level) +void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level, + int tdp_max_root_level, int tdp_huge_page_level) { tdp_enabled = enable_tdp; + tdp_root_level = tdp_forced_root_level; max_tdp_level = tdp_max_root_level; /* diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index e45259177009..bcffae2e36d2 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1013,7 +1013,9 @@ static __init int svm_hardware_setup(void) if (!boot_cpu_has(X86_FEATURE_NPT)) npt_enabled = false; - kvm_configure_mmu(npt_enabled, get_max_npt_level(), PG_LEVEL_1G); + /* Force VM NPT level equal to the host's max NPT level */ + kvm_configure_mmu(npt_enabled, get_max_npt_level(), + get_max_npt_level(), PG_LEVEL_1G); pr_info("kvm: Nested Paging %sabled\n", npt_enabled ? "en" : "dis"); /* Note, SEV setup consumes npt_enabled. 
*/ diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index cd913100b300..fada1055f325 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7820,7 +7820,8 @@ static __init int hardware_setup(void) ept_lpage_level = PG_LEVEL_2M; else ept_lpage_level = PG_LEVEL_4K; - kvm_configure_mmu(enable_ept, vmx_get_max_tdp_level(), ept_lpage_level); + kvm_configure_mmu(enable_ept, 0, vmx_get_max_tdp_level(), + ept_lpage_level); /* * Only enable PML when hardware supports PML feature, and both EPT -- cgit From cb0f722aff6e9ba970a9fee9263c7821bbe811de Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 18 Aug 2021 11:55:48 -0500 Subject: KVM: x86/mmu: Support shadowing NPT when 5-level paging is enabled in host When the 5-level page table CPU flag is set in the host, but the guest has CR4.LA57=0 (including the case of a 32-bit guest), the top level of the shadow NPT page tables will be fixed, consisting of one pointer to a lower-level table and 511 non-present entries. Extend the existing code that creates the fixed PML4 or PDP table, to provide a fixed PML5 table if needed. This is not needed on EPT because the number of layers in the tables is specified in the EPTP instead of depending on the host CR4. Suggested-by: Paolo Bonzini Signed-off-by: Wei Huang Message-Id: <20210818165549.3771014-3-wei.huang2@amd.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/mmu/mmu.c | 53 ++++++++++++++++++++++++++++------------- 2 files changed, 38 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 24dac52b7bde..0532f4e84308 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -441,6 +441,7 @@ struct kvm_mmu { u64 *pae_root; u64 *pml4_root; + u64 *pml5_root; /* * check zero bits on shadow page table entries, these diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index b2e68ce8b722..c04e30f6e0db 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3536,15 +3536,22 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) * the shadow page table may be a PAE or a long mode page table. 
*/ pm_mask = PT_PRESENT_MASK | shadow_me_mask; - if (mmu->shadow_root_level == PT64_ROOT_4LEVEL) { + if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL) { pm_mask |= PT_ACCESSED_MASK | PT_WRITABLE_MASK | PT_USER_MASK; if (WARN_ON_ONCE(!mmu->pml4_root)) { r = -EIO; goto out_unlock; } - mmu->pml4_root[0] = __pa(mmu->pae_root) | pm_mask; + + if (mmu->shadow_root_level == PT64_ROOT_5LEVEL) { + if (WARN_ON_ONCE(!mmu->pml5_root)) { + r = -EIO; + goto out_unlock; + } + mmu->pml5_root[0] = __pa(mmu->pml4_root) | pm_mask; + } } for (i = 0; i < 4; ++i) { @@ -3563,7 +3570,9 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) mmu->pae_root[i] = root | pm_mask; } - if (mmu->shadow_root_level == PT64_ROOT_4LEVEL) + if (mmu->shadow_root_level == PT64_ROOT_5LEVEL) + mmu->root_hpa = __pa(mmu->pml5_root); + else if (mmu->shadow_root_level == PT64_ROOT_4LEVEL) mmu->root_hpa = __pa(mmu->pml4_root); else mmu->root_hpa = __pa(mmu->pae_root); @@ -3579,7 +3588,9 @@ out_unlock: static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu) { struct kvm_mmu *mmu = vcpu->arch.mmu; - u64 *pml4_root, *pae_root; + u64 *pml5_root = NULL; + u64 *pml4_root = NULL; + u64 *pae_root; /* * When shadowing 32-bit or PAE NPT with 64-bit NPT, the PML4 and PDP @@ -3591,21 +3602,15 @@ static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu) mmu->shadow_root_level < PT64_ROOT_4LEVEL) return 0; - /* - * This mess only works with 4-level paging and needs to be updated to - * work with 5-level paging. - */ - if (WARN_ON_ONCE(mmu->shadow_root_level != PT64_ROOT_4LEVEL)) - return -EIO; - - if (mmu->pae_root && mmu->pml4_root) + if (mmu->pae_root && mmu->pml4_root && mmu->pml5_root) return 0; /* * The special roots should always be allocated in concert. Yell and * bail if KVM ends up in a state where only one of the roots is valid. */ - if (WARN_ON_ONCE(!tdp_enabled || mmu->pae_root || mmu->pml4_root)) + if (WARN_ON_ONCE(!tdp_enabled || mmu->pae_root || mmu->pml4_root || + mmu->pml5_root)) return -EIO; /* @@ -3616,16 +3621,31 @@ static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu) if (!pae_root) return -ENOMEM; +#ifdef CONFIG_X86_64 pml4_root = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT); - if (!pml4_root) { - free_page((unsigned long)pae_root); - return -ENOMEM; + if (!pml4_root) + goto err_pml4; + + if (mmu->shadow_root_level > PT64_ROOT_4LEVEL) { + pml5_root = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT); + if (!pml5_root) + goto err_pml5; } +#endif mmu->pae_root = pae_root; mmu->pml4_root = pml4_root; + mmu->pml5_root = pml5_root; return 0; + +#ifdef CONFIG_X86_64 +err_pml5: + free_page((unsigned long)pml4_root); +err_pml4: + free_page((unsigned long)pae_root); + return -ENOMEM; +#endif } void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) @@ -5461,6 +5481,7 @@ static void free_mmu_pages(struct kvm_mmu *mmu) set_memory_encrypted((unsigned long)mmu->pae_root, 1); free_page((unsigned long)mmu->pae_root); free_page((unsigned long)mmu->pml4_root); + free_page((unsigned long)mmu->pml5_root); } static int __kvm_mmu_create(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu) -- cgit From 43e540cc9f2ca12a2364ddf64e5ef929a546550d Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 18 Aug 2021 11:55:49 -0500 Subject: KVM: SVM: Add 5-level page table support for SVM When the 5-level page table is enabled on host OS, the nested page table for guest VMs must use 5-level as well. Update get_npt_level() function to reflect this requirement. In the meanwhile, remove the code that prevents kvm-amd driver from being loaded when 5-level page table is detected. 
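Together with the fixed PML5 table added by the previous patch, this keeps guests with CR4.LA57=0 runnable under 5-level NPT. A toy userspace model of that fixed top level follows (illustrative only; a real PML5 is page-aligned and holds physical addresses, neither of which this sketch attempts):

  #include <stdint.h>
  #include <stdio.h>

  #define PT_PRESENT 0x1ULL

  int main(void)
  {
          static uint64_t pml4[512];
          static uint64_t pml5[512];
          int i, present = 0;

          /* one pointer to the lower-level table, 511 non-present entries */
          pml5[0] = (uint64_t)(uintptr_t)pml4 | PT_PRESENT;

          for (i = 0; i < 512; i++)
                  present += !!(pml5[i] & PT_PRESENT);

          printf("present PML5 entries: %d\n", present);  /* prints 1 */
          return 0;
  }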
Signed-off-by: Wei Huang Message-Id: <20210818165549.3771014-4-wei.huang2@amd.com> [Tweak condition as suggested by Sean. - Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index bcffae2e36d2..1a70e11f0487 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -259,7 +259,7 @@ u32 svm_msrpm_offset(u32 msr) static int get_max_npt_level(void) { #ifdef CONFIG_X86_64 - return PT64_ROOT_4LEVEL; + return pgtable_l5_enabled() ? PT64_ROOT_5LEVEL : PT64_ROOT_4LEVEL; #else return PT32E_ROOT_LEVEL; #endif @@ -460,11 +460,6 @@ static int has_svm(void) return 0; } - if (pgtable_l5_enabled()) { - pr_info("KVM doesn't yet support 5-level paging on AMD SVM\n"); - return 0; - } - return 1; } -- cgit From 126b39368604dbf155ce6dad8781a0454477022a Mon Sep 17 00:00:00 2001 From: Huilong Deng Date: Tue, 17 Aug 2021 10:53:38 +0800 Subject: MIPS: Return true/false (not 1/0) from bool functions ./arch/mips/kernel/uprobes.c:261:8-9: WARNING: return of 0/1 in function 'arch_uprobe_skip_sstep' with return type bool ./arch/mips/kernel/uprobes.c:78:10-11: WARNING: return of 0/1 in function 'is_trap_insn' with return type bool ./arch/mips/kvm/mmu.c:489:9-10: WARNING: return of 0/1 in function 'kvm_test_age_gfn' with return type bool ./arch/mips/kvm/mmu.c:445:8-9: WARNING: return of 0/1 in function 'kvm_unmap_gfn_range' with return type bool Signed-off-by: Huilong Deng Signed-off-by: Thomas Bogendoerfer --- arch/mips/kernel/uprobes.c | 10 +++++----- arch/mips/kvm/mmu.c | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/mips/kernel/uprobes.c b/arch/mips/kernel/uprobes.c index 6dbe4eab0a0e..9db2a6db5f62 100644 --- a/arch/mips/kernel/uprobes.c +++ b/arch/mips/kernel/uprobes.c @@ -75,7 +75,7 @@ bool is_trap_insn(uprobe_opcode_t *insn) case tlt_op: case tltu_op: case tne_op: - return 1; + return true; } break; @@ -87,12 +87,12 @@ bool is_trap_insn(uprobe_opcode_t *insn) case tlti_op: case tltiu_op: case tnei_op: - return 1; + return true; } break; } - return 0; + return false; } #define UPROBE_TRAP_NR ULONG_MAX @@ -254,9 +254,9 @@ unsigned long uprobe_get_swbp_addr(struct pt_regs *regs) * See if the instruction can be emulated. * Returns true if instruction was emulated, false otherwise. * - * For now we always emulate so this function just returns 0. + * For now we always emulate so this function just returns false. 
*/ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) { - return 0; + return false; } diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c index 6d1f68cf4edf..1bfd1b501d82 100644 --- a/arch/mips/kvm/mmu.c +++ b/arch/mips/kvm/mmu.c @@ -442,7 +442,7 @@ static int kvm_mips_mkold_gpa_pt(struct kvm *kvm, gfn_t start_gfn, bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) { kvm_mips_flush_gpa_pt(kvm, range->start, range->end); - return 1; + return true; } bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) @@ -486,7 +486,7 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) pte_t *gpa_pte = kvm_mips_pte_for_gpa(kvm, NULL, gpa); if (!gpa_pte) - return 0; + return false; return pte_young(*gpa_pte); } -- cgit From 0181f6f19c6c35b24f1516d8db22f3bbce762633 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 19 Aug 2021 20:04:15 +0300 Subject: MIPS: mscc: ocelot: disable all switch ports by default The ocelot switch driver used to ignore ports which do not have a phy-handle property and not probe those, but this is not quite ok since it is valid to not have a phy-handle property if there is a fixed-link. It seems that checking for a phy-handle was a proxy for the proper check which is for the status, but that doesn't make a lot of sense, since the ocelot driver already iterates using for_each_available_child_of_node which skips the disabled ports, so I have no idea. Anyway, a widespread pattern in device trees is for a SoC dtsi to disable by default all hardware, and let board dts files enable what is used. So let's do that and enable only the ports with a phy-handle in the pcb120 and pcb123 device tree files. Signed-off-by: Vladimir Oltean Signed-off-by: Thomas Bogendoerfer --- arch/mips/boot/dts/mscc/ocelot.dtsi | 11 +++++++++++ arch/mips/boot/dts/mscc/ocelot_pcb120.dts | 8 ++++++++ arch/mips/boot/dts/mscc/ocelot_pcb123.dts | 4 ++++ 3 files changed, 23 insertions(+) (limited to 'arch') diff --git a/arch/mips/boot/dts/mscc/ocelot.dtsi b/arch/mips/boot/dts/mscc/ocelot.dtsi index 535a98284dcb..e51db651af13 100644 --- a/arch/mips/boot/dts/mscc/ocelot.dtsi +++ b/arch/mips/boot/dts/mscc/ocelot.dtsi @@ -150,36 +150,47 @@ port0: port@0 { reg = <0>; + status = "disabled"; }; port1: port@1 { reg = <1>; + status = "disabled"; }; port2: port@2 { reg = <2>; + status = "disabled"; }; port3: port@3 { reg = <3>; + status = "disabled"; }; port4: port@4 { reg = <4>; + status = "disabled"; }; port5: port@5 { reg = <5>; + status = "disabled"; }; port6: port@6 { reg = <6>; + status = "disabled"; }; port7: port@7 { reg = <7>; + status = "disabled"; }; port8: port@8 { reg = <8>; + status = "disabled"; }; port9: port@9 { reg = <9>; + status = "disabled"; }; port10: port@10 { reg = <10>; + status = "disabled"; }; }; }; diff --git a/arch/mips/boot/dts/mscc/ocelot_pcb120.dts b/arch/mips/boot/dts/mscc/ocelot_pcb120.dts index 897de5025d7f..d2dc6b3d923c 100644 --- a/arch/mips/boot/dts/mscc/ocelot_pcb120.dts +++ b/arch/mips/boot/dts/mscc/ocelot_pcb120.dts @@ -69,40 +69,48 @@ }; &port0 { + status = "okay"; phy-handle = <&phy0>; }; &port1 { + status = "okay"; phy-handle = <&phy1>; }; &port2 { + status = "okay"; phy-handle = <&phy2>; }; &port3 { + status = "okay"; phy-handle = <&phy3>; }; &port4 { + status = "okay"; phy-handle = <&phy7>; phy-mode = "sgmii"; phys = <&serdes 4 SERDES1G(2)>; }; &port5 { + status = "okay"; phy-handle = <&phy4>; phy-mode = "sgmii"; phys = <&serdes 5 SERDES1G(5)>; }; &port6 { + status = "okay"; phy-handle = <&phy6>; 
phy-mode = "sgmii"; phys = <&serdes 6 SERDES1G(3)>; }; &port9 { + status = "okay"; phy-handle = <&phy5>; phy-mode = "sgmii"; phys = <&serdes 9 SERDES1G(4)>; diff --git a/arch/mips/boot/dts/mscc/ocelot_pcb123.dts b/arch/mips/boot/dts/mscc/ocelot_pcb123.dts index ef852f382da8..7d7e638791dd 100644 --- a/arch/mips/boot/dts/mscc/ocelot_pcb123.dts +++ b/arch/mips/boot/dts/mscc/ocelot_pcb123.dts @@ -47,17 +47,21 @@ }; &port0 { + status = "okay"; phy-handle = <&phy0>; }; &port1 { + status = "okay"; phy-handle = <&phy1>; }; &port2 { + status = "okay"; phy-handle = <&phy2>; }; &port3 { + status = "okay"; phy-handle = <&phy3>; }; -- cgit From eba54cbb92d28b4f6dc1ed5f73f5187b09d82c08 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 19 Aug 2021 20:04:16 +0300 Subject: MIPS: mscc: ocelot: mark the phy-mode for internal PHY ports The ocelot driver was converted to phylink, and that expects a valid phy_interface_t. Without a phy-mode, of_get_phy_mode returns PHY_INTERFACE_MODE_NA, which is not ideal because phylink rejects that. The ocelot driver was patched to treat PHY_INTERFACE_MODE_NA as PHY_INTERFACE_MODE_INTERNAL to work with the broken DT blobs, but we should fix the device trees and specify the phy-mode too. Signed-off-by: Vladimir Oltean Signed-off-by: Thomas Bogendoerfer --- arch/mips/boot/dts/mscc/ocelot_pcb120.dts | 4 ++++ arch/mips/boot/dts/mscc/ocelot_pcb123.dts | 4 ++++ 2 files changed, 8 insertions(+) (limited to 'arch') diff --git a/arch/mips/boot/dts/mscc/ocelot_pcb120.dts b/arch/mips/boot/dts/mscc/ocelot_pcb120.dts index d2dc6b3d923c..bd240690cb37 100644 --- a/arch/mips/boot/dts/mscc/ocelot_pcb120.dts +++ b/arch/mips/boot/dts/mscc/ocelot_pcb120.dts @@ -71,21 +71,25 @@ &port0 { status = "okay"; phy-handle = <&phy0>; + phy-mode = "internal"; }; &port1 { status = "okay"; phy-handle = <&phy1>; + phy-mode = "internal"; }; &port2 { status = "okay"; phy-handle = <&phy2>; + phy-mode = "internal"; }; &port3 { status = "okay"; phy-handle = <&phy3>; + phy-mode = "internal"; }; &port4 { diff --git a/arch/mips/boot/dts/mscc/ocelot_pcb123.dts b/arch/mips/boot/dts/mscc/ocelot_pcb123.dts index 7d7e638791dd..0185045c7630 100644 --- a/arch/mips/boot/dts/mscc/ocelot_pcb123.dts +++ b/arch/mips/boot/dts/mscc/ocelot_pcb123.dts @@ -49,19 +49,23 @@ &port0 { status = "okay"; phy-handle = <&phy0>; + phy-mode = "internal"; }; &port1 { status = "okay"; phy-handle = <&phy1>; + phy-mode = "internal"; }; &port2 { status = "okay"; phy-handle = <&phy2>; + phy-mode = "internal"; }; &port3 { status = "okay"; phy-handle = <&phy3>; + phy-mode = "internal"; }; -- cgit From d00aa8061e04da5c570a54283462e47fab01bd3c Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 20 Jul 2021 08:41:19 -0600 Subject: ARM: dts: omap: Drop references to opp.txt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit opp.txt is getting removed with the OPP binding converted to DT schema. As it is unusual to reference a binding doc from a dts file, let's just remove the reference. 
Cc: "Benoît Cousson" Cc: Tony Lindgren Signed-off-by: Rob Herring Signed-off-by: Viresh Kumar --- arch/arm/boot/dts/omap34xx.dtsi | 1 - arch/arm/boot/dts/omap36xx.dtsi | 1 - 2 files changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/omap34xx.dtsi b/arch/arm/boot/dts/omap34xx.dtsi index feaa43b78535..8b8451399784 100644 --- a/arch/arm/boot/dts/omap34xx.dtsi +++ b/arch/arm/boot/dts/omap34xx.dtsi @@ -24,7 +24,6 @@ }; }; - /* see Documentation/devicetree/bindings/opp/opp.txt */ cpu0_opp_table: opp-table { compatible = "operating-points-v2-ti-cpu"; syscon = <&scm_conf>; diff --git a/arch/arm/boot/dts/omap36xx.dtsi b/arch/arm/boot/dts/omap36xx.dtsi index 20844dbc002e..22b33098b1a2 100644 --- a/arch/arm/boot/dts/omap36xx.dtsi +++ b/arch/arm/boot/dts/omap36xx.dtsi @@ -29,7 +29,6 @@ }; }; - /* see Documentation/devicetree/bindings/opp/opp.txt */ cpu0_opp_table: opp-table { compatible = "operating-points-v2-ti-cpu"; syscon = <&scm_conf>; -- cgit From a00ea5b6f2bbef8b004b0b7228c61680a50c7c3f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 23 Aug 2021 06:45:20 +0000 Subject: powerpc/syscalls: Remove __NR__exit __NR__exit is nowhere used. On most architectures it was removed by commit 135ab6ec8fda ("[PATCH] remove remaining errno and __KERNEL_SYSCALLS__ references") but not on powerpc. powerpc removed __KERNEL_SYSCALLS__ in commit 3db03b4afb3e ("[PATCH] rename the provided execve functions to kernel_execve"), but __NR__exit was left over. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/6457eb4f327313323ed1f70e540bbb4ddc9178fa.1629701106.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/unistd.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index b541c690a31c..5eb462af6766 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -9,8 +9,6 @@ #define NR_syscalls __NR_syscalls -#define __NR__exit __NR_exit - #ifndef __ASSEMBLY__ #include -- cgit From 3accc0faef081b6813967b34f7d05a3edb855cbd Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Mon, 23 Aug 2021 11:00:38 +0200 Subject: powerpc/prom: Fix unused variable ‘reserve_map’ when CONFIG_PPC32 is not set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes a compile error with W=1. 
arch/powerpc/kernel/prom.c: In function ‘early_reserve_mem’: arch/powerpc/kernel/prom.c:625:10: error: variable ‘reserve_map’ set but not used [-Werror=unused-but-set-variable] __be64 *reserve_map; ^~~~~~~~~~~ cc1: all warnings being treated as errors Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210823090039.166120-2-clg@kaod.org --- arch/powerpc/kernel/prom.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index f620e04dc9bf..44b2cdc0aae3 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -640,7 +640,9 @@ static void __init early_reserve_mem(void) } #endif /* CONFIG_BLK_DEV_INITRD */ -#ifdef CONFIG_PPC32 + if (!IS_ENABLED(CONFIG_PPC32)) + return; + /* * Handle the case where we might be booting from an old kexec * image that setup the mem_rsvmap as pairs of 32-bit values @@ -661,7 +663,6 @@ static void __init early_reserve_mem(void) } return; } -#endif } #ifdef CONFIG_PPC_TRANSACTIONAL_MEM -- cgit From cc47ad409ba9cc950e9c492c8ba653dabd392148 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Mon, 23 Aug 2021 11:00:39 +0200 Subject: powerpc/compat_sys: Declare syscalls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes a compile error with W=1. Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210823090039.166120-3-clg@kaod.org --- arch/powerpc/include/asm/syscalls.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/include/asm/syscalls.h b/arch/powerpc/include/asm/syscalls.h index 398171fdcd9f..7ee66ae5444d 100644 --- a/arch/powerpc/include/asm/syscalls.h +++ b/arch/powerpc/include/asm/syscalls.h @@ -6,6 +6,7 @@ #include #include #include +#include struct rtas_args; @@ -18,5 +19,34 @@ asmlinkage long sys_mmap2(unsigned long addr, size_t len, asmlinkage long ppc64_personality(unsigned long personality); asmlinkage long sys_rtas(struct rtas_args __user *uargs); +#ifdef CONFIG_COMPAT +unsigned long compat_sys_mmap2(unsigned long addr, size_t len, + unsigned long prot, unsigned long flags, + unsigned long fd, unsigned long pgoff); + +compat_ssize_t compat_sys_pread64(unsigned int fd, char __user *ubuf, compat_size_t count, + u32 reg6, u32 pos1, u32 pos2); + +compat_ssize_t compat_sys_pwrite64(unsigned int fd, const char __user *ubuf, compat_size_t count, + u32 reg6, u32 pos1, u32 pos2); + +compat_ssize_t compat_sys_readahead(int fd, u32 r4, u32 offset1, u32 offset2, u32 count); + +int compat_sys_truncate64(const char __user *path, u32 reg4, + unsigned long len1, unsigned long len2); + +long compat_sys_fallocate(int fd, int mode, u32 offset1, u32 offset2, u32 len1, u32 len2); + +int compat_sys_ftruncate64(unsigned int fd, u32 reg4, unsigned long len1, + unsigned long len2); + +long ppc32_fadvise64(int fd, u32 unused, u32 offset1, u32 offset2, + size_t len, int advice); + +long compat_sys_sync_file_range2(int fd, unsigned int flags, + unsigned int offset1, unsigned int offset2, + unsigned int nbytes1, unsigned int nbytes2); +#endif + #endif /* __KERNEL__ */ #endif /* __ASM_POWERPC_SYSCALLS_H */ -- cgit From b424d4d4263200459615c87ad8dddaf4bb571a9d Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Tue, 27 Jul 2021 02:06:52 +0800 Subject: arm64: PCI: Restructure pcibios_root_bridge_prepare() Restructure the pcibios_root_bridge_prepare() as the preparation for 
supporting cases when no real ACPI device is related to the PCI host bridge. No functional change. Link: https://lore.kernel.org/r/20210726180657.142727-4-boqun.feng@gmail.com Signed-off-by: Boqun Feng Signed-off-by: Lorenzo Pieralisi Acked-by: Catalin Marinas --- arch/arm64/kernel/pci.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c index 1006ed2d7c60..5148ae242780 100644 --- a/arch/arm64/kernel/pci.c +++ b/arch/arm64/kernel/pci.c @@ -82,14 +82,19 @@ int acpi_pci_bus_find_domain_nr(struct pci_bus *bus) int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge) { - if (!acpi_disabled) { - struct pci_config_window *cfg = bridge->bus->sysdata; - struct acpi_device *adev = to_acpi_device(cfg->parent); - struct device *bus_dev = &bridge->bus->dev; + struct pci_config_window *cfg; + struct acpi_device *adev; + struct device *bus_dev; - ACPI_COMPANION_SET(&bridge->dev, adev); - set_dev_node(bus_dev, acpi_get_node(acpi_device_handle(adev))); - } + if (acpi_disabled) + return 0; + + cfg = bridge->bus->sysdata; + adev = to_acpi_device(cfg->parent); + bus_dev = &bridge->bus->dev; + + ACPI_COMPANION_SET(&bridge->dev, adev); + set_dev_node(bus_dev, acpi_get_node(acpi_device_handle(adev))); return 0; } -- cgit From 7d40c0f70d92291605c4498b8ee4b3a3c3ba07b1 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Tue, 27 Jul 2021 02:06:53 +0800 Subject: arm64: PCI: Support root bridge preparation for Hyper-V Currently at root bridge preparation, the corresponding ACPI device will be set as the companion, however for a Hyper-V virtual PCI root bridge, there is no corresponding ACPI device, because a Hyper-V virtual PCI root bridge is discovered via VMBus rather than ACPI table. In order to support this, we need to make pcibios_root_bridge_prepare() work with cfg->parent being NULL. Use a NULL pointer as the ACPI device if there is no corresponding ACPI device, and this is fine because: 1) ACPI_COMPANION_SET() can work with the second parameter being NULL, 2) semantically, if a NULL pointer is set via ACPI_COMPANION_SET(), ACPI_COMPANION() (the read API for this field) will return NULL, and since ACPI_COMPANION() may return NULL, so users must have handled the cases where it returns NULL, and 3) since there is no corresponding ACPI device, it would be wrong to use any other value here. Link: https://lore.kernel.org/r/20210726180657.142727-5-boqun.feng@gmail.com Signed-off-by: Boqun Feng Signed-off-by: Lorenzo Pieralisi Acked-by: Catalin Marinas --- arch/arm64/kernel/pci.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c index 5148ae242780..2276689b5411 100644 --- a/arch/arm64/kernel/pci.c +++ b/arch/arm64/kernel/pci.c @@ -90,7 +90,17 @@ int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge) return 0; cfg = bridge->bus->sysdata; - adev = to_acpi_device(cfg->parent); + + /* + * On Hyper-V there is no corresponding ACPI device for a root bridge, + * therefore ->parent is set as NULL by the driver. And set 'adev' as + * NULL in this case because there is no proper ACPI device. 
+ */ + if (!cfg->parent) + adev = NULL; + else + adev = to_acpi_device(cfg->parent); + bus_dev = &bridge->bus->dev; ACPI_COMPANION_SET(&bridge->dev, adev); -- cgit From 5d7d6dac8fe99ed59eee2300e4a03370f94d5222 Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Thu, 5 Aug 2021 18:26:14 -0300 Subject: KVM: PPC: Book3S HV: Fix copy_tofrom_guest routines The __kvmhv_copy_tofrom_guest_radix function was introduced along with nested HV guest support. It uses the platform's Radix MMU quadrants to provide a nested hypervisor with fast access to its nested guests memory (H_COPY_TOFROM_GUEST hypercall). It has also since been added as a fast path for the kvmppc_ld/st routines which are used during instruction emulation. The commit def0bfdbd603 ("powerpc: use probe_user_read() and probe_user_write()") changed the low level copy function from raw_copy_from_user to probe_user_read, which adds a check to access_ok. In powerpc that is: static inline bool __access_ok(unsigned long addr, unsigned long size) { return addr < TASK_SIZE_MAX && size <= TASK_SIZE_MAX - addr; } and TASK_SIZE_MAX is 0x0010000000000000UL for 64-bit, which means that setting the two MSBs of the effective address (which correspond to the quadrant) now cause access_ok to reject the access. This was not caught earlier because the most common code path via kvmppc_ld/st contains a fallback (kvm_read_guest) that is likely to succeed for L1 guests. For nested guests there is no fallback. Another issue is that probe_user_read (now __copy_from_user_nofault) does not return the number of bytes not copied in case of failure, so the destination memory is not being cleared anymore in kvmhv_copy_from_guest_radix: ret = kvmhv_copy_tofrom_guest_radix(vcpu, eaddr, to, NULL, n); if (ret > 0) <-- always false! memset(to + (n - ret), 0, ret); This patch fixes both issues by skipping access_ok and open-coding the low level __copy_to/from_user_inatomic. Fixes: def0bfdbd603 ("powerpc: use probe_user_read() and probe_user_write()") Signed-off-by: Fabiano Rosas Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210805212616.2641017-2-farosas@linux.ibm.com --- arch/powerpc/kvm/book3s_64_mmu_radix.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index b5905ae4377c..44eb7b1ef289 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -65,10 +65,12 @@ unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid, } isync(); + pagefault_disable(); if (is_load) - ret = copy_from_user_nofault(to, (const void __user *)from, n); + ret = __copy_from_user_inatomic(to, (const void __user *)from, n); else - ret = copy_to_user_nofault((void __user *)to, from, n); + ret = __copy_to_user_inatomic((void __user *)to, from, n); + pagefault_enable(); /* switch the pid first to avoid running host with unallocated pid */ if (quadrant == 1 && pid != old_pid) -- cgit From c232461c0c3b0aca637f0d7658a7f5d0bb393a4e Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Thu, 5 Aug 2021 18:26:15 -0300 Subject: KVM: PPC: Book3S HV: Add sanity check to copy_tofrom_guest Both paths into __kvmhv_copy_tofrom_guest_radix ensure that we arrive with an effective address that is smaller than our total addressable space and addresses quadrant 0. 
- The H_COPY_TOFROM_GUEST hypercall path rejects the call with H_PARAMETER if the effective address has any of the twelve most significant bits set. - The kvmhv_copy_tofrom_guest_radix path clears the top twelve bits before calling the internal function. Although the callers make sure that the effective address is sane, any future use of the function is exposed to a programming error, so add a sanity check. Suggested-by: Nicholas Piggin Signed-off-by: Fabiano Rosas Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210805212616.2641017-3-farosas@linux.ibm.com --- arch/powerpc/kvm/book3s_64_mmu_radix.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 44eb7b1ef289..1b1c9e9e539b 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -44,6 +44,9 @@ unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid, (to != NULL) ? __pa(to): 0, (from != NULL) ? __pa(from): 0, n); + if (eaddr & (0xFFFUL << 52)) + return ret; + quadrant = 1; if (!pid) quadrant = 2; -- cgit From 0eb596f1e6103ebe122792a425b88c5dc21c4087 Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Thu, 5 Aug 2021 18:26:16 -0300 Subject: KVM: PPC: Book3S HV: Stop exporting symbols from book3s_64_mmu_radix The book3s_64_mmu_radix.o object is not part of the KVM builtins and all the callers of the exported symbols are in the same kvm-hv.ko module so we should not need to export any symbols. Signed-off-by: Fabiano Rosas Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210805212616.2641017-4-farosas@linux.ibm.com --- arch/powerpc/kvm/book3s_64_mmu_radix.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 1b1c9e9e539b..16359525a40f 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -86,7 +86,6 @@ unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid, return ret; } -EXPORT_SYMBOL_GPL(__kvmhv_copy_tofrom_guest_radix); static long kvmhv_copy_tofrom_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr, void *to, void *from, unsigned long n) @@ -122,14 +121,12 @@ long kvmhv_copy_from_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr, void *to, return ret; } -EXPORT_SYMBOL_GPL(kvmhv_copy_from_guest_radix); long kvmhv_copy_to_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr, void *from, unsigned long n) { return kvmhv_copy_tofrom_guest_radix(vcpu, eaddr, NULL, from, n); } -EXPORT_SYMBOL_GPL(kvmhv_copy_to_guest_radix); int kvmppc_mmu_walk_radix_tree(struct kvm_vcpu *vcpu, gva_t eaddr, struct kvmppc_pte *gpte, u64 root, -- cgit From 1e753732bda6dcf888ea0b90b2a91ac1c1a0bae9 Mon Sep 17 00:00:00 2001 From: Tony Krowiak Date: Mon, 23 Aug 2021 17:20:46 -0400 Subject: s390/vfio-ap: r/w lock for PQAP interception handler function pointer The function pointer to the interception handler for the PQAP instruction can get changed during the interception process. Let's add a semaphore to struct kvm_s390_crypto to control read/write access to the function pointer contained therein. The semaphore must be locked for write access by the vfio_ap device driver when notified that the KVM pointer has been set or cleared. It must be locked for read access by the interception framework when the PQAP instruction is intercepted. 
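The resulting read-side pattern looks roughly like the sketch below, a userspace model in which a pthreads rwlock stands in for the kernel rw_semaphore; call_hook() is a hypothetical analogue of handle_pqap(), and the error value is only a stand-in for -EOPNOTSUPP:

  #include <pthread.h>
  #include <stdio.h>

  typedef int (*crypto_hook)(void);

  static pthread_rwlock_t hook_rwsem = PTHREAD_RWLOCK_INITIALIZER;
  static crypto_hook pqap_hook;  /* set/cleared only under the write lock */

  static int call_hook(void)
  {
          int ret = -95;  /* stand-in for -EOPNOTSUPP */

          pthread_rwlock_rdlock(&hook_rwsem);
          if (pqap_hook)          /* re-check under the lock */
                  ret = pqap_hook();
          pthread_rwlock_unlock(&hook_rwsem);
          return ret;
  }

  static int demo_hook(void) { return 0; }

  int main(void)
  {
          printf("%d\n", call_hook());         /* -95: no hook registered */

          pthread_rwlock_wrlock(&hook_rwsem);  /* driver registers its hook */
          pqap_hook = demo_hook;
          pthread_rwlock_unlock(&hook_rwsem);

          printf("%d\n", call_hook());         /* 0: hook ran under read lock */
          return 0;
  }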
Signed-off-by: Tony Krowiak Reviewed-by: Jason Gunthorpe Reviewed-by: Christian Borntraeger Link: https://lore.kernel.org/r/20210823212047.1476436-2-akrowiak@linux.ibm.com Signed-off-by: Alex Williamson --- arch/s390/include/asm/kvm_host.h | 8 +++----- arch/s390/kvm/kvm-s390.c | 1 + arch/s390/kvm/priv.c | 15 +++++++++------ 3 files changed, 13 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 161a9e12bfb8..d681ae462350 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -798,14 +798,12 @@ struct kvm_s390_cpu_model { unsigned short ibc; }; -struct kvm_s390_module_hook { - int (*hook)(struct kvm_vcpu *vcpu); - struct module *owner; -}; +typedef int (*crypto_hook)(struct kvm_vcpu *vcpu); struct kvm_s390_crypto { struct kvm_s390_crypto_cb *crycb; - struct kvm_s390_module_hook *pqap_hook; + struct rw_semaphore pqap_hook_rwsem; + crypto_hook *pqap_hook; __u32 crycbd; __u8 aes_kw; __u8 dea_kw; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 4527ac7b5961..5c4f559bcd60 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2630,6 +2630,7 @@ static void kvm_s390_crypto_init(struct kvm *kvm) { kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb; kvm_s390_set_crycb_format(kvm); + init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem); if (!test_kvm_facility(kvm, 76)) return; diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 9928f785c677..53da4ceb16a3 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -610,6 +610,7 @@ static int handle_io_inst(struct kvm_vcpu *vcpu) static int handle_pqap(struct kvm_vcpu *vcpu) { struct ap_queue_status status = {}; + crypto_hook pqap_hook; unsigned long reg0; int ret; uint8_t fc; @@ -654,18 +655,20 @@ static int handle_pqap(struct kvm_vcpu *vcpu) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); /* - * Verify that the hook callback is registered, lock the owner - * and call the hook. + * If the hook callback is registered, there will be a pointer to the + * hook function pointer in the kvm_s390_crypto structure. Lock the + * owner, retrieve the hook function pointer and call the hook. */ + down_read(&vcpu->kvm->arch.crypto.pqap_hook_rwsem); if (vcpu->kvm->arch.crypto.pqap_hook) { - if (!try_module_get(vcpu->kvm->arch.crypto.pqap_hook->owner)) - return -EOPNOTSUPP; - ret = vcpu->kvm->arch.crypto.pqap_hook->hook(vcpu); - module_put(vcpu->kvm->arch.crypto.pqap_hook->owner); + pqap_hook = *vcpu->kvm->arch.crypto.pqap_hook; + ret = pqap_hook(vcpu); if (!ret && vcpu->run->s.regs.gprs[1] & 0x00ff0000) kvm_s390_set_psw_cc(vcpu, 3); + up_read(&vcpu->kvm->arch.crypto.pqap_hook_rwsem); return ret; } + up_read(&vcpu->kvm->arch.crypto.pqap_hook_rwsem); /* * A vfio_driver must register a hook. * No hook means no driver to enable the SIE CRYCB and no queues. -- cgit From 86956e70761b3292156d668e87126844334dd71b Mon Sep 17 00:00:00 2001 From: Tony Krowiak Date: Mon, 23 Aug 2021 17:20:47 -0400 Subject: s390/vfio-ap: replace open coded locks for VFIO_GROUP_NOTIFY_SET_KVM notification It was pointed out during an unrelated patch review that locks should not be open coded - i.e., writing the algorithm of a standard lock in a function instead of using a lock from the standard library. The setting and testing of a busy flag and sleeping on a wait_event is the same thing a lock does. The open coded locks are invisible to lockdep, so potential locking problems are not detected. 
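In pthreads terms, the open-coded pattern amounts to the following sketch (illustrative only; the condition variable plays the role of the kernel wait queue, and the comments map each step back to the busy-flag idiom):

  #include <pthread.h>
  #include <stdbool.h>

  static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
  static pthread_cond_t cv = PTHREAD_COND_INITIALIZER;
  static bool busy;

  /* "lock": test the busy flag, sleep until it clears, then claim it */
  static void open_coded_lock(void)
  {
          pthread_mutex_lock(&m);
          while (busy)                  /* wait_event(wq, !busy) */
                  pthread_cond_wait(&cv, &m);
          busy = true;                  /* set the busy flag */
          pthread_mutex_unlock(&m);
  }

  /* "unlock": clear the flag and wake any waiters */
  static void open_coded_unlock(void)
  {
          pthread_mutex_lock(&m);
          busy = false;
          pthread_cond_broadcast(&cv);  /* wake_up(&wq) */
          pthread_mutex_unlock(&m);
  }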
This patch removes the open coded locks used during VFIO_GROUP_NOTIFY_SET_KVM notification. The busy flag and wait queue were introduced to resolve a possible circular locking dependency reported by lockdep when starting a secure execution guest configured with AP adapters and domains. Reversing the order in which the kvm->lock mutex and matrix_dev->lock mutex are locked resolves the issue reported by lockdep, thus enabling the removal of the open coded locks. Signed-off-by: Tony Krowiak Acked-by: Halil Pasic Link: https://lore.kernel.org/r/20210823212047.1476436-3-akrowiak@linux.ibm.com Signed-off-by: Alex Williamson --- arch/s390/kvm/kvm-s390.c | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 5c4f559bcd60..efda0615741f 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2559,12 +2559,26 @@ static void kvm_s390_set_crycb_format(struct kvm *kvm) kvm->arch.crypto.crycbd |= CRYCB_FORMAT1; } +/* + * kvm_arch_crypto_set_masks + * + * @kvm: pointer to the target guest's KVM struct containing the crypto masks + * to be set. + * @apm: the mask identifying the accessible AP adapters + * @aqm: the mask identifying the accessible AP domains + * @adm: the mask identifying the accessible AP control domains + * + * Set the masks that identify the adapters, domains and control domains to + * which the KVM guest is granted access. + * + * Note: The kvm->lock mutex must be locked by the caller before invoking this + * function. + */ void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm, unsigned long *aqm, unsigned long *adm) { struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb; - mutex_lock(&kvm->lock); kvm_s390_vcpu_block_all(kvm); switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) { @@ -2595,13 +2609,23 @@ void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm, /* recreate the shadow crycb for each vcpu */ kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART); kvm_s390_vcpu_unblock_all(kvm); - mutex_unlock(&kvm->lock); } EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks); +/* + * kvm_arch_crypto_clear_masks + * + * @kvm: pointer to the target guest's KVM struct containing the crypto masks + * to be cleared. + * + * Clear the masks that identify the adapters, domains and control domains to + * which the KVM guest is granted access. + * + * Note: The kvm->lock mutex must be locked by the caller before invoking this + * function. + */ void kvm_arch_crypto_clear_masks(struct kvm *kvm) { - mutex_lock(&kvm->lock); kvm_s390_vcpu_block_all(kvm); memset(&kvm->arch.crypto.crycb->apcb0, 0, @@ -2613,7 +2637,6 @@ void kvm_arch_crypto_clear_masks(struct kvm *kvm) /* recreate the shadow crycb for each vcpu */ kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART); kvm_s390_vcpu_unblock_all(kvm); - mutex_unlock(&kvm->lock); } EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks); -- cgit From 82a423053eb3cf27209c78ce1852ea18d173722a Mon Sep 17 00:00:00 2001 From: Changcheng Deng Date: Sat, 14 Aug 2021 00:30:38 +0800 Subject: arch/arc/kernel/: fix misspellings using codespell tool Some typos are found out by codespell tool: ./intc-compact.c:145: prioity ==> priority ./smp.c:286: recevier ==> receiver ./stacktrace.c:152 prelogue ==> prologue Fix typos found by codespell. 
Reported-by: Zeal Robot Signed-off-by: Changcheng Deng Signed-off-by: Yi Wang Signed-off-by: Vineet Gupta --- arch/arc/kernel/intc-compact.c | 2 +- arch/arc/kernel/smp.c | 2 +- arch/arc/kernel/stacktrace.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arc/kernel/intc-compact.c b/arch/arc/kernel/intc-compact.c index a86641b91e65..6885e422870e 100644 --- a/arch/arc/kernel/intc-compact.c +++ b/arch/arc/kernel/intc-compact.c @@ -142,7 +142,7 @@ IRQCHIP_DECLARE(arc_intc, "snps,arc700-intc", init_onchip_IRQ); * Time hard-ISR, timer_interrupt( ) calls spin_unlock_irq several times. * Here local_irq_enable( ) shd not re-enable lower priority interrupts * -If called from soft-ISR, it must re-enable all interrupts - * soft ISR are low prioity jobs which can be very slow, thus all IRQs + * soft ISR are low priority jobs which can be very slow, thus all IRQs * must be enabled while they run. * Now hardware context wise we may still be in L2 ISR (not done rtie) * still we must re-enable both L1 and L2 IRQs diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c index db0e104d6835..b5b5fe2c6e1e 100644 --- a/arch/arc/kernel/smp.c +++ b/arch/arc/kernel/smp.c @@ -283,7 +283,7 @@ static void ipi_send_msg_one(int cpu, enum ipi_msg_type msg) /* * Call the platform specific IPI kick function, but avoid if possible: * Only do so if there's no pending msg from other concurrent sender(s). - * Otherwise, recevier will see this msg as well when it takes the + * Otherwise, receiver will see this msg as well when it takes the * IPI corresponding to that msg. This is true, even if it is already in * IPI handler, because !@old means it has not yet dequeued the msg(s) * so @new msg can be a free-loader diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c index 1b9576d21e24..c376ff3147e7 100644 --- a/arch/arc/kernel/stacktrace.c +++ b/arch/arc/kernel/stacktrace.c @@ -149,7 +149,7 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs, #else /* On ARC, only Dward based unwinder works. fp based backtracing is * not possible (-fno-omit-frame-pointer) because of the way function - * prelogue is setup (callee regs saved and then fp set and not other + * prologue is setup (callee regs saved and then fp set and not other * way around */ pr_warn_once("CONFIG_ARC_DW2_UNWIND needs to be enabled\n"); -- cgit From 6b5ff0405e4190f23780362ea324b250bc495683 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 16 Aug 2021 14:05:33 -0700 Subject: ARC: export clear_user_page() for modules 0day bot reports a build error: ERROR: modpost: "clear_user_page" [drivers/media/v4l2-core/videobuf-dma-sg.ko] undefined! so export it in arch/arc/ to fix the build error. In most ARCHes, clear_user_page() is a macro. OTOH, in a few ARCHes it is a function and needs to be exported. PowerPC exported it in 2004. It looks like nds32 and nios2 still need to have it exported. 
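The general rule, sketched with a made-up name (my_arch_helper() is hypothetical, not tied to this driver): any function that is built into the kernel but called from modular code needs an explicit export, otherwise modpost fails at module link time with exactly the kind of error quoted above.

  #include <linux/export.h>

  /* Without the EXPORT_SYMBOL line, a module calling my_arch_helper()
   * fails at modpost with "undefined!". */
  int my_arch_helper(int x)
  {
          return x + 1;
  }
  EXPORT_SYMBOL(my_arch_helper);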
Fixes: 4102b53392d63 ("ARC: [mm] Aliasing VIPT dcache support 2/4") Signed-off-by: Randy Dunlap Reported-by: kernel test robot Cc: Guenter Roeck Cc: linux-snps-arc@lists.infradead.org Signed-off-by: Vineet Gupta --- arch/arc/mm/cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index a2fbea3ee07c..102418ac5ff4 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -1123,7 +1123,7 @@ void clear_user_page(void *to, unsigned long u_vaddr, struct page *page) clear_page(to); clear_bit(PG_dc_clean, &page->flags); } - +EXPORT_SYMBOL(clear_user_page); /********************************************************************** * Explicit Cache flush request from user space via syscall -- cgit From b0f839b4b91588c24bac8d320f7fc9fe2dea4517 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 3 Aug 2021 20:26:09 -0700 Subject: ARC: atomics: disintegrate header Non functional change, to ease future addition/removal Acked-by: Peter Zijlstra (Intel) Signed-off-by: Vineet Gupta --- arch/arc/include/asm/atomic-llsc.h | 103 ++++++++ arch/arc/include/asm/atomic-spinlock.h | 111 +++++++++ arch/arc/include/asm/atomic.h | 429 +-------------------------------- arch/arc/include/asm/atomic64-arcv2.h | 242 +++++++++++++++++++ 4 files changed, 461 insertions(+), 424 deletions(-) create mode 100644 arch/arc/include/asm/atomic-llsc.h create mode 100644 arch/arc/include/asm/atomic-spinlock.h create mode 100644 arch/arc/include/asm/atomic64-arcv2.h (limited to 'arch') diff --git a/arch/arc/include/asm/atomic-llsc.h b/arch/arc/include/asm/atomic-llsc.h new file mode 100644 index 000000000000..aab4f2855457 --- /dev/null +++ b/arch/arc/include/asm/atomic-llsc.h @@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _ASM_ARC_ATOMIC_LLSC_H +#define _ASM_ARC_ATOMIC_LLSC_H + +#define arch_atomic_set(v, i) WRITE_ONCE(((v)->counter), (i)) + +#define ATOMIC_OP(op, c_op, asm_op) \ +static inline void arch_atomic_##op(int i, atomic_t *v) \ +{ \ + unsigned int val; \ + \ + __asm__ __volatile__( \ + "1: llock %[val], [%[ctr]] \n" \ + " " #asm_op " %[val], %[val], %[i] \n" \ + " scond %[val], [%[ctr]] \n" \ + " bnz 1b \n" \ + : [val] "=&r" (val) /* Early clobber to prevent reg reuse */ \ + : [ctr] "r" (&v->counter), /* Not "m": llock only supports reg direct addr mode */ \ + [i] "ir" (i) \ + : "cc"); \ +} \ + +#define ATOMIC_OP_RETURN(op, c_op, asm_op) \ +static inline int arch_atomic_##op##_return(int i, atomic_t *v) \ +{ \ + unsigned int val; \ + \ + /* \ + * Explicit full memory barrier needed before/after as \ + * LLOCK/SCOND themselves don't provide any such semantics \ + */ \ + smp_mb(); \ + \ + __asm__ __volatile__( \ + "1: llock %[val], [%[ctr]] \n" \ + " " #asm_op " %[val], %[val], %[i] \n" \ + " scond %[val], [%[ctr]] \n" \ + " bnz 1b \n" \ + : [val] "=&r" (val) \ + : [ctr] "r" (&v->counter), \ + [i] "ir" (i) \ + : "cc"); \ + \ + smp_mb(); \ + \ + return val; \ +} + +#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ +static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned int val, orig; \ + \ + /* \ + * Explicit full memory barrier needed before/after as \ + * LLOCK/SCOND themselves don't provide any such semantics \ + */ \ + smp_mb(); \ + \ + __asm__ __volatile__( \ + "1: llock %[orig], [%[ctr]] \n" \ + " " #asm_op " %[val], %[orig], %[i] \n" \ + " scond %[val], [%[ctr]] \n" \ + " bnz 1b \n" \ + : [val] "=&r" (val), \ + [orig] "=&r" (orig) \ + : [ctr] "r" (&v->counter), \ + [i] "ir" (i) \ + : "cc"); 
\ + \ + smp_mb(); \ + \ + return orig; \ +} + +#define ATOMIC_OPS(op, c_op, asm_op) \ + ATOMIC_OP(op, c_op, asm_op) \ + ATOMIC_OP_RETURN(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) + +ATOMIC_OPS(add, +=, add) +ATOMIC_OPS(sub, -=, sub) + +#undef ATOMIC_OPS +#define ATOMIC_OPS(op, c_op, asm_op) \ + ATOMIC_OP(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) + +ATOMIC_OPS(and, &=, and) +ATOMIC_OPS(andnot, &= ~, bic) +ATOMIC_OPS(or, |=, or) +ATOMIC_OPS(xor, ^=, xor) + +#define arch_atomic_andnot arch_atomic_andnot +#define arch_atomic_fetch_andnot arch_atomic_fetch_andnot + +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP +#undef ATOMIC_OP_RETURN +#undef ATOMIC_OP + +#endif diff --git a/arch/arc/include/asm/atomic-spinlock.h b/arch/arc/include/asm/atomic-spinlock.h new file mode 100644 index 000000000000..bdf87610b2d7 --- /dev/null +++ b/arch/arc/include/asm/atomic-spinlock.h @@ -0,0 +1,111 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _ASM_ARC_ATOMIC_SPLOCK_H +#define _ASM_ARC_ATOMIC_SPLOCK_H + +#ifndef CONFIG_SMP + + /* violating atomic_xxx API locking protocol in UP for optimization sake */ +#define arch_atomic_set(v, i) WRITE_ONCE(((v)->counter), (i)) + +#else + +static inline void arch_atomic_set(atomic_t *v, int i) +{ + /* + * Independent of hardware support, all of the atomic_xxx() APIs need + * to follow the same locking rules to make sure that a "hardware" + * atomic insn (e.g. LD) doesn't clobber an "emulated" atomic insn + * sequence + * + * Thus atomic_set() despite being 1 insn (and seemingly atomic) + * requires the locking. + */ + unsigned long flags; + + atomic_ops_lock(flags); + WRITE_ONCE(v->counter, i); + atomic_ops_unlock(flags); +} + +#define arch_atomic_set_release(v, i) arch_atomic_set((v), (i)) + +#endif + +/* + * Non hardware assisted Atomic-R-M-W + * Locking would change to irq-disabling only (UP) and spinlocks (SMP) + */ + +#define ATOMIC_OP(op, c_op, asm_op) \ +static inline void arch_atomic_##op(int i, atomic_t *v) \ +{ \ + unsigned long flags; \ + \ + atomic_ops_lock(flags); \ + v->counter c_op i; \ + atomic_ops_unlock(flags); \ +} + +#define ATOMIC_OP_RETURN(op, c_op, asm_op) \ +static inline int arch_atomic_##op##_return(int i, atomic_t *v) \ +{ \ + unsigned long flags; \ + unsigned long temp; \ + \ + /* \ + * spin lock/unlock provides the needed smp_mb() before/after \ + */ \ + atomic_ops_lock(flags); \ + temp = v->counter; \ + temp c_op i; \ + v->counter = temp; \ + atomic_ops_unlock(flags); \ + \ + return temp; \ +} + +#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ +static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned long flags; \ + unsigned long orig; \ + \ + /* \ + * spin lock/unlock provides the needed smp_mb() before/after \ + */ \ + atomic_ops_lock(flags); \ + orig = v->counter; \ + v->counter c_op i; \ + atomic_ops_unlock(flags); \ + \ + return orig; \ +} + +#define ATOMIC_OPS(op, c_op, asm_op) \ + ATOMIC_OP(op, c_op, asm_op) \ + ATOMIC_OP_RETURN(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) + +ATOMIC_OPS(add, +=, add) +ATOMIC_OPS(sub, -=, sub) + +#undef ATOMIC_OPS +#define ATOMIC_OPS(op, c_op, asm_op) \ + ATOMIC_OP(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) + +ATOMIC_OPS(and, &=, and) +ATOMIC_OPS(andnot, &= ~, bic) +ATOMIC_OPS(or, |=, or) +ATOMIC_OPS(xor, ^=, xor) + +#define arch_atomic_andnot arch_atomic_andnot +#define arch_atomic_fetch_andnot arch_atomic_fetch_andnot + +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP +#undef ATOMIC_OP_RETURN +#undef ATOMIC_OP + +#endif diff --git 
a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h index 7a36d79b5b2f..ee88e1dbaab5 100644 --- a/arch/arc/include/asm/atomic.h +++ b/arch/arc/include/asm/atomic.h @@ -17,435 +17,16 @@ #define arch_atomic_read(v) READ_ONCE((v)->counter) #ifdef CONFIG_ARC_HAS_LLSC - -#define arch_atomic_set(v, i) WRITE_ONCE(((v)->counter), (i)) - -#define ATOMIC_OP(op, c_op, asm_op) \ -static inline void arch_atomic_##op(int i, atomic_t *v) \ -{ \ - unsigned int val; \ - \ - __asm__ __volatile__( \ - "1: llock %[val], [%[ctr]] \n" \ - " " #asm_op " %[val], %[val], %[i] \n" \ - " scond %[val], [%[ctr]] \n" \ - " bnz 1b \n" \ - : [val] "=&r" (val) /* Early clobber to prevent reg reuse */ \ - : [ctr] "r" (&v->counter), /* Not "m": llock only supports reg direct addr mode */ \ - [i] "ir" (i) \ - : "cc"); \ -} \ - -#define ATOMIC_OP_RETURN(op, c_op, asm_op) \ -static inline int arch_atomic_##op##_return(int i, atomic_t *v) \ -{ \ - unsigned int val; \ - \ - /* \ - * Explicit full memory barrier needed before/after as \ - * LLOCK/SCOND themselves don't provide any such semantics \ - */ \ - smp_mb(); \ - \ - __asm__ __volatile__( \ - "1: llock %[val], [%[ctr]] \n" \ - " " #asm_op " %[val], %[val], %[i] \n" \ - " scond %[val], [%[ctr]] \n" \ - " bnz 1b \n" \ - : [val] "=&r" (val) \ - : [ctr] "r" (&v->counter), \ - [i] "ir" (i) \ - : "cc"); \ - \ - smp_mb(); \ - \ - return val; \ -} - -#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ -static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \ -{ \ - unsigned int val, orig; \ - \ - /* \ - * Explicit full memory barrier needed before/after as \ - * LLOCK/SCOND themselves don't provide any such semantics \ - */ \ - smp_mb(); \ - \ - __asm__ __volatile__( \ - "1: llock %[orig], [%[ctr]] \n" \ - " " #asm_op " %[val], %[orig], %[i] \n" \ - " scond %[val], [%[ctr]] \n" \ - " bnz 1b \n" \ - : [val] "=&r" (val), \ - [orig] "=&r" (orig) \ - : [ctr] "r" (&v->counter), \ - [i] "ir" (i) \ - : "cc"); \ - \ - smp_mb(); \ - \ - return orig; \ -} - -#else /* !CONFIG_ARC_HAS_LLSC */ - -#ifndef CONFIG_SMP - - /* violating atomic_xxx API locking protocol in UP for optimization sake */ -#define arch_atomic_set(v, i) WRITE_ONCE(((v)->counter), (i)) - +#include #else - -static inline void arch_atomic_set(atomic_t *v, int i) -{ - /* - * Independent of hardware support, all of the atomic_xxx() APIs need - * to follow the same locking rules to make sure that a "hardware" - * atomic insn (e.g. LD) doesn't clobber an "emulated" atomic insn - * sequence - * - * Thus atomic_set() despite being 1 insn (and seemingly atomic) - * requires the locking. 
- */ - unsigned long flags; - - atomic_ops_lock(flags); - WRITE_ONCE(v->counter, i); - atomic_ops_unlock(flags); -} - -#define arch_atomic_set_release(v, i) arch_atomic_set((v), (i)) - +#include #endif -/* - * Non hardware assisted Atomic-R-M-W - * Locking would change to irq-disabling only (UP) and spinlocks (SMP) - */ - -#define ATOMIC_OP(op, c_op, asm_op) \ -static inline void arch_atomic_##op(int i, atomic_t *v) \ -{ \ - unsigned long flags; \ - \ - atomic_ops_lock(flags); \ - v->counter c_op i; \ - atomic_ops_unlock(flags); \ -} - -#define ATOMIC_OP_RETURN(op, c_op, asm_op) \ -static inline int arch_atomic_##op##_return(int i, atomic_t *v) \ -{ \ - unsigned long flags; \ - unsigned long temp; \ - \ - /* \ - * spin lock/unlock provides the needed smp_mb() before/after \ - */ \ - atomic_ops_lock(flags); \ - temp = v->counter; \ - temp c_op i; \ - v->counter = temp; \ - atomic_ops_unlock(flags); \ - \ - return temp; \ -} - -#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ -static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \ -{ \ - unsigned long flags; \ - unsigned long orig; \ - \ - /* \ - * spin lock/unlock provides the needed smp_mb() before/after \ - */ \ - atomic_ops_lock(flags); \ - orig = v->counter; \ - v->counter c_op i; \ - atomic_ops_unlock(flags); \ - \ - return orig; \ -} - -#endif /* !CONFIG_ARC_HAS_LLSC */ - -#define ATOMIC_OPS(op, c_op, asm_op) \ - ATOMIC_OP(op, c_op, asm_op) \ - ATOMIC_OP_RETURN(op, c_op, asm_op) \ - ATOMIC_FETCH_OP(op, c_op, asm_op) - -ATOMIC_OPS(add, +=, add) -ATOMIC_OPS(sub, -=, sub) - -#undef ATOMIC_OPS -#define ATOMIC_OPS(op, c_op, asm_op) \ - ATOMIC_OP(op, c_op, asm_op) \ - ATOMIC_FETCH_OP(op, c_op, asm_op) - -ATOMIC_OPS(and, &=, and) -ATOMIC_OPS(andnot, &= ~, bic) -ATOMIC_OPS(or, |=, or) -ATOMIC_OPS(xor, ^=, xor) - -#define arch_atomic_andnot arch_atomic_andnot -#define arch_atomic_fetch_andnot arch_atomic_fetch_andnot - -#undef ATOMIC_OPS -#undef ATOMIC_FETCH_OP -#undef ATOMIC_OP_RETURN -#undef ATOMIC_OP - #ifdef CONFIG_GENERIC_ATOMIC64 - #include - -#else /* Kconfig ensures this is only enabled with needed h/w assist */ - -/* - * ARCv2 supports 64-bit exclusive load (LLOCKD) / store (SCONDD) - * - The address HAS to be 64-bit aligned - * - There are 2 semantics involved here: - * = exclusive implies no interim update between load/store to same addr - * = both words are observed/updated together: this is guaranteed even - * for regular 64-bit load (LDD) / store (STD). Thus atomic64_set() - * is NOT required to use LLOCKD+SCONDD, STD suffices - */ - -typedef struct { - s64 __aligned(8) counter; -} atomic64_t; - -#define ATOMIC64_INIT(a) { (a) } - -static inline s64 arch_atomic64_read(const atomic64_t *v) -{ - s64 val; - - __asm__ __volatile__( - " ldd %0, [%1] \n" - : "=r"(val) - : "r"(&v->counter)); - - return val; -} - -static inline void arch_atomic64_set(atomic64_t *v, s64 a) -{ - /* - * This could have been a simple assignment in "C" but would need - * explicit volatile. Otherwise gcc optimizers could elide the store - * which borked atomic64 self-test - * In the inline asm version, memory clobber needed for exact same - * reason, to tell gcc about the store. - * - * This however is not needed for sibling atomic64_add() etc since both - * load/store are explicitly done in inline asm. 
As long as API is used - * for each access, gcc has no way to optimize away any load/store - */ - __asm__ __volatile__( - " std %0, [%1] \n" - : - : "r"(a), "r"(&v->counter) - : "memory"); -} - -#define ATOMIC64_OP(op, op1, op2) \ -static inline void arch_atomic64_##op(s64 a, atomic64_t *v) \ -{ \ - s64 val; \ - \ - __asm__ __volatile__( \ - "1: \n" \ - " llockd %0, [%1] \n" \ - " " #op1 " %L0, %L0, %L2 \n" \ - " " #op2 " %H0, %H0, %H2 \n" \ - " scondd %0, [%1] \n" \ - " bnz 1b \n" \ - : "=&r"(val) \ - : "r"(&v->counter), "ir"(a) \ - : "cc"); \ -} \ - -#define ATOMIC64_OP_RETURN(op, op1, op2) \ -static inline s64 arch_atomic64_##op##_return(s64 a, atomic64_t *v) \ -{ \ - s64 val; \ - \ - smp_mb(); \ - \ - __asm__ __volatile__( \ - "1: \n" \ - " llockd %0, [%1] \n" \ - " " #op1 " %L0, %L0, %L2 \n" \ - " " #op2 " %H0, %H0, %H2 \n" \ - " scondd %0, [%1] \n" \ - " bnz 1b \n" \ - : [val] "=&r"(val) \ - : "r"(&v->counter), "ir"(a) \ - : "cc"); /* memory clobber comes from smp_mb() */ \ - \ - smp_mb(); \ - \ - return val; \ -} - -#define ATOMIC64_FETCH_OP(op, op1, op2) \ -static inline s64 arch_atomic64_fetch_##op(s64 a, atomic64_t *v) \ -{ \ - s64 val, orig; \ - \ - smp_mb(); \ - \ - __asm__ __volatile__( \ - "1: \n" \ - " llockd %0, [%2] \n" \ - " " #op1 " %L1, %L0, %L3 \n" \ - " " #op2 " %H1, %H0, %H3 \n" \ - " scondd %1, [%2] \n" \ - " bnz 1b \n" \ - : "=&r"(orig), "=&r"(val) \ - : "r"(&v->counter), "ir"(a) \ - : "cc"); /* memory clobber comes from smp_mb() */ \ - \ - smp_mb(); \ - \ - return orig; \ -} - -#define ATOMIC64_OPS(op, op1, op2) \ - ATOMIC64_OP(op, op1, op2) \ - ATOMIC64_OP_RETURN(op, op1, op2) \ - ATOMIC64_FETCH_OP(op, op1, op2) - -ATOMIC64_OPS(add, add.f, adc) -ATOMIC64_OPS(sub, sub.f, sbc) -ATOMIC64_OPS(and, and, and) -ATOMIC64_OPS(andnot, bic, bic) -ATOMIC64_OPS(or, or, or) -ATOMIC64_OPS(xor, xor, xor) - -#define arch_atomic64_andnot arch_atomic64_andnot -#define arch_atomic64_fetch_andnot arch_atomic64_fetch_andnot - -#undef ATOMIC64_OPS -#undef ATOMIC64_FETCH_OP -#undef ATOMIC64_OP_RETURN -#undef ATOMIC64_OP - -static inline s64 -arch_atomic64_cmpxchg(atomic64_t *ptr, s64 expected, s64 new) -{ - s64 prev; - - smp_mb(); - - __asm__ __volatile__( - "1: llockd %0, [%1] \n" - " brne %L0, %L2, 2f \n" - " brne %H0, %H2, 2f \n" - " scondd %3, [%1] \n" - " bnz 1b \n" - "2: \n" - : "=&r"(prev) - : "r"(ptr), "ir"(expected), "r"(new) - : "cc"); /* memory clobber comes from smp_mb() */ - - smp_mb(); - - return prev; -} - -static inline s64 arch_atomic64_xchg(atomic64_t *ptr, s64 new) -{ - s64 prev; - - smp_mb(); - - __asm__ __volatile__( - "1: llockd %0, [%1] \n" - " scondd %2, [%1] \n" - " bnz 1b \n" - "2: \n" - : "=&r"(prev) - : "r"(ptr), "r"(new) - : "cc"); /* memory clobber comes from smp_mb() */ - - smp_mb(); - - return prev; -} - -/** - * arch_atomic64_dec_if_positive - decrement by 1 if old value positive - * @v: pointer of type atomic64_t - * - * The function returns the old value of *v minus 1, even if - * the atomic variable, v, was not decremented. 
- */ - -static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v) -{ - s64 val; - - smp_mb(); - - __asm__ __volatile__( - "1: llockd %0, [%1] \n" - " sub.f %L0, %L0, 1 # w0 - 1, set C on borrow\n" - " sub.c %H0, %H0, 1 # if C set, w1 - 1\n" - " brlt %H0, 0, 2f \n" - " scondd %0, [%1] \n" - " bnz 1b \n" - "2: \n" - : "=&r"(val) - : "r"(&v->counter) - : "cc"); /* memory clobber comes from smp_mb() */ - - smp_mb(); - - return val; -} -#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive - -/** - * arch_atomic64_fetch_add_unless - add unless the number is a given value - * @v: pointer of type atomic64_t - * @a: the amount to add to v... - * @u: ...unless v is equal to u. - * - * Atomically adds @a to @v, if it was not @u. - * Returns the old value of @v - */ -static inline s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u) -{ - s64 old, temp; - - smp_mb(); - - __asm__ __volatile__( - "1: llockd %0, [%2] \n" - " brne %L0, %L4, 2f # continue to add since v != u \n" - " breq.d %H0, %H4, 3f # return since v == u \n" - "2: \n" - " add.f %L1, %L0, %L3 \n" - " adc %H1, %H0, %H3 \n" - " scondd %1, [%2] \n" - " bnz 1b \n" - "3: \n" - : "=&r"(old), "=&r" (temp) - : "r"(&v->counter), "r"(a), "r"(u) - : "cc"); /* memory clobber comes from smp_mb() */ - - smp_mb(); - - return old; -} -#define arch_atomic64_fetch_add_unless arch_atomic64_fetch_add_unless - -#endif /* !CONFIG_GENERIC_ATOMIC64 */ +#else +#include +#endif #endif /* !__ASSEMBLY__ */ diff --git a/arch/arc/include/asm/atomic64-arcv2.h b/arch/arc/include/asm/atomic64-arcv2.h new file mode 100644 index 000000000000..53996b11b551 --- /dev/null +++ b/arch/arc/include/asm/atomic64-arcv2.h @@ -0,0 +1,242 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +/* + * ARCv2 supports 64-bit exclusive load (LLOCKD) / store (SCONDD) + * - The address HAS to be 64-bit aligned + */ + +#ifndef _ASM_ARC_ATOMIC64_ARCV2_H +#define _ASM_ARC_ATOMIC64_ARCV2_H + +typedef struct { + s64 __aligned(8) counter; +} atomic64_t; + +#define ATOMIC64_INIT(a) { (a) } + +static inline s64 arch_atomic64_read(const atomic64_t *v) +{ + s64 val; + + __asm__ __volatile__( + " ldd %0, [%1] \n" + : "=r"(val) + : "r"(&v->counter)); + + return val; +} + +static inline void arch_atomic64_set(atomic64_t *v, s64 a) +{ + /* + * This could have been a simple assignment in "C" but would need + * explicit volatile. Otherwise gcc optimizers could elide the store + * which borked atomic64 self-test + * In the inline asm version, memory clobber needed for exact same + * reason, to tell gcc about the store. + * + * This however is not needed for sibling atomic64_add() etc since both + * load/store are explicitly done in inline asm. 
As long as API is used + * for each access, gcc has no way to optimize away any load/store + */ + __asm__ __volatile__( + " std %0, [%1] \n" + : + : "r"(a), "r"(&v->counter) + : "memory"); +} + +#define ATOMIC64_OP(op, op1, op2) \ +static inline void arch_atomic64_##op(s64 a, atomic64_t *v) \ +{ \ + s64 val; \ + \ + __asm__ __volatile__( \ + "1: \n" \ + " llockd %0, [%1] \n" \ + " " #op1 " %L0, %L0, %L2 \n" \ + " " #op2 " %H0, %H0, %H2 \n" \ + " scondd %0, [%1] \n" \ + " bnz 1b \n" \ + : "=&r"(val) \ + : "r"(&v->counter), "ir"(a) \ + : "cc"); \ +} \ + +#define ATOMIC64_OP_RETURN(op, op1, op2) \ +static inline s64 arch_atomic64_##op##_return(s64 a, atomic64_t *v) \ +{ \ + s64 val; \ + \ + smp_mb(); \ + \ + __asm__ __volatile__( \ + "1: \n" \ + " llockd %0, [%1] \n" \ + " " #op1 " %L0, %L0, %L2 \n" \ + " " #op2 " %H0, %H0, %H2 \n" \ + " scondd %0, [%1] \n" \ + " bnz 1b \n" \ + : [val] "=&r"(val) \ + : "r"(&v->counter), "ir"(a) \ + : "cc"); /* memory clobber comes from smp_mb() */ \ + \ + smp_mb(); \ + \ + return val; \ +} + +#define ATOMIC64_FETCH_OP(op, op1, op2) \ +static inline s64 arch_atomic64_fetch_##op(s64 a, atomic64_t *v) \ +{ \ + s64 val, orig; \ + \ + smp_mb(); \ + \ + __asm__ __volatile__( \ + "1: \n" \ + " llockd %0, [%2] \n" \ + " " #op1 " %L1, %L0, %L3 \n" \ + " " #op2 " %H1, %H0, %H3 \n" \ + " scondd %1, [%2] \n" \ + " bnz 1b \n" \ + : "=&r"(orig), "=&r"(val) \ + : "r"(&v->counter), "ir"(a) \ + : "cc"); /* memory clobber comes from smp_mb() */ \ + \ + smp_mb(); \ + \ + return orig; \ +} + +#define ATOMIC64_OPS(op, op1, op2) \ + ATOMIC64_OP(op, op1, op2) \ + ATOMIC64_OP_RETURN(op, op1, op2) \ + ATOMIC64_FETCH_OP(op, op1, op2) + +ATOMIC64_OPS(add, add.f, adc) +ATOMIC64_OPS(sub, sub.f, sbc) +ATOMIC64_OPS(and, and, and) +ATOMIC64_OPS(andnot, bic, bic) +ATOMIC64_OPS(or, or, or) +ATOMIC64_OPS(xor, xor, xor) + +#define arch_atomic64_andnot arch_atomic64_andnot +#define arch_atomic64_fetch_andnot arch_atomic64_fetch_andnot + +#undef ATOMIC64_OPS +#undef ATOMIC64_FETCH_OP +#undef ATOMIC64_OP_RETURN +#undef ATOMIC64_OP + +static inline s64 +arch_atomic64_cmpxchg(atomic64_t *ptr, s64 expected, s64 new) +{ + s64 prev; + + smp_mb(); + + __asm__ __volatile__( + "1: llockd %0, [%1] \n" + " brne %L0, %L2, 2f \n" + " brne %H0, %H2, 2f \n" + " scondd %3, [%1] \n" + " bnz 1b \n" + "2: \n" + : "=&r"(prev) + : "r"(ptr), "ir"(expected), "r"(new) + : "cc"); /* memory clobber comes from smp_mb() */ + + smp_mb(); + + return prev; +} + +static inline s64 arch_atomic64_xchg(atomic64_t *ptr, s64 new) +{ + s64 prev; + + smp_mb(); + + __asm__ __volatile__( + "1: llockd %0, [%1] \n" + " scondd %2, [%1] \n" + " bnz 1b \n" + "2: \n" + : "=&r"(prev) + : "r"(ptr), "r"(new) + : "cc"); /* memory clobber comes from smp_mb() */ + + smp_mb(); + + return prev; +} + +/** + * arch_atomic64_dec_if_positive - decrement by 1 if old value positive + * @v: pointer of type atomic64_t + * + * The function returns the old value of *v minus 1, even if + * the atomic variable, v, was not decremented. 
+ */ + +static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v) +{ + s64 val; + + smp_mb(); + + __asm__ __volatile__( + "1: llockd %0, [%1] \n" + " sub.f %L0, %L0, 1 # w0 - 1, set C on borrow\n" + " sub.c %H0, %H0, 1 # if C set, w1 - 1\n" + " brlt %H0, 0, 2f \n" + " scondd %0, [%1] \n" + " bnz 1b \n" + "2: \n" + : "=&r"(val) + : "r"(&v->counter) + : "cc"); /* memory clobber comes from smp_mb() */ + + smp_mb(); + + return val; +} +#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive + +/** + * arch_atomic64_fetch_add_unless - add unless the number is a given value + * @v: pointer of type atomic64_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, if it was not @u. + * Returns the old value of @v + */ +static inline s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u) +{ + s64 old, temp; + + smp_mb(); + + __asm__ __volatile__( + "1: llockd %0, [%2] \n" + " brne %L0, %L4, 2f # continue to add since v != u \n" + " breq.d %H0, %H4, 3f # return since v == u \n" + "2: \n" + " add.f %L1, %L0, %L3 \n" + " adc %H1, %H0, %H3 \n" + " scondd %1, [%2] \n" + " bnz 1b \n" + "3: \n" + : "=&r"(old), "=&r" (temp) + : "r"(&v->counter), "r"(a), "r"(u) + : "cc"); /* memory clobber comes from smp_mb() */ + + smp_mb(); + + return old; +} +#define arch_atomic64_fetch_add_unless arch_atomic64_fetch_add_unless + +#endif -- cgit From b1040148b2ea4ecbdb60c0d20393f4d15eee13ac Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 21 Jan 2020 09:56:33 -0800 Subject: ARC: atomic: !LLSC: remove hack in atomic_set() for UP !LLSC atomics use spinlock (SMP) or irq-disable (UP) to implement critical regions. UP atomic_set() however was "cheating" by not doing any of that and still being functional. Remove this anomaly (primarily as cleanup for future code improvements) given that this config is not worth the hassle of special-case code.
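For context, the unified !LLSC arch_atomic_set() that this converges on looks roughly like the sketch below, reusing the existing atomic_ops_lock()/atomic_ops_unlock() helpers from asm/smp.h (a sketch of the resulting shape, not the verbatim committed code):

	static inline void arch_atomic_set(atomic_t *v, int i)
	{
		unsigned long flags;

		/* irq-disable on UP, spinlock + irq-disable on SMP */
		atomic_ops_lock(flags);
		WRITE_ONCE(v->counter, i);
		atomic_ops_unlock(flags);
	}

UP now pays the (cheap) irq save/restore cost like the SMP case, in exchange for one uniform code path.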
Acked-by: Peter Zijlstra (Intel) Signed-off-by: Vineet Gupta --- arch/arc/include/asm/atomic-spinlock.h | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/atomic-spinlock.h b/arch/arc/include/asm/atomic-spinlock.h index bdf87610b2d7..8c6fd0e651e5 100644 --- a/arch/arc/include/asm/atomic-spinlock.h +++ b/arch/arc/include/asm/atomic-spinlock.h @@ -3,12 +3,10 @@ #ifndef _ASM_ARC_ATOMIC_SPLOCK_H #define _ASM_ARC_ATOMIC_SPLOCK_H -#ifndef CONFIG_SMP - - /* violating atomic_xxx API locking protocol in UP for optimization sake */ -#define arch_atomic_set(v, i) WRITE_ONCE(((v)->counter), (i)) - -#else +/* + * Non hardware assisted Atomic-R-M-W + * Locking would change to irq-disabling only (UP) and spinlocks (SMP) + */ static inline void arch_atomic_set(atomic_t *v, int i) { @@ -30,13 +28,6 @@ static inline void arch_atomic_set(atomic_t *v, int i) #define arch_atomic_set_release(v, i) arch_atomic_set((v), (i)) -#endif - -/* - * Non hardware assisted Atomic-R-M-W - * Locking would change to irq-disabling only (UP) and spinlocks (SMP) - */ - #define ATOMIC_OP(op, c_op, asm_op) \ static inline void arch_atomic_##op(int i, atomic_t *v) \ { \ -- cgit From ca766f04ad1dc4527e3e388fe3736e6b1bf0e6b2 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 5 May 2020 22:59:57 -0700 Subject: ARC: atomic: !LLSC: use int data type consistently Acked-by: Peter Zijlstra (Intel) Signed-off-by: Vineet Gupta --- arch/arc/include/asm/atomic-spinlock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/atomic-spinlock.h b/arch/arc/include/asm/atomic-spinlock.h index 8c6fd0e651e5..2c830347bfb4 100644 --- a/arch/arc/include/asm/atomic-spinlock.h +++ b/arch/arc/include/asm/atomic-spinlock.h @@ -42,7 +42,7 @@ static inline void arch_atomic_##op(int i, atomic_t *v) \ static inline int arch_atomic_##op##_return(int i, atomic_t *v) \ { \ unsigned long flags; \ - unsigned long temp; \ + unsigned int temp; \ \ /* \ * spin lock/unlock provides the needed smp_mb() before/after \ @@ -60,7 +60,7 @@ static inline int arch_atomic_##op##_return(int i, atomic_t *v) \ static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \ { \ unsigned long flags; \ - unsigned long orig; \ + unsigned int orig; \ \ /* \ * spin lock/unlock provides the needed smp_mb() before/after \ -- cgit From 7e8f8cbb43990861e5881594e14d491f81931f8d Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 13 Apr 2020 17:41:24 -0700 Subject: ARC: atomic64: LLSC: elide unused atomic_{and,or,xor,andnot}_return This is a non-functional change since those wrappers are not used in kernel sources at all. 
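The wrappers are guaranteed-dead because the kernel-wide atomic API defines value-returning "_return" forms only for the arithmetic ops; the bitwise ops are exposed in void and fetch_* flavours only. A rough sketch of that API surface (stated from the generic atomic conventions, not quoted from this patch):

	arch_atomic64_add_return(a, v);	/* arithmetic: _return is part of the API, kept */
	arch_atomic64_fetch_and(a, v);	/* bitwise: fetch form is part of the API, kept */
	arch_atomic64_and_return(a, v);	/* no such API exists -> generated wrapper was dead code */

The diff below drops the dead variants by re-instantiating ATOMIC64_OPS() without the ATOMIC64_OP_RETURN() leg for and/andnot/or/xor.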
Link: http://lists.infradead.org/pipermail/linux-snps-arc/2018-August/004246.html Suggested-by: Peter Zijlstra (Intel) Acked-by: Peter Zijlstra (Intel) Signed-off-by: Vineet Gupta --- arch/arc/include/asm/atomic64-arcv2.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch') diff --git a/arch/arc/include/asm/atomic64-arcv2.h b/arch/arc/include/asm/atomic64-arcv2.h index 53996b11b551..22ef1cbb94e2 100644 --- a/arch/arc/include/asm/atomic64-arcv2.h +++ b/arch/arc/include/asm/atomic64-arcv2.h @@ -116,6 +116,12 @@ static inline s64 arch_atomic64_fetch_##op(s64 a, atomic64_t *v) \ ATOMIC64_OPS(add, add.f, adc) ATOMIC64_OPS(sub, sub.f, sbc) + +#undef ATOMIC64_OPS +#define ATOMIC64_OPS(op, op1, op2) \ + ATOMIC64_OP(op, op1, op2) \ + ATOMIC64_FETCH_OP(op, op1, op2) + ATOMIC64_OPS(and, and, and) ATOMIC64_OPS(andnot, bic, bic) ATOMIC64_OPS(or, or, or) -- cgit From b64be6836993c431e54fad239fcba0543854ee35 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 13 Apr 2020 18:07:49 -0700 Subject: ARC: atomics: implement relaxed variants The current ARC fetch/return atomics provide fully ordered semantics only with 2 full barriers around the operation. Instead implement them as relaxed variants without any barriers and rely on generic code to generate the fully-ordered, acquire and release variants by adding the appropriate full barriers. This helps elide some extra barriers in case of acquire/release/relaxed calls. bloat-o-meter for hsdk defconfig shows codegen improvements, although the numbers below are inflated due to unrelated inlining heuristic changes | bloat-o-meter vmlinux-643babe34fd7-non-relaxed vmlinux-45aa05cb44d7-relaxed | add/remove: 2/5 grow/shrink: 42/1222 up/down: 4158/-14312 (-10154) | Function old new delta | .. | sys_renameat 462 476 +14 | ip_mc_inc_group 424 436 +12 | do_read_cache_page 1882 1894 +12 | .. | refcount_dec_and_mutex_lock 254 250 -4 | refcount_dec_and_lock_irqsave 258 254 -4 | refcount_dec_and_lock 254 250 -4 | ..
| tcp_v6_route_req 246 238 -8 | tcp_v4_destroy_sock 286 278 -8 | tcp_twsk_unique 352 344 -8 Link: https://lore.kernel.org/r/20180830144344.GW24142@hirez.programming.kicks-ass.net Suggested-by: Peter Zijlstra (Intel) Acked-by: Peter Zijlstra (Intel) Signed-off-by: Vineet Gupta --- arch/arc/include/asm/atomic-llsc.h | 32 +++++++++++++------------------- arch/arc/include/asm/atomic64-arcv2.h | 24 +++++++++++++----------- 2 files changed, 26 insertions(+), 30 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/atomic-llsc.h b/arch/arc/include/asm/atomic-llsc.h index aab4f2855457..088d348781c1 100644 --- a/arch/arc/include/asm/atomic-llsc.h +++ b/arch/arc/include/asm/atomic-llsc.h @@ -22,16 +22,10 @@ static inline void arch_atomic_##op(int i, atomic_t *v) \ } \ #define ATOMIC_OP_RETURN(op, c_op, asm_op) \ -static inline int arch_atomic_##op##_return(int i, atomic_t *v) \ +static inline int arch_atomic_##op##_return_relaxed(int i, atomic_t *v) \ { \ unsigned int val; \ \ - /* \ - * Explicit full memory barrier needed before/after as \ - * LLOCK/SCOND themselves don't provide any such semantics \ - */ \ - smp_mb(); \ - \ __asm__ __volatile__( \ "1: llock %[val], [%[ctr]] \n" \ " " #asm_op " %[val], %[val], %[i] \n" \ @@ -42,22 +36,17 @@ static inline int arch_atomic_##op##_return(int i, atomic_t *v) \ [i] "ir" (i) \ : "cc"); \ \ - smp_mb(); \ - \ return val; \ } +#define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed +#define arch_atomic_sub_return_relaxed arch_atomic_sub_return_relaxed + #define ATOMIC_FETCH_OP(op, c_op, asm_op) \ -static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \ +static inline int arch_atomic_fetch_##op##_relaxed(int i, atomic_t *v) \ { \ unsigned int val, orig; \ \ - /* \ - * Explicit full memory barrier needed before/after as \ - * LLOCK/SCOND themselves don't provide any such semantics \ - */ \ - smp_mb(); \ - \ __asm__ __volatile__( \ "1: llock %[orig], [%[ctr]] \n" \ " " #asm_op " %[val], %[orig], %[i] \n" \ @@ -69,11 +58,17 @@ static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \ [i] "ir" (i) \ : "cc"); \ \ - smp_mb(); \ - \ return orig; \ } +#define arch_atomic_fetch_add_relaxed arch_atomic_fetch_add_relaxed +#define arch_atomic_fetch_sub_relaxed arch_atomic_fetch_sub_relaxed + +#define arch_atomic_fetch_and_relaxed arch_atomic_fetch_and_relaxed +#define arch_atomic_fetch_andnot_relaxed arch_atomic_fetch_andnot_relaxed +#define arch_atomic_fetch_or_relaxed arch_atomic_fetch_or_relaxed +#define arch_atomic_fetch_xor_relaxed arch_atomic_fetch_xor_relaxed + #define ATOMIC_OPS(op, c_op, asm_op) \ ATOMIC_OP(op, c_op, asm_op) \ ATOMIC_OP_RETURN(op, c_op, asm_op) \ @@ -93,7 +88,6 @@ ATOMIC_OPS(or, |=, or) ATOMIC_OPS(xor, ^=, xor) #define arch_atomic_andnot arch_atomic_andnot -#define arch_atomic_fetch_andnot arch_atomic_fetch_andnot #undef ATOMIC_OPS #undef ATOMIC_FETCH_OP diff --git a/arch/arc/include/asm/atomic64-arcv2.h b/arch/arc/include/asm/atomic64-arcv2.h index 22ef1cbb94e2..c5a8010fdc97 100644 --- a/arch/arc/include/asm/atomic64-arcv2.h +++ b/arch/arc/include/asm/atomic64-arcv2.h @@ -64,12 +64,10 @@ static inline void arch_atomic64_##op(s64 a, atomic64_t *v) \ } \ #define ATOMIC64_OP_RETURN(op, op1, op2) \ -static inline s64 arch_atomic64_##op##_return(s64 a, atomic64_t *v) \ +static inline s64 arch_atomic64_##op##_return_relaxed(s64 a, atomic64_t *v) \ { \ s64 val; \ \ - smp_mb(); \ - \ __asm__ __volatile__( \ "1: \n" \ " llockd %0, [%1] \n" \ @@ -81,18 +79,17 @@ static inline s64 arch_atomic64_##op##_return(s64 a, 
atomic64_t *v) \ : "r"(&v->counter), "ir"(a) \ : "cc"); /* memory clobber comes from smp_mb() */ \ \ - smp_mb(); \ - \ return val; \ } +#define arch_atomic64_add_return_relaxed arch_atomic64_add_return_relaxed +#define arch_atomic64_sub_return_relaxed arch_atomic64_sub_return_relaxed + #define ATOMIC64_FETCH_OP(op, op1, op2) \ -static inline s64 arch_atomic64_fetch_##op(s64 a, atomic64_t *v) \ +static inline s64 arch_atomic64_fetch_##op##_relaxed(s64 a, atomic64_t *v) \ { \ s64 val, orig; \ \ - smp_mb(); \ - \ __asm__ __volatile__( \ "1: \n" \ " llockd %0, [%2] \n" \ " " #op1 " %L1, %L0, %L3 \n" \ " " #op2 " %H1, %H0, %H3 \n" \ " scondd %1, [%2] \n" \ " bnz 1b \n" \ @@ -104,11 +101,17 @@ static inline s64 arch_atomic64_fetch_##op(s64 a, atomic64_t *v) \ : "r"(&v->counter), "ir"(a) \ : "cc"); /* memory clobber comes from smp_mb() */ \ \ - smp_mb(); \ - \ return orig; \ } +#define arch_atomic64_fetch_add_relaxed arch_atomic64_fetch_add_relaxed +#define arch_atomic64_fetch_sub_relaxed arch_atomic64_fetch_sub_relaxed + +#define arch_atomic64_fetch_and_relaxed arch_atomic64_fetch_and_relaxed +#define arch_atomic64_fetch_andnot_relaxed arch_atomic64_fetch_andnot_relaxed +#define arch_atomic64_fetch_or_relaxed arch_atomic64_fetch_or_relaxed +#define arch_atomic64_fetch_xor_relaxed arch_atomic64_fetch_xor_relaxed + #define ATOMIC64_OPS(op, op1, op2) \ ATOMIC64_OP(op, op1, op2) \ ATOMIC64_OP_RETURN(op, op1, op2) \ @@ -128,7 +131,6 @@ ATOMIC64_OPS(or, or, or) ATOMIC64_OPS(xor, xor, xor) #define arch_atomic64_andnot arch_atomic64_andnot -#define arch_atomic64_fetch_andnot arch_atomic64_fetch_andnot #undef ATOMIC64_OPS #undef ATOMIC64_FETCH_OP -- cgit From cea43147905f1c2b7b48104a5304cf5229f45bec Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 4 Sep 2018 16:11:29 -0700 Subject: ARC: switch to generic bitops - !LLSC now only needs a single spinlock for atomics and bitops - Some codegen changes (slight bloat) with generic bitops 1. code increase due to LD-check-atomic paradigm vs. unconditional atomic (but dirty'ing the cache line even if set already). So despite the increase, generic is the right thing to do. 2. code decrease (but use of costlier instructions such as DIV vs. shifts based math) due to signed arithmetic. This needs to be revisited separately.
arc: static inline int test_bit(unsigned int nr, const volatile unsigned long *addr) ^^^^^^^^^^^^ generic: static inline int test_bit(int nr, const volatile unsigned long *addr) ^^^ Link: https://lore.kernel.org/r/20180830135749.GA13005@arm.com Signed-off-by: Will Deacon Acked-by: Peter Zijlstra (Intel) [vgupta: wrote patch based on Will's poc, analysed codegen diffs] Signed-off-by: Vineet Gupta --- arch/arc/include/asm/bitops.h | 184 +----------------------------------------- arch/arc/include/asm/smp.h | 14 ---- arch/arc/kernel/smp.c | 2 - 3 files changed, 2 insertions(+), 198 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h index fb98440c0bd4..4f35130f5ba3 100644 --- a/arch/arc/include/asm/bitops.h +++ b/arch/arc/include/asm/bitops.h @@ -14,188 +14,6 @@ #include #include -#include -#ifndef CONFIG_ARC_HAS_LLSC -#include -#endif - -#ifdef CONFIG_ARC_HAS_LLSC - -/* - * Hardware assisted Atomic-R-M-W - */ - -#define BIT_OP(op, c_op, asm_op) \ -static inline void op##_bit(unsigned long nr, volatile unsigned long *m)\ -{ \ - unsigned int temp; \ - \ - m += nr >> 5; \ - \ - nr &= 0x1f; \ - \ - __asm__ __volatile__( \ - "1: llock %0, [%1] \n" \ - " " #asm_op " %0, %0, %2 \n" \ - " scond %0, [%1] \n" \ - " bnz 1b \n" \ - : "=&r"(temp) /* Early clobber, to prevent reg reuse */ \ - : "r"(m), /* Not "m": llock only supports reg direct addr mode */ \ - "ir"(nr) \ - : "cc"); \ -} - -/* - * Semantically: - * Test the bit - * if clear - * set it and return 0 (old value) - * else - * return 1 (old value). - * - * Since ARC lacks a equivalent h/w primitive, the bit is set unconditionally - * and the old value of bit is returned - */ -#define TEST_N_BIT_OP(op, c_op, asm_op) \ -static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\ -{ \ - unsigned long old, temp; \ - \ - m += nr >> 5; \ - \ - nr &= 0x1f; \ - \ - /* \ - * Explicit full memory barrier needed before/after as \ - * LLOCK/SCOND themselves don't provide any such smenatic \ - */ \ - smp_mb(); \ - \ - __asm__ __volatile__( \ - "1: llock %0, [%2] \n" \ - " " #asm_op " %1, %0, %3 \n" \ - " scond %1, [%2] \n" \ - " bnz 1b \n" \ - : "=&r"(old), "=&r"(temp) \ - : "r"(m), "ir"(nr) \ - : "cc"); \ - \ - smp_mb(); \ - \ - return (old & (1 << nr)) != 0; \ -} - -#else /* !CONFIG_ARC_HAS_LLSC */ - -/* - * Non hardware assisted Atomic-R-M-W - * Locking would change to irq-disabling only (UP) and spinlocks (SMP) - * - * There's "significant" micro-optimization in writing our own variants of - * bitops (over generic variants) - * - * (1) The generic APIs have "signed" @nr while we have it "unsigned" - * This avoids extra code to be generated for pointer arithmatic, since - * is "not sure" that index is NOT -ve - * (2) Utilize the fact that ARCompact bit fidding insn (BSET/BCLR/ASL) etc - * only consider bottom 5 bits of @nr, so NO need to mask them off. 
- * (GCC Quirk: however for constant @nr we still need to do the masking - * at compile time) - */ - -#define BIT_OP(op, c_op, asm_op) \ -static inline void op##_bit(unsigned long nr, volatile unsigned long *m)\ -{ \ - unsigned long temp, flags; \ - m += nr >> 5; \ - \ - /* \ - * spin lock/unlock provide the needed smp_mb() before/after \ - */ \ - bitops_lock(flags); \ - \ - temp = *m; \ - *m = temp c_op (1UL << (nr & 0x1f)); \ - \ - bitops_unlock(flags); \ -} - -#define TEST_N_BIT_OP(op, c_op, asm_op) \ -static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\ -{ \ - unsigned long old, flags; \ - m += nr >> 5; \ - \ - bitops_lock(flags); \ - \ - old = *m; \ - *m = old c_op (1UL << (nr & 0x1f)); \ - \ - bitops_unlock(flags); \ - \ - return (old & (1UL << (nr & 0x1f))) != 0; \ -} - -#endif - -/*************************************** - * Non atomic variants - **************************************/ - -#define __BIT_OP(op, c_op, asm_op) \ -static inline void __##op##_bit(unsigned long nr, volatile unsigned long *m) \ -{ \ - unsigned long temp; \ - m += nr >> 5; \ - \ - temp = *m; \ - *m = temp c_op (1UL << (nr & 0x1f)); \ -} - -#define __TEST_N_BIT_OP(op, c_op, asm_op) \ -static inline int __test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\ -{ \ - unsigned long old; \ - m += nr >> 5; \ - \ - old = *m; \ - *m = old c_op (1UL << (nr & 0x1f)); \ - \ - return (old & (1UL << (nr & 0x1f))) != 0; \ -} - -#define BIT_OPS(op, c_op, asm_op) \ - \ - /* set_bit(), clear_bit(), change_bit() */ \ - BIT_OP(op, c_op, asm_op) \ - \ - /* test_and_set_bit(), test_and_clear_bit(), test_and_change_bit() */\ - TEST_N_BIT_OP(op, c_op, asm_op) \ - \ - /* __set_bit(), __clear_bit(), __change_bit() */ \ - __BIT_OP(op, c_op, asm_op) \ - \ - /* __test_and_set_bit(), __test_and_clear_bit(), __test_and_change_bit() */\ - __TEST_N_BIT_OP(op, c_op, asm_op) - -BIT_OPS(set, |, bset) -BIT_OPS(clear, & ~, bclr) -BIT_OPS(change, ^, bxor) - -/* - * This routine doesn't need to be atomic. 
- */ -static inline int -test_bit(unsigned int nr, const volatile unsigned long *addr) -{ - unsigned long mask; - - addr += nr >> 5; - - mask = 1UL << (nr & 0x1f); - - return ((mask & *addr) != 0); -} #ifdef CONFIG_ISA_ARCOMPACT @@ -368,6 +186,8 @@ static inline __attribute__ ((const)) unsigned long __ffs(unsigned long x) #include #include #include +#include +#include #include #include diff --git a/arch/arc/include/asm/smp.h b/arch/arc/include/asm/smp.h index c5de4008d19f..d856491606ac 100644 --- a/arch/arc/include/asm/smp.h +++ b/arch/arc/include/asm/smp.h @@ -105,7 +105,6 @@ static inline const char *arc_platform_smp_cpuinfo(void) #include extern arch_spinlock_t smp_atomic_ops_lock; -extern arch_spinlock_t smp_bitops_lock; #define atomic_ops_lock(flags) do { \ local_irq_save(flags); \ @@ -117,24 +116,11 @@ extern arch_spinlock_t smp_bitops_lock; local_irq_restore(flags); \ } while (0) -#define bitops_lock(flags) do { \ - local_irq_save(flags); \ - arch_spin_lock(&smp_bitops_lock); \ -} while (0) - -#define bitops_unlock(flags) do { \ - arch_spin_unlock(&smp_bitops_lock); \ - local_irq_restore(flags); \ -} while (0) - #else /* !CONFIG_SMP */ #define atomic_ops_lock(flags) local_irq_save(flags) #define atomic_ops_unlock(flags) local_irq_restore(flags) -#define bitops_lock(flags) local_irq_save(flags) -#define bitops_unlock(flags) local_irq_restore(flags) - #endif /* !CONFIG_SMP */ #endif /* !CONFIG_ARC_HAS_LLSC */ diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c index b5b5fe2c6e1e..78e6d069b1c1 100644 --- a/arch/arc/kernel/smp.c +++ b/arch/arc/kernel/smp.c @@ -29,10 +29,8 @@ #ifndef CONFIG_ARC_HAS_LLSC arch_spinlock_t smp_atomic_ops_lock = __ARCH_SPIN_LOCK_UNLOCKED; -arch_spinlock_t smp_bitops_lock = __ARCH_SPIN_LOCK_UNLOCKED; EXPORT_SYMBOL_GPL(smp_atomic_ops_lock); -EXPORT_SYMBOL_GPL(smp_bitops_lock); #endif struct plat_smp_ops __weak plat_smp_ops; -- cgit From 9d011e12075dc51fb57f8203a08cc5229fbcb2ef Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 5 May 2020 13:54:24 -0700 Subject: ARC: bitops: fls/ffs to take int (vs long) per asm-generic defines Acked-by: Peter Zijlstra (Intel) Signed-off-by: Vineet Gupta --- arch/arc/include/asm/bitops.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h index 4f35130f5ba3..a7daaf64ae34 100644 --- a/arch/arc/include/asm/bitops.h +++ b/arch/arc/include/asm/bitops.h @@ -114,7 +114,7 @@ static inline __attribute__ ((const)) unsigned long __ffs(unsigned long word) * @result: [1-32] * fls(1) = 1, fls(0x80000000) = 32, fls(0) = 0 */ -static inline __attribute__ ((const)) int fls(unsigned long x) +static inline __attribute__ ((const)) int fls(unsigned int x) { int n; @@ -141,7 +141,7 @@ static inline __attribute__ ((const)) int __fls(unsigned long x) * ffs = Find First Set in word (LSB to MSB) * @result: [1-32], 0 if all 0's */ -static inline __attribute__ ((const)) int ffs(unsigned long x) +static inline __attribute__ ((const)) int ffs(unsigned int x) { int n; -- cgit From ecf51c9fa0960fd25cd66f2280fb1980b0d2e300 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Wed, 6 May 2020 14:41:12 -0700 Subject: ARC: xchg: !LLSC: remove UP micro-optimization/hack It gets in the way of cleaning things up and is a maintenance pain in the neck!
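Concretely, the guard on the lock-taking arch_xchg() changes from "!LLSC && SMP" to just "!LLSC", so UP now goes through the same path. A rough sketch of the resulting shape (not the verbatim macro):

	#ifndef CONFIG_ARC_HAS_LLSC	/* was: !LLSC && CONFIG_SMP */
	#define arch_xchg(ptr, with)					\
	({								\
		unsigned long flags;					\
		typeof(*(ptr)) old_val;					\
									\
		atomic_ops_lock(flags);	/* now taken on UP too */	\
		old_val = _xchg(ptr, with);				\
		atomic_ops_unlock(flags);				\
		old_val;						\
	})
	#endif

This keeps xchg() under the same atomic_ops_lock that cmpxchg() uses, since llist.h-style users mix the two on the same data.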
Acked-by: Peter Zijlstra (Intel) Signed-off-by: Vineet Gupta --- arch/arc/include/asm/cmpxchg.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/cmpxchg.h b/arch/arc/include/asm/cmpxchg.h index d42917e803e1..f9564dbe39b7 100644 --- a/arch/arc/include/asm/cmpxchg.h +++ b/arch/arc/include/asm/cmpxchg.h @@ -113,15 +113,9 @@ static inline unsigned long __xchg(unsigned long val, volatile void *ptr, * - For !LLSC, cmpxchg() needs to use that lock (see above) and there is lot * of kernel code which calls xchg()/cmpxchg() on same data (see llist.h) * Hence xchg() needs to follow same locking rules. - * - * Technically the lock is also needed for UP (boils down to irq save/restore) - * but we can cheat a bit since cmpxchg() atomic_ops_lock() would cause irqs to - * be disabled thus can't possibly be interrupted/preempted/clobbered by xchg() - * Other way around, xchg is one instruction anyways, so can't be interrupted - * as such */ -#if !defined(CONFIG_ARC_HAS_LLSC) && defined(CONFIG_SMP) +#ifndef CONFIG_ARC_HAS_LLSC #define arch_xchg(ptr, with) \ ({ \ -- cgit From e188f3330a13df904d77003846eafd3edf99009d Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Wed, 6 May 2020 15:13:25 -0700 Subject: ARC: cmpxchg/xchg: rewrite as macros to make type safe Existing code forces/assumes args to type "long" which won't work in LP64 regime, so prepare code for that. Interestingly this should be a non-functional change but I do see some codegen changes | bloat-o-meter vmlinux-cmpxchg-A vmlinux-cmpxchg-B | add/remove: 0/0 grow/shrink: 17/12 up/down: 218/-150 (68) | | Function old new delta | rwsem_optimistic_spin 518 550 +32 | rwsem_down_write_slowpath 1244 1274 +30 | __do_sys_perf_event_open 2576 2600 +24 | down_read 192 200 +8 | __down_read 192 200 +8 ... | task_work_run 168 148 -20 | dma_fence_chain_walk.part 760 736 -24 | __genradix_ptr_alloc 674 646 -28 Total: Before=6187409, After=6187477, chg +0.00% Acked-by: Peter Zijlstra (Intel) Signed-off-by: Vineet Gupta --- arch/arc/include/asm/cmpxchg.h | 213 ++++++++++++++++++++++------------- 1 file changed, 117 insertions(+), 96 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/cmpxchg.h b/arch/arc/include/asm/cmpxchg.h index f9564dbe39b7..00deb076d6f6 100644 --- a/arch/arc/include/asm/cmpxchg.h +++ b/arch/arc/include/asm/cmpxchg.h @@ -6,6 +6,7 @@ #ifndef __ASM_ARC_CMPXCHG_H #define __ASM_ARC_CMPXCHG_H +#include #include #include @@ -13,62 +14,77 @@ #ifdef CONFIG_ARC_HAS_LLSC -static inline unsigned long -__cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new) -{ - unsigned long prev; - - /* - * Explicit full memory barrier needed before/after as - * LLOCK/SCOND themselves don't provide any such semantics - */ - smp_mb(); - - __asm__ __volatile__( - "1: llock %0, [%1] \n" - " brne %0, %2, 2f \n" - " scond %3, [%1] \n" - " bnz 1b \n" - "2: \n" - : "=&r"(prev) /* Early clobber, to prevent reg reuse */ - : "r"(ptr), /* Not "m": llock only supports reg direct addr mode */ - "ir"(expected), - "r"(new) /* can't be "ir".
scond can't take LIMM for "b" */ - : "cc", "memory"); /* so that gcc knows memory is being written here */ - - smp_mb(); - - return prev; -} - -#else /* !CONFIG_ARC_HAS_LLSC */ - -static inline unsigned long -__cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new) -{ - unsigned long flags; - int prev; - volatile unsigned long *p = ptr; - - /* - * spin lock/unlock provide the needed smp_mb() before/after - */ - atomic_ops_lock(flags); - prev = *p; - if (prev == expected) - *p = new; - atomic_ops_unlock(flags); - return prev; -} +/* + * if (*ptr == @old) + * *ptr = @new + */ +#define __cmpxchg(ptr, old, new) \ +({ \ + __typeof__(*(ptr)) _prev; \ + \ + __asm__ __volatile__( \ + "1: llock %0, [%1] \n" \ + " brne %0, %2, 2f \n" \ + " scond %3, [%1] \n" \ + " bnz 1b \n" \ + "2: \n" \ + : "=&r"(_prev) /* Early clobber prevent reg reuse */ \ + : "r"(ptr), /* Not "m": llock only supports reg */ \ + "ir"(old), \ + "r"(new) /* Not "ir": scond can't take LIMM */ \ + : "cc", \ + "memory"); /* gcc knows memory is clobbered */ \ + \ + _prev; \ +}) -#endif +#define arch_cmpxchg(ptr, old, new) \ +({ \ + __typeof__(ptr) _p_ = (ptr); \ + __typeof__(*(ptr)) _o_ = (old); \ + __typeof__(*(ptr)) _n_ = (new); \ + __typeof__(*(ptr)) _prev_; \ + \ + switch(sizeof((_p_))) { \ + case 4: \ + /* \ + * Explicit full memory barrier needed before/after \ + */ \ + smp_mb(); \ + _prev_ = __cmpxchg(_p_, _o_, _n_); \ + smp_mb(); \ + break; \ + default: \ + BUILD_BUG(); \ + } \ + _prev_; \ +}) -#define arch_cmpxchg(ptr, o, n) ({ \ - (typeof(*(ptr)))__cmpxchg((ptr), \ - (unsigned long)(o), \ - (unsigned long)(n)); \ +#else + +#define arch_cmpxchg(ptr, old, new) \ +({ \ + volatile __typeof__(ptr) _p_ = (ptr); \ + __typeof__(*(ptr)) _o_ = (old); \ + __typeof__(*(ptr)) _n_ = (new); \ + __typeof__(*(ptr)) _prev_; \ + unsigned long __flags; \ + \ + BUILD_BUG_ON(sizeof(_p_) != 4); \ + \ + /* \ + * spin lock/unlock provide the needed smp_mb() before/after \ + */ \ + atomic_ops_lock(__flags); \ + _prev_ = *_p_; \ + if (_prev_ == _o_) \ + *_p_ = _n_; \ + atomic_ops_unlock(__flags); \ + _prev_; \ }) +#endif + /* * atomic_cmpxchg is same as cmpxchg * LLSC: only different in data-type, semantics are exactly same @@ -77,61 +93,66 @@ __cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new) */ #define arch_atomic_cmpxchg(v, o, n) ((int)arch_cmpxchg(&((v)->counter), (o), (n))) - /* - * xchg (reg with memory) based on "Native atomic" EX insn + * xchg */ -static inline unsigned long __xchg(unsigned long val, volatile void *ptr, - int size) -{ - extern unsigned long __xchg_bad_pointer(void); - - switch (size) { - case 4: - smp_mb(); - - __asm__ __volatile__( - " ex %0, [%1] \n" - : "+r"(val) - : "r"(ptr) - : "memory"); +#ifdef CONFIG_ARC_HAS_LLSC - smp_mb(); +#define __xchg(ptr, val) \ +({ \ + __asm__ __volatile__( \ + " ex %0, [%1] \n" /* set new value */ \ + : "+r"(val) \ + : "r"(ptr) \ + : "memory"); \ + _val_; /* get old value */ \ +}) - return val; - } - return __xchg_bad_pointer(); -} +#define arch_xchg(ptr, val) \ +({ \ + __typeof__(ptr) _p_ = (ptr); \ + __typeof__(*(ptr)) _val_ = (val); \ + \ + switch(sizeof(*(_p_))) { \ + case 4: \ + smp_mb(); \ + _val_ = __xchg(_p_, _val_); \ + smp_mb(); \ + break; \ + default: \ + BUILD_BUG(); \ + } \ + _val_; \ +}) -#define _xchg(ptr, with) ((typeof(*(ptr)))__xchg((unsigned long)(with), (ptr), \ - sizeof(*(ptr)))) +#else /* !CONFIG_ARC_HAS_LLSC */ /* - * xchg() maps directly to ARC EX instruction which guarantees atomicity. 
- * However in !LLSC config, it also needs to be use @atomic_ops_lock spinlock - * due to a subtle reason: - * - For !LLSC, cmpxchg() needs to use that lock (see above) and there is lot - * of kernel code which calls xchg()/cmpxchg() on same data (see llist.h) - * Hence xchg() needs to follow same locking rules. + * EX instructions is baseline and present in !LLSC too. But in this + * regime it still needs use @atomic_ops_lock spinlock to allow interop + * with cmpxchg() which uses spinlock in !LLSC + * (llist.h use xchg and cmpxchg on sama data) */ -#ifndef CONFIG_ARC_HAS_LLSC - -#define arch_xchg(ptr, with) \ -({ \ - unsigned long flags; \ - typeof(*(ptr)) old_val; \ - \ - atomic_ops_lock(flags); \ - old_val = _xchg(ptr, with); \ - atomic_ops_unlock(flags); \ - old_val; \ +#define arch_xchg(ptr, val) \ +({ \ + __typeof__(ptr) _p_ = (ptr); \ + __typeof__(*(ptr)) _val_ = (val); \ + \ + unsigned long __flags; \ + \ + atomic_ops_lock(__flags); \ + \ + __asm__ __volatile__( \ + " ex %0, [%1] \n" \ + : "+r"(_val_) \ + : "r"(_p_) \ + : "memory"); \ + \ + atomic_ops_unlock(__flags); \ + _val_; \ }) -#else - -#define arch_xchg(ptr, with) _xchg(ptr, with) - #endif /* -- cgit From ddc348c44d82d0b55236210d5606ac868e6cc364 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Sat, 9 May 2020 00:15:36 -0700 Subject: ARC: cmpxchg/xchg: implement relaxed variants (LLSC config only) It only makes sense to do this for the LLSC config Acked-by: Peter Zijlstra (Intel) Signed-off-by: Vineet Gupta --- arch/arc/include/asm/cmpxchg.h | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/cmpxchg.h b/arch/arc/include/asm/cmpxchg.h index 00deb076d6f6..e2ae0eb1ca07 100644 --- a/arch/arc/include/asm/cmpxchg.h +++ b/arch/arc/include/asm/cmpxchg.h @@ -38,7 +38,7 @@ _prev; \ }) -#define arch_cmpxchg(ptr, old, new) \ +#define arch_cmpxchg_relaxed(ptr, old, new) \ ({ \ __typeof__(ptr) _p_ = (ptr); \ __typeof__(*(ptr)) _o_ = (old); \ @@ -47,12 +47,7 @@ \ switch(sizeof((_p_))) { \ case 4: \ - /* \ - * Explicit full memory barrier needed before/after \ - */ \ - smp_mb(); \ _prev_ = __cmpxchg(_p_, _o_, _n_); \ - smp_mb(); \ break; \ default: \ BUILD_BUG(); \ @@ -108,16 +103,14 @@ _val_; /* get old value */ \ }) -#define arch_xchg(ptr, val) \ +#define arch_xchg_relaxed(ptr, val) \ ({ \ __typeof__(ptr) _p_ = (ptr); \ __typeof__(*(ptr)) _val_ = (val); \ \ switch(sizeof(*(_p_))) { \ case 4: \ - smp_mb(); \ _val_ = __xchg(_p_, _val_); \ - smp_mb(); \ break; \ default: \ BUILD_BUG(); \ -- cgit From 301014cf6d72836dd5fd5b3e9c92633f35b298c0 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 11 May 2020 13:27:23 -0700 Subject: ARC: atomic_cmpxchg/atomic_xchg: implement relaxed variants And move them out of cmpxchg.h to canonical atomic.h Acked-by: Peter Zijlstra (Intel) Signed-off-by: Vineet Gupta --- arch/arc/include/asm/atomic.h | 27 +++++++++++++++++++++++++++ arch/arc/include/asm/cmpxchg.h | 23 ----------------------- 2 files changed, 27 insertions(+), 23 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h index ee88e1dbaab5..52ee51e1ff7c 100644 --- a/arch/arc/include/asm/atomic.h +++ b/arch/arc/include/asm/atomic.h @@ -22,6 +22,33 @@ #include #endif +#define arch_atomic_cmpxchg(v, o, n) \ +({ \ + arch_cmpxchg(&((v)->counter), (o), (n)); \ +}) + +#ifdef arch_cmpxchg_relaxed +#define arch_atomic_cmpxchg_relaxed(v, o, n) \ +({ \ + arch_cmpxchg_relaxed(&((v)->counter), (o), (n)); \ +}) +#endif + +#define 
arch_atomic_xchg(v, n) \ +({ \ + arch_xchg(&((v)->counter), (n)); \ +}) + +#ifdef arch_xchg_relaxed +#define arch_atomic_xchg_relaxed(v, n) \ +({ \ + arch_xchg_relaxed(&((v)->counter), (n)); \ +}) +#endif + +/* + * 64-bit atomics + */ #ifdef CONFIG_GENERIC_ATOMIC64 #include #else diff --git a/arch/arc/include/asm/cmpxchg.h b/arch/arc/include/asm/cmpxchg.h index e2ae0eb1ca07..c5b544a5fe81 100644 --- a/arch/arc/include/asm/cmpxchg.h +++ b/arch/arc/include/asm/cmpxchg.h @@ -80,14 +80,6 @@ #endif -/* - * atomic_cmpxchg is same as cmpxchg - * LLSC: only different in data-type, semantics are exactly same - * !LLSC: cmpxchg() has to use an external lock atomic_ops_lock to guarantee - * semantics, and this lock also happens to be used by atomic_*() - */ -#define arch_atomic_cmpxchg(v, o, n) ((int)arch_cmpxchg(&((v)->counter), (o), (n))) - /* * xchg */ @@ -148,19 +140,4 @@ #endif -/* - * "atomic" variant of xchg() - * REQ: It needs to follow the same serialization rules as other atomic_xxx() - * Since xchg() doesn't always do that, it would seem that following definition - * is incorrect. But here's the rationale: - * SMP : Even xchg() takes the atomic_ops_lock, so OK. - * LLSC: atomic_ops_lock are not relevant at all (even if SMP, since LLSC - * is natively "SMP safe", no serialization required). - * UP : other atomics disable IRQ, so no way a difft ctxt atomic_xchg() - * could clobber them. atomic_xchg() itself would be 1 insn, so it - * can't be clobbered by others. Thus no serialization required when - * atomic_xchg is involved. - */ -#define arch_atomic_xchg(v, new) (arch_xchg(&((v)->counter), new)) - #endif -- cgit From 767a697e75769fdae092a15d92bf562d5631a490 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 5 Aug 2021 23:38:44 -0700 Subject: ARC: retire ARC750 support There's no known/active customer using them with latest kernels anyways. Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index b5bf68e74732..fb3c319084c7 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -116,16 +116,9 @@ choice default ARC_CPU_770 if ISA_ARCOMPACT default ARC_CPU_HS if ISA_ARCV2 -if ISA_ARCOMPACT - -config ARC_CPU_750D - bool "ARC750D" - select ARC_CANT_LLSC - help - Support for ARC750 core - config ARC_CPU_770 bool "ARC770" + depends on ISA_ARCOMPACT select ARC_HAS_SWAPE help Support for ARC770 core introduced with Rel 4.10 (Summer 2011) @@ -135,8 +128,6 @@ config ARC_CPU_770 -Caches: New Prog Model, Region Flush -Insns: endian swap, load-locked/store-conditional, time-stamp-ctr -endif #ISA_ARCOMPACT - config ARC_CPU_HS bool "ARC-HS" depends on ISA_ARCV2 @@ -275,7 +266,6 @@ config ARC_DCCM_BASE choice prompt "MMU Version" default ARC_MMU_V3 if ARC_CPU_770 - default ARC_MMU_V2 if ARC_CPU_750D default ARC_MMU_V4 if ARC_CPU_HS if ISA_ARCOMPACT -- cgit From 288ff7de62af0936353c9394de9d0b2c6dd22c80 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 9 Sep 2019 17:36:34 -0700 Subject: ARC: retire MMUv1 and MMUv2 support There's no known/active customer using them with latest kernels anyways. 
Removal helps cleanup code and remove the hack for MMU_VER to MMU_V[3-4] conversion Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 22 +----- arch/arc/include/asm/mmu.h | 22 +----- arch/arc/include/asm/pgtable.h | 26 +------ arch/arc/include/asm/tlb-mmu1.h | 101 ------------------------- arch/arc/mm/cache.c | 110 ++++----------------------- arch/arc/mm/tlb.c | 163 +++++----------------------------------- arch/arc/mm/tlbex.S | 10 +-- 7 files changed, 42 insertions(+), 412 deletions(-) delete mode 100644 arch/arc/include/asm/tlb-mmu1.h (limited to 'arch') diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index fb3c319084c7..53d143fc42fe 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -265,32 +265,17 @@ config ARC_DCCM_BASE choice prompt "MMU Version" - default ARC_MMU_V3 if ARC_CPU_770 - default ARC_MMU_V4 if ARC_CPU_HS - -if ISA_ARCOMPACT - -config ARC_MMU_V1 - bool "MMU v1" - help - Orig ARC700 MMU - -config ARC_MMU_V2 - bool "MMU v2" - help - Fixed the deficiency of v1 - possible thrashing in memcpy scenario - when 2 D-TLB and 1 I-TLB entries index into same 2way set. + default ARC_MMU_V3 if ISA_ARCOMPACT + default ARC_MMU_V4 if ISA_ARCV2 config ARC_MMU_V3 bool "MMU v3" - depends on ARC_CPU_770 + depends on ISA_ARCOMPACT help Introduced with ARC700 4.10: New Features Variable Page size (1k-16k), var JTLB size 128 x (2 or 4) Shared Address Spaces (SASID) -endif - config ARC_MMU_V4 bool "MMU v4" depends on ISA_ARCV2 @@ -309,7 +294,6 @@ config ARC_PAGE_SIZE_8K config ARC_PAGE_SIZE_16K bool "16KB" - depends on ARC_MMU_V3 || ARC_MMU_V4 config ARC_PAGE_SIZE_4K bool "4KB" diff --git a/arch/arc/include/asm/mmu.h b/arch/arc/include/asm/mmu.h index 26b731d32a2b..a81d1975866a 100644 --- a/arch/arc/include/asm/mmu.h +++ b/arch/arc/include/asm/mmu.h @@ -10,19 +10,10 @@ #include /* NR_CPUS */ #endif -#if defined(CONFIG_ARC_MMU_V1) -#define CONFIG_ARC_MMU_VER 1 -#elif defined(CONFIG_ARC_MMU_V2) -#define CONFIG_ARC_MMU_VER 2 -#elif defined(CONFIG_ARC_MMU_V3) -#define CONFIG_ARC_MMU_VER 3 -#elif defined(CONFIG_ARC_MMU_V4) -#define CONFIG_ARC_MMU_VER 4 -#endif - /* MMU Management regs */ #define ARC_REG_MMU_BCR 0x06f -#if (CONFIG_ARC_MMU_VER < 4) + +#ifdef CONFIG_ARC_MMU_V3 #define ARC_REG_TLBPD0 0x405 #define ARC_REG_TLBPD1 0x406 #define ARC_REG_TLBPD1HI 0 /* Dummy: allows code sharing with ARC700 */ @@ -52,7 +43,7 @@ /* Error code if probe fails */ #define TLB_LKUP_ERR 0x80000000 -#if (CONFIG_ARC_MMU_VER < 4) +#ifdef CONFIG_ARC_MMU_V3 #define TLB_DUP_ERR (TLB_LKUP_ERR | 0x00000001) #else #define TLB_DUP_ERR (TLB_LKUP_ERR | 0x40000000) @@ -63,15 +54,10 @@ #define TLBRead 0x2 #define TLBGetIndex 0x3 #define TLBProbe 0x4 - -#if (CONFIG_ARC_MMU_VER >= 2) #define TLBWriteNI 0x5 /* write JTLB without inv uTLBs */ #define TLBIVUTLB 0x6 /* explicitly inv uTLBs */ -#else -#define TLBWriteNI TLBWrite /* Not present in hardware, fallback */ -#endif -#if (CONFIG_ARC_MMU_VER >= 4) +#ifdef CONFIG_ARC_MMU_V4 #define TLBInsertEntry 0x7 #define TLBDeleteEntry 0x8 #endif diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index 320cc0ae8a08..0c3e220bd2b4 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -35,7 +35,7 @@ #include #include #include -#include /* to propagate CONFIG_ARC_MMU_VER */ +#include /************************************************************************** * Page Table Flags @@ -51,20 +51,6 @@ * (saves some bit shift ops in TLB Miss hdlrs) */ -#if (CONFIG_ARC_MMU_VER <= 2) - -#define _PAGE_ACCESSED (1<<1) /* Page is accessed (S) */ 
-#define _PAGE_CACHEABLE (1<<2) /* Page is cached (H) */ -#define _PAGE_EXECUTE (1<<3) /* Page has user execute perm (H) */ -#define _PAGE_WRITE (1<<4) /* Page has user write perm (H) */ -#define _PAGE_READ (1<<5) /* Page has user read perm (H) */ -#define _PAGE_DIRTY (1<<6) /* Page modified (dirty) (S) */ -#define _PAGE_SPECIAL (1<<7) -#define _PAGE_GLOBAL (1<<8) /* Page is global (H) */ -#define _PAGE_PRESENT (1<<10) /* TLB entry is valid (H) */ - -#else /* MMU v3 onwards */ - #define _PAGE_CACHEABLE (1<<0) /* Page is cached (H) */ #define _PAGE_EXECUTE (1<<1) /* Page has user execute perm (H) */ #define _PAGE_WRITE (1<<2) /* Page has user write perm (H) */ @@ -73,23 +59,15 @@ #define _PAGE_DIRTY (1<<5) /* Page modified (dirty) (S) */ #define _PAGE_SPECIAL (1<<6) -#if (CONFIG_ARC_MMU_VER >= 4) -#define _PAGE_WTHRU (1<<7) /* Page cache mode write-thru (H) */ -#endif - #define _PAGE_GLOBAL (1<<8) /* Page is global (H) */ #define _PAGE_PRESENT (1<<9) /* TLB entry is valid (H) */ -#if (CONFIG_ARC_MMU_VER >= 4) +#ifdef CONFIG_ARC_MMU_V4 #define _PAGE_HW_SZ (1<<10) /* Page Size indicator (H): 0 normal, 1 super */ #endif #define _PAGE_SHARED_CODE (1<<11) /* Shared Code page with cmn vaddr usable for shared TLB entries (H) */ - -#define _PAGE_UNUSED_BIT (1<<12) -#endif - /* vmalloc permissions */ #define _K_PAGE_PERMS (_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ | \ _PAGE_GLOBAL | _PAGE_PRESENT) diff --git a/arch/arc/include/asm/tlb-mmu1.h b/arch/arc/include/asm/tlb-mmu1.h deleted file mode 100644 index a3083b36f5f4..000000000000 --- a/arch/arc/include/asm/tlb-mmu1.h +++ /dev/null @@ -1,101 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) - */ - -#ifndef __ASM_TLB_MMU_V1_H__ -#define __ASM_TLB_MMU_V1_H__ - -#include - -#if defined(__ASSEMBLY__) && (CONFIG_ARC_MMU_VER == 1) - -.macro TLB_WRITE_HEURISTICS - -#define JH_HACK1 -#undef JH_HACK2 -#undef JH_HACK3 - -#ifdef JH_HACK3 -; Calculate set index for 2-way MMU -; -avoiding use of GetIndex from MMU -; and its unpleasant LFSR pseudo-random sequence -; -; r1 = TLBPD0 from TLB_RELOAD above -; -; -- jh_ex_way_set not cleared on startup -; didn't want to change setup.c -; hence extra instruction to clean -; -; -- should be in cache since in same line -; as r0/r1 saves above -; -ld r0,[jh_ex_way_sel] ; victim pointer -and r0,r0,1 ; clean -xor.f r0,r0,1 ; flip -st r0,[jh_ex_way_sel] ; store back -asr r0,r1,12 ; get set # <<1, note bit 12=R=0 -or.nz r0,r0,1 ; set way bit -and r0,r0,0xff ; clean -sr r0,[ARC_REG_TLBINDEX] -#endif - -#ifdef JH_HACK2 -; JH hack #2 -; Faster than hack #1 in non-thrash case, but hard-coded for 2-way MMU -; Slower in thrash case (where it matters) because more code is executed -; Inefficient due to two-register paradigm of this miss handler -; -/* r1 = data TLBPD0 at this point */ -lr r0,[eret] /* instruction address */ -xor r0,r0,r1 /* compare set # */ -and.f r0,r0,0x000fe000 /* 2-way MMU mask */ -bne 88f /* not in same set - no need to probe */ - -lr r0,[eret] /* instruction address */ -and r0,r0,PAGE_MASK /* VPN of instruction address */ -; lr r1,[ARC_REG_TLBPD0] /* Data VPN+ASID - already in r1 from TLB_RELOAD*/ -and r1,r1,0xff /* Data ASID */ -or r0,r0,r1 /* Instruction address + Data ASID */ - -lr r1,[ARC_REG_TLBPD0] /* save TLBPD0 containing data TLB*/ -sr r0,[ARC_REG_TLBPD0] /* write instruction address to TLBPD0 */ -sr TLBProbe, [ARC_REG_TLBCOMMAND] /* Look for instruction */ -lr r0,[ARC_REG_TLBINDEX] /* r0 = index where instruction 
is, if at all */ -sr r1,[ARC_REG_TLBPD0] /* restore TLBPD0 */ - -xor r0,r0,1 /* flip bottom bit of data index */ -b.d 89f -sr r0,[ARC_REG_TLBINDEX] /* and put it back */ -88: -sr TLBGetIndex, [ARC_REG_TLBCOMMAND] -89: -#endif - -#ifdef JH_HACK1 -; -; Always checks whether instruction will be kicked out by dtlb miss -; -mov_s r3, r1 ; save PD0 prepared by TLB_RELOAD in r3 -lr r0,[eret] /* instruction address */ -and r0,r0,PAGE_MASK /* VPN of instruction address */ -bmsk r1,r3,7 /* Data ASID, bits 7-0 */ -or_s r0,r0,r1 /* Instruction address + Data ASID */ - -sr r0,[ARC_REG_TLBPD0] /* write instruction address to TLBPD0 */ -sr TLBProbe, [ARC_REG_TLBCOMMAND] /* Look for instruction */ -lr r0,[ARC_REG_TLBINDEX] /* r0 = index where instruction is, if at all */ -sr r3,[ARC_REG_TLBPD0] /* restore TLBPD0 */ - -sr TLBGetIndex, [ARC_REG_TLBCOMMAND] -lr r1,[ARC_REG_TLBINDEX] /* r1 = index where MMU wants to put data */ -cmp r0,r1 /* if no match on indices, go around */ -xor.eq r1,r1,1 /* flip bottom bit of data index */ -sr r1,[ARC_REG_TLBINDEX] /* and put it back */ -#endif - -.endm - -#endif - -#endif diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index 102418ac5ff4..8aa1231865d1 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -205,93 +205,24 @@ slc_chk: #define OP_INV_IC 0x4 /* - * I-Cache Aliasing in ARC700 VIPT caches (MMU v1-v3) + * Cache Flush programming model * - * ARC VIPT I-cache uses vaddr to index into cache and paddr to match the tag. - * The orig Cache Management Module "CDU" only required paddr to invalidate a - * certain line since it sufficed as index in Non-Aliasing VIPT cache-geometry. - * Infact for distinct V1,V2,P: all of {V1-P},{V2-P},{P-P} would end up fetching - * the exact same line. + * ARC700 MMUv3 I$ and D$ are both VIPT and can potentially alias. + * Programming model requires both paddr and vaddr irrespecive of aliasing + * considerations: + * - vaddr in {I,D}C_IV?L + * - paddr in {I,D}C_PTAG * - * However for larger Caches (way-size > page-size) - i.e. in Aliasing config, - * paddr alone could not be used to correctly index the cache. + * In HS38x (MMUv4), D$ is PIPT, I$ is VIPT and can still alias. + * Programming model is different for aliasing vs. non-aliasing I$ + * - D$ / Non-aliasing I$: only paddr in {I,D}C_IV?L + * - Aliasing I$: same as ARC700 above (so MMUv3 routine used for MMUv4 I$) * - * ------------------ - * MMU v1/v2 (Fixed Page Size 8k) - * ------------------ - * The solution was to provide CDU with these additonal vaddr bits. These - * would be bits [x:13], x would depend on cache-geometry, 13 comes from - * standard page size of 8k. - * H/w folks chose [17:13] to be a future safe range, and moreso these 5 bits - * of vaddr could easily be "stuffed" in the paddr as bits [4:0] since the - * orig 5 bits of paddr were anyways ignored by CDU line ops, as they - * represent the offset within cache-line. The adv of using this "clumsy" - * interface for additional info was no new reg was needed in CDU programming - * model. - * - * 17:13 represented the max num of bits passable, actual bits needed were - * fewer, based on the num-of-aliases possible. - * -for 2 alias possibility, only bit 13 needed (32K cache) - * -for 4 alias possibility, bits 14:13 needed (64K cache) - * - * ------------------ - * MMU v3 - * ------------------ - * This ver of MMU supports variable page sizes (1k-16k): although Linux will - * only support 8k (default), 16k and 4k. 
- * However from hardware perspective, smaller page sizes aggravate aliasing - * meaning more vaddr bits needed to disambiguate the cache-line-op ; - * the existing scheme of piggybacking won't work for certain configurations. - * Two new registers IC_PTAG and DC_PTAG inttoduced. - * "tag" bits are provided in PTAG, index bits in existing IVIL/IVDL/FLDL regs + * - If PAE40 is enabled, independent of aliasing considerations, the higher + * bits needs to be written into PTAG_HI */ static inline -void __cache_line_loop_v2(phys_addr_t paddr, unsigned long vaddr, - unsigned long sz, const int op, const int full_page) -{ - unsigned int aux_cmd; - int num_lines; - - if (op == OP_INV_IC) { - aux_cmd = ARC_REG_IC_IVIL; - } else { - /* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */ - aux_cmd = op & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL; - } - - /* Ensure we properly floor/ceil the non-line aligned/sized requests - * and have @paddr - aligned to cache line and integral @num_lines. - * This however can be avoided for page sized since: - * -@paddr will be cache-line aligned already (being page aligned) - * -@sz will be integral multiple of line size (being page sized). - */ - if (!full_page) { - sz += paddr & ~CACHE_LINE_MASK; - paddr &= CACHE_LINE_MASK; - vaddr &= CACHE_LINE_MASK; - } - - num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES); - - /* MMUv2 and before: paddr contains stuffed vaddrs bits */ - paddr |= (vaddr >> PAGE_SHIFT) & 0x1F; - - while (num_lines-- > 0) { - write_aux_reg(aux_cmd, paddr); - paddr += L1_CACHE_BYTES; - } -} - -/* - * For ARC700 MMUv3 I-cache and D-cache flushes - * - ARC700 programming model requires paddr and vaddr be passed in seperate - * AUX registers (*_IV*L and *_PTAG respectively) irrespective of whether the - * caches actually alias or not. 
- * - For HS38, only the aliasing I-cache configuration uses the PTAG reg - * (non aliasing I-cache version doesn't; while D-cache can't possibly alias) - */ -static inline void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr, unsigned long sz, const int op, const int full_page) { @@ -350,17 +281,6 @@ void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr, #ifndef USE_RGN_FLSH /* - * In HS38x (MMU v4), I-cache is VIPT (can alias), D-cache is PIPT - * Here's how cache ops are implemented - * - * - D-cache: only paddr needed (in DC_IVDL/DC_FLDL) - * - I-cache Non Aliasing: Despite VIPT, only paddr needed (in IC_IVIL) - * - I-cache Aliasing: Both vaddr and paddr needed (in IC_IVIL, IC_PTAG - * respectively, similar to MMU v3 programming model, hence - * __cache_line_loop_v3() is used) - * - * If PAE40 is enabled, independent of aliasing considerations, the higher bits - * needs to be written into PTAG_HI */ static inline void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr, @@ -460,11 +380,9 @@ void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr, #endif -#if (CONFIG_ARC_MMU_VER < 3) -#define __cache_line_loop __cache_line_loop_v2 -#elif (CONFIG_ARC_MMU_VER == 3) +#ifdef CONFIG_ARC_MMU_V3 #define __cache_line_loop __cache_line_loop_v3 -#elif (CONFIG_ARC_MMU_VER > 3) +#else #define __cache_line_loop __cache_line_loop_v4 #endif diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index 9c7c68247289..8696829d37c0 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -1,51 +1,9 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * TLB Management (flush/create/diagnostics) for ARC700 + * TLB Management (flush/create/diagnostics) for MMUv3 and MMUv4 * * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) * - * vineetg: Aug 2011 - * -Reintroduce duplicate PD fixup - some customer chips still have the issue - * - * vineetg: May 2011 - * -No need to flush_cache_page( ) for each call to update_mmu_cache() - * some of the LMBench tests improved amazingly - * = page-fault thrice as fast (75 usec to 28 usec) - * = mmap twice as fast (9.6 msec to 4.6 msec), - * = fork (5.3 msec to 3.7 msec) - * - * vineetg: April 2011 : - * -MMU v3: PD{0,1} bits layout changed: They don't overlap anymore, - * helps avoid a shift when preparing PD0 from PTE - * - * vineetg: April 2011 : Preparing for MMU V3 - * -MMU v2/v3 BCRs decoded differently - * -Remove TLB_SIZE hardcoding as it's variable now: 256 or 512 - * -tlb_entry_erase( ) can be void - * -local_flush_tlb_range( ): - * = need not "ceil" @end - * = walks MMU only if range spans < 32 entries, as opposed to 256 - * - * Vineetg: Sept 10th 2008 - * -Changes related to MMU v2 (Rel 4.8) - * - * Vineetg: Aug 29th 2008 - * -In TLB Flush operations (Metal Fix MMU) there is a explicit command to - * flush Micro-TLBS. If TLB Index Reg is invalid prior to TLBIVUTLB cmd, - * it fails. Thus need to load it with ANY valid value before invoking - * TLBIVUTLB cmd - * - * Vineetg: Aug 21th 2008: - * -Reduced the duration of IRQ lockouts in TLB Flush routines - * -Multiple copies of TLB erase code separated into a "single" function - * -In TLB Flush routines, interrupt disabling moved UP to retrieve ASID - * in interrupt-safe region. - * - * Vineetg: April 23rd Bug #93131 - * Problem: tlb_flush_kernel_range() doesn't do anything if the range to - * flush is more than the size of TLB itself. 
- * - * Rahul Trivedi : Codito Technologies 2004 */ #include @@ -57,47 +15,6 @@ #include #include -/* Need for ARC MMU v2 - * - * ARC700 MMU-v1 had a Joint-TLB for Code and Data and is 2 way set-assoc. - * For a memcpy operation with 3 players (src/dst/code) such that all 3 pages - * map into same set, there would be contention for the 2 ways causing severe - * Thrashing. - * - * Although J-TLB is 2 way set assoc, ARC700 caches J-TLB into uTLBS which has - * much higher associativity. u-D-TLB is 8 ways, u-I-TLB is 4 ways. - * Given this, the thrashing problem should never happen because once the 3 - * J-TLB entries are created (even though 3rd will knock out one of the prev - * two), the u-D-TLB and u-I-TLB will have what is required to accomplish memcpy - * - * Yet we still see the Thrashing because a J-TLB Write cause flush of u-TLBs. - * This is a simple design for keeping them in sync. So what do we do? - * The solution which James came up was pretty neat. It utilised the assoc - * of uTLBs by not invalidating always but only when absolutely necessary. - * - * - Existing TLB commands work as before - * - New command (TLBWriteNI) for TLB write without clearing uTLBs - * - New command (TLBIVUTLB) to invalidate uTLBs. - * - * The uTLBs need only be invalidated when pages are being removed from the - * OS page table. If a 'victim' TLB entry is being overwritten in the main TLB - * as a result of a miss, the removed entry is still allowed to exist in the - * uTLBs as it is still valid and present in the OS page table. This allows the - * full associativity of the uTLBs to hide the limited associativity of the main - * TLB. - * - * During a miss handler, the new "TLBWriteNI" command is used to load - * entries without clearing the uTLBs. - * - * When the OS page table is updated, TLB entries that may be associated with a - * removed page are removed (flushed) from the TLB using TLBWrite. In this - * circumstance, the uTLBs must also be cleared. This is done by using the - * existing TLBWrite command. An explicit IVUTLB is also required for those - * corner cases when TLBWrite was not executed at all because the corresp - * J-TLB entry got evicted/replaced. - */ - - /* A copy of the ASID from the PID reg is kept in asid_cache */ DEFINE_PER_CPU(unsigned int, asid_cache) = MM_CTXT_FIRST_CYCLE; @@ -120,32 +37,10 @@ static inline void __tlb_entry_erase(void) static void utlb_invalidate(void) { -#if (CONFIG_ARC_MMU_VER >= 2) - -#if (CONFIG_ARC_MMU_VER == 2) - /* MMU v2 introduced the uTLB Flush command. - * There was however an obscure hardware bug, where uTLB flush would - * fail when a prior probe for J-TLB (both totally unrelated) would - * return lkup err - because the entry didn't exist in MMU. - * The Workaround was to set Index reg with some valid value, prior to - * flush. 
This was fixed in MMU v3 - */ - unsigned int idx; - - /* make sure INDEX Reg is valid */ - idx = read_aux_reg(ARC_REG_TLBINDEX); - - /* If not write some dummy val */ - if (unlikely(idx & TLB_LKUP_ERR)) - write_aux_reg(ARC_REG_TLBINDEX, 0xa); -#endif - write_aux_reg(ARC_REG_TLBCOMMAND, TLBIVUTLB); -#endif - } -#if (CONFIG_ARC_MMU_VER < 4) +#ifdef CONFIG_ARC_MMU_V3 static inline unsigned int tlb_entry_lkup(unsigned long vaddr_n_asid) { @@ -206,7 +101,7 @@ static void tlb_entry_insert(unsigned int pd0, pte_t pd1) write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite); } -#else /* CONFIG_ARC_MMU_VER >= 4) */ +#else /* MMUv4 */ static void tlb_entry_erase(unsigned int vaddr_n_asid) { @@ -706,14 +601,6 @@ void read_decode_mmu_bcr(void) { struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu; unsigned int tmp; - struct bcr_mmu_1_2 { -#ifdef CONFIG_CPU_BIG_ENDIAN - unsigned int ver:8, ways:4, sets:4, u_itlb:8, u_dtlb:8; -#else - unsigned int u_dtlb:8, u_itlb:8, sets:4, ways:4, ver:8; -#endif - } *mmu2; - struct bcr_mmu_3 { #ifdef CONFIG_CPU_BIG_ENDIAN unsigned int ver:8, ways:4, sets:4, res:3, sasid:1, pg_sz:4, @@ -738,23 +625,14 @@ void read_decode_mmu_bcr(void) tmp = read_aux_reg(ARC_REG_MMU_BCR); mmu->ver = (tmp >> 24); - if (is_isa_arcompact()) { - if (mmu->ver <= 2) { - mmu2 = (struct bcr_mmu_1_2 *)&tmp; - mmu->pg_sz_k = TO_KB(0x2000); - mmu->sets = 1 << mmu2->sets; - mmu->ways = 1 << mmu2->ways; - mmu->u_dtlb = mmu2->u_dtlb; - mmu->u_itlb = mmu2->u_itlb; - } else { - mmu3 = (struct bcr_mmu_3 *)&tmp; - mmu->pg_sz_k = 1 << (mmu3->pg_sz - 1); - mmu->sets = 1 << mmu3->sets; - mmu->ways = 1 << mmu3->ways; - mmu->u_dtlb = mmu3->u_dtlb; - mmu->u_itlb = mmu3->u_itlb; - mmu->sasid = mmu3->sasid; - } + if (is_isa_arcompact() && mmu->ver == 3) { + mmu3 = (struct bcr_mmu_3 *)&tmp; + mmu->pg_sz_k = 1 << (mmu3->pg_sz - 1); + mmu->sets = 1 << mmu3->sets; + mmu->ways = 1 << mmu3->ways; + mmu->u_dtlb = mmu3->u_dtlb; + mmu->u_itlb = mmu3->u_itlb; + mmu->sasid = mmu3->sasid; } else { mmu4 = (struct bcr_mmu_4 *)&tmp; mmu->pg_sz_k = 1 << (mmu4->sz0 - 1); @@ -815,22 +693,17 @@ void arc_mmu_init(void) /* * Ensure that MMU features assumed by kernel exist in hardware. - * For older ARC700 cpus, it has to be exact match, since the MMU - * revisions were not backwards compatible (MMUv3 TLB layout changed - * so even if kernel for v2 didn't use any new cmds of v3, it would - * still not work. - * For HS cpus, MMUv4 was baseline and v5 is backwards compatible - * (will run older software). + * - For older ARC700 cpus, only v3 supported + * - For HS cpus, v4 was baseline and v5 is backwards compatible + * (will run older software). 
*/ - if (is_isa_arcompact() && mmu->ver == CONFIG_ARC_MMU_VER) + if (is_isa_arcompact() && mmu->ver == 3) compat = 1; - else if (is_isa_arcv2() && mmu->ver >= CONFIG_ARC_MMU_VER) + else if (is_isa_arcv2() && mmu->ver >= 4) compat = 1; - if (!compat) { - panic("MMU ver %d doesn't match kernel built for %d...\n", - mmu->ver, CONFIG_ARC_MMU_VER); - } + if (!compat) + panic("MMU ver %d doesn't match kernel built for\n", mmu->ver); if (mmu->pg_sz_k != TO_KB(PAGE_SIZE)) panic("MMU pg size != PAGE_SIZE (%luk)\n", TO_KB(PAGE_SIZE)); diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S index 062fae46c3f8..96c3a5de9dd4 100644 --- a/arch/arc/mm/tlbex.S +++ b/arch/arc/mm/tlbex.S @@ -39,7 +39,6 @@ #include #include #include -#include #ifdef CONFIG_ISA_ARCOMPACT ;----------------------------------------------------------------- @@ -279,7 +278,7 @@ ex_saved_reg1: ; Commit the TLB entry into MMU .macro COMMIT_ENTRY_TO_MMU -#if (CONFIG_ARC_MMU_VER < 4) +#ifdef CONFIG_ARC_MMU_V3 /* Get free TLB slot: Set = computed from vaddr, way = random */ sr TLBGetIndex, [ARC_REG_TLBCOMMAND] @@ -375,13 +374,6 @@ ENTRY(EV_TLBMissD) CONV_PTE_TO_TLB -#if (CONFIG_ARC_MMU_VER == 1) - ; MMU with 2 way set assoc J-TLB, needs some help in pathetic case of - ; memcpy where 3 parties contend for 2 ways, ensuing a livelock. - ; But only for old MMU or one with Metal Fix - TLB_WRITE_HEURISTICS -#endif - COMMIT_ENTRY_TO_MMU TLBMISS_RESTORE_REGS EV_TLBMissD_fast_ret: ; additional label for VDK OS-kit instrumentation -- cgit From 6128df5be48f48d63efdc7c52022dd163f612373 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 13 Jan 2020 09:16:06 -0800 Subject: ARC: mm: use SCRATCH_DATA0 register for caching pgdir in ARCv2 only MMU SCRATCH_DATA0 register is intended to cache task pgd. However in ARC700 SMP port, it has to be repurposed for re-entrant interrupt handling, while UP port doesn't. We currently handle these use-cases using a fabricated #define which has usual issues of dependency nesting and obvious ugliness. So clean this up: for ARC700 don't use to cache pgd (even in UP) and do the opposite for ARCv2. And while here, switch to canonical pgd_offset(). Acked-by: Mike Rapoport Signed-off-by: Vineet Gupta --- arch/arc/include/asm/entry-compact.h | 8 -------- arch/arc/include/asm/mmu.h | 4 ---- arch/arc/include/asm/mmu_context.h | 2 +- arch/arc/include/asm/pgtable.h | 23 ----------------------- arch/arc/mm/fault.c | 2 +- arch/arc/mm/tlb.c | 4 ++-- arch/arc/mm/tlbex.S | 2 +- 7 files changed, 5 insertions(+), 40 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/entry-compact.h b/arch/arc/include/asm/entry-compact.h index 6dbf5cecc8cc..5aab4f93ab8a 100644 --- a/arch/arc/include/asm/entry-compact.h +++ b/arch/arc/include/asm/entry-compact.h @@ -126,19 +126,11 @@ * to be saved again on kernel mode stack, as part of pt_regs. 
*-------------------------------------------------------------*/ .macro PROLOG_FREEUP_REG reg, mem -#ifndef ARC_USE_SCRATCH_REG - sr \reg, [ARC_REG_SCRATCH_DATA0] -#else st \reg, [\mem] -#endif .endm .macro PROLOG_RESTORE_REG reg, mem -#ifndef ARC_USE_SCRATCH_REG - lr \reg, [ARC_REG_SCRATCH_DATA0] -#else ld \reg, [\mem] -#endif .endm /*-------------------------------------------------------------- diff --git a/arch/arc/include/asm/mmu.h b/arch/arc/include/asm/mmu.h index a81d1975866a..4065335a7922 100644 --- a/arch/arc/include/asm/mmu.h +++ b/arch/arc/include/asm/mmu.h @@ -31,10 +31,6 @@ #define ARC_REG_SCRATCH_DATA0 0x46c #endif -#if defined(CONFIG_ISA_ARCV2) || !defined(CONFIG_SMP) -#define ARC_USE_SCRATCH_REG -#endif - /* Bits in MMU PID register */ #define __TLB_ENABLE (1 << 31) #define __PROG_ENABLE (1 << 30) diff --git a/arch/arc/include/asm/mmu_context.h b/arch/arc/include/asm/mmu_context.h index df164066e172..49318a126879 100644 --- a/arch/arc/include/asm/mmu_context.h +++ b/arch/arc/include/asm/mmu_context.h @@ -146,7 +146,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, */ cpumask_set_cpu(cpu, mm_cpumask(next)); -#ifdef ARC_USE_SCRATCH_REG +#ifdef CONFIG_ISA_ARCV2 /* PGD cached in MMU reg to avoid 3 mem lookups: task->mm->pgd */ write_aux_reg(ARC_REG_SCRATCH_DATA0, next->pgd); #endif diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index 0c3e220bd2b4..80b57c14b430 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -284,29 +284,6 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, set_pte(ptep, pteval); } -/* - * Macro to quickly access the PGD entry, utlising the fact that some - * arch may cache the pointer to Page Directory of "current" task - * in a MMU register - * - * Thus task->mm->pgd (3 pointer dereferences, cache misses etc simply - * becomes read a register - * - * ********CAUTION*******: - * Kernel code might be dealing with some mm_struct of NON "current" - * Thus use this macro only when you are certain that "current" is current - * e.g. 
when dealing with signal frame setup code etc - */ -#ifdef ARC_USE_SCRATCH_REG -#define pgd_offset_fast(mm, addr) \ -({ \ - pgd_t *pgd_base = (pgd_t *) read_aux_reg(ARC_REG_SCRATCH_DATA0); \ - pgd_base + pgd_index(addr); \ -}) -#else -#define pgd_offset_fast(mm, addr) pgd_offset(mm, addr) -#endif - extern pgd_t swapper_pg_dir[] __aligned(PAGE_SIZE); void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep); diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c index f5657cb68e4f..41f154320964 100644 --- a/arch/arc/mm/fault.c +++ b/arch/arc/mm/fault.c @@ -33,7 +33,7 @@ noinline static int handle_kernel_vaddr_fault(unsigned long address) pud_t *pud, *pud_k; pmd_t *pmd, *pmd_k; - pgd = pgd_offset_fast(current->active_mm, address); + pgd = pgd_offset(current->active_mm, address); pgd_k = pgd_offset_k(address); if (!pgd_present(*pgd_k)) diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index 8696829d37c0..349fb7a75d1d 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -719,8 +719,8 @@ void arc_mmu_init(void) /* Enable the MMU */ write_aux_reg(ARC_REG_PID, MMU_ENABLE); - /* In smp we use this reg for interrupt 1 scratch */ -#ifdef ARC_USE_SCRATCH_REG + /* In arc700/smp needed for re-entrant interrupt handling */ +#ifdef CONFIG_ISA_ARCV2 /* swapper_pg_dir is the pgd for the kernel, used by vmalloc */ write_aux_reg(ARC_REG_SCRATCH_DATA0, swapper_pg_dir); #endif diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S index 96c3a5de9dd4..bcd2909c691f 100644 --- a/arch/arc/mm/tlbex.S +++ b/arch/arc/mm/tlbex.S @@ -202,7 +202,7 @@ ex_saved_reg1: lr r2, [efa] -#ifdef ARC_USE_SCRATCH_REG +#ifdef CONFIG_ISA_ARCV2 lr r1, [ARC_REG_SCRATCH_DATA0] ; current pgd #else GET_CURR_TASK_ON_CPU r1 -- cgit From 12e7804c264143c63d1cae7e3a62d21fbed06f59 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 9 Sep 2019 16:59:59 -0700 Subject: ARC: mm: remove tlb paranoid code This was used back in arc700 days when ASID allocator was fragile. 
Not needed in last 5 years Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 3 --- arch/arc/include/asm/mmu.h | 6 ------ arch/arc/mm/tlb.c | 40 ------------------------------------- arch/arc/mm/tlbex.S | 50 ---------------------------------------------- 4 files changed, 99 deletions(-) (limited to 'arch') diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 53d143fc42fe..56aad105ad13 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -537,9 +537,6 @@ config ARC_DW2_UNWIND If you don't debug the kernel, you can say N, but we may not be able to solve problems without frame unwind information -config ARC_DBG_TLB_PARANOIA - bool "Paranoia Checks in Low Level TLB Handlers" - config ARC_DBG_JUMP_LABEL bool "Paranoid checks in Static Keys (jump labels) code" depends on JUMP_LABEL diff --git a/arch/arc/include/asm/mmu.h b/arch/arc/include/asm/mmu.h index 4065335a7922..38a036508699 100644 --- a/arch/arc/include/asm/mmu.h +++ b/arch/arc/include/asm/mmu.h @@ -64,12 +64,6 @@ typedef struct { unsigned long asid[NR_CPUS]; /* 8 bit MMU PID + Generation cycle */ } mm_context_t; -#ifdef CONFIG_ARC_DBG_TLB_PARANOIA -void tlb_paranoid_check(unsigned int mm_asid, unsigned long address); -#else -#define tlb_paranoid_check(a, b) -#endif - void arc_mmu_init(void); extern char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len); void read_decode_mmu_bcr(void); diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index 349fb7a75d1d..6079dfd129b9 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -400,7 +400,6 @@ void create_tlb(struct vm_area_struct *vma, unsigned long vaddr, pte_t *ptep) * * Removing the assumption involves * -Using vma->mm->context{ASID,SASID}, as opposed to MMU reg. - * -Fix the TLB paranoid debug code to not trigger false negatives. * -More importantly it makes this handler inconsistent with fast-path * TLB Refill handler which always deals with "current" * @@ -423,8 +422,6 @@ void create_tlb(struct vm_area_struct *vma, unsigned long vaddr, pte_t *ptep) local_irq_save(flags); - tlb_paranoid_check(asid_mm(vma->vm_mm, smp_processor_id()), vaddr); - vaddr &= PAGE_MASK; /* update this PTE credentials */ @@ -818,40 +815,3 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address, local_irq_restore(flags); } - -/*********************************************************************** - * Diagnostic Routines - * -Called from Low Level TLB Handlers if things don;t look good - **********************************************************************/ - -#ifdef CONFIG_ARC_DBG_TLB_PARANOIA - -/* - * Low Level ASM TLB handler calls this if it finds that HW and SW ASIDS - * don't match - */ -void print_asid_mismatch(int mm_asid, int mmu_asid, int is_fast_path) -{ - pr_emerg("ASID Mismatch in %s Path Handler: sw-pid=0x%x hw-pid=0x%x\n", - is_fast_path ? 
"Fast" : "Slow", mm_asid, mmu_asid); - - __asm__ __volatile__("flag 1"); -} - -void tlb_paranoid_check(unsigned int mm_asid, unsigned long addr) -{ - unsigned int mmu_asid; - - mmu_asid = read_aux_reg(ARC_REG_PID) & 0xff; - - /* - * At the time of a TLB miss/installation - * - HW version needs to match SW version - * - SW needs to have a valid ASID - */ - if (addr < 0x70000000 && - ((mm_asid == MM_CTXT_NO_ASID) || - (mmu_asid != (mm_asid & MM_CTXT_ASID_MASK)))) - print_asid_mismatch(mm_asid, mmu_asid, 0); -} -#endif diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S index bcd2909c691f..0b4bb62fa0ab 100644 --- a/arch/arc/mm/tlbex.S +++ b/arch/arc/mm/tlbex.S @@ -93,11 +93,6 @@ ex_saved_reg1: st_s r1, [r0, 4] st_s r2, [r0, 8] st_s r3, [r0, 12] - - ; VERIFY if the ASID in MMU-PID Reg is same as - ; one in Linux data structures - - tlb_paranoid_check_asm .endm .macro TLBMISS_RESTORE_REGS @@ -146,51 +141,6 @@ ex_saved_reg1: #endif -;============================================================================ -; Troubleshooting Stuff -;============================================================================ - -; Linux keeps ASID (Address Space ID) in task->active_mm->context.asid -; When Creating TLB Entries, instead of doing 3 dependent loads from memory, -; we use the MMU PID Reg to get current ASID. -; In bizzare scenrios SW and HW ASID can get out-of-sync which is trouble. -; So we try to detect this in TLB Mis shandler - -.macro tlb_paranoid_check_asm - -#ifdef CONFIG_ARC_DBG_TLB_PARANOIA - - GET_CURR_TASK_ON_CPU r3 - ld r0, [r3, TASK_ACT_MM] - ld r0, [r0, MM_CTXT+MM_CTXT_ASID] - breq r0, 0, 55f ; Error if no ASID allocated - - lr r1, [ARC_REG_PID] - and r1, r1, 0xFF - - and r2, r0, 0xFF ; MMU PID bits only for comparison - breq r1, r2, 5f - -55: - ; Error if H/w and S/w ASID don't match, but NOT if in kernel mode - lr r2, [erstatus] - bbit0 r2, STATUS_U_BIT, 5f - - ; We sure are in troubled waters, Flag the error, but to do so - ; need to switch to kernel mode stack to call error routine - GET_TSK_STACK_BASE r3, sp - - ; Call printk to shoutout aloud - mov r2, 1 - j print_asid_mismatch - -5: ; ASIDs match so proceed normally - nop - -#endif - -.endm - ;============================================================================ ;TLB Miss handling Code ;============================================================================ -- cgit From 47910ca3ce946662f950d700f26e51c563a9821f Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 13 Sep 2019 13:19:44 -0700 Subject: ARC: mm: move mmu/cache externs out to setup.h Don't pollute mmu.h and cache.h with ARC internal bootlog/setup related functions. 
Move them aside to setup.h Signed-off-by: Vineet Gupta --- arch/arc/include/asm/cache.h | 4 ---- arch/arc/include/asm/mmu.h | 4 ---- arch/arc/include/asm/setup.h | 12 ++++++++++-- 3 files changed, 10 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h index d8ece4292388..f0f1fc5d62b6 100644 --- a/arch/arc/include/asm/cache.h +++ b/arch/arc/include/asm/cache.h @@ -62,10 +62,6 @@ #define ARCH_SLAB_MINALIGN 8 #endif -extern void arc_cache_init(void); -extern char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len); -extern void read_decode_cache_bcr(void); - extern int ioc_enable; extern unsigned long perip_base, perip_end; diff --git a/arch/arc/include/asm/mmu.h b/arch/arc/include/asm/mmu.h index 38a036508699..762cfe66e16b 100644 --- a/arch/arc/include/asm/mmu.h +++ b/arch/arc/include/asm/mmu.h @@ -64,10 +64,6 @@ typedef struct { unsigned long asid[NR_CPUS]; /* 8 bit MMU PID + Generation cycle */ } mm_context_t; -void arc_mmu_init(void); -extern char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len); -void read_decode_mmu_bcr(void); - static inline int is_pae40_enabled(void) { return IS_ENABLED(CONFIG_ARC_HAS_PAE40); diff --git a/arch/arc/include/asm/setup.h b/arch/arc/include/asm/setup.h index 01f85478170d..028a8cf76206 100644 --- a/arch/arc/include/asm/setup.h +++ b/arch/arc/include/asm/setup.h @@ -2,8 +2,8 @@ /* * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) */ -#ifndef __ASMARC_SETUP_H -#define __ASMARC_SETUP_H +#ifndef __ASM_ARC_SETUP_H +#define __ASM_ARC_SETUP_H #include @@ -34,4 +34,12 @@ long __init arc_get_mem_sz(void); #define IS_AVAIL2(v, s, cfg) IS_AVAIL1(v, s), IS_AVAIL1(v, IS_USED_CFG(cfg)) #define IS_AVAIL3(v, v2, s) IS_AVAIL1(v, s), IS_AVAIL1(v, IS_DISABLED_RUN(v2)) +extern void arc_mmu_init(void); +extern char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len); +extern void read_decode_mmu_bcr(void); + +extern void arc_cache_init(void); +extern char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len); +extern void read_decode_cache_bcr(void); + #endif /* __ASMARC_SETUP_H */ -- cgit From 366440eec855dad6527adaf2f1dcc305fb5eef99 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 28 Oct 2019 12:02:47 -0700 Subject: ARC: mm: Fixes to allow STRICT_MM_TYPECHECKS Signed-off-by: Vineet Gupta --- arch/arc/mm/tlb.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index 6079dfd129b9..15cbc285b0de 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -71,7 +71,7 @@ static void tlb_entry_erase(unsigned int vaddr_n_asid) } } -static void tlb_entry_insert(unsigned int pd0, pte_t pd1) +static void tlb_entry_insert(unsigned int pd0, phys_addr_t pd1) { unsigned int idx; @@ -109,13 +109,16 @@ static void tlb_entry_erase(unsigned int vaddr_n_asid) write_aux_reg(ARC_REG_TLBCOMMAND, TLBDeleteEntry); } -static void tlb_entry_insert(unsigned int pd0, pte_t pd1) +static void tlb_entry_insert(unsigned int pd0, phys_addr_t pd1) { write_aux_reg(ARC_REG_TLBPD0, pd0); - write_aux_reg(ARC_REG_TLBPD1, pd1); - if (is_pae40_enabled()) + if (!is_pae40_enabled()) { + write_aux_reg(ARC_REG_TLBPD1, pd1); + } else { + write_aux_reg(ARC_REG_TLBPD1, pd1 & 0xFFFFFFFF); write_aux_reg(ARC_REG_TLBPD1HI, (u64)pd1 >> 32); + } write_aux_reg(ARC_REG_TLBCOMMAND, TLBInsertEntry); } @@ -391,7 +394,7 @@ void create_tlb(struct vm_area_struct *vma, unsigned long vaddr, pte_t *ptep) unsigned long flags; unsigned int 
asid_or_sasid, rwx; unsigned long pd0; - pte_t pd1; + phys_addr_t pd1; /* * create_tlb() assumes that current->mm == vma->mm, since -- cgit From 1b4013b9aebca87636dc9edc8903ffa87267704f Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 28 Oct 2019 13:49:41 -0700 Subject: ARC: mm: Enable STRICT_MM_TYPECHECKS In the past I've refrained from doing this (at least 2 times) due to the slight code bloat due to ABI implications of pte_t etc becoming struct Per ARC ABI, functions return struct via memory and not through register r0, even if the struct would fit in register(s) - caller allocates space on stack and passes the address as first arg (r0), shifting rest of args by one - callee creates return struct in memory (referenced via r0) This time around the code actually shrunk slightly (due to subtle inlining heuristic effects), but still slightly inefficient due to return values passed through memory. That however seems like a small cost compared to maintenance burden given the impending new mmu support for page walk etc Signed-off-by: Vineet Gupta --- arch/arc/include/asm/page.h | 26 -------------------------- arch/arc/mm/ioremap.c | 2 +- 2 files changed, 1 insertion(+), 27 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h index 4a9d33372fe2..c4ac827379cd 100644 --- a/arch/arc/include/asm/page.h +++ b/arch/arc/include/asm/page.h @@ -34,12 +34,6 @@ void copy_user_highpage(struct page *to, struct page *from, unsigned long u_vaddr, struct vm_area_struct *vma); void clear_user_page(void *to, unsigned long u_vaddr, struct page *page); -#undef STRICT_MM_TYPECHECKS - -#ifdef STRICT_MM_TYPECHECKS -/* - * These are used to make use of C type-checking.. - */ typedef struct { #ifdef CONFIG_ARC_HAS_PAE40 unsigned long long pte; @@ -64,26 +58,6 @@ typedef struct { #define pte_pgprot(x) __pgprot(pte_val(x)) -#else /* !STRICT_MM_TYPECHECKS */ - -#ifdef CONFIG_ARC_HAS_PAE40 -typedef unsigned long long pte_t; -#else -typedef unsigned long pte_t; -#endif -typedef unsigned long pgd_t; -typedef unsigned long pgprot_t; - -#define pte_val(x) (x) -#define pgd_val(x) (x) -#define pgprot_val(x) (x) -#define __pte(x) (x) -#define __pgd(x) (x) -#define __pgprot(x) (x) -#define pte_pgprot(x) (x) - -#endif - typedef pte_t * pgtable_t; /* diff --git a/arch/arc/mm/ioremap.c b/arch/arc/mm/ioremap.c index 95c649fbc95a..052bbd8b1e5f 100644 --- a/arch/arc/mm/ioremap.c +++ b/arch/arc/mm/ioremap.c @@ -39,7 +39,7 @@ void __iomem *ioremap(phys_addr_t paddr, unsigned long size) if (arc_uncached_addr_space(paddr)) return (void __iomem *)(u32)paddr; - return ioremap_prot(paddr, size, PAGE_KERNEL_NO_CACHE); + return ioremap_prot(paddr, size, pgprot_val(PAGE_KERNEL_NO_CACHE)); } EXPORT_SYMBOL(ioremap); -- cgit From da773cf20eb3745e18be995e00be0d57aa862564 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 12 Sep 2019 12:01:04 -0700 Subject: ARC: ioremap: use more commonly used PAGE_KERNEL based uncached flag and remove the one off uncached definition for ARC Signed-off-by: Vineet Gupta --- arch/arc/include/asm/pgtable.h | 3 --- arch/arc/mm/ioremap.c | 3 ++- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index 80b57c14b430..b054c14f8bf6 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -103,9 +103,6 @@ */ #define PAGE_KERNEL __pgprot(_K_PAGE_PERMS | _PAGE_CACHEABLE) -/* ioremap */ -#define PAGE_KERNEL_NO_CACHE __pgprot(_K_PAGE_PERMS) - /* Masks 
for actual TLB "PD"s */ #define PTE_BITS_IN_PD0 (_PAGE_GLOBAL | _PAGE_PRESENT | _PAGE_HW_SZ) #define PTE_BITS_RWX (_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ) diff --git a/arch/arc/mm/ioremap.c b/arch/arc/mm/ioremap.c index 052bbd8b1e5f..0ee75aca6e10 100644 --- a/arch/arc/mm/ioremap.c +++ b/arch/arc/mm/ioremap.c @@ -39,7 +39,8 @@ void __iomem *ioremap(phys_addr_t paddr, unsigned long size) if (arc_uncached_addr_space(paddr)) return (void __iomem *)(u32)paddr; - return ioremap_prot(paddr, size, pgprot_val(PAGE_KERNEL_NO_CACHE)); + return ioremap_prot(paddr, size, + pgprot_val(pgprot_noncached(PAGE_KERNEL))); } EXPORT_SYMBOL(ioremap); -- cgit From e93e59ac1e699d07a2bfd0bb03f36b0d2f931834 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Wed, 30 Oct 2019 16:54:32 -0700 Subject: ARC: mm: pmd_populate* to use the canonical set_pmd (and drop pmd_set) Signed-off-by: Vineet Gupta --- arch/arc/include/asm/pgalloc.h | 14 ++++++++++---- arch/arc/include/asm/pgtable.h | 6 ------ 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/pgalloc.h b/arch/arc/include/asm/pgalloc.h index a32ca3104ced..408bc4b0842d 100644 --- a/arch/arc/include/asm/pgalloc.h +++ b/arch/arc/include/asm/pgalloc.h @@ -35,13 +35,19 @@ static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) { - pmd_set(pmd, pte); + /* + * The cast to long below is OK in 32-bit PAE40 regime with long long pte + * Despite "wider" pte, the pte table needs to be in non-PAE low memory + * as all higher levels can only hold long pointers. + * + * The cast itself is needed given simplistic definition of set_pmd() + */ + set_pmd(pmd, __pmd((unsigned long)pte)); } -static inline void -pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t ptep) +static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t pte) { - pmd_set(pmd, (pte_t *) ptep); + set_pmd(pmd, __pmd((unsigned long)pte)); } static inline int __get_order_pgd(void) diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index b054c14f8bf6..f762bacb2358 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -222,12 +222,6 @@ extern char empty_zero_page[PAGE_SIZE]; /* find the logical addr (phy for ARC) of the Page Tbl ref by PMD entry */ #define pmd_page_vaddr(pmd) (pmd_val(pmd) & PAGE_MASK) -/* In a 2 level sys, setup the PGD entry with PTE value */ -static inline void pmd_set(pmd_t *pmdp, pte_t *ptep) -{ - pmd_val(*pmdp) = (unsigned long)ptep; -} - #define pte_none(x) (!pte_val(x)) #define pte_present(x) (pte_val(x) & _PAGE_PRESENT) #define pte_clear(mm, addr, ptep) set_pte_at(mm, addr, ptep, __pte(0)) -- cgit From be43b096ed787bad9e1a74f79486159c6cd6e648 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Wed, 30 Sep 2020 18:58:16 -0700 Subject: ARC: mm: non-functional code movement/cleanup Signed-off-by: Vineet Gupta --- arch/arc/include/asm/page.h | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h index c4ac827379cd..313e6f543d2d 100644 --- a/arch/arc/include/asm/page.h +++ b/arch/arc/include/asm/page.h @@ -34,6 +34,13 @@ void copy_user_highpage(struct page *to, struct page *from, unsigned long u_vaddr, struct vm_area_struct *vma); void clear_user_page(void *to, unsigned long u_vaddr, struct page *page); +typedef struct { + unsigned long pgd; +} pgd_t; + +#define pgd_val(x) ((x).pgd) +#define __pgd(x) ((pgd_t) { (x) }) + 
typedef struct { #ifdef CONFIG_ARC_HAS_PAE40 unsigned long long pte; @@ -41,22 +48,17 @@ typedef struct { unsigned long pte; #endif } pte_t; -typedef struct { - unsigned long pgd; -} pgd_t; + +#define pte_val(x) ((x).pte) +#define __pte(x) ((pte_t) { (x) }) + typedef struct { unsigned long pgprot; } pgprot_t; -#define pte_val(x) ((x).pte) -#define pgd_val(x) ((x).pgd) -#define pgprot_val(x) ((x).pgprot) - -#define __pte(x) ((pte_t) { (x) }) -#define __pgd(x) ((pgd_t) { (x) }) -#define __pgprot(x) ((pgprot_t) { (x) }) - -#define pte_pgprot(x) __pgprot(pte_val(x)) +#define pgprot_val(x) ((x).pgprot) +#define __pgprot(x) ((pgprot_t) { (x) }) +#define pte_pgprot(x) __pgprot(pte_val(x)) typedef pte_t * pgtable_t; @@ -96,8 +98,8 @@ extern int pfn_valid(unsigned long pfn); * virt here means link-address/program-address as embedded in object code. * And for ARC, link-addr = physical address */ -#define __pa(vaddr) ((unsigned long)(vaddr)) -#define __va(paddr) ((void *)((unsigned long)(paddr))) +#define __pa(vaddr) ((unsigned long)(vaddr)) +#define __va(paddr) ((void *)((unsigned long)(paddr))) #define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr)) #define virt_addr_valid(kaddr) pfn_valid(virt_to_pfn(kaddr)) -- cgit From 89d0d42412a116563c28c763d9c1bdee83b5b6af Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 13 Sep 2019 15:20:01 -0700 Subject: ARC: mm: move MMU specific bits out of ASID allocator And while at it, rewrite commentary on ASID allocator Signed-off-by: Vineet Gupta --- arch/arc/include/asm/mmu.h | 13 +++++++++++++ arch/arc/include/asm/mmu_context.h | 28 +++++++++++++--------------- arch/arc/mm/tlb.c | 11 ++++------- 3 files changed, 30 insertions(+), 22 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/mmu.h b/arch/arc/include/asm/mmu.h index 762cfe66e16b..c8b490175eae 100644 --- a/arch/arc/include/asm/mmu.h +++ b/arch/arc/include/asm/mmu.h @@ -64,6 +64,19 @@ typedef struct { unsigned long asid[NR_CPUS]; /* 8 bit MMU PID + Generation cycle */ } mm_context_t; +static inline void mmu_setup_asid(struct mm_struct *mm, unsigned int asid) +{ + write_aux_reg(ARC_REG_PID, asid | MMU_ENABLE); +} + +static inline void mmu_setup_pgd(struct mm_struct *mm, void *pgd) +{ + /* PGD cached in MMU reg to avoid 3 mem lookups: task->mm->pgd */ +#ifdef CONFIG_ISA_ARCV2 + write_aux_reg(ARC_REG_SCRATCH_DATA0, (unsigned int)pgd); +#endif +} + static inline int is_pae40_enabled(void) { return IS_ENABLED(CONFIG_ARC_HAS_PAE40); diff --git a/arch/arc/include/asm/mmu_context.h b/arch/arc/include/asm/mmu_context.h index 49318a126879..dda471f5f05b 100644 --- a/arch/arc/include/asm/mmu_context.h +++ b/arch/arc/include/asm/mmu_context.h @@ -15,22 +15,23 @@ #ifndef _ASM_ARC_MMU_CONTEXT_H #define _ASM_ARC_MMU_CONTEXT_H -#include -#include #include +#include #include -/* ARC700 ASID Management +/* ARC ASID Management + * + * MMU tags TLBs with an 8-bit ASID, avoiding need to flush the TLB on + * context-switch. * - * ARC MMU provides 8-bit ASID (0..255) to TAG TLB entries, allowing entries - * with same vaddr (different tasks) to co-exit. This provides for - * "Fast Context Switch" i.e. no TLB flush on ctxt-switch + * ASID is managed per cpu, so task threads across CPUs can have different + * ASID. Global ASID management is needed if hardware supports TLB shootdown + * and/or shared TLB across cores, which ARC doesn't. * - * Linux assigns each task a unique ASID. A simple round-robin allocation - * of H/w ASID is done using software tracker @asid_cpu. 
- * When it reaches max 255, the allocation cycle starts afresh by flushing - * the entire TLB and wrapping ASID back to zero. + * Each task is assigned unique ASID, with a simple round-robin allocator + * tracked in @asid_cpu. When 8-bit value rolls over,a new cycle is started + * over from 0, and TLB is flushed * * A new allocation cycle, post rollover, could potentially reassign an ASID * to a different task. Thus the rule is to refresh the ASID in a new cycle. @@ -93,7 +94,7 @@ static inline void get_new_mmu_context(struct mm_struct *mm) asid_mm(mm, cpu) = asid_cpu(cpu); set_hw: - write_aux_reg(ARC_REG_PID, hw_pid(mm, cpu) | MMU_ENABLE); + mmu_setup_asid(mm, hw_pid(mm, cpu)); local_irq_restore(flags); } @@ -146,10 +147,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, */ cpumask_set_cpu(cpu, mm_cpumask(next)); -#ifdef CONFIG_ISA_ARCV2 - /* PGD cached in MMU reg to avoid 3 mem lookups: task->mm->pgd */ - write_aux_reg(ARC_REG_SCRATCH_DATA0, next->pgd); -#endif + mmu_setup_pgd(next, next->pgd); get_new_mmu_context(next); } diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index 15cbc285b0de..b68d5798327b 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -716,14 +716,11 @@ void arc_mmu_init(void) if (IS_ENABLED(CONFIG_ARC_HAS_PAE40) && !mmu->pae) panic("Hardware doesn't support PAE40\n"); - /* Enable the MMU */ - write_aux_reg(ARC_REG_PID, MMU_ENABLE); + /* Enable the MMU with ASID 0 */ + mmu_setup_asid(NULL, 0); - /* In arc700/smp needed for re-entrant interrupt handling */ -#ifdef CONFIG_ISA_ARCV2 - /* swapper_pg_dir is the pgd for the kernel, used by vmalloc */ - write_aux_reg(ARC_REG_SCRATCH_DATA0, swapper_pg_dir); -#endif + /* cache the pgd pointer in MMU SCRATCH reg (ARCv2 only) */ + mmu_setup_pgd(NULL, swapper_pg_dir); if (pae40_exist_but_not_enab()) write_aux_reg(ARC_REG_TLBPD1HI, 0); -- cgit From a79a9c765f95a73e087f11f0994297cd69987bda Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 16 Sep 2019 10:54:34 -0700 Subject: ARC: mm: move MMU specific bits out of entry code ... ... to avoid polluting shared entry code (across three ISA variants) with ISA/MMU specific code. Cc: Jose Abreu Signed-off-by: Vineet Gupta --- arch/arc/include/asm/mmu.h | 8 ++++++++ arch/arc/kernel/entry-arcv2.S | 1 + arch/arc/kernel/entry.S | 7 ++----- 3 files changed, 11 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/mmu.h b/arch/arc/include/asm/mmu.h index c8b490175eae..e66e1e417694 100644 --- a/arch/arc/include/asm/mmu.h +++ b/arch/arc/include/asm/mmu.h @@ -84,6 +84,14 @@ static inline int is_pae40_enabled(void) extern int pae40_exist_but_not_enab(void); +#else + +.macro ARC_MMU_REENABLE reg + lr \reg, [ARC_REG_PID] + or \reg, \reg, MMU_ENABLE + sr \reg, [ARC_REG_PID] +.endm + #endif /* !__ASSEMBLY__ */ #endif diff --git a/arch/arc/kernel/entry-arcv2.S b/arch/arc/kernel/entry-arcv2.S index 12d5f12d10d2..a7e6a2174187 100644 --- a/arch/arc/kernel/entry-arcv2.S +++ b/arch/arc/kernel/entry-arcv2.S @@ -10,6 +10,7 @@ #include #include #include +#include ; A maximum number of supported interrupts in the core interrupt controller. 
; This number is not equal to the maximum interrupt number (256) because diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S index 2cb8dfe866b6..dd77a0c8f740 100644 --- a/arch/arc/kernel/entry.S +++ b/arch/arc/kernel/entry.S @@ -101,11 +101,8 @@ ENTRY(EV_MachineCheck) lr r0, [efa] mov r1, sp - ; hardware auto-disables MMU, re-enable it to allow kernel vaddr - ; access for say stack unwinding of modules for crash dumps - lr r3, [ARC_REG_PID] - or r3, r3, MMU_ENABLE - sr r3, [ARC_REG_PID] + ; MC excpetions disable MMU + ARC_MMU_REENABLE r3 lsr r3, r2, 8 bmsk r3, r3, 7 -- cgit From 113ec9ccc8049c3772f0eab46b62c5d6654c09f7 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 20 Aug 2021 05:16:05 +0000 Subject: powerpc/32: indirect function call use bctrl rather than blrl in ret_from_kernel_thread Copied from commit 89bbe4c798bc ("powerpc/64: indirect function call use bctrl rather than blrl in ret_from_kernel_thread") blrl is not recommended to use as an indirect function call, as it may corrupt the link stack predictor. This is not a performance critical path but this should be fixed for consistency. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/91b1d242525307ceceec7ef6e832bfbacdd4501b.1629436472.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/entry_32.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 0273a1349006..61fdd53cdd9a 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -161,10 +161,10 @@ ret_from_fork: ret_from_kernel_thread: REST_NVGPRS(r1) bl schedule_tail - mtlr r14 + mtctr r14 mr r3,r15 PPC440EP_ERR42 - blrl + bctrl li r3,0 b ret_from_syscall -- cgit From f5007dbf4da729baa850b33a64dc3cc53757bdf8 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 24 Aug 2021 07:56:26 +0000 Subject: powerpc/booke: Avoid link stack corruption in several places Use bcl 20,31,+4 instead of bl in order to preserve link stack. See commit c974809a26a1 ("powerpc/vdso: Avoid link stack corruption in __get_datapage()") for details. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/e9fbc285eceb720e6c0e032ef47fe8b05f669b48.1629791751.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/ppc_asm.h | 2 +- arch/powerpc/kernel/exceptions-64e.S | 6 +++--- arch/powerpc/kernel/fsl_booke_entry_mapping.S | 8 ++++---- arch/powerpc/kernel/head_44x.S | 6 +++--- arch/powerpc/kernel/head_fsl_booke.S | 6 +++--- arch/powerpc/mm/nohash/tlb_low.S | 4 ++-- 6 files changed, 16 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index ffe712307e11..1c538a9a11e0 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -260,7 +260,7 @@ n: /* Be careful, this will clobber the lr register. 
*/ #define LOAD_REG_ADDR_PIC(reg, name) \ - bl 0f; \ + bcl 20,31,$+4; \ 0: mflr reg; \ addis reg,reg,(name - 0b)@ha; \ addi reg,reg,(name - 0b)@l; diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 1401787b0b93..7e0943d9f9b0 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -1127,7 +1127,7 @@ found_iprot: * r3 = MAS0_TLBSEL (for the iprot array) * r4 = SPRN_TLBnCFG */ - bl invstr /* Find our address */ + bcl 20,31,$+4 /* Find our address */ invstr: mflr r6 /* Make it accessible */ mfmsr r7 rlwinm r5,r7,27,31,31 /* extract MSR[IS] */ @@ -1196,7 +1196,7 @@ skpinv: addi r6,r6,1 /* Increment */ mfmsr r6 xori r6,r6,MSR_IS mtspr SPRN_SRR1,r6 - bl 1f /* Find our address */ + bcl 20,31,$+4 /* Find our address */ 1: mflr r6 addi r6,r6,(2f - 1b) mtspr SPRN_SRR0,r6 @@ -1256,7 +1256,7 @@ skpinv: addi r6,r6,1 /* Increment */ * r4 = MAS0 w/TLBSEL & ESEL for the temp mapping */ /* Now we branch the new virtual address mapped by this entry */ - bl 1f /* Find our address */ + bcl 20,31,$+4 /* Find our address */ 1: mflr r6 addi r6,r6,(2f - 1b) tovirt(r6,r6) diff --git a/arch/powerpc/kernel/fsl_booke_entry_mapping.S b/arch/powerpc/kernel/fsl_booke_entry_mapping.S index 8bccce6544b5..dedc17fac8f8 100644 --- a/arch/powerpc/kernel/fsl_booke_entry_mapping.S +++ b/arch/powerpc/kernel/fsl_booke_entry_mapping.S @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* 1. Find the index of the entry we're executing in */ - bl invstr /* Find our address */ + bcl 20,31,$+4 /* Find our address */ invstr: mflr r6 /* Make it accessible */ mfmsr r7 rlwinm r4,r7,27,31,31 /* extract MSR[IS] */ @@ -85,7 +85,7 @@ skpinv: addi r6,r6,1 /* Increment */ addi r6,r6,10 slw r6,r8,r6 /* convert to mask */ - bl 1f /* Find our address */ + bcl 20,31,$+4 /* Find our address */ 1: mflr r7 mfspr r8,SPRN_MAS3 @@ -117,7 +117,7 @@ skpinv: addi r6,r6,1 /* Increment */ xori r6,r4,1 slwi r6,r6,5 /* setup new context with other address space */ - bl 1f /* Find our address */ + bcl 20,31,$+4 /* Find our address */ 1: mflr r9 rlwimi r7,r9,0,20,31 addi r7,r7,(2f - 1b) @@ -207,7 +207,7 @@ next_tlb_setup: lis r7,MSR_KERNEL@h ori r7,r7,MSR_KERNEL@l - bl 1f /* Find our address */ + bcl 20,31,$+4 /* Find our address */ 1: mflr r9 rlwimi r6,r9,0,20,31 addi r6,r6,(2f - 1b) diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index ddc978a2d381..02d2928d1e01 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -70,7 +70,7 @@ _ENTRY(_start); * address. 
* r21 will be loaded with the physical runtime address of _stext */ - bl 0f /* Get our runtime address */ + bcl 20,31,$+4 /* Get our runtime address */ 0: mflr r21 /* Make it accessible */ addis r21,r21,(_stext - 0b)@ha addi r21,r21,(_stext - 0b)@l /* Get our current runtime base */ @@ -853,7 +853,7 @@ _GLOBAL(init_cpu_state) wmmucr: mtspr SPRN_MMUCR,r3 /* Put MMUCR */ sync - bl invstr /* Find our address */ + bcl 20,31,$+4 /* Find our address */ invstr: mflr r5 /* Make it accessible */ tlbsx r23,0,r5 /* Find entry we are in */ li r4,0 /* Start at TLB entry 0 */ @@ -1045,7 +1045,7 @@ head_start_47x: sync /* Find the entry we are running from */ - bl 1f + bcl 20,31,$+4 1: mflr r23 tlbsx r23,0,r23 tlbre r24,r23,0 diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 0f9642f36b49..dbf3b89e543c 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -79,7 +79,7 @@ _ENTRY(_start); mr r23,r3 mr r25,r4 - bl 0f + bcl 20,31,$+4 0: mflr r8 addis r3,r8,(is_second_reloc - 0b)@ha lwz r19,(is_second_reloc - 0b)@l(r3) @@ -1132,7 +1132,7 @@ _GLOBAL(switch_to_as1) bne 1b /* Get the tlb entry used by the current running code */ - bl 0f + bcl 20,31,$+4 0: mflr r4 tlbsx 0,r4 @@ -1166,7 +1166,7 @@ _GLOBAL(switch_to_as1) _GLOBAL(restore_to_as0) mflr r0 - bl 0f + bcl 20,31,$+4 0: mflr r9 addi r9,r9,1f - 0b diff --git a/arch/powerpc/mm/nohash/tlb_low.S b/arch/powerpc/mm/nohash/tlb_low.S index 4613bf8e9aae..5add4a51e51f 100644 --- a/arch/powerpc/mm/nohash/tlb_low.S +++ b/arch/powerpc/mm/nohash/tlb_low.S @@ -199,7 +199,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_476_DD2) * Touch enough instruction cache lines to ensure cache hits */ 1: mflr r9 - bl 2f + bcl 20,31,$+4 2: mflr r6 li r7,32 PPC_ICBT(0,R6,R7) /* touch next cache line */ @@ -414,7 +414,7 @@ _GLOBAL(loadcam_multi) * Set up temporary TLB entry that is the same as what we're * running from, but in AS=1. */ - bl 1f + bcl 20,31,$+4 1: mflr r6 tlbsx 0,r8 mfspr r6,SPRN_MAS1 -- cgit From 33e1402435cb9f3021439a15935ea2dc69ec1844 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 24 Aug 2021 07:56:35 +0000 Subject: powerpc: Avoid link stack corruption in misc asm functions bl;mflr is used at several places to get code position. Use bcl 20,31,+4 instead of bl in order to preserve link stack. See commit c974809a26a1 ("powerpc/vdso: Avoid link stack corruption in __get_datapage()") for details. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c6eabb4fb6c156f75d56dcbcc6f243e5ac0fba42.1629791763.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/misc.S | 2 +- arch/powerpc/kernel/misc_32.S | 2 +- arch/powerpc/kernel/misc_64.S | 2 +- arch/powerpc/kernel/reloc_32.S | 2 +- arch/powerpc/kexec/relocate_32.S | 12 ++++++------ 5 files changed, 10 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S index 5be96feccb55..fb7de3543c03 100644 --- a/arch/powerpc/kernel/misc.S +++ b/arch/powerpc/kernel/misc.S @@ -29,7 +29,7 @@ _GLOBAL(reloc_offset) li r3, 0 _GLOBAL(add_reloc_offset) mflr r0 - bl 1f + bcl 20,31,$+4 1: mflr r5 PPC_LL r4,(2f-1b)(r5) subf r5,r4,r5 diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index d8645efff902..e5127b19fec2 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -67,7 +67,7 @@ _GLOBAL(reloc_got2) srwi. 
r8,r8,2 beqlr mtctr r8 - bl 1f + bcl 20,31,$+4 1: mflr r0 lis r4,1b@ha addi r4,r4,1b@l diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 4b761a18a74d..d38a019b38e1 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -255,7 +255,7 @@ _GLOBAL(scom970_write) * Physical (hardware) cpu id should be in r3. */ _GLOBAL(kexec_wait) - bl 1f + bcl 20,31,$+4 1: mflr r5 addi r5,r5,kexec_flag-1b diff --git a/arch/powerpc/kernel/reloc_32.S b/arch/powerpc/kernel/reloc_32.S index 10e96f3e22fe..0508c14b4c28 100644 --- a/arch/powerpc/kernel/reloc_32.S +++ b/arch/powerpc/kernel/reloc_32.S @@ -30,7 +30,7 @@ R_PPC_RELATIVE = 22 _GLOBAL(relocate) mflr r0 /* Save our LR */ - bl 0f /* Find our current runtime address */ + bcl 20,31,$+4 /* Find our current runtime address */ 0: mflr r12 /* Make it accessible */ mtlr r0 diff --git a/arch/powerpc/kexec/relocate_32.S b/arch/powerpc/kexec/relocate_32.S index 61946c19e07c..cf6e52bdf8d8 100644 --- a/arch/powerpc/kexec/relocate_32.S +++ b/arch/powerpc/kexec/relocate_32.S @@ -93,7 +93,7 @@ wmmucr: * Invalidate all the TLB entries except the current entry * where we are running from */ - bl 0f /* Find our address */ + bcl 20,31,$+4 /* Find our address */ 0: mflr r5 /* Make it accessible */ tlbsx r23,0,r5 /* Find entry we are in */ li r4,0 /* Start at TLB entry 0 */ @@ -158,7 +158,7 @@ write_out: /* Switch to other address space in MSR */ insrwi r9, r7, 1, 26 /* Set MSR[IS] = r7 */ - bl 1f + bcl 20,31,$+4 1: mflr r8 addi r8, r8, (2f-1b) /* Find the target offset */ @@ -202,7 +202,7 @@ next_tlb: li r9,0 insrwi r9, r7, 1, 26 /* Set MSR[IS] = r7 */ - bl 1f + bcl 20,31,$+4 1: mflr r8 and r8, r8, r11 /* Get our offset within page */ addi r8, r8, (2f-1b) @@ -240,7 +240,7 @@ setup_map_47x: sync /* Find the entry we are running from */ - bl 2f + bcl 20,31,$+4 2: mflr r23 tlbsx r23, 0, r23 tlbre r24, r23, 0 /* TLB Word 0 */ @@ -296,7 +296,7 @@ clear_utlb_entry: /* Update the msr to the new TS */ insrwi r5, r7, 1, 26 - bl 1f + bcl 20,31,$+4 1: mflr r6 addi r6, r6, (2f-1b) @@ -355,7 +355,7 @@ write_utlb: /* Defaults to 256M */ lis r10, 0x1000 - bl 1f + bcl 20,31,$+4 1: mflr r4 addi r4, r4, (2f-1b) /* virtual address of 2f */ -- cgit From 11f27a7fa4ca27935de74e3eb052bdc430d5f8d8 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 8 Jul 2021 16:49:40 +0000 Subject: powerpc/ptdump: Use DEFINE_SHOW_ATTRIBUTE() Use DEFINE_SHOW_ATTRIBUTE() instead of open coding open() and fops. 
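For reference, DEFINE_SHOW_ATTRIBUTE(name) from <linux/seq_file.h> builds the
single_open() boilerplate out of a name##_show() helper. A rough sketch of what
the macro expands to for a hypothetical "foo" attribute (illustrative only, not
part of this patch; the authoritative expansion lives in seq_file.h):

static int foo_open(struct inode *inode, struct file *file)
{
	/* hand the show callback and the inode's private data to seq_file */
	return single_open(file, foo_show, inode->i_private);
}

static const struct file_operations foo_fops = {
	.owner		= THIS_MODULE,
	.open		= foo_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

Each converted site below therefore keeps only its foo_show() plus a one-line
DEFINE_SHOW_ATTRIBUTE(foo), instead of open coding the open() helper and the
fops table.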
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/b864a92693ca8413ef0b19f0c12065c212899b6e.1625762905.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/ptdump/bats.c | 14 ++------------ arch/powerpc/mm/ptdump/hashpagetable.c | 12 +----------- arch/powerpc/mm/ptdump/ptdump.c | 13 +------------ arch/powerpc/mm/ptdump/segment_regs.c | 12 +----------- 4 files changed, 5 insertions(+), 46 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/mm/ptdump/bats.c b/arch/powerpc/mm/ptdump/bats.c index 8bf7383fb26c..820c119013e4 100644 --- a/arch/powerpc/mm/ptdump/bats.c +++ b/arch/powerpc/mm/ptdump/bats.c @@ -57,7 +57,7 @@ static void bat_show_603(struct seq_file *m, int idx, u32 lower, u32 upper, bool #define BAT_SHOW_603(_m, _n, _l, _u, _d) bat_show_603(_m, _n, mfspr(_l), mfspr(_u), _d) -static int bats_show_603(struct seq_file *m, void *v) +static int bats_show(struct seq_file *m, void *v) { seq_puts(m, "---[ Instruction Block Address Translation ]---\n"); @@ -88,17 +88,7 @@ static int bats_show_603(struct seq_file *m, void *v) return 0; } -static int bats_open(struct inode *inode, struct file *file) -{ - return single_open(file, bats_show_603, NULL); -} - -static const struct file_operations bats_fops = { - .open = bats_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(bats); static int __init bats_init(void) { diff --git a/arch/powerpc/mm/ptdump/hashpagetable.c b/arch/powerpc/mm/ptdump/hashpagetable.c index ad6df9a2e7c8..c7f824d294b2 100644 --- a/arch/powerpc/mm/ptdump/hashpagetable.c +++ b/arch/powerpc/mm/ptdump/hashpagetable.c @@ -526,17 +526,7 @@ static int ptdump_show(struct seq_file *m, void *v) return 0; } -static int ptdump_open(struct inode *inode, struct file *file) -{ - return single_open(file, ptdump_show, NULL); -} - -static const struct file_operations ptdump_fops = { - .open = ptdump_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(ptdump); static int ptdump_init(void) { diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c index 5062c58b1e5b..349fd8fe173f 100644 --- a/arch/powerpc/mm/ptdump/ptdump.c +++ b/arch/powerpc/mm/ptdump/ptdump.c @@ -397,18 +397,7 @@ static int ptdump_show(struct seq_file *m, void *v) return 0; } - -static int ptdump_open(struct inode *inode, struct file *file) -{ - return single_open(file, ptdump_show, NULL); -} - -static const struct file_operations ptdump_fops = { - .open = ptdump_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(ptdump); static void build_pgtable_complete_mask(void) { diff --git a/arch/powerpc/mm/ptdump/segment_regs.c b/arch/powerpc/mm/ptdump/segment_regs.c index 9223dfb85c51..9df3af8d481f 100644 --- a/arch/powerpc/mm/ptdump/segment_regs.c +++ b/arch/powerpc/mm/ptdump/segment_regs.c @@ -41,17 +41,7 @@ static int sr_show(struct seq_file *m, void *v) return 0; } -static int sr_open(struct inode *inode, struct file *file) -{ - return single_open(file, sr_show, NULL); -} - -static const struct file_operations sr_fops = { - .open = sr_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(sr); static int __init sr_init(void) { -- cgit From 64b87b0c70e0fd28352895cba3c0a9631e0072dd Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 8 Jul 2021 16:49:41 +0000 Subject: powerpc/ptdump: Remove unused 'page_size' parameter note_page_update_state() 
doesn't use page_size. Remove it. Could also be removed to note_page() but as a following patch will remove all current users of note_page(), just leave it as is for now. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/e2f80d052001155251bfe009c360d0c5d9242c6b.1625762906.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/ptdump/ptdump.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c index 349fd8fe173f..3eb8732641da 100644 --- a/arch/powerpc/mm/ptdump/ptdump.c +++ b/arch/powerpc/mm/ptdump/ptdump.c @@ -189,7 +189,7 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr) } static void note_page_update_state(struct pg_state *st, unsigned long addr, - unsigned int level, u64 val, unsigned long page_size) + unsigned int level, u64 val) { u64 flag = val & pg_level[level].mask; u64 pa = val & PTE_RPN_MASK; @@ -213,7 +213,7 @@ static void note_page(struct pg_state *st, unsigned long addr, /* At first no level is set */ if (!st->level) { pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); - note_page_update_state(st, addr, level, val, page_size); + note_page_update_state(st, addr, level, val); /* * Dump the section of virtual memory when: * - the PTE flags from one entry to the next differs. @@ -242,7 +242,7 @@ static void note_page(struct pg_state *st, unsigned long addr, * Address indicates we have passed the end of the * current section of virtual memory */ - note_page_update_state(st, addr, level, val, page_size); + note_page_update_state(st, addr, level, val); } } -- cgit From cf98d2b6eea6a1b2c43f85680ad58fcc3ea9496b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 8 Jul 2021 16:49:42 +0000 Subject: powerpc/ptdump: Reduce level numbers by 1 in note_page() and add p4d level Do the same as commit f8f0d0b6fa20 ("mm: ptdump: reduce level numbers by 1 in note_page()") and add missing p4d level. This will align powerpc to the users of generic ptdump. 
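After this change the powerpc walker numbers page-table levels the same way as
the generic ptdump users. A sketch of the resulting convention (the macro names
are hypothetical, added here only for illustration; the code itself just passes
plain int levels that index pg_level[]):

#define PG_LEVEL_NONE	(-1)	/* st->level before the first entry is seen */
#define PG_LEVEL_PGD	0	/* pg_level[0] */
#define PG_LEVEL_P4D	1	/* pg_level[1], the level this patch adds */
#define PG_LEVEL_PUD	2
#define PG_LEVEL_PMD	3
#define PG_LEVEL_PTE	4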
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/d76495c574132b197b445a1f133755cca4b912a4.1625762906.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/ptdump/8xx.c | 6 ++++-- arch/powerpc/mm/ptdump/book3s64.c | 6 ++++-- arch/powerpc/mm/ptdump/ptdump.c | 17 +++++++++-------- arch/powerpc/mm/ptdump/shared.c | 6 ++++-- 4 files changed, 21 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/mm/ptdump/8xx.c b/arch/powerpc/mm/ptdump/8xx.c index 86da2a669680..fac932eb8f9a 100644 --- a/arch/powerpc/mm/ptdump/8xx.c +++ b/arch/powerpc/mm/ptdump/8xx.c @@ -75,8 +75,10 @@ static const struct flag_info flag_array[] = { }; struct pgtable_level pg_level[5] = { - { - }, { /* pgd */ + { /* pgd */ + .flag = flag_array, + .num = ARRAY_SIZE(flag_array), + }, { /* p4d */ .flag = flag_array, .num = ARRAY_SIZE(flag_array), }, { /* pud */ diff --git a/arch/powerpc/mm/ptdump/book3s64.c b/arch/powerpc/mm/ptdump/book3s64.c index 14f73868db66..5ad92d9dc5d1 100644 --- a/arch/powerpc/mm/ptdump/book3s64.c +++ b/arch/powerpc/mm/ptdump/book3s64.c @@ -103,8 +103,10 @@ static const struct flag_info flag_array[] = { }; struct pgtable_level pg_level[5] = { - { - }, { /* pgd */ + { /* pgd */ + .flag = flag_array, + .num = ARRAY_SIZE(flag_array), + }, { /* p4d */ .flag = flag_array, .num = ARRAY_SIZE(flag_array), }, { /* pud */ diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c index 3eb8732641da..fb531bc64fc5 100644 --- a/arch/powerpc/mm/ptdump/ptdump.c +++ b/arch/powerpc/mm/ptdump/ptdump.c @@ -58,7 +58,7 @@ struct pg_state { const struct addr_marker *marker; unsigned long start_address; unsigned long start_pa; - unsigned int level; + int level; u64 current_flags; bool check_wx; unsigned long wx_pages; @@ -188,10 +188,9 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr) st->wx_pages += (addr - st->start_address) / PAGE_SIZE; } -static void note_page_update_state(struct pg_state *st, unsigned long addr, - unsigned int level, u64 val) +static void note_page_update_state(struct pg_state *st, unsigned long addr, int level, u64 val) { - u64 flag = val & pg_level[level].mask; + u64 flag = level >= 0 ? val & pg_level[level].mask : 0; u64 pa = val & PTE_RPN_MASK; st->level = level; @@ -206,12 +205,12 @@ static void note_page_update_state(struct pg_state *st, unsigned long addr, } static void note_page(struct pg_state *st, unsigned long addr, - unsigned int level, u64 val, unsigned long page_size) + int level, u64 val, unsigned long page_size) { - u64 flag = val & pg_level[level].mask; + u64 flag = level >= 0 ? val & pg_level[level].mask : 0; /* At first no level is set */ - if (!st->level) { + if (st->level == -1) { pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); note_page_update_state(st, addr, level, val); /* @@ -383,6 +382,7 @@ static int ptdump_show(struct seq_file *m, void *v) struct pg_state st = { .seq = m, .marker = address_markers, + .level = -1, .start_address = IS_ENABLED(CONFIG_PPC64) ? PAGE_OFFSET : TASK_SIZE, }; @@ -393,7 +393,7 @@ static int ptdump_show(struct seq_file *m, void *v) /* Traverse kernel page tables */ walk_pagetables(&st); - note_page(&st, 0, 0, 0, 0); + note_page(&st, 0, -1, 0, 0); return 0; } @@ -415,6 +415,7 @@ void ptdump_check_wx(void) struct pg_state st = { .seq = NULL, .marker = address_markers, + .level = -1, .check_wx = true, .start_address = IS_ENABLED(CONFIG_PPC64) ? 
PAGE_OFFSET : TASK_SIZE, }; diff --git a/arch/powerpc/mm/ptdump/shared.c b/arch/powerpc/mm/ptdump/shared.c index c005fe041c18..03607ab90c66 100644 --- a/arch/powerpc/mm/ptdump/shared.c +++ b/arch/powerpc/mm/ptdump/shared.c @@ -68,8 +68,10 @@ static const struct flag_info flag_array[] = { }; struct pgtable_level pg_level[5] = { - { - }, { /* pgd */ + { /* pgd */ + .flag = flag_array, + .num = ARRAY_SIZE(flag_array), + }, { /* p4d */ .flag = flag_array, .num = ARRAY_SIZE(flag_array), }, { /* pud */ -- cgit From e084728393a58e7fdafeee2e6b20e0faff09b557 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 8 Jul 2021 16:49:43 +0000 Subject: powerpc/ptdump: Convert powerpc to GENERIC_PTDUMP This patch converts powerpc to the generic PTDUMP implementation. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/03166d569526be70214fe9370a7bad219d2f41c8.1625762907.git.christophe.leroy@csgroup.eu --- arch/powerpc/Kconfig | 2 + arch/powerpc/Kconfig.debug | 30 --------- arch/powerpc/mm/Makefile | 2 +- arch/powerpc/mm/mmu_decl.h | 2 +- arch/powerpc/mm/ptdump/Makefile | 9 ++- arch/powerpc/mm/ptdump/ptdump.c | 146 ++++++++++------------------------------ 6 files changed, 47 insertions(+), 144 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index d01e3401581d..ec866085ff8b 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -123,6 +123,7 @@ config PPC select ARCH_HAS_COPY_MC if PPC64 select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEBUG_VM_PGTABLE + select ARCH_HAS_DEBUG_WX if STRICT_KERNEL_RWX select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_DMA_MAP_DIRECT if PPC_PSERIES select ARCH_HAS_ELF_RANDOMIZE @@ -182,6 +183,7 @@ config PPC select GENERIC_IRQ_SHOW select GENERIC_IRQ_SHOW_LEVEL select GENERIC_PCI_IOMAP if PCI + select GENERIC_PTDUMP select GENERIC_SMP_IDLE_THREAD select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 205cd77f321f..192f0ed0097f 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -365,36 +365,6 @@ config FAIL_IOMMU If you are unsure, say N. -config PPC_PTDUMP - bool "Export kernel pagetable layout to userspace via debugfs" - depends on DEBUG_KERNEL && DEBUG_FS - help - This option exports the state of the kernel pagetables to a - debugfs file. This is only useful for kernel developers who are - working in architecture specific areas of the kernel - probably - not a good idea to enable this feature in a production kernel. - - If you are unsure, say N. - -config PPC_DEBUG_WX - bool "Warn on W+X mappings at boot" - depends on PPC_PTDUMP && STRICT_KERNEL_RWX - help - Generate a warning if any W+X mappings are found at boot. - - This is useful for discovering cases where the kernel is leaving - W+X mappings after applying NX, as such mappings are a security risk. - - Note that even if the check fails, your kernel is possibly - still fine, as W+X mappings are not a security hole in - themselves, what they do is that they make the exploitation - of other unfixed kernel bugs easier. - - There is no runtime or memory usage effect of this option - once the kernel has booted up - it's a one time check. - - If in doubt, say "Y". 
- config PPC_FAST_ENDIAN_SWITCH bool "Deprecated fast endian-switch syscall" depends on DEBUG_KERNEL && PPC_BOOK3S_64 diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index eae4ec2988fc..df8172da2301 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -18,5 +18,5 @@ obj-$(CONFIG_PPC_MM_SLICES) += slice.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o obj-$(CONFIG_PPC_COPRO_BASE) += copro_fault.o -obj-$(CONFIG_PPC_PTDUMP) += ptdump/ +obj-$(CONFIG_PTDUMP_CORE) += ptdump/ obj-$(CONFIG_KASAN) += kasan/ diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 7dac910c0b21..dd1cabc2ea0f 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -180,7 +180,7 @@ static inline void mmu_mark_rodata_ro(void) { } void __init mmu_mapin_immr(void); #endif -#ifdef CONFIG_PPC_DEBUG_WX +#ifdef CONFIG_DEBUG_WX void ptdump_check_wx(void); #else static inline void ptdump_check_wx(void) { } diff --git a/arch/powerpc/mm/ptdump/Makefile b/arch/powerpc/mm/ptdump/Makefile index 712762be3cb1..4050cbb55acf 100644 --- a/arch/powerpc/mm/ptdump/Makefile +++ b/arch/powerpc/mm/ptdump/Makefile @@ -5,5 +5,10 @@ obj-y += ptdump.o obj-$(CONFIG_4xx) += shared.o obj-$(CONFIG_PPC_8xx) += 8xx.o obj-$(CONFIG_PPC_BOOK3E_MMU) += shared.o -obj-$(CONFIG_PPC_BOOK3S_32) += shared.o bats.o segment_regs.o -obj-$(CONFIG_PPC_BOOK3S_64) += book3s64.o hashpagetable.o +obj-$(CONFIG_PPC_BOOK3S_32) += shared.o +obj-$(CONFIG_PPC_BOOK3S_64) += book3s64.o + +ifdef CONFIG_PTDUMP_DEBUGFS +obj-$(CONFIG_PPC_BOOK3S_32) += bats.o segment_regs.o +obj-$(CONFIG_PPC_BOOK3S_64) += hashpagetable.o +endif diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c index fb531bc64fc5..2d80d775d15e 100644 --- a/arch/powerpc/mm/ptdump/ptdump.c +++ b/arch/powerpc/mm/ptdump/ptdump.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -54,6 +55,7 @@ * */ struct pg_state { + struct ptdump_state ptdump; struct seq_file *seq; const struct addr_marker *marker; unsigned long start_address; @@ -102,6 +104,11 @@ static struct addr_marker address_markers[] = { { -1, NULL }, }; +static struct ptdump_range ptdump_range[] __ro_after_init = { + {TASK_SIZE_MAX, ~0UL}, + {0, 0} +}; + #define pt_dump_seq_printf(m, fmt, args...) \ ({ \ if (m) \ @@ -204,10 +211,10 @@ static void note_page_update_state(struct pg_state *st, unsigned long addr, int } } -static void note_page(struct pg_state *st, unsigned long addr, - int level, u64 val, unsigned long page_size) +static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, u64 val) { u64 flag = level >= 0 ? val & pg_level[level].mask : 0; + struct pg_state *st = container_of(pt_st, struct pg_state, ptdump); /* At first no level is set */ if (st->level == -1) { @@ -245,94 +252,6 @@ static void note_page(struct pg_state *st, unsigned long addr, } } -static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start) -{ - pte_t *pte = pte_offset_kernel(pmd, 0); - unsigned long addr; - unsigned int i; - - for (i = 0; i < PTRS_PER_PTE; i++, pte++) { - addr = start + i * PAGE_SIZE; - note_page(st, addr, 4, pte_val(*pte), PAGE_SIZE); - - } -} - -static void walk_hugepd(struct pg_state *st, hugepd_t *phpd, unsigned long start, - int pdshift, int level) -{ -#ifdef CONFIG_ARCH_HAS_HUGEPD - unsigned int i; - int shift = hugepd_shift(*phpd); - int ptrs_per_hpd = pdshift - shift > 0 ? 
1 << (pdshift - shift) : 1; - - if (start & ((1 << shift) - 1)) - return; - - for (i = 0; i < ptrs_per_hpd; i++) { - unsigned long addr = start + (i << shift); - pte_t *pte = hugepte_offset(*phpd, addr, pdshift); - - note_page(st, addr, level + 1, pte_val(*pte), 1 << shift); - } -#endif -} - -static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) -{ - pmd_t *pmd = pmd_offset(pud, 0); - unsigned long addr; - unsigned int i; - - for (i = 0; i < PTRS_PER_PMD; i++, pmd++) { - addr = start + i * PMD_SIZE; - if (!pmd_none(*pmd) && !pmd_is_leaf(*pmd)) - /* pmd exists */ - walk_pte(st, pmd, addr); - else - note_page(st, addr, 3, pmd_val(*pmd), PMD_SIZE); - } -} - -static void walk_pud(struct pg_state *st, p4d_t *p4d, unsigned long start) -{ - pud_t *pud = pud_offset(p4d, 0); - unsigned long addr; - unsigned int i; - - for (i = 0; i < PTRS_PER_PUD; i++, pud++) { - addr = start + i * PUD_SIZE; - if (!pud_none(*pud) && !pud_is_leaf(*pud)) - /* pud exists */ - walk_pmd(st, pud, addr); - else - note_page(st, addr, 2, pud_val(*pud), PUD_SIZE); - } -} - -static void walk_pagetables(struct pg_state *st) -{ - unsigned int i; - unsigned long addr = st->start_address & PGDIR_MASK; - pgd_t *pgd = pgd_offset_k(addr); - - /* - * Traverse the linux pagetable structure and dump pages that are in - * the hash pagetable. - */ - for (i = pgd_index(addr); i < PTRS_PER_PGD; i++, pgd++, addr += PGDIR_SIZE) { - p4d_t *p4d = p4d_offset(pgd, 0); - - if (p4d_none(*p4d) || p4d_is_leaf(*p4d)) - note_page(st, addr, 1, p4d_val(*p4d), PGDIR_SIZE); - else if (is_hugepd(__hugepd(p4d_val(*p4d)))) - walk_hugepd(st, (hugepd_t *)p4d, addr, PGDIR_SHIFT, 1); - else - /* p4d exists */ - walk_pud(st, p4d, addr); - } -} - static void populate_markers(void) { int i = 0; @@ -383,17 +302,14 @@ static int ptdump_show(struct seq_file *m, void *v) .seq = m, .marker = address_markers, .level = -1, - .start_address = IS_ENABLED(CONFIG_PPC64) ? PAGE_OFFSET : TASK_SIZE, + .ptdump = { + .note_page = note_page, + .range = ptdump_range, + } }; -#ifdef CONFIG_PPC64 - if (!radix_enabled()) - st.start_address = KERN_VIRT_START; -#endif - /* Traverse kernel page tables */ - walk_pagetables(&st); - note_page(&st, 0, -1, 0, 0); + ptdump_walk_pgd(&st.ptdump, &init_mm, NULL); return 0; } @@ -409,23 +325,24 @@ static void build_pgtable_complete_mask(void) pg_level[i].mask |= pg_level[i].flag[j].mask; } -#ifdef CONFIG_PPC_DEBUG_WX +#ifdef CONFIG_DEBUG_WX void ptdump_check_wx(void) { struct pg_state st = { .seq = NULL, - .marker = address_markers, + .marker = (struct addr_marker[]) { + { 0, NULL}, + { -1, NULL}, + }, .level = -1, .check_wx = true, - .start_address = IS_ENABLED(CONFIG_PPC64) ? 
PAGE_OFFSET : TASK_SIZE, + .ptdump = { + .note_page = note_page, + .range = ptdump_range, + } }; -#ifdef CONFIG_PPC64 - if (!radix_enabled()) - st.start_address = KERN_VIRT_START; -#endif - - walk_pagetables(&st); + ptdump_walk_pgd(&st.ptdump, &init_mm, NULL); if (st.wx_pages) pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found\n", @@ -435,12 +352,21 @@ void ptdump_check_wx(void) } #endif -static int ptdump_init(void) +static int __init ptdump_init(void) { +#ifdef CONFIG_PPC64 + if (!radix_enabled()) + ptdump_range[0].start = KERN_VIRT_START; + else + ptdump_range[0].start = PAGE_OFFSET; +#endif + populate_markers(); build_pgtable_complete_mask(); - debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, - &ptdump_fops); + + if (IS_ENABLED(CONFIG_PTDUMP_DEBUGFS)) + debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops); + return 0; } device_initcall(ptdump_init); -- cgit From 316389e904f968d24d44cd96a6d171ee1ef269cf Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 25 Jun 2021 10:58:33 +0000 Subject: powerpc/syscalls: Simplify do_mmap2() When shift is zero, the operations remain valid, so no test is needed, and the 'ret' variable is unnecessary. Use IS_ALIGNED() to check the alignment; it is clearer. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/373ec500f386374bc5735007df3d3869eac47be1.1624618701.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/syscalls.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c index bf4ae0f0e36c..825931e400df 100644 --- a/arch/powerpc/kernel/syscalls.c +++ b/arch/powerpc/kernel/syscalls.c @@ -41,20 +41,13 @@ static inline long do_mmap2(unsigned long addr, size_t len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long off, int shift) { - long ret = -EINVAL; - if (!arch_validate_prot(prot, addr)) - goto out; + return -EINVAL; - if (shift) { - if (off & ((1 << shift) - 1)) - goto out; - off >>= shift; - } + if (!IS_ALIGNED(off, 1 << shift)) + return -EINVAL; - ret = ksys_mmap_pgoff(addr, len, prot, flags, fd, off); -out: - return ret; + return ksys_mmap_pgoff(addr, len, prot, flags, fd, off >> shift); } SYSCALL_DEFINE6(mmap2, unsigned long, addr, size_t, len, -- cgit From 19e932eb6ea47f4f37513eb2ae0daee19117765c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 17 Aug 2021 16:00:14 +0000 Subject: powerpc/ptrace: Make user_mode() common to PPC32 and PPC64 Today we have: #ifdef __powerpc64__ #define user_mode(regs) ((((regs)->msr) >> MSR_PR_LG) & 0x1) #else #define user_mode(regs) (((regs)->msr & MSR_PR) != 0) #endif With ppc64_defconfig, we get: if (!user_mode(regs)) 14b4: e9 3e 01 08 ld r9,264(r30) 14b8: 71 29 40 00 andi. r9,r9,16384 14bc: 41 82 07 a4 beq 1c60 <.emulate_instruction+0x7d0> Taking the ppc32 definition of user_mode(), the exact same code is generated for ppc64_defconfig. So, only keep one version of user_mode(), preferably the one not using MSR_PR_LG, which should be kept internal to reg.h.
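The equivalence is easy to sanity-check outside the kernel. Below is a minimal user-space sketch, not kernel code; it assumes MSR_PR_LG is 14 (consistent with the andi. mask 16384 in the disassembly above) and shows that the shift-and-mask form and the direct AND test always agree:

#include <assert.h>
#include <stdint.h>

#define MSR_PR_LG 14                 /* bit position of MSR[PR] */
#define MSR_PR (1UL << MSR_PR_LG)    /* 0x4000 == 16384 */

int main(void)
{
	uint64_t msr = 0x9000000000004033ULL;      /* arbitrary MSR value with PR set */
	int ppc64_form = (msr >> MSR_PR_LG) & 0x1; /* former ppc64 user_mode() */
	int ppc32_form = (msr & MSR_PR) != 0;      /* common user_mode() */

	assert(ppc64_form == ppc32_form);
	return 0;
}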
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/000a28c51808bbd802b505af42d2cb316c2be7d3.1629216000.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/ptrace.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 3e5d470a6155..333979cf5d1e 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -181,11 +181,7 @@ static inline unsigned long frame_pointer(struct pt_regs *regs) return 0; } -#ifdef __powerpc64__ -#define user_mode(regs) ((((regs)->msr) >> MSR_PR_LG) & 0x1) -#else #define user_mode(regs) (((regs)->msr & MSR_PR) != 0) -#endif #define force_successful_syscall_return() \ do { \ -- cgit From 9401f4e46cf6965e23738f70e149172344a01eef Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 2 Mar 2021 08:48:11 +0000 Subject: powerpc: Use lwarx/ldarx directly instead of PPC_LWARX/LDARX macros Force the eh flag at 0 on PPC32. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1fc81f07cabebb875b963e295408cc3dd38c8d85.1614674882.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/asm-compat.h | 4 ++-- arch/powerpc/include/asm/atomic.h | 4 ++-- arch/powerpc/include/asm/bitops.h | 8 ++++---- arch/powerpc/include/asm/ppc-opcode.h | 2 -- arch/powerpc/include/asm/simple_spinlock.h | 6 +++--- 5 files changed, 11 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/asm-compat.h b/arch/powerpc/include/asm/asm-compat.h index 19b70c5b5f18..2b736d9fbb1b 100644 --- a/arch/powerpc/include/asm/asm-compat.h +++ b/arch/powerpc/include/asm/asm-compat.h @@ -17,7 +17,7 @@ #define PPC_LONG stringify_in_c(.8byte) #define PPC_LONG_ALIGN stringify_in_c(.balign 8) #define PPC_TLNEI stringify_in_c(tdnei) -#define PPC_LLARX(t, a, b, eh) PPC_LDARX(t, a, b, eh) +#define PPC_LLARX stringify_in_c(ldarx) #define PPC_STLCX stringify_in_c(stdcx.) #define PPC_CNTLZL stringify_in_c(cntlzd) #define PPC_MTOCRF(FXM, RS) MTOCRF((FXM), RS) @@ -50,7 +50,7 @@ #define PPC_LONG stringify_in_c(.long) #define PPC_LONG_ALIGN stringify_in_c(.balign 4) #define PPC_TLNEI stringify_in_c(twnei) -#define PPC_LLARX(t, a, b, eh) PPC_LWARX(t, a, b, eh) +#define PPC_LLARX stringify_in_c(lwarx) #define PPC_STLCX stringify_in_c(stwcx.) #define PPC_CNTLZL stringify_in_c(cntlzw) #define PPC_MTOCRF stringify_in_c(mtcrf) diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h index a1732a79e92a..6a53ef178bfd 100644 --- a/arch/powerpc/include/asm/atomic.h +++ b/arch/powerpc/include/asm/atomic.h @@ -207,7 +207,7 @@ arch_atomic_try_cmpxchg_lock(atomic_t *v, int *old, int new) int r, o = *old; __asm__ __volatile__ ( -"1:\t" PPC_LWARX(%0,0,%2,1) " # atomic_try_cmpxchg_acquire \n" +"1: lwarx %0,0,%2,%5 # atomic_try_cmpxchg_acquire \n" " cmpw 0,%0,%3 \n" " bne- 2f \n" " stwcx. %4,0,%2 \n" @@ -215,7 +215,7 @@ arch_atomic_try_cmpxchg_lock(atomic_t *v, int *old, int new) "\t" PPC_ACQUIRE_BARRIER " \n" "2: \n" : "=&r" (r), "+m" (v->counter) - : "r" (&v->counter), "r" (o), "r" (new) + : "r" (&v->counter), "r" (o), "r" (new), "i" (IS_ENABLED(CONFIG_PPC64) ? 
1 : 0) : "cr0", "memory"); if (unlikely(r != o)) diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h index 299ab33505a6..11847b6a244e 100644 --- a/arch/powerpc/include/asm/bitops.h +++ b/arch/powerpc/include/asm/bitops.h @@ -70,7 +70,7 @@ static inline void fn(unsigned long mask, \ unsigned long *p = (unsigned long *)_p; \ __asm__ __volatile__ ( \ prefix \ -"1:" PPC_LLARX(%0,0,%3,0) "\n" \ +"1:" PPC_LLARX "%0,0,%3,0\n" \ stringify_in_c(op) "%0,%0,%2\n" \ PPC_STLCX "%0,0,%3\n" \ "bne- 1b\n" \ @@ -115,13 +115,13 @@ static inline unsigned long fn( \ unsigned long *p = (unsigned long *)_p; \ __asm__ __volatile__ ( \ prefix \ -"1:" PPC_LLARX(%0,0,%3,eh) "\n" \ +"1:" PPC_LLARX "%0,0,%3,%4\n" \ stringify_in_c(op) "%1,%0,%2\n" \ PPC_STLCX "%1,0,%3\n" \ "bne- 1b\n" \ postfix \ : "=&r" (old), "=&r" (t) \ - : "r" (mask), "r" (p) \ + : "r" (mask), "r" (p), "i" (IS_ENABLED(CONFIG_PPC64) ? eh : 0) \ : "cc", "memory"); \ return (old & mask); \ } @@ -170,7 +170,7 @@ clear_bit_unlock_return_word(int nr, volatile unsigned long *addr) __asm__ __volatile__ ( PPC_RELEASE_BARRIER -"1:" PPC_LLARX(%0,0,%3,0) "\n" +"1:" PPC_LLARX "%0,0,%3,0\n" "andc %1,%0,%2\n" PPC_STLCX "%1,0,%3\n" "bne- 1b\n" diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index bede76dd3db7..baea657bc868 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -576,8 +576,6 @@ #define PPC_DIVDE(t, a, b) stringify_in_c(.long PPC_RAW_DIVDE(t, a, b)) #define PPC_DIVDEU(t, a, b) stringify_in_c(.long PPC_RAW_DIVDEU(t, a, b)) #define PPC_LQARX(t, a, b, eh) stringify_in_c(.long PPC_RAW_LQARX(t, a, b, eh)) -#define PPC_LDARX(t, a, b, eh) stringify_in_c(.long PPC_RAW_LDARX(t, a, b, eh)) -#define PPC_LWARX(t, a, b, eh) stringify_in_c(.long PPC_RAW_LWARX(t, a, b, eh)) #define PPC_STQCX(t, a, b) stringify_in_c(.long PPC_RAW_STQCX(t, a, b)) #define PPC_MADDHD(t, a, b, c) stringify_in_c(.long PPC_RAW_MADDHD(t, a, b, c)) #define PPC_MADDHDU(t, a, b, c) stringify_in_c(.long PPC_RAW_MADDHDU(t, a, b, c)) diff --git a/arch/powerpc/include/asm/simple_spinlock.h b/arch/powerpc/include/asm/simple_spinlock.h index 552f325412cc..8985791a2ba5 100644 --- a/arch/powerpc/include/asm/simple_spinlock.h +++ b/arch/powerpc/include/asm/simple_spinlock.h @@ -51,7 +51,7 @@ static inline unsigned long __arch_spin_trylock(arch_spinlock_t *lock) token = LOCK_TOKEN; __asm__ __volatile__( -"1: " PPC_LWARX(%0,0,%2,1) "\n\ +"1: lwarx %0,0,%2,1\n\ cmpwi 0,%0,0\n\ bne- 2f\n\ stwcx. %1,0,%2\n\ @@ -179,7 +179,7 @@ static inline long __arch_read_trylock(arch_rwlock_t *rw) long tmp; __asm__ __volatile__( -"1: " PPC_LWARX(%0,0,%1,1) "\n" +"1: lwarx %0,0,%1,1\n" __DO_SIGN_EXTEND " addic. %0,%0,1\n\ ble- 2f\n" @@ -203,7 +203,7 @@ static inline long __arch_write_trylock(arch_rwlock_t *rw) token = WRLOCK_TOKEN; __asm__ __volatile__( -"1: " PPC_LWARX(%0,0,%2,1) "\n\ +"1: lwarx %0,0,%2,1\n\ cmpwi 0,%0,0\n\ bne- 2f\n" " stwcx. %1,0,%2\n\ -- cgit From 7f85b04b08ca264923358e2c4e93422bad59a6e0 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Thu, 5 Aug 2021 00:29:08 +0800 Subject: riscv: Keep the riscv Kconfig selects sorted Move three Kconfig selects: ARCH_STACKWALK, ARCH_SUPPORTS_ATOMIC_RMW and ARCH_SUPPORTS_DEBUG_PAGEALLOC to the right place. 
Signed-off-by: Jisheng Zhang Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 387cf68a5d52..a0caf1d4211c 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -13,9 +13,6 @@ config 32BIT config RISCV def_bool y select ARCH_CLOCKSOURCE_INIT - select ARCH_SUPPORTS_ATOMIC_RMW - select ARCH_SUPPORTS_DEBUG_PAGEALLOC if MMU - select ARCH_STACKWALK select ARCH_HAS_BINFMT_FLAT select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_DEBUG_VIRTUAL if MMU @@ -33,6 +30,9 @@ config RISCV select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT + select ARCH_STACKWALK + select ARCH_SUPPORTS_ATOMIC_RMW + select ARCH_SUPPORTS_DEBUG_PAGEALLOC if MMU select ARCH_SUPPORTS_HUGETLBFS if MMU select ARCH_USE_MEMTEST select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU -- cgit From 8341dcfbd8dda98a3b2836a421016f7d88e35b1c Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Thu, 5 Aug 2021 00:30:59 +0800 Subject: riscv: Enable Undefined Behavior Sanitizer UBSAN Select ARCH_HAS_UBSAN_SANITIZE_ALL in order to allow the user to enable CONFIG_UBSAN_SANITIZE_ALL and instrument the entire kernel for ubsan checks. VDSO is excluded because its build doesn't include the __ubsan_handle_*() functions from lib/ubsan.c, and the VDSO has no sane way to report errors even if it has definitions of these functions. Passed lib/test_ubsan.c test. Signed-off-by: Jisheng Zhang Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 + arch/riscv/kernel/vdso/Makefile | 1 + 2 files changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index a0caf1d4211c..e26e255dce3c 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -28,6 +28,7 @@ config RISCV select ARCH_HAS_STRICT_KERNEL_RWX if MMU && !XIP_KERNEL select ARCH_HAS_STRICT_MODULE_RWX if MMU && !XIP_KERNEL select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST + select ARCH_HAS_UBSAN_SANITIZE_ALL select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT select ARCH_STACKWALK diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index 24d936c147cd..30a5335bd317 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -36,6 +36,7 @@ CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os GCOV_PROFILE := n KCOV_INSTRUMENT := n KASAN_SANITIZE := n +UBSAN_SANITIZE := n # Force dependency $(obj)/vdso.o: $(obj)/vdso.so -- cgit From fde9c59aebafb91caeed816cc510b56f14aa63ae Mon Sep 17 00:00:00 2001 From: Saleem Abdulrasool Date: Wed, 4 Aug 2021 17:32:14 +0000 Subject: riscv: explicitly use symbol offsets for VDSO The current implementation of the `__rt_sigaction` reference computed an absolute offset relative to the mapped base of the VDSO. While this can be handled in the medlow model, the medany model cannot handle this as it is meant to be position independent. The current implementation relied on the BFD linker relaxing the PC-relative relocation into an absolute relocation as it was a near-zero address allowing it to be referenced relative to `zero`. We now extract the offsets and create a generated header allowing the build with LLVM and lld to succeed as we no longer depend on the linker rewriting address references near zero. 
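As a rough illustration of the new mechanism (the symbol name and offset below are hypothetical, not taken from a real build), the generated include/generated/vdso-offsets.h reduces each exported VDSO symbol to a compile-time constant, so resolving a symbol becomes plain pointer arithmetic on the mapped base:

#include <stdint.h>

/* hypothetical line from the generated include/generated/vdso-offsets.h: */
#define __vdso_gettimeofday_offset 0x850UL

#define VDSO_SYMBOL(base, name) \
	((void *)((uintptr_t)(base) + __vdso_##name##_offset))

void *lookup_gtod(void *vdso_base)   /* vdso_base: where the VDSO got mapped */
{
	/* no link-time address tricks needed, so medany and lld are happy */
	return VDSO_SYMBOL(vdso_base, gettimeofday);
}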
This change was largely modelled after the ARM64 target which does something similar. Signed-off-by: Saleem Abdulrasool Tested-by: Nathan Chancellor Signed-off-by: Palmer Dabbelt --- arch/riscv/Makefile | 4 ++++ arch/riscv/include/asm/vdso.h | 14 ++------------ arch/riscv/kernel/vdso/Makefile | 25 +++++++++++-------------- arch/riscv/kernel/vdso/gen_vdso_offsets.sh | 5 +++++ arch/riscv/kernel/vdso/so2s.sh | 6 ------ 5 files changed, 22 insertions(+), 32 deletions(-) create mode 100755 arch/riscv/kernel/vdso/gen_vdso_offsets.sh delete mode 100755 arch/riscv/kernel/vdso/so2s.sh (limited to 'arch') diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index bc74afdbf31e..e026b2d0a5a4 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -108,6 +108,10 @@ PHONY += vdso_install vdso_install: $(Q)$(MAKE) $(build)=arch/riscv/kernel/vdso $@ +prepare: vdso_prepare +vdso_prepare: prepare0 + $(Q)$(MAKE) $(build)=arch/riscv/kernel/vdso include/generated/vdso-offsets.h + ifneq ($(CONFIG_XIP_KERNEL),y) ifeq ($(CONFIG_RISCV_M_MODE)$(CONFIG_SOC_CANAAN),yy) KBUILD_IMAGE := $(boot)/loader.bin diff --git a/arch/riscv/include/asm/vdso.h b/arch/riscv/include/asm/vdso.h index 1453a2f563bc..d8d003c2b5a3 100644 --- a/arch/riscv/include/asm/vdso.h +++ b/arch/riscv/include/asm/vdso.h @@ -9,25 +9,15 @@ #define _ASM_RISCV_VDSO_H #include +#include #ifndef CONFIG_GENERIC_TIME_VSYSCALL struct vdso_data { }; #endif -/* - * The VDSO symbols are mapped into Linux so we can just use regular symbol - * addressing to get their offsets in userspace. The symbols are mapped at an - * offset of 0, but since the linker must support setting weak undefined - * symbols to the absolute address 0 it also happens to support other low - * addresses even when the code model suggests those low addresses would not - * otherwise be availiable. - */ #define VDSO_SYMBOL(base, name) \ -({ \ - extern const char __vdso_##name[]; \ - (void __user *)((unsigned long)(base) + __vdso_##name); \ -}) + (void __user *)((unsigned long)(base) + __vdso_##name##_offset) asmlinkage long sys_riscv_flush_icache(uintptr_t, uintptr_t, uintptr_t); diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index 30a5335bd317..f2e065671e4d 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -23,10 +23,10 @@ ifneq ($(c-gettimeofday-y),) endif # Build rules -targets := $(obj-vdso) vdso.so vdso.so.dbg vdso.lds vdso-syms.S +targets := $(obj-vdso) vdso.so vdso.so.dbg vdso.lds obj-vdso := $(addprefix $(obj)/, $(obj-vdso)) -obj-y += vdso.o vdso-syms.o +obj-y += vdso.o CPPFLAGS_vdso.lds += -P -C -U$(ARCH) # Disable -pg to prevent insert call site @@ -44,20 +44,22 @@ $(obj)/vdso.o: $(obj)/vdso.so # link rule for the .so file, .lds has to be first $(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE $(call if_changed,vdsold) -LDFLAGS_vdso.so.dbg = -shared -s -soname=linux-vdso.so.1 \ +LDFLAGS_vdso.so.dbg = -shared -S -soname=linux-vdso.so.1 \ --build-id=sha1 --hash-style=both --eh-frame-hdr -# We also create a special relocatable object that should mirror the symbol -# table and layout of the linked DSO. With ld --just-symbols we can then -# refer to these symbols in the kernel code rather than hand-coded addresses. 
-$(obj)/vdso-syms.S: $(obj)/vdso.so FORCE - $(call if_changed,so2s) - # strip rule for the .so file $(obj)/%.so: OBJCOPYFLAGS := -S $(obj)/%.so: $(obj)/%.so.dbg FORCE $(call if_changed,objcopy) +# Generate VDSO offsets using helper script +gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh +quiet_cmd_vdsosym = VDSOSYM $@ + cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ + +include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE + $(call if_changed,vdsosym) + # actual build commands # The DSO images are built using a special linker script # Make sure only to export the intended __vdso_xxx symbol offsets. @@ -66,11 +68,6 @@ quiet_cmd_vdsold = VDSOLD $@ $(OBJCOPY) $(patsubst %, -G __vdso_%, $(vdso-syms)) $@.tmp $@ && \ rm $@.tmp -# Extracts symbol offsets from the VDSO, converting them into an assembly file -# that contains the same symbols at the same offsets. -quiet_cmd_so2s = SO2S $@ - cmd_so2s = $(NM) -D $< | $(srctree)/$(src)/so2s.sh > $@ - # install commands for the unstripped file quiet_cmd_vdso_install = INSTALL $@ cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@ diff --git a/arch/riscv/kernel/vdso/gen_vdso_offsets.sh b/arch/riscv/kernel/vdso/gen_vdso_offsets.sh new file mode 100755 index 000000000000..c2e5613f3495 --- /dev/null +++ b/arch/riscv/kernel/vdso/gen_vdso_offsets.sh @@ -0,0 +1,5 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +LC_ALL=C +sed -n -e 's/^[0]\+\(0[0-9a-fA-F]*\) . \(__vdso_[a-zA-Z0-9_]*\)$/\#define \2_offset\t0x\1/p' diff --git a/arch/riscv/kernel/vdso/so2s.sh b/arch/riscv/kernel/vdso/so2s.sh deleted file mode 100755 index e64cb6d9440e..000000000000 --- a/arch/riscv/kernel/vdso/so2s.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0+ -# Copyright 2020 Palmer Dabbelt - -sed 's!\([0-9a-f]*\) T \([a-z0-9_]*\)\(@@LINUX_4.15\)*!.global \2\n.set \2,0x\1!' \ -| grep '^\.' -- cgit From fd42b7b09c602c904452c0c3e5955ca21d8e387a Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 12 Aug 2021 02:00:35 +1000 Subject: KVM: PPC: Book3S HV: Initialise vcpu MSR with MSR_ME It is possible to create a VCPU without setting the MSR before running it, which results in a warning in kvmhv_vcpu_entry_p9() that MSR_ME is not set. This is pretty harmless because the MSR_ME bit is added to HSRR1 before HRFID to guest, and a normal qemu guest doesn't hit it. Initialise the vcpu MSR with MSR_ME set. Reported-by: Alexey Kardashevskiy Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210811160134.904987-2-npiggin@gmail.com --- arch/powerpc/kvm/book3s_hv.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 085fb8ecbf68..c402eb0276b0 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2684,6 +2684,7 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu) spin_lock_init(&vcpu->arch.vpa_update_lock); spin_lock_init(&vcpu->arch.tbacct_lock); vcpu->arch.busy_preempt = TB_NIL; + vcpu->arch.shregs.msr = MSR_ME; vcpu->arch.intr_msr = MSR_SF | MSR_ME; /* -- cgit From daac40e8d7a63ab8608132a7cfce411986feac32 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 12 Aug 2021 02:00:36 +1000 Subject: KVM: PPC: Book3S HV: Remove TM emulation from POWER7/8 path TM fake-suspend emulation is only used by POWER9. Remove it from the old code path. 
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210811160134.904987-3-npiggin@gmail.com --- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 42 --------------------------------- 1 file changed, 42 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 8dd437d7a2c6..75079397c2a5 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1088,12 +1088,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) cmpwi r12, BOOK3S_INTERRUPT_H_INST_STORAGE beq kvmppc_hisi -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - /* For softpatch interrupt, go off and do TM instruction emulation */ - cmpwi r12, BOOK3S_INTERRUPT_HV_SOFTPATCH - beq kvmppc_tm_emul -#endif - /* See if this is a leftover HDEC interrupt */ cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER bne 2f @@ -1599,42 +1593,6 @@ maybe_reenter_guest: blt deliver_guest_interrupt b guest_exit_cont -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM -/* - * Softpatch interrupt for transactional memory emulation cases - * on POWER9 DD2.2. This is early in the guest exit path - we - * haven't saved registers or done a treclaim yet. - */ -kvmppc_tm_emul: - /* Save instruction image in HEIR */ - mfspr r3, SPRN_HEIR - stw r3, VCPU_HEIR(r9) - - /* - * The cases we want to handle here are those where the guest - * is in real suspend mode and is trying to transition to - * transactional mode. - */ - lbz r0, HSTATE_FAKE_SUSPEND(r13) - cmpwi r0, 0 /* keep exiting guest if in fake suspend */ - bne guest_exit_cont - rldicl r3, r11, 64 - MSR_TS_S_LG, 62 - cmpwi r3, 1 /* or if not in suspend state */ - bne guest_exit_cont - - /* Call C code to do the emulation */ - mr r3, r9 - bl kvmhv_p9_tm_emulation_early - nop - ld r9, HSTATE_KVM_VCPU(r13) - li r12, BOOK3S_INTERRUPT_HV_SOFTPATCH - cmpwi r3, 0 - beq guest_exit_cont /* continue exiting if not handled */ - ld r10, VCPU_PC(r9) - ld r11, VCPU_MSR(r9) - b fast_interrupt_c_return /* go back to guest if handled */ -#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ - /* * Check whether an HDSI is an HPTE not found fault or something else. * If it is an HPTE not found fault that is due to the guest accessing -- cgit From 4782e0cd0d184d727ad3b0cfe20d1d44d9f98239 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 12 Aug 2021 02:00:37 +1000 Subject: KVM: PPC: Book3S HV P9: Fixes for TM softpatch interrupt NIP The softpatch interrupt sets HSRR0 to the faulting instruction +4, so it should subtract 4 for the faulting instruction address in the case it is a TM softpatch interrupt (the instruction was not executed) and it was not emulated. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210811160134.904987-4-npiggin@gmail.com --- arch/powerpc/kvm/book3s_hv_tm.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_hv_tm.c b/arch/powerpc/kvm/book3s_hv_tm.c index cc90b8b82329..e7c36f8bf205 100644 --- a/arch/powerpc/kvm/book3s_hv_tm.c +++ b/arch/powerpc/kvm/book3s_hv_tm.c @@ -46,6 +46,15 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) u64 newmsr, bescr; int ra, rs; + /* + * The TM softpatch interrupt sets NIP to the instruction following + * the faulting instruction, which is not executed. Rewind nip to the + * faulting instruction so it looks like a normal synchronous + * interrupt, then update nip in the places where the instruction is + * emulated. 
+ */ + vcpu->arch.regs.nip -= 4; + /* * rfid, rfebb, and mtmsrd encode bit 31 = 0 since it's a reserved bit * in these instructions, so masking bit 31 out doesn't change these @@ -67,7 +76,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) (newmsr & MSR_TM))); newmsr = sanitize_msr(newmsr); vcpu->arch.shregs.msr = newmsr; - vcpu->arch.cfar = vcpu->arch.regs.nip - 4; + vcpu->arch.cfar = vcpu->arch.regs.nip; vcpu->arch.regs.nip = vcpu->arch.shregs.srr0; return RESUME_GUEST; @@ -100,7 +109,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) vcpu->arch.bescr = bescr; msr = (msr & ~MSR_TS_MASK) | MSR_TS_T; vcpu->arch.shregs.msr = msr; - vcpu->arch.cfar = vcpu->arch.regs.nip - 4; + vcpu->arch.cfar = vcpu->arch.regs.nip; vcpu->arch.regs.nip = vcpu->arch.ebbrr; return RESUME_GUEST; @@ -116,6 +125,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) newmsr = (newmsr & ~MSR_LE) | (msr & MSR_LE); newmsr = sanitize_msr(newmsr); vcpu->arch.shregs.msr = newmsr; + vcpu->arch.regs.nip += 4; return RESUME_GUEST; /* ignore bit 31, see comment above */ @@ -152,6 +162,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) msr = (msr & ~MSR_TS_MASK) | MSR_TS_S; } vcpu->arch.shregs.msr = msr; + vcpu->arch.regs.nip += 4; return RESUME_GUEST; /* ignore bit 31, see comment above */ @@ -189,6 +200,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) | (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 29); vcpu->arch.shregs.msr &= ~MSR_TS_MASK; + vcpu->arch.regs.nip += 4; return RESUME_GUEST; /* ignore bit 31, see comment above */ @@ -220,6 +232,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) | (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 29); vcpu->arch.shregs.msr = msr | MSR_TS_S; + vcpu->arch.regs.nip += 4; return RESUME_GUEST; } -- cgit From d82b392d9b3556b63e3f9916cf057ea847e173a9 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 12 Aug 2021 02:00:38 +1000 Subject: KVM: PPC: Book3S HV Nested: Fix TM softpatch HFAC interrupt emulation Have the TM softpatch emulation code set up the HFAC interrupt and return -1 in case an instruction was executed with HFSCR bits clear, and have the interrupt exit handler fall through to the HFAC handler. When the L0 is running a nested guest, this ensures the HFAC interrupt is correctly passed up to the L1. The "direct guest" exit handler will turn these into PROGILL program interrupts so functionality in practice will be unchanged. But it's possible an L1 would want to handle these in a different way. Also rearrange the FAC interrupt emulation code to match the HFAC format while here (mainly, adding the FSCR_INTR_CAUSE mask). 
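The pattern the emulation code switches to can be sketched stand-alone as follows; struct vcpu_sketch is a stub standing in for the relevant kvm_vcpu fields, and the constants are assumed to mirror the kernel's definitions (see the book3s_hv_tm.c hunks below for the real code):

#include <stdint.h>

#define HFSCR_INTR_CAUSE (0xFFULL << 56)        /* top byte: interrupt cause */
#define BOOK3S_INTERRUPT_H_FAC_UNAVAIL 0xf80

struct vcpu_sketch {      /* stand-in for the relevant kvm_vcpu fields */
	uint64_t hfscr;
	int trap;
};

/* Record the cause (a facility bit number such as FSCR_TM_LG) in the top
 * byte of the shadow HFSCR, flag an HFAC interrupt, and return -1 so the
 * exit handler falls through to the facility unavailable handler (and, for
 * a nested guest, passes the HFAC up to the L1). */
long hfu_unavailable(struct vcpu_sketch *vcpu, unsigned int cause_lg)
{
	vcpu->hfscr &= ~HFSCR_INTR_CAUSE;
	vcpu->hfscr |= (uint64_t)cause_lg << 56;
	vcpu->trap = BOOK3S_INTERRUPT_H_FAC_UNAVAIL;
	return -1;      /* rerun host interrupt handler */
}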
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210811160134.904987-5-npiggin@gmail.com --- arch/powerpc/include/asm/reg.h | 3 ++- arch/powerpc/kvm/book3s_hv.c | 35 ++++++++++++++++++++------------ arch/powerpc/kvm/book3s_hv_tm.c | 44 ++++++++++++++++++++++------------------- 3 files changed, 48 insertions(+), 34 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index be85cf156a1f..e9d27265253b 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -415,6 +415,7 @@ #define FSCR_TAR __MASK(FSCR_TAR_LG) #define FSCR_EBB __MASK(FSCR_EBB_LG) #define FSCR_DSCR __MASK(FSCR_DSCR_LG) +#define FSCR_INTR_CAUSE (ASM_CONST(0xFF) << 56) /* interrupt cause */ #define SPRN_HFSCR 0xbe /* HV=1 Facility Status & Control Register */ #define HFSCR_PREFIX __MASK(FSCR_PREFIX_LG) #define HFSCR_MSGP __MASK(FSCR_MSGP_LG) @@ -426,7 +427,7 @@ #define HFSCR_DSCR __MASK(FSCR_DSCR_LG) #define HFSCR_VECVSX __MASK(FSCR_VECVSX_LG) #define HFSCR_FP __MASK(FSCR_FP_LG) -#define HFSCR_INTR_CAUSE (ASM_CONST(0xFF) << 56) /* interrupt cause */ +#define HFSCR_INTR_CAUSE FSCR_INTR_CAUSE #define SPRN_TAR 0x32f /* Target Address Register */ #define SPRN_LPCR 0x13E /* LPAR Control Register */ #define LPCR_VPM0 ASM_CONST(0x8000000000000000) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index c402eb0276b0..e7df8a3ca62c 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1679,6 +1679,21 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu, r = RESUME_GUEST; } break; + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + case BOOK3S_INTERRUPT_HV_SOFTPATCH: + /* + * This occurs for various TM-related instructions that + * we need to emulate on POWER9 DD2.2. We have already + * handled the cases where the guest was in real-suspend + * mode and was transitioning to transactional state. + */ + r = kvmhv_p9_tm_emulation(vcpu); + if (r != -1) + break; + fallthrough; /* go to facility unavailable handler */ +#endif + /* * This occurs if the guest (kernel or userspace), does something that * is prohibited by HFSCR. @@ -1697,18 +1712,6 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu, } break; -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - case BOOK3S_INTERRUPT_HV_SOFTPATCH: - /* - * This occurs for various TM-related instructions that - * we need to emulate on POWER9 DD2.2. We have already - * handled the cases where the guest was in real-suspend - * mode and was transitioning to transactional state. - */ - r = kvmhv_p9_tm_emulation(vcpu); - break; -#endif - case BOOK3S_INTERRUPT_HV_RM_HARD: r = RESUME_PASSTHROUGH; break; @@ -1811,9 +1814,15 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu) * mode and was transitioning to transactional state. 
*/ r = kvmhv_p9_tm_emulation(vcpu); - break; + if (r != -1) + break; + fallthrough; /* go to facility unavailable handler */ #endif + case BOOK3S_INTERRUPT_H_FAC_UNAVAIL: + r = RESUME_HOST; + break; + case BOOK3S_INTERRUPT_HV_RM_HARD: vcpu->arch.trap = 0; r = RESUME_GUEST; diff --git a/arch/powerpc/kvm/book3s_hv_tm.c b/arch/powerpc/kvm/book3s_hv_tm.c index e7c36f8bf205..866cadd70094 100644 --- a/arch/powerpc/kvm/book3s_hv_tm.c +++ b/arch/powerpc/kvm/book3s_hv_tm.c @@ -88,14 +88,15 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) } /* check EBB facility is available */ if (!(vcpu->arch.hfscr & HFSCR_EBB)) { - /* generate an illegal instruction interrupt */ - kvmppc_core_queue_program(vcpu, SRR1_PROGILL); - return RESUME_GUEST; + vcpu->arch.hfscr &= ~HFSCR_INTR_CAUSE; + vcpu->arch.hfscr |= (u64)FSCR_EBB_LG << 56; + vcpu->arch.trap = BOOK3S_INTERRUPT_H_FAC_UNAVAIL; + return -1; /* rerun host interrupt handler */ } if ((msr & MSR_PR) && !(vcpu->arch.fscr & FSCR_EBB)) { /* generate a facility unavailable interrupt */ - vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) | - ((u64)FSCR_EBB_LG << 56); + vcpu->arch.fscr &= ~FSCR_INTR_CAUSE; + vcpu->arch.fscr |= (u64)FSCR_EBB_LG << 56; kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FAC_UNAVAIL); return RESUME_GUEST; } @@ -138,14 +139,15 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) } /* check for TM disabled in the HFSCR or MSR */ if (!(vcpu->arch.hfscr & HFSCR_TM)) { - /* generate an illegal instruction interrupt */ - kvmppc_core_queue_program(vcpu, SRR1_PROGILL); - return RESUME_GUEST; + vcpu->arch.hfscr &= ~HFSCR_INTR_CAUSE; + vcpu->arch.hfscr |= (u64)FSCR_TM_LG << 56; + vcpu->arch.trap = BOOK3S_INTERRUPT_H_FAC_UNAVAIL; + return -1; /* rerun host interrupt handler */ } if (!(msr & MSR_TM)) { /* generate a facility unavailable interrupt */ - vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) | - ((u64)FSCR_TM_LG << 56); + vcpu->arch.fscr &= ~FSCR_INTR_CAUSE; + vcpu->arch.fscr |= (u64)FSCR_TM_LG << 56; kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FAC_UNAVAIL); return RESUME_GUEST; @@ -169,14 +171,15 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) case (PPC_INST_TRECLAIM & PO_XOP_OPCODE_MASK): /* check for TM disabled in the HFSCR or MSR */ if (!(vcpu->arch.hfscr & HFSCR_TM)) { - /* generate an illegal instruction interrupt */ - kvmppc_core_queue_program(vcpu, SRR1_PROGILL); - return RESUME_GUEST; + vcpu->arch.hfscr &= ~HFSCR_INTR_CAUSE; + vcpu->arch.hfscr |= (u64)FSCR_TM_LG << 56; + vcpu->arch.trap = BOOK3S_INTERRUPT_H_FAC_UNAVAIL; + return -1; /* rerun host interrupt handler */ } if (!(msr & MSR_TM)) { /* generate a facility unavailable interrupt */ - vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) | - ((u64)FSCR_TM_LG << 56); + vcpu->arch.fscr &= ~FSCR_INTR_CAUSE; + vcpu->arch.fscr |= (u64)FSCR_TM_LG << 56; kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FAC_UNAVAIL); return RESUME_GUEST; @@ -208,14 +211,15 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) /* XXX do we need to check for PR=0 here? 
*/ /* check for TM disabled in the HFSCR or MSR */ if (!(vcpu->arch.hfscr & HFSCR_TM)) { - /* generate an illegal instruction interrupt */ - kvmppc_core_queue_program(vcpu, SRR1_PROGILL); - return RESUME_GUEST; + vcpu->arch.hfscr &= ~HFSCR_INTR_CAUSE; + vcpu->arch.hfscr |= (u64)FSCR_TM_LG << 56; + vcpu->arch.trap = BOOK3S_INTERRUPT_H_FAC_UNAVAIL; + return -1; /* rerun host interrupt handler */ } if (!(msr & MSR_TM)) { /* generate a facility unavailable interrupt */ - vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) | - ((u64)FSCR_TM_LG << 56); + vcpu->arch.fscr &= ~FSCR_INTR_CAUSE; + vcpu->arch.fscr |= (u64)FSCR_TM_LG << 56; kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FAC_UNAVAIL); return RESUME_GUEST; -- cgit From 7487cabc7ed2f7716bf304e4e97c57fd995cf70e Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Thu, 12 Aug 2021 02:00:39 +1000 Subject: KVM: PPC: Book3S HV Nested: Sanitise vcpu registers As one of the arguments of the H_ENTER_NESTED hypercall, the nested hypervisor (L1) prepares a structure containing the values of various hypervisor-privileged registers with which it wants the nested guest (L2) to run. Since the nested HV runs in supervisor mode it needs the host to write to these registers. To stop a nested HV manipulating this mechanism and using a nested guest as a proxy to access a facility that has been made unavailable to it, we have a routine that sanitises the values of the HV registers before copying them into the nested guest's vcpu struct. However, when coming out of the guest the values are copied as they were back into L1 memory, which means that any sanitisation we did during guest entry will be exposed to L1 after H_ENTER_NESTED returns. This patch alters this sanitisation to have effect on the vcpu->arch registers directly before entering and after exiting the guest, leaving the structure that is copied back into L1 unchanged (except when we really want L1 to access the value, e.g the Cause bits of HFSCR). Signed-off-by: Fabiano Rosas Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Reviewed-by: Nicholas Piggin Link: https://lore.kernel.org/r/20210811160134.904987-6-npiggin@gmail.com --- arch/powerpc/kvm/book3s_hv_nested.c | 94 ++++++++++++++++++------------------- 1 file changed, 46 insertions(+), 48 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index 898f942eb198..1eb4e989edc7 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -105,7 +105,6 @@ static void save_hv_return_state(struct kvm_vcpu *vcpu, int trap, struct kvmppc_vcore *vc = vcpu->arch.vcore; hr->dpdes = vc->dpdes; - hr->hfscr = vcpu->arch.hfscr; hr->purr = vcpu->arch.purr; hr->spurr = vcpu->arch.spurr; hr->ic = vcpu->arch.ic; @@ -128,55 +127,17 @@ static void save_hv_return_state(struct kvm_vcpu *vcpu, int trap, case BOOK3S_INTERRUPT_H_INST_STORAGE: hr->asdr = vcpu->arch.fault_gpa; break; + case BOOK3S_INTERRUPT_H_FAC_UNAVAIL: + hr->hfscr = ((~HFSCR_INTR_CAUSE & hr->hfscr) | + (HFSCR_INTR_CAUSE & vcpu->arch.hfscr)); + break; case BOOK3S_INTERRUPT_H_EMUL_ASSIST: hr->heir = vcpu->arch.emul_inst; break; } } -/* - * This can result in some L0 HV register state being leaked to an L1 - * hypervisor when the hv_guest_state is copied back to the guest after - * being modified here. 
- * - * There is no known problem with such a leak, and in many cases these - * register settings could be derived by the guest by observing behaviour - * and timing, interrupts, etc., but it is an issue to consider. - */ -static void sanitise_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr) -{ - struct kvmppc_vcore *vc = vcpu->arch.vcore; - u64 mask; - - /* - * Don't let L1 change LPCR bits for the L2 except these: - */ - mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD | - LPCR_LPES | LPCR_MER; - - /* - * Additional filtering is required depending on hardware - * and configuration. - */ - hr->lpcr = kvmppc_filter_lpcr_hv(vcpu->kvm, - (vc->lpcr & ~mask) | (hr->lpcr & mask)); - - /* - * Don't let L1 enable features for L2 which we've disabled for L1, - * but preserve the interrupt cause field. - */ - hr->hfscr &= (HFSCR_INTR_CAUSE | vcpu->arch.hfscr); - - /* Don't let data address watchpoint match in hypervisor state */ - hr->dawrx0 &= ~DAWRX_HYP; - hr->dawrx1 &= ~DAWRX_HYP; - - /* Don't let completed instruction address breakpt match in HV state */ - if ((hr->ciabr & CIABR_PRIV) == CIABR_PRIV_HYPER) - hr->ciabr &= ~CIABR_PRIV; -} - -static void restore_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr) +static void restore_hv_regs(struct kvm_vcpu *vcpu, const struct hv_guest_state *hr) { struct kvmppc_vcore *vc = vcpu->arch.vcore; @@ -288,6 +249,43 @@ static int kvmhv_write_guest_state_and_regs(struct kvm_vcpu *vcpu, sizeof(struct pt_regs)); } +static void load_l2_hv_regs(struct kvm_vcpu *vcpu, + const struct hv_guest_state *l2_hv, + const struct hv_guest_state *l1_hv, u64 *lpcr) +{ + struct kvmppc_vcore *vc = vcpu->arch.vcore; + u64 mask; + + restore_hv_regs(vcpu, l2_hv); + + /* + * Don't let L1 change LPCR bits for the L2 except these: + */ + mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD | + LPCR_LPES | LPCR_MER; + + /* + * Additional filtering is required depending on hardware + * and configuration. + */ + *lpcr = kvmppc_filter_lpcr_hv(vcpu->kvm, + (vc->lpcr & ~mask) | (*lpcr & mask)); + + /* + * Don't let L1 enable features for L2 which we've disabled for L1, + * but preserve the interrupt cause field. + */ + vcpu->arch.hfscr = l2_hv->hfscr & (HFSCR_INTR_CAUSE | vcpu->arch.hfscr); + + /* Don't let data address watchpoint match in hypervisor state */ + vcpu->arch.dawrx0 = l2_hv->dawrx0 & ~DAWRX_HYP; + vcpu->arch.dawrx1 = l2_hv->dawrx1 & ~DAWRX_HYP; + + /* Don't let completed instruction address breakpt match in HV state */ + if ((l2_hv->ciabr & CIABR_PRIV) == CIABR_PRIV_HYPER) + vcpu->arch.ciabr = l2_hv->ciabr & ~CIABR_PRIV; +} + long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) { long int err, r; @@ -296,7 +294,7 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) struct hv_guest_state l2_hv = {0}, saved_l1_hv; struct kvmppc_vcore *vc = vcpu->arch.vcore; u64 hv_ptr, regs_ptr; - u64 hdec_exp; + u64 hdec_exp, lpcr; s64 delta_purr, delta_spurr, delta_ic, delta_vtb; if (vcpu->kvm->arch.l1_ptcr == 0) @@ -369,8 +367,8 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) /* Guest must always run with ME enabled, HV disabled. 
*/ vcpu->arch.shregs.msr = (vcpu->arch.regs.msr | MSR_ME) & ~MSR_HV; - sanitise_hv_regs(vcpu, &l2_hv); - restore_hv_regs(vcpu, &l2_hv); + lpcr = l2_hv.lpcr; + load_l2_hv_regs(vcpu, &l2_hv, &saved_l1_hv, &lpcr); vcpu->arch.ret = RESUME_GUEST; vcpu->arch.trap = 0; @@ -380,7 +378,7 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) r = RESUME_HOST; break; } - r = kvmhv_run_single_vcpu(vcpu, hdec_exp, l2_hv.lpcr); + r = kvmhv_run_single_vcpu(vcpu, hdec_exp, lpcr); } while (is_kvmppc_resume_guest(r)); /* save L2 state for return */ -- cgit From 8b210a880b35ba75eb42b79dfd65e369c1feb119 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 12 Aug 2021 02:00:40 +1000 Subject: KVM: PPC: Book3S HV Nested: Make nested HFSCR state accessible When the L0 runs a nested L2, there are several permutations of HFSCR that can be relevant. The HFSCR that the L1 requested, the HFSCR that the L1 vcpu may use, and the HFSCR that is actually being used to run the L2. The L1 requested HFSCR is not accessible outside the nested hcall handler, so copy that into a new kvm_nested_guest.hfscr field. The permitted HFSCR is taken from the HFSCR that the L1 runs with, which is also not accessible while the hcall is being made. Move this into a new kvm_vcpu_arch.hfscr_permitted field. These will be used by the next patch to improve facility handling for nested guests, and later by facility demand faulting patches. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210811160134.904987-7-npiggin@gmail.com --- arch/powerpc/include/asm/kvm_book3s_64.h | 1 + arch/powerpc/include/asm/kvm_host.h | 2 ++ arch/powerpc/kvm/book3s_hv.c | 2 ++ arch/powerpc/kvm/book3s_hv_nested.c | 5 +++-- 4 files changed, 8 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index eaf3a562bf1e..19b6942c6969 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -39,6 +39,7 @@ struct kvm_nested_guest { pgd_t *shadow_pgtable; /* our page table for this guest */ u64 l1_gr_to_hr; /* L1's addr of part'n-scoped table */ u64 process_table; /* process table entry for this guest */ + u64 hfscr; /* HFSCR that the L1 requested for this nested guest */ long refcnt; /* number of pointers to this struct */ struct mutex tlb_lock; /* serialize page faults and tlbies */ struct kvm_nested_guest *next; diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 9f52f282b1aa..a779f7849cfb 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -811,6 +811,8 @@ struct kvm_vcpu_arch { u32 online; + u64 hfscr_permitted; /* A mask of permitted HFSCR facilities */ + /* For support of nested guests */ struct kvm_nested_guest *nested; u32 nested_vcpu_id; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index e7df8a3ca62c..9d31267d26b3 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2715,6 +2715,8 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu) if (cpu_has_feature(CPU_FTR_TM_COMP)) vcpu->arch.hfscr |= HFSCR_TM; + vcpu->arch.hfscr_permitted = vcpu->arch.hfscr; + kvmppc_mmu_book3s_hv_init(vcpu); vcpu->arch.state = KVMPPC_VCPU_NOTREADY; diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index 1eb4e989edc7..5ad5014c6f68 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++
b/arch/powerpc/kvm/book3s_hv_nested.c @@ -272,10 +272,10 @@ static void load_l2_hv_regs(struct kvm_vcpu *vcpu, (vc->lpcr & ~mask) | (*lpcr & mask)); /* - * Don't let L1 enable features for L2 which we've disabled for L1, + * Don't let L1 enable features for L2 which we don't allow for L1, * but preserve the interrupt cause field. */ - vcpu->arch.hfscr = l2_hv->hfscr & (HFSCR_INTR_CAUSE | vcpu->arch.hfscr); + vcpu->arch.hfscr = l2_hv->hfscr & (HFSCR_INTR_CAUSE | vcpu->arch.hfscr_permitted); /* Don't let data address watchpoint match in hypervisor state */ vcpu->arch.dawrx0 = l2_hv->dawrx0 & ~DAWRX_HYP; @@ -362,6 +362,7 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) /* set L1 state to L2 state */ vcpu->arch.nested = l2; vcpu->arch.nested_vcpu_id = l2_hv.vcpu_token; + l2->hfscr = l2_hv.hfscr; vcpu->arch.regs = l2_regs; /* Guest must always run with ME enabled, HV disabled. */ -- cgit From 7c3ded5735141ff4d049747c9f76672a8b737c49 Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Thu, 12 Aug 2021 02:00:41 +1000 Subject: KVM: PPC: Book3S HV Nested: Stop forwarding all HFUs to L1 If the nested hypervisor has no access to a facility because it has been disabled by the host, it should also not be able to see the Hypervisor Facility Unavailable that arises from one of its guests trying to access the facility. This patch turns a HFU that happened in L2 into a Hypervisor Emulation Assistance interrupt and forwards it to L1 for handling. The ones that happened because L1 explicitly disabled the facility for L2 are still let through, along with the corresponding Cause bits in the HFSCR. Signed-off-by: Fabiano Rosas [np: move handling into kvmppc_handle_nested_exit] Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210811160134.904987-8-npiggin@gmail.com --- arch/powerpc/kvm/book3s_hv.c | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 9d31267d26b3..15e2ba93678e 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1730,6 +1730,7 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu, static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu) { + struct kvm_nested_guest *nested = vcpu->arch.nested; int r; int srcu_idx; @@ -1819,9 +1820,35 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu) fallthrough; /* go to facility unavailable handler */ #endif - case BOOK3S_INTERRUPT_H_FAC_UNAVAIL: - r = RESUME_HOST; + case BOOK3S_INTERRUPT_H_FAC_UNAVAIL: { + u64 cause = vcpu->arch.hfscr >> 56; + + /* + * Only pass HFU interrupts to the L1 if the facility is + * permitted but disabled by the L1's HFSCR, otherwise + * the interrupt does not make sense to the L1 so turn + * it into a HEAI. + */ + if (!(vcpu->arch.hfscr_permitted & (1UL << cause)) || + (nested->hfscr & (1UL << cause))) { + vcpu->arch.trap = BOOK3S_INTERRUPT_H_EMUL_ASSIST; + + /* + * If the fetch failed, return to guest and + * try executing it again. 
+ */ + r = kvmppc_get_last_inst(vcpu, INST_GENERIC, + &vcpu->arch.emul_inst); + if (r != EMULATE_DONE) + r = RESUME_GUEST; + else + r = RESUME_HOST; + } else { + r = RESUME_HOST; + } + break; + } case BOOK3S_INTERRUPT_HV_RM_HARD: vcpu->arch.trap = 0; -- cgit From f2e29db156523bf08a0524e0f4306a641912c6d9 Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Thu, 12 Aug 2021 02:00:42 +1000 Subject: KVM: PPC: Book3S HV Nested: save_hv_return_state does not require trap argument vcpu is already an argument, so vcpu->arch.trap can be used directly. Signed-off-by: Fabiano Rosas Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210811160134.904987-9-npiggin@gmail.com --- arch/powerpc/kvm/book3s_hv_nested.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index 5ad5014c6f68..ed8a2c9f5629 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -99,7 +99,7 @@ static void byteswap_hv_regs(struct hv_guest_state *hr) hr->dawrx1 = swab64(hr->dawrx1); } -static void save_hv_return_state(struct kvm_vcpu *vcpu, int trap, +static void save_hv_return_state(struct kvm_vcpu *vcpu, struct hv_guest_state *hr) { struct kvmppc_vcore *vc = vcpu->arch.vcore; @@ -118,7 +118,7 @@ static void save_hv_return_state(struct kvm_vcpu *vcpu, int trap, hr->pidr = vcpu->arch.pid; hr->cfar = vcpu->arch.cfar; hr->ppr = vcpu->arch.ppr; - switch (trap) { + switch (vcpu->arch.trap) { case BOOK3S_INTERRUPT_H_DATA_STORAGE: hr->hdar = vcpu->arch.fault_dar; hr->hdsisr = vcpu->arch.fault_dsisr; @@ -389,7 +389,7 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) delta_spurr = vcpu->arch.spurr - l2_hv.spurr; delta_ic = vcpu->arch.ic - l2_hv.ic; delta_vtb = vc->vtb - l2_hv.vtb; save_hv_return_state(vcpu, &l2_hv); /* restore L1 state */ vcpu->arch.nested = NULL; -- cgit From 1782663897945a5cf28e564ba5eed730098e9aa4 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 12 Aug 2021 02:00:43 +1000 Subject: KVM: PPC: Book3S HV Nested: Reflect guest PMU in-use to L0 when guest SPRs are live After the L1 saves its PMU SPRs but before loading the L2's PMU SPRs, switch the pmcregs_in_use field in the L1 lppaca to the value advertised by the L2 in its VPA. On the way out of the L2, set it back after saving the L2 PMU registers (if they were in use). This transfers the PMU liveness indication between the L1 and L2 at the points where the registers are not live. This fixes the nested HV bug for which a workaround was added to the L0 HV by commit 63279eeb7f93a ("KVM: PPC: Book3S HV: Always save guest pmu for guest capable of nesting"), which explains the problem in detail. That workaround is no longer required for guests that include this bug fix.
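The handoff can be sketched stand-alone as follows; the struct and function names here are illustrative stubs, while the real code in the book3s_hv.c hunks below operates on the pinned VPA and the paca's lppaca, with barriers around the stores:

#include <stdint.h>

struct lppaca_sketch {          /* stand-in for the lppaca shared with the L0 */
	uint8_t pmcregs_in_use;
};

/* Guest entry: the L1's PMU SPRs are saved and the L2's are about to be
 * loaded, so advertise the L2's PMU liveness (from its VPA) to the L0.
 * Without a pinned VPA, the PMU must be assumed in use. */
void pmu_handoff_entry(struct lppaca_sketch *l1_lppaca,
		       const struct lppaca_sketch *l2_vpa)
{
	l1_lppaca->pmcregs_in_use = l2_vpa ? l2_vpa->pmcregs_in_use : 1;
}

/* Guest exit: the L2's PMU SPRs have been saved again, so restore the L1's
 * own liveness indication. */
void pmu_handoff_exit(struct lppaca_sketch *l1_lppaca, uint8_t l1_pmu_inuse)
{
	l1_lppaca->pmcregs_in_use = l1_pmu_inuse;
}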
Fixes: 360cae313702 ("KVM: PPC: Book3S HV: Nested guest entry via hypercall") Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Reviewed-by: Fabiano Rosas Link: https://lore.kernel.org/r/20210811160134.904987-10-npiggin@gmail.com --- arch/powerpc/include/asm/pmc.h | 7 +++++++ arch/powerpc/kvm/book3s_hv.c | 20 ++++++++++++++++++++ 2 files changed, 27 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/include/asm/pmc.h b/arch/powerpc/include/asm/pmc.h index c6bbe9778d3c..3c09109e708e 100644 --- a/arch/powerpc/include/asm/pmc.h +++ b/arch/powerpc/include/asm/pmc.h @@ -34,6 +34,13 @@ static inline void ppc_set_pmu_inuse(int inuse) #endif } +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE +static inline int ppc_get_pmu_inuse(void) +{ + return get_paca()->pmcregs_in_use; +} +#endif + extern void power4_enable_pmcs(void); #else /* CONFIG_PPC64 */ diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 15e2ba93678e..938b0948478f 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -59,6 +59,7 @@ #include #include #include +#include #include #include #include @@ -3891,6 +3892,18 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) kvmppc_restore_tm_hv(vcpu, vcpu->arch.shregs.msr, true); +#ifdef CONFIG_PPC_PSERIES + if (kvmhv_on_pseries()) { + barrier(); + if (vcpu->arch.vpa.pinned_addr) { + struct lppaca *lp = vcpu->arch.vpa.pinned_addr; + get_lppaca()->pmcregs_in_use = lp->pmcregs_in_use; + } else { + get_lppaca()->pmcregs_in_use = 1; + } + barrier(); + } +#endif kvmhv_load_guest_pmu(vcpu); msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX); @@ -4025,6 +4038,13 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, save_pmu |= nesting_enabled(vcpu->kvm); kvmhv_save_guest_pmu(vcpu, save_pmu); +#ifdef CONFIG_PPC_PSERIES + if (kvmhv_on_pseries()) { + barrier(); + get_lppaca()->pmcregs_in_use = ppc_get_pmu_inuse(); + barrier(); + } +#endif vc->entry_exit_map = 0x101; vc->in_guest = 0; -- cgit From 0c8fb653d487d2873f5eefdcaf4cecff4e103828 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 12 Aug 2021 02:00:44 +1000 Subject: powerpc/64s: Remove WORT SPR from POWER9/10 This register is not architected and not implemented in POWER9 or 10, it just reads back zeroes for compatibility. 
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Reviewed-by: Fabiano Rosas Link: https://lore.kernel.org/r/20210811160134.904987-11-npiggin@gmail.com --- arch/powerpc/kvm/book3s_hv.c | 3 --- arch/powerpc/platforms/powernv/idle.c | 2 -- 2 files changed, 5 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 938b0948478f..dc8f18b0aa75 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -3767,7 +3767,6 @@ static void load_spr_state(struct kvm_vcpu *vcpu) mtspr(SPRN_EBBHR, vcpu->arch.ebbhr); mtspr(SPRN_EBBRR, vcpu->arch.ebbrr); mtspr(SPRN_BESCR, vcpu->arch.bescr); - mtspr(SPRN_WORT, vcpu->arch.wort); mtspr(SPRN_TIDR, vcpu->arch.tid); mtspr(SPRN_AMR, vcpu->arch.amr); mtspr(SPRN_UAMOR, vcpu->arch.uamor); @@ -3794,7 +3793,6 @@ static void store_spr_state(struct kvm_vcpu *vcpu) vcpu->arch.ebbhr = mfspr(SPRN_EBBHR); vcpu->arch.ebbrr = mfspr(SPRN_EBBRR); vcpu->arch.bescr = mfspr(SPRN_BESCR); - vcpu->arch.wort = mfspr(SPRN_WORT); vcpu->arch.tid = mfspr(SPRN_TIDR); vcpu->arch.amr = mfspr(SPRN_AMR); vcpu->arch.uamor = mfspr(SPRN_UAMOR); @@ -3826,7 +3824,6 @@ static void restore_p9_host_os_sprs(struct kvm_vcpu *vcpu, struct p9_host_os_sprs *host_os_sprs) { mtspr(SPRN_PSPB, 0); - mtspr(SPRN_WORT, 0); mtspr(SPRN_UAMOR, 0); mtspr(SPRN_DSCR, host_os_sprs->dscr); diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index 528a7e0cf83a..180baecad914 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -667,7 +667,6 @@ static unsigned long power9_idle_stop(unsigned long psscr) sprs.purr = mfspr(SPRN_PURR); sprs.spurr = mfspr(SPRN_SPURR); sprs.dscr = mfspr(SPRN_DSCR); - sprs.wort = mfspr(SPRN_WORT); sprs.ciabr = mfspr(SPRN_CIABR); sprs.mmcra = mfspr(SPRN_MMCRA); @@ -785,7 +784,6 @@ core_woken: mtspr(SPRN_PURR, sprs.purr); mtspr(SPRN_SPURR, sprs.spurr); mtspr(SPRN_DSCR, sprs.dscr); - mtspr(SPRN_WORT, sprs.wort); mtspr(SPRN_CIABR, sprs.ciabr); mtspr(SPRN_MMCRA, sprs.mmcra); -- cgit From 315511166469a641c1b838eaca6bdd3af5c362c4 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 12 Aug 2021 01:45:18 +0900 Subject: microblaze: move core-y in arch/microblaze/Makefile to arch/microblaze/Kbuild Use obj-y to clean up Makefile. Signed-off-by: Masahiro Yamada Link: https://lore.kernel.org/r/20210811164518.187497-1-masahiroy@kernel.org Signed-off-by: Michal Simek --- arch/microblaze/Kbuild | 4 ++++ arch/microblaze/Makefile | 5 ----- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/microblaze/Kbuild b/arch/microblaze/Kbuild index a4e40e534e6a..a1c597889319 100644 --- a/arch/microblaze/Kbuild +++ b/arch/microblaze/Kbuild @@ -1 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only +obj-y += kernel/ +obj-y += mm/ +obj-$(CONFIG_PCI) += pci/ +obj-y += boot/dts/ diff --git a/arch/microblaze/Makefile b/arch/microblaze/Makefile index 6d4af39e3890..9adc6b6434df 100644 --- a/arch/microblaze/Makefile +++ b/arch/microblaze/Makefile @@ -50,17 +50,12 @@ KBUILD_CFLAGS += -ffixed-r31 $(CPUFLAGS-y) $(CPUFLAGS-1) $(CPUFLAGS-2) head-y := arch/microblaze/kernel/head.o libs-y += arch/microblaze/lib/ -core-y += arch/microblaze/kernel/ -core-y += arch/microblaze/mm/ -core-$(CONFIG_PCI) += arch/microblaze/pci/ boot := arch/microblaze/boot # Are we making a simpleImage. target? 
If so, crack out the boardname DTB:=$(subst simpleImage.,,$(filter simpleImage.%, $(MAKECMDGOALS))) -core-y += $(boot)/dts/ - export DTB all: linux.bin -- cgit From b1643084d164cea0c107a39bcdf0119fc52619af Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Wed, 18 Aug 2021 22:45:54 +0530 Subject: powerpc/perf: Use stack siar instead of mfspr Minor optimization in the 'perf_instruction_pointer' function code by making use of stack siar instead of mfspr. Fixes: 75382aa72f06 ("powerpc/perf: Move code to select SIAR or pt_regs into perf_read_regs") Signed-off-by: Kajol Jain Tested-by: Nageswara R Sastry Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210818171556.36912-1-kjain@linux.ibm.com --- arch/powerpc/perf/core-book3s.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 91203ed9d0ff..3a782a35100d 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -2269,7 +2269,7 @@ unsigned long perf_instruction_pointer(struct pt_regs *regs) else return regs->nip; } else if (use_siar && siar_valid(regs)) - return mfspr(SPRN_SIAR) + perf_ip_adjust(regs); + return siar + perf_ip_adjust(regs); else if (use_siar) return 0; // no valid instruction pointer else -- cgit From cc90c6742ef5b438f4cb86029d7a794bd0a44a06 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Wed, 18 Aug 2021 22:45:55 +0530 Subject: powerpc/perf: Drop the case of returning 0 as instruction pointer Drop the case of returning 0 as instruction pointer since kernel never executes at 0 and userspace almost never does either. Fixes: e6878835ac47 ("powerpc/perf: Sample only if SIAR-Valid bit is set in P7+") Signed-off-by: Kajol Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210818171556.36912-2-kjain@linux.ibm.com --- arch/powerpc/perf/core-book3s.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 3a782a35100d..9bb466d2d99e 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -2270,8 +2270,6 @@ unsigned long perf_instruction_pointer(struct pt_regs *regs) return regs->nip; } else if (use_siar && siar_valid(regs)) return siar + perf_ip_adjust(regs); - else if (use_siar) - return 0; // no valid instruction pointer else return regs->nip; } -- cgit From 3c69a5f22223fa3e312689ec218b5059784d49d7 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Wed, 18 Aug 2021 22:45:56 +0530 Subject: powerpc/perf: Fix the check for SIAR value Incase of random sampling, there can be scenarios where Sample Instruction Address Register(SIAR) may not latch to the sampled instruction and could result in the value of 0. In these scenarios it is preferred to return regs->nip. These corner cases are seen in the previous generation (p9) also. Patch adds the check for SIAR value along with regs_use_siar and siar_valid checks so that the function will return regs->nip incase SIAR is zero. Patch drops the code under PPMU_P10_DD1 flag check which handles SIAR 0 case only for Power10 DD1. 
Fixes: 2ca13a4cc56c9 ("powerpc/perf: Use regs->nip when SIAR is zero") Signed-off-by: Kajol Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210818171556.36912-3-kjain@linux.ibm.com --- arch/powerpc/perf/core-book3s.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 9bb466d2d99e..73e62e9b179b 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -2260,15 +2260,9 @@ unsigned long perf_misc_flags(struct pt_regs *regs) */ unsigned long perf_instruction_pointer(struct pt_regs *regs) { - bool use_siar = regs_use_siar(regs); unsigned long siar = mfspr(SPRN_SIAR); - if (ppmu && (ppmu->flags & PPMU_P10_DD1)) { - if (siar) - return siar; - else - return regs->nip; - } else if (use_siar && siar_valid(regs)) + if (regs_use_siar(regs) && siar_valid(regs) && siar) return siar + perf_ip_adjust(regs); else return regs->nip; -- cgit From 2cc1121bc993ca3090cc4267bc38d3da61b68602 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 1 Oct 2020 19:39:15 -0700 Subject: ARC: mm: disintegrate mmu.h (arcv2 bits out) non functional change Tested-by: kernel test robot Signed-off-by: Vineet Gupta --- arch/arc/include/asm/mmu-arcv2.h | 103 +++++++++++++++++++++++++++++++++++++++ arch/arc/include/asm/mmu.h | 80 +----------------------------- arch/arc/include/asm/pgtable.h | 6 --- 3 files changed, 105 insertions(+), 84 deletions(-) create mode 100644 arch/arc/include/asm/mmu-arcv2.h (limited to 'arch') diff --git a/arch/arc/include/asm/mmu-arcv2.h b/arch/arc/include/asm/mmu-arcv2.h new file mode 100644 index 000000000000..ed9036d4ede3 --- /dev/null +++ b/arch/arc/include/asm/mmu-arcv2.h @@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012, 2019-20 Synopsys, Inc. (www.synopsys.com) + * + * MMUv3 (arc700) / MMUv4 (archs) are software page walked and software managed. 
+ * This file contains the TLB access registers and commands + */ + +#ifndef _ASM_ARC_MMU_ARCV2_H +#define _ASM_ARC_MMU_ARCV2_H + +/* + * TLB Management regs + */ +#define ARC_REG_MMU_BCR 0x06f + +#ifdef CONFIG_ARC_MMU_V3 +#define ARC_REG_TLBPD0 0x405 +#define ARC_REG_TLBPD1 0x406 +#define ARC_REG_TLBPD1HI 0 /* Dummy: allows common code */ +#define ARC_REG_TLBINDEX 0x407 +#define ARC_REG_TLBCOMMAND 0x408 +#define ARC_REG_PID 0x409 +#define ARC_REG_SCRATCH_DATA0 0x418 +#else +#define ARC_REG_TLBPD0 0x460 +#define ARC_REG_TLBPD1 0x461 +#define ARC_REG_TLBPD1HI 0x463 +#define ARC_REG_TLBINDEX 0x464 +#define ARC_REG_TLBCOMMAND 0x465 +#define ARC_REG_PID 0x468 +#define ARC_REG_SCRATCH_DATA0 0x46c +#endif + +/* Bits in MMU PID reg */ +#define __TLB_ENABLE (1 << 31) +#define __PROG_ENABLE (1 << 30) +#define MMU_ENABLE (__TLB_ENABLE | __PROG_ENABLE) + +/* Bits in TLB Index reg */ +#define TLB_LKUP_ERR 0x80000000 + +#ifdef CONFIG_ARC_MMU_V3 +#define TLB_DUP_ERR (TLB_LKUP_ERR | 0x00000001) +#else +#define TLB_DUP_ERR (TLB_LKUP_ERR | 0x40000000) +#endif + +/* + * TLB Commands + */ +#define TLBWrite 0x1 +#define TLBRead 0x2 +#define TLBGetIndex 0x3 +#define TLBProbe 0x4 +#define TLBWriteNI 0x5 /* write JTLB without inv uTLBs */ +#define TLBIVUTLB 0x6 /* explicitly inv uTLBs */ + +#ifdef CONFIG_ARC_MMU_V4 +#define TLBInsertEntry 0x7 +#define TLBDeleteEntry 0x8 +#endif + +/* Masks for actual TLB "PD"s */ +#define PTE_BITS_IN_PD0 (_PAGE_GLOBAL | _PAGE_PRESENT | _PAGE_HW_SZ) +#define PTE_BITS_RWX (_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ) + +#define PTE_BITS_NON_RWX_IN_PD1 (PAGE_MASK_PHYS | _PAGE_CACHEABLE) + +#ifndef __ASSEMBLY__ + +struct mm_struct; +extern int pae40_exist_but_not_enab(void); + +static inline int is_pae40_enabled(void) +{ + return IS_ENABLED(CONFIG_ARC_HAS_PAE40); +} + +static inline void mmu_setup_asid(struct mm_struct *mm, unsigned long asid) +{ + write_aux_reg(ARC_REG_PID, asid | MMU_ENABLE); +} + +static inline void mmu_setup_pgd(struct mm_struct *mm, void *pgd) +{ + /* PGD cached in MMU reg to avoid 3 mem lookups: task->mm->pgd */ +#ifdef CONFIG_ISA_ARCV2 + write_aux_reg(ARC_REG_SCRATCH_DATA0, (unsigned int)pgd); +#endif +} + +#else + +.macro ARC_MMU_REENABLE reg + lr \reg, [ARC_REG_PID] + or \reg, \reg, MMU_ENABLE + sr \reg, [ARC_REG_PID] +.endm + +#endif /* !__ASSEMBLY__ */ + +#endif diff --git a/arch/arc/include/asm/mmu.h b/arch/arc/include/asm/mmu.h index e66e1e417694..ca427c30f70e 100644 --- a/arch/arc/include/asm/mmu.h +++ b/arch/arc/include/asm/mmu.h @@ -7,91 +7,15 @@ #define _ASM_ARC_MMU_H #ifndef __ASSEMBLY__ -#include /* NR_CPUS */ -#endif - -/* MMU Management regs */ -#define ARC_REG_MMU_BCR 0x06f - -#ifdef CONFIG_ARC_MMU_V3 -#define ARC_REG_TLBPD0 0x405 -#define ARC_REG_TLBPD1 0x406 -#define ARC_REG_TLBPD1HI 0 /* Dummy: allows code sharing with ARC700 */ -#define ARC_REG_TLBINDEX 0x407 -#define ARC_REG_TLBCOMMAND 0x408 -#define ARC_REG_PID 0x409 -#define ARC_REG_SCRATCH_DATA0 0x418 -#else -#define ARC_REG_TLBPD0 0x460 -#define ARC_REG_TLBPD1 0x461 -#define ARC_REG_TLBPD1HI 0x463 -#define ARC_REG_TLBINDEX 0x464 -#define ARC_REG_TLBCOMMAND 0x465 -#define ARC_REG_PID 0x468 -#define ARC_REG_SCRATCH_DATA0 0x46c -#endif - -/* Bits in MMU PID register */ -#define __TLB_ENABLE (1 << 31) -#define __PROG_ENABLE (1 << 30) -#define MMU_ENABLE (__TLB_ENABLE | __PROG_ENABLE) - -/* Error code if probe fails */ -#define TLB_LKUP_ERR 0x80000000 - -#ifdef CONFIG_ARC_MMU_V3 -#define TLB_DUP_ERR (TLB_LKUP_ERR | 0x00000001) -#else -#define TLB_DUP_ERR (TLB_LKUP_ERR | 0x40000000) -#endif - 
-/* TLB Commands */ -#define TLBWrite 0x1 -#define TLBRead 0x2 -#define TLBGetIndex 0x3 -#define TLBProbe 0x4 -#define TLBWriteNI 0x5 /* write JTLB without inv uTLBs */ -#define TLBIVUTLB 0x6 /* explicitly inv uTLBs */ -#ifdef CONFIG_ARC_MMU_V4 -#define TLBInsertEntry 0x7 -#define TLBDeleteEntry 0x8 -#endif - -#ifndef __ASSEMBLY__ +#include /* NR_CPUS */ typedef struct { unsigned long asid[NR_CPUS]; /* 8 bit MMU PID + Generation cycle */ } mm_context_t; -static inline void mmu_setup_asid(struct mm_struct *mm, unsigned int asid) -{ - write_aux_reg(ARC_REG_PID, asid | MMU_ENABLE); -} - -static inline void mmu_setup_pgd(struct mm_struct *mm, void *pgd) -{ - /* PGD cached in MMU reg to avoid 3 mem lookups: task->mm->pgd */ -#ifdef CONFIG_ISA_ARCV2 - write_aux_reg(ARC_REG_SCRATCH_DATA0, (unsigned int)pgd); #endif -} - -static inline int is_pae40_enabled(void) -{ - return IS_ENABLED(CONFIG_ARC_HAS_PAE40); -} - -extern int pae40_exist_but_not_enab(void); - -#else - -.macro ARC_MMU_REENABLE reg - lr \reg, [ARC_REG_PID] - or \reg, \reg, MMU_ENABLE - sr \reg, [ARC_REG_PID] -.endm -#endif /* !__ASSEMBLY__ */ +#include #endif diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index f762bacb2358..de4576e8d17a 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -103,12 +103,6 @@ */ #define PAGE_KERNEL __pgprot(_K_PAGE_PERMS | _PAGE_CACHEABLE) -/* Masks for actual TLB "PD"s */ -#define PTE_BITS_IN_PD0 (_PAGE_GLOBAL | _PAGE_PRESENT | _PAGE_HW_SZ) -#define PTE_BITS_RWX (_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ) - -#define PTE_BITS_NON_RWX_IN_PD1 (PAGE_MASK_PHYS | _PAGE_CACHEABLE) - /************************************************************************** * Mapping of vm_flags (Generic VM) to PTE flags (arch specific) * -- cgit From fe6cb7b043b69cd9498616592bb9e28648fb4f7a Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 13 Sep 2019 15:25:52 -0700 Subject: ARC: mm: disintegrate pgtable.h into levels and flags - pgtable-bits-arcv2.h (MMU specific page table flags) - pgtable-levels.h (paging levels) No functional changes, but paves way for easy addition of new MMU code with different bits and levels etc Signed-off-by: Vineet Gupta --- arch/arc/include/asm/pgtable-bits-arcv2.h | 149 ++++++++++++++++ arch/arc/include/asm/pgtable-levels.h | 95 ++++++++++ arch/arc/include/asm/pgtable.h | 277 +----------------------------- 3 files changed, 248 insertions(+), 273 deletions(-) create mode 100644 arch/arc/include/asm/pgtable-bits-arcv2.h create mode 100644 arch/arc/include/asm/pgtable-levels.h (limited to 'arch') diff --git a/arch/arc/include/asm/pgtable-bits-arcv2.h b/arch/arc/include/asm/pgtable-bits-arcv2.h new file mode 100644 index 000000000000..183d23bc1e00 --- /dev/null +++ b/arch/arc/include/asm/pgtable-bits-arcv2.h @@ -0,0 +1,149 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. 
(www.synopsys.com) + */ + +/* + * page table flags for software walked/managed MMUv3 (ARC700) and MMUv4 (HS) + * There correspond to the corresponding bits in the TLB + */ + +#ifndef _ASM_ARC_PGTABLE_BITS_ARCV2_H +#define _ASM_ARC_PGTABLE_BITS_ARCV2_H + +#ifdef CONFIG_ARC_CACHE_PAGES +#define _PAGE_CACHEABLE (1 << 0) /* Cached (H) */ +#else +#define _PAGE_CACHEABLE 0 +#endif + +#define _PAGE_EXECUTE (1 << 1) /* User Execute (H) */ +#define _PAGE_WRITE (1 << 2) /* User Write (H) */ +#define _PAGE_READ (1 << 3) /* User Read (H) */ +#define _PAGE_ACCESSED (1 << 4) /* Accessed (s) */ +#define _PAGE_DIRTY (1 << 5) /* Modified (s) */ +#define _PAGE_SPECIAL (1 << 6) +#define _PAGE_GLOBAL (1 << 8) /* ASID agnostic (H) */ +#define _PAGE_PRESENT (1 << 9) /* PTE/TLB Valid (H) */ + +#ifdef CONFIG_ARC_MMU_V4 +#define _PAGE_HW_SZ (1 << 10) /* Normal/super (H) */ +#else +#define _PAGE_HW_SZ 0 +#endif + +/* Defaults for every user page */ +#define ___DEF (_PAGE_PRESENT | _PAGE_CACHEABLE) + +/* Set of bits not changed in pte_modify */ +#define _PAGE_CHG_MASK (PAGE_MASK_PHYS | _PAGE_ACCESSED | _PAGE_DIRTY | \ + _PAGE_SPECIAL) + +/* More Abbrevaited helpers */ +#define PAGE_U_NONE __pgprot(___DEF) +#define PAGE_U_R __pgprot(___DEF | _PAGE_READ) +#define PAGE_U_W_R __pgprot(___DEF | _PAGE_READ | _PAGE_WRITE) +#define PAGE_U_X_R __pgprot(___DEF | _PAGE_READ | _PAGE_EXECUTE) +#define PAGE_U_X_W_R __pgprot(___DEF \ + | _PAGE_READ | _PAGE_WRITE | _PAGE_EXECUTE) +#define PAGE_KERNEL __pgprot(___DEF | _PAGE_GLOBAL \ + | _PAGE_READ | _PAGE_WRITE | _PAGE_EXECUTE) + +#define PAGE_SHARED PAGE_U_W_R + +#define pgprot_noncached(prot) (__pgprot(pgprot_val(prot) & ~_PAGE_CACHEABLE)) + +/* + * Mapping of vm_flags (Generic VM) to PTE flags (arch specific) + * + * Certain cases have 1:1 mapping + * e.g. __P101 means VM_READ, VM_EXEC and !VM_SHARED + * which directly corresponds to PAGE_U_X_R + * + * Other rules which cause the divergence from 1:1 mapping + * + * 1. Although ARC700 can do exclusive execute/write protection (meaning R + * can be tracked independet of X/W unlike some other CPUs), still to + * keep things consistent with other archs: + * -Write implies Read: W => R + * -Execute implies Read: X => R + * + * 2. 
Pvt Writable doesn't have Write Enabled initially: Pvt-W => !W + * This is to enable COW mechanism + */ + /* xwr */ +#define __P000 PAGE_U_NONE +#define __P001 PAGE_U_R +#define __P010 PAGE_U_R /* Pvt-W => !W */ +#define __P011 PAGE_U_R /* Pvt-W => !W */ +#define __P100 PAGE_U_X_R /* X => R */ +#define __P101 PAGE_U_X_R +#define __P110 PAGE_U_X_R /* Pvt-W => !W and X => R */ +#define __P111 PAGE_U_X_R /* Pvt-W => !W */ + +#define __S000 PAGE_U_NONE +#define __S001 PAGE_U_R +#define __S010 PAGE_U_W_R /* W => R */ +#define __S011 PAGE_U_W_R +#define __S100 PAGE_U_X_R /* X => R */ +#define __S101 PAGE_U_X_R +#define __S110 PAGE_U_X_W_R /* X => R */ +#define __S111 PAGE_U_X_W_R + +#ifndef __ASSEMBLY__ + +#define pte_write(pte) (pte_val(pte) & _PAGE_WRITE) +#define pte_dirty(pte) (pte_val(pte) & _PAGE_DIRTY) +#define pte_young(pte) (pte_val(pte) & _PAGE_ACCESSED) +#define pte_special(pte) (pte_val(pte) & _PAGE_SPECIAL) + +#define PTE_BIT_FUNC(fn, op) \ + static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; } + +PTE_BIT_FUNC(mknotpresent, &= ~(_PAGE_PRESENT)); +PTE_BIT_FUNC(wrprotect, &= ~(_PAGE_WRITE)); +PTE_BIT_FUNC(mkwrite, |= (_PAGE_WRITE)); +PTE_BIT_FUNC(mkclean, &= ~(_PAGE_DIRTY)); +PTE_BIT_FUNC(mkdirty, |= (_PAGE_DIRTY)); +PTE_BIT_FUNC(mkold, &= ~(_PAGE_ACCESSED)); +PTE_BIT_FUNC(mkyoung, |= (_PAGE_ACCESSED)); +PTE_BIT_FUNC(mkspecial, |= (_PAGE_SPECIAL)); +PTE_BIT_FUNC(mkhuge, |= (_PAGE_HW_SZ)); + +static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) +{ + return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot)); +} + +static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pteval) +{ + set_pte(ptep, pteval); +} + +void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, + pte_t *ptep); + +/* Encode swap {type,off} tuple into PTE + * We reserve 13 bits for 5-bit @type, keeping bits 12-5 zero, ensuring that + * PAGE_PRESENT is zero in a PTE holding swap "identifier" + */ +#define __swp_entry(type, off) ((swp_entry_t) \ + { ((type) & 0x1f) | ((off) << 13) }) + +/* Decode a PTE containing swap "identifier "into constituents */ +#define __swp_type(pte_lookalike) (((pte_lookalike).val) & 0x1f) +#define __swp_offset(pte_lookalike) ((pte_lookalike).val >> 13) + +#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) +#define __swp_entry_to_pte(x) ((pte_t) { (x).val }) + +#define kern_addr_valid(addr) (1) + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#include +#endif + +#endif /* __ASSEMBLY__ */ + +#endif diff --git a/arch/arc/include/asm/pgtable-levels.h b/arch/arc/include/asm/pgtable-levels.h new file mode 100644 index 000000000000..e65fb8c9da12 --- /dev/null +++ b/arch/arc/include/asm/pgtable-levels.h @@ -0,0 +1,95 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020 Synopsys, Inc. 
(www.synopsys.com) + */ + +/* + * Helpers for implemenintg paging levels + */ + +#ifndef _ASM_ARC_PGTABLE_LEVELS_H +#define _ASM_ARC_PGTABLE_LEVELS_H + +/* + * 2 level paging setup for software walked MMUv3 (ARC700) and MMUv4 (HS) + * + * [31] 32 bit virtual address [0] + * ------------------------------------------------------- + * | | <---------- PGDIR_SHIFT ----------> | + * | | | <-- PAGE_SHIFT --> | + * ------------------------------------------------------- + * | | | + * | | --> off in page frame + * | ---> index into Page Table + * ----> index into Page Directory + * + * Given software walk, the vaddr split is arbitrary set to 11:8:13 + * However enabling of super page in a 2 level regime pegs PGDIR_SHIFT to + * super page size. + */ + +#if defined(CONFIG_ARC_HUGEPAGE_16M) +#define PGDIR_SHIFT 24 +#elif defined(CONFIG_ARC_HUGEPAGE_2M) +#define PGDIR_SHIFT 21 +#else +/* + * No Super page case + * Default value provides 11:8:13 (8K), 11:9:12 (4K) + */ +#define PGDIR_SHIFT 21 + +#endif + +#define PGDIR_SIZE BIT(PGDIR_SHIFT) /* vaddr span, not PDG sz */ +#define PGDIR_MASK (~(PGDIR_SIZE - 1)) + +#define PTRS_PER_PGD BIT(32 - PGDIR_SHIFT) + +#define PTRS_PER_PTE BIT(PGDIR_SHIFT - PAGE_SHIFT) + +#ifndef __ASSEMBLY__ + +#include + +/* + * 1st level paging: pgd + */ +#define pgd_index(addr) ((addr) >> PGDIR_SHIFT) +#define pgd_offset(mm, addr) (((mm)->pgd) + pgd_index(addr)) +#define pgd_offset_k(addr) pgd_offset(&init_mm, addr) +#define pgd_ERROR(e) \ + pr_crit("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) + +/* + * Due to the strange way generic pgtable level folding works, in a 2 level + * setup, pmd_val() returns pgd, so these pmd_* macros actually work on pgd + */ +#define pmd_none(x) (!pmd_val(x)) +#define pmd_bad(x) ((pmd_val(x) & ~PAGE_MASK)) +#define pmd_present(x) (pmd_val(x)) +#define pmd_clear(xp) do { pmd_val(*(xp)) = 0; } while (0) +#define pmd_page_vaddr(pmd) (pmd_val(pmd) & PAGE_MASK) +#define pmd_page(pmd) virt_to_page(pmd_page_vaddr(pmd)) +#define set_pmd(pmdp, pmd) (*(pmdp) = pmd) +#define pmd_pgtable(pmd) ((pgtable_t) pmd_page_vaddr(pmd)) + +#define pte_ERROR(e) \ + pr_crit("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) + +#define pte_none(x) (!pte_val(x)) +#define pte_present(x) (pte_val(x) & _PAGE_PRESENT) +#define pte_clear(mm,addr,ptep) set_pte_at(mm, addr, ptep, __pte(0)) +#define pte_page(pte) pfn_to_page(pte_pfn(pte)) +#define set_pte(ptep, pte) ((*(ptep)) = (pte)) +#define pte_pfn(pte) (pte_val(pte) >> PAGE_SHIFT) +#define pfn_pte(pfn, prot) __pte(__pfn_to_phys(pfn) | pgprot_val(prot)) +#define mk_pte(page, prot) pfn_pte(page_to_pfn(page), prot) + +#ifdef CONFIG_ISA_ARCV2 +#define pmd_leaf(x) (pmd_val(x) & _PAGE_HW_SZ) +#endif + +#endif /* !__ASSEMBLY__ */ + +#endif diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index de4576e8d17a..9320b04c04bf 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -1,304 +1,35 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) - * - * vineetg: May 2011 - * -Folded PAGE_PRESENT (used by VM) and PAGE_VALID (used by MMU) into 1. 
- * They are semantically the same although in different contexts - * VALID marks a TLB entry exists and it will only happen if PRESENT - * - Utilise some unused free bits to confine PTE flags to 12 bits - * This is a must for 4k pg-sz - * - * vineetg: Mar 2011 - changes to accommodate MMU TLB Page Descriptor mods - * -TLB Locking never really existed, except for initial specs - * -SILENT_xxx not needed for our port - * -Per my request, MMU V3 changes the layout of some of the bits - * to avoid a few shifts in TLB Miss handlers. - * - * vineetg: April 2010 - * -PGD entry no longer contains any flags. If empty it is 0, otherwise has - * Pg-Tbl ptr. Thus pmd_present(), pmd_valid(), pmd_set( ) become simpler - * - * vineetg: April 2010 - * -Switched form 8:11:13 split for page table lookup to 11:8:13 - * -this speeds up page table allocation itself as we now have to memset 1K - * instead of 8k per page table. - * -TODO: Right now page table alloc is 8K and rest 7K is unused - * need to optimise it - * - * Amit Bhor, Sameer Dhavale: Codito Technologies 2004 */ #ifndef _ASM_ARC_PGTABLE_H #define _ASM_ARC_PGTABLE_H #include -#include + +#include +#include #include #include -/************************************************************************** - * Page Table Flags - * - * ARC700 MMU only deals with softare managed TLB entries. - * Page Tables are purely for Linux VM's consumption and the bits below are - * suited to that (uniqueness). Hence some are not implemented in the TLB and - * some have different value in TLB. - * e.g. MMU v2: K_READ bit is 8 and so is GLOBAL (possible because they live in - * seperate PD0 and PD1, which combined forms a translation entry) - * while for PTE perspective, they are 8 and 9 respectively - * with MMU v3: Most bits (except SHARED) represent the exact hardware pos - * (saves some bit shift ops in TLB Miss hdlrs) - */ - -#define _PAGE_CACHEABLE (1<<0) /* Page is cached (H) */ -#define _PAGE_EXECUTE (1<<1) /* Page has user execute perm (H) */ -#define _PAGE_WRITE (1<<2) /* Page has user write perm (H) */ -#define _PAGE_READ (1<<3) /* Page has user read perm (H) */ -#define _PAGE_ACCESSED (1<<4) /* Page is accessed (S) */ -#define _PAGE_DIRTY (1<<5) /* Page modified (dirty) (S) */ -#define _PAGE_SPECIAL (1<<6) - -#define _PAGE_GLOBAL (1<<8) /* Page is global (H) */ -#define _PAGE_PRESENT (1<<9) /* TLB entry is valid (H) */ - -#ifdef CONFIG_ARC_MMU_V4 -#define _PAGE_HW_SZ (1<<10) /* Page Size indicator (H): 0 normal, 1 super */ -#endif - -#define _PAGE_SHARED_CODE (1<<11) /* Shared Code page with cmn vaddr - usable for shared TLB entries (H) */ -/* vmalloc permissions */ -#define _K_PAGE_PERMS (_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ | \ - _PAGE_GLOBAL | _PAGE_PRESENT) - -#ifndef CONFIG_ARC_CACHE_PAGES -#undef _PAGE_CACHEABLE -#define _PAGE_CACHEABLE 0 -#endif - -#ifndef _PAGE_HW_SZ -#define _PAGE_HW_SZ 0 -#endif - -/* Defaults for every user page */ -#define ___DEF (_PAGE_PRESENT | _PAGE_CACHEABLE) - -/* Set of bits not changed in pte_modify */ -#define _PAGE_CHG_MASK (PAGE_MASK_PHYS | _PAGE_ACCESSED | _PAGE_DIRTY | \ - _PAGE_SPECIAL) -/* More Abbrevaited helpers */ -#define PAGE_U_NONE __pgprot(___DEF) -#define PAGE_U_R __pgprot(___DEF | _PAGE_READ) -#define PAGE_U_W_R __pgprot(___DEF | _PAGE_READ | _PAGE_WRITE) -#define PAGE_U_X_R __pgprot(___DEF | _PAGE_READ | _PAGE_EXECUTE) -#define PAGE_U_X_W_R __pgprot(___DEF | _PAGE_READ | _PAGE_WRITE | \ - _PAGE_EXECUTE) - -#define PAGE_SHARED PAGE_U_W_R - -/* While kernel runs out of unstranslated space, 
vmalloc/modules use a chunk of - * user vaddr space - visible in all addr spaces, but kernel mode only - * Thus Global, all-kernel-access, no-user-access, cached - */ -#define PAGE_KERNEL __pgprot(_K_PAGE_PERMS | _PAGE_CACHEABLE) - -/************************************************************************** - * Mapping of vm_flags (Generic VM) to PTE flags (arch specific) - * - * Certain cases have 1:1 mapping - * e.g. __P101 means VM_READ, VM_EXEC and !VM_SHARED - * which directly corresponds to PAGE_U_X_R - * - * Other rules which cause the divergence from 1:1 mapping - * - * 1. Although ARC700 can do exclusive execute/write protection (meaning R - * can be tracked independet of X/W unlike some other CPUs), still to - * keep things consistent with other archs: - * -Write implies Read: W => R - * -Execute implies Read: X => R - * - * 2. Pvt Writable doesn't have Write Enabled initially: Pvt-W => !W - * This is to enable COW mechanism - */ - /* xwr */ -#define __P000 PAGE_U_NONE -#define __P001 PAGE_U_R -#define __P010 PAGE_U_R /* Pvt-W => !W */ -#define __P011 PAGE_U_R /* Pvt-W => !W */ -#define __P100 PAGE_U_X_R /* X => R */ -#define __P101 PAGE_U_X_R -#define __P110 PAGE_U_X_R /* Pvt-W => !W and X => R */ -#define __P111 PAGE_U_X_R /* Pvt-W => !W */ - -#define __S000 PAGE_U_NONE -#define __S001 PAGE_U_R -#define __S010 PAGE_U_W_R /* W => R */ -#define __S011 PAGE_U_W_R -#define __S100 PAGE_U_X_R /* X => R */ -#define __S101 PAGE_U_X_R -#define __S110 PAGE_U_X_W_R /* X => R */ -#define __S111 PAGE_U_X_W_R - -/**************************************************************** - * 2 tier (PGD:PTE) software page walker - * - * [31] 32 bit virtual address [0] - * ------------------------------------------------------- - * | | <------------ PGDIR_SHIFT ----------> | - * | | | - * | BITS_FOR_PGD | BITS_FOR_PTE | <-- PAGE_SHIFT --> | - * ------------------------------------------------------- - * | | | - * | | --> off in page frame - * | ---> index into Page Table - * ----> index into Page Directory - * - * In a single page size configuration, only PAGE_SHIFT is fixed - * So both PGD and PTE sizing can be tweaked - * e.g. 8K page (PAGE_SHIFT 13) can have - * - PGDIR_SHIFT 21 -> 11:8:13 address split - * - PGDIR_SHIFT 24 -> 8:11:13 address split - * - * If Super Page is configured, PGDIR_SHIFT becomes fixed too, - * so the sizing flexibility is gone. - */ - -#if defined(CONFIG_ARC_HUGEPAGE_16M) -#define PGDIR_SHIFT 24 -#elif defined(CONFIG_ARC_HUGEPAGE_2M) -#define PGDIR_SHIFT 21 -#else -/* - * Only Normal page support so "hackable" (see comment above) - * Default value provides 11:8:13 (8K), 11:9:12 (4K) - */ -#define PGDIR_SHIFT 21 -#endif - -#define BITS_FOR_PTE (PGDIR_SHIFT - PAGE_SHIFT) -#define BITS_FOR_PGD (32 - PGDIR_SHIFT) - -#define PGDIR_SIZE BIT(PGDIR_SHIFT) /* vaddr span, not PDG sz */ -#define PGDIR_MASK (~(PGDIR_SIZE-1)) - -#define PTRS_PER_PTE BIT(BITS_FOR_PTE) -#define PTRS_PER_PGD BIT(BITS_FOR_PGD) - /* * Number of entries a user land program use. * TASK_SIZE is the maximum vaddr that can be used by a userland program. 
*/ #define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE) - -/**************************************************************** - * Bucket load of VM Helpers - */ - #ifndef __ASSEMBLY__ -#define pte_ERROR(e) \ - pr_crit("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) -#define pgd_ERROR(e) \ - pr_crit("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) - -/* the zero page used for uninitialized and anonymous pages */ extern char empty_zero_page[PAGE_SIZE]; #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) -#define set_pte(pteptr, pteval) ((*(pteptr)) = (pteval)) -#define set_pmd(pmdptr, pmdval) (*(pmdptr) = pmdval) - -/* find the page descriptor of the Page Tbl ref by PMD entry */ -#define pmd_page(pmd) virt_to_page(pmd_val(pmd) & PAGE_MASK) - -/* find the logical addr (phy for ARC) of the Page Tbl ref by PMD entry */ -#define pmd_page_vaddr(pmd) (pmd_val(pmd) & PAGE_MASK) - -#define pte_none(x) (!pte_val(x)) -#define pte_present(x) (pte_val(x) & _PAGE_PRESENT) -#define pte_clear(mm, addr, ptep) set_pte_at(mm, addr, ptep, __pte(0)) - -#define pmd_none(x) (!pmd_val(x)) -#define pmd_bad(x) ((pmd_val(x) & ~PAGE_MASK)) -#define pmd_present(x) (pmd_val(x)) -#define pmd_leaf(x) (pmd_val(x) & _PAGE_HW_SZ) -#define pmd_clear(xp) do { pmd_val(*(xp)) = 0; } while (0) - -#define pte_page(pte) pfn_to_page(pte_pfn(pte)) -#define mk_pte(page, prot) pfn_pte(page_to_pfn(page), prot) -#define pfn_pte(pfn, prot) __pte(__pfn_to_phys(pfn) | pgprot_val(prot)) - -/* Don't use virt_to_pfn for macros below: could cause truncations for PAE40*/ -#define pte_pfn(pte) (pte_val(pte) >> PAGE_SHIFT) - -/* Zoo of pte_xxx function */ -#define pte_read(pte) (pte_val(pte) & _PAGE_READ) -#define pte_write(pte) (pte_val(pte) & _PAGE_WRITE) -#define pte_dirty(pte) (pte_val(pte) & _PAGE_DIRTY) -#define pte_young(pte) (pte_val(pte) & _PAGE_ACCESSED) -#define pte_special(pte) (pte_val(pte) & _PAGE_SPECIAL) - -#define PTE_BIT_FUNC(fn, op) \ - static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; } - -PTE_BIT_FUNC(mknotpresent, &= ~(_PAGE_PRESENT)); -PTE_BIT_FUNC(wrprotect, &= ~(_PAGE_WRITE)); -PTE_BIT_FUNC(mkwrite, |= (_PAGE_WRITE)); -PTE_BIT_FUNC(mkclean, &= ~(_PAGE_DIRTY)); -PTE_BIT_FUNC(mkdirty, |= (_PAGE_DIRTY)); -PTE_BIT_FUNC(mkold, &= ~(_PAGE_ACCESSED)); -PTE_BIT_FUNC(mkyoung, |= (_PAGE_ACCESSED)); -PTE_BIT_FUNC(exprotect, &= ~(_PAGE_EXECUTE)); -PTE_BIT_FUNC(mkexec, |= (_PAGE_EXECUTE)); -PTE_BIT_FUNC(mkspecial, |= (_PAGE_SPECIAL)); -PTE_BIT_FUNC(mkhuge, |= (_PAGE_HW_SZ)); - -static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) -{ - return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot)); -} +extern pgd_t swapper_pg_dir[] __aligned(PAGE_SIZE); /* Macro to mark a page protection as uncacheable */ #define pgprot_noncached(prot) (__pgprot(pgprot_val(prot) & ~_PAGE_CACHEABLE)) -static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pteval) -{ - set_pte(ptep, pteval); -} - extern pgd_t swapper_pg_dir[] __aligned(PAGE_SIZE); -void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, - pte_t *ptep); - -/* Encode swap {type,off} tuple into PTE - * We reserve 13 bits for 5-bit @type, keeping bits 12-5 zero, ensuring that - * PAGE_PRESENT is zero in a PTE holding swap "identifier" - */ -#define __swp_entry(type, off) ((swp_entry_t) { \ - ((type) & 0x1f) | ((off) << 13) }) - -/* Decode a PTE containing swap "identifier "into constituents */ -#define __swp_type(pte_lookalike) (((pte_lookalike).val) & 0x1f) -#define 
__swp_offset(pte_lookalike) ((pte_lookalike).val >> 13) - -/* NOPs, to keep generic kernel happy */ -#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) -#define __swp_entry_to_pte(x) ((pte_t) { (x).val }) - -#define kern_addr_valid(addr) (1) - -#define pmd_pgtable(pmd) ((pgtable_t) pmd_page_vaddr(pmd)) - -/* - * remap a physical page `pfn' of size `size' with page protection `prot' - * into virtual address `from' - */ -#ifdef CONFIG_TRANSPARENT_HUGEPAGE -#include -#endif /* to cope with aliasing VIPT cache */ #define HAVE_ARCH_UNMAPPED_AREA -- cgit From f35534a2bcc7fd614a11aa7e3d91a0b1d6c962fb Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 1 Oct 2020 16:42:15 -0700 Subject: ARC: mm: hack to allow 2 level build with 4 level code PMD_SHIFT is mapped to PUD_SHIFT or PGD_SHIFT by asm-generic/pgtable-* but only for !__ASSEMBLY__ tlbex.S asm code has PTRS_PER_PTE which uses PMD_SHIFT hence barfs for CONFIG_PGTABLE_LEVEL={2,3} and works for 4. So add a workaround local to tlbex.S - the proper fix is to change asm-generic/pgtable-* headers to expose the defines for __ASSEMBLY__ too Signed-off-by: Vineet Gupta --- arch/arc/mm/tlbex.S | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch') diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S index 0b4bb62fa0ab..c4a5f16444ce 100644 --- a/arch/arc/mm/tlbex.S +++ b/arch/arc/mm/tlbex.S @@ -145,6 +145,14 @@ ex_saved_reg1: ;TLB Miss handling Code ;============================================================================ +#ifndef PMD_SHIFT +#define PMD_SHIFT PUD_SHIFT +#endif + +#ifndef PUD_SHIFT +#define PUD_SHIFT PGDIR_SHIFT +#endif + ;----------------------------------------------------------------------------- ; This macro does the page-table lookup for the faulting address. ; OUT: r0 = PTE faulted on, r1 = ptr to PTE, r2 = Faulting V-address -- cgit From 803930ee35fafd005fd978d0c0a0d8db5bcba654 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Sat, 7 Aug 2021 15:14:27 +0800 Subject: riscv: use strscpy to replace strlcpy The strlcpy should not be used because it doesn't limit the source length. As linus says, it's a completely useless function if you can't implicitly trust the source string - but that is almost always why people think they should use it! All in all the BSD function will lead some potential bugs. But the strscpy doesn't require reading memory from the src string beyond the specified "count" bytes, and since the return value is easier to error-check than strlcpy()'s. In addition, the implementation is robust to the string changing out from underneath it, unlike the current strlcpy() implementation. Thus, We prefer using strscpy instead of strlcpy. 
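
As a hedged illustration of the return-value difference (kernel-style sketch; copy_cmdline() and its parameters are made up for the example):

#include <linux/errno.h>
#include <linux/printk.h>
#include <linux/string.h>
#include <linux/types.h>

/* copy_cmdline() is a made-up caller, shown only for the return check. */
static void copy_cmdline(char *dst, size_t dst_sz, const char *src)
{
	ssize_t ret;

	/*
	 * strlcpy() would return strlen(src), so it must traverse the whole
	 * source even when only dst_sz bytes can be copied; strscpy() stops
	 * at dst_sz and signals truncation in its return value.
	 */
	ret = strscpy(dst, src, dst_sz);
	if (ret == -E2BIG)
		pr_warn("command line truncated\n");
}

strscpy() returns the number of bytes copied or -E2BIG on truncation, so the error check does not depend on computing strlen(src) first.
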
Signed-off-by: Jason Wang Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 18bd0e4bc36c..2dc62ebc0001 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -255,7 +255,7 @@ static void __init parse_dtb(void) pr_err("No DTB passed to the kernel\n"); #ifdef CONFIG_CMDLINE_FORCE - strlcpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE); + strscpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE); pr_info("Forcing kernel command line to: %s\n", boot_command_line); #endif } -- cgit From a290f510a178830a01bfc06e66a54bbe4ece5d2a Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Wed, 25 Aug 2021 22:52:45 -0700 Subject: RISC-V: Fix VDSO build for !MMU We don't have a VDSO for the !MMU configurations, so don't try to build one. Fixes: fde9c59aebaf ("riscv: explicitly use symbol offsets for VDSO") Signed-off-by: Palmer Dabbelt --- arch/riscv/Makefile | 2 ++ arch/riscv/include/asm/vdso.h | 9 +++++++++ 2 files changed, 11 insertions(+) (limited to 'arch') diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index e026b2d0a5a4..83ee0e71204c 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -108,9 +108,11 @@ PHONY += vdso_install vdso_install: $(Q)$(MAKE) $(build)=arch/riscv/kernel/vdso $@ +ifeq ($(CONFIG_MMU),y) prepare: vdso_prepare vdso_prepare: prepare0 $(Q)$(MAKE) $(build)=arch/riscv/kernel/vdso include/generated/vdso-offsets.h +endif ifneq ($(CONFIG_XIP_KERNEL),y) ifeq ($(CONFIG_RISCV_M_MODE)$(CONFIG_SOC_CANAAN),yy) diff --git a/arch/riscv/include/asm/vdso.h b/arch/riscv/include/asm/vdso.h index d8d003c2b5a3..893e47195e30 100644 --- a/arch/riscv/include/asm/vdso.h +++ b/arch/riscv/include/asm/vdso.h @@ -8,6 +8,13 @@ #ifndef _ASM_RISCV_VDSO_H #define _ASM_RISCV_VDSO_H + +/* + * All systems with an MMU have a VDSO, but systems without an MMU don't + * support shared libraries and therefor don't have one. + */ +#ifdef CONFIG_MMU + #include #include @@ -19,6 +26,8 @@ struct vdso_data { #define VDSO_SYMBOL(base, name) \ (void __user *)((unsigned long)(base) + __vdso_##name##_offset) +#endif /* CONFIG_MMU */ + asmlinkage long sys_riscv_flush_icache(uintptr_t, uintptr_t, uintptr_t); #endif /* _ASM_RISCV_VDSO_H */ -- cgit From 8ce8a6fce9bfd3fcabe230ad104e2caf08b2e58d Mon Sep 17 00:00:00 2001 From: Raghavendra Rao Ananta Date: Mon, 23 Aug 2021 22:39:40 +0000 Subject: KVM: arm64: Trim guest debug exception handling The switch-case for handling guest debug exception covers all the debug exception classes, but functionally, doesn't do anything with them other than ESR_ELx_EC_WATCHPT_LOW. Moreover, even though handled well, the 'default' case could be confusing from a security point of view, stating that the guests' actions can potentially flood the syslog. But in reality, the code is unreachable. Hence, trim down the function to only handle the case with ESR_ELx_EC_WATCHPT_LOW with a simple 'if' check. 
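
For context, a minimal sketch of the userspace half of this contract, assuming an arm64 host where struct kvm_debug_exit_arch exposes the hsr and far fields used below:

#include <stdio.h>
#include <linux/kvm.h>

/* "run" is the vcpu's mmap()ed struct kvm_run, inspected after KVM_RUN. */
static void handle_debug_exit(struct kvm_run *run)
{
	if (run->exit_reason != KVM_EXIT_DEBUG)
		return;

	/* debug.arch.far is only filled in for watchpoint exits */
	printf("guest debug: esr=%#x far=%#llx\n",
	       run->debug.arch.hsr,
	       (unsigned long long)run->debug.arch.far);
}
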
Suggested-by: Marc Zyngier
Signed-off-by: Raghavendra Rao Ananta
Signed-off-by: Marc Zyngier
Link: https://lore.kernel.org/r/20210823223940.1878930-1-rananta@google.com
---
 arch/arm64/kvm/handle_exit.c | 20 +++-----------------
 1 file changed, 3 insertions(+), 17 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 6f48336b1d86..b143f1ab6520 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -113,34 +113,20 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu)
  * guest and host are using the same debug facilities it will be up to
  * userspace to re-inject the correct exception for guest delivery.
  *
- * @return: 0 (while setting vcpu->run->exit_reason), -1 for error
+ * @return: 0 (while setting vcpu->run->exit_reason)
  */
 static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu)
 {
 	struct kvm_run *run = vcpu->run;
 	u32 esr = kvm_vcpu_get_esr(vcpu);
-	int ret = 0;
 
 	run->exit_reason = KVM_EXIT_DEBUG;
 	run->debug.arch.hsr = esr;
 
-	switch (ESR_ELx_EC(esr)) {
-	case ESR_ELx_EC_WATCHPT_LOW:
+	if (ESR_ELx_EC(esr) == ESR_ELx_EC_WATCHPT_LOW)
 		run->debug.arch.far = vcpu->arch.fault.far_el2;
-		fallthrough;
-	case ESR_ELx_EC_SOFTSTP_LOW:
-	case ESR_ELx_EC_BREAKPT_LOW:
-	case ESR_ELx_EC_BKPT32:
-	case ESR_ELx_EC_BRK64:
-		break;
-	default:
-		kvm_err("%s: un-handled case esr: %#08x\n",
-			__func__, (unsigned int) esr);
-		ret = -1;
-		break;
-	}
 
-	return ret;
+	return 0;
 }
 
 static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu)
--
cgit

From 4f8e78c0757e3c5a65d9d8ac76e2434c71a78f5a Mon Sep 17 00:00:00 2001
From: Xiongwei Song
Date: Sat, 7 Aug 2021 09:02:36 +0800
Subject: powerpc: Add esr as a synonym for pt_regs.dsisr

Create an anonymous union for the dsisr and esr registers, so we can
reference esr to get the exception detail when CONFIG_4xx=y or
CONFIG_BOOKE=y, and reference dsisr otherwise. This makes the code
clearer.
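
The construct relied on here is a C11 anonymous union: both member names alias the same storage, so existing dsisr users compile unchanged. A minimal standalone sketch (regs_demo is a made-up pt_regs-like type):

#include <assert.h>
#include <stddef.h>

struct regs_demo {			/* made-up pt_regs-like type */
	unsigned long dar;
	union {				/* anonymous: names join the struct */
		unsigned long dsisr;
		unsigned long esr;
	};
};

int main(void)
{
	struct regs_demo r = { .dsisr = 0x42 };

	/* Both names must refer to the same word at the same offset. */
	static_assert(offsetof(struct regs_demo, dsisr) ==
		      offsetof(struct regs_demo, esr), "aliases diverged");
	return r.esr == 0x42 ? 0 : 1;	/* same storage, two names */
}
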
Signed-off-by: Xiongwei Song [mpe: Reword commit title] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210807010239.416055-2-sxwjean@me.com --- arch/powerpc/include/asm/ptrace.h | 5 ++++- arch/powerpc/kernel/process.c | 2 +- arch/powerpc/kernel/ptrace/ptrace.c | 2 ++ arch/powerpc/kernel/traps.c | 2 +- arch/powerpc/platforms/44x/machine_check.c | 4 ++-- arch/powerpc/platforms/4xx/machine_check.c | 2 +- 6 files changed, 11 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 333979cf5d1e..ab58939653db 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -44,7 +44,10 @@ struct pt_regs #endif unsigned long trap; unsigned long dar; - unsigned long dsisr; + union { + unsigned long dsisr; + unsigned long esr; + }; unsigned long result; }; }; diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 185beb290580..f74af8f9133c 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1499,7 +1499,7 @@ static void __show_regs(struct pt_regs *regs) trap == INTERRUPT_DATA_STORAGE || trap == INTERRUPT_ALIGNMENT) { if (IS_ENABLED(CONFIG_4xx) || IS_ENABLED(CONFIG_BOOKE)) - pr_cont("DEAR: "REG" ESR: "REG" ", regs->dar, regs->dsisr); + pr_cont("DEAR: "REG" ESR: "REG" ", regs->dar, regs->esr); else pr_cont("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr); } diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c index 0a0a33eb0d28..a222fd4d6334 100644 --- a/arch/powerpc/kernel/ptrace/ptrace.c +++ b/arch/powerpc/kernel/ptrace/ptrace.c @@ -375,6 +375,8 @@ void __init pt_regs_check(void) offsetof(struct user_pt_regs, dar)); BUILD_BUG_ON(offsetof(struct pt_regs, dsisr) != offsetof(struct user_pt_regs, dsisr)); + BUILD_BUG_ON(offsetof(struct pt_regs, esr) != + offsetof(struct user_pt_regs, dsisr)); BUILD_BUG_ON(offsetof(struct pt_regs, result) != offsetof(struct user_pt_regs, result)); diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 51d4f5faf425..5463d201d465 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -562,7 +562,7 @@ static inline int check_io_access(struct pt_regs *regs) #ifdef CONFIG_PPC_ADV_DEBUG_REGS /* On 4xx, the reason for the machine check or program exception is in the ESR. 
*/ -#define get_reason(regs) ((regs)->dsisr) +#define get_reason(regs) ((regs)->esr) #define REASON_FP ESR_FP #define REASON_ILLEGAL (ESR_PIL | ESR_PUO) #define REASON_PRIVILEGED ESR_PPR diff --git a/arch/powerpc/platforms/44x/machine_check.c b/arch/powerpc/platforms/44x/machine_check.c index a5c898bb9bab..5d19daacd78a 100644 --- a/arch/powerpc/platforms/44x/machine_check.c +++ b/arch/powerpc/platforms/44x/machine_check.c @@ -11,7 +11,7 @@ int machine_check_440A(struct pt_regs *regs) { - unsigned long reason = regs->dsisr; + unsigned long reason = regs->esr; printk("Machine check in kernel mode.\n"); if (reason & ESR_IMCP){ @@ -48,7 +48,7 @@ int machine_check_440A(struct pt_regs *regs) #ifdef CONFIG_PPC_47x int machine_check_47x(struct pt_regs *regs) { - unsigned long reason = regs->dsisr; + unsigned long reason = regs->esr; u32 mcsr; printk(KERN_ERR "Machine check in kernel mode.\n"); diff --git a/arch/powerpc/platforms/4xx/machine_check.c b/arch/powerpc/platforms/4xx/machine_check.c index a71c29892a91..a905da1d6f41 100644 --- a/arch/powerpc/platforms/4xx/machine_check.c +++ b/arch/powerpc/platforms/4xx/machine_check.c @@ -10,7 +10,7 @@ int machine_check_4xx(struct pt_regs *regs) { - unsigned long reason = regs->dsisr; + unsigned long reason = regs->esr; if (reason & ESR_IMCP) { printk("Instruction"); -- cgit From cfa47772ca8d53d7a6c9b331a7f6e7c4c9827214 Mon Sep 17 00:00:00 2001 From: Xiongwei Song Date: Sat, 7 Aug 2021 09:02:37 +0800 Subject: powerpc/64e: Get esr offset with _ESR macro Use _ESR to get the offset of esr register in pr_regs for 64e cpus. Signed-off-by: Xiongwei Song Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210807010239.416055-3-sxwjean@me.com --- arch/powerpc/kernel/asm-offsets.c | 2 +- arch/powerpc/kernel/exceptions-64e.S | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index a47eefa09bcb..f4ebc435fd78 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -287,6 +287,7 @@ int main(void) STACK_PT_REGS_OFFSET(_XER, xer); STACK_PT_REGS_OFFSET(_DAR, dar); STACK_PT_REGS_OFFSET(_DSISR, dsisr); + STACK_PT_REGS_OFFSET(_ESR, esr); STACK_PT_REGS_OFFSET(ORIG_GPR3, orig_gpr3); STACK_PT_REGS_OFFSET(RESULT, result); STACK_PT_REGS_OFFSET(_TRAP, trap); @@ -298,7 +299,6 @@ int main(void) * we use them to hold SRR0 and SRR1. 
*/ STACK_PT_REGS_OFFSET(_DEAR, dar); - STACK_PT_REGS_OFFSET(_ESR, dsisr); #else /* CONFIG_PPC64 */ STACK_PT_REGS_OFFSET(SOFTE, softe); STACK_PT_REGS_OFFSET(_PPR, ppr); diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 7e0943d9f9b0..bf8822a5ae0e 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -546,7 +546,7 @@ __end_interrupts: mfspr r14,SPRN_DEAR mfspr r15,SPRN_ESR std r14,_DAR(r1) - std r15,_DSISR(r1) + std r15,_ESR(r1) ld r14,PACA_EXGEN+EX_R14(r13) ld r15,PACA_EXGEN+EX_R15(r13) EXCEPTION_COMMON(0x300) @@ -559,7 +559,7 @@ __end_interrupts: li r15,0 mr r14,r10 std r14,_DAR(r1) - std r15,_DSISR(r1) + std r15,_ESR(r1) ld r14,PACA_EXGEN+EX_R14(r13) ld r15,PACA_EXGEN+EX_R15(r13) EXCEPTION_COMMON(0x400) @@ -576,7 +576,7 @@ __end_interrupts: mfspr r14,SPRN_DEAR mfspr r15,SPRN_ESR std r14,_DAR(r1) - std r15,_DSISR(r1) + std r15,_ESR(r1) ld r14,PACA_EXGEN+EX_R14(r13) ld r15,PACA_EXGEN+EX_R15(r13) EXCEPTION_COMMON(0x600) @@ -587,7 +587,7 @@ __end_interrupts: NORMAL_EXCEPTION_PROLOG(0x700, BOOKE_INTERRUPT_PROGRAM, PROLOG_ADDITION_1REG) mfspr r14,SPRN_ESR - std r14,_DSISR(r1) + std r14,_ESR(r1) ld r14,PACA_EXGEN+EX_R14(r13) EXCEPTION_COMMON(0x700) addi r3,r1,STACK_FRAME_OVERHEAD @@ -1058,7 +1058,7 @@ bad_stack_book3e: mfspr r10,SPRN_DEAR mfspr r11,SPRN_ESR std r10,_DAR(r1) - std r11,_DSISR(r1) + std r11,_ESR(r1) std r0,GPR0(r1); /* save r0 in stackframe */ \ std r2,GPR2(r1); /* save r2 in stackframe */ \ SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \ -- cgit From 4872cbd0ca35ca5b20d52e2539e7e1950f126e7b Mon Sep 17 00:00:00 2001 From: Xiongwei Song Date: Sat, 7 Aug 2021 09:02:38 +0800 Subject: powerpc: Add dear as a synonym for pt_regs.dar register Create an anonymous union for dar and dear regsiters, we can reference dear to get the effective address when CONFIG_4xx=y or CONFIG_BOOKE=y. Otherwise, reference dar. This makes code more clear. 
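
The BUILD_BUG_ON() lines added to pt_regs_check() in these patches are compile-time layout guards. The same idea in plain C, with hypothetical mirror structs standing in for user_pt_regs and the kernel's pt_regs:

#include <stddef.h>

/* Hypothetical mirrors of user_pt_regs (UAPI) and the kernel pt_regs. */
struct user_regs { unsigned long trap, dar, dsisr; };
struct kern_regs {
	unsigned long trap;
	union { unsigned long dar; unsigned long dear; };
	union { unsigned long dsisr; unsigned long esr; };
};

/* The plain-C core of BUILD_BUG_ON(): refuse to compile on layout drift. */
_Static_assert(offsetof(struct kern_regs, dear) ==
	       offsetof(struct user_regs, dar), "dear must alias dar");
_Static_assert(offsetof(struct kern_regs, esr) ==
	       offsetof(struct user_regs, dsisr), "esr must alias dsisr");

int main(void) { return 0; }
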
Signed-off-by: Xiongwei Song [mpe: Reword commit title] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210807010239.416055-4-sxwjean@me.com --- arch/powerpc/include/asm/ptrace.h | 5 ++++- arch/powerpc/kernel/process.c | 2 +- arch/powerpc/kernel/ptrace/ptrace.c | 2 ++ 3 files changed, 7 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index ab58939653db..5114ddba6f16 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -43,7 +43,10 @@ struct pt_regs unsigned long mq; #endif unsigned long trap; - unsigned long dar; + union { + unsigned long dar; + unsigned long dear; + }; union { unsigned long dsisr; unsigned long esr; diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index f74af8f9133c..50436b52c213 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1499,7 +1499,7 @@ static void __show_regs(struct pt_regs *regs) trap == INTERRUPT_DATA_STORAGE || trap == INTERRUPT_ALIGNMENT) { if (IS_ENABLED(CONFIG_4xx) || IS_ENABLED(CONFIG_BOOKE)) - pr_cont("DEAR: "REG" ESR: "REG" ", regs->dar, regs->esr); + pr_cont("DEAR: "REG" ESR: "REG" ", regs->dear, regs->esr); else pr_cont("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr); } diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c index a222fd4d6334..7c7093c17c45 100644 --- a/arch/powerpc/kernel/ptrace/ptrace.c +++ b/arch/powerpc/kernel/ptrace/ptrace.c @@ -373,6 +373,8 @@ void __init pt_regs_check(void) offsetof(struct user_pt_regs, trap)); BUILD_BUG_ON(offsetof(struct pt_regs, dar) != offsetof(struct user_pt_regs, dar)); + BUILD_BUG_ON(offsetof(struct pt_regs, dear) != + offsetof(struct user_pt_regs, dar)); BUILD_BUG_ON(offsetof(struct pt_regs, dsisr) != offsetof(struct user_pt_regs, dsisr)); BUILD_BUG_ON(offsetof(struct pt_regs, esr) != -- cgit From d9db6e420268b2d561731468a31f0b15e2e9a145 Mon Sep 17 00:00:00 2001 From: Xiongwei Song Date: Sat, 7 Aug 2021 09:02:39 +0800 Subject: powerpc/64e: Get dear offset with _DEAR macro Use _DEAR to get the offset of dear register in pr_regs for 64e cpus. Signed-off-by: Xiongwei Song Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210807010239.416055-5-sxwjean@me.com --- arch/powerpc/kernel/asm-offsets.c | 13 +++---------- arch/powerpc/kernel/exceptions-64e.S | 8 ++++---- 2 files changed, 7 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index f4ebc435fd78..8357d5fcd09e 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -286,23 +286,16 @@ int main(void) STACK_PT_REGS_OFFSET(_CCR, ccr); STACK_PT_REGS_OFFSET(_XER, xer); STACK_PT_REGS_OFFSET(_DAR, dar); + STACK_PT_REGS_OFFSET(_DEAR, dear); STACK_PT_REGS_OFFSET(_DSISR, dsisr); STACK_PT_REGS_OFFSET(_ESR, esr); STACK_PT_REGS_OFFSET(ORIG_GPR3, orig_gpr3); STACK_PT_REGS_OFFSET(RESULT, result); STACK_PT_REGS_OFFSET(_TRAP, trap); -#ifndef CONFIG_PPC64 - /* - * The PowerPC 400-class & Book-E processors have neither the DAR - * nor the DSISR SPRs. Hence, we overload them to hold the similar - * DEAR and ESR SPRs for such processors. For critical interrupts - * we use them to hold SRR0 and SRR1. 
- */ - STACK_PT_REGS_OFFSET(_DEAR, dar); -#else /* CONFIG_PPC64 */ +#ifdef CONFIG_PPC64 STACK_PT_REGS_OFFSET(SOFTE, softe); STACK_PT_REGS_OFFSET(_PPR, ppr); -#endif /* CONFIG_PPC64 */ +#endif #ifdef CONFIG_PPC_PKEY STACK_PT_REGS_OFFSET(STACK_REGS_AMR, amr); diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index bf8822a5ae0e..711c66b76df1 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -545,7 +545,7 @@ __end_interrupts: PROLOG_ADDITION_2REGS) mfspr r14,SPRN_DEAR mfspr r15,SPRN_ESR - std r14,_DAR(r1) + std r14,_DEAR(r1) std r15,_ESR(r1) ld r14,PACA_EXGEN+EX_R14(r13) ld r15,PACA_EXGEN+EX_R15(r13) @@ -558,7 +558,7 @@ __end_interrupts: PROLOG_ADDITION_2REGS) li r15,0 mr r14,r10 - std r14,_DAR(r1) + std r14,_DEAR(r1) std r15,_ESR(r1) ld r14,PACA_EXGEN+EX_R14(r13) ld r15,PACA_EXGEN+EX_R15(r13) @@ -575,7 +575,7 @@ __end_interrupts: PROLOG_ADDITION_2REGS) mfspr r14,SPRN_DEAR mfspr r15,SPRN_ESR - std r14,_DAR(r1) + std r14,_DEAR(r1) std r15,_ESR(r1) ld r14,PACA_EXGEN+EX_R14(r13) ld r15,PACA_EXGEN+EX_R15(r13) @@ -1057,7 +1057,7 @@ bad_stack_book3e: std r11,_CCR(r1) mfspr r10,SPRN_DEAR mfspr r11,SPRN_ESR - std r10,_DAR(r1) + std r10,_DEAR(r1) std r11,_ESR(r1) std r0,GPR0(r1); /* save r0 in stackframe */ \ std r2,GPR2(r1); /* save r2 in stackframe */ \ -- cgit From 133c17a1788d68c9fff59d5f724a4ba14647a16d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 23 Aug 2021 08:24:20 +0000 Subject: powerpc: Remove MSR_PR check in interrupt_exit_{user/kernel}_prepare() In those hot functions that are called at every interrupt, any saved cycle is worth it. interrupt_exit_user_prepare() and interrupt_exit_kernel_prepare() are called from three places: - From entry_32.S - From interrupt_64.S - From interrupt_exit_user_restart() and interrupt_exit_kernel_restart() In entry_32.S, there are inambiguously called based on MSR_PR: interrupt_return: lwz r4,_MSR(r1) addi r3,r1,STACK_FRAME_OVERHEAD andi. r0,r4,MSR_PR beq .Lkernel_interrupt_return bl interrupt_exit_user_prepare ... .Lkernel_interrupt_return: bl interrupt_exit_kernel_prepare In interrupt_64.S, that's similar: interrupt_return_\srr\(): ld r4,_MSR(r1) andi. r0,r4,MSR_PR beq interrupt_return_\srr\()_kernel interrupt_return_\srr\()_user: /* make backtraces match the _kernel variant */ addi r3,r1,STACK_FRAME_OVERHEAD bl interrupt_exit_user_prepare ... interrupt_return_\srr\()_kernel: addi r3,r1,STACK_FRAME_OVERHEAD bl interrupt_exit_kernel_prepare In interrupt_exit_user_restart() and interrupt_exit_kernel_restart(), MSR_PR is verified respectively by BUG_ON(!user_mode(regs)) and BUG_ON(user_mode(regs)) prior to calling interrupt_exit_user_prepare() and interrupt_exit_kernel_prepare(). The verification in interrupt_exit_user_prepare() and interrupt_exit_kernel_prepare() are therefore useless and can be removed. 
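
For reference, the test the removed BUG_ON()s duplicated is a single MSR bit; powerpc's user_mode() boils down to roughly the following (a sketch, not the literal kernel definition):

#include <linux/types.h>
#include <asm/ptrace.h>	/* struct pt_regs */
#include <asm/reg.h>	/* MSR_PR */

/* MSR_PR set means the interrupted context ran in problem (user) state;
 * the asm above (andi. r0,r4,MSR_PR; beq ...) tests exactly this bit,
 * which is why re-testing it in interrupt_exit_*_prepare() was redundant. */
static inline bool interrupted_from_user(const struct pt_regs *regs)
{
	return (regs->msr & MSR_PR) != 0;
}
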
Signed-off-by: Christophe Leroy Acked-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/385ead49ccb66a259b25fee3eebf0bd4094068f3.1629707037.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/interrupt.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index abc2b3dc3f20..531f0a1dbabc 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -464,7 +464,6 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs) if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x)) BUG_ON(!(regs->msr & MSR_RI)); - BUG_ON(!(regs->msr & MSR_PR)); BUG_ON(arch_irq_disabled_regs(regs)); CT_WARN_ON(ct_state() == CONTEXT_USER); @@ -498,7 +497,6 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs) if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x) && unlikely(!(regs->msr & MSR_RI))) unrecoverable_exception(regs); - BUG_ON(regs->msr & MSR_PR); /* * CT_WARN_ON comes here via program_check_exception, * so avoid recursion. -- cgit From 806c0e6e7e97adc17389c8dc1f52d4736f49299b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 23 Aug 2021 08:24:21 +0000 Subject: powerpc: Refactor verification of MSR_RI 40x and BOOKE don't have MSR_RI therefore all tests involving MSR_RI may be problematic on those plateforms. Create helpers to check or set MSR_RI in regs, and use them in common code. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c2fb93708196734f4176dda334aaa3055f213b89.1629707037.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/ptrace.h | 23 +++++++++++++++++++++++ arch/powerpc/kernel/interrupt.c | 9 +++------ arch/powerpc/kernel/traps.c | 8 ++++---- arch/powerpc/mm/book3s64/slb.c | 2 +- arch/powerpc/platforms/embedded6xx/holly.c | 2 +- arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c | 2 +- arch/powerpc/platforms/pasemi/idle.c | 2 +- arch/powerpc/platforms/powernv/opal.c | 2 +- arch/powerpc/platforms/pseries/ras.c | 2 +- arch/powerpc/sysdev/fsl_rio.c | 2 +- arch/powerpc/xmon/xmon.c | 16 +++------------- 11 files changed, 40 insertions(+), 30 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 5114ddba6f16..f9c235c1d6ce 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -22,6 +22,7 @@ #include #include #include +#include #ifndef __ASSEMBLY__ struct pt_regs @@ -272,6 +273,28 @@ static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) regs->gpr[3] = rc; } +static inline bool cpu_has_msr_ri(void) +{ + return !IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x); +} + +static inline bool regs_is_unrecoverable(struct pt_regs *regs) +{ + return unlikely(cpu_has_msr_ri() && !(regs->msr & MSR_RI)); +} + +static inline void regs_set_recoverable(struct pt_regs *regs) +{ + if (cpu_has_msr_ri()) + regs_set_return_msr(regs, regs->msr | MSR_RI); +} + +static inline void regs_set_unrecoverable(struct pt_regs *regs) +{ + if (cpu_has_msr_ri()) + regs_set_return_msr(regs, regs->msr & ~MSR_RI); +} + #define arch_has_single_step() (1) #define arch_has_block_step() (true) #define ARCH_HAS_USER_SINGLE_STEP_REPORT diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index 531f0a1dbabc..a73f3f70a657 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -92,8 +92,7 @@ notrace long 
system_call_exception(long r3, long r4, long r5, CT_WARN_ON(ct_state() == CONTEXT_KERNEL); user_exit_irqoff(); - if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x)) - BUG_ON(!(regs->msr & MSR_RI)); + BUG_ON(regs_is_unrecoverable(regs)); BUG_ON(!(regs->msr & MSR_PR)); BUG_ON(arch_irq_disabled_regs(regs)); @@ -462,8 +461,7 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs) { unsigned long ret; - if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x)) - BUG_ON(!(regs->msr & MSR_RI)); + BUG_ON(regs_is_unrecoverable(regs)); BUG_ON(arch_irq_disabled_regs(regs)); CT_WARN_ON(ct_state() == CONTEXT_USER); @@ -494,8 +492,7 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs) bool stack_store = current_thread_info()->flags & _TIF_EMULATE_STACK_STORE; - if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x) && - unlikely(!(regs->msr & MSR_RI))) + if (regs_is_unrecoverable(regs)) unrecoverable_exception(regs); /* * CT_WARN_ON comes here via program_check_exception, diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 5463d201d465..3a344f5265c2 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -428,7 +428,7 @@ void hv_nmi_check_nonrecoverable(struct pt_regs *regs) return; nonrecoverable: - regs_set_return_msr(regs, regs->msr & ~MSR_RI); + regs_set_unrecoverable(regs); #endif } DEFINE_INTERRUPT_HANDLER_NMI(system_reset_exception) @@ -498,7 +498,7 @@ out: die("Unrecoverable nested System Reset", regs, SIGABRT); #endif /* Must die if the interrupt is not recoverable */ - if (!(regs->msr & MSR_RI)) { + if (regs_is_unrecoverable(regs)) { /* For the reason explained in die_mce, nmi_exit before die */ nmi_exit(); die("Unrecoverable System Reset", regs, SIGABRT); @@ -550,7 +550,7 @@ static inline int check_io_access(struct pt_regs *regs) printk(KERN_DEBUG "%s bad port %lx at %p\n", (*nip & 0x100)? 
"OUT to": "IN from", regs->gpr[rb] - _IO_BASE, nip); - regs_set_return_msr(regs, regs->msr | MSR_RI); + regs_set_recoverable(regs); regs_set_return_ip(regs, extable_fixup(entry)); return 1; } @@ -840,7 +840,7 @@ DEFINE_INTERRUPT_HANDLER_NMI(machine_check_exception) bail: /* Must die if the interrupt is not recoverable */ - if (!(regs->msr & MSR_RI)) + if (regs_is_unrecoverable(regs)) die_mce("Unrecoverable Machine check", regs, SIGBUS); #ifdef CONFIG_PPC_BOOK3S_64 diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c index c91bd85eb90e..f0037bcc47a0 100644 --- a/arch/powerpc/mm/book3s64/slb.c +++ b/arch/powerpc/mm/book3s64/slb.c @@ -822,7 +822,7 @@ DEFINE_INTERRUPT_HANDLER_RAW(do_slb_fault) /* IRQs are not reconciled here, so can't check irqs_disabled */ VM_WARN_ON(mfmsr() & MSR_EE); - if (unlikely(!(regs->msr & MSR_RI))) + if (regs_is_unrecoverable(regs)) return -EINVAL; /* diff --git a/arch/powerpc/platforms/embedded6xx/holly.c b/arch/powerpc/platforms/embedded6xx/holly.c index 85521b3e7098..7a85b117f7a4 100644 --- a/arch/powerpc/platforms/embedded6xx/holly.c +++ b/arch/powerpc/platforms/embedded6xx/holly.c @@ -251,7 +251,7 @@ static int ppc750_machine_check_exception(struct pt_regs *regs) /* Are we prepared to handle this fault */ if ((entry = search_exception_tables(regs->nip)) != NULL) { tsi108_clear_pci_cfg_error(); - regs_set_return_msr(regs, regs->msr | MSR_RI); + regs_set_recoverable(regs); regs_set_return_ip(regs, extable_fixup(entry)); return 1; } diff --git a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c index d8da6a483e59..9eb9abb5bce2 100644 --- a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c +++ b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c @@ -173,7 +173,7 @@ static int mpc7448_machine_check_exception(struct pt_regs *regs) /* Are we prepared to handle this fault */ if ((entry = search_exception_tables(regs->nip)) != NULL) { tsi108_clear_pci_cfg_error(); - regs_set_return_msr(regs, regs->msr | MSR_RI); + regs_set_recoverable(regs); regs_set_return_ip(regs, extable_fixup(entry)); return 1; } diff --git a/arch/powerpc/platforms/pasemi/idle.c b/arch/powerpc/platforms/pasemi/idle.c index 9b88e3cded7d..2a5b84268b77 100644 --- a/arch/powerpc/platforms/pasemi/idle.c +++ b/arch/powerpc/platforms/pasemi/idle.c @@ -58,7 +58,7 @@ static int pasemi_system_reset_exception(struct pt_regs *regs) restore_astate(hard_smp_processor_id()); /* everything handled */ - regs_set_return_msr(regs, regs->msr | MSR_RI); + regs_set_recoverable(regs); return 1; } diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 2835376e61a4..e9d18519e650 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -588,7 +588,7 @@ static int opal_recover_mce(struct pt_regs *regs, { int recovered = 0; - if (!(regs->msr & MSR_RI)) { + if (regs_is_unrecoverable(regs)) { /* If MSR_RI isn't set, we cannot recover */ pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n"); recovered = 0; diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 167f2e1b8d39..56092dccfdb8 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -783,7 +783,7 @@ static int recover_mce(struct pt_regs *regs, struct machine_check_event *evt) { int recovered = 0; - if (!(regs->msr & MSR_RI)) { + if (regs_is_unrecoverable(regs)) { /* If MSR_RI isn't set, we cannot recover */ pr_err("Machine check 
interrupt unrecoverable: MSR(RI=0)\n"); recovered = 0; diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c index 5a95b8ea23d8..ff7906b48ca1 100644 --- a/arch/powerpc/sysdev/fsl_rio.c +++ b/arch/powerpc/sysdev/fsl_rio.c @@ -108,7 +108,7 @@ int fsl_rio_mcheck_exception(struct pt_regs *regs) __func__); out_be32((u32 *)(rio_regs_win + RIO_LTLEDCSR), 0); - regs_set_return_msr(regs, regs->msr | MSR_RI); + regs_set_recoverable(regs); regs_set_return_ip(regs, extable_fixup(entry)); return 1; } diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index ead460b80905..dd8241c009e5 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -482,16 +482,6 @@ static inline void get_output_lock(void) {} static inline void release_output_lock(void) {} #endif -static inline int unrecoverable_excp(struct pt_regs *regs) -{ -#if defined(CONFIG_4xx) || defined(CONFIG_PPC_BOOK3E) - /* We have no MSR_RI bit on 4xx or Book3e, so we simply return false */ - return 0; -#else - return ((regs->msr & MSR_RI) == 0); -#endif -} - static void xmon_touch_watchdogs(void) { touch_softlockup_watchdog_sync(); @@ -565,7 +555,7 @@ static int xmon_core(struct pt_regs *regs, volatile int fromipi) bp = NULL; if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) == (MSR_IR|MSR_64BIT)) bp = at_breakpoint(regs->nip); - if (bp || unrecoverable_excp(regs)) + if (bp || regs_is_unrecoverable(regs)) fromipi = 0; if (!fromipi) { @@ -577,7 +567,7 @@ static int xmon_core(struct pt_regs *regs, volatile int fromipi) cpu, BP_NUM(bp)); xmon_print_symbol(regs->nip, " ", ")\n"); } - if (unrecoverable_excp(regs)) + if (regs_is_unrecoverable(regs)) printf("WARNING: exception is not recoverable, " "can't continue\n"); release_output_lock(); @@ -693,7 +683,7 @@ static int xmon_core(struct pt_regs *regs, volatile int fromipi) printf("Stopped at breakpoint %tx (", BP_NUM(bp)); xmon_print_symbol(regs->nip, " ", ")\n"); } - if (unrecoverable_excp(regs)) + if (regs_is_unrecoverable(regs)) printf("WARNING: exception is not recoverable, " "can't continue\n"); remove_bpts(); -- cgit From c12adb0678446b3284a6135dc5fa7aa3b6a968a5 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Thu, 7 Jan 2021 08:40:38 -0500 Subject: powerpc: retire sbc8548 board support The support was for this was mainlined 13 years ago, in v2.6.25 [0e0fffe88767] just around the ppc --> powerpc migration. I believe the board was introduced a year or two before that, so it is roughly a 15 year old platform - with the CPU speed and memory size that was typical for that era. I haven't had one of these boards for several years, and availability was discontinued several years before that. Given that, there is no point in adding a burden to testing coverage that builds all possible defconfigs, so it makes sense to remove it. Of course it will remain in the git history forever, for anyone who happens to find a functional board and wants to tinker with it. 
Acked-by: Scott Wood Signed-off-by: Paul Gortmaker Signed-off-by: Michael Ellerman --- arch/powerpc/boot/Makefile | 1 - arch/powerpc/boot/dts/sbc8548-altflash.dts | 111 ----------- arch/powerpc/boot/dts/sbc8548-post.dtsi | 289 ---------------------------- arch/powerpc/boot/dts/sbc8548-pre.dtsi | 48 ----- arch/powerpc/boot/dts/sbc8548.dts | 106 ---------- arch/powerpc/boot/wrapper | 2 +- arch/powerpc/configs/85xx/sbc8548_defconfig | 50 ----- arch/powerpc/configs/mpc85xx_base.config | 1 - arch/powerpc/platforms/85xx/Kconfig | 6 - arch/powerpc/platforms/85xx/Makefile | 1 - arch/powerpc/platforms/85xx/sbc8548.c | 134 ------------- 11 files changed, 1 insertion(+), 748 deletions(-) delete mode 100644 arch/powerpc/boot/dts/sbc8548-altflash.dts delete mode 100644 arch/powerpc/boot/dts/sbc8548-post.dtsi delete mode 100644 arch/powerpc/boot/dts/sbc8548-pre.dtsi delete mode 100644 arch/powerpc/boot/dts/sbc8548.dts delete mode 100644 arch/powerpc/configs/85xx/sbc8548_defconfig delete mode 100644 arch/powerpc/platforms/85xx/sbc8548.c (limited to 'arch') diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index e312ea802aa6..3e30ea6f2187 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -341,7 +341,6 @@ image-$(CONFIG_TQM8541) += cuImage.tqm8541 image-$(CONFIG_TQM8548) += cuImage.tqm8548 image-$(CONFIG_TQM8555) += cuImage.tqm8555 image-$(CONFIG_TQM8560) += cuImage.tqm8560 -image-$(CONFIG_SBC8548) += cuImage.sbc8548 image-$(CONFIG_KSI8560) += cuImage.ksi8560 # Board ports in arch/powerpc/platform/86xx/Kconfig diff --git a/arch/powerpc/boot/dts/sbc8548-altflash.dts b/arch/powerpc/boot/dts/sbc8548-altflash.dts deleted file mode 100644 index bb7a1e712bb7..000000000000 --- a/arch/powerpc/boot/dts/sbc8548-altflash.dts +++ /dev/null @@ -1,111 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * SBC8548 Device Tree Source - * - * Configured for booting off the alternate (64MB SODIMM) flash. - * Requires switching JP12 jumpers and changing SW2.8 setting. - * - * Copyright 2013 Wind River Systems Inc. - * - * Paul Gortmaker (see MAINTAINERS for contact information) - */ - - -/dts-v1/; - -/include/ "sbc8548-pre.dtsi" - -/{ - localbus@e0000000 { - #address-cells = <2>; - #size-cells = <1>; - compatible = "simple-bus"; - reg = <0xe0000000 0x5000>; - interrupt-parent = <&mpic>; - - ranges = <0x0 0x0 0xfc000000 0x04000000 /*64MB Flash*/ - 0x3 0x0 0xf0000000 0x04000000 /*64MB SDRAM*/ - 0x4 0x0 0xf4000000 0x04000000 /*64MB SDRAM*/ - 0x5 0x0 0xf8000000 0x00b10000 /* EPLD */ - 0x6 0x0 0xef800000 0x00800000>; /*8MB Flash*/ - - flash@0,0 { - #address-cells = <1>; - #size-cells = <1>; - reg = <0x0 0x0 0x04000000>; - compatible = "intel,JS28F128", "cfi-flash"; - bank-width = <4>; - device-width = <1>; - partition@0 { - label = "space"; - /* FC000000 -> FFEFFFFF */ - reg = <0x00000000 0x03f00000>; - }; - partition@3f00000 { - label = "bootloader"; - /* FFF00000 -> FFFFFFFF */ - reg = <0x03f00000 0x00100000>; - read-only; - }; - }; - - - epld@5,0 { - compatible = "wrs,epld-localbus"; - #address-cells = <2>; - #size-cells = <1>; - reg = <0x5 0x0 0x00b10000>; - ranges = < - 0x0 0x0 0x5 0x000000 0x1fff /* LED */ - 0x1 0x0 0x5 0x100000 0x1fff /* Switches */ - 0x3 0x0 0x5 0x300000 0x1fff /* HW Rev. 
*/ - 0xb 0x0 0x5 0xb00000 0x1fff /* EEPROM */ - >; - - led@0,0 { - compatible = "led"; - reg = <0x0 0x0 0x1fff>; - }; - - switches@1,0 { - compatible = "switches"; - reg = <0x1 0x0 0x1fff>; - }; - - hw-rev@3,0 { - compatible = "hw-rev"; - reg = <0x3 0x0 0x1fff>; - }; - - eeprom@b,0 { - compatible = "eeprom"; - reg = <0xb 0 0x1fff>; - }; - - }; - - alt-flash@6,0 { - #address-cells = <1>; - #size-cells = <1>; - compatible = "intel,JS28F640", "cfi-flash"; - reg = <0x6 0x0 0x800000>; - bank-width = <1>; - device-width = <1>; - partition@0 { - label = "space"; - /* EF800000 -> EFF9FFFF */ - reg = <0x00000000 0x007a0000>; - }; - partition@7a0000 { - label = "bootloader"; - /* EFFA0000 -> EFFFFFFF */ - reg = <0x007a0000 0x00060000>; - read-only; - }; - }; - - - }; -}; - -/include/ "sbc8548-post.dtsi" diff --git a/arch/powerpc/boot/dts/sbc8548-post.dtsi b/arch/powerpc/boot/dts/sbc8548-post.dtsi deleted file mode 100644 index 9d848d409408..000000000000 --- a/arch/powerpc/boot/dts/sbc8548-post.dtsi +++ /dev/null @@ -1,289 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * SBC8548 Device Tree Source - * - * Copyright 2007 Wind River Systems Inc. - * - * Paul Gortmaker (see MAINTAINERS for contact information) - */ - -/{ - soc8548@e0000000 { - #address-cells = <1>; - #size-cells = <1>; - device_type = "soc"; - ranges = <0x00000000 0xe0000000 0x00100000>; - bus-frequency = <0>; - compatible = "simple-bus"; - - ecm-law@0 { - compatible = "fsl,ecm-law"; - reg = <0x0 0x1000>; - fsl,num-laws = <10>; - }; - - ecm@1000 { - compatible = "fsl,mpc8548-ecm", "fsl,ecm"; - reg = <0x1000 0x1000>; - interrupts = <17 2>; - interrupt-parent = <&mpic>; - }; - - memory-controller@2000 { - compatible = "fsl,mpc8548-memory-controller"; - reg = <0x2000 0x1000>; - interrupt-parent = <&mpic>; - interrupts = <0x12 0x2>; - }; - - L2: l2-cache-controller@20000 { - compatible = "fsl,mpc8548-l2-cache-controller"; - reg = <0x20000 0x1000>; - cache-line-size = <0x20>; // 32 bytes - cache-size = <0x80000>; // L2, 512K - interrupt-parent = <&mpic>; - interrupts = <0x10 0x2>; - }; - - i2c@3000 { - #address-cells = <1>; - #size-cells = <0>; - cell-index = <0>; - compatible = "fsl-i2c"; - reg = <0x3000 0x100>; - interrupts = <0x2b 0x2>; - interrupt-parent = <&mpic>; - dfsrr; - }; - - i2c@3100 { - #address-cells = <1>; - #size-cells = <0>; - cell-index = <1>; - compatible = "fsl-i2c"; - reg = <0x3100 0x100>; - interrupts = <0x2b 0x2>; - interrupt-parent = <&mpic>; - dfsrr; - }; - - dma@21300 { - #address-cells = <1>; - #size-cells = <1>; - compatible = "fsl,mpc8548-dma", "fsl,eloplus-dma"; - reg = <0x21300 0x4>; - ranges = <0x0 0x21100 0x200>; - cell-index = <0>; - dma-channel@0 { - compatible = "fsl,mpc8548-dma-channel", - "fsl,eloplus-dma-channel"; - reg = <0x0 0x80>; - cell-index = <0>; - interrupt-parent = <&mpic>; - interrupts = <20 2>; - }; - dma-channel@80 { - compatible = "fsl,mpc8548-dma-channel", - "fsl,eloplus-dma-channel"; - reg = <0x80 0x80>; - cell-index = <1>; - interrupt-parent = <&mpic>; - interrupts = <21 2>; - }; - dma-channel@100 { - compatible = "fsl,mpc8548-dma-channel", - "fsl,eloplus-dma-channel"; - reg = <0x100 0x80>; - cell-index = <2>; - interrupt-parent = <&mpic>; - interrupts = <22 2>; - }; - dma-channel@180 { - compatible = "fsl,mpc8548-dma-channel", - "fsl,eloplus-dma-channel"; - reg = <0x180 0x80>; - cell-index = <3>; - interrupt-parent = <&mpic>; - interrupts = <23 2>; - }; - }; - - enet0: ethernet@24000 { - #address-cells = <1>; - #size-cells = <1>; - cell-index = <0>; - device_type = 
"network"; - model = "eTSEC"; - compatible = "gianfar"; - reg = <0x24000 0x1000>; - ranges = <0x0 0x24000 0x1000>; - local-mac-address = [ 00 00 00 00 00 00 ]; - interrupts = <0x1d 0x2 0x1e 0x2 0x22 0x2>; - interrupt-parent = <&mpic>; - tbi-handle = <&tbi0>; - phy-handle = <&phy0>; - - mdio@520 { - #address-cells = <1>; - #size-cells = <0>; - compatible = "fsl,gianfar-mdio"; - reg = <0x520 0x20>; - - phy0: ethernet-phy@19 { - interrupt-parent = <&mpic>; - interrupts = <0x6 0x1>; - reg = <0x19>; - }; - phy1: ethernet-phy@1a { - interrupt-parent = <&mpic>; - interrupts = <0x7 0x1>; - reg = <0x1a>; - }; - tbi0: tbi-phy@11 { - reg = <0x11>; - device_type = "tbi-phy"; - }; - }; - }; - - enet1: ethernet@25000 { - #address-cells = <1>; - #size-cells = <1>; - cell-index = <1>; - device_type = "network"; - model = "eTSEC"; - compatible = "gianfar"; - reg = <0x25000 0x1000>; - ranges = <0x0 0x25000 0x1000>; - local-mac-address = [ 00 00 00 00 00 00 ]; - interrupts = <0x23 0x2 0x24 0x2 0x28 0x2>; - interrupt-parent = <&mpic>; - tbi-handle = <&tbi1>; - phy-handle = <&phy1>; - - mdio@520 { - #address-cells = <1>; - #size-cells = <0>; - compatible = "fsl,gianfar-tbi"; - reg = <0x520 0x20>; - - tbi1: tbi-phy@11 { - reg = <0x11>; - device_type = "tbi-phy"; - }; - }; - }; - - serial0: serial@4500 { - cell-index = <0>; - device_type = "serial"; - compatible = "fsl,ns16550", "ns16550"; - reg = <0x4500 0x100>; // reg base, size - clock-frequency = <0>; // should we fill in in uboot? - interrupts = <0x2a 0x2>; - interrupt-parent = <&mpic>; - }; - - serial1: serial@4600 { - cell-index = <1>; - device_type = "serial"; - compatible = "fsl,ns16550", "ns16550"; - reg = <0x4600 0x100>; // reg base, size - clock-frequency = <0>; // should we fill in in uboot? - interrupts = <0x2a 0x2>; - interrupt-parent = <&mpic>; - }; - - global-utilities@e0000 { //global utilities reg - compatible = "fsl,mpc8548-guts"; - reg = <0xe0000 0x1000>; - fsl,has-rstcr; - }; - - crypto@30000 { - compatible = "fsl,sec2.1", "fsl,sec2.0"; - reg = <0x30000 0x10000>; - interrupts = <45 2>; - interrupt-parent = <&mpic>; - fsl,num-channels = <4>; - fsl,channel-fifo-len = <24>; - fsl,exec-units-mask = <0xfe>; - fsl,descriptor-types-mask = <0x12b0ebf>; - }; - - mpic: pic@40000 { - interrupt-controller; - #address-cells = <0>; - #interrupt-cells = <2>; - reg = <0x40000 0x40000>; - compatible = "chrp,open-pic"; - device_type = "open-pic"; - }; - }; - - pci0: pci@e0008000 { - interrupt-map-mask = <0xf800 0x0 0x0 0x7>; - interrupt-map = < - /* IDSEL 0x01 (PCI-X slot) @66MHz */ - 0x0800 0x0 0x0 0x1 &mpic 0x2 0x1 - 0x0800 0x0 0x0 0x2 &mpic 0x3 0x1 - 0x0800 0x0 0x0 0x3 &mpic 0x4 0x1 - 0x0800 0x0 0x0 0x4 &mpic 0x1 0x1 - - /* IDSEL 0x11 (PCI, 3.3V 32bit) @33MHz */ - 0x8800 0x0 0x0 0x1 &mpic 0x2 0x1 - 0x8800 0x0 0x0 0x2 &mpic 0x3 0x1 - 0x8800 0x0 0x0 0x3 &mpic 0x4 0x1 - 0x8800 0x0 0x0 0x4 &mpic 0x1 0x1>; - - interrupt-parent = <&mpic>; - interrupts = <0x18 0x2>; - bus-range = <0 0>; - ranges = <0x02000000 0x0 0x80000000 0x80000000 0x0 0x10000000 - 0x01000000 0x0 0x00000000 0xe2000000 0x0 0x00800000>; - clock-frequency = <66000000>; - #interrupt-cells = <1>; - #size-cells = <2>; - #address-cells = <3>; - reg = <0xe0008000 0x1000>; - compatible = "fsl,mpc8540-pcix", "fsl,mpc8540-pci"; - device_type = "pci"; - }; - - pci1: pcie@e000a000 { - interrupt-map-mask = <0xf800 0x0 0x0 0x7>; - interrupt-map = < - - /* IDSEL 0x0 (PEX) */ - 0x0000 0x0 0x0 0x1 &mpic 0x0 0x1 - 0x0000 0x0 0x0 0x2 &mpic 0x1 0x1 - 0x0000 0x0 0x0 0x3 &mpic 0x2 0x1 - 0x0000 0x0 0x0 0x4 &mpic 0x3 
0x1>; - - interrupt-parent = <&mpic>; - interrupts = <0x1a 0x2>; - bus-range = <0x0 0xff>; - ranges = <0x02000000 0x0 0xa0000000 0xa0000000 0x0 0x10000000 - 0x01000000 0x0 0x00000000 0xe2800000 0x0 0x08000000>; - clock-frequency = <33000000>; - #interrupt-cells = <1>; - #size-cells = <2>; - #address-cells = <3>; - reg = <0xe000a000 0x1000>; - compatible = "fsl,mpc8548-pcie"; - device_type = "pci"; - pcie@0 { - reg = <0x0 0x0 0x0 0x0 0x0>; - #size-cells = <2>; - #address-cells = <3>; - device_type = "pci"; - ranges = <0x02000000 0x0 0xa0000000 - 0x02000000 0x0 0xa0000000 - 0x0 0x10000000 - - 0x01000000 0x0 0x00000000 - 0x01000000 0x0 0x00000000 - 0x0 0x00800000>; - }; - }; -}; diff --git a/arch/powerpc/boot/dts/sbc8548-pre.dtsi b/arch/powerpc/boot/dts/sbc8548-pre.dtsi deleted file mode 100644 index 0e3665fd15d0..000000000000 --- a/arch/powerpc/boot/dts/sbc8548-pre.dtsi +++ /dev/null @@ -1,48 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * SBC8548 Device Tree Source - * - * Copyright 2007 Wind River Systems Inc. - * - * Paul Gortmaker (see MAINTAINERS for contact information) - */ - -/{ - model = "SBC8548"; - compatible = "SBC8548"; - #address-cells = <1>; - #size-cells = <1>; - - aliases { - ethernet0 = &enet0; - ethernet1 = &enet1; - serial0 = &serial0; - serial1 = &serial1; - pci0 = &pci0; - pci1 = &pci1; - }; - - cpus { - #address-cells = <1>; - #size-cells = <0>; - - PowerPC,8548@0 { - device_type = "cpu"; - reg = <0>; - d-cache-line-size = <0x20>; // 32 bytes - i-cache-line-size = <0x20>; // 32 bytes - d-cache-size = <0x8000>; // L1, 32K - i-cache-size = <0x8000>; // L1, 32K - timebase-frequency = <0>; // From uboot - bus-frequency = <0>; - clock-frequency = <0>; - next-level-cache = <&L2>; - }; - }; - - memory { - device_type = "memory"; - reg = <0x00000000 0x10000000>; - }; - -}; diff --git a/arch/powerpc/boot/dts/sbc8548.dts b/arch/powerpc/boot/dts/sbc8548.dts deleted file mode 100644 index ce0a119f496e..000000000000 --- a/arch/powerpc/boot/dts/sbc8548.dts +++ /dev/null @@ -1,106 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * SBC8548 Device Tree Source - * - * Copyright 2007 Wind River Systems Inc. - * - * Paul Gortmaker (see MAINTAINERS for contact information) - */ - - -/dts-v1/; - -/include/ "sbc8548-pre.dtsi" - -/{ - localbus@e0000000 { - #address-cells = <2>; - #size-cells = <1>; - compatible = "simple-bus"; - reg = <0xe0000000 0x5000>; - interrupt-parent = <&mpic>; - - ranges = <0x0 0x0 0xff800000 0x00800000 /*8MB Flash*/ - 0x3 0x0 0xf0000000 0x04000000 /*64MB SDRAM*/ - 0x4 0x0 0xf4000000 0x04000000 /*64MB SDRAM*/ - 0x5 0x0 0xf8000000 0x00b10000 /* EPLD */ - 0x6 0x0 0xec000000 0x04000000>; /*64MB Flash*/ - - - flash@0,0 { - #address-cells = <1>; - #size-cells = <1>; - compatible = "intel,JS28F640", "cfi-flash"; - reg = <0x0 0x0 0x800000>; - bank-width = <1>; - device-width = <1>; - partition@0 { - label = "space"; - /* FF800000 -> FFF9FFFF */ - reg = <0x00000000 0x007a0000>; - }; - partition@7a0000 { - label = "bootloader"; - /* FFFA0000 -> FFFFFFFF */ - reg = <0x007a0000 0x00060000>; - read-only; - }; - }; - - epld@5,0 { - compatible = "wrs,epld-localbus"; - #address-cells = <2>; - #size-cells = <1>; - reg = <0x5 0x0 0x00b10000>; - ranges = < - 0x0 0x0 0x5 0x000000 0x1fff /* LED */ - 0x1 0x0 0x5 0x100000 0x1fff /* Switches */ - 0x3 0x0 0x5 0x300000 0x1fff /* HW Rev. 
*/ - 0xb 0x0 0x5 0xb00000 0x1fff /* EEPROM */ - >; - - led@0,0 { - compatible = "led"; - reg = <0x0 0x0 0x1fff>; - }; - - switches@1,0 { - compatible = "switches"; - reg = <0x1 0x0 0x1fff>; - }; - - hw-rev@3,0 { - compatible = "hw-rev"; - reg = <0x3 0x0 0x1fff>; - }; - - eeprom@b,0 { - compatible = "eeprom"; - reg = <0xb 0 0x1fff>; - }; - - }; - - alt-flash@6,0 { - #address-cells = <1>; - #size-cells = <1>; - reg = <0x6 0x0 0x04000000>; - compatible = "intel,JS28F128", "cfi-flash"; - bank-width = <4>; - device-width = <1>; - partition@0 { - label = "space"; - /* EC000000 -> EFEFFFFF */ - reg = <0x00000000 0x03f00000>; - }; - partition@3f00000 { - label = "bootloader"; - /* EFF00000 -> EFFFFFFF */ - reg = <0x03f00000 0x00100000>; - read-only; - }; - }; - }; -}; - -/include/ "sbc8548-post.dtsi" diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper index 1f4676bab786..1cd82564c996 100755 --- a/arch/powerpc/boot/wrapper +++ b/arch/powerpc/boot/wrapper @@ -298,7 +298,7 @@ cuboot*) *-tqm8541|*-mpc8560*|*-tqm8560|*-tqm8555|*-ksi8560*) platformo=$object/cuboot-85xx-cpm2.o ;; - *-mpc85*|*-tqm85*|*-sbc85*) + *-mpc85*|*-tqm85*) platformo=$object/cuboot-85xx.o ;; *-amigaone) diff --git a/arch/powerpc/configs/85xx/sbc8548_defconfig b/arch/powerpc/configs/85xx/sbc8548_defconfig deleted file mode 100644 index 258881727119..000000000000 --- a/arch/powerpc/configs/85xx/sbc8548_defconfig +++ /dev/null @@ -1,50 +0,0 @@ -CONFIG_PPC_85xx=y -CONFIG_SYSVIPC=y -CONFIG_LOG_BUF_SHIFT=14 -CONFIG_BLK_DEV_INITRD=y -CONFIG_EXPERT=y -CONFIG_SLAB=y -# CONFIG_BLK_DEV_BSG is not set -CONFIG_SBC8548=y -CONFIG_GEN_RTC=y -CONFIG_BINFMT_MISC=y -CONFIG_MATH_EMULATION=y -# CONFIG_SECCOMP is not set -CONFIG_PCI=y -CONFIG_NET=y -CONFIG_PACKET=y -CONFIG_UNIX=y -CONFIG_XFRM_USER=y -CONFIG_INET=y -CONFIG_IP_MULTICAST=y -CONFIG_IP_PNP=y -CONFIG_IP_PNP_DHCP=y -CONFIG_IP_PNP_BOOTP=y -CONFIG_SYN_COOKIES=y -# CONFIG_IPV6 is not set -# CONFIG_FW_LOADER is not set -CONFIG_MTD=y -CONFIG_MTD_BLOCK=y -CONFIG_MTD_CFI=y -CONFIG_MTD_CFI_ADV_OPTIONS=y -CONFIG_MTD_CFI_GEOMETRY=y -CONFIG_MTD_CFI_I4=y -CONFIG_MTD_CFI_INTELEXT=y -CONFIG_MTD_PHYSMAP_OF=y -CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_RAM=y -CONFIG_NETDEVICES=y -CONFIG_GIANFAR=y -CONFIG_BROADCOM_PHY=y -# CONFIG_INPUT_KEYBOARD is not set -# CONFIG_INPUT_MOUSE is not set -# CONFIG_SERIO is not set -# CONFIG_VT is not set -CONFIG_SERIAL_8250=y -CONFIG_SERIAL_8250_CONSOLE=y -# CONFIG_HW_RANDOM is not set -# CONFIG_USB_SUPPORT is not set -CONFIG_PROC_KCORE=y -CONFIG_TMPFS=y -CONFIG_NFS_FS=y -CONFIG_ROOT_NFS=y diff --git a/arch/powerpc/configs/mpc85xx_base.config b/arch/powerpc/configs/mpc85xx_base.config index b1593fe6f70b..85907b776908 100644 --- a/arch/powerpc/configs/mpc85xx_base.config +++ b/arch/powerpc/configs/mpc85xx_base.config @@ -13,7 +13,6 @@ CONFIG_P1022_DS=y CONFIG_P1022_RDK=y CONFIG_P1023_RDB=y CONFIG_TWR_P102x=y -CONFIG_SBC8548=y CONFIG_SOCRATES=y CONFIG_STX_GP3=y CONFIG_TQM8540=y diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig index b77cbb0a50e1..4142ebf01382 100644 --- a/arch/powerpc/platforms/85xx/Kconfig +++ b/arch/powerpc/platforms/85xx/Kconfig @@ -208,12 +208,6 @@ config TQM8560 select TQM85xx select CPM2 -config SBC8548 - bool "Wind River SBC8548" - select DEFAULT_UIMAGE - help - This option enables support for the Wind River SBC8548 board - config PPA8548 bool "Prodrive PPA8548" help diff --git a/arch/powerpc/platforms/85xx/Makefile b/arch/powerpc/platforms/85xx/Makefile index d1dd0dca5ebf..60e4e97a929d 100644 --- 
a/arch/powerpc/platforms/85xx/Makefile +++ b/arch/powerpc/platforms/85xx/Makefile @@ -26,7 +26,6 @@ obj-$(CONFIG_CORENET_GENERIC) += corenet_generic.o obj-$(CONFIG_FB_FSL_DIU) += t1042rdb_diu.o obj-$(CONFIG_STX_GP3) += stx_gp3.o obj-$(CONFIG_TQM85xx) += tqm85xx.o -obj-$(CONFIG_SBC8548) += sbc8548.o obj-$(CONFIG_PPA8548) += ppa8548.o obj-$(CONFIG_SOCRATES) += socrates.o socrates_fpga_pic.o obj-$(CONFIG_KSI8560) += ksi8560.o diff --git a/arch/powerpc/platforms/85xx/sbc8548.c b/arch/powerpc/platforms/85xx/sbc8548.c deleted file mode 100644 index e4acf5ce6b07..000000000000 --- a/arch/powerpc/platforms/85xx/sbc8548.c +++ /dev/null @@ -1,134 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Wind River SBC8548 setup and early boot code. - * - * Copyright 2007 Wind River Systems Inc. - * - * By Paul Gortmaker (see MAINTAINERS for contact information) - * - * Based largely on the MPC8548CDS support - Copyright 2005 Freescale Inc. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "mpc85xx.h" - -static int sbc_rev; - -static void __init sbc8548_pic_init(void) -{ - struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN, - 0, 256, " OpenPIC "); - BUG_ON(mpic == NULL); - mpic_init(mpic); -} - -/* Extract the HW Rev from the EPLD on the board */ -static int __init sbc8548_hw_rev(void) -{ - struct device_node *np; - struct resource res; - unsigned int *rev; - int board_rev = 0; - - np = of_find_compatible_node(NULL, NULL, "hw-rev"); - if (np == NULL) { - printk("No HW-REV found in DTB.\n"); - return -ENODEV; - } - - of_address_to_resource(np, 0, &res); - of_node_put(np); - - rev = ioremap(res.start,sizeof(unsigned int)); - board_rev = (*rev) >> 28; - iounmap(rev); - - return board_rev; -} - -/* - * Setup the architecture - */ -static void __init sbc8548_setup_arch(void) -{ - if (ppc_md.progress) - ppc_md.progress("sbc8548_setup_arch()", 0); - - fsl_pci_assign_primary(); - - sbc_rev = sbc8548_hw_rev(); -} - -static void sbc8548_show_cpuinfo(struct seq_file *m) -{ - uint pvid, svid, phid1; - - pvid = mfspr(SPRN_PVR); - svid = mfspr(SPRN_SVR); - - seq_printf(m, "Vendor\t\t: Wind River\n"); - seq_printf(m, "Machine\t\t: SBC8548 v%d\n", sbc_rev); - seq_printf(m, "PVR\t\t: 0x%x\n", pvid); - seq_printf(m, "SVR\t\t: 0x%x\n", svid); - - /* Display cpu Pll setting */ - phid1 = mfspr(SPRN_HID1); - seq_printf(m, "PLL setting\t: 0x%x\n", ((phid1 >> 24) & 0x3f)); -} - -machine_arch_initcall(sbc8548, mpc85xx_common_publish_devices); - -/* - * Called very early, device-tree isn't unflattened - */ -static int __init sbc8548_probe(void) -{ - return of_machine_is_compatible("SBC8548"); -} - -define_machine(sbc8548) { - .name = "SBC8548", - .probe = sbc8548_probe, - .setup_arch = sbc8548_setup_arch, - .init_IRQ = sbc8548_pic_init, - .show_cpuinfo = sbc8548_show_cpuinfo, - .get_irq = mpic_get_irq, -#ifdef CONFIG_PCI - .pcibios_fixup_bus = fsl_pcibios_fixup_bus, - .pcibios_fixup_phb = fsl_pcibios_fixup_phb, -#endif - .calibrate_decr = generic_calibrate_decr, - .progress = udbg_progress, -}; -- cgit From d7c1814f2f4f812d7a39447f975abdc095dbf7aa Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Thu, 7 Jan 2021 13:45:32 -0500 Subject: powerpc: retire sbc8641d board support The support for this was added to mainline over 12 years ago, in 
v2.6.26 [4e8aae89a35d] just around the ppc --> powerpc migration. I believe the board was introduced shortly after the sbc8548 board, making it roughly a 14 year old platform - with the CPU speed and memory size typical for that era. I haven't had one of these boards for several years, and availability was discontinued several years before that. Given that, there is no point in adding a burden to testing coverage that builds all possible defconfigs, so it makes sense to remove it. Of course it will remain in the git history forever, for anyone who happens to find a functional board and wants to tinker with it. Acked-by: Scott Wood Signed-off-by: Paul Gortmaker Signed-off-by: Michael Ellerman --- arch/powerpc/boot/dts/fsl/sbc8641d.dts | 176 ------------------------------- arch/powerpc/configs/mpc86xx_base.config | 1 - arch/powerpc/configs/ppc6xx_defconfig | 1 - arch/powerpc/platforms/86xx/Kconfig | 8 +- arch/powerpc/platforms/86xx/Makefile | 1 - arch/powerpc/platforms/86xx/sbc8641d.c | 87 --------------- 6 files changed, 1 insertion(+), 273 deletions(-) delete mode 100644 arch/powerpc/boot/dts/fsl/sbc8641d.dts delete mode 100644 arch/powerpc/platforms/86xx/sbc8641d.c (limited to 'arch') diff --git a/arch/powerpc/boot/dts/fsl/sbc8641d.dts b/arch/powerpc/boot/dts/fsl/sbc8641d.dts deleted file mode 100644 index 3dca10acc161..000000000000 --- a/arch/powerpc/boot/dts/fsl/sbc8641d.dts +++ /dev/null @@ -1,176 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * SBC8641D Device Tree Source - * - * Copyright 2008 Wind River Systems Inc. - * - * Paul Gortmaker (see MAINTAINERS for contact information) - * - * Based largely on the mpc8641_hpcn.dts by Freescale Semiconductor Inc. - */ - -/include/ "mpc8641si-pre.dtsi" - -/ { - model = "SBC8641D"; - compatible = "wind,sbc8641"; - - memory { - device_type = "memory"; - reg = <0x00000000 0x20000000>; // 512M at 0x0 - }; - - lbc: localbus@f8005000 { - reg = <0xf8005000 0x1000>; - - ranges = <0 0 0xff000000 0x01000000 // 16MB Boot flash - 1 0 0xf0000000 0x00010000 // 64KB EEPROM - 2 0 0xf1000000 0x00100000 // EPLD (1MB) - 3 0 0xe0000000 0x04000000 // 64MB LB SDRAM (CS3) - 4 0 0xe4000000 0x04000000 // 64MB LB SDRAM (CS4) - 6 0 0xf4000000 0x00100000 // LCD display (1MB) - 7 0 0xe8000000 0x04000000>; // 64MB OneNAND - - flash@0,0 { - compatible = "cfi-flash"; - reg = <0 0 0x01000000>; - bank-width = <2>; - device-width = <2>; - #address-cells = <1>; - #size-cells = <1>; - partition@0 { - label = "dtb"; - reg = <0x00000000 0x00100000>; - read-only; - }; - partition@300000 { - label = "kernel"; - reg = <0x00100000 0x00400000>; - read-only; - }; - partition@400000 { - label = "fs"; - reg = <0x00500000 0x00a00000>; - }; - partition@700000 { - label = "firmware"; - reg = <0x00f00000 0x00100000>; - read-only; - }; - }; - - epld@2,0 { - compatible = "wrs,epld-localbus"; - #address-cells = <2>; - #size-cells = <1>; - reg = <2 0 0x100000>; - ranges = <0 0 5 0 1 // User switches - 1 0 5 1 1 // Board ID/Rev - 3 0 5 3 1>; // LEDs - }; - }; - - soc: soc@f8000000 { - ranges = <0x00000000 0xf8000000 0x00100000>; - - enet0: ethernet@24000 { - tbi-handle = <&tbi0>; - phy-handle = <&phy0>; - phy-connection-type = "rgmii-id"; - }; - - mdio@24520 { - phy0: ethernet-phy@1f { - reg = <0x1f>; - }; - phy1: ethernet-phy@0 { - reg = <0>; - }; - phy2: ethernet-phy@1 { - reg = <1>; - }; - phy3: ethernet-phy@2 { - reg = <2>; - }; - tbi0: tbi-phy@11 { - reg = <0x11>; - device_type = "tbi-phy"; - }; - }; - - enet1: ethernet@25000 { - tbi-handle = <&tbi1>; - phy-handle = <&phy1>; - 
phy-connection-type = "rgmii-id"; - }; - - mdio@25520 { - tbi1: tbi-phy@11 { - reg = <0x11>; - device_type = "tbi-phy"; - }; - }; - - enet2: ethernet@26000 { - tbi-handle = <&tbi2>; - phy-handle = <&phy2>; - phy-connection-type = "rgmii-id"; - }; - - mdio@26520 { - tbi2: tbi-phy@11 { - reg = <0x11>; - device_type = "tbi-phy"; - }; - }; - - enet3: ethernet@27000 { - tbi-handle = <&tbi3>; - phy-handle = <&phy3>; - phy-connection-type = "rgmii-id"; - }; - - mdio@27520 { - tbi3: tbi-phy@11 { - reg = <0x11>; - device_type = "tbi-phy"; - }; - }; - }; - - pci0: pcie@f8008000 { - reg = <0xf8008000 0x1000>; - ranges = <0x02000000 0x0 0x80000000 0x80000000 0x0 0x20000000 - 0x01000000 0x0 0x00000000 0xe2000000 0x0 0x00100000>; - interrupt-map-mask = <0xff00 0 0 7>; - - pcie@0 { - ranges = <0x02000000 0x0 0x80000000 - 0x02000000 0x0 0x80000000 - 0x0 0x20000000 - - 0x01000000 0x0 0x00000000 - 0x01000000 0x0 0x00000000 - 0x0 0x00100000>; - }; - - }; - - pci1: pcie@f8009000 { - reg = <0xf8009000 0x1000>; - ranges = <0x02000000 0x0 0xa0000000 0xa0000000 0x0 0x20000000 - 0x01000000 0x0 0x00000000 0xe3000000 0x0 0x00100000>; - - pcie@0 { - ranges = <0x02000000 0x0 0xa0000000 - 0x02000000 0x0 0xa0000000 - 0x0 0x20000000 - - 0x01000000 0x0 0x00000000 - 0x01000000 0x0 0x00000000 - 0x0 0x00100000>; - }; - }; -}; - -/include/ "mpc8641si-post.dtsi" diff --git a/arch/powerpc/configs/mpc86xx_base.config b/arch/powerpc/configs/mpc86xx_base.config index 67bd1fa036ee..588870e6af3b 100644 --- a/arch/powerpc/configs/mpc86xx_base.config +++ b/arch/powerpc/configs/mpc86xx_base.config @@ -1,6 +1,5 @@ CONFIG_PPC_86xx=y CONFIG_MPC8641_HPCN=y -CONFIG_SBC8641D=y CONFIG_MPC8610_HPCD=y CONFIG_GEF_PPC9A=y CONFIG_GEF_SBC310=y diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index ee09f7bb6ea9..6697c5e6682f 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -55,7 +55,6 @@ CONFIG_MPC837x_RDB=y CONFIG_ASP834x=y CONFIG_PPC_86xx=y CONFIG_MPC8641_HPCN=y -CONFIG_SBC8641D=y CONFIG_MPC8610_HPCD=y CONFIG_GEF_SBC610=y CONFIG_CPU_FREQ=y diff --git a/arch/powerpc/platforms/86xx/Kconfig b/arch/powerpc/platforms/86xx/Kconfig index 07a9d60c618a..be867abebc83 100644 --- a/arch/powerpc/platforms/86xx/Kconfig +++ b/arch/powerpc/platforms/86xx/Kconfig @@ -20,12 +20,6 @@ config MPC8641_HPCN help This option enables support for the MPC8641 HPCN board. -config SBC8641D - bool "Wind River SBC8641D" - select DEFAULT_UIMAGE - help - This option enables support for the WRS SBC8641D board. 
- config MPC8610_HPCD bool "Freescale MPC8610 HPCD" select DEFAULT_UIMAGE @@ -74,7 +68,7 @@ config MPC8641 select FSL_PCI if PCI select PPC_UDBG_16550 select MPIC - default y if MPC8641_HPCN || SBC8641D || GEF_SBC610 || GEF_SBC310 || GEF_PPC9A \ + default y if MPC8641_HPCN || GEF_SBC610 || GEF_SBC310 || GEF_PPC9A \ || MVME7100 config MPC8610 diff --git a/arch/powerpc/platforms/86xx/Makefile b/arch/powerpc/platforms/86xx/Makefile index 2c04449be107..5bbe1475bf26 100644 --- a/arch/powerpc/platforms/86xx/Makefile +++ b/arch/powerpc/platforms/86xx/Makefile @@ -6,7 +6,6 @@ obj-y := pic.o common.o obj-$(CONFIG_SMP) += mpc86xx_smp.o obj-$(CONFIG_MPC8641_HPCN) += mpc86xx_hpcn.o -obj-$(CONFIG_SBC8641D) += sbc8641d.o obj-$(CONFIG_MPC8610_HPCD) += mpc8610_hpcd.o obj-$(CONFIG_GEF_SBC610) += gef_sbc610.o obj-$(CONFIG_GEF_SBC310) += gef_sbc310.o diff --git a/arch/powerpc/platforms/86xx/sbc8641d.c b/arch/powerpc/platforms/86xx/sbc8641d.c deleted file mode 100644 index dc23dd383d6e..000000000000 --- a/arch/powerpc/platforms/86xx/sbc8641d.c +++ /dev/null @@ -1,87 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * SBC8641D board specific routines - * - * Copyright 2008 Wind River Systems Inc. - * - * By Paul Gortmaker (see MAINTAINERS for contact information) - * - * Based largely on the 8641 HPCN support by Freescale Semiconductor Inc. - */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include - -#include -#include - -#include "mpc86xx.h" - -static void __init -sbc8641_setup_arch(void) -{ - if (ppc_md.progress) - ppc_md.progress("sbc8641_setup_arch()", 0); - - printk("SBC8641 board from Wind River\n"); - -#ifdef CONFIG_SMP - mpc86xx_smp_init(); -#endif - - fsl_pci_assign_primary(); -} - - -static void -sbc8641_show_cpuinfo(struct seq_file *m) -{ - uint svid = mfspr(SPRN_SVR); - - seq_printf(m, "Vendor\t\t: Wind River Systems\n"); - - seq_printf(m, "SVR\t\t: 0x%x\n", svid); -} - - -/* - * Called very early, device-tree isn't unflattened - */ -static int __init sbc8641_probe(void) -{ - if (of_machine_is_compatible("wind,sbc8641")) - return 1; /* Looks good */ - - return 0; -} - -machine_arch_initcall(sbc8641, mpc86xx_common_publish_devices); - -define_machine(sbc8641) { - .name = "SBC8641D", - .probe = sbc8641_probe, - .setup_arch = sbc8641_setup_arch, - .init_IRQ = mpc86xx_init_irq, - .show_cpuinfo = sbc8641_show_cpuinfo, - .get_irq = mpic_get_irq, - .time_init = mpc86xx_time_init, - .calibrate_decr = generic_calibrate_decr, - .progress = udbg_progress, -#ifdef CONFIG_PCI - .pcibios_fixup_bus = fsl_pcibios_fixup_bus, -#endif -}; -- cgit From 8149238ffd210875f5a77e3c654bb59b58da35e3 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 25 Aug 2021 13:34:45 +0000 Subject: powerpc: Redefine HMT_xxx macros as empty on PPC32 The HMT_xxx macros for adjusting thread priority (hardware multi-threading) are inherited from PPC64 via commit 5f7c690728ac ("[PATCH] powerpc: Merged ppc_asm.h"). Those instructions are pointless on PPC32, but some common functions like arch_cpu_idle() use them. So make them empty on PPC32 to avoid those instructions. 
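As an illustration (a minimal sketch only, not part of the patch; the mnemonics are taken from the HMT definitions in the diff below), a common caller such as cpu_relax() now expands differently on the two platforms:

	/* PPC64: cpu_relax() keeps the priority dance around the barrier */
	do {
		asm volatile("or 1, 1, 1 # low priority");	/* HMT_low() */
		asm volatile("or 2, 2, 2 # medium priority");	/* HMT_medium() */
		barrier();
	} while (0);

	/* PPC32 after this patch: HMT_low()/HMT_medium() expand to nothing,
	 * so the same body reduces to just the compiler barrier. */
	do {
		barrier();
	} while (0);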
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c5a07fadea33d640ad10cecf0ac8faaec1c524e0.1629898474.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/vdso/processor.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/include/asm/vdso/processor.h b/arch/powerpc/include/asm/vdso/processor.h index e072577bc7c0..8d79f994b4aa 100644 --- a/arch/powerpc/include/asm/vdso/processor.h +++ b/arch/powerpc/include/asm/vdso/processor.h @@ -5,12 +5,21 @@ #ifndef __ASSEMBLY__ /* Macros for adjusting thread priority (hardware multi-threading) */ +#ifdef CONFIG_PPC64 #define HMT_very_low() asm volatile("or 31, 31, 31 # very low priority") #define HMT_low() asm volatile("or 1, 1, 1 # low priority") #define HMT_medium_low() asm volatile("or 6, 6, 6 # medium low priority") #define HMT_medium() asm volatile("or 2, 2, 2 # medium priority") #define HMT_medium_high() asm volatile("or 5, 5, 5 # medium high priority") #define HMT_high() asm volatile("or 3, 3, 3 # high priority") +#else +#define HMT_very_low() +#define HMT_low() +#define HMT_medium_low() +#define HMT_medium() +#define HMT_medium_high() +#define HMT_high() +#endif #ifdef CONFIG_PPC64 #define cpu_relax() do { HMT_low(); HMT_medium(); barrier(); } while (0) -- cgit From 602d0f96563c2e0b8e1ddb22ac46bf7f58480d64 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Thu, 26 Aug 2021 21:56:51 +0930 Subject: powerpc/microwatt: Add Ethernet to device tree The liteeth network device is used in the Microwatt SoC. Signed-off-by: Joel Stanley Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210826122653.3236867-2-joel@jms.id.au --- arch/powerpc/boot/dts/microwatt.dts | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/boot/dts/microwatt.dts b/arch/powerpc/boot/dts/microwatt.dts index 974abbdda249..65b270a90f94 100644 --- a/arch/powerpc/boot/dts/microwatt.dts +++ b/arch/powerpc/boot/dts/microwatt.dts @@ -127,6 +127,18 @@ fifo-size = <16>; interrupts = <0x10 0x1>; }; + + ethernet@8020000 { + compatible = "litex,liteeth"; + reg = <0x8021000 0x100 + 0x8020800 0x100 + 0x8030000 0x2000>; + reg-names = "mac", "mido", "buffer"; + litex,rx-slots = <2>; + litex,tx-slots = <2>; + litex,slot-size = <0x800>; + interrupts = <0x11 0x1>; + }; }; chosen { -- cgit From ef4fcaf99cd27eb48790f2adc4eff456dbe1dec4 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Thu, 26 Aug 2021 21:56:52 +0930 Subject: powerpc/configs/microwatt: Enable Liteeth Liteeth is the network device used by Microwatt. Signed-off-by: Joel Stanley Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210826122653.3236867-3-joel@jms.id.au --- arch/powerpc/configs/microwatt_defconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/powerpc/configs/microwatt_defconfig b/arch/powerpc/configs/microwatt_defconfig index ebc90aefbc0c..6b76232094b2 100644 --- a/arch/powerpc/configs/microwatt_defconfig +++ b/arch/powerpc/configs/microwatt_defconfig @@ -53,6 +53,7 @@ CONFIG_MTD_SPI_NOR=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_NETDEVICES=y +CONFIG_LITEX_LITEETH=y # CONFIG_WLAN is not set # CONFIG_INPUT is not set # CONFIG_SERIO is not set -- cgit From 3e18e271182206c996a3a7efbbe70c66307ef137 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Thu, 26 Aug 2021 21:56:53 +0930 Subject: powerpc/configs/microwatt: Enable options for systemd When booting with systemd these options are required. 
This increases the image by about 50KB, or 2%. Signed-off-by: Joel Stanley Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210826122653.3236867-4-joel@jms.id.au --- arch/powerpc/configs/microwatt_defconfig | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/configs/microwatt_defconfig b/arch/powerpc/configs/microwatt_defconfig index 6b76232094b2..9465209b8c5b 100644 --- a/arch/powerpc/configs/microwatt_defconfig +++ b/arch/powerpc/configs/microwatt_defconfig @@ -5,6 +5,7 @@ CONFIG_PREEMPT_VOLUNTARY=y CONFIG_TICK_CPU_ACCOUNTING=y CONFIG_LOG_BUF_SHIFT=16 CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT=12 +CONFIG_CGROUPS=y CONFIG_BLK_DEV_INITRD=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_KALLSYMS_ALL=y @@ -78,8 +79,10 @@ CONFIG_SPI_SPIDEV=y CONFIG_EXT4_FS=y # CONFIG_FILE_LOCKING is not set # CONFIG_DNOTIFY is not set -# CONFIG_INOTIFY_USER is not set +CONFIG_AUTOFS_FS=y +CONFIG_TMPFS=y # CONFIG_MISC_FILESYSTEMS is not set +CONFIG_CRYPTO_SHA256=y # CONFIG_CRYPTO_HW is not set # CONFIG_XZ_DEC_X86 is not set # CONFIG_XZ_DEC_IA64 is not set -- cgit From 8efd249babea2fec268cff90b9f5ca723dbb7499 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Thu, 26 Aug 2021 15:33:59 +0530 Subject: powerpc/smp: Fix a crash while booting kvm guest with nr_cpus=2 Aneesh reported a crash with a fairly recent upstream kernel when booting kernel whose commandline was appended with nr_cpus=2 1:mon> e cpu 0x1: Vector: 300 (Data Access) at [c000000008a67bd0] pc: c00000000002557c: cpu_to_chip_id+0x3c/0x100 lr: c000000000058380: start_secondary+0x460/0xb00 sp: c000000008a67e70 msr: 8000000000001033 dar: 10 dsisr: 80000 current = 0xc00000000891bb00 paca = 0xc0000018ff981f80 irqmask: 0x03 irq_happened: 0x01 pid = 0, comm = swapper/1 Linux version 5.13.0-rc3-15704-ga050a6d2b7e8 (kvaneesh@ltc-boston8) (gcc (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0, GNU ld (GNU Binutils for Ubuntu) 2.34) #433 SMP Tue May 25 02:38:49 CDT 2021 1:mon> t [link register ] c000000000058380 start_secondary+0x460/0xb00 [c000000008a67e70] c000000008a67eb0 (unreliable) [c000000008a67eb0] c0000000000589d4 start_secondary+0xab4/0xb00 [c000000008a67f90] c00000000000c654 start_secondary_prolog+0x10/0x14 Current code assumes that num_possible_cpus() is always greater than threads_per_core. However this may not be true when using nr_cpus=2 or similar options. Handle the case where num_possible_cpus() is not an exact multiple of threads_per_core. 
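To make the arithmetic concrete, here is a minimal userspace sketch (illustrative only, not from the patch; DIV_ROUND_UP is reproduced with its usual kernel definition, and the two plain ints stand in for num_possible_cpus() and threads_per_core):

	#include <stdio.h>

	#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

	int main(void)
	{
		int num_possible_cpus = 2;	/* booted with nr_cpus=2 */
		int threads_per_core = 8;	/* SMT8, as on POWER9 */

		/* old code: 2 / 8 == 0, so the chip-id lookup table is
		 * sized for zero cores and the boot core gets no slot */
		printf("old idx = %d\n", num_possible_cpus / threads_per_core);

		/* fixed code: rounds up to 1, covering the partial core */
		printf("new idx = %d\n",
		       DIV_ROUND_UP(num_possible_cpus, threads_per_core));
		return 0;
	}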
Fixes: c1e53367dab1 ("powerpc/smp: Cache CPU to chip lookup") Reported-by: Aneesh Kumar K.V Signed-off-by: Srikar Dronamraju Debugged-by: Michael Ellerman Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210826100401.412519-2-srikar@linux.vnet.ibm.com --- arch/powerpc/kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index f2abd88e0c25..a3eab89ee572 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1109,7 +1109,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) } if (cpu_to_chip_id(boot_cpuid) != -1) { - int idx = num_possible_cpus() / threads_per_core; + int idx = DIV_ROUND_UP(num_possible_cpus(), threads_per_core); /* * All threads of a core will all belong to the same core, -- cgit From b8b928030332a0ca16d42433eb2c3085600d8704 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Thu, 26 Aug 2021 15:34:00 +0530 Subject: powerpc/smp: Update cpu_core_map on all PowerPc systems lscpu() uses core_siblings to list the number of sockets in the system. core_siblings is set using topology_core_cpumask. While optimizing the powerpc bootup path in commit 4ca234a9cbd7 ("powerpc/smp: Stop updating cpu_core_mask"), it was found that updating cpu_core_mask() ended up taking a lot of time. It was thought that on Powerpc, cpu_core_mask() would always be the same as cpu_cpu_mask(), i.e. the number of sockets would always be equal to the number of nodes. As an optimization, cpu_core_mask() was made a snapshot of cpu_cpu_mask(). However that was found to be false with PowerPc KVM guests, where each node could have more than one socket. So with commit c47f892d7aa6 ("powerpc/smp: Reintroduce cpu_core_mask"), cpu_core_mask was updated based on chip_id, but in an optimized way using some mask manipulations and chip_id caching. However, non-PowerNV and non-pseries KVM guests (i.e. those not implementing cpu_to_chip_id()) continued to use a copy of cpu_cpu_mask(). Two issues were noticed on such systems: 1. lscpu would report one extra socket. On an IBM,9009-42A (aka zz system) which has only 2 chips/ sockets/ nodes, lscpu would report Architecture: ppc64le Byte Order: Little Endian CPU(s): 160 On-line CPU(s) list: 0-159 Thread(s) per core: 8 Core(s) per socket: 6 Socket(s): 3 <-------------- NUMA node(s): 2 Model: 2.2 (pvr 004e 0202) Model name: POWER9 (architected), altivec supported Hypervisor vendor: pHyp Virtualization type: para L1d cache: 32K L1i cache: 32K L2 cache: 512K L3 cache: 10240K NUMA node0 CPU(s): 0-79 NUMA node1 CPU(s): 80-159 2. Currently cpu_cpu_mask is updated when a core is added/removed. However it is not updated on SMT mode switching or when CPUs are explicitly offlined. All other percpu masks are updated to ensure only active/online CPUs are in the masks. This results in build_sched_domain traces, since there will be CPUs in cpu_cpu_mask() that are not present in the SMT / CACHE / MC / NUMA domains. A loop of threads running SMT mode switching and core add/remove will soon show this trace. Hence cpu_cpu_mask has to be updated at SMT mode switch. This will have an impact on cpu_core_mask(): cpu_core_mask() is a snapshot of cpu_cpu_mask, so different CPUs within the same socket will end up having different cpu_core_masks, since they are snapshots taken at different points of time. This means lscpu will start reporting many more sockets than the actual number of sockets/ nodes / chips. Different ways to handle this problem: A. 
Update the snapshot aka cpu_core_mask for all CPUs whenever cpu_cpu_mask is updated. This would be a non-optimal solution. B. Instead of a cpumask_var_t, make cpu_core_map a cpumask pointer pointing to cpu_cpu_mask. However a percpu cpumask pointer is frowned upon, and we need a clean way to handle the PowerPc KVM guest case, which is not a snapshot. C. Update cpu_core_masks on all PowerPc systems, like in PowerPc KVM guests, using mask manipulations. This approach is relatively simple and unifies with the existing code. D. On top of C, we could also resurrect get_physical_package_id, which could return a nid for the said CPU. However this is not needed at this time. Option C is the preferred approach for now. While this is somewhat a revert of commit 4ca234a9cbd7 ("powerpc/smp: Stop updating cpu_core_mask"): 1. A plain revert has some conflicts. 2. For chip_id == -1, the cpu_core_mask is made identical to cpu_cpu_mask, unlike previously where cpu_core_mask was set to a core if chip_id doesn't exist. This goes by the principle that if chip_id is not exposed, then sockets / chip / node share the same set of CPUs. With the fix, lscpu o/p would be Architecture: ppc64le Byte Order: Little Endian CPU(s): 160 On-line CPU(s) list: 0-159 Thread(s) per core: 8 Core(s) per socket: 6 Socket(s): 2 <-------------- NUMA node(s): 2 Model: 2.2 (pvr 004e 0202) Model name: POWER9 (architected), altivec supported Hypervisor vendor: pHyp Virtualization type: para L1d cache: 32K L1i cache: 32K L2 cache: 512K L3 cache: 10240K NUMA node0 CPU(s): 0-79 NUMA node1 CPU(s): 80-159 Fixes: 4ca234a9cbd7 ("powerpc/smp: Stop updating cpu_core_mask") Signed-off-by: Srikar Dronamraju Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210826100401.412519-3-srikar@linux.vnet.ibm.com --- arch/powerpc/kernel/smp.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index a3eab89ee572..4bdc339fff3c 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1527,6 +1527,7 @@ static void add_cpu_to_masks(int cpu) * add it to it's own thread sibling mask. */ cpumask_set_cpu(cpu, cpu_sibling_mask(cpu)); + cpumask_set_cpu(cpu, cpu_core_mask(cpu)); for (i = first_thread; i < first_thread + threads_per_core; i++) if (cpu_online(i)) @@ -1544,11 +1545,6 @@ static void add_cpu_to_masks(int cpu) if (chip_id_lookup_table && ret) chip_id = cpu_to_chip_id(cpu); - if (chip_id == -1) { - cpumask_copy(per_cpu(cpu_core_map, cpu), cpu_cpu_mask(cpu)); - goto out; - } - if (shared_caches) submask_fn = cpu_l2_cache_mask; @@ -1558,6 +1554,10 @@ static void add_cpu_to_masks(int cpu) /* Skip all CPUs already part of current CPU core mask */ cpumask_andnot(mask, cpu_online_mask, cpu_core_mask(cpu)); + /* If chip_id is -1; limit the cpu_core_mask to within DIE*/ + if (chip_id == -1) + cpumask_and(mask, mask, cpu_cpu_mask(cpu)); + for_each_cpu(i, mask) { if (chip_id == cpu_to_chip_id(i)) { or_cpumasks_related(cpu, i, submask_fn, cpu_core_mask); @@ -1567,7 +1567,6 @@ static void add_cpu_to_masks(int cpu) } } -out: free_cpumask_var(mask); } -- cgit From 5bf63497b8ddf53d8973f68076119e482639b2bb Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Thu, 26 Aug 2021 15:34:01 +0530 Subject: powerpc/smp: Enable CACHE domain for shared processor Currently the CACHE domain is not enabled in shared processor mode on PowerVM LPARs. On PowerVM systems, 'ibm,thread-group' device-tree property 2 under the cpu device node indicates which CPUs share the L2-cache. 
However 'ibm,thread-group' device-tree property 2 is a relatively new property. In absence of 'ibm,thread-group' property 2, 'l2-cache' device property under cpu-device-node could help system to identify CPUs sharing L2-cache. However this property is not exposed by PhyP in shared processor mode configurations. In absence of properties that inform OS about which CPUs share L2-cache, fallback on core boundary. Here are some stats from Power9 shared LPAR with the changes. $ lscpu Architecture: ppc64le Byte Order: Little Endian CPU(s): 32 On-line CPU(s) list: 0-31 Thread(s) per core: 8 Core(s) per socket: 1 Socket(s): 3 NUMA node(s): 2 Model: 2.2 (pvr 004e 0202) Model name: POWER9 (architected), altivec supported Hypervisor vendor: pHyp Virtualization type: para L1d cache: 32K L1i cache: 32K NUMA node0 CPU(s): 16-23 NUMA node1 CPU(s): 0-15,24-31 Physical sockets: 2 Physical chips: 1 Physical cores/chip: 10 Before patch $ grep -r . /sys/kernel/debug/sched/domains/cpu0/domain*/name Before /sys/kernel/debug/sched/domains/cpu0/domain0/name:SMT /sys/kernel/debug/sched/domains/cpu0/domain1/name:DIE /sys/kernel/debug/sched/domains/cpu0/domain2/name:NUMA After /sys/kernel/debug/sched/domains/cpu0/domain0/name:SMT /sys/kernel/debug/sched/domains/cpu0/domain1/name:CACHE /sys/kernel/debug/sched/domains/cpu0/domain2/name:DIE /sys/kernel/debug/sched/domains/cpu0/domain3/name:NUMA $ awk '/domain/{print $1, $2}' /proc/schedstat | sort -u | sed -e 's/00000000,//g' Before domain0 00000055 domain0 000000aa domain0 00005500 domain0 0000aa00 domain0 00550000 domain0 00aa0000 domain0 55000000 domain0 aa000000 domain1 00ff0000 domain1 ff00ffff domain2 ffffffff After domain0 00000055 domain0 000000aa domain0 00005500 domain0 0000aa00 domain0 00550000 domain0 00aa0000 domain0 55000000 domain0 aa000000 domain1 000000ff domain1 0000ff00 domain1 00ff0000 domain1 ff000000 domain2 ff00ffff domain2 ffffffff domain3 ffffffff (Lower is better) perf stat -a -r 5 -n perf bench sched pipe | tail -n 2 Before 153.798 +- 0.142 seconds time elapsed ( +- 0.09% ) After 111.545 +- 0.652 seconds time elapsed ( +- 0.58% ) which is an improvement of 27.47% Signed-off-by: Srikar Dronamraju Reviewed-by: Gautham R. Shenoy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210826100401.412519-4-srikar@linux.vnet.ibm.com --- arch/powerpc/kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 4bdc339fff3c..548aa58d25f9 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1400,7 +1400,7 @@ static bool update_mask_by_l2(int cpu, cpumask_var_t *mask) l2_cache = cpu_to_l2cache(cpu); if (!l2_cache || !*mask) { /* Assume only core siblings share cache with this CPU */ - for_each_cpu(i, submask_fn(cpu)) + for_each_cpu(i, cpu_sibling_mask(cpu)) set_cpus_related(cpu, i, cpu_l2_cache_mask); return false; -- cgit From 544af6429777cefae2f8af9a9866df5e8cb21763 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Thu, 26 Aug 2021 15:35:17 +0530 Subject: powerpc/numa: Drop dbg in favour of pr_debug powerpc supported numa=debug which is not documented. This option was used to print early debug output. However something more flexible can be achieved by using CONFIG_DYNAMIC_DEBUG. 
Hence drop dbg (and numa=debug) in favour of pr_debug Suggested-by: Michael Ellerman Signed-off-by: Srikar Dronamraju [mpe: Rebase on to powerpc/next form2 affinity changes] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210826100521.412639-2-srikar@linux.vnet.ibm.com --- arch/powerpc/mm/numa.c | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index fe9c6ced40b2..73ba6b072274 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -40,9 +40,6 @@ static int numa_enabled = 1; static char *cmdline __initdata; -static int numa_debug; -#define dbg(args...) if (numa_debug) { printk(KERN_INFO args); } - int numa_cpu_lookup_table[NR_CPUS]; cpumask_var_t node_to_cpumask_map[MAX_NUMNODES]; struct pglist_data *node_data[MAX_NUMNODES]; @@ -87,7 +84,7 @@ static void __init setup_node_to_cpumask_map(void) alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]); /* cpumask_of_node() will now work */ - dbg("Node to cpumask map for %u nodes\n", nr_node_ids); + pr_debug("Node to cpumask map for %u nodes\n", nr_node_ids); } static int __init fake_numa_create_new_node(unsigned long end_pfn, @@ -131,7 +128,7 @@ static int __init fake_numa_create_new_node(unsigned long end_pfn, cmdline = p; fake_nid++; *nid = fake_nid; - dbg("created new fake_node with id %d\n", fake_nid); + pr_debug("created new fake_node with id %d\n", fake_nid); return 1; } return 0; @@ -149,7 +146,7 @@ static void map_cpu_to_node(int cpu, int node) { update_numa_cpu_lookup_table(cpu, node); - dbg("adding cpu %d to node %d\n", cpu, node); + pr_debug("adding cpu %d to node %d\n", cpu, node); if (!(cpumask_test_cpu(cpu, node_to_cpumask_map[node]))) cpumask_set_cpu(cpu, node_to_cpumask_map[node]); @@ -160,7 +157,7 @@ static void unmap_cpu_from_node(unsigned long cpu) { int node = numa_cpu_lookup_table[cpu]; - dbg("removing cpu %lu from node %d\n", cpu, node); + pr_debug("removing cpu %lu from node %d\n", cpu, node); if (cpumask_test_cpu(cpu, node_to_cpumask_map[node])) { cpumask_clear_cpu(cpu, node_to_cpumask_map[node]); @@ -450,10 +447,10 @@ static int __init find_primary_domain_index(void) if (firmware_has_feature(FW_FEATURE_OPAL)) { affinity_form = FORM1_AFFINITY; } else if (firmware_has_feature(FW_FEATURE_FORM2_AFFINITY)) { - dbg("Using form 2 affinity\n"); + pr_debug("Using form 2 affinity\n"); affinity_form = FORM2_AFFINITY; } else if (firmware_has_feature(FW_FEATURE_FORM1_AFFINITY)) { - dbg("Using form 1 affinity\n"); + pr_debug("Using form 1 affinity\n"); affinity_form = FORM1_AFFINITY; } else affinity_form = FORM0_AFFINITY; @@ -482,7 +479,7 @@ static int __init find_primary_domain_index(void) &distance_ref_points_depth); if (!distance_ref_points) { - dbg("NUMA: ibm,associativity-reference-points not found.\n"); + pr_debug("NUMA: ibm,associativity-reference-points not found.\n"); goto err; } @@ -928,7 +925,7 @@ static int __init parse_numa_properties(void) return primary_domain_index; } - dbg("NUMA associativity depth for CPU/Memory: %d\n", primary_domain_index); + pr_debug("NUMA associativity depth for CPU/Memory: %d\n", primary_domain_index); /* * If it is FORM2 initialize the distance table here. 
@@ -1251,9 +1248,6 @@ static int __init early_numa(char *p) if (strstr(p, "off")) numa_enabled = 0; - if (strstr(p, "debug")) - numa_debug = 1; - p = strstr(p, "fake="); if (p) cmdline = p + strlen("fake="); @@ -1416,7 +1410,7 @@ static long vphn_get_associativity(unsigned long cpu, switch (rc) { case H_SUCCESS: - dbg("VPHN hcall succeeded. Reset polling...\n"); + pr_debug("VPHN hcall succeeded. Reset polling...\n"); goto out; case H_FUNCTION: -- cgit From 506c2075ffd8db352c53201ef166948a272e3bce Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Thu, 26 Aug 2021 15:35:18 +0530 Subject: powerpc/numa: convert printk to pr_xxx Convert the remaining printk to pr_xxx One advantage would be all prints will now have prefix "numa:" from pr_fmt(). [ convert printk(KERN_ERR) to pr_warn : Suggested by Laurent Dufour ] Suggested-by: Michael Ellerman Signed-off-by: Srikar Dronamraju [mpe: Rebase onto powerpc/next, s/WARNING/Warning/] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210826100521.412639-3-srikar@linux.vnet.ibm.com --- arch/powerpc/mm/numa.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 73ba6b072274..f4e52581713e 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -162,8 +162,7 @@ static void unmap_cpu_from_node(unsigned long cpu) if (cpumask_test_cpu(cpu, node_to_cpumask_map[node])) { cpumask_clear_cpu(cpu, node_to_cpumask_map[node]); } else { - printk(KERN_ERR "WARNING: cpu %lu not found in node %d\n", - cpu, node); + pr_warn("Warning: cpu %lu not found in node %d\n", cpu, node); } } #endif /* CONFIG_HOTPLUG_CPU || CONFIG_PPC_SPLPAR */ @@ -479,15 +478,14 @@ static int __init find_primary_domain_index(void) &distance_ref_points_depth); if (!distance_ref_points) { - pr_debug("NUMA: ibm,associativity-reference-points not found.\n"); + pr_debug("ibm,associativity-reference-points not found.\n"); goto err; } distance_ref_points_depth /= sizeof(int); if (affinity_form == FORM0_AFFINITY) { if (distance_ref_points_depth < 2) { - printk(KERN_WARNING "NUMA: " - "short ibm,associativity-reference-points\n"); + pr_warn("short ibm,associativity-reference-points\n"); goto err; } @@ -504,8 +502,8 @@ static int __init find_primary_domain_index(void) * MAX_DISTANCE_REF_POINTS domains. */ if (distance_ref_points_depth > MAX_DISTANCE_REF_POINTS) { - printk(KERN_WARNING "NUMA: distance array capped at " - "%d entries\n", MAX_DISTANCE_REF_POINTS); + pr_warn("distance array capped at %d entries\n", + MAX_DISTANCE_REF_POINTS); distance_ref_points_depth = MAX_DISTANCE_REF_POINTS; } @@ -910,7 +908,7 @@ static int __init parse_numa_properties(void) const __be32 *associativity; if (numa_enabled == 0) { - printk(KERN_WARNING "NUMA disabled by user\n"); + pr_warn("disabled by user\n"); return -1; } @@ -925,7 +923,7 @@ static int __init parse_numa_properties(void) return primary_domain_index; } - pr_debug("NUMA associativity depth for CPU/Memory: %d\n", primary_domain_index); + pr_debug("associativity depth for CPU/Memory: %d\n", primary_domain_index); /* * If it is FORM2 initialize the distance table here. 
@@ -1038,10 +1036,8 @@ static void __init setup_nonnuma(void) unsigned int nid = 0; int i; - printk(KERN_DEBUG "Top of RAM: 0x%lx, Total RAM: 0x%lx\n", - top_of_ram, total_ram); - printk(KERN_DEBUG "Memory hole size: %ldMB\n", - (top_of_ram - total_ram) >> 20); + pr_debug("Top of RAM: 0x%lx, Total RAM: 0x%lx\n", top_of_ram, total_ram); + pr_debug("Memory hole size: %ldMB\n", (top_of_ram - total_ram) >> 20); for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) { fake_numa_create_new_node(end_pfn, &nid); -- cgit From 544a09ee7434b949552266d20ece538d35535bd5 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Thu, 26 Aug 2021 15:35:19 +0530 Subject: powerpc/numa: Print debug statements only when required Currently, a debug message gets printed every time an attempt is made to add (remove) a CPU. However this is redundant if the CPU is already added to (removed from) the node. Signed-off-by: Srikar Dronamraju Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210826100521.412639-4-srikar@linux.vnet.ibm.com --- arch/powerpc/mm/numa.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index f4e52581713e..52bc63ee6a6a 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -146,10 +146,10 @@ static void map_cpu_to_node(int cpu, int node) { update_numa_cpu_lookup_table(cpu, node); - pr_debug("adding cpu %d to node %d\n", cpu, node); - - if (!(cpumask_test_cpu(cpu, node_to_cpumask_map[node]))) + if (!(cpumask_test_cpu(cpu, node_to_cpumask_map[node]))) { + pr_debug("adding cpu %d to node %d\n", cpu, node); cpumask_set_cpu(cpu, node_to_cpumask_map[node]); + } } #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_PPC_SPLPAR) @@ -157,10 +157,9 @@ static void unmap_cpu_from_node(unsigned long cpu) { int node = numa_cpu_lookup_table[cpu]; - pr_debug("removing cpu %lu from node %d\n", cpu, node); - if (cpumask_test_cpu(cpu, node_to_cpumask_map[node])) { cpumask_clear_cpu(cpu, node_to_cpumask_map[node]); + pr_debug("removing cpu %lu from node %d\n", cpu, node); } else { pr_warn("Warning: cpu %lu not found in node %d\n", cpu, node); } -- cgit From 9a245d0e1f006bc7ccf0285d0d520ed304d00c4a Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Thu, 26 Aug 2021 15:35:20 +0530 Subject: powerpc/numa: Update cpu_cpu_map on CPU online/offline cpu_cpu_map holds all the CPUs in the DIE. However in PowerPC, when onlining/offlining CPUs, this mask doesn't get updated. This mask is, however, updated when CPUs are added/removed. So when both operations, online/offline of CPUs and adding/removing of CPUs, are done simultaneously, the cpumaps end up broken. 
WARNING: CPU: 13 PID: 1142 at kernel/sched/topology.c:898 build_sched_domains+0xd48/0x1720 Modules linked in: rpadlpar_io rpaphp mptcp_diag xsk_diag tcp_diag udp_diag raw_diag inet_diag unix_diag af_packet_diag netlink_diag bonding tls nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ip_set rfkill nf_tables nfnetlink pseries_rng xts vmx_crypto uio_pdrv_genirq uio binfmt_misc ip_tables xfs libcrc32c dm_service_time sd_mod t10_pi sg ibmvfc scsi_transport_fc ibmveth dm_multipath dm_mirror dm_region_hash dm_log dm_mod fuse CPU: 13 PID: 1142 Comm: kworker/13:2 Not tainted 5.13.0-rc6+ #28 Workqueue: events cpuset_hotplug_workfn NIP: c0000000001caac8 LR: c0000000001caac4 CTR: 00000000007088ec REGS: c00000005596f220 TRAP: 0700 Not tainted (5.13.0-rc6+) MSR: 8000000000029033 CR: 48828222 XER: 00000009 CFAR: c0000000001ea698 IRQMASK: 0 GPR00: c0000000001caac4 c00000005596f4c0 c000000001c4a400 0000000000000036 GPR04: 00000000fffdffff c00000005596f1d0 0000000000000027 c0000018cfd07f90 GPR08: 0000000000000023 0000000000000001 0000000000000027 c0000018fe68ffe8 GPR12: 0000000000008000 c00000001e9d1880 c00000013a047200 0000000000000800 GPR16: c000000001d3c7d0 0000000000000240 0000000000000048 c000000010aacd18 GPR20: 0000000000000001 c000000010aacc18 c00000013a047c00 c000000139ec2400 GPR24: 0000000000000280 c000000139ec2520 c000000136c1b400 c000000001c93060 GPR28: c00000013a047c20 c000000001d3c6c0 c000000001c978a0 000000000000000d NIP [c0000000001caac8] build_sched_domains+0xd48/0x1720 LR [c0000000001caac4] build_sched_domains+0xd44/0x1720 Call Trace: [c00000005596f4c0] [c0000000001caac4] build_sched_domains+0xd44/0x1720 (unreliable) [c00000005596f670] [c0000000001cc5ec] partition_sched_domains_locked+0x3ac/0x4b0 [c00000005596f710] [c0000000002804e4] rebuild_sched_domains_locked+0x404/0x9e0 [c00000005596f810] [c000000000283e60] rebuild_sched_domains+0x40/0x70 [c00000005596f840] [c000000000284124] cpuset_hotplug_workfn+0x294/0xf10 [c00000005596fc60] [c000000000175040] process_one_work+0x290/0x590 [c00000005596fd00] [c0000000001753c8] worker_thread+0x88/0x620 [c00000005596fda0] [c000000000181704] kthread+0x194/0x1a0 [c00000005596fe10] [c00000000000ccec] ret_from_kernel_thread+0x5c/0x70 Instruction dump: 485af049 60000000 2fa30800 409e0028 80fe0000 e89a00f8 e86100e8 38da0120 7f88e378 7ce53b78 4801fb91 60000000 <0fe00000> 39000000 38e00000 38c00000 Fix this by updating cpu_cpu_map aka cpumask_of_node() on every CPU online/offline. 
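In outline (a simplified sketch of the diff below, not a verbatim quote), the two hotplug paths now keep the node mask in sync:

	/* CPU online path: add_cpu_to_masks() re-adds the CPU to its node
	 * before rebuilding the sibling/core masks */
	map_cpu_to_node(cpu, cpu_to_node(cpu));	/* set in node_to_cpumask_map[] */
	cpumask_set_cpu(cpu, cpu_sibling_mask(cpu));

	/* CPU offline path: remove_cpu_from_masks() drops it again */
	unmap_cpu_from_node(cpu);		/* clear from node_to_cpumask_map[] */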
Signed-off-by: Srikar Dronamraju Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210826100521.412639-5-srikar@linux.vnet.ibm.com --- arch/powerpc/include/asm/topology.h | 13 +++++++++++++ arch/powerpc/kernel/smp.c | 3 +++ arch/powerpc/mm/numa.c | 7 ++----- 3 files changed, 18 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index a4712ecad3e9..36fcafb1fd6d 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -66,6 +66,11 @@ static inline int early_cpu_to_node(int cpu) int of_drconf_to_nid_single(struct drmem_lmb *lmb); void update_numa_distance(struct device_node *node); +extern void map_cpu_to_node(int cpu, int node); +#ifdef CONFIG_HOTPLUG_CPU +extern void unmap_cpu_from_node(unsigned long cpu); +#endif /* CONFIG_HOTPLUG_CPU */ + #else static inline int early_cpu_to_node(int cpu) { return 0; } @@ -95,6 +100,14 @@ static inline int of_drconf_to_nid_single(struct drmem_lmb *lmb) } static inline void update_numa_distance(struct device_node *node) {} + +#ifdef CONFIG_SMP +static inline void map_cpu_to_node(int cpu, int node) {} +#ifdef CONFIG_HOTPLUG_CPU +static inline void unmap_cpu_from_node(unsigned long cpu) {} +#endif /* CONFIG_HOTPLUG_CPU */ +#endif /* CONFIG_SMP */ + #endif /* CONFIG_NUMA */ #if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR) diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 548aa58d25f9..9cc7d3dbf439 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1442,6 +1442,8 @@ static void remove_cpu_from_masks(int cpu) struct cpumask *(*mask_fn)(int) = cpu_sibling_mask; int i; + unmap_cpu_from_node(cpu); + if (shared_caches) mask_fn = cpu_l2_cache_mask; @@ -1526,6 +1528,7 @@ static void add_cpu_to_masks(int cpu) * This CPU will not be in the online mask yet so we need to manually * add it to it's own thread sibling mask. */ + map_cpu_to_node(cpu, cpu_to_node(cpu)); cpumask_set_cpu(cpu, cpu_sibling_mask(cpu)); cpumask_set_cpu(cpu, cpu_core_mask(cpu)); diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 52bc63ee6a6a..6f14c8fb6359 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -142,7 +142,7 @@ static void reset_numa_cpu_lookup_table(void) numa_cpu_lookup_table[cpu] = -1; } -static void map_cpu_to_node(int cpu, int node) +void map_cpu_to_node(int cpu, int node) { update_numa_cpu_lookup_table(cpu, node); @@ -153,7 +153,7 @@ static void map_cpu_to_node(int cpu, int node) } #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_PPC_SPLPAR) -static void unmap_cpu_from_node(unsigned long cpu) +void unmap_cpu_from_node(unsigned long cpu) { int node = numa_cpu_lookup_table[cpu]; @@ -800,9 +800,6 @@ static int ppc_numa_cpu_prepare(unsigned int cpu) static int ppc_numa_cpu_dead(unsigned int cpu) { -#ifdef CONFIG_HOTPLUG_CPU - unmap_cpu_from_node(cpu); -#endif return 0; } -- cgit From 0c634bafe3bbee7a36dca7f1277057e05bf14d91 Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Tue, 17 Aug 2021 03:39:19 -0300 Subject: powerpc/pseries/iommu: Replace hard-coded page shift Some functions assume IOMMU page size can only be 4K (pageshift == 12). Update them to accept any page size passed, so we can use 64K pages. In the process, some defines like TCE_SHIFT were made obsolete, and then removed. 
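The essence of the conversion, as a sketch (the full hunks follow in the diff below):

    /* before: page size hard-coded to 4K */
    rpn = __pa(uaddr) >> TCE_SHIFT;          /* TCE_SHIFT == 12 */
    uaddr += TCE_PAGE_SIZE;

    /* after: honour whatever page size the iommu_table was created with */
    const unsigned long tceshift = tbl->it_page_shift;
    rpn = __pa(uaddr) >> tceshift;
    *tcep = cpu_to_be64(proto_tce | rpn << tceshift);
    uaddr += IOMMU_PAGE_SIZE(tbl);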
Figures 3.4 and 3.5 of IODA3 Revision 3.0_prd1 (OpenPowerFoundation) show a 52-bit RPN and assume a 12-bit pageshift, so there should be no need to use TCE_RPN_MASK, which masks out any bit after 40 in the rpn. Its usage was removed from tce_build_pSeries(), tce_build_pSeriesLP(), and tce_buildmulti_pSeriesLP(). Most places had a tbl struct, so using tbl->it_page_shift was simple. tce_free_pSeriesLP() was a special case: since callers do not always have a tbl struct, adding a tceshift parameter seems the right thing to do. Signed-off-by: Leonardo Bras Reviewed-by: Alexey Kardashevskiy Reviewed-by: Frederic Barrat Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210817063929.38701-2-leobras.c@gmail.com --- arch/powerpc/include/asm/tce.h | 8 ------- arch/powerpc/platforms/pseries/iommu.c | 39 ++++++++++++++++++-------------- 2 files changed, 23 insertions(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/tce.h b/arch/powerpc/include/asm/tce.h index db5fc2f2262d..0c34d2756d92 100644 --- a/arch/powerpc/include/asm/tce.h +++ b/arch/powerpc/include/asm/tce.h @@ -19,15 +19,7 @@ #define TCE_VB 0 #define TCE_PCI 1 -/* TCE page size is 4096 bytes (1 << 12) */ - -#define TCE_SHIFT 12 -#define TCE_PAGE_SIZE (1 << TCE_SHIFT) - #define TCE_ENTRY_SIZE 8 /* each TCE is 64 bits */ - -#define TCE_RPN_MASK 0xfffffffffful /* 40-bit RPN (4K pages) */ -#define TCE_RPN_SHIFT 12 #define TCE_VALID 0x800 /* TCE valid */ #define TCE_ALLIO 0x400 /* TCE valid for all lpars */ #define TCE_PCI_WRITE 0x2 /* write from PCI allowed */ diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 0c55b991f665..b1b8d12bab39 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -107,6 +107,8 @@ static int tce_build_pSeries(struct iommu_table *tbl, long index, u64 proto_tce; __be64 *tcep; u64 rpn; + const unsigned long tceshift = tbl->it_page_shift; + const unsigned long pagesize = IOMMU_PAGE_SIZE(tbl); proto_tce = TCE_PCI_READ; // Read allowed @@ -117,10 +119,10 @@ static int tce_build_pSeries(struct iommu_table *tbl, long index, while (npages--) { /* can't move this out since we might cross MEMBLOCK boundary */ - rpn = __pa(uaddr) >> TCE_SHIFT; - *tcep = cpu_to_be64(proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT); + rpn = __pa(uaddr) >> tceshift; + *tcep = cpu_to_be64(proto_tce | rpn << tceshift); - uaddr += TCE_PAGE_SIZE; + uaddr += pagesize; tcep++; } return 0; @@ -146,7 +148,7 @@ static unsigned long tce_get_pseries(struct iommu_table *tbl, long index) return be64_to_cpu(*tcep); } -static void tce_free_pSeriesLP(unsigned long liobn, long, long); +static void tce_free_pSeriesLP(unsigned long liobn, long, long, long); static void tce_freemulti_pSeriesLP(struct iommu_table*, long, long); static int tce_build_pSeriesLP(unsigned long liobn, long tcenum, long tceshift, @@ -166,12 +168,12 @@ static int tce_build_pSeriesLP(unsigned long liobn, long tcenum, long tceshift, proto_tce |= TCE_PCI_WRITE; while (npages--) { - tce = proto_tce | (rpn & TCE_RPN_MASK) << tceshift; + tce = proto_tce | rpn << tceshift; rc = plpar_tce_put((u64)liobn, (u64)tcenum << tceshift, tce); if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) { ret = (int)rc; - tce_free_pSeriesLP(liobn, tcenum_start, + tce_free_pSeriesLP(liobn, tcenum_start, tceshift, (npages_start - (npages + 1))); break; } @@ -205,10 +207,11 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long tcenum_start = tcenum, npages_start =
npages; int ret = 0; unsigned long flags; + const unsigned long tceshift = tbl->it_page_shift; if ((npages == 1) || !firmware_has_feature(FW_FEATURE_PUT_TCE_IND)) { return tce_build_pSeriesLP(tbl->it_index, tcenum, - tbl->it_page_shift, npages, uaddr, + tceshift, npages, uaddr, direction, attrs); } @@ -225,13 +228,13 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, if (!tcep) { local_irq_restore(flags); return tce_build_pSeriesLP(tbl->it_index, tcenum, - tbl->it_page_shift, + tceshift, npages, uaddr, direction, attrs); } __this_cpu_write(tce_page, tcep); } - rpn = __pa(uaddr) >> TCE_SHIFT; + rpn = __pa(uaddr) >> tceshift; proto_tce = TCE_PCI_READ; if (direction != DMA_TO_DEVICE) proto_tce |= TCE_PCI_WRITE; @@ -245,12 +248,12 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, limit = min_t(long, npages, 4096/TCE_ENTRY_SIZE); for (l = 0; l < limit; l++) { - tcep[l] = cpu_to_be64(proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT); + tcep[l] = cpu_to_be64(proto_tce | rpn << tceshift); rpn++; } rc = plpar_tce_put_indirect((u64)tbl->it_index, - (u64)tcenum << 12, + (u64)tcenum << tceshift, (u64)__pa(tcep), limit); @@ -277,12 +280,13 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, return ret; } -static void tce_free_pSeriesLP(unsigned long liobn, long tcenum, long npages) +static void tce_free_pSeriesLP(unsigned long liobn, long tcenum, long tceshift, + long npages) { u64 rc; while (npages--) { - rc = plpar_tce_put((u64)liobn, (u64)tcenum << 12, 0); + rc = plpar_tce_put((u64)liobn, (u64)tcenum << tceshift, 0); if (rc && printk_ratelimit()) { printk("tce_free_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc); @@ -301,9 +305,11 @@ static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long n u64 rc; if (!firmware_has_feature(FW_FEATURE_STUFF_TCE)) - return tce_free_pSeriesLP(tbl->it_index, tcenum, npages); + return tce_free_pSeriesLP(tbl->it_index, tcenum, + tbl->it_page_shift, npages); - rc = plpar_tce_stuff((u64)tbl->it_index, (u64)tcenum << 12, 0, npages); + rc = plpar_tce_stuff((u64)tbl->it_index, + (u64)tcenum << tbl->it_page_shift, 0, npages); if (rc && printk_ratelimit()) { printk("tce_freemulti_pSeriesLP: plpar_tce_stuff failed\n"); @@ -319,7 +325,8 @@ static unsigned long tce_get_pSeriesLP(struct iommu_table *tbl, long tcenum) u64 rc; unsigned long tce_ret; - rc = plpar_tce_get((u64)tbl->it_index, (u64)tcenum << 12, &tce_ret); + rc = plpar_tce_get((u64)tbl->it_index, + (u64)tcenum << tbl->it_page_shift, &tce_ret); if (rc && printk_ratelimit()) { printk("tce_get_pSeriesLP: plpar_tce_get failed. rc=%lld\n", rc); -- cgit From 3c33066a21903076722a2881556a92aa3cd7d359 Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Tue, 17 Aug 2021 03:39:20 -0300 Subject: powerpc/kernel/iommu: Add new iommu_table_in_use() helper Having a function to check if the iommu table has any allocation helps deciding if a tbl can be reset for using a new DMA window. It should be enough to replace all instances of !bitmap_empty(tbl...). iommu_table_in_use() skips reserved memory, so we don't need to worry about releasing it before testing. This causes iommu_table_release_pages() to become unnecessary, given it is only used to remove reserved memory for testing. Also, only allow storing reserved memory values in tbl if they are valid in the table, so there is no need to check it in the new helper. 
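The check reduces to two find_next_bit() scans that skip the reserved range, roughly (condensed sketch of the helper added below):

    /* any TCE allocated outside [it_reserved_start, it_reserved_end)? */
    start = (tbl->it_offset == 0) ? 1 : 0;   /* ignore reserved bit 0 */
    end = tbl->it_reserved_start - tbl->it_offset;
    if (find_next_bit(tbl->it_map, end, start) != end)
            return true;

    start = tbl->it_reserved_end - tbl->it_offset;
    end = tbl->it_size;
    return find_next_bit(tbl->it_map, end, start) != end;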
Signed-off-by: Leonardo Bras Reviewed-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210817063929.38701-3-leobras.c@gmail.com --- arch/powerpc/include/asm/iommu.h | 1 + arch/powerpc/kernel/iommu.c | 61 ++++++++++++++++++++-------------------- 2 files changed, 32 insertions(+), 30 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index deef7c94d7b6..bf3b84128525 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -154,6 +154,7 @@ extern int iommu_tce_table_put(struct iommu_table *tbl); */ extern struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid, unsigned long res_start, unsigned long res_end); +bool iommu_table_in_use(struct iommu_table *tbl); #define IOMMU_TABLE_GROUP_MAX_TABLES 2 diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 2af89a5e379f..ed98ad63633e 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -690,32 +690,24 @@ static void iommu_table_reserve_pages(struct iommu_table *tbl, if (tbl->it_offset == 0) set_bit(0, tbl->it_map); - tbl->it_reserved_start = res_start; - tbl->it_reserved_end = res_end; - - /* Check if res_start..res_end isn't empty and overlaps the table */ - if (res_start && res_end && - (tbl->it_offset + tbl->it_size < res_start || - res_end < tbl->it_offset)) - return; + if (res_start < tbl->it_offset) + res_start = tbl->it_offset; - for (i = tbl->it_reserved_start; i < tbl->it_reserved_end; ++i) - set_bit(i - tbl->it_offset, tbl->it_map); -} + if (res_end > (tbl->it_offset + tbl->it_size)) + res_end = tbl->it_offset + tbl->it_size; -static void iommu_table_release_pages(struct iommu_table *tbl) -{ - int i; + /* Check if res_start..res_end is a valid range in the table */ + if (res_start >= res_end) { + tbl->it_reserved_start = tbl->it_offset; + tbl->it_reserved_end = tbl->it_offset; + return; + } - /* - * In case we have reserved the first bit, we should not emit - * the warning below. 
- */ - if (tbl->it_offset == 0) - clear_bit(0, tbl->it_map); + tbl->it_reserved_start = res_start; + tbl->it_reserved_end = res_end; for (i = tbl->it_reserved_start; i < tbl->it_reserved_end; ++i) - clear_bit(i - tbl->it_offset, tbl->it_map); + set_bit(i - tbl->it_offset, tbl->it_map); } /* @@ -779,6 +771,22 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid, return tbl; } +bool iommu_table_in_use(struct iommu_table *tbl) +{ + unsigned long start = 0, end; + + /* ignore reserved bit0 */ + if (tbl->it_offset == 0) + start = 1; + end = tbl->it_reserved_start - tbl->it_offset; + if (find_next_bit(tbl->it_map, end, start) != end) + return true; + + start = tbl->it_reserved_end - tbl->it_offset; + end = tbl->it_size; + return find_next_bit(tbl->it_map, end, start) != end; +} + static void iommu_table_free(struct kref *kref) { struct iommu_table *tbl; @@ -795,10 +803,8 @@ static void iommu_table_free(struct kref *kref) iommu_debugfs_del(tbl); - iommu_table_release_pages(tbl); - /* verify that table contains no entries */ - if (!bitmap_empty(tbl->it_map, tbl->it_size)) + if (iommu_table_in_use(tbl)) pr_warn("%s: Unexpected TCEs\n", __func__); /* free bitmap */ @@ -1099,14 +1105,9 @@ int iommu_take_ownership(struct iommu_table *tbl) for (i = 0; i < tbl->nr_pools; i++) spin_lock_nest_lock(&tbl->pools[i].lock, &tbl->large_pool.lock); - iommu_table_release_pages(tbl); - - if (!bitmap_empty(tbl->it_map, tbl->it_size)) { + if (iommu_table_in_use(tbl)) { pr_err("iommu_tce: it_map is not empty"); ret = -EBUSY; - /* Undo iommu_table_release_pages, i.e. restore bit#0, etc */ - iommu_table_reserve_pages(tbl, tbl->it_reserved_start, - tbl->it_reserved_end); } else { memset(tbl->it_map, 0xff, sz); } -- cgit From 4ff8677a0b192a58d998d1d34fc5168203041a24 Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Tue, 17 Aug 2021 03:39:21 -0300 Subject: powerpc/pseries/iommu: Add iommu_pseries_alloc_table() helper Creates a helper to allow allocating a new iommu_table without the need to reallocate the iommu_group. This will be helpful for replacing the iommu_table for the new DMA window, after we remove the old one with iommu_tce_table_put(). 
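A usage sketch (hypothetical caller, not part of this patch; a later patch in this series uses the helper in essentially this way):

    /* allocate a fresh iommu_table while keeping the existing group */
    struct iommu_table *newtbl = iommu_pseries_alloc_table(pci->phb->node);

    if (newtbl) {
            /* ... fill in the new DMA window parameters ... */
            pci->table_group->tables[1] = newtbl;  /* no new iommu_group needed */
    }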
Signed-off-by: Leonardo Bras Reviewed-by: Alexey Kardashevskiy Reviewed-by: Frederic Barrat Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210817063929.38701-4-leobras.c@gmail.com --- arch/powerpc/platforms/pseries/iommu.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index b1b8d12bab39..33d82865d6e6 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -53,28 +53,31 @@ enum { DDW_EXT_QUERY_OUT_SIZE = 2 }; -static struct iommu_table_group *iommu_pseries_alloc_group(int node) +static struct iommu_table *iommu_pseries_alloc_table(int node) { - struct iommu_table_group *table_group; struct iommu_table *tbl; - table_group = kzalloc_node(sizeof(struct iommu_table_group), GFP_KERNEL, - node); - if (!table_group) - return NULL; - tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, node); if (!tbl) - goto free_group; + return NULL; INIT_LIST_HEAD_RCU(&tbl->it_group_list); kref_init(&tbl->it_kref); + return tbl; +} - table_group->tables[0] = tbl; +static struct iommu_table_group *iommu_pseries_alloc_group(int node) +{ + struct iommu_table_group *table_group; + + table_group = kzalloc_node(sizeof(*table_group), GFP_KERNEL, node); + if (!table_group) + return NULL; - return table_group; + table_group->tables[0] = iommu_pseries_alloc_table(node); + if (table_group->tables[0]) + return table_group; -free_group: kfree(table_group); return NULL; } -- cgit From 92a23219299cedde52e3298788484f4875d5ce0f Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Tue, 17 Aug 2021 03:39:22 -0300 Subject: powerpc/pseries/iommu: Add ddw_list_new_entry() helper There are two functions creating direct_window_list entries in a similar way, so create a ddw_list_new_entry() helper to avoid duplication and simplify those functions.
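Each call site then shrinks to roughly this (sketch; the real hunks are in the diff below):

    window = ddw_list_new_entry(pdn, direct64);
    if (!window)
            break;  /* or take the error path, depending on the caller */

    spin_lock(&direct_window_list_lock);
    list_add(&window->list, &direct_window_list);
    spin_unlock(&direct_window_list_lock);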
Signed-off-by: Leonardo Bras Reviewed-by: Alexey Kardashevskiy Reviewed-by: Frederic Barrat Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210817063929.38701-5-leobras.c@gmail.com --- arch/powerpc/platforms/pseries/iommu.c | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 33d82865d6e6..712d1667144a 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -874,6 +874,21 @@ static u64 find_existing_ddw(struct device_node *pdn, int *window_shift) return dma_addr; } +static struct direct_window *ddw_list_new_entry(struct device_node *pdn, + const struct dynamic_dma_window_prop *dma64) +{ + struct direct_window *window; + + window = kzalloc(sizeof(*window), GFP_KERNEL); + if (!window) + return NULL; + + window->device = pdn; + window->prop = dma64; + + return window; +} + static int find_existing_ddw_windows(void) { int len; @@ -886,18 +901,15 @@ static int find_existing_ddw_windows(void) for_each_node_with_property(pdn, DIRECT64_PROPNAME) { direct64 = of_get_property(pdn, DIRECT64_PROPNAME, &len); - if (!direct64) - continue; - - window = kzalloc(sizeof(*window), GFP_KERNEL); - if (!window || len < sizeof(struct dynamic_dma_window_prop)) { - kfree(window); + if (!direct64 || len < sizeof(*direct64)) { remove_ddw(pdn, true); continue; } - window->device = pdn; - window->prop = direct64; + window = ddw_list_new_entry(pdn, direct64); + if (!window) + break; + spin_lock(&direct_window_list_lock); list_add(&window->list, &direct_window_list); spin_unlock(&direct_window_list_lock); @@ -1307,7 +1319,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) dev_dbg(&dev->dev, "created tce table LIOBN 0x%x for %pOF\n", create.liobn, dn); - window = kzalloc(sizeof(*window), GFP_KERNEL); + window = ddw_list_new_entry(pdn, ddwprop); if (!window) goto out_clear_window; @@ -1326,8 +1338,6 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) goto out_free_window; } - window->device = pdn; - window->prop = ddwprop; spin_lock(&direct_window_list_lock); list_add(&window->list, &direct_window_list); spin_unlock(&direct_window_list_lock); -- cgit From 2ca73c54ce24489518a56d816331b774044c2445 Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Tue, 17 Aug 2021 03:39:23 -0300 Subject: powerpc/pseries/iommu: Allow DDW windows starting at 0x00 enable_ddw() currently returns the address of the DMA window, which is considered invalid if has the value 0x00. Also, it only considers valid an address returned from find_existing_ddw if it's not 0x00. Changing this behavior makes sense, given the users of enable_ddw() only need to know if direct mapping is possible. It can also allow a DMA window starting at 0x00 to be used. This will be helpful for using a DDW with indirect mapping, as the window address will be different than 0x00, but it will not map the whole partition. 
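The API change in miniature (sketch):

    /* before: 0x00 doubles as "no window", so a window at 0 is unusable */
    dma_addr = enable_ddw(dev, pdn);
    if (dma_addr != 0)
            /* direct mapping is available at dma_addr */;

    /* after: success is an explicit bool, the offset lands in the device */
    if (enable_ddw(pdev, pdn))  /* fills dev->dev.archdata.dma_offset */
            return true;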
Signed-off-by: Leonardo Bras Reviewed-by: Alexey Kardashevskiy Reviewed-by: Frederic Barrat Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210817063929.38701-6-leobras.c@gmail.com --- arch/powerpc/platforms/pseries/iommu.c | 36 +++++++++++++++++----------------- 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 712d1667144a..b34b473bbdc1 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -853,25 +853,26 @@ static void remove_ddw(struct device_node *np, bool remove_prop) np, ret); } -static u64 find_existing_ddw(struct device_node *pdn, int *window_shift) +static bool find_existing_ddw(struct device_node *pdn, u64 *dma_addr, int *window_shift) { struct direct_window *window; const struct dynamic_dma_window_prop *direct64; - u64 dma_addr = 0; + bool found = false; spin_lock(&direct_window_list_lock); /* check if we already created a window and dupe that config if so */ list_for_each_entry(window, &direct_window_list, list) { if (window->device == pdn) { direct64 = window->prop; - dma_addr = be64_to_cpu(direct64->dma_base); + *dma_addr = be64_to_cpu(direct64->dma_base); *window_shift = be32_to_cpu(direct64->window_shift); + found = true; break; } } spin_unlock(&direct_window_list_lock); - return dma_addr; + return found; } static struct direct_window *ddw_list_new_entry(struct device_node *pdn, @@ -1161,20 +1162,20 @@ static int iommu_get_page_shift(u32 query_page_size) * pdn: the parent pe node with the ibm,dma_window property * Future: also check if we can remap the base window for our base page size * - * returns the dma offset for use by the direct mapped DMA code. + * returns true if can map all pages (direct mapping), false otherwise.. */ -static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) +static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) { int len = 0, ret; int max_ram_len = order_base_2(ddw_memory_hotplug_max()); struct ddw_query_response query; struct ddw_create_response create; int page_shift; - u64 dma_addr; struct device_node *dn; u32 ddw_avail[DDW_APPLICABLE_SIZE]; struct direct_window *window; struct property *win64; + bool ddw_enabled = false; struct dynamic_dma_window_prop *ddwprop; struct failed_ddw_pdn *fpdn; bool default_win_removed = false; @@ -1186,9 +1187,10 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) mutex_lock(&direct_window_init_mutex); - dma_addr = find_existing_ddw(pdn, &len); - if (dma_addr != 0) + if (find_existing_ddw(pdn, &dev->dev.archdata.dma_offset, &len)) { + ddw_enabled = true; goto out_unlock; + } /* * If we already went through this for a previous function of @@ -1342,7 +1344,8 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) list_add(&window->list, &direct_window_list); spin_unlock(&direct_window_list_lock); - dma_addr = be64_to_cpu(ddwprop->dma_base); + dev->dev.archdata.dma_offset = be64_to_cpu(ddwprop->dma_base); + ddw_enabled = true; goto out_unlock; out_free_window: @@ -1374,10 +1377,10 @@ out_unlock: * as RAM, then we failed to create a window to cover persistent * memory and need to set the DMA limit. 
*/ - if (pmem_present && dma_addr && (len == max_ram_len)) - dev->dev.bus_dma_limit = dma_addr + (1ULL << len); + if (pmem_present && ddw_enabled && (len == max_ram_len)) + dev->dev.bus_dma_limit = dev->dev.archdata.dma_offset + (1ULL << len); - return dma_addr; + return ddw_enabled; } static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) @@ -1456,11 +1459,8 @@ static bool iommu_bypass_supported_pSeriesLP(struct pci_dev *pdev, u64 dma_mask) break; } - if (pdn && PCI_DN(pdn)) { - pdev->dev.archdata.dma_offset = enable_ddw(pdev, pdn); - if (pdev->dev.archdata.dma_offset) - return true; - } + if (pdn && PCI_DN(pdn)) + return enable_ddw(pdev, pdn); return false; } -- cgit From 7ed2ed2db2685a285cb09ab330dc4efea0b64022 Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Tue, 17 Aug 2021 03:39:24 -0300 Subject: powerpc/pseries/iommu: Add ddw_property_create() and refactor enable_ddw() Code used to create a ddw property that was previously scattered in enable_ddw() is now gathered in ddw_property_create(), which deals with allocation and filling the property, letting it ready for of_property_add(), which now occurs in sequence. This created an opportunity to reorganize the second part of enable_ddw(): Without this patch enable_ddw() does, in order: kzalloc() property & members, create_ddw(), fill ddwprop inside property, ddw_list_new_entry(), do tce_setrange_multi_pSeriesLP_walk in all memory, of_add_property(), and list_add(). With this patch enable_ddw() does, in order: create_ddw(), ddw_property_create(), of_add_property(), ddw_list_new_entry(), do tce_setrange_multi_pSeriesLP_walk in all memory, and list_add(). This change requires of_remove_property() in case anything fails after of_add_property(), but we get to do tce_setrange_multi_pSeriesLP_walk in all memory, which looks the most expensive operation, only if everything else succeeds. Also, the error path got remove_ddw() replaced by a new helper __remove_dma_window(), which only removes the new DDW with an rtas-call. For this, a new helper clean_dma_window() was needed to clean anything that could left if walk_system_ram_range() fails. Signed-off-by: Leonardo Bras Reviewed-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210817063929.38701-7-leobras.c@gmail.com --- arch/powerpc/platforms/pseries/iommu.c | 129 +++++++++++++++++++++------------ 1 file changed, 84 insertions(+), 45 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index b34b473bbdc1..00392582fe10 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -795,17 +795,10 @@ static int __init disable_ddw_setup(char *str) early_param("disable_ddw", disable_ddw_setup); -static void remove_dma_window(struct device_node *np, u32 *ddw_avail, - struct property *win) +static void clean_dma_window(struct device_node *np, struct dynamic_dma_window_prop *dwp) { - struct dynamic_dma_window_prop *dwp; - u64 liobn; int ret; - dwp = win->value; - liobn = (u64)be32_to_cpu(dwp->liobn); - - /* clear the whole window, note the arg is in kernel pages */ ret = tce_clearrange_multi_pSeriesLP(0, 1ULL << (be32_to_cpu(dwp->window_shift) - PAGE_SHIFT), dwp); if (ret) @@ -814,18 +807,39 @@ static void remove_dma_window(struct device_node *np, u32 *ddw_avail, else pr_debug("%pOF successfully cleared tces in window.\n", np); +} + +/* + * Call only if DMA window is clean. 
+ */ +static void __remove_dma_window(struct device_node *np, u32 *ddw_avail, u64 liobn) +{ + int ret; ret = rtas_call(ddw_avail[DDW_REMOVE_PE_DMA_WIN], 1, 1, NULL, liobn); if (ret) - pr_warn("%pOF: failed to remove direct window: rtas returned " + pr_warn("%pOF: failed to remove DMA window: rtas returned " "%d to ibm,remove-pe-dma-window(%x) %llx\n", np, ret, ddw_avail[DDW_REMOVE_PE_DMA_WIN], liobn); else - pr_debug("%pOF: successfully removed direct window: rtas returned " + pr_debug("%pOF: successfully removed DMA window: rtas returned " "%d to ibm,remove-pe-dma-window(%x) %llx\n", np, ret, ddw_avail[DDW_REMOVE_PE_DMA_WIN], liobn); } +static void remove_dma_window(struct device_node *np, u32 *ddw_avail, + struct property *win) +{ + struct dynamic_dma_window_prop *dwp; + u64 liobn; + + dwp = win->value; + liobn = (u64)be32_to_cpu(dwp->liobn); + + clean_dma_window(np, dwp); + __remove_dma_window(np, ddw_avail, liobn); +} + static void remove_ddw(struct device_node *np, bool remove_prop) { struct property *win; @@ -1153,6 +1167,35 @@ static int iommu_get_page_shift(u32 query_page_size) return 0; } +static struct property *ddw_property_create(const char *propname, u32 liobn, u64 dma_addr, + u32 page_shift, u32 window_shift) +{ + struct dynamic_dma_window_prop *ddwprop; + struct property *win64; + + win64 = kzalloc(sizeof(*win64), GFP_KERNEL); + if (!win64) + return NULL; + + win64->name = kstrdup(propname, GFP_KERNEL); + ddwprop = kzalloc(sizeof(*ddwprop), GFP_KERNEL); + win64->value = ddwprop; + win64->length = sizeof(*ddwprop); + if (!win64->name || !win64->value) { + kfree(win64->name); + kfree(win64->value); + kfree(win64); + return NULL; + } + + ddwprop->liobn = cpu_to_be32(liobn); + ddwprop->dma_base = cpu_to_be64(dma_addr); + ddwprop->tce_shift = cpu_to_be32(page_shift); + ddwprop->window_shift = cpu_to_be32(window_shift); + + return win64; +} + /* * If the PE supports dynamic dma windows, and there is space for a table * that can map all pages in a linear offset, then setup such a table, @@ -1171,12 +1214,12 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) struct ddw_query_response query; struct ddw_create_response create; int page_shift; + u64 win_addr; struct device_node *dn; u32 ddw_avail[DDW_APPLICABLE_SIZE]; struct direct_window *window; struct property *win64; bool ddw_enabled = false; - struct dynamic_dma_window_prop *ddwprop; struct failed_ddw_pdn *fpdn; bool default_win_removed = false; bool pmem_present; @@ -1293,72 +1336,68 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) 1ULL << page_shift); goto out_failed; } - win64 = kzalloc(sizeof(struct property), GFP_KERNEL); - if (!win64) { - dev_info(&dev->dev, - "couldn't allocate property for 64bit dma window\n"); - goto out_failed; - } - win64->name = kstrdup(DIRECT64_PROPNAME, GFP_KERNEL); - win64->value = ddwprop = kmalloc(sizeof(*ddwprop), GFP_KERNEL); - win64->length = sizeof(*ddwprop); - if (!win64->name || !win64->value) { - dev_info(&dev->dev, - "couldn't allocate property name and value\n"); - goto out_free_prop; - } ret = create_ddw(dev, ddw_avail, &create, page_shift, len); if (ret != 0) - goto out_free_prop; - - ddwprop->liobn = cpu_to_be32(create.liobn); - ddwprop->dma_base = cpu_to_be64(((u64)create.addr_hi << 32) | - create.addr_lo); - ddwprop->tce_shift = cpu_to_be32(page_shift); - ddwprop->window_shift = cpu_to_be32(len); + goto out_failed; dev_dbg(&dev->dev, "created tce table LIOBN 0x%x for %pOF\n", create.liobn, dn); - window = ddw_list_new_entry(pdn, ddwprop); + 
win_addr = ((u64)create.addr_hi << 32) | create.addr_lo; + win64 = ddw_property_create(DIRECT64_PROPNAME, create.liobn, win_addr, + page_shift, len); + if (!win64) { + dev_info(&dev->dev, + "couldn't allocate property, property name, or value\n"); + goto out_remove_win; + } + + ret = of_add_property(pdn, win64); + if (ret) { + dev_err(&dev->dev, "unable to add dma window property for %pOF: %d", + pdn, ret); + goto out_free_prop; + } + + window = ddw_list_new_entry(pdn, win64->value); if (!window) - goto out_clear_window; + goto out_del_prop; ret = walk_system_ram_range(0, memblock_end_of_DRAM() >> PAGE_SHIFT, win64->value, tce_setrange_multi_pSeriesLP_walk); if (ret) { dev_info(&dev->dev, "failed to map direct window for %pOF: %d\n", dn, ret); - goto out_free_window; - } - ret = of_add_property(pdn, win64); - if (ret) { - dev_err(&dev->dev, "unable to add dma window property for %pOF: %d", - pdn, ret); - goto out_free_window; + /* Make sure to clean DDW if any TCE was set*/ + clean_dma_window(pdn, win64->value); + goto out_del_list; } spin_lock(&direct_window_list_lock); list_add(&window->list, &direct_window_list); spin_unlock(&direct_window_list_lock); - dev->dev.archdata.dma_offset = be64_to_cpu(ddwprop->dma_base); + dev->dev.archdata.dma_offset = win_addr; ddw_enabled = true; goto out_unlock; -out_free_window: +out_del_list: kfree(window); -out_clear_window: - remove_ddw(pdn, true); +out_del_prop: + of_remove_property(pdn, win64); out_free_prop: kfree(win64->name); kfree(win64->value); kfree(win64); +out_remove_win: + /* DDW is clean, so it's ok to call this directly. */ + __remove_dma_window(pdn, ddw_avail, create.liobn); + out_failed: if (default_win_removed) reset_dma_window(dev, pdn); -- cgit From fc8cba8f989fb98e496b33a78476861e246c42a0 Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Tue, 17 Aug 2021 03:39:25 -0300 Subject: powerpc/pseries/iommu: Reorganize iommu_table_setparms*() with new helper Add a new helper iommu_table_setparms_common(), and use it in iommu_table_setparms() and iommu_table_setparms_lpar() to avoid duplicated code. Also, setting tbl->it_ops was happening outside iommu_table_setparms*(), so move it into the new helper. Since the iommu_table_ops need to be declared before they are used, declare iommu_table_lpar_multi_ops and iommu_table_pseries_ops before their respective iommu_table_setparms*().
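Both window setups then collapse into calls like these (sketch; the full versions follow in the diff):

    /* bare metal: TCE table at a kernel virtual address, 4K IOMMU pages */
    iommu_table_setparms_common(tbl, phb->bus->number, 0,
                                phb->dma_window_base_cur, phb->dma_window_size,
                                IOMMU_PAGE_SHIFT_4K, __va(*basep),
                                &iommu_table_pseries_ops);

    /* LPAR: hypervisor-managed table, so no base pointer */
    iommu_table_setparms_common(tbl, phb->bus->number, liobn, offset, size,
                                IOMMU_PAGE_SHIFT_4K, NULL,
                                &iommu_table_lpar_multi_ops);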
Signed-off-by: Leonardo Bras Reviewed-by: Frederic Barrat Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210817063929.38701-8-leobras.c@gmail.com --- arch/powerpc/platforms/pseries/iommu.c | 72 ++++++++++++++++++---------------- 1 file changed, 38 insertions(+), 34 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 00392582fe10..a47f59a8f107 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -501,6 +501,24 @@ static int tce_setrange_multi_pSeriesLP_walk(unsigned long start_pfn, return tce_setrange_multi_pSeriesLP(start_pfn, num_pfn, arg); } +static void iommu_table_setparms_common(struct iommu_table *tbl, unsigned long busno, + unsigned long liobn, unsigned long win_addr, + unsigned long window_size, unsigned long page_shift, + void *base, struct iommu_table_ops *table_ops) +{ + tbl->it_busno = busno; + tbl->it_index = liobn; + tbl->it_offset = win_addr >> page_shift; + tbl->it_size = window_size >> page_shift; + tbl->it_page_shift = page_shift; + tbl->it_base = (unsigned long)base; + tbl->it_blocksize = 16; + tbl->it_type = TCE_PCI; + tbl->it_ops = table_ops; +} + +struct iommu_table_ops iommu_table_pseries_ops; + static void iommu_table_setparms(struct pci_controller *phb, struct device_node *dn, struct iommu_table *tbl) @@ -509,8 +527,13 @@ static void iommu_table_setparms(struct pci_controller *phb, const unsigned long *basep; const u32 *sizep; - node = phb->dn; + /* Test if we are going over 2GB of DMA space */ + if (phb->dma_window_base_cur + phb->dma_window_size > SZ_2G) { + udbg_printf("PCI_DMA: Unexpected number of IOAs under this PHB.\n"); + panic("PCI_DMA: Unexpected number of IOAs under this PHB.\n"); + } + node = phb->dn; basep = of_get_property(node, "linux,tce-base", NULL); sizep = of_get_property(node, "linux,tce-size", NULL); if (basep == NULL || sizep == NULL) { @@ -519,33 +542,18 @@ static void iommu_table_setparms(struct pci_controller *phb, return; } - tbl->it_base = (unsigned long)__va(*basep); + iommu_table_setparms_common(tbl, phb->bus->number, 0, phb->dma_window_base_cur, + phb->dma_window_size, IOMMU_PAGE_SHIFT_4K, + __va(*basep), &iommu_table_pseries_ops); if (!is_kdump_kernel()) memset((void *)tbl->it_base, 0, *sizep); - tbl->it_busno = phb->bus->number; - tbl->it_page_shift = IOMMU_PAGE_SHIFT_4K; - - /* Units of tce entries */ - tbl->it_offset = phb->dma_window_base_cur >> tbl->it_page_shift; - - /* Test if we are going over 2GB of DMA space */ - if (phb->dma_window_base_cur + phb->dma_window_size > 0x80000000ul) { - udbg_printf("PCI_DMA: Unexpected number of IOAs under this PHB.\n"); - panic("PCI_DMA: Unexpected number of IOAs under this PHB.\n"); - } - phb->dma_window_base_cur += phb->dma_window_size; - - /* Set the tce table size - measured in entries */ - tbl->it_size = phb->dma_window_size >> tbl->it_page_shift; - - tbl->it_index = 0; - tbl->it_blocksize = 16; - tbl->it_type = TCE_PCI; } +struct iommu_table_ops iommu_table_lpar_multi_ops; + /* * iommu_table_setparms_lpar * @@ -557,17 +565,13 @@ static void iommu_table_setparms_lpar(struct pci_controller *phb, struct iommu_table_group *table_group, const __be32 *dma_window) { - unsigned long offset, size; + unsigned long offset, size, liobn; - of_parse_dma_window(dn, dma_window, &tbl->it_index, &offset, &size); + of_parse_dma_window(dn, dma_window, &liobn, &offset, &size); + + iommu_table_setparms_common(tbl, phb->bus->number, liobn, offset, size, 
IOMMU_PAGE_SHIFT_4K, NULL, + &iommu_table_lpar_multi_ops); - tbl->it_busno = phb->bus->number; - tbl->it_page_shift = IOMMU_PAGE_SHIFT_4K; - tbl->it_base = 0; - tbl->it_blocksize = 16; - tbl->it_type = TCE_PCI; - tbl->it_offset = offset >> tbl->it_page_shift; - tbl->it_size = size >> tbl->it_page_shift; table_group->tce32_start = offset; table_group->tce32_size = size; @@ -647,7 +651,7 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus) tbl = pci->table_group->tables[0]; iommu_table_setparms(pci->phb, dn, tbl); - tbl->it_ops = &iommu_table_pseries_ops; + if (!iommu_init_table(tbl, pci->phb->node, 0, 0)) panic("Failed to initialize iommu table"); @@ -730,7 +734,7 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) tbl = ppci->table_group->tables[0]; iommu_table_setparms_lpar(ppci->phb, pdn, tbl, ppci->table_group, dma_window); - tbl->it_ops = &iommu_table_lpar_multi_ops; + if (!iommu_init_table(tbl, ppci->phb->node, 0, 0)) panic("Failed to initialize iommu table"); iommu_register_group(ppci->table_group, @@ -760,7 +764,7 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev) PCI_DN(dn)->table_group = iommu_pseries_alloc_group(phb->node); tbl = PCI_DN(dn)->table_group->tables[0]; iommu_table_setparms(phb, dn, tbl); - tbl->it_ops = &iommu_table_pseries_ops; + if (!iommu_init_table(tbl, phb->node, 0, 0)) panic("Failed to initialize iommu table"); @@ -1461,7 +1465,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) tbl = pci->table_group->tables[0]; iommu_table_setparms_lpar(pci->phb, pdn, tbl, pci->table_group, dma_window); - tbl->it_ops = &iommu_table_lpar_multi_ops; + iommu_init_table(tbl, pci->phb->node, 0, 0); iommu_register_group(pci->table_group, pci_domain_nr(pci->phb->bus), 0); -- cgit From a5fd95120c653962a9e75e260a35436b96d2c991 Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Tue, 17 Aug 2021 03:39:26 -0300 Subject: powerpc/pseries/iommu: Update remove_dma_window() to accept property name Update remove_dma_window() so it can be used to remove DDW with a given property name. This enables the creation of new property names for DDW, so we can have different usage for it, like indirect mapping. Also, add return values to it so we can check if the property was found while removing the active DDW. This allows skipping the remaining property names while reducing the impact of multiple property names. 
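With a return value, a caller can try property names in order and stop at the first one that exists, e.g. (sketch; a later patch in this series uses exactly this pattern):

    /* remove_ddw() returns -EINVAL when win_name is not present */
    if (remove_ddw(np, false, DIRECT64_PROPNAME))
            remove_ddw(np, false, DMA64_PROPNAME);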
Signed-off-by: Leonardo Bras Reviewed-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210817063929.38701-9-leobras.c@gmail.com --- arch/powerpc/platforms/pseries/iommu.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index a47f59a8f107..901f290999d0 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -844,31 +844,33 @@ static void remove_dma_window(struct device_node *np, u32 *ddw_avail, __remove_dma_window(np, ddw_avail, liobn); } -static void remove_ddw(struct device_node *np, bool remove_prop) +static int remove_ddw(struct device_node *np, bool remove_prop, const char *win_name) { struct property *win; u32 ddw_avail[DDW_APPLICABLE_SIZE]; int ret = 0; + win = of_find_property(np, win_name, NULL); + if (!win) + return -EINVAL; + ret = of_property_read_u32_array(np, "ibm,ddw-applicable", &ddw_avail[0], DDW_APPLICABLE_SIZE); if (ret) - return; + return 0; - win = of_find_property(np, DIRECT64_PROPNAME, NULL); - if (!win) - return; if (win->length >= sizeof(struct dynamic_dma_window_prop)) remove_dma_window(np, ddw_avail, win); if (!remove_prop) - return; + return 0; ret = of_remove_property(np, win); if (ret) pr_warn("%pOF: failed to remove direct window property: %d\n", np, ret); + return 0; } static bool find_existing_ddw(struct device_node *pdn, u64 *dma_addr, int *window_shift) @@ -921,7 +923,7 @@ static int find_existing_ddw_windows(void) for_each_node_with_property(pdn, DIRECT64_PROPNAME) { direct64 = of_get_property(pdn, DIRECT64_PROPNAME, &len); if (!direct64 || len < sizeof(*direct64)) { - remove_ddw(pdn, true); + remove_ddw(pdn, true, DIRECT64_PROPNAME); continue; } @@ -1565,7 +1567,7 @@ static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long acti * we have to remove the property when releasing * the device node. */ - remove_ddw(np, false); + remove_ddw(np, false, DIRECT64_PROPNAME); if (pci && pci->table_group) iommu_pseries_free_group(pci->table_group, np->full_name); -- cgit From 8599395d34f2dd7b77bef42da1d99798e7a3d58f Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Tue, 17 Aug 2021 03:39:27 -0300 Subject: powerpc/pseries/iommu: Find existing DDW with given property name At the moment pseries stores information about the created directly-mapped DDW window in DIRECT64_PROPNAME. With the objective of implementing indirect DMA mapping with DDW, it's necessary to have another property name to make sure kexec'ing into older kernels does not break, as it would if we reuse DIRECT64_PROPNAME. In order to have this, find_existing_ddw_windows() needs to be able to look for different property names. Extract find_existing_ddw_windows() into find_existing_ddw_windows_named() and call it with the current property name.
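The remaining wrapper then just enumerates the known property names (sketch; the second name is only introduced by a later patch in this series):

    find_existing_ddw_windows_named(DIRECT64_PROPNAME);
    find_existing_ddw_windows_named(DMA64_PROPNAME);  /* added later */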
Signed-off-by: Leonardo Bras Reviewed-by: Alexey Kardashevskiy Reviewed-by: Frederic Barrat Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210817063929.38701-10-leobras.c@gmail.com --- arch/powerpc/platforms/pseries/iommu.c | 25 +++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 901f290999d0..e11c00b2dc1e 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -910,24 +910,21 @@ static struct direct_window *ddw_list_new_entry(struct device_node *pdn, return window; } -static int find_existing_ddw_windows(void) +static void find_existing_ddw_windows_named(const char *name) { int len; struct device_node *pdn; struct direct_window *window; - const struct dynamic_dma_window_prop *direct64; - - if (!firmware_has_feature(FW_FEATURE_LPAR)) - return 0; + const struct dynamic_dma_window_prop *dma64; - for_each_node_with_property(pdn, DIRECT64_PROPNAME) { - direct64 = of_get_property(pdn, DIRECT64_PROPNAME, &len); - if (!direct64 || len < sizeof(*direct64)) { - remove_ddw(pdn, true, DIRECT64_PROPNAME); + for_each_node_with_property(pdn, name) { + dma64 = of_get_property(pdn, name, &len); + if (!dma64 || len < sizeof(*dma64)) { + remove_ddw(pdn, true, name); continue; } - window = ddw_list_new_entry(pdn, direct64); + window = ddw_list_new_entry(pdn, dma64); + if (!window) break; @@ -935,6 +932,14 @@ static int find_existing_ddw_windows(void) list_add(&window->list, &direct_window_list); spin_unlock(&direct_window_list_lock); } +} + +static int find_existing_ddw_windows(void) +{ + if (!firmware_has_feature(FW_FEATURE_LPAR)) + return 0; + + find_existing_ddw_windows_named(DIRECT64_PROPNAME); return 0; } -- cgit From 381ceda88c4c4c8345cad1cffa6328892f15dca6 Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Tue, 17 Aug 2021 03:39:28 -0300 Subject: powerpc/pseries/iommu: Make use of DDW for indirect mapping So far it's assumed possible to map the guest RAM 1:1 to the bus, which works with a small number of devices. SRIOV changes it as the user can configure hundreds of VFs, and since phyp preallocates TCEs and does not allow IOMMU pages bigger than 64K, it has to limit the number of TCEs per PE to limit waste of physical pages. As of today, if the assumed direct mapping is not possible, DDW creation is skipped and the default DMA window "ibm,dma-window" is used instead. By using DDW, indirect mapping can get more TCEs than available for the default DMA window, and also get access to using much larger pagesizes (16MB as implemented in qemu vs 4k from default DMA window), causing a significant increase in the maximum amount of memory that can be IOMMU mapped at the same time. Indirect mapping will only be used if direct mapping is not a possibility. For indirect mapping, it's necessary to re-create the iommu_table with the new DMA window parameters, so iommu_alloc() can use it. Removing the default DMA window for using DDW with indirect mapping is only allowed if there is no current IOMMU memory allocated in the iommu_table. enable_ddw() is aborted otherwise. Even though there won't be both direct and indirect mappings at the same time, we can't reuse the DIRECT64_PROPNAME property name, or else an older kexec()ed kernel can assume direct mapping, and skip iommu_alloc(), causing undesirable behavior.
So a new property name DMA64_PROPNAME "linux,dma64-ddr-window-info" was created to represent a DDW that does not allow direct mapping. Signed-off-by: Leonardo Bras Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210817063929.38701-11-leobras.c@gmail.com --- arch/powerpc/platforms/pseries/iommu.c | 89 ++++++++++++++++++++++++++++------ 1 file changed, 74 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index e11c00b2dc1e..0eccc29f5573 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -375,6 +375,7 @@ static DEFINE_SPINLOCK(direct_window_list_lock); /* protects initializing window twice for same device */ static DEFINE_MUTEX(direct_window_init_mutex); #define DIRECT64_PROPNAME "linux,direct64-ddr-window-info" +#define DMA64_PROPNAME "linux,dma64-ddr-window-info" static int tce_clearrange_multi_pSeriesLP(unsigned long start_pfn, unsigned long num_pfn, const void *arg) @@ -940,6 +941,7 @@ static int find_existing_ddw_windows(void) return 0; find_existing_ddw_windows_named(DIRECT64_PROPNAME); + find_existing_ddw_windows_named(DMA64_PROPNAME); return 0; } @@ -1226,14 +1228,17 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) struct ddw_create_response create; int page_shift; u64 win_addr; + const char *win_name; struct device_node *dn; u32 ddw_avail[DDW_APPLICABLE_SIZE]; struct direct_window *window; struct property *win64; bool ddw_enabled = false; struct failed_ddw_pdn *fpdn; - bool default_win_removed = false; + bool default_win_removed = false, direct_mapping = false; bool pmem_present; + struct pci_dn *pci = PCI_DN(pdn); + struct iommu_table *tbl = pci->table_group->tables[0]; dn = of_find_node_by_type(NULL, "ibm,pmemory"); pmem_present = dn != NULL; @@ -1242,6 +1247,7 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) mutex_lock(&direct_window_init_mutex); if (find_existing_ddw(pdn, &dev->dev.archdata.dma_offset, &len)) { + direct_mapping = (len >= max_ram_len); ddw_enabled = true; goto out_unlock; } @@ -1322,8 +1328,8 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) query.page_size); goto out_failed; } - /* verify the window * number of ptes will map the partition */ - /* check largest block * page size > max memory hotplug addr */ + + /* * The "ibm,pmemory" can appear anywhere in the address space. 
* Assuming it is still backed by page structs, try MAX_PHYSMEM_BITS @@ -1339,13 +1345,25 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) dev_info(&dev->dev, "Skipping ibm,pmemory"); } + /* check if the available block * number of ptes will map everything */ if (query.largest_available_block < (1ULL << (len - page_shift))) { dev_dbg(&dev->dev, "can't map partition max 0x%llx with %llu %llu-sized pages\n", 1ULL << len, query.largest_available_block, 1ULL << page_shift); - goto out_failed; + + /* DDW + IOMMU on single window may fail if there is any allocation */ + if (default_win_removed && iommu_table_in_use(tbl)) { + dev_dbg(&dev->dev, "current IOMMU table in use, can't be replaced.\n"); + goto out_failed; + } + + len = order_base_2(query.largest_available_block << page_shift); + win_name = DMA64_PROPNAME; + } else { + direct_mapping = true; + win_name = DIRECT64_PROPNAME; } ret = create_ddw(dev, ddw_avail, &create, page_shift, len); @@ -1356,8 +1374,8 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) create.liobn, dn); win_addr = ((u64)create.addr_hi << 32) | create.addr_lo; - win64 = ddw_property_create(DIRECT64_PROPNAME, create.liobn, win_addr, - page_shift, len); + win64 = ddw_property_create(win_name, create.liobn, win_addr, page_shift, len); + if (!win64) { dev_info(&dev->dev, "couldn't allocate property, property name, or value\n"); @@ -1375,15 +1393,54 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) if (!window) goto out_del_prop; - ret = walk_system_ram_range(0, memblock_end_of_DRAM() >> PAGE_SHIFT, - win64->value, tce_setrange_multi_pSeriesLP_walk); - if (ret) { - dev_info(&dev->dev, "failed to map direct window for %pOF: %d\n", - dn, ret); + if (direct_mapping) { + /* DDW maps the whole partition, so enable direct DMA mapping */ + ret = walk_system_ram_range(0, memblock_end_of_DRAM() >> PAGE_SHIFT, + win64->value, tce_setrange_multi_pSeriesLP_walk); + if (ret) { + dev_info(&dev->dev, "failed to map direct window for %pOF: %d\n", + dn, ret); /* Make sure to clean DDW if any TCE was set*/ clean_dma_window(pdn, win64->value); - goto out_del_list; + goto out_del_list; + } + } else { + struct iommu_table *newtbl; + int i; + + for (i = 0; i < ARRAY_SIZE(pci->phb->mem_resources); i++) { + const unsigned long mask = IORESOURCE_MEM_64 | IORESOURCE_MEM; + + /* Look for MMIO32 */ + if ((pci->phb->mem_resources[i].flags & mask) == IORESOURCE_MEM) + break; + } + + if (i == ARRAY_SIZE(pci->phb->mem_resources)) + goto out_del_list; + + /* New table for using DDW instead of the default DMA window */ + newtbl = iommu_pseries_alloc_table(pci->phb->node); + if (!newtbl) { + dev_dbg(&dev->dev, "couldn't create new IOMMU table\n"); + goto out_del_list; + } + + iommu_table_setparms_common(newtbl, pci->phb->bus->number, create.liobn, win_addr, + 1UL << len, page_shift, NULL, &iommu_table_lpar_multi_ops); + iommu_init_table(newtbl, pci->phb->node, pci->phb->mem_resources[i].start, + pci->phb->mem_resources[i].end); + + pci->table_group->tables[1] = newtbl; + + /* Keep default DMA window stuct if removed */ + if (default_win_removed) { + tbl->it_size = 0; + kfree(tbl->it_map); + } + + set_iommu_table_base(&dev->dev, newtbl); } spin_lock(&direct_window_list_lock); @@ -1427,10 +1484,10 @@ out_unlock: * as RAM, then we failed to create a window to cover persistent * memory and need to set the DMA limit. 
*/ - if (pmem_present && ddw_enabled && (len == max_ram_len)) + if (pmem_present && ddw_enabled && direct_mapping && len == max_ram_len) dev->dev.bus_dma_limit = dev->dev.archdata.dma_offset + (1ULL << len); - return ddw_enabled; + return ddw_enabled && direct_mapping; } static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) @@ -1572,7 +1629,9 @@ static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long acti * we have to remove the property when releasing * the device node. */ - remove_ddw(np, false, DIRECT64_PROPNAME); + if (remove_ddw(np, false, DIRECT64_PROPNAME)) + remove_ddw(np, false, DMA64_PROPNAME); + if (pci && pci->table_group) iommu_pseries_free_group(pci->table_group, np->full_name); -- cgit From 57dbbe590f152e5e8a3ff8bf5ba163df34eeae0b Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Tue, 17 Aug 2021 03:39:29 -0300 Subject: powerpc/pseries/iommu: Rename "direct window" to "dma window" A previous change introduced the usage of DDW as a bigger indirect DMA mapping when the DDW available size does not map the whole partition. As most of the code that manipulates direct mappings was reused for indirect mappings, it's necessary to rename all names and debug/info messages to reflect that it can be used for both kinds of mapping. This should cause no behavioural change, just adjust naming. Signed-off-by: Leonardo Bras Reviewed-by: Frederic Barrat Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210817063929.38701-12-leobras.c@gmail.com --- arch/powerpc/platforms/pseries/iommu.c | 87 ++++++++++++++++++---------------- 1 file changed, 45 insertions(+), 42 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 0eccc29f5573..dab5c56ffd0e 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -349,7 +349,7 @@ struct dynamic_dma_window_prop { __be32 window_shift; /* ilog2(tce_window_size) */ }; -struct direct_window { +struct dma_win { struct device_node *device; const struct dynamic_dma_window_prop *prop; struct list_head list; @@ -369,11 +369,11 @@ struct ddw_create_response { u32 addr_lo; }; -static LIST_HEAD(direct_window_list); +static LIST_HEAD(dma_win_list); /* prevents races between memory on/offline and window creation */ -static DEFINE_SPINLOCK(direct_window_list_lock); +static DEFINE_SPINLOCK(dma_win_list_lock); /* protects initializing window twice for same device */ -static DEFINE_MUTEX(direct_window_init_mutex); +static DEFINE_MUTEX(dma_win_init_mutex); #define DIRECT64_PROPNAME "linux,direct64-ddr-window-info" #define DMA64_PROPNAME "linux,dma64-ddr-window-info" @@ -713,7 +713,10 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) pr_debug("pci_dma_bus_setup_pSeriesLP: setting up bus %pOF\n", dn); - /* Find nearest ibm,dma-window, walking up the device tree */ + /* + * Find nearest ibm,dma-window (default DMA window), walking up the + * device tree + */ for (pdn = dn; pdn != NULL; pdn = pdn->parent) { dma_window = of_get_property(pdn, "ibm,dma-window", NULL); if (dma_window != NULL) @@ -869,37 +872,37 @@ static int remove_ddw(struct device_node *np, bool remove_prop, const char *win_ ret = of_remove_property(np, win); if (ret) - pr_warn("%pOF: failed to remove direct window property: %d\n", + pr_warn("%pOF: failed to remove DMA window property: %d\n", np, ret); return 0; } static bool find_existing_ddw(struct device_node *pdn, u64 *dma_addr, int *window_shift) { - struct direct_window *window; - const 
struct dynamic_dma_window_prop *direct64; + struct dma_win *window; + const struct dynamic_dma_window_prop *dma64; bool found = false; - spin_lock(&direct_window_list_lock); + spin_lock(&dma_win_list_lock); /* check if we already created a window and dupe that config if so */ - list_for_each_entry(window, &direct_window_list, list) { + list_for_each_entry(window, &dma_win_list, list) { if (window->device == pdn) { - direct64 = window->prop; - *dma_addr = be64_to_cpu(direct64->dma_base); - *window_shift = be32_to_cpu(direct64->window_shift); + dma64 = window->prop; + *dma_addr = be64_to_cpu(dma64->dma_base); + *window_shift = be32_to_cpu(dma64->window_shift); found = true; break; } } - spin_unlock(&direct_window_list_lock); + spin_unlock(&dma_win_list_lock); return found; } -static struct direct_window *ddw_list_new_entry(struct device_node *pdn, - const struct dynamic_dma_window_prop *dma64) +static struct dma_win *ddw_list_new_entry(struct device_node *pdn, + const struct dynamic_dma_window_prop *dma64) { - struct direct_window *window; + struct dma_win *window; window = kzalloc(sizeof(*window), GFP_KERNEL); if (!window) @@ -915,7 +918,7 @@ static void find_existing_ddw_windows_named(const char *name) { int len; struct device_node *pdn; - struct direct_window *window; + struct dma_win *window; const struct dynamic_dma_window_prop *dma64; for_each_node_with_property(pdn, name) { @@ -929,9 +932,9 @@ static void find_existing_ddw_windows_named(const char *name) if (!window) break; - spin_lock(&direct_window_list_lock); - list_add(&window->list, &direct_window_list); - spin_unlock(&direct_window_list_lock); + spin_lock(&dma_win_list_lock); + list_add(&window->list, &dma_win_list); + spin_unlock(&dma_win_list_lock); } } @@ -1231,7 +1234,7 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) const char *win_name; struct device_node *dn; u32 ddw_avail[DDW_APPLICABLE_SIZE]; - struct direct_window *window; + struct dma_win *window; struct property *win64; bool ddw_enabled = false; struct failed_ddw_pdn *fpdn; @@ -1244,7 +1247,7 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) pmem_present = dn != NULL; of_node_put(dn); - mutex_lock(&direct_window_init_mutex); + mutex_lock(&dma_win_init_mutex); if (find_existing_ddw(pdn, &dev->dev.archdata.dma_offset, &len)) { direct_mapping = (len >= max_ram_len); @@ -1324,8 +1327,8 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) page_shift = iommu_get_page_shift(query.page_size); if (!page_shift) { - dev_dbg(&dev->dev, "no supported direct page size in mask %x", - query.page_size); + dev_dbg(&dev->dev, "no supported page size in mask %x", + query.page_size); goto out_failed; } @@ -1384,7 +1387,7 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) ret = of_add_property(pdn, win64); if (ret) { - dev_err(&dev->dev, "unable to add dma window property for %pOF: %d", + dev_err(&dev->dev, "unable to add DMA window property for %pOF: %d", pdn, ret); goto out_free_prop; } @@ -1398,7 +1401,7 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) ret = walk_system_ram_range(0, memblock_end_of_DRAM() >> PAGE_SHIFT, win64->value, tce_setrange_multi_pSeriesLP_walk); if (ret) { - dev_info(&dev->dev, "failed to map direct window for %pOF: %d\n", + dev_info(&dev->dev, "failed to map DMA window for %pOF: %d\n", dn, ret); /* Make sure to clean DDW if any TCE was set*/ @@ -1443,9 +1446,9 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) 
set_iommu_table_base(&dev->dev, newtbl); } - spin_lock(&direct_window_list_lock); - list_add(&window->list, &direct_window_list); - spin_unlock(&direct_window_list_lock); + spin_lock(&dma_win_list_lock); + list_add(&window->list, &dma_win_list); + spin_unlock(&dma_win_list_lock); dev->dev.archdata.dma_offset = win_addr; ddw_enabled = true; @@ -1477,7 +1480,7 @@ out_failed: list_add(&fpdn->list, &failed_ddw_pdn_list); out_unlock: - mutex_unlock(&direct_window_init_mutex); + mutex_unlock(&dma_win_init_mutex); /* * If we have persistent memory and the window size is only as big @@ -1575,29 +1578,29 @@ static bool iommu_bypass_supported_pSeriesLP(struct pci_dev *pdev, u64 dma_mask) static int iommu_mem_notifier(struct notifier_block *nb, unsigned long action, void *data) { - struct direct_window *window; + struct dma_win *window; struct memory_notify *arg = data; int ret = 0; switch (action) { case MEM_GOING_ONLINE: - spin_lock(&direct_window_list_lock); - list_for_each_entry(window, &direct_window_list, list) { + spin_lock(&dma_win_list_lock); + list_for_each_entry(window, &dma_win_list, list) { ret |= tce_setrange_multi_pSeriesLP(arg->start_pfn, arg->nr_pages, window->prop); /* XXX log error */ } - spin_unlock(&direct_window_list_lock); + spin_unlock(&dma_win_list_lock); break; case MEM_CANCEL_ONLINE: case MEM_OFFLINE: - spin_lock(&direct_window_list_lock); - list_for_each_entry(window, &direct_window_list, list) { + spin_lock(&dma_win_list_lock); + list_for_each_entry(window, &dma_win_list, list) { ret |= tce_clearrange_multi_pSeriesLP(arg->start_pfn, arg->nr_pages, window->prop); /* XXX log error */ } - spin_unlock(&direct_window_list_lock); + spin_unlock(&dma_win_list_lock); break; default: break; @@ -1618,7 +1621,7 @@ static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long acti struct of_reconfig_data *rd = data; struct device_node *np = rd->dn; struct pci_dn *pci = PCI_DN(np); - struct direct_window *window; + struct dma_win *window; switch (action) { case OF_RECONFIG_DETACH_NODE: @@ -1636,15 +1639,15 @@ static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long acti iommu_pseries_free_group(pci->table_group, np->full_name); - spin_lock(&direct_window_list_lock); - list_for_each_entry(window, &direct_window_list, list) { + spin_lock(&dma_win_list_lock); + list_for_each_entry(window, &dma_win_list, list) { if (window->device == np) { list_del(&window->list); kfree(window); break; } } - spin_unlock(&direct_window_list_lock); + spin_unlock(&dma_win_list_lock); break; default: err = NOTIFY_DONE; -- cgit From b76dd9302af7803aac62243ae94d53067fc819b4 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 25 Jun 2021 10:34:34 +0200 Subject: um: make PCI emulation driver init/exit static The functions aren't used elsewhere, so they can be static. 
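For illustration, a minimal sketch of the idiom this patch adopts (the demo_* names are hypothetical, not from the driver): functions referenced only through module_init()/module_exit() need no external linkage, so marking them static both documents that and lets the compiler and sparse verify it.

	#include <linux/module.h>

	static int __init demo_init(void)
	{
		/* device/driver registration would go here */
		return 0;
	}

	static void __exit demo_exit(void)
	{
		/* teardown mirrors demo_init() */
	}

	module_init(demo_init);
	module_exit(demo_exit);
	MODULE_LICENSE("GPL");

The module_init()/module_exit() macros record the function pointers for the module loader, so no other translation unit ever needs these symbols by name.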
Reported-by: kernel test robot Fixes: 68f5d3f3b654 ("um: add PCI over virtio emulation driver") Signed-off-by: Johannes Berg Acked-By: Anton Ivanov Signed-off-by: Richard Weinberger --- arch/um/drivers/virt-pci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/um/drivers/virt-pci.c b/arch/um/drivers/virt-pci.c index 0b802834f40a..70c17de16662 100644 --- a/arch/um/drivers/virt-pci.c +++ b/arch/um/drivers/virt-pci.c @@ -810,7 +810,7 @@ void *pci_root_bus_fwnode(struct pci_bus *bus) return um_pci_fwnode; } -int um_pci_init(void) +static int um_pci_init(void) { int err, i; @@ -884,7 +884,7 @@ free: } module_init(um_pci_init); -void um_pci_exit(void) +static void um_pci_exit(void) { unregister_virtio_driver(&um_pci_virtio_driver); irq_domain_remove(um_pci_msi_domain); -- cgit From 21976f2b747edd9350336cdd6700b792a3bc2170 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 25 Jun 2021 10:34:36 +0200 Subject: um: virtio_uml: include linux/virtio-uml.h This fixes a sparse warning, since the function defined here should have a declaration (or be static). Reported-by: kernel test robot Fixes: 43c590cb8666 ("um: virtio/pci: enable suspend/resume") Signed-off-by: Johannes Berg Acked-By: Anton Ivanov Signed-off-by: Richard Weinberger --- arch/um/drivers/virtio_uml.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c index 4412d6febade..cb79fe33d84e 100644 --- a/arch/um/drivers/virtio_uml.c +++ b/arch/um/drivers/virtio_uml.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include -- cgit From 7ad28e0df7ee9dbcb793bb88dd81d4d22bb9a10e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 25 Jun 2021 10:34:37 +0200 Subject: um: virtio_uml: fix memory leak on init failures If initialization fails, e.g. because the connection failed, we leak the 'vu_dev'. Fix that. Reported by smatch. Fixes: 5d38f324993f ("um: drivers: Add virtio vhost-user driver") Signed-off-by: Johannes Berg Acked-By: Anton Ivanov Signed-off-by: Richard Weinberger --- arch/um/drivers/virtio_uml.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c index cb79fe33d84e..d51e445df797 100644 --- a/arch/um/drivers/virtio_uml.c +++ b/arch/um/drivers/virtio_uml.c @@ -1140,7 +1140,7 @@ static int virtio_uml_probe(struct platform_device *pdev) rc = os_connect_socket(pdata->socket_path); } while (rc == -EINTR); if (rc < 0) - return rc; + goto error_free; vu_dev->sock = rc; spin_lock_init(&vu_dev->sock_lock); @@ -1161,6 +1161,8 @@ static int virtio_uml_probe(struct platform_device *pdev) error_init: os_close_file(vu_dev->sock); +error_free: + kfree(vu_dev); return rc; } -- cgit From 4a22c4cebd61f67bd8a2c79f8dae10df074f62b9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 11 Aug 2021 16:58:25 +0200 Subject: um: virt-pci: don't do DMA from stack When enabling VMAP_STACK, SG helpers immediately complained that we were doing DMA from stack. Use per-CPU variables to avoid that. 
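A minimal sketch of the pattern (hypothetical demo_* names, not the patch itself): with VMAP_STACK the stack lives in vmalloc space, which has no lowmem linear mapping, so scatterlist/DMA helpers must not be handed stack addresses. Preallocated per-CPU buffers are in lowmem, and disabling preemption while one is in use is enough to serialize access:

	#include <linux/percpu.h>
	#include <linux/scatterlist.h>
	#include <linux/string.h>

	struct demo_msg_buf {
		u8 data[8];
	};

	/* allocated once at init: demo_bufs = alloc_percpu(struct demo_msg_buf); */
	static struct demo_msg_buf __percpu *demo_bufs;

	static void demo_fill_sg(struct scatterlist *sg, const void *src, size_t len)
	{
		/* get_cpu_ptr() disables preemption, making this CPU's buffer ours */
		struct demo_msg_buf *buf = get_cpu_ptr(demo_bufs);

		if (len > sizeof(buf->data))
			len = sizeof(buf->data);
		memcpy(buf->data, src, len);
		sg_init_one(sg, buf->data, len);	/* lowmem address: sg/DMA-safe */

		/* ... submit and wait for completion before releasing ... */

		put_cpu_ptr(demo_bufs);			/* re-enables preemption */
	}

The caller must not sleep or migrate between get_cpu_ptr() and put_cpu_ptr(); the patch below follows the same discipline with get_cpu_var()/put_cpu_var().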
Signed-off-by: Johannes Berg Signed-off-by: Richard Weinberger --- arch/um/drivers/virt-pci.c | 104 +++++++++++++++++++++++++++++++++------------ 1 file changed, 78 insertions(+), 26 deletions(-) (limited to 'arch') diff --git a/arch/um/drivers/virt-pci.c b/arch/um/drivers/virt-pci.c index 70c17de16662..c08066633023 100644 --- a/arch/um/drivers/virt-pci.c +++ b/arch/um/drivers/virt-pci.c @@ -56,6 +56,13 @@ static unsigned long um_pci_msi_used[BITS_TO_LONGS(MAX_MSI_VECTORS)]; #define UM_VIRT_PCI_MAXDELAY 40000 +struct um_pci_message_buffer { + struct virtio_pcidev_msg hdr; + u8 data[8]; +}; + +static struct um_pci_message_buffer __percpu *um_pci_msg_bufs; + static int um_pci_send_cmd(struct um_pci_device *dev, struct virtio_pcidev_msg *cmd, unsigned int cmd_size, @@ -68,11 +75,12 @@ static int um_pci_send_cmd(struct um_pci_device *dev, [1] = extra ? &extra_sg : &in_sg, [2] = extra ? &in_sg : NULL, }; + struct um_pci_message_buffer *buf; int delay_count = 0; int ret, len; bool posted; - if (WARN_ON(cmd_size < sizeof(*cmd))) + if (WARN_ON(cmd_size < sizeof(*cmd) || cmd_size > sizeof(*buf))) return -EINVAL; switch (cmd->op) { @@ -88,6 +96,9 @@ static int um_pci_send_cmd(struct um_pci_device *dev, break; } + buf = get_cpu_var(um_pci_msg_bufs); + memcpy(buf, cmd, cmd_size); + if (posted) { u8 *ncmd = kmalloc(cmd_size + extra_size, GFP_ATOMIC); @@ -102,7 +113,10 @@ static int um_pci_send_cmd(struct um_pci_device *dev, } else { /* try without allocating memory */ posted = false; + cmd = (void *)buf; } + } else { + cmd = (void *)buf; } sg_init_one(&out_sg, cmd, cmd_size); @@ -118,11 +132,12 @@ static int um_pci_send_cmd(struct um_pci_device *dev, posted ? cmd : HANDLE_NO_FREE(cmd), GFP_ATOMIC); if (ret) - return ret; + goto out; if (posted) { virtqueue_kick(dev->cmd_vq); - return 0; + ret = 0; + goto out; } /* kick and poll for getting a response on the queue */ @@ -148,6 +163,8 @@ static int um_pci_send_cmd(struct um_pci_device *dev, } clear_bit(UM_PCI_STAT_WAITING, &dev->status); +out: + put_cpu_var(um_pci_msg_bufs); return ret; } @@ -161,12 +178,17 @@ static unsigned long um_pci_cfgspace_read(void *priv, unsigned int offset, .size = size, .addr = offset, }; - /* maximum size - we may only use parts of it */ - u8 data[8]; + /* buf->data is maximum size - we may only use parts of it */ + struct um_pci_message_buffer *buf; + u8 *data; + unsigned long ret = ~0ULL; if (!dev) return ~0ULL; + buf = get_cpu_var(um_pci_msg_bufs); + data = buf->data; + memset(data, 0xff, sizeof(data)); switch (size) { @@ -179,27 +201,34 @@ static unsigned long um_pci_cfgspace_read(void *priv, unsigned int offset, break; default: WARN(1, "invalid config space read size %d\n", size); - return ~0ULL; + goto out; } - if (um_pci_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0, - data, sizeof(data))) - return ~0ULL; + if (um_pci_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0, data, 8)) + goto out; switch (size) { case 1: - return data[0]; + ret = data[0]; + break; case 2: - return le16_to_cpup((void *)data); + ret = le16_to_cpup((void *)data); + break; case 4: - return le32_to_cpup((void *)data); + ret = le32_to_cpup((void *)data); + break; #ifdef CONFIG_64BIT case 8: - return le64_to_cpup((void *)data); + ret = le64_to_cpup((void *)data); + break; #endif default: - return ~0ULL; + break; } + +out: + put_cpu_var(um_pci_msg_bufs); + return ret; } static void um_pci_cfgspace_write(void *priv, unsigned int offset, int size, @@ -272,8 +301,13 @@ static void um_pci_bar_copy_from(void *priv, void *buffer, static unsigned long um_pci_bar_read(void 
*priv, unsigned int offset, int size) { - /* maximum size - we may only use parts of it */ - u8 data[8]; + /* buf->data is maximum size - we may only use parts of it */ + struct um_pci_message_buffer *buf; + u8 *data; + unsigned long ret = ~0ULL; + + buf = get_cpu_var(um_pci_msg_bufs); + data = buf->data; switch (size) { case 1: @@ -285,25 +319,33 @@ static unsigned long um_pci_bar_read(void *priv, unsigned int offset, break; default: WARN(1, "invalid config space read size %d\n", size); - return ~0ULL; + goto out; } um_pci_bar_copy_from(priv, data, offset, size); switch (size) { case 1: - return data[0]; + ret = data[0]; + break; case 2: - return le16_to_cpup((void *)data); + ret = le16_to_cpup((void *)data); + break; case 4: - return le32_to_cpup((void *)data); + ret = le32_to_cpup((void *)data); + break; #ifdef CONFIG_64BIT case 8: - return le64_to_cpup((void *)data); + ret = le64_to_cpup((void *)data); + break; #endif default: - return ~0ULL; + break; } + +out: + put_cpu_var(um_pci_msg_bufs); + return ret; } static void um_pci_bar_copy_to(void *priv, unsigned int offset, @@ -823,10 +865,16 @@ static int um_pci_init(void) "No virtio device ID configured for PCI - no PCI support\n")) return 0; - bridge = pci_alloc_host_bridge(0); - if (!bridge) + um_pci_msg_bufs = alloc_percpu(struct um_pci_message_buffer); + if (!um_pci_msg_bufs) return -ENOMEM; + bridge = pci_alloc_host_bridge(0); + if (!bridge) { + err = -ENOMEM; + goto free; + } + um_pci_fwnode = irq_domain_alloc_named_fwnode("um-pci"); if (!um_pci_fwnode) { err = -ENOMEM; @@ -878,8 +926,11 @@ free: irq_domain_remove(um_pci_inner_domain); if (um_pci_fwnode) irq_domain_free_fwnode(um_pci_fwnode); - pci_free_resource_list(&bridge->windows); - pci_free_host_bridge(bridge); + if (bridge) { + pci_free_resource_list(&bridge->windows); + pci_free_host_bridge(bridge); + } + free_percpu(um_pci_msg_bufs); return err; } module_init(um_pci_init); @@ -891,5 +942,6 @@ static void um_pci_exit(void) irq_domain_remove(um_pci_inner_domain); pci_free_resource_list(&bridge->windows); pci_free_host_bridge(bridge); + free_percpu(um_pci_msg_bufs); } module_exit(um_pci_exit); -- cgit From bc5c49d79206b40ad16e055837a33b6dc6160bed Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 11 Aug 2021 16:58:26 +0200 Subject: um: enable VMAP_STACK This works just fine, so select HAVE_ARCH_VMAP_STACK to let users enable VMAP_STACK if desired. Signed-off-by: Johannes Berg Signed-off-by: Richard Weinberger --- arch/um/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/um/Kconfig b/arch/um/Kconfig index 0561b73cfd9a..389c9f5bcb2b 100644 --- a/arch/um/Kconfig +++ b/arch/um/Kconfig @@ -21,6 +21,7 @@ config UML select HAVE_GCC_PLUGINS select SET_FS select TTY # Needed for line.c + select HAVE_ARCH_VMAP_STACK config MMU bool -- cgit From adf9ae0d159d3dc94f58d788fc4757c8749ac0df Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 13 Jul 2021 23:47:10 +0200 Subject: um: fix stub location calculation In commit 9f0b4807a44f ("um: rework userspace stubs to not hard-code stub location") I changed stub_segv_handler() to do a calculation with a pointer to a stack variable to find the data page that we're using for the stack and the rest of the data. This same commit was meant to do it as well for stub_clone_handler(), but the change inadvertently went into commit 84b2789d6115 ("um: separate child and parent errors in clone stub") instead. This was reported to not be compiled correctly by gcc 5, causing the code to crash here. 
I'm not sure why, perhaps it's UB because the var isn't initialized? In any case, this trick always seemed bad, so just create a new inline function that does the calculation in assembly. Reported-by: subashab@codeaurora.org Fixes: 9f0b4807a44f ("um: rework userspace stubs to not hard-code stub location") Fixes: 84b2789d6115 ("um: separate child and parent errors in clone stub") Signed-off-by: Johannes Berg Signed-off-by: Richard Weinberger --- arch/um/kernel/skas/clone.c | 3 +-- arch/x86/um/shared/sysdep/stub_32.h | 12 ++++++++++++ arch/x86/um/shared/sysdep/stub_64.h | 12 ++++++++++++ arch/x86/um/stub_segv.c | 3 +-- 4 files changed, 26 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/um/kernel/skas/clone.c b/arch/um/kernel/skas/clone.c index 5afac0fef24e..ff5061f29167 100644 --- a/arch/um/kernel/skas/clone.c +++ b/arch/um/kernel/skas/clone.c @@ -24,8 +24,7 @@ void __attribute__ ((__section__ (".__syscall_stub"))) stub_clone_handler(void) { - int stack; - struct stub_data *data = (void *) ((unsigned long)&stack & ~(UM_KERN_PAGE_SIZE - 1)); + struct stub_data *data = get_stub_page(); long err; err = stub_syscall2(__NR_clone, CLONE_PARENT | CLONE_FILES | SIGCHLD, diff --git a/arch/x86/um/shared/sysdep/stub_32.h b/arch/x86/um/shared/sysdep/stub_32.h index b95db9daf0e8..4c6c2be0c899 100644 --- a/arch/x86/um/shared/sysdep/stub_32.h +++ b/arch/x86/um/shared/sysdep/stub_32.h @@ -101,4 +101,16 @@ static inline void remap_stack_and_trap(void) "memory"); } +static __always_inline void *get_stub_page(void) +{ + unsigned long ret; + + asm volatile ( + "movl %%esp,%0 ;" + "andl %1,%0" + : "=a" (ret) + : "g" (~(UM_KERN_PAGE_SIZE - 1))); + + return (void *)ret; +} #endif diff --git a/arch/x86/um/shared/sysdep/stub_64.h b/arch/x86/um/shared/sysdep/stub_64.h index 6e2626b77a2e..e9c4b2b38803 100644 --- a/arch/x86/um/shared/sysdep/stub_64.h +++ b/arch/x86/um/shared/sysdep/stub_64.h @@ -108,4 +108,16 @@ static inline void remap_stack_and_trap(void) __syscall_clobber, "r10", "r8", "r9"); } +static __always_inline void *get_stub_page(void) +{ + unsigned long ret; + + asm volatile ( + "movq %%rsp,%0 ;" + "andq %1,%0" + : "=a" (ret) + : "g" (~(UM_KERN_PAGE_SIZE - 1))); + + return (void *)ret; +} #endif diff --git a/arch/x86/um/stub_segv.c b/arch/x86/um/stub_segv.c index 21836eaf1725..f7eefba034f9 100644 --- a/arch/x86/um/stub_segv.c +++ b/arch/x86/um/stub_segv.c @@ -11,9 +11,8 @@ void __attribute__ ((__section__ (".__syscall_stub"))) stub_segv_handler(int sig, siginfo_t *info, void *p) { - int stack; + struct faultinfo *f = get_stub_page(); ucontext_t *uc = p; - struct faultinfo *f = (void *)(((unsigned long)&stack) & ~(UM_KERN_PAGE_SIZE - 1)); GET_FAULTINFO_FROM_MC(*f, &uc->uc_mcontext); trap_myself(); -- cgit From d9820ff76f95fa26d33e412254a89cd65b23142d Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 12 Aug 2021 12:54:43 -0700 Subject: ARC: mm: switch pgtable_t back to struct page * So far ARC pgtable_t has not been struct page based to avoid extra page_address() calls involved. However the differences are down to noise and get in the way of using generic code, hence this patch. This also allows us to reuse generic THP depost/withdraw code. There's some additional consideration for PGDIR_SHIFT in 4K page config. Now due to page tables being PAGE_SIZE deep only, the address split can't be really arbitrary. 
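A back-of-envelope illustration of that constraint (standalone user-space C sketch, assuming 4-byte table entries; ARC pte_t is wider when PAE40 is enabled): once every table level is a single page, each level can index at most PAGE_SIZE / sizeof(entry) entries, which is the condition the patch encodes with BUILD_BUG_ON().

	#include <assert.h>
	#include <stdio.h>

	/* Check a 2-level split of a 32-bit VA: every table must fit in one page. */
	static void check(unsigned int pgdir_shift, unsigned int page_shift)
	{
		unsigned int ptrs_per_pgd = 1u << (32 - pgdir_shift);
		unsigned int ptrs_per_pte = 1u << (pgdir_shift - page_shift);
		unsigned int page_bytes = 1u << page_shift;

		assert(ptrs_per_pgd * 4 <= page_bytes);
		assert(ptrs_per_pte * 4 <= page_bytes);
		printf("split %u:%u:%u fits\n", 32 - pgdir_shift,
		       pgdir_shift - page_shift, page_shift);
	}

	int main(void)
	{
		check(22, 12);	/* 4K pages: 10:10:12, the new default below */
		check(21, 13);	/* 8K pages: 11:8:13, unchanged */
		/* check(21, 12) would trip: 11 PGD bits need 8K, the page is 4K */
		return 0;
	}

This is why the 4K configuration moves PGDIR_SHIFT from 21 to 22 in the diff below, while the 8K split stays as it was.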
Tested-by: kernel test robot Suggested-by: Mike Rapoport Acked-by: Mike Rapoport Signed-off-by: Vineet Gupta --- arch/arc/include/asm/hugepage.h | 8 ----- arch/arc/include/asm/page.h | 2 +- arch/arc/include/asm/pgalloc.h | 57 +++++++++++------------------------ arch/arc/include/asm/pgtable-levels.h | 8 ++++- arch/arc/mm/init.c | 3 ++ arch/arc/mm/tlb.c | 37 ----------------------- 6 files changed, 28 insertions(+), 87 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/hugepage.h b/arch/arc/include/asm/hugepage.h index 4eef17c5c1da..11b0ff26b97b 100644 --- a/arch/arc/include/asm/hugepage.h +++ b/arch/arc/include/asm/hugepage.h @@ -58,14 +58,6 @@ static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd); -/* Generic variants assume pgtable_t is struct page *, hence need for these */ -#define __HAVE_ARCH_PGTABLE_DEPOSIT -extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, - pgtable_t pgtable); - -#define __HAVE_ARCH_PGTABLE_WITHDRAW -extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); - #define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE extern void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h index 313e6f543d2d..28ed82b1800f 100644 --- a/arch/arc/include/asm/page.h +++ b/arch/arc/include/asm/page.h @@ -60,7 +60,7 @@ typedef struct { #define __pgprot(x) ((pgprot_t) { (x) }) #define pte_pgprot(x) __pgprot(pte_val(x)) -typedef pte_t * pgtable_t; +typedef struct page *pgtable_t; /* * Use virt_to_pfn with caution: diff --git a/arch/arc/include/asm/pgalloc.h b/arch/arc/include/asm/pgalloc.h index 408bc4b0842d..8ab1af3da6e7 100644 --- a/arch/arc/include/asm/pgalloc.h +++ b/arch/arc/include/asm/pgalloc.h @@ -45,22 +45,17 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) set_pmd(pmd, __pmd((unsigned long)pte)); } -static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t pte) +static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t pte_page) { - set_pmd(pmd, __pmd((unsigned long)pte)); -} - -static inline int __get_order_pgd(void) -{ - return get_order(PTRS_PER_PGD * sizeof(pgd_t)); + set_pmd(pmd, __pmd((unsigned long)page_address(pte_page))); } static inline pgd_t *pgd_alloc(struct mm_struct *mm) { - int num, num2; - pgd_t *ret = (pgd_t *) __get_free_pages(GFP_KERNEL, __get_order_pgd()); + pgd_t *ret = (pgd_t *) __get_free_page(GFP_KERNEL); if (ret) { + int num, num2; num = USER_PTRS_PER_PGD + USER_KERNEL_GUTTER / PGDIR_SIZE; memzero(ret, num * sizeof(pgd_t)); @@ -76,61 +71,43 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) { - free_pages((unsigned long)pgd, __get_order_pgd()); -} - - -/* - * With software-only page-tables, addr-split for traversal is tweakable and - * that directly governs how big tables would be at each level. - * Further, the MMU page size is configurable. - * Thus we need to programatically assert the size constraint - * All of this is const math, allowing gcc to do constant folding/propagation. 
- */ - -static inline int __get_order_pte(void) -{ - return get_order(PTRS_PER_PTE * sizeof(pte_t)); + free_page((unsigned long)pgd); } static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) { pte_t *pte; - pte = (pte_t *) __get_free_pages(GFP_KERNEL | __GFP_ZERO, - __get_order_pte()); + pte = (pte_t *) __get_free_page(GFP_KERNEL | __GFP_ZERO); return pte; } -static inline pgtable_t -pte_alloc_one(struct mm_struct *mm) +static inline pgtable_t pte_alloc_one(struct mm_struct *mm) { - pgtable_t pte_pg; struct page *page; - pte_pg = (pgtable_t)__get_free_pages(GFP_KERNEL, __get_order_pte()); - if (!pte_pg) - return 0; - memzero((void *)pte_pg, PTRS_PER_PTE * sizeof(pte_t)); - page = virt_to_page(pte_pg); + page = (pgtable_t)alloc_page(GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT); + if (!page) + return NULL; + if (!pgtable_pte_page_ctor(page)) { __free_page(page); - return 0; + return NULL; } - return pte_pg; + return page; } static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) { - free_pages((unsigned long)pte, __get_order_pte()); /* takes phy addr */ + free_page((unsigned long)pte); } -static inline void pte_free(struct mm_struct *mm, pgtable_t ptep) +static inline void pte_free(struct mm_struct *mm, pgtable_t pte_page) { - pgtable_pte_page_dtor(virt_to_page(ptep)); - free_pages((unsigned long)ptep, __get_order_pte()); + pgtable_pte_page_dtor(pte_page); + __free_page(pte_page); } #define __pte_free_tlb(tlb, pte, addr) pte_free((tlb)->mm, pte) diff --git a/arch/arc/include/asm/pgtable-levels.h b/arch/arc/include/asm/pgtable-levels.h index e65fb8c9da12..561dedab79ed 100644 --- a/arch/arc/include/asm/pgtable-levels.h +++ b/arch/arc/include/asm/pgtable-levels.h @@ -35,9 +35,15 @@ #else /* * No Super page case - * Default value provides 11:8:13 (8K), 11:9:12 (4K) + * Default value provides 11:8:13 (8K), 10:10:12 (4K) + * Limits imposed by pgtable_t only PAGE_SIZE long + * (so 4K page can only have 1K entries: or 10 bits) */ +#ifdef CONFIG_ARC_PAGE_SIZE_4K +#define PGDIR_SHIFT 22 +#else #define PGDIR_SHIFT 21 +#endif #endif diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index c083bf660cec..46ad9aee7a73 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -189,6 +189,9 @@ void __init mem_init(void) { memblock_free_all(); highmem_init(); + + BUILD_BUG_ON((PTRS_PER_PGD * sizeof(pgd_t)) > PAGE_SIZE); + BUILD_BUG_ON((PTRS_PER_PTE * sizeof(pte_t)) > PAGE_SIZE); } #ifdef CONFIG_HIGHMEM diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index b68d5798327b..dfc0c1aba48f 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -534,43 +534,6 @@ void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, update_mmu_cache(vma, addr, &pte); } -void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, - pgtable_t pgtable) -{ - struct list_head *lh = (struct list_head *) pgtable; - - assert_spin_locked(&mm->page_table_lock); - - /* FIFO */ - if (!pmd_huge_pte(mm, pmdp)) - INIT_LIST_HEAD(lh); - else - list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp)); - pmd_huge_pte(mm, pmdp) = pgtable; -} - -pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) -{ - struct list_head *lh; - pgtable_t pgtable; - - assert_spin_locked(&mm->page_table_lock); - - pgtable = pmd_huge_pte(mm, pmdp); - lh = (struct list_head *) pgtable; - if (list_empty(lh)) - pmd_huge_pte(mm, pmdp) = NULL; - else { - pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next; - list_del(lh); - } - - pte_val(pgtable[0]) = 0; - pte_val(pgtable[1]) = 0; - - return pgtable; -} - void 
local_flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { -- cgit From 9f3c76aedcbfee61dcdf299e708888141c7132fd Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 12 Aug 2021 14:31:36 -0700 Subject: ARC: mm: switch to asm-generic/pgalloc.h With previous patch ARC pgalloc functions are same as generic, hence switch to that. Suggested-by: Mike Rapoport Acked-by: Mike Rapoport Signed-off-by: Vineet Gupta --- arch/arc/include/asm/pgalloc.h | 42 +----------------------------------------- 1 file changed, 1 insertion(+), 41 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/pgalloc.h b/arch/arc/include/asm/pgalloc.h index 8ab1af3da6e7..0cde9e5eefd7 100644 --- a/arch/arc/include/asm/pgalloc.h +++ b/arch/arc/include/asm/pgalloc.h @@ -31,6 +31,7 @@ #include #include +#include static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) @@ -69,47 +70,6 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) return ret; } -static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) -{ - free_page((unsigned long)pgd); -} - -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) -{ - pte_t *pte; - - pte = (pte_t *) __get_free_page(GFP_KERNEL | __GFP_ZERO); - - return pte; -} - -static inline pgtable_t pte_alloc_one(struct mm_struct *mm) -{ - struct page *page; - - page = (pgtable_t)alloc_page(GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT); - if (!page) - return NULL; - - if (!pgtable_pte_page_ctor(page)) { - __free_page(page); - return NULL; - } - - return page; -} - -static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) -{ - free_page((unsigned long)pte); -} - -static inline void pte_free(struct mm_struct *mm, pgtable_t pte_page) -{ - pgtable_pte_page_dtor(pte_page); - __free_page(pte_page); -} - #define __pte_free_tlb(tlb, pte, addr) pte_free((tlb)->mm, pte) #endif /* _ASM_ARC_PGALLOC_H */ -- cgit From 2dde02ab6d1a725ddccc7144ff6bf5f55d37f916 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Wed, 30 Sep 2020 18:58:50 -0700 Subject: ARC: mm: support 3 levels of page tables ARCv2 MMU is software walked and Linux implements 2 levels of paging: pgd/pte. Forthcoming hw will have multiple levels, so this change preps mm code for same. It is also fun to try multi levels even on soft-walked code to ensure generic mm code is robust to handle. overview ________ 2 levels {pgd, pte} : pmd is folded but pmd_* macros are valid and operate on pgd 3 levels {pgd, pmd, pte}: - pud is folded and pud_* macros point to pgd - pmd_* macros operate on actual pmd code changes ____________ 1. #include 2. Define CONFIG_PGTABLE_LEVELS 3 3a. Define PMD_SHIFT, PMD_SIZE, PMD_MASK, pmd_t 3b. Define pmd_val() which actually deals with pmd (pmd_offset(), pmd_index() are provided by generic code) 3c. pmd_alloc_one()/pmd_free() also provided by generic code (pmd_populate/pmd_free already exist) 4. Define pud_none(), pud_bad() macros based on generic pud_val() which internally pertains to pgd now. 4b. 
define pud_populate() to just setup pgd Acked-by: Mike Rapoport Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 4 +++ arch/arc/include/asm/page.h | 11 ++++++ arch/arc/include/asm/pgalloc.h | 11 ++++++ arch/arc/include/asm/pgtable-levels.h | 63 ++++++++++++++++++++++++++++++++--- arch/arc/include/asm/processor.h | 2 +- arch/arc/mm/fault.c | 4 +++ arch/arc/mm/init.c | 1 + arch/arc/mm/tlb.c | 4 +-- arch/arc/mm/tlbex.S | 9 +++++ 9 files changed, 101 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 56aad105ad13..f631a7a60979 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -314,6 +314,10 @@ config ARC_HUGEPAGE_16M endchoice +config PGTABLE_LEVELS + int "Number of Page table levels" + default 2 + config ARC_COMPACT_IRQ_LEVELS depends on ISA_ARCOMPACT bool "Setup Timer IRQ as high Priority" diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h index 28ed82b1800f..5d7899d87c08 100644 --- a/arch/arc/include/asm/page.h +++ b/arch/arc/include/asm/page.h @@ -41,6 +41,17 @@ typedef struct { #define pgd_val(x) ((x).pgd) #define __pgd(x) ((pgd_t) { (x) }) +#if CONFIG_PGTABLE_LEVELS > 2 + +typedef struct { + unsigned long pmd; +} pmd_t; + +#define pmd_val(x) ((x).pmd) +#define __pmd(x) ((pmd_t) { (x) }) + +#endif + typedef struct { #ifdef CONFIG_ARC_HAS_PAE40 unsigned long long pte; diff --git a/arch/arc/include/asm/pgalloc.h b/arch/arc/include/asm/pgalloc.h index 0cde9e5eefd7..781620d2e86f 100644 --- a/arch/arc/include/asm/pgalloc.h +++ b/arch/arc/include/asm/pgalloc.h @@ -70,6 +70,17 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) return ret; } +#if CONFIG_PGTABLE_LEVELS > 2 + +static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmdp) +{ + set_pud(pudp, __pud((unsigned long)pmdp)); +} + +#define __pmd_free_tlb(tlb, pmd, addr) pmd_free((tlb)->mm, pmd) + +#endif + #define __pte_free_tlb(tlb, pte, addr) pte_free((tlb)->mm, pte) #endif /* _ASM_ARC_PGALLOC_H */ diff --git a/arch/arc/include/asm/pgtable-levels.h b/arch/arc/include/asm/pgtable-levels.h index 561dedab79ed..5dd8c58f7d70 100644 --- a/arch/arc/include/asm/pgtable-levels.h +++ b/arch/arc/include/asm/pgtable-levels.h @@ -10,6 +10,8 @@ #ifndef _ASM_ARC_PGTABLE_LEVELS_H #define _ASM_ARC_PGTABLE_LEVELS_H +#if CONFIG_PGTABLE_LEVELS == 2 + /* * 2 level paging setup for software walked MMUv3 (ARC700) and MMUv4 (HS) * @@ -47,16 +49,38 @@ #endif -#define PGDIR_SIZE BIT(PGDIR_SHIFT) /* vaddr span, not PDG sz */ -#define PGDIR_MASK (~(PGDIR_SIZE - 1)) +#else /* CONFIG_PGTABLE_LEVELS != 2 */ + +/* + * A default 3 level paging testing setup in software walked MMU + * MMUv4 (8K page): <4> : <7> : <8> : <13> + */ +#define PGDIR_SHIFT 28 +#if CONFIG_PGTABLE_LEVELS > 2 +#define PMD_SHIFT 21 +#endif +#endif /* CONFIG_PGTABLE_LEVELS */ + +#define PGDIR_SIZE BIT(PGDIR_SHIFT) +#define PGDIR_MASK (~(PGDIR_SIZE - 1)) #define PTRS_PER_PGD BIT(32 - PGDIR_SHIFT) -#define PTRS_PER_PTE BIT(PGDIR_SHIFT - PAGE_SHIFT) +#if CONFIG_PGTABLE_LEVELS > 2 +#define PMD_SIZE BIT(PMD_SHIFT) +#define PMD_MASK (~(PMD_SIZE - 1)) +#define PTRS_PER_PMD BIT(PGDIR_SHIFT - PMD_SHIFT) +#endif + +#define PTRS_PER_PTE BIT(PMD_SHIFT - PAGE_SHIFT) #ifndef __ASSEMBLY__ +#if CONFIG_PGTABLE_LEVELS > 2 +#include +#else #include +#endif /* * 1st level paging: pgd @@ -67,9 +91,35 @@ #define pgd_ERROR(e) \ pr_crit("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) +#if CONFIG_PGTABLE_LEVELS > 2 + +/* In 3 level paging, pud_* macros work on pgd */ +#define pud_none(x) (!pud_val(x)) +#define 
pud_bad(x) ((pud_val(x) & ~PAGE_MASK)) +#define pud_present(x) (pud_val(x)) +#define pud_clear(xp) do { pud_val(*(xp)) = 0; } while (0) +#define pud_pgtable(pud) ((pmd_t *)(pud_val(pud) & PAGE_MASK)) +#define pud_page(pud) virt_to_page(pud_pgtable(pud)) +#define set_pud(pudp, pud) (*(pudp) = pud) + /* - * Due to the strange way generic pgtable level folding works, in a 2 level - * setup, pmd_val() returns pgd, so these pmd_* macros actually work on pgd + * 2nd level paging: pmd + */ +#define pmd_ERROR(e) \ + pr_crit("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e)) + +#define pmd_pfn(pmd) ((pmd_val(pmd) & PMD_MASK) >> PAGE_SHIFT) +#define pfn_pmd(pfn,prot) __pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot)) +#define mk_pmd(page,prot) pfn_pmd(page_to_pfn(page),prot) + +#endif + +/* + * Due to the strange way generic pgtable level folding works, the pmd_* macros + * - are valid even for 2 levels (which supposedly only has pgd - pte) + * - behave differently for 2 vs. 3 + * In 2 level paging (pgd -> pte), pmd_* macros work on pgd + * In 3+ level paging (pgd -> pmd -> pte), pmd_* macros work on pmd */ #define pmd_none(x) (!pmd_val(x)) #define pmd_bad(x) ((pmd_val(x) & ~PAGE_MASK)) @@ -80,6 +130,9 @@ #define set_pmd(pmdp, pmd) (*(pmdp) = pmd) #define pmd_pgtable(pmd) ((pgtable_t) pmd_page_vaddr(pmd)) +/* + * 3rd level paging: pte + */ #define pte_ERROR(e) \ pr_crit("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h index e4031ecd3c8c..f28afcf5c6d1 100644 --- a/arch/arc/include/asm/processor.h +++ b/arch/arc/include/asm/processor.h @@ -93,7 +93,7 @@ extern unsigned int get_wchan(struct task_struct *p); #define VMALLOC_START (PAGE_OFFSET - (CONFIG_ARC_KVADDR_SIZE << 20)) /* 1 PGDIR_SIZE each for fixmap/pkmap, 2 PGDIR_SIZE gutter (see asm/highmem.h) */ -#define VMALLOC_SIZE ((CONFIG_ARC_KVADDR_SIZE << 20) - PGDIR_SIZE * 4) +#define VMALLOC_SIZE ((CONFIG_ARC_KVADDR_SIZE << 20) - PMD_SIZE * 4) #define VMALLOC_END (VMALLOC_START + VMALLOC_SIZE) diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c index 41f154320964..8da2f0ad8c69 100644 --- a/arch/arc/mm/fault.c +++ b/arch/arc/mm/fault.c @@ -39,6 +39,8 @@ noinline static int handle_kernel_vaddr_fault(unsigned long address) if (!pgd_present(*pgd_k)) goto bad_area; + set_pgd(pgd, *pgd_k); + p4d = p4d_offset(pgd, address); p4d_k = p4d_offset(pgd_k, address); if (!p4d_present(*p4d_k)) @@ -49,6 +51,8 @@ noinline static int handle_kernel_vaddr_fault(unsigned long address) if (!pud_present(*pud_k)) goto bad_area; + set_pud(pud, *pud_k); + pmd = pmd_offset(pud, address); pmd_k = pmd_offset(pud_k, address); if (!pmd_present(*pmd_k)) diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index 46ad9aee7a73..f7ba2a5d5ec8 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -191,6 +191,7 @@ void __init mem_init(void) highmem_init(); BUILD_BUG_ON((PTRS_PER_PGD * sizeof(pgd_t)) > PAGE_SIZE); + BUILD_BUG_ON((PTRS_PER_PMD * sizeof(pmd_t)) > PAGE_SIZE); BUILD_BUG_ON((PTRS_PER_PTE * sizeof(pte_t)) > PAGE_SIZE); } diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index dfc0c1aba48f..5f71445f26bd 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -621,8 +621,8 @@ char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len) IS_USED_CFG(CONFIG_TRANSPARENT_HUGEPAGE)); n += scnprintf(buf + n, len - n, - "MMU [v%x]\t: %dk PAGE, %sJTLB %d (%dx%d), uDTLB %d, uITLB %d%s%s\n", - p_mmu->ver, p_mmu->pg_sz_k, super_pg, + "MMU [v%x]\t: %dk PAGE, %s, swalk %d lvl, JTLB %d (%dx%d), 
uDTLB %d, uITLB %d%s%s\n", + p_mmu->ver, p_mmu->pg_sz_k, super_pg, CONFIG_PGTABLE_LEVELS, p_mmu->sets * p_mmu->ways, p_mmu->sets, p_mmu->ways, p_mmu->u_dtlb, p_mmu->u_itlb, IS_AVAIL2(p_mmu->pae, ", PAE40 ", CONFIG_ARC_HAS_PAE40)); diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S index c4a5f16444ce..5f57eba1089d 100644 --- a/arch/arc/mm/tlbex.S +++ b/arch/arc/mm/tlbex.S @@ -173,6 +173,15 @@ ex_saved_reg1: tst r3, r3 bz do_slow_path_pf ; if no Page Table, do page fault +#if CONFIG_PGTABLE_LEVELS > 2 + lsr r0, r2, PMD_SHIFT ; Bits for indexing into PMD + and r0, r0, (PTRS_PER_PMD - 1) + ld.as r1, [r3, r0] ; PMD entry + tst r1, r1 + bz do_slow_path_pf + mov r3, r1 +#endif + #ifdef CONFIG_TRANSPARENT_HUGEPAGE and.f 0, r3, _PAGE_HW_SZ ; Is this Huge PMD (thp) add2.nz r1, r1, r0 -- cgit From 8747ff704ac886f6ef992b1b7eadcf77d151fd3a Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 1 Oct 2020 15:46:42 -0700 Subject: ARC: mm: support 4 levels of page tables Acked-by: Mike Rapoport Signed-off-by: Vineet Gupta --- arch/arc/include/asm/page.h | 11 +++++++++ arch/arc/include/asm/pgalloc.h | 11 +++++++++ arch/arc/include/asm/pgtable-levels.h | 45 +++++++++++++++++++++++++++++++---- arch/arc/mm/fault.c | 2 ++ arch/arc/mm/init.c | 1 + arch/arc/mm/tlbex.S | 9 +++++++ 6 files changed, 74 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h index 5d7899d87c08..9a62e1d87967 100644 --- a/arch/arc/include/asm/page.h +++ b/arch/arc/include/asm/page.h @@ -41,6 +41,17 @@ typedef struct { #define pgd_val(x) ((x).pgd) #define __pgd(x) ((pgd_t) { (x) }) +#if CONFIG_PGTABLE_LEVELS > 3 + +typedef struct { + unsigned long pud; +} pud_t; + +#define pud_val(x) ((x).pud) +#define __pud(x) ((pud_t) { (x) }) + +#endif + #if CONFIG_PGTABLE_LEVELS > 2 typedef struct { diff --git a/arch/arc/include/asm/pgalloc.h b/arch/arc/include/asm/pgalloc.h index 781620d2e86f..096b8ef58edb 100644 --- a/arch/arc/include/asm/pgalloc.h +++ b/arch/arc/include/asm/pgalloc.h @@ -70,6 +70,17 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) return ret; } +#if CONFIG_PGTABLE_LEVELS > 3 + +static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4dp, pud_t *pudp) +{ + set_p4d(p4dp, __p4d((unsigned long)pudp)); +} + +#define __pud_free_tlb(tlb, pmd, addr) pud_free((tlb)->mm, pmd) + +#endif + #if CONFIG_PGTABLE_LEVELS > 2 static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmdp) diff --git a/arch/arc/include/asm/pgtable-levels.h b/arch/arc/include/asm/pgtable-levels.h index 5dd8c58f7d70..8084ef2f6491 100644 --- a/arch/arc/include/asm/pgtable-levels.h +++ b/arch/arc/include/asm/pgtable-levels.h @@ -54,8 +54,13 @@ /* * A default 3 level paging testing setup in software walked MMU * MMUv4 (8K page): <4> : <7> : <8> : <13> + * A default 4 level paging testing setup in software walked MMU + * MMUv4 (8K page): <4> : <3> : <4> : <8> : <13> */ #define PGDIR_SHIFT 28 +#if CONFIG_PGTABLE_LEVELS > 3 +#define PUD_SHIFT 25 +#endif #if CONFIG_PGTABLE_LEVELS > 2 #define PMD_SHIFT 21 #endif @@ -66,17 +71,25 @@ #define PGDIR_MASK (~(PGDIR_SIZE - 1)) #define PTRS_PER_PGD BIT(32 - PGDIR_SHIFT) +#if CONFIG_PGTABLE_LEVELS > 3 +#define PUD_SIZE BIT(PUD_SHIFT) +#define PUD_MASK (~(PUD_SIZE - 1)) +#define PTRS_PER_PUD BIT(PGDIR_SHIFT - PUD_SHIFT) +#endif + #if CONFIG_PGTABLE_LEVELS > 2 #define PMD_SIZE BIT(PMD_SHIFT) #define PMD_MASK (~(PMD_SIZE - 1)) -#define PTRS_PER_PMD BIT(PGDIR_SHIFT - PMD_SHIFT) +#define PTRS_PER_PMD BIT(PUD_SHIFT - PMD_SHIFT) #endif #define 
PTRS_PER_PTE BIT(PMD_SHIFT - PAGE_SHIFT) #ifndef __ASSEMBLY__ -#if CONFIG_PGTABLE_LEVELS > 2 +#if CONFIG_PGTABLE_LEVELS > 3 +#include +#elif CONFIG_PGTABLE_LEVELS > 2 #include #else #include @@ -91,9 +104,31 @@ #define pgd_ERROR(e) \ pr_crit("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) +#if CONFIG_PGTABLE_LEVELS > 3 + +/* In 4 level paging, p4d_* macros work on pgd */ +#define p4d_none(x) (!p4d_val(x)) +#define p4d_bad(x) ((p4d_val(x) & ~PAGE_MASK)) +#define p4d_present(x) (p4d_val(x)) +#define p4d_clear(xp) do { p4d_val(*(xp)) = 0; } while (0) +#define p4d_pgtable(p4d) ((pud_t *)(p4d_val(p4d) & PAGE_MASK)) +#define p4d_page(p4d) virt_to_page(p4d_pgtable(p4d)) +#define set_p4d(p4dp, p4d) (*(p4dp) = p4d) + +/* + * 2nd level paging: pud + */ +#define pud_ERROR(e) \ + pr_crit("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e)) + +#endif + #if CONFIG_PGTABLE_LEVELS > 2 -/* In 3 level paging, pud_* macros work on pgd */ +/* + * In 3 level paging, pud_* macros work on pgd + * In 4 level paging, pud_* macros work on pud + */ #define pud_none(x) (!pud_val(x)) #define pud_bad(x) ((pud_val(x) & ~PAGE_MASK)) #define pud_present(x) (pud_val(x)) @@ -103,7 +138,7 @@ #define set_pud(pudp, pud) (*(pudp) = pud) /* - * 2nd level paging: pmd + * 3rd level paging: pmd */ #define pmd_ERROR(e) \ pr_crit("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e)) @@ -131,7 +166,7 @@ #define pmd_pgtable(pmd) ((pgtable_t) pmd_page_vaddr(pmd)) /* - * 3rd level paging: pte + * 4th level paging: pte */ #define pte_ERROR(e) \ pr_crit("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c index 8da2f0ad8c69..f8994164fa36 100644 --- a/arch/arc/mm/fault.c +++ b/arch/arc/mm/fault.c @@ -46,6 +46,8 @@ noinline static int handle_kernel_vaddr_fault(unsigned long address) if (!p4d_present(*p4d_k)) goto bad_area; + set_p4d(p4d, *p4d_k); + pud = pud_offset(p4d, address); pud_k = pud_offset(p4d_k, address); if (!pud_present(*pud_k)) diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index f7ba2a5d5ec8..699ecf119641 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -191,6 +191,7 @@ void __init mem_init(void) highmem_init(); BUILD_BUG_ON((PTRS_PER_PGD * sizeof(pgd_t)) > PAGE_SIZE); + BUILD_BUG_ON((PTRS_PER_PUD * sizeof(pud_t)) > PAGE_SIZE); BUILD_BUG_ON((PTRS_PER_PMD * sizeof(pmd_t)) > PAGE_SIZE); BUILD_BUG_ON((PTRS_PER_PTE * sizeof(pte_t)) > PAGE_SIZE); } diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S index 5f57eba1089d..e054780a8fe0 100644 --- a/arch/arc/mm/tlbex.S +++ b/arch/arc/mm/tlbex.S @@ -173,6 +173,15 @@ ex_saved_reg1: tst r3, r3 bz do_slow_path_pf ; if no Page Table, do page fault +#if CONFIG_PGTABLE_LEVELS > 3 + lsr r0, r2, PUD_SHIFT ; Bits for indexing into PUD + and r0, r0, (PTRS_PER_PUD - 1) + ld.as r1, [r3, r0] ; PMD entry + tst r1, r1 + bz do_slow_path_pf + mov r3, r1 +#endif + #if CONFIG_PGTABLE_LEVELS > 2 lsr r0, r2, PMD_SHIFT ; Bits for indexing into PMD and r0, r0, (PTRS_PER_PMD - 1) -- cgit From 56809a28d45fcad94b28cfd614600568c0d46545 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 13 Oct 2020 19:44:07 -0700 Subject: ARC: mm: vmalloc sync from kernel to user table to update PMD ... ... not PGD vmalloc() sets up the kernel page table (starting from @swapper_pg_dir). But when vmalloc area is accessed in context of a user task, say opening terminal in n_tty_open(), the user page tables need to be synced from kernel page tables so that TLB entry is created in "user context". 
The old code was doing this incorrectly, as it was updating the user pgd entry (first level itself) to point to the kernel pud table (2nd level), effectively yanking away the entire user space translation and replacing it with the kernel one. The correct way to do this is to ONLY update a user space pgd/pud/pmd entry if it is not populated already. This ensures that only the missing leaf pmd entry gets updated to point to the relevant kernel pte table. From a code change point of view, we are changing the pattern: p4d = p4d_offset(pgd, address); p4d_k = p4d_offset(pgd_k, address); if (!p4d_present(*p4d_k)) goto bad_area; set_p4d(p4d, *p4d_k); with p4d = p4d_offset(pgd, address); p4d_k = p4d_offset(pgd_k, address); if (p4d_none(*p4d_k)) goto bad_area; if (!p4d_present(*p4d)) set_p4d(p4d, *p4d_k); Signed-off-by: Vineet Gupta --- arch/arc/mm/fault.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c index f8994164fa36..5787c261c9a4 100644 --- a/arch/arc/mm/fault.c +++ b/arch/arc/mm/fault.c @@ -36,31 +36,31 @@ noinline static int handle_kernel_vaddr_fault(unsigned long address) pgd = pgd_offset(current->active_mm, address); pgd_k = pgd_offset_k(address); - if (!pgd_present(*pgd_k)) + if (pgd_none (*pgd_k)) goto bad_area; - - set_pgd(pgd, *pgd_k); + if (!pgd_present(*pgd)) + set_pgd(pgd, *pgd_k); p4d = p4d_offset(pgd, address); p4d_k = p4d_offset(pgd_k, address); - if (!p4d_present(*p4d_k)) + if (p4d_none(*p4d_k)) goto bad_area; - - set_p4d(p4d, *p4d_k); + if (!p4d_present(*p4d)) set_p4d(p4d, *p4d_k); pud = pud_offset(p4d, address); pud_k = pud_offset(p4d_k, address); - if (!pud_present(*pud_k)) + if (pud_none(*pud_k)) goto bad_area; - - set_pud(pud, *pud_k); + if (!pud_present(*pud)) + set_pud(pud, *pud_k); pmd = pmd_offset(pud, address); pmd_k = pmd_offset(pud_k, address); - if (!pmd_present(*pmd_k)) + if (pmd_none(*pmd_k)) goto bad_area; - - set_pmd(pmd, *pmd_k); + if (!pmd_present(*pmd)) + set_pmd(pmd, *pmd_k); /* XXX: create the TLB entry here */ return 0; -- cgit From c24a19674258dcc968a198d8e0d4717c8f27700c Mon Sep 17 00:00:00 2001 From: Chen Wandun Date: Tue, 10 Aug 2021 21:51:05 +0800 Subject: riscv: add support for hugepage migration Generic arch_hugetlb_migration_supported can provide appropriate support for hugepage migration when ARCH_ENABLE_HUGEPAGE_MIGRATION is enabled, so enable it for hugepage migration in riscv. Signed-off-by: Chen Wandun Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index e26e255dce3c..78644e29f1fd 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -13,6 +13,7 @@ config 32BIT config RISCV def_bool y select ARCH_CLOCKSOURCE_INIT + select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION select ARCH_HAS_BINFMT_FLAT select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_DEBUG_VIRTUAL if MMU -- cgit From 7119decf47d9867266459615be502e5d2cecedba Mon Sep 17 00:00:00 2001 From: Janis Schoetterl-Glausch Date: Tue, 29 Jun 2021 10:55:30 +0200 Subject: KVM: s390: Enable specification exception interpretation When this feature is enabled the hardware is free to interpret specification exceptions generated by the guest, instead of causing program interruption interceptions. This benefits (test) programs that generate a lot of specification exceptions (roughly 4x increase in exceptions/sec). Interceptions will occur as before if ICTL_PINT is set, i.e. if guest debug is enabled.
There is no indication if this feature is available or not and the hardware is free to interpret or not. So we can simply set this bit and if the hardware ignores it we fall back to intercept 8 handling. Signed-off-by: Janis Schoetterl-Glausch Link: https://lore.kernel.org/linux-s390/20210706114714.3936825-1-scgl@linux.ibm.com/ Reviewed-by: David Hildenbrand Reviewed-by: Christian Borntraeger Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/kvm_host.h | 1 + arch/s390/kvm/kvm-s390.c | 2 ++ arch/s390/kvm/vsie.c | 2 ++ 3 files changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 8925f3969478..118d5450c523 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -244,6 +244,7 @@ struct kvm_s390_sie_block { __u8 fpf; /* 0x0060 */ #define ECB_GS 0x40 #define ECB_TE 0x10 +#define ECB_SPECI 0x08 #define ECB_SRSI 0x04 #define ECB_HOSTPROTINT 0x02 __u8 ecb; /* 0x0061 */ diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index f72f361d39dd..5b45c83ced21 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -3180,6 +3180,8 @@ static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->ecb |= ECB_SRSI; if (test_kvm_facility(vcpu->kvm, 73)) vcpu->arch.sie_block->ecb |= ECB_TE; + if (!kvm_is_ucontrol(vcpu->kvm)) + vcpu->arch.sie_block->ecb |= ECB_SPECI; if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi) vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI; diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 4002a24bc43a..acda4b6fc851 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -510,6 +510,8 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) prefix_unmapped(vsie_page); scb_s->ecb |= ECB_TE; } + /* specification exception interpretation */ + scb_s->ecb |= scb_o->ecb & ECB_SPECI; /* branch prediction */ if (test_kvm_facility(vcpu->kvm, 82)) scb_s->fpf |= scb_o->fpf & FPF_BPBC; -- cgit From a3e03bc1368c1bc16e19b001fc96dc7430573cc8 Mon Sep 17 00:00:00 2001 From: Halil Pasic Date: Fri, 27 Aug 2021 14:54:29 +0200 Subject: KVM: s390: index kvm->arch.idle_mask by vcpu_idx MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While in practice vcpu->vcpu_idx == vcpu->vcpu_id is often true, it may not always be, and we must not rely on this. Reason is that KVM decides the vcpu_idx, userspace decides the vcpu_id, thus the two might not match. Currently kvm->arch.idle_mask is indexed by vcpu_id, which implies that code like for_each_set_bit(vcpu_id, kvm->arch.idle_mask, online_vcpus) { vcpu = kvm_get_vcpu(kvm, vcpu_id); do_stuff(vcpu); } is not legit. Reason is that kvm_get_vcpu expects a vcpu_idx, not a vcpu_id. The trouble is, we do actually use kvm->arch.idle_mask like this. To fix this problem we have two options. Either use kvm_get_vcpu_by_id(vcpu_id), which would loop to find the right vcpu_id, or switch to indexing via vcpu_idx. The latter is preferable for obvious reasons. Let us switch from indexing kvm->arch.idle_mask by vcpu_id to indexing it by vcpu_idx. To keep gisa_int.kicked_mask indexed by the same index as idle_mask, let's make the same change for it as well.
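Condensed, the rule the patch enforces (a fragment distilled from the diff below, not new API): any bitmap whose set bits will later be fed to kvm_get_vcpu() must be populated and walked by vcpu_idx, obtained via kvm_vcpu_get_idx():

	/* right: bit position == vcpu_idx, the index kvm_get_vcpu() takes */
	set_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.idle_mask);

	for_each_set_bit(vcpu_idx, kvm->arch.idle_mask, online_vcpus) {
		struct kvm_vcpu *v = kvm_get_vcpu(kvm, vcpu_idx);
		/* v is exactly the vcpu that set the bit */
	}

	/* wrong (the old code): vcpu_id is chosen by userspace and may not
	 * equal vcpu_idx, so the later lookup can hit the wrong vcpu */
	set_bit(vcpu->vcpu_id, vcpu->kvm->arch.idle_mask);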
Fixes: 1ee0bc559dc3 ("KVM: s390: get rid of local_int array") Signed-off-by: Halil Pasic Reviewed-by: Christian Bornträger Reviewed-by: Claudio Imbrenda Cc: # 3.15+ Link: https://lore.kernel.org/r/20210827125429.1912577-1-pasic@linux.ibm.com Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/kvm_host.h | 1 + arch/s390/kvm/interrupt.c | 12 ++++++------ arch/s390/kvm/kvm-s390.c | 2 +- arch/s390/kvm/kvm-s390.h | 2 +- 4 files changed, 9 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 118d5450c523..611f18ecde91 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -963,6 +963,7 @@ struct kvm_arch{ atomic64_t cmma_dirty_pages; /* subset of available cpu features enabled by user space */ DECLARE_BITMAP(cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS); + /* indexed by vcpu_idx */ DECLARE_BITMAP(idle_mask, KVM_MAX_VCPUS); struct kvm_s390_gisa_interrupt gisa_int; struct kvm_s390_pv pv; diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index d548d60caed2..16256e17a544 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -419,13 +419,13 @@ static unsigned long deliverable_irqs(struct kvm_vcpu *vcpu) static void __set_cpu_idle(struct kvm_vcpu *vcpu) { kvm_s390_set_cpuflags(vcpu, CPUSTAT_WAIT); - set_bit(vcpu->vcpu_id, vcpu->kvm->arch.idle_mask); + set_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.idle_mask); } static void __unset_cpu_idle(struct kvm_vcpu *vcpu) { kvm_s390_clear_cpuflags(vcpu, CPUSTAT_WAIT); - clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.idle_mask); + clear_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.idle_mask); } static void __reset_intercept_indicators(struct kvm_vcpu *vcpu) @@ -3050,18 +3050,18 @@ int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len) static void __airqs_kick_single_vcpu(struct kvm *kvm, u8 deliverable_mask) { - int vcpu_id, online_vcpus = atomic_read(&kvm->online_vcpus); + int vcpu_idx, online_vcpus = atomic_read(&kvm->online_vcpus); struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int; struct kvm_vcpu *vcpu; - for_each_set_bit(vcpu_id, kvm->arch.idle_mask, online_vcpus) { - vcpu = kvm_get_vcpu(kvm, vcpu_id); + for_each_set_bit(vcpu_idx, kvm->arch.idle_mask, online_vcpus) { + vcpu = kvm_get_vcpu(kvm, vcpu_idx); if (psw_ioint_disabled(vcpu)) continue; deliverable_mask &= (u8)(vcpu->arch.sie_block->gcr[6] >> 24); if (deliverable_mask) { /* lately kicked but not yet running */ - if (test_and_set_bit(vcpu_id, gi->kicked_mask)) + if (test_and_set_bit(vcpu_idx, gi->kicked_mask)) return; kvm_s390_vcpu_wakeup(vcpu); return; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 5b45c83ced21..e144c8046ceb 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -4026,7 +4026,7 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu) kvm_s390_patch_guest_per_regs(vcpu); } - clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask); + clear_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.gisa_int.kicked_mask); vcpu->arch.sie_block->icptcode = 0; cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags); diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 9fad25109b0d..ecd741ee3276 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -79,7 +79,7 @@ static inline int is_vcpu_stopped(struct kvm_vcpu *vcpu) static inline int is_vcpu_idle(struct kvm_vcpu *vcpu) { - return test_bit(vcpu->vcpu_id, vcpu->kvm->arch.idle_mask); + return 
test_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.idle_mask); } static inline int kvm_is_ucontrol(struct kvm *kvm) -- cgit From 71f8817c28e2e1e5549138e2aef68c2fd784e162 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Mon, 23 Aug 2021 18:34:05 +0100 Subject: MIPS: ingenic: Unconditionally enable clock of CPU #0 Make sure that the PLL that feeds the CPU won't be stopped while the kernel is running. This fixes a problem on JZ4760 (and probably others) where under very specific conditions, the main PLL would be turned OFF when the kernel was shutting down, causing the shutdown process to fail. Signed-off-by: Paul Cercueil Signed-off-by: Thomas Bogendoerfer --- arch/mips/generic/board-ingenic.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'arch') diff --git a/arch/mips/generic/board-ingenic.c b/arch/mips/generic/board-ingenic.c index e86e0016c548..3f44f14bdb33 100644 --- a/arch/mips/generic/board-ingenic.c +++ b/arch/mips/generic/board-ingenic.c @@ -7,6 +7,8 @@ * Copyright (C) 2020 Paul Cercueil */ +#include +#include #include #include #include @@ -129,10 +131,36 @@ static const struct platform_suspend_ops ingenic_pm_ops __maybe_unused = { static int __init ingenic_pm_init(void) { + struct device_node *cpu_node; + struct clk *cpu0_clk; + int ret; + if (boot_cpu_type() == CPU_XBURST) { if (IS_ENABLED(CONFIG_PM_SLEEP)) suspend_set_ops(&ingenic_pm_ops); _machine_halt = ingenic_halt; + + /* + * Unconditionally enable the clock for the first CPU. + * This makes sure that the PLL that feeds the CPU won't be + * stopped while the kernel is running. + */ + cpu_node = of_get_cpu_node(0, NULL); + if (!cpu_node) { + pr_err("Unable to get CPU node\n"); + } else { + cpu0_clk = of_clk_get(cpu_node, 0); + if (IS_ERR(cpu0_clk)) { + pr_err("Unable to get CPU0 clock\n"); + return PTR_ERR(cpu0_clk); + } + + ret = clk_prepare_enable(cpu0_clk); + if (ret) { + pr_err("Unable to enable CPU0 clock\n"); + return ret; + } + } } return 0; -- cgit From 1d78dfde33a02da1d816279c2e3452978b7abd39 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 27 Aug 2021 14:07:06 +1000 Subject: KVM: PPC: Fix clearing never mapped TCEs in realmode Since commit e1a1ef84cd07 ("KVM: PPC: Book3S: Allocate guest TCEs on demand too"), pages for TCE tables for KVM guests are allocated only when needed. This allows skipping any update when clearing TCEs. This works mostly fine as TCE updates are handled when the MMU is enabled. The realmode handlers fail with H_TOO_HARD when pages are not yet allocated, except when clearing a TCE in which case KVM prints a warning and proceeds to dereference a NULL pointer, which crashes the host OS. This has not been caught so far as the change in commit e1a1ef84cd07 is reasonably new, and POWER9 runs mostly radix which does not use realmode handlers. With hash, the default TCE table is memset() by QEMU when the machine is reset which triggers page faults and the KVM TCE device's kvm_spapr_tce_fault() handles those with MMU on. And the huge DMA windows are not cleared by VMs which instead successfully create a DMA window big enough to map the VM memory 1:1 and then VMs just map everything without clearing. This started crashing now as commit 381ceda88c4c ("powerpc/pseries/iommu: Make use of DDW for indirect mapping") added a mode where a dynamic DMA window that is not big enough to map the VM memory 1:1 is used anyway, and the VM now is the first (i.e. not QEMU) to clear a just created table.
Note that upstream QEMU needs to be modified for the VM to trigger the host OS crash. This replaces WARN_ON_ONCE_RM() with a check and return, and adds another warning if TCE is not being cleared. Fixes: e1a1ef84cd07 ("KVM: PPC: Book3S: Allocate guest TCEs on demand too") Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210827040706.517652-1-aik@ozlabs.ru --- arch/powerpc/kvm/book3s_64_vio_hv.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index dc6591548f0c..636c6ae0939b 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -173,10 +173,13 @@ static void kvmppc_rm_tce_put(struct kvmppc_spapr_tce_table *stt, idx -= stt->offset; page = stt->pages[idx / TCES_PER_PAGE]; /* - * page must not be NULL in real mode, - * kvmppc_rm_ioba_validate() must have taken care of this. + * kvmppc_rm_ioba_validate() allows pages not be allocated if TCE is + * being cleared, otherwise it returns H_TOO_HARD and we skip this. */ - WARN_ON_ONCE_RM(!page); + if (!page) { + WARN_ON_ONCE_RM(tce != 0); + return; + } tbl = kvmppc_page_address(page); tbl[idx % TCES_PER_PAGE] = tce; -- cgit From 7f2dcc7371c1ab6e5d3678a7eff1cb0eb9725de9 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 17 Aug 2021 20:18:09 -0700 Subject: parisc: math-emu: Avoid "fmt" macro collision The printk "fmt" macro was colliding. Rename like the others with a "bits" suffix. Fixes a build failure: arch/parisc/math-emu/decode_exc.c: In function 'decode_fpu': arch/parisc/math-emu/decode_exc.c:49:14: error: expected identifier before numeric constant 49 | #define fmt 11 /* bits 19 & 20 */ | ^~ ./include/linux/printk.h:379:6: note: in expansion of macro 'fmt' 379 | .fmt = __builtin_constant_p(_fmt) ? (_fmt) : NULL, \ | ^~~ ./include/linux/printk.h:417:3: note: in expansion of macro '__printk_index_emit' 417 | __printk_index_emit(_fmt, NULL, NULL); \ | ^~~~~~~~~~~~~~~~~~~ ./include/linux/printk.h:446:26: note: in expansion of macro 'printk_index_wrap' 446 | #define printk(fmt, ...) printk_index_wrap(_printk, fmt, ##__VA_ARGS__) | ^~~~~~~~~~~~~~~~~ arch/parisc/math-emu/decode_exc.c:339:3: note: in expansion of macro 'printk' 339 | printk("%s(%d) Unknown FPU exception 0x%x\n", __FILE__, | ^~~~~~ Cc: "James E.J. Bottomley" Cc: Helge Deller Cc: linux-parisc@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: Helge Deller --- arch/parisc/math-emu/decode_exc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/parisc/math-emu/decode_exc.c b/arch/parisc/math-emu/decode_exc.c index cd8ffc6ceadf..494ca41df05d 100644 --- a/arch/parisc/math-emu/decode_exc.c +++ b/arch/parisc/math-emu/decode_exc.c @@ -46,7 +46,7 @@ #define SIGNALCODE(signal, code) ((signal) << 24 | (code)) #define copropbit 1<<31-2 /* bit position 2 */ #define opclass 9 /* bits 21 & 22 */ -#define fmt 11 /* bits 19 & 20 */ +#define fmtbits 11 /* bits 19 & 20 */ #define df 13 /* bits 17 & 18 */ #define twobits 3 /* mask low-order 2 bits */ #define fivebits 31 /* mask low-order 5 bits */ @@ -57,7 +57,7 @@ #define Excp_instr(index) Instructionfield(Fpu_register[index]) #define Clear_excp_register(index) Allexception(Fpu_register[index]) = 0 #define Excp_format() \ - (current_ir >> ((current_ir>>opclass & twobits)==1 ? df : fmt) & twobits) + (current_ir >> ((current_ir>>opclass & twobits) == 1 ?
df : fmtbits) & twobits) /* Miscellaneous definitions */ #define Fpu_sgl(index) Fpu_register[index*2] -- cgit From 7bf82eb3873fbbee8273f60ddef584194b99f6c1 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 15 Jul 2021 14:46:12 +0100 Subject: parisc: Rename PMD_ORDER to PMD_TABLE_ORDER This is the order of the page table allocation, not the order of a PMD. Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Helge Deller --- arch/parisc/include/asm/pgalloc.h | 6 +++--- arch/parisc/include/asm/pgtable.h | 4 ++-- arch/parisc/mm/init.c | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h index 6a7e98e71f1d..54b63374579b 100644 --- a/arch/parisc/include/asm/pgalloc.h +++ b/arch/parisc/include/asm/pgalloc.h @@ -48,15 +48,15 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) { pmd_t *pmd; - pmd = (pmd_t *)__get_free_pages(GFP_PGTABLE_KERNEL, PMD_ORDER); + pmd = (pmd_t *)__get_free_pages(GFP_PGTABLE_KERNEL, PMD_TABLE_ORDER); if (likely(pmd)) - memset ((void *)pmd, 0, PAGE_SIZE << PMD_ORDER); + memset ((void *)pmd, 0, PAGE_SIZE << PMD_TABLE_ORDER); return pmd; } static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) { - free_pages((unsigned long)pmd, PMD_ORDER); + free_pages((unsigned long)pmd, PMD_TABLE_ORDER); } #endif diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index 43937af127b1..7badd872f05a 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h @@ -112,7 +112,7 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr) #define KERNEL_INITIAL_SIZE (1 << KERNEL_INITIAL_ORDER) #if CONFIG_PGTABLE_LEVELS == 3 -#define PMD_ORDER 1 +#define PMD_TABLE_ORDER 1 #define PGD_ORDER 0 #else #define PGD_ORDER 1 @@ -131,7 +131,7 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr) #define PMD_SHIFT (PLD_SHIFT + BITS_PER_PTE) #define PMD_SIZE (1UL << PMD_SHIFT) #define PMD_MASK (~(PMD_SIZE-1)) -#define BITS_PER_PMD (PAGE_SHIFT + PMD_ORDER - BITS_PER_PMD_ENTRY) +#define BITS_PER_PMD (PAGE_SHIFT + PMD_TABLE_ORDER - BITS_PER_PMD_ENTRY) #define PTRS_PER_PMD (1UL << BITS_PER_PMD) #else #define BITS_PER_PMD 0 diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index 591a4e939415..3f7d6d5b56ac 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -378,8 +378,8 @@ static void __init map_pages(unsigned long start_vaddr, #if CONFIG_PGTABLE_LEVELS == 3 if (pud_none(*pud)) { - pmd = memblock_alloc(PAGE_SIZE << PMD_ORDER, - PAGE_SIZE << PMD_ORDER); + pmd = memblock_alloc(PAGE_SIZE << PMD_TABLE_ORDER, + PAGE_SIZE << PMD_TABLE_ORDER); if (!pmd) panic("pmd allocation failed.\n"); pud_populate(NULL, pud, pmd); -- cgit From d220da0967dbda232350c5dc39317e04e0892743 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 25 Aug 2021 00:48:20 +0900 Subject: parisc: remove unused arch/parisc/boot/install.sh and its phony target Parisc has two similar installation scripts: arch/parisc/install.sh arch/parisc/boot/install.sh The latter is never used because 'make ARCH=parisc install' always invokes the 'install' target in arch/parisc/Makefile. The target in arch/parisc/boot/Makefile is not used either. 
Signed-off-by: Masahiro Yamada Signed-off-by: Helge Deller --- arch/parisc/boot/Makefile | 4 --- arch/parisc/boot/install.sh | 65 --------------------------------------------- 2 files changed, 69 deletions(-) delete mode 100644 arch/parisc/boot/install.sh (limited to 'arch') diff --git a/arch/parisc/boot/Makefile b/arch/parisc/boot/Makefile index 61f44142cfe1..b873ee4720ca 100644 --- a/arch/parisc/boot/Makefile +++ b/arch/parisc/boot/Makefile @@ -15,7 +15,3 @@ $(obj)/bzImage: $(obj)/compressed/vmlinux FORCE $(obj)/compressed/vmlinux: FORCE $(Q)$(MAKE) $(build)=$(obj)/compressed $@ - -install: $(CONFIGURE) $(obj)/bzImage - sh -x $(srctree)/$(obj)/install.sh $(KERNELRELEASE) $(obj)/bzImage \ - System.map "$(INSTALL_PATH)" diff --git a/arch/parisc/boot/install.sh b/arch/parisc/boot/install.sh deleted file mode 100644 index 8f7c365fad83..000000000000 --- a/arch/parisc/boot/install.sh +++ /dev/null @@ -1,65 +0,0 @@ -#!/bin/sh -# -# arch/parisc/install.sh, derived from arch/i386/boot/install.sh -# -# This file is subject to the terms and conditions of the GNU General Public -# License. See the file "COPYING" in the main directory of this archive -# for more details. -# -# Copyright (C) 1995 by Linus Torvalds -# -# Adapted from code in arch/i386/boot/Makefile by H. Peter Anvin -# -# "make install" script for i386 architecture -# -# Arguments: -# $1 - kernel version -# $2 - kernel image file -# $3 - kernel map file -# $4 - default install path (blank if root directory) -# - -verify () { - if [ ! -f "$1" ]; then - echo "" 1>&2 - echo " *** Missing file: $1" 1>&2 - echo ' *** You need to run "make" before "make install".' 1>&2 - echo "" 1>&2 - exit 1 - fi -} - -# Make sure the files actually exist - -verify "$2" -verify "$3" - -# User may have a custom install script - -if [ -n "${INSTALLKERNEL}" ]; then - if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi - if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi -fi - -# Default install - -if [ "$(basename $2)" = "zImage" ]; then -# Compressed install - echo "Installing compressed kernel" - base=vmlinuz -else -# Normal install - echo "Installing normal kernel" - base=vmlinux -fi - -if [ -f $4/$base-$1 ]; then - mv $4/$base-$1 $4/$base-$1.old -fi -cat $2 > $4/$base-$1 - -# Install system map file -if [ -f $4/System.map-$1 ]; then - mv $4/System.map-$1 $4/System.map-$1.old -fi -cp $3 $4/System.map-$1 -- cgit From 87875c1084a28364dad8cd4f9ecbfdfe0b845ad5 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Sat, 7 Aug 2021 11:19:27 +0200 Subject: parisc: Make struct parisc_driver::remove() return void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The caller of this function (parisc_driver_remove() in arch/parisc/kernel/drivers.c) ignores the return value, so better don't return any value at all to not wake wrong expectations in driver authors. The only function that could return a non-zero value before was ipmi_parisc_remove() which returns the return value of ipmi_si_remove_by_dev(). Make this function return void, too, as for all other callers the value is ignored, too. 
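For driver authors, a hypothetical parisc driver now looks like the sketch below (the probe and remove bodies are made up, only the signatures matter):

        static int example_probe(struct parisc_device *dev)
        {
                /* claim resources; failure is still reported via int */
                return 0;
        }

        static void example_remove(struct parisc_device *dev)
        {
                /* tear everything down; there is nothing to report back */
        }

        static struct parisc_driver example_driver = {
                .name   = "example",
                .probe  = example_probe,
                .remove = example_remove,
        };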
Also fold in a small checkpatch fix for: WARNING: Unnecessary space before function pointer arguments + void (*remove) (struct parisc_device *dev); Acked-by: Dmitry Torokhov (for drivers/input) Signed-off-by: Uwe Kleine-König Acked-by: Sudip Mukherjee Acked-by: Jiri Slaby Signed-off-by: Helge Deller --- arch/parisc/include/asm/parisc-device.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/parisc/include/asm/parisc-device.h b/arch/parisc/include/asm/parisc-device.h index d02d144c6012..4de3b391d812 100644 --- a/arch/parisc/include/asm/parisc-device.h +++ b/arch/parisc/include/asm/parisc-device.h @@ -34,8 +34,8 @@ struct parisc_driver { struct parisc_driver *next; char *name; const struct parisc_device_id *id_table; - int (*probe) (struct parisc_device *dev); /* New device discovered */ - int (*remove) (struct parisc_device *dev); + int (*probe)(struct parisc_device *dev); /* New device discovered */ + void (*remove)(struct parisc_device *dev); struct device_driver drv; }; -- cgit From 6ef4661cad32b5098ffb31be3282c866befde85f Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 12 Aug 2021 01:41:16 +0900 Subject: parisc: move core-y in arch/parisc/Makefile to arch/parisc/Kbuild Use obj-y to clean up Makefile. Signed-off-by: Masahiro Yamada Signed-off-by: Helge Deller --- arch/parisc/Kbuild | 1 + arch/parisc/Makefile | 3 --- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/parisc/Kbuild b/arch/parisc/Kbuild index a4e40e534e6a..3c068b700a81 100644 --- a/arch/parisc/Kbuild +++ b/arch/parisc/Kbuild @@ -1 +1,2 @@ # SPDX-License-Identifier: GPL-2.0-only +obj-y += mm/ kernel/ math-emu/ diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile index aed8ea29268b..6c27940eff03 100644 --- a/arch/parisc/Makefile +++ b/arch/parisc/Makefile @@ -111,9 +111,6 @@ KBUILD_CFLAGS += $(cflags-y) LIBGCC := $(shell $(CC) -print-libgcc-file-name) export LIBGCC -kernel-y := mm/ kernel/ math-emu/ - -core-y += $(addprefix arch/parisc/, $(kernel-y)) libs-y += arch/parisc/lib/ $(LIBGCC) boot := arch/parisc/boot -- cgit From 4b511d5bfa74b1926daefd1694205c7f1bcf677f Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 30 Jul 2021 11:26:21 +0200 Subject: xen: fix setting of max_pfn in shared_info Xen PV guests are specifying the highest used PFN via the max_pfn field in shared_info. This value is used by the Xen tools when saving or migrating the guest. Unfortunately this field is misnamed, as in reality it is specifying the number of pages (including any memory holes) of the guest, so it is the highest used PFN + 1. Renaming isn't possible, as this is a public Xen hypervisor interface which needs to be kept stable. The kernel will set the value correctly initially at boot time, but when adding more pages (e.g. due to memory hotplug or ballooning) a real PFN number is stored in max_pfn. This is done when expanding the p2m array, and the PFN stored there is even possibly wrong, as it should be the last possible PFN of the just added P2M frame, and not one which led to the P2M expansion. Fix that by setting shared_info->max_pfn to the last possible PFN + 1. 
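A worked example of the corrected arithmetic, assuming the usual 64-bit P2M_PER_PAGE of 512 entries: when pfn 0x10000 triggers an expansion, the new P2M frame covers pfns 0x10000..0x101ff, so the value reported to Xen must be 0x10200:

        /* sketch of the fixed update with the example values inlined */
        if (pfn >= xen_p2m_last_pfn) {
                /* ALIGN(0x10000 + 1, 512) == 0x10200: the last possible
                 * PFN of the just added frame plus one, not the PFN that
                 * happened to trigger the expansion */
                xen_p2m_last_pfn = ALIGN(pfn + 1, P2M_PER_PAGE);
                HYPERVISOR_shared_info->arch.max_pfn = xen_p2m_last_pfn;
        }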
Fixes: 98dd166ea3a3c3 ("x86/xen/p2m: hint at the last populated P2M entry") Cc: stable@vger.kernel.org Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Link: https://lore.kernel.org/r/20210730092622.9973-2-jgross@suse.com Signed-off-by: Juergen Gross --- arch/x86/xen/p2m.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index ac06ca32e9ef..5e6e236977c7 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -618,8 +618,8 @@ int xen_alloc_p2m_entry(unsigned long pfn) } /* Expanded the p2m? */ - if (pfn > xen_p2m_last_pfn) { - xen_p2m_last_pfn = pfn; + if (pfn >= xen_p2m_last_pfn) { + xen_p2m_last_pfn = ALIGN(pfn + 1, P2M_PER_PAGE); HYPERVISOR_shared_info->arch.max_pfn = xen_p2m_last_pfn; } -- cgit From 2526cff7c4f944924f39043dc657189eccc4fe5c Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 30 Jul 2021 09:18:03 +0200 Subject: xen: assume XENFEAT_mmu_pt_update_preserve_ad being set for pv guests XENFEAT_mmu_pt_update_preserve_ad is always set in Xen 4.0 and newer. Remove coding assuming it might be zero. Signed-off-by: Juergen Gross Acked-by: Peter Zijlstra (Intel) Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/20210730071804.4302-3-jgross@suse.com Signed-off-by: Juergen Gross --- arch/x86/xen/enlighten_pv.c | 12 ++---------- arch/x86/xen/mmu_pv.c | 4 ++-- 2 files changed, 4 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 03149422dce2..753f63734c13 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -116,9 +116,8 @@ static void __init xen_banner(void) HYPERVISOR_xen_version(XENVER_extraversion, &extra); pr_info("Booting paravirtualized kernel on %s\n", pv_info.name); - printk(KERN_INFO "Xen version: %d.%d%s%s\n", - version >> 16, version & 0xffff, extra.extraversion, - xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); + pr_info("Xen version: %d.%d%s (preserve-AD)\n", + version >> 16, version & 0xffff, extra.extraversion); } static void __init xen_pv_init_platform(void) @@ -1302,13 +1301,6 @@ asmlinkage __visible void __init xen_start_kernel(void) xen_init_apic(); #endif - if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) { - pv_ops.mmu.ptep_modify_prot_start = - xen_ptep_modify_prot_start; - pv_ops.mmu.ptep_modify_prot_commit = - xen_ptep_modify_prot_commit; - } - machine_ops = xen_machine_ops; /* diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index ade789e73ee4..1df5f01529e5 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -2099,8 +2099,8 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = { .set_pte = xen_set_pte_init, .set_pmd = xen_set_pmd_hyper, - .ptep_modify_prot_start = __ptep_modify_prot_start, - .ptep_modify_prot_commit = __ptep_modify_prot_commit, + .ptep_modify_prot_start = xen_ptep_modify_prot_start, + .ptep_modify_prot_commit = xen_ptep_modify_prot_commit, .pte_val = PV_CALLEE_SAVE(xen_pte_val), .pgd_val = PV_CALLEE_SAVE(xen_pgd_val), -- cgit From 1a0df28c09831dc4bd336fe6e090bbe85a07e79f Mon Sep 17 00:00:00 2001 From: zhaoxiao Date: Wed, 25 Aug 2021 19:41:11 +0800 Subject: x86: xen: platform-pci-unplug: use pr_err() and pr_warn() instead of raw printk() Since we have the nice helpers pr_err() and pr_warn(), use them instead of raw printk(). [jgross@suse.com] Move the "#define pr_fmt" above the #includes in order to avoid build warnings due to redefinition. 
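In sketch form, the ordering matters because printk.h installs a fallback pr_fmt() if none is visible yet, and the pr_*() helpers expand with whichever definition they see:

        /* must come before the #includes: otherwise printk.h defines
         * its default pr_fmt() first and this define redefines it */
        #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

        #include <linux/printk.h>

        /* pr_err("unrecognised magic value\n") now expands to
         * printk(KERN_ERR KBUILD_MODNAME ": " "unrecognised magic value\n") */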
Signed-off-by: zhaoxiao Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/20210825114111.29009-1-zhaoxiao@uniontech.com Signed-off-by: Juergen Gross --- arch/x86/xen/platform-pci-unplug.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c index 96d7f7d39cb9..62ac4898df1a 100644 --- a/arch/x86/xen/platform-pci-unplug.c +++ b/arch/x86/xen/platform-pci-unplug.c @@ -7,6 +7,8 @@ * Copyright (c) 2010, Citrix */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -30,13 +32,13 @@ static int check_platform_magic(void) magic = inw(XEN_IOPORT_MAGIC); if (magic != XEN_IOPORT_MAGIC_VAL) { - printk(KERN_ERR "Xen Platform PCI: unrecognised magic value\n"); + pr_err("Xen Platform PCI: unrecognised magic value\n"); return XEN_PLATFORM_ERR_MAGIC; } protocol = inb(XEN_IOPORT_PROTOVER); - printk(KERN_DEBUG "Xen Platform PCI: I/O protocol version %d\n", + pr_debug("Xen Platform PCI: I/O protocol version %d\n", protocol); switch (protocol) { @@ -44,12 +46,12 @@ static int check_platform_magic(void) outw(XEN_IOPORT_LINUX_PRODNUM, XEN_IOPORT_PRODNUM); outl(XEN_IOPORT_LINUX_DRVVER, XEN_IOPORT_DRVVER); if (inw(XEN_IOPORT_MAGIC) != XEN_IOPORT_MAGIC_VAL) { - printk(KERN_ERR "Xen Platform: blacklisted by host\n"); + pr_err("Xen Platform: blacklisted by host\n"); return XEN_PLATFORM_ERR_BLACKLIST; } break; default: - printk(KERN_WARNING "Xen Platform PCI: unknown I/O protocol version\n"); + pr_warn("Xen Platform PCI: unknown I/O protocol version\n"); return XEN_PLATFORM_ERR_PROTOCOL; } @@ -155,12 +157,12 @@ void xen_unplug_emulated_devices(void) * been compiled for this kernel (modules or built-in are both OK). */ if (!xen_emul_unplug) { if (xen_must_unplug_nics()) { - printk(KERN_INFO "Netfront and the Xen platform PCI driver have " + pr_info("Netfront and the Xen platform PCI driver have " "been compiled for this kernel: unplug emulated NICs.\n"); xen_emul_unplug |= XEN_UNPLUG_ALL_NICS; } if (xen_must_unplug_disks()) { - printk(KERN_INFO "Blkfront and the Xen platform PCI driver have " + pr_info("Blkfront and the Xen platform PCI driver have " "been compiled for this kernel: unplug emulated disks.\n" "You might have to change the root device\n" "from /dev/hd[a-d] to /dev/xvd[a-d]\n" @@ -200,7 +202,7 @@ static int __init parse_xen_emul_unplug(char *arg) else if (!strncmp(p, "never", l)) xen_emul_unplug |= XEN_UNPLUG_NEVER; else - printk(KERN_WARNING "unrecognised option '%s' " + pr_warn("unrecognised option '%s' " "in parameter 'xen_emul_unplug'\n", p); } return 0; -- cgit From 436fc4feeabbf103d78d50a8e091b3aac28cc37f Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Fri, 27 Aug 2021 08:36:06 +0200 Subject: s390: add kmemleak annotation in stack_alloc() kmemleak with enabled auto scanning reports that our stack allocation is lost. This is because we're saving the pointer + STACK_INIT_OFFSET to lowcore. When kmemleak now scans the objects, it thinks that this one is lost because it can't find a corresponding pointer. 
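In sketch form (simplified from the patch below):

        void *stack = __vmalloc_node(THREAD_SIZE, THREAD_SIZE,
                                     THREADINFO_GFP, NUMA_NO_NODE,
                                     __builtin_return_address(0));

        /* the only reference that survives is offset into the object,
         * so kmemleak's scan never matches the allocation itself ... */
        S390_lowcore.kernel_stack = (unsigned long)stack + STACK_INIT_OFFSET;

        /* ... hence the object is explicitly excluded from the scan */
        kmemleak_not_leak(stack);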
Reported-by: Marc Hartmayer Signed-off-by: Sven Schnelle Tested-by: Marc Hartmayer Signed-off-by: Heiko Carstens --- arch/s390/kernel/setup.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index fe14beb338e5..1b8316212512 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include @@ -356,9 +357,12 @@ void *restart_stack; unsigned long stack_alloc(void) { #ifdef CONFIG_VMAP_STACK - return (unsigned long)__vmalloc_node(THREAD_SIZE, THREAD_SIZE, - THREADINFO_GFP, NUMA_NO_NODE, - __builtin_return_address(0)); + void *ret; + + ret = __vmalloc_node(THREAD_SIZE, THREAD_SIZE, THREADINFO_GFP, + NUMA_NO_NODE, __builtin_return_address(0)); + kmemleak_not_leak(ret); + return (unsigned long)ret; #else return __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER); #endif -- cgit From 15256194eff64f9a774b33b7817ea663e352394a Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 27 Aug 2021 13:45:14 +0200 Subject: s390/entry: make oklabel within CHKSTG macro local Make the oklabel within the CHKSTG macro local. This makes sure that tools like objdump and the crash debugging tool still disassemble full functions where the macro has been used instead of stopping half way where such a global label is used and one has to guess how to disassemble the rest of such a function: E.g.: 0000000000cb0270 : cb0270: b2 05 03 20 stck 800 ... cb0354: a7 74 00 97 jne cb0482 0000000000cb0358 : cb0358: c0 e0 00 22 4e 8f larl %r14,10fa076 ... Fixes: d35925b34996 ("s390/mcck: move storage error checks to assembler") Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens --- arch/s390/kernel/entry.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index b9716a7e326d..4c9b967290ae 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -140,10 +140,10 @@ _LPP_OFFSET = __LC_LPP TSTMSK __LC_MCCK_CODE,(MCCK_CODE_STG_ERROR|MCCK_CODE_STG_KEY_ERROR) jnz \errlabel TSTMSK __LC_MCCK_CODE,MCCK_CODE_STG_DEGRAD - jz oklabel\@ + jz .Loklabel\@ TSTMSK __LC_MCCK_CODE,MCCK_CODE_STG_FAIL_ADDR jnz \errlabel -oklabel\@: +.Loklabel\@: .endm #if IS_ENABLED(CONFIG_KVM) -- cgit From 81912856e0fbc294deed595db568a7a00f962d95 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Sat, 28 Aug 2021 10:18:44 +0200 Subject: s390/configs: enable CONFIG_KFENCE in debug_defconfig Signed-off-by: Sven Schnelle Signed-off-by: Heiko Carstens --- arch/s390/configs/debug_defconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 11ffc7c37ada..37b6115ed80e 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -804,6 +804,7 @@ CONFIG_DEBUG_VM_PGFLAGS=y CONFIG_DEBUG_MEMORY_INIT=y CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m CONFIG_DEBUG_PER_CPU_MAPS=y +CONFIG_KFENCE=y CONFIG_DEBUG_SHIRQ=y CONFIG_PANIC_ON_OOPS=y CONFIG_DETECT_HUNG_TASK=y -- cgit From 7851155a1a7c33f2270dcd979ab6532a89be5293 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Thu, 26 Aug 2021 22:59:44 +0930 Subject: openrisc/litex: Update uart address Recent litex socs will place the UART at 0xe0006800. 
Signed-off-by: Joel Stanley Signed-off-by: Stafford Horne --- arch/openrisc/boot/dts/or1klitex.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/openrisc/boot/dts/or1klitex.dts b/arch/openrisc/boot/dts/or1klitex.dts index 3f9867aa3844..baba4f49fa6b 100644 --- a/arch/openrisc/boot/dts/or1klitex.dts +++ b/arch/openrisc/boot/dts/or1klitex.dts @@ -41,10 +41,10 @@ interrupt-controller; }; - serial0: serial@e0002000 { + serial0: serial@e0006800 { device_type = "serial"; compatible = "litex,liteuart"; - reg = <0xe0002000 0x100>; + reg = <0xe0006800 0x100>; }; soc_ctrl0: soc_controller@e0000000 { -- cgit From 978c791491bce8cc1a27cb50392a2e8bbcea79d4 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Thu, 26 Aug 2021 22:59:45 +0930 Subject: openrisc/litex: Add ethernet device Add the liteeth ethernet device. Signed-off-by: Joel Stanley Signed-off-by: Stafford Horne --- arch/openrisc/boot/dts/or1klitex.dts | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch') diff --git a/arch/openrisc/boot/dts/or1klitex.dts b/arch/openrisc/boot/dts/or1klitex.dts index baba4f49fa6b..91c7173c50e6 100644 --- a/arch/openrisc/boot/dts/or1klitex.dts +++ b/arch/openrisc/boot/dts/or1klitex.dts @@ -52,4 +52,13 @@ reg = <0xe0000000 0xc>; status = "okay"; }; + + ethernet@e0001000 { + compatible = "litex,liteeth"; + reg = <0xe0001000 0x7c>, + <0xe0001800 0x0a>, + <0x80000000 0x2000>; + reg-names = "mac", "mdio", "buffer"; + interrupts = <2>; + }; }; -- cgit From 1955d843efc3b5cf3ab4878986a87ad4972ff4e1 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Thu, 26 Aug 2021 22:59:46 +0930 Subject: openrisc/litex: Update defconfig Add the liteeth network device and basic network options, and update the options by doing a savedefconfig. Signed-off-by: Joel Stanley Signed-off-by: Stafford Horne --- arch/openrisc/configs/or1klitex_defconfig | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/openrisc/configs/or1klitex_defconfig b/arch/openrisc/configs/or1klitex_defconfig index 3c2c70d3d740..d695879a4d26 100644 --- a/arch/openrisc/configs/or1klitex_defconfig +++ b/arch/openrisc/configs/or1klitex_defconfig @@ -1,18 +1,24 @@ CONFIG_BLK_DEV_INITRD=y -CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y -CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y -CONFIG_DEVTMPFS=y -CONFIG_DEVTMPFS_MOUNT=y CONFIG_EMBEDDED=y +CONFIG_OPENRISC_BUILTIN_DTB="or1klitex" CONFIG_HZ_100=y -CONFIG_INITRAMFS_SOURCE="openrisc-rootfs.cpio.gz" +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_INET=y +CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_MOUNT=y CONFIG_OF_OVERLAY=y -CONFIG_OPENRISC_BUILTIN_DTB="or1klitex" -CONFIG_PANIC_ON_OOPS=y -CONFIG_PRINTK_TIME=y -CONFIG_LITEX_SOC_CONTROLLER=y +CONFIG_NETDEVICES=y +CONFIG_LITEX_LITEETH=y CONFIG_SERIAL_LITEUART=y CONFIG_SERIAL_LITEUART_CONSOLE=y -CONFIG_SOFTLOCKUP_DETECTOR=y CONFIG_TTY_PRINTK=y +CONFIG_LITEX_SOC_CONTROLLER=y +CONFIG_TMPFS=y +CONFIG_PRINTK_TIME=y +CONFIG_PANIC_ON_OOPS=y +CONFIG_SOFTLOCKUP_DETECTOR=y +CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y +CONFIG_BUG_ON_DATA_CORRUPTION=y -- cgit From b14b8b1ed0e15b8f43fba9c25654278a31ee3c2f Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 31 Aug 2021 23:51:51 +1000 Subject: powerpc/ptdump: Fix generic ptdump for 64-bit Since the conversion to generic ptdump we see crashes on 64-bit: BUG: Unable to handle kernel data access on read at 0xc0eeff7f00000000 Faulting instruction address: 0xc00000000045e5fc Oops: Kernel access of bad area, sig: 11 [#1] ... 
NIP __walk_page_range+0x2bc/0xce0 LR __walk_page_range+0x240/0xce0 Call Trace: __walk_page_range+0x240/0xce0 (unreliable) walk_page_range_novma+0x74/0xb0 ptdump_walk_pgd+0x98/0x170 ptdump_check_wx+0x88/0xd0 mark_rodata_ro+0x48/0x80 kernel_init+0x74/0x1a0 ret_from_kernel_thread+0x5c/0x64 What's happening is that we have walked off the end of the kernel page tables, and started dereferencing junk values. That happens because we initialised the ptdump_range to span all the way up to 0xffffffffffffffff: static struct ptdump_range ptdump_range[] __ro_after_init = { {TASK_SIZE_MAX, ~0UL}, But the kernel page tables don't span that far. So on 64-bit set the end of the range to be the address immediately past the end of the kernel page tables, to limit the page table walk to valid addresses. Fixes: e084728393a5 ("powerpc/ptdump: Convert powerpc to GENERIC_PTDUMP") Reported-by: Nathan Chancellor Tested-by: Nathan Chancellor Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210831135151.886620-1-mpe@ellerman.id.au --- arch/powerpc/mm/ptdump/ptdump.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c index 2d80d775d15e..bf251191e78d 100644 --- a/arch/powerpc/mm/ptdump/ptdump.c +++ b/arch/powerpc/mm/ptdump/ptdump.c @@ -359,6 +359,8 @@ static int __init ptdump_init(void) ptdump_range[0].start = KERN_VIRT_START; else ptdump_range[0].start = PAGE_OFFSET; + + ptdump_range[0].end = PAGE_OFFSET + (PGDIR_SIZE * PTRS_PER_PGD); #endif populate_markers(); -- cgit From 4b92d4add5f6dcf21275185c997d6ecb800054cd Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 31 Aug 2021 13:48:34 +0200 Subject: drivers: base: cacheinfo: Get rid of DEFINE_SMP_CALL_CACHE_FUNCTION() DEFINE_SMP_CALL_CACHE_FUNCTION() was useful before the CPU hotplug rework to ensure that the cache-related functions are called on the upcoming CPU because the notifier itself could run on any online CPU. The hotplug state machine guarantees that the callbacks are invoked on the upcoming CPU. So there is no need to have this SMP function call obfuscation. That indirection was missed when the hotplug notifiers were converted. This also solves the problem of ARM64 init_cache_level() invoking ACPI functions which take a semaphore in that context. That's invalid as SMP function calls run with interrupts disabled. Running it just from the callback in the context of the CPU hotplug thread solves this.
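For reference, the removed wrapper looked roughly like the sketch below (paraphrased from memory of the old include/linux/cacheinfo.h, details may differ): it bounced the arch callback through an IPI even though cpuhp already runs it on the right CPU:

        #define DEFINE_SMP_CALL_CACHE_FUNCTION(func)                    \
        static inline void _##func(void *ret)                           \
        {                                                               \
                int cpu = smp_processor_id();                           \
                *(int *)ret = __##func(cpu);                            \
        }                                                               \
        int func(unsigned int cpu)                                      \
        {                                                               \
                int ret;                                                \
                smp_call_function_single(cpu, _##func, &ret, true);     \
                return ret;                                             \
        }

With the wrapper gone, the arch functions lose their __ prefix and static linkage and are called directly, which is exactly what the diff below does.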
Fixes: 8571890e1513 ("arm64: Add support for ACPI based firmware tables") Reported-by: Guenter Roeck Signed-off-by: Thomas Gleixner Tested-by: Guenter Roeck Acked-by: Will Deacon Acked-by: Peter Zijlstra Link: https://lore.kernel.org/r/871r69ersb.ffs@tglx --- arch/arm64/kernel/cacheinfo.c | 7 ++----- arch/mips/kernel/cacheinfo.c | 7 ++----- arch/riscv/kernel/cacheinfo.c | 7 ++----- arch/x86/kernel/cpu/cacheinfo.c | 7 ++----- 4 files changed, 8 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c index 7fa6828bb488..587543c6c51c 100644 --- a/arch/arm64/kernel/cacheinfo.c +++ b/arch/arm64/kernel/cacheinfo.c @@ -43,7 +43,7 @@ static void ci_leaf_init(struct cacheinfo *this_leaf, this_leaf->type = type; } -static int __init_cache_level(unsigned int cpu) +int init_cache_level(unsigned int cpu) { unsigned int ctype, level, leaves, fw_level; struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); @@ -78,7 +78,7 @@ static int __init_cache_level(unsigned int cpu) return 0; } -static int __populate_cache_leaves(unsigned int cpu) +int populate_cache_leaves(unsigned int cpu) { unsigned int level, idx; enum cache_type type; @@ -97,6 +97,3 @@ static int __populate_cache_leaves(unsigned int cpu) } return 0; } - -DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level) -DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves) diff --git a/arch/mips/kernel/cacheinfo.c b/arch/mips/kernel/cacheinfo.c index 53d8ea7d36e6..495dd058231d 100644 --- a/arch/mips/kernel/cacheinfo.c +++ b/arch/mips/kernel/cacheinfo.c @@ -17,7 +17,7 @@ do { \ leaf++; \ } while (0) -static int __init_cache_level(unsigned int cpu) +int init_cache_level(unsigned int cpu) { struct cpuinfo_mips *c = ¤t_cpu_data; struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); @@ -74,7 +74,7 @@ static void fill_cpumask_cluster(int cpu, cpumask_t *cpu_map) cpumask_set_cpu(cpu1, cpu_map); } -static int __populate_cache_leaves(unsigned int cpu) +int populate_cache_leaves(unsigned int cpu) { struct cpuinfo_mips *c = ¤t_cpu_data; struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); @@ -114,6 +114,3 @@ static int __populate_cache_leaves(unsigned int cpu) return 0; } - -DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level) -DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves) diff --git a/arch/riscv/kernel/cacheinfo.c b/arch/riscv/kernel/cacheinfo.c index d86781357044..90deabfe63ea 100644 --- a/arch/riscv/kernel/cacheinfo.c +++ b/arch/riscv/kernel/cacheinfo.c @@ -113,7 +113,7 @@ static void fill_cacheinfo(struct cacheinfo **this_leaf, } } -static int __init_cache_level(unsigned int cpu) +int init_cache_level(unsigned int cpu) { struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); struct device_node *np = of_cpu_device_node_get(cpu); @@ -155,7 +155,7 @@ static int __init_cache_level(unsigned int cpu) return 0; } -static int __populate_cache_leaves(unsigned int cpu) +int populate_cache_leaves(unsigned int cpu) { struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); struct cacheinfo *this_leaf = this_cpu_ci->info_list; @@ -187,6 +187,3 @@ static int __populate_cache_leaves(unsigned int cpu) return 0; } - -DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level) -DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves) diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index d66af2950e06..b5e36bd0425b 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -985,7 +985,7 @@ static void ci_leaf_init(struct cacheinfo *this_leaf, 
this_leaf->priv = base->nb; } -static int __init_cache_level(unsigned int cpu) +int init_cache_level(unsigned int cpu) { struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); @@ -1014,7 +1014,7 @@ static void get_cache_id(int cpu, struct _cpuid4_info_regs *id4_regs) id4_regs->id = c->apicid >> index_msb; } -static int __populate_cache_leaves(unsigned int cpu) +int populate_cache_leaves(unsigned int cpu) { unsigned int idx, ret; struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); @@ -1033,6 +1033,3 @@ static int __populate_cache_leaves(unsigned int cpu) return 0; } - -DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level) -DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves) -- cgit From 58e636039b512697554b579c2bb23774061877f5 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 25 Aug 2021 13:31:58 +0200 Subject: xen: remove stray preempt_disable() from PV AP startup code In cpu_bringup() there is a call of preempt_disable() without a paired preempt_enable(). This is not needed as interrupts are off initially. Additionally this will result in early boot messages like: BUG: scheduling while atomic: swapper/1/0/0x00000002 Signed-off-by: Juergen Gross Link: https://lore.kernel.org/r/20210825113158.11716-1-jgross@suse.com Signed-off-by: Juergen Gross --- arch/x86/xen/smp_pv.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index c2ac319f11a4..96afadf9878e 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -64,7 +64,6 @@ static void cpu_bringup(void) cr4_init(); cpu_init(); touch_softlockup_watchdog(); - preempt_disable(); /* PVH runs in ring 0 and allows us to do native syscalls. Yay! */ if (!xen_feature(XENFEAT_supervisor_mode_kernel)) { -- cgit From e432fe97f3e5de325b40021e505cce53877586c5 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 27 Aug 2021 22:51:06 +1000 Subject: powerpc/bug: Cast to unsigned long before passing to inline asm In commit 1e688dd2a3d6 ("powerpc/bug: Provide better flexibility to WARN_ON/__WARN_FLAGS() with asm goto") we changed WARN_ON(). Previously it would take the warning condition, x, and double negate it before converting the result to int, and passing that int to the underlying inline asm. ie: #define WARN_ON(x) ({ int __ret_warn_on = !!(x); if (__builtin_constant_p(__ret_warn_on)) { ... } else { BUG_ENTRY(PPC_TLNEI " %4, 0", BUGFLAG_WARNING | BUGFLAG_TAINT(TAINT_WARN), "r" (__ret_warn_on)); The asm then does a full register width comparison with zero and traps if it is non-zero (PPC_TLNEI). The new code instead passes the full expression, x, with some arbitrary type, to the inline asm: #define WARN_ON(x) ({ ... do { if (__builtin_constant_p((x))) { ... } else { ... 
WARN_ENTRY(PPC_TLNEI " %4, 0", BUGFLAG_WARNING | BUGFLAG_TAINT(TAINT_WARN), __label_warn_on, "r" (x)); As reported[1] by Nathan, when building with clang this can cause spurious warnings to fire repeatedly at boot: WARNING: CPU: 0 PID: 1 at lib/klist.c:62 .klist_add_tail+0x3c/0x110 Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Tainted: G W 5.14.0-rc7-next-20210825 #1 NIP: c0000000007ff81c LR: c00000000090a038 CTR: 0000000000000000 REGS: c0000000073c32a0 TRAP: 0700 Tainted: G W (5.14.0-rc7-next-20210825) MSR: 8000000002029032 CR: 22000a40 XER: 00000000 CFAR: c00000000090a034 IRQMASK: 0 GPR00: c00000000090a038 c0000000073c3540 c000000001be3200 0000000000000001 GPR04: c0000000072d65c0 0000000000000000 c0000000091ba798 c0000000091bb0a0 GPR08: 0000000000000001 0000000000000000 c000000008581918 fffffffffffffc00 GPR12: 0000000044000240 c000000001dd0000 c000000000012300 0000000000000000 GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR20: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR24: 0000000000000000 c0000000017e3200 0000000000000000 c000000001a0e778 GPR28: c0000000072d65b0 c0000000072d65a8 c000000007de72c8 c0000000073c35d0 NIP .klist_add_tail+0x3c/0x110 LR .bus_add_driver+0x148/0x290 Call Trace: 0xc0000000073c35d0 (unreliable) .bus_add_driver+0x148/0x290 .driver_register+0xb8/0x190 .__hid_register_driver+0x70/0xd0 .redragon_driver_init+0x34/0x58 .do_one_initcall+0x130/0x3b0 .do_initcall_level+0xd8/0x188 .do_initcalls+0x7c/0xdc .kernel_init_freeable+0x178/0x21c .kernel_init+0x34/0x220 .ret_from_kernel_thread+0x58/0x60 Instruction dump: fba10078 7c7d1b78 38600001 fb810070 3b9d0008 fbc10080 7c9e2378 389d0018 fb9d0008 fb9d0010 90640000 fbdd0000 <0b1e0000> e87e0018 28230000 41820024 The instruction dump shows that we are trapping because r30 is not zero: tdnei r30,0 Where r30 = c000000007de72c8 The WARN_ON() comes from: static void knode_set_klist(struct klist_node *knode, struct klist *klist) { knode->n_klist = klist; /* no knode deserves to start its life dead */ WARN_ON(knode_dead(knode)); ^^^^^^^^^^^^^^^^^ Where: #define KNODE_DEAD 1LU static bool knode_dead(struct klist_node *knode) { return (unsigned long)knode->n_klist & KNODE_DEAD; } The full disassembly shows that clang has not generated any code to apply the "& KNODE_DEAD" to the n_klist pointer, which is surprising. Nathan filed an LLVM bug [2], in which Eli Friedman explained that clang believes it is only passing a single bit to the asm (ie. a bool) and so the mask of bit 0 with 1 can be omitted, and suggested that if we want the full 64-bit value passed to the inline asm we should cast to a 64-bit type (or 32-bit on 32-bits). In fact we already do that for BUG_ENTRY(), which was added to fix a possibly similar bug in 2005 in commit 32818c2eb6b8 ("[PATCH] ppc64: Fix issue with gcc 4.0 compiled kernels"). So cast the value we pass to the inline asm to long. For GCC this appears to have no effect on code generation, other than causing sign extension in some cases. 
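In sketch form, the difference in what reaches the "r" operand of the inline asm (kernel context assumed, the variable names are made up):

        /* before 1e688dd2a3d6: the condition was squashed to an int */
        int old_operand = !!((unsigned long)knode->n_klist & KNODE_DEAD);

        /* after 1e688dd2a3d6: the raw expression went in with its own
         * type, letting clang conclude that only bit 0 could ever be
         * set and drop the '& KNODE_DEAD' mask entirely */

        /* with this fix: widen first, forcing a full-register value */
        long new_operand = (long)((unsigned long)knode->n_klist & KNODE_DEAD);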
[1]: http://lore.kernel.org/r/YSa1O4fcX1nNKqN/@Ryzen-9-3900X.localdomain [2]: https://bugs.llvm.org/show_bug.cgi?id=51634 Fixes: 1e688dd2a3d6 ("powerpc/bug: Provide better flexibility to WARN_ON/__WARN_FLAGS() with asm goto") Reported-by: Nathan Chancellor Reviewed-by: Nathan Chancellor Tested-by: Nathan Chancellor Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210901112522.1085134-1-mpe@ellerman.id.au --- arch/powerpc/include/asm/bug.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h index 1ee0f22313ee..02c08d1492f8 100644 --- a/arch/powerpc/include/asm/bug.h +++ b/arch/powerpc/include/asm/bug.h @@ -119,7 +119,8 @@ __label_warn_on: \ \ WARN_ENTRY(PPC_TLNEI " %4, 0", \ BUGFLAG_WARNING | BUGFLAG_TAINT(TAINT_WARN), \ - __label_warn_on, "r" (x)); \ + __label_warn_on, \ + "r" ((__force long)(x))); \ break; \ __label_warn_on: \ __ret_warn_on = true; \ -- cgit From 5f6e0fe01b6b33894cf6f61b359ab5a6d2b7674e Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 10 Jun 2021 11:03:31 +0900 Subject: parisc: Fix compile failure when building 64-bit kernel natively Commit 23243c1ace9f ("arch: use cross_compiling to check whether it is a cross build or not") broke 64-bit parisc builds on 32-bit parisc systems. Helge mentioned: - 64-bit parisc userspace is not supported yet [1] - hppa gcc does not support "-m64" flag [2] That means, parisc developers working on a 32-bit parisc machine need to use hppa64-linux-gnu-gcc (cross compiler) for building the 64-bit parisc kernel. After the offending commit, gcc is used in such a case because both $(SRCARCH) and $(SUBARCH) are 'parisc', hence cross_compiling is unset. A correct way is to introduce ARCH=parisc64 because building the 64-bit parisc kernel on a 32-bit parisc system is not exactly a native build, but rather a semi-cross build. [1]: https://lore.kernel.org/linux-parisc/5dfd81eb-c8ca-b7f5-e80e-8632767c022d@gmx.de/#t [2]: https://lore.kernel.org/linux-parisc/89515325-fc21-31da-d238-6f7a9abbf9a0@gmx.de/ Fixes: 23243c1ace9f ("arch: use cross_compiling to check whether it is a cross build or not") Signed-off-by: Masahiro Yamada Reported-by: Meelis Roos Tested-by: Meelis Roos Cc: # v5.13+ Signed-off-by: Helge Deller --- arch/parisc/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile index 6c27940eff03..fcde3ffa0221 100644 --- a/arch/parisc/Makefile +++ b/arch/parisc/Makefile @@ -25,18 +25,18 @@ CHECKFLAGS += -D__hppa__=1 ifdef CONFIG_64BIT UTS_MACHINE := parisc64 CHECKFLAGS += -D__LP64__=1 -CC_ARCHES = hppa64 LD_BFD := elf64-hppa-linux else # 32-bit -CC_ARCHES = hppa hppa2.0 hppa1.1 LD_BFD := elf32-hppa-linux endif # select defconfig based on actual architecture -ifeq ($(shell uname -m),parisc64) +ifeq ($(ARCH),parisc64) KBUILD_DEFCONFIG := generic-64bit_defconfig + CC_ARCHES := hppa64 else KBUILD_DEFCONFIG := generic-32bit_defconfig + CC_ARCHES := hppa hppa2.0 hppa1.1 endif export LD_BFD -- cgit From 030f653078316a9cc9ca6bd1b0234dcf858be35d Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 30 Aug 2021 05:42:27 -0400 Subject: parisc: fix crash with signals and alloca I was debugging some crashes on parisc and I found out that there is a crash possibility if a function using alloca is interrupted by a signal. The reason for the crash is that the gcc alloca implementation leaves garbage in the upper 32 bits of the sp register. 
This normally doesn't matter (the upper bits are ignored because the PSW W-bit is clear), however the signal delivery routine in the kernel uses full 64 bits of sp and it fails with -EFAULT if the upper 32 bits are not zero. I created this program that demonstrates the problem: #include #include #include #include static __attribute__((noinline,noclone)) void aa(int *size) { void * volatile p = alloca(-*size); while (1) ; } static void handler(int sig) { write(1, "signal delivered\n", 17); _exit(0); } int main(void) { int size = -0x100; signal(SIGALRM, handler); alarm(1); aa(&size); } If you compile it with optimizations, it will crash. The "aa" function has this disassembly: 000106a0 : 106a0: 08 03 02 41 copy r3,r1 106a4: 08 1e 02 43 copy sp,r3 106a8: 6f c1 00 80 stw,ma r1,40(sp) 106ac: 37 dc 3f c1 ldo -20(sp),ret0 106b0: 0c 7c 12 90 stw ret0,8(r3) 106b4: 0f 40 10 9c ldw 0(r26),ret0 ; ret0 = 0x00000000FFFFFF00 106b8: 97 9c 00 7e subi 3f,ret0,ret0 ; ret0 = 0xFFFFFFFF0000013F 106bc: d7 80 1c 1a depwi 0,31,6,ret0 ; ret0 = 0xFFFFFFFF00000100 106c0: 0b 9e 0a 1e add,l sp,ret0,sp ; sp = 0xFFFFFFFFxxxxxxxx 106c4: e8 1f 1f f7 b,l,n 106c4 ,r0 This patch fixes the bug by truncating the "usp" variable to 32 bits. Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Signed-off-by: Helge Deller --- arch/parisc/kernel/signal.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch') diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c index fb1e94a3982b..db1a47cf424d 100644 --- a/arch/parisc/kernel/signal.c +++ b/arch/parisc/kernel/signal.c @@ -237,6 +237,12 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs, #endif usp = (regs->gr[30] & ~(0x01UL)); +#ifdef CONFIG_64BIT + if (is_compat_task()) { + /* The gcc alloca implementation leaves garbage in the upper 32 bits of sp */ + usp = (compat_uint_t)usp; + } +#endif /*FIXME: frame_size parameter is unused, remove it. 
*/ frame = get_sigframe(&ksig->ka, usp, sizeof(*frame)); -- cgit From 6f1fce595b78b775d7fb585c15c2dc3a6994f96e Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Wed, 1 Sep 2021 22:18:18 +0200 Subject: parisc: math-emu: Fix fall-through warnings Fix lots of fallthrough warnings, e.g.: arch/parisc/math-emu/fpudispatch.c:323:33: warning: this statement may fall through [-Wimplicit-fallthrough=] Signed-off-by: Helge Deller --- arch/parisc/math-emu/fpudispatch.c | 56 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 53 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/parisc/math-emu/fpudispatch.c b/arch/parisc/math-emu/fpudispatch.c index 7c46969ead9b..01ed133227c2 100644 --- a/arch/parisc/math-emu/fpudispatch.c +++ b/arch/parisc/math-emu/fpudispatch.c @@ -310,12 +310,15 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) r1 &= ~3; fpregs[t+3] = fpregs[r1+3]; fpregs[t+2] = fpregs[r1+2]; + fallthrough; case 1: /* double */ fpregs[t+1] = fpregs[r1+1]; + fallthrough; case 0: /* single */ fpregs[t] = fpregs[r1]; return(NOEXCEPTION); } + BUG(); case 3: /* FABS */ switch (fmt) { case 2: /* illegal */ @@ -325,13 +328,16 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) r1 &= ~3; fpregs[t+3] = fpregs[r1+3]; fpregs[t+2] = fpregs[r1+2]; + fallthrough; case 1: /* double */ fpregs[t+1] = fpregs[r1+1]; + fallthrough; case 0: /* single */ /* copy and clear sign bit */ fpregs[t] = fpregs[r1] & 0x7fffffff; return(NOEXCEPTION); } + BUG(); case 6: /* FNEG */ switch (fmt) { case 2: /* illegal */ @@ -341,13 +347,16 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) r1 &= ~3; fpregs[t+3] = fpregs[r1+3]; fpregs[t+2] = fpregs[r1+2]; + fallthrough; case 1: /* double */ fpregs[t+1] = fpregs[r1+1]; + fallthrough; case 0: /* single */ /* copy and invert sign bit */ fpregs[t] = fpregs[r1] ^ 0x80000000; return(NOEXCEPTION); } + BUG(); case 7: /* FNEGABS */ switch (fmt) { case 2: /* illegal */ @@ -357,13 +366,16 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) r1 &= ~3; fpregs[t+3] = fpregs[r1+3]; fpregs[t+2] = fpregs[r1+2]; + fallthrough; case 1: /* double */ fpregs[t+1] = fpregs[r1+1]; + fallthrough; case 0: /* single */ /* copy and set sign bit */ fpregs[t] = fpregs[r1] | 0x80000000; return(NOEXCEPTION); } + BUG(); case 4: /* FSQRT */ switch (fmt) { case 0: @@ -376,6 +388,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) case 3: /* quad not implemented */ return(MAJOR_0C_EXCP); } + BUG(); case 5: /* FRND */ switch (fmt) { case 0: @@ -389,7 +402,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) return(MAJOR_0C_EXCP); } } /* end of switch (subop) */ - + BUG(); case 1: /* class 1 */ df = extru(ir,fpdfpos,2); /* get dest format */ if ((df & 2) || (fmt & 2)) { @@ -419,6 +432,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) case 3: /* dbl/dbl */ return(MAJOR_0C_EXCP); } + BUG(); case 1: /* FCNVXF */ switch(fmt) { case 0: /* sgl/sgl */ @@ -434,6 +448,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) return(dbl_to_dbl_fcnvxf(&fpregs[r1],0, &fpregs[t],status)); } + BUG(); case 2: /* FCNVFX */ switch(fmt) { case 0: /* sgl/sgl */ @@ -449,6 +464,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) return(dbl_to_dbl_fcnvfx(&fpregs[r1],0, &fpregs[t],status)); } + BUG(); case 3: /* FCNVFXT */ switch(fmt) { case 0: /* sgl/sgl */ @@ -464,6 +480,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) return(dbl_to_dbl_fcnvfxt(&fpregs[r1],0, &fpregs[t],status)); } + 
BUG(); case 5: /* FCNVUF (PA2.0 only) */ switch(fmt) { case 0: /* sgl/sgl */ @@ -479,6 +496,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) return(dbl_to_dbl_fcnvuf(&fpregs[r1],0, &fpregs[t],status)); } + BUG(); case 6: /* FCNVFU (PA2.0 only) */ switch(fmt) { case 0: /* sgl/sgl */ @@ -494,6 +512,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) return(dbl_to_dbl_fcnvfu(&fpregs[r1],0, &fpregs[t],status)); } + BUG(); case 7: /* FCNVFUT (PA2.0 only) */ switch(fmt) { case 0: /* sgl/sgl */ @@ -509,10 +528,11 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) return(dbl_to_dbl_fcnvfut(&fpregs[r1],0, &fpregs[t],status)); } + BUG(); case 4: /* undefined */ return(MAJOR_0C_EXCP); } /* end of switch subop */ - + BUG(); case 2: /* class 2 */ fpu_type_flags=fpregs[FPU_TYPE_FLAG_POS]; r2 = extru(ir, fpr2pos, 5) * sizeof(double)/sizeof(u_int); @@ -590,6 +610,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) case 3: /* quad not implemented */ return(MAJOR_0C_EXCP); } + BUG(); case 1: /* FTEST */ switch (fmt) { case 0: @@ -609,8 +630,10 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) case 3: return(MAJOR_0C_EXCP); } + BUG(); } /* end of switch subop */ } /* end of else for PA1.0 & PA1.1 */ + BUG(); case 3: /* class 3 */ r2 = extru(ir,fpr2pos,5) * sizeof(double)/sizeof(u_int); if (r2 == 0) @@ -633,6 +656,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) case 3: /* quad not implemented */ return(MAJOR_0C_EXCP); } + BUG(); case 1: /* FSUB */ switch (fmt) { case 0: @@ -645,6 +669,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) case 3: /* quad not implemented */ return(MAJOR_0C_EXCP); } + BUG(); case 2: /* FMPY */ switch (fmt) { case 0: @@ -657,6 +682,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) case 3: /* quad not implemented */ return(MAJOR_0C_EXCP); } + BUG(); case 3: /* FDIV */ switch (fmt) { case 0: @@ -669,6 +695,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) case 3: /* quad not implemented */ return(MAJOR_0C_EXCP); } + BUG(); case 4: /* FREM */ switch (fmt) { case 0: @@ -681,6 +708,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) case 3: /* quad not implemented */ return(MAJOR_0C_EXCP); } + BUG(); } /* end of class 3 switch */ } /* end of switch(class) */ @@ -736,10 +764,12 @@ u_int fpregs[]; return(MAJOR_0E_EXCP); case 1: /* double */ fpregs[t+1] = fpregs[r1+1]; + fallthrough; case 0: /* single */ fpregs[t] = fpregs[r1]; return(NOEXCEPTION); } + BUG(); case 3: /* FABS */ switch (fmt) { case 2: @@ -747,10 +777,12 @@ u_int fpregs[]; return(MAJOR_0E_EXCP); case 1: /* double */ fpregs[t+1] = fpregs[r1+1]; + fallthrough; case 0: /* single */ fpregs[t] = fpregs[r1] & 0x7fffffff; return(NOEXCEPTION); } + BUG(); case 6: /* FNEG */ switch (fmt) { case 2: @@ -758,10 +790,12 @@ u_int fpregs[]; return(MAJOR_0E_EXCP); case 1: /* double */ fpregs[t+1] = fpregs[r1+1]; + fallthrough; case 0: /* single */ fpregs[t] = fpregs[r1] ^ 0x80000000; return(NOEXCEPTION); } + BUG(); case 7: /* FNEGABS */ switch (fmt) { case 2: @@ -769,10 +803,12 @@ u_int fpregs[]; return(MAJOR_0E_EXCP); case 1: /* double */ fpregs[t+1] = fpregs[r1+1]; + fallthrough; case 0: /* single */ fpregs[t] = fpregs[r1] | 0x80000000; return(NOEXCEPTION); } + BUG(); case 4: /* FSQRT */ switch (fmt) { case 0: @@ -785,6 +821,7 @@ u_int fpregs[]; case 3: return(MAJOR_0E_EXCP); } + BUG(); case 5: /* FRMD */ switch (fmt) { case 0: @@ -798,7 +835,7 @@ u_int fpregs[]; 
return(MAJOR_0E_EXCP); } } /* end of switch (subop */ - + BUG(); case 1: /* class 1 */ df = extru(ir,fpdfpos,2); /* get dest format */ /* @@ -826,6 +863,7 @@ u_int fpregs[]; case 3: /* dbl/dbl */ return(MAJOR_0E_EXCP); } + BUG(); case 1: /* FCNVXF */ switch(fmt) { case 0: /* sgl/sgl */ @@ -841,6 +879,7 @@ u_int fpregs[]; return(dbl_to_dbl_fcnvxf(&fpregs[r1],0, &fpregs[t],status)); } + BUG(); case 2: /* FCNVFX */ switch(fmt) { case 0: /* sgl/sgl */ @@ -856,6 +895,7 @@ u_int fpregs[]; return(dbl_to_dbl_fcnvfx(&fpregs[r1],0, &fpregs[t],status)); } + BUG(); case 3: /* FCNVFXT */ switch(fmt) { case 0: /* sgl/sgl */ @@ -871,6 +911,7 @@ u_int fpregs[]; return(dbl_to_dbl_fcnvfxt(&fpregs[r1],0, &fpregs[t],status)); } + BUG(); case 5: /* FCNVUF (PA2.0 only) */ switch(fmt) { case 0: /* sgl/sgl */ @@ -886,6 +927,7 @@ u_int fpregs[]; return(dbl_to_dbl_fcnvuf(&fpregs[r1],0, &fpregs[t],status)); } + BUG(); case 6: /* FCNVFU (PA2.0 only) */ switch(fmt) { case 0: /* sgl/sgl */ @@ -901,6 +943,7 @@ u_int fpregs[]; return(dbl_to_dbl_fcnvfu(&fpregs[r1],0, &fpregs[t],status)); } + BUG(); case 7: /* FCNVFUT (PA2.0 only) */ switch(fmt) { case 0: /* sgl/sgl */ @@ -916,9 +959,11 @@ u_int fpregs[]; return(dbl_to_dbl_fcnvfut(&fpregs[r1],0, &fpregs[t],status)); } + BUG(); case 4: /* undefined */ return(MAJOR_0C_EXCP); } /* end of switch subop */ + BUG(); case 2: /* class 2 */ /* * Be careful out there. @@ -994,6 +1039,7 @@ u_int fpregs[]; } } /* end of switch subop */ } /* end of else for PA1.0 & PA1.1 */ + BUG(); case 3: /* class 3 */ /* * Be careful out there. @@ -1026,6 +1072,7 @@ u_int fpregs[]; return(dbl_fadd(&fpregs[r1],&fpregs[r2], &fpregs[t],status)); } + BUG(); case 1: /* FSUB */ switch (fmt) { case 0: @@ -1035,6 +1082,7 @@ u_int fpregs[]; return(dbl_fsub(&fpregs[r1],&fpregs[r2], &fpregs[t],status)); } + BUG(); case 2: /* FMPY or XMPYU */ /* * check for integer multiply (x bit set) @@ -1071,6 +1119,7 @@ u_int fpregs[]; &fpregs[r2],&fpregs[t],status)); } } + BUG(); case 3: /* FDIV */ switch (fmt) { case 0: @@ -1080,6 +1129,7 @@ u_int fpregs[]; return(dbl_fdiv(&fpregs[r1],&fpregs[r2], &fpregs[t],status)); } + BUG(); case 4: /* FREM */ switch (fmt) { case 0: -- cgit From bea6a94a279bcbe6b2cde348782b28baf12255a5 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Thu, 2 Sep 2021 09:19:51 +0200 Subject: MIPS: Malta: fix alignment of the devicetree buffer Starting with following patch MIPS Malta is not able to boot: | commit 79edff12060fe7772af08607eff50c0e2486c5ba | Author: Rob Herring | scripts/dtc: Update to upstream version v1.6.0-51-g183df9e9c2b9 The reason is the alignment test added to the fdt_ro_probe_(). To fix this issue, we need to make sure that fdt_buf is aligned. Since the dtc patch was designed to uncover potential issue, I handle initial MIPS Malta patch as initial bug. 
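The new libfdt check that rejects the buffer is, paraphrased (the exact form lives in fdt_ro_probe_() in the updated scripts/dtc/libfdt):

        /* sketch: every blob handed to libfdt must now be 8-byte
         * aligned, otherwise all fdt_*() calls fail up front */
        if ((uintptr_t)fdt & 7)
                return -FDT_ERR_ALIGNMENT;

which is why the statically allocated fdt_buf below gains an explicit __aligned(8).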
Fixes: e81a8c7dabac ("MIPS: Malta: Setup RAM regions via DT") Signed-off-by: Oleksij Rempel Signed-off-by: Thomas Bogendoerfer --- arch/mips/mti-malta/malta-dtshim.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/mti-malta/malta-dtshim.c b/arch/mips/mti-malta/malta-dtshim.c index 0ddf03df6268..f451268f6c38 100644 --- a/arch/mips/mti-malta/malta-dtshim.c +++ b/arch/mips/mti-malta/malta-dtshim.c @@ -22,7 +22,7 @@ #define ROCIT_CONFIG_GEN1_MEMMAP_SHIFT 8 #define ROCIT_CONFIG_GEN1_MEMMAP_MASK (0xf << 8) -static unsigned char fdt_buf[16 << 10] __initdata; +static unsigned char fdt_buf[16 << 10] __initdata __aligned(8); /* determined physical memory size, not overridden by command line args */ extern unsigned long physical_memsize; -- cgit From 0da14a19493da0f9b4c5ee5930ab05a4c61f5883 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 22 Aug 2021 20:49:20 +0200 Subject: x86/PCI: sta2x11: switch from 'pci_' to 'dma_' API The wrappers in include/linux/pci-dma-compat.h should go away. The patch has been generated with the coccinelle script below. It has been hand modified to use 'dma_set_mask_and_coherent()' instead of 'pci_set_dma_mask()/pci_set_consistent_dma_mask()' when applicable. This is less verbose. It has been compile tested. @@ @@ - PCI_DMA_BIDIRECTIONAL + DMA_BIDIRECTIONAL @@ @@ - PCI_DMA_TODEVICE + DMA_TO_DEVICE @@ @@ - PCI_DMA_FROMDEVICE + DMA_FROM_DEVICE @@ @@ - PCI_DMA_NONE + DMA_NONE @@ expression e1, e2, e3; @@ - pci_alloc_consistent(e1, e2, e3) + dma_alloc_coherent(&e1->dev, e2, e3, GFP_) @@ expression e1, e2, e3; @@ - pci_zalloc_consistent(e1, e2, e3) + dma_alloc_coherent(&e1->dev, e2, e3, GFP_) @@ expression e1, e2, e3, e4; @@ - pci_free_consistent(e1, e2, e3, e4) + dma_free_coherent(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_map_single(e1, e2, e3, e4) + dma_map_single(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_unmap_single(e1, e2, e3, e4) + dma_unmap_single(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4, e5; @@ - pci_map_page(e1, e2, e3, e4, e5) + dma_map_page(&e1->dev, e2, e3, e4, e5) @@ expression e1, e2, e3, e4; @@ - pci_unmap_page(e1, e2, e3, e4) + dma_unmap_page(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_map_sg(e1, e2, e3, e4) + dma_map_sg(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_unmap_sg(e1, e2, e3, e4) + dma_unmap_sg(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_dma_sync_single_for_cpu(e1, e2, e3, e4) + dma_sync_single_for_cpu(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_dma_sync_single_for_device(e1, e2, e3, e4) + dma_sync_single_for_device(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_dma_sync_sg_for_cpu(e1, e2, e3, e4) + dma_sync_sg_for_cpu(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_dma_sync_sg_for_device(e1, e2, e3, e4) + dma_sync_sg_for_device(&e1->dev, e2, e3, e4) @@ expression e1, e2; @@ - pci_dma_mapping_error(e1, e2) + dma_mapping_error(&e1->dev, e2) @@ expression e1, e2; @@ - pci_set_dma_mask(e1, e2) + dma_set_mask(&e1->dev, e2) @@ expression e1, e2; @@ - pci_set_consistent_dma_mask(e1, e2) + dma_set_coherent_mask(&e1->dev, e2) Link: https://lore.kernel.org/r/99656452963ba3c63a6cb12e151279d81da365eb.1629658069.git.christophe.jaillet@wanadoo.fr Link: https://lore.kernel.org/kernel-janitors/20200421081257.GA131897@infradead.org/ Signed-off-by: Christophe JAILLET Signed-off-by: Bjorn Helgaas --- arch/x86/pci/sta2x11-fixup.c | 3 +-- 1 file changed, 1 insertion(+), 2 
deletions(-) (limited to 'arch') diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c index 7d2525691854..101081ad64b6 100644 --- a/arch/x86/pci/sta2x11-fixup.c +++ b/arch/x86/pci/sta2x11-fixup.c @@ -146,8 +146,7 @@ static void sta2x11_map_ep(struct pci_dev *pdev) dev_err(dev, "sta2x11: could not set DMA offset\n"); dev->bus_dma_limit = max_amba_addr; - pci_set_consistent_dma_mask(pdev, max_amba_addr); - pci_set_dma_mask(pdev, max_amba_addr); + dma_set_mask_and_coherent(&pdev->dev, max_amba_addr); /* Configure AHB mapping */ pci_write_config_dword(pdev, AHB_PEXLBASE(0), 0); -- cgit From e5a2cac908df691f1637f9272d4c6dec83239611 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 2 Sep 2021 23:54:03 +0200 Subject: parisc: Drop __arch_swab16(), arch_swab24(), _arch_swab32() and __arch_swab64() functions No need to keep those as inline assembly functions, the compiler now generates the same or even better optimized code. Signed-off-by: Helge Deller --- arch/parisc/include/uapi/asm/swab.h | 68 ------------------------------------- 1 file changed, 68 deletions(-) delete mode 100644 arch/parisc/include/uapi/asm/swab.h (limited to 'arch') diff --git a/arch/parisc/include/uapi/asm/swab.h b/arch/parisc/include/uapi/asm/swab.h deleted file mode 100644 index 35fb2d1bfbbd..000000000000 --- a/arch/parisc/include/uapi/asm/swab.h +++ /dev/null @@ -1,68 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _PARISC_SWAB_H -#define _PARISC_SWAB_H - -#include -#include -#include - -#define __SWAB_64_THRU_32__ - -static inline __attribute_const__ __u16 __arch_swab16(__u16 x) -{ - __asm__("dep %0, 15, 8, %0\n\t" /* deposit 00ab -> 0bab */ - "shd %%r0, %0, 8, %0" /* shift 000000ab -> 00ba */ - : "=r" (x) - : "0" (x)); - return x; -} -#define __arch_swab16 __arch_swab16 - -static inline __attribute_const__ __u32 __arch_swab24(__u32 x) -{ - __asm__("shd %0, %0, 8, %0\n\t" /* shift xabcxabc -> cxab */ - "dep %0, 15, 8, %0\n\t" /* deposit cxab -> cbab */ - "shd %%r0, %0, 8, %0" /* shift 0000cbab -> 0cba */ - : "=r" (x) - : "0" (x)); - return x; -} - -static inline __attribute_const__ __u32 __arch_swab32(__u32 x) -{ - unsigned int temp; - __asm__("shd %0, %0, 16, %1\n\t" /* shift abcdabcd -> cdab */ - "dep %1, 15, 8, %1\n\t" /* deposit cdab -> cbab */ - "shd %0, %1, 8, %0" /* shift abcdcbab -> dcba */ - : "=r" (x), "=&r" (temp) - : "0" (x)); - return x; -} -#define __arch_swab32 __arch_swab32 - -#if __BITS_PER_LONG > 32 -/* -** From "PA-RISC 2.0 Architecture", HP Professional Books. -** See Appendix I page 8 , "Endian Byte Swapping". -** -** Pretty cool algorithm: (* == zero'd bits) -** PERMH 01234567 -> 67452301 into %0 -** HSHL 67452301 -> 7*5*3*1* into %1 -** HSHR 67452301 -> *6*4*2*0 into %0 -** OR %0 | %1 -> 76543210 into %0 (all done!) -*/ -static inline __attribute_const__ __u64 __arch_swab64(__u64 x) -{ - __u64 temp; - __asm__("permh,3210 %0, %0\n\t" - "hshl %0, 8, %1\n\t" - "hshr,u %0, 8, %0\n\t" - "or %1, %0, %0" - : "=r" (x), "=&r" (temp) - : "0" (x)); - return x; -} -#define __arch_swab64 __arch_swab64 -#endif /* __BITS_PER_LONG > 32 */ - -#endif /* _PARISC_SWAB_H */ -- cgit From 55a6d00ed0c1b4d20d7966d00fda6848d776658d Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 19 Aug 2021 10:22:51 +0900 Subject: x86/build/vdso: fix missing FORCE for *.so build rule Add FORCE so that if_changed can detect the command line change. 
Signed-off-by: Masahiro Yamada --- arch/x86/entry/vdso/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index 05c4abc2fdfd..a2dddcc189f6 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -131,7 +131,7 @@ $(obj)/%-x32.o: $(obj)/%.o FORCE targets += vdsox32.lds $(vobjx32s-y) $(obj)/%.so: OBJCOPYFLAGS := -S --remove-section __ex_table -$(obj)/%.so: $(obj)/%.so.dbg +$(obj)/%.so: $(obj)/%.so.dbg FORCE $(call if_changed,objcopy) $(obj)/vdsox32.so.dbg: $(obj)/vdsox32.lds $(vobjx32s) FORCE -- cgit From 25c648a066c1cc5ed763f2354531fdff41ac83a1 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 7 Apr 2021 07:34:17 +0200 Subject: kbuild: sh: remove unused install script The sh arch has an install.sh script, but no Makefile actually calls it. Remove it to keep anyone from accidentally calling it in the future. Signed-off-by: Greg Kroah-Hartman Reviewed-by: Kees Cook Signed-off-by: Masahiro Yamada --- arch/sh/boot/compressed/install.sh | 56 -------------------------------------- 1 file changed, 56 deletions(-) delete mode 100644 arch/sh/boot/compressed/install.sh (limited to 'arch') diff --git a/arch/sh/boot/compressed/install.sh b/arch/sh/boot/compressed/install.sh deleted file mode 100644 index f9f41818b17e..000000000000 --- a/arch/sh/boot/compressed/install.sh +++ /dev/null @@ -1,56 +0,0 @@ -#!/bin/sh -# -# arch/sh/boot/install.sh -# -# This file is subject to the terms and conditions of the GNU General Public -# License. See the file "COPYING" in the main directory of this archive -# for more details. -# -# Copyright (C) 1995 by Linus Torvalds -# -# Adapted from code in arch/i386/boot/Makefile by H. Peter Anvin -# Adapted from code in arch/i386/boot/install.sh by Russell King -# Adapted from code in arch/arm/boot/install.sh by Stuart Menefy -# -# "make install" script for sh architecture -# -# Arguments: -# $1 - kernel version -# $2 - kernel image file -# $3 - kernel map file -# $4 - default install path (blank if root directory) -# - -# User may have a custom install script - -if [ -x /sbin/${INSTALLKERNEL} ]; then - exec /sbin/${INSTALLKERNEL} "$@" -fi - -if [ "$2" = "zImage" ]; then -# Compressed install - echo "Installing compressed kernel" - if [ -f $4/vmlinuz-$1 ]; then - mv $4/vmlinuz-$1 $4/vmlinuz.old - fi - - if [ -f $4/System.map-$1 ]; then - mv $4/System.map-$1 $4/System.old - fi - - cat $2 > $4/vmlinuz-$1 - cp $3 $4/System.map-$1 -else -# Normal install - echo "Installing normal kernel" - if [ -f $4/vmlinux-$1 ]; then - mv $4/vmlinux-$1 $4/vmlinux.old - fi - - if [ -f $4/System.map ]; then - mv $4/System.map $4/System.old - fi - - cat $2 > $4/vmlinux-$1 - cp $3 $4/System.map -fi -- cgit From 87c3cb564f3e34626f1afc0286b864045559b403 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 29 Jul 2021 23:24:40 +0900 Subject: sparc: move the install rule to arch/sparc/Makefile Currently, the install target in arch/sparc/Makefile descends into arch/sparc/boot/Makefile to invoke the shell script, but there is no good reason to do so. arch/sparc/Makefile can run the shell script directly.
Signed-off-by: Masahiro Yamada --- arch/sparc/Makefile | 3 ++- arch/sparc/boot/Makefile | 4 ---- 2 files changed, 2 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile index 4e65245bc755..24fb5a99f439 100644 --- a/arch/sparc/Makefile +++ b/arch/sparc/Makefile @@ -72,7 +72,8 @@ image zImage uImage tftpboot.img vmlinux.aout: vmlinux $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ install: - $(Q)$(MAKE) $(build)=$(boot) $@ + sh $(srctree)/$(boot)/install.sh $(KERNELRELEASE) $(KBUILD_IMAGE) \ + System.map "$(INSTALL_PATH)" archclean: $(Q)$(MAKE) $(clean)=$(boot) diff --git a/arch/sparc/boot/Makefile b/arch/sparc/boot/Makefile index 380e2b018992..849236d4eca4 100644 --- a/arch/sparc/boot/Makefile +++ b/arch/sparc/boot/Makefile @@ -70,7 +70,3 @@ $(obj)/image: vmlinux FORCE $(obj)/tftpboot.img: $(obj)/image $(obj)/piggyback System.map $(ROOT_IMG) FORCE $(call if_changed,elftoaout) $(call if_changed,piggy) - -install: - sh $(srctree)/$(src)/install.sh $(KERNELRELEASE) $(obj)/zImage \ - System.map "$(INSTALL_PATH)" -- cgit From ba3e87cfa2a0f2db889297b6fc8a9e91a35c2d21 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 12 Aug 2021 01:43:12 +0900 Subject: ia64: move core-y in arch/ia64/Makefile to arch/ia64/Kbuild Use obj-y to clean up Makefile. Signed-off-by: Masahiro Yamada --- arch/ia64/Kbuild | 2 ++ arch/ia64/Makefile | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/ia64/Kbuild b/arch/ia64/Kbuild index a4e40e534e6a..e77cc76d228c 100644 --- a/arch/ia64/Kbuild +++ b/arch/ia64/Kbuild @@ -1 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only +obj-y += kernel/ mm/ +obj-$(CONFIG_IA64_SGI_UV) += uv/ diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile index 467b7e7f967c..7e548c654a29 100644 --- a/arch/ia64/Makefile +++ b/arch/ia64/Makefile @@ -47,8 +47,6 @@ KBUILD_CFLAGS += $(cflags-y) head-y := arch/ia64/kernel/head.o libs-y += arch/ia64/lib/ -core-y += arch/ia64/kernel/ arch/ia64/mm/ -core-$(CONFIG_IA64_SGI_UV) += arch/ia64/uv/ drivers-y += arch/ia64/pci/ arch/ia64/hp/common/ -- cgit From ff00f64bceb164267dccc3648eb299aeafd3a342 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Mon, 16 Aug 2021 17:21:04 -0700 Subject: s390: replace cc-option-yn uses with cc-option cc-option-yn can be replaced with cc-option. ie. Checking for support: ifeq ($(call cc-option-yn,$(FLAG)),y) becomes: ifneq ($(call cc-option,$(FLAG)),) Checking for lack of support: ifeq ($(call cc-option-yn,$(FLAG)),n) becomes: ifeq ($(call cc-option,$(FLAG)),) This allows us to pursue removing cc-option-yn. 
Signed-off-by: Nick Desaulniers Reviewed-by: Nathan Chancellor Acked-by: Heiko Carstens Signed-off-by: Masahiro Yamada --- arch/s390/Makefile | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 1e3172877982..f5c399851d70 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -70,7 +70,7 @@ cflags-y += -Wa,-I$(srctree)/arch/$(ARCH)/include # cflags-$(CONFIG_FRAME_POINTER) += -fno-optimize-sibling-calls -ifeq ($(call cc-option-yn,-mpacked-stack -mbackchain -msoft-float),y) +ifneq ($(call cc-option,-mpacked-stack -mbackchain -msoft-float),) cflags-$(CONFIG_PACK_STACK) += -mpacked-stack -D__PACK_STACK aflags-$(CONFIG_PACK_STACK) += -D__PACK_STACK endif @@ -78,22 +78,22 @@ endif KBUILD_AFLAGS_DECOMPRESSOR += $(aflags-y) KBUILD_CFLAGS_DECOMPRESSOR += $(cflags-y) -ifeq ($(call cc-option-yn,-mstack-size=8192 -mstack-guard=128),y) +ifneq ($(call cc-option,-mstack-size=8192 -mstack-guard=128),) cflags-$(CONFIG_CHECK_STACK) += -mstack-size=$(STACK_SIZE) -ifneq ($(call cc-option-yn,-mstack-size=8192),y) +ifeq ($(call cc-option,-mstack-size=8192),) cflags-$(CONFIG_CHECK_STACK) += -mstack-guard=$(CONFIG_STACK_GUARD) endif endif ifdef CONFIG_WARN_DYNAMIC_STACK - ifeq ($(call cc-option-yn,-mwarn-dynamicstack),y) + ifneq ($(call cc-option,-mwarn-dynamicstack),) KBUILD_CFLAGS += -mwarn-dynamicstack KBUILD_CFLAGS_DECOMPRESSOR += -mwarn-dynamicstack endif endif ifdef CONFIG_EXPOLINE - ifeq ($(call cc-option-yn,$(CC_FLAGS_MARCH) -mindirect-branch=thunk),y) + ifneq ($(call cc-option,$(CC_FLAGS_MARCH) -mindirect-branch=thunk),) CC_FLAGS_EXPOLINE := -mindirect-branch=thunk CC_FLAGS_EXPOLINE += -mfunction-return=thunk CC_FLAGS_EXPOLINE += -mindirect-branch-table @@ -104,10 +104,10 @@ ifdef CONFIG_EXPOLINE endif ifdef CONFIG_FUNCTION_TRACER - ifeq ($(call cc-option-yn,-mfentry -mnop-mcount),n) + ifeq ($(call cc-option,-mfentry -mnop-mcount),) # make use of hotpatch feature if the compiler supports it cc_hotpatch := -mhotpatch=0,3 - ifeq ($(call cc-option-yn,$(cc_hotpatch)),y) + ifneq ($(call cc-option,$(cc_hotpatch)),) CC_FLAGS_FTRACE := $(cc_hotpatch) KBUILD_AFLAGS += -DCC_USING_HOTPATCH KBUILD_CFLAGS += -DCC_USING_HOTPATCH -- cgit From 43e6b58f793c47b0a6772a112e6586cd1e24b239 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Mon, 16 Aug 2021 17:21:06 -0700 Subject: arc: replace cc-option-yn uses with cc-option cc-option-yn can be replaced with cc-option. ie. Checking for support: ifeq ($(call cc-option-yn,$(FLAG)),y) becomes: ifneq ($(call cc-option,$(FLAG)),) Checking for lack of support: ifeq ($(call cc-option-yn,$(FLAG)),n) becomes: ifeq ($(call cc-option,$(FLAG)),) This allows us to pursue removing cc-option-yn. 
Signed-off-by: Nick Desaulniers Reviewed-by: Nathan Chancellor Signed-off-by: Masahiro Yamada --- arch/arc/Makefile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arc/Makefile b/arch/arc/Makefile index c0d87ac2e221..8782a03f24a8 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -18,8 +18,7 @@ ifeq ($(CONFIG_ARC_TUNE_MCPU),"") cflags-y += $(tune-mcpu-def-y) else tune-mcpu := $(shell echo $(CONFIG_ARC_TUNE_MCPU)) -tune-mcpu-ok := $(call cc-option-yn, $(tune-mcpu)) -ifeq ($(tune-mcpu-ok),y) +ifneq ($(call cc-option,$(tune-mcpu)),) cflags-y += $(tune-mcpu) else # The flag provided by 'CONFIG_ARC_TUNE_MCPU' option isn't known by this compiler -- cgit From 7ab44e9ee5f241c0ed19e350d17e80bd90bde93d Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Mon, 16 Aug 2021 17:21:07 -0700 Subject: x86: remove cc-option-yn test for -mtune= As noted in the comment, -mtune= has been supported since GCC 3.4. The minimum required version of GCC to build the kernel (as specified in Documentation/process/changes.rst) is GCC 4.9. tune is not immediately expanded. Instead, it defines a macro that will later test values for -mtune= via cc-option. But we can skip the test of whether to use -mtune= vs. -mcpu=. Signed-off-by: Nick Desaulniers Reviewed-by: Nathan Chancellor Reviewed-by: Miguel Ojeda Signed-off-by: Masahiro Yamada --- arch/x86/Makefile_32.cpu | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu index cd3056759880..e7355f8b51c2 100644 --- a/arch/x86/Makefile_32.cpu +++ b/arch/x86/Makefile_32.cpu @@ -2,13 +2,7 @@ # CPU tuning section - shared with UML. # Must change only cflags-y (or [yn]), not CFLAGS! That makes a difference for UML. -#-mtune exists since gcc 3.4 -HAS_MTUNE := $(call cc-option-yn, -mtune=i386) -ifeq ($(HAS_MTUNE),y) tune = $(call cc-option,-mtune=$(1),$(2)) -else -tune = $(call cc-option,-mcpu=$(1),$(2)) -endif cflags-$(CONFIG_M486SX) += -march=i486 cflags-$(CONFIG_M486) += -march=i486 -- cgit From c42813b71a06a2ff4a155aa87ac609feeab76cf3 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 2 Sep 2021 23:24:42 +0200 Subject: parisc: Fix unaligned-access crash in bootloader Kernel v5.14 has various changes to optimize unaligned memory accesses, e.g. commit 0652035a5794 ("asm-generic: unaligned: remove byteshift helpers"). Those changes triggered an unalignment-exception and thus crashed the bootloader on parisc because the unaligned "output_len" variable was now suddenly read word-wise, while it was read byte-wise in the past. Fix this issue by declaring the external output_len variable as char, which then forces the compiler to generate byte accesses.
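The idea can be shown with a small standalone sketch (illustrative only, not part of the patch; get_le32_bytewise is a hypothetical helper): when a 32-bit little-endian value sits at a potentially unaligned address, assembling it from individual bytes never issues a word-sized load, which is exactly what the char declaration makes the compiler do for output_len.

#include <stdint.h>

/* Assemble a 32-bit little-endian value one byte at a time, so that
 * no word-sized load is ever issued against an unaligned address.
 */
static uint32_t get_le32_bytewise(const unsigned char *p)
{
	return (uint32_t)p[0] |
	       ((uint32_t)p[1] << 8) |
	       ((uint32_t)p[2] << 16) |
	       ((uint32_t)p[3] << 24);
}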
Signed-off-by: Helge Deller Cc: Arnd Bergmann Cc: John David Anglin Bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102162 Fixes: 8c031ba63f8f ("parisc: Unbreak bootloader due to gcc-7 optimizations") Fixes: 0652035a5794 ("asm-generic: unaligned: remove byteshift helpers") Cc: # v5.14+ --- arch/parisc/boot/compressed/misc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/parisc/boot/compressed/misc.c b/arch/parisc/boot/compressed/misc.c index 2d395998f524..7ee49f5881d1 100644 --- a/arch/parisc/boot/compressed/misc.c +++ b/arch/parisc/boot/compressed/misc.c @@ -26,7 +26,7 @@ extern char input_data[]; extern int input_len; /* output_len is inserted by the linker possibly at an unaligned address */ -extern __le32 output_len __aligned(1); +extern char output_len; extern char _text, _end; extern char _bss, _ebss; extern char _startcode_end; -- cgit From 42be8b42535183f84df99acbaf799e38724348f3 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 22 Apr 2021 12:53:00 +0200 Subject: binfmt: don't use MAP_DENYWRITE when loading shared libraries via uselib() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit uselib() is the legacy system call for loading shared libraries. Nowadays, applications use dlopen() to load shared libraries, which is completely implemented in user space via mmap(). For example, glibc uses MAP_COPY to mmap shared libraries. While this maps to MAP_PRIVATE | MAP_DENYWRITE on Linux, Linux ignores any MAP_DENYWRITE specification from user space in mmap. With this change, all remaining in-tree users of MAP_DENYWRITE use it to map an executable. We will be able to open shared libraries loaded via uselib() writable, just as we already can via dlopen() from user space. This is one step in the direction of removing MAP_DENYWRITE from the kernel. This can be considered a minor user space visible change. Acked-by: "Eric W. Biederman" Acked-by: Christian König Signed-off-by: David Hildenbrand --- arch/x86/ia32/ia32_aout.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c index 5e5b9fc2747f..321d7b22ad2d 100644 --- a/arch/x86/ia32/ia32_aout.c +++ b/arch/x86/ia32/ia32_aout.c @@ -293,7 +293,7 @@ static int load_aout_library(struct file *file) /* Now use mmap to map the library into memory. */ error = vm_mmap(file, start_addr, ex.a_text + ex.a_data, PROT_READ | PROT_WRITE | PROT_EXEC, - MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_32BIT, + MAP_FIXED | MAP_PRIVATE | MAP_32BIT, N_TXTOFF(ex)); retval = error; if (error != start_addr) -- cgit From 4589ff7ca81516381393649dec8dd4948884b2b2 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 23 Apr 2021 09:42:41 +0200 Subject: binfmt: remove in-tree usage of MAP_DENYWRITE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit At exec time, when we mmap the new executable via MAP_DENYWRITE, we have it opened via do_open_execat() and already deny_write_access()'ed the file successfully. Once exec completes, we allow_write_access(); however, we set mm->exe_file in begin_new_exec() via set_mm_exe_file() and also deny_write_access() as long as mm->exe_file remains set. We'll effectively deny write access to our executable via mm->exe_file until mm->exe_file is changed -- when the process is removed, on new exec, or via sys_prctl(PR_SET_MM_MAP/EXE_FILE). Let's remove all usage of MAP_DENYWRITE, it's no longer necessary for mm->exe_file.
In the case of an ELF interpreter, we'll now only deny write access to the file during exec. This is somewhat okay, because the interpreter behaves like (and sometimes is) a shared library; all shared libraries, especially the ones loaded directly in user space, such as via dlopen(), won't ever be mapped via MAP_DENYWRITE, because we ignore that from user space completely; these shared libraries can always be modified while mapped and executed. Let's only special-case the main executable, denying write access while it is being executed by a process. This can be considered a minor user space visible change. While this is a cleanup, it also fixes part of a problem reported with VM_DENYWRITE on overlayfs, as VM_DENYWRITE is effectively unused with this patch and will be removed next: "Overlayfs did not honor positive i_writecount on realfile for VM_DENYWRITE mappings." [1] [1] https://lore.kernel.org/r/YNHXzBgzRrZu1MrD@miu.piliscsaba.redhat.com/ Reported-by: Chengguang Xu Acked-by: "Eric W. Biederman" Acked-by: Christian König Signed-off-by: David Hildenbrand --- arch/x86/ia32/ia32_aout.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c index 321d7b22ad2d..9bd15241fadb 100644 --- a/arch/x86/ia32/ia32_aout.c +++ b/arch/x86/ia32/ia32_aout.c @@ -202,8 +202,7 @@ static int load_aout_binary(struct linux_binprm *bprm) error = vm_mmap(bprm->file, N_TXTADDR(ex), ex.a_text, PROT_READ | PROT_EXEC, - MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | - MAP_32BIT, + MAP_FIXED | MAP_PRIVATE | MAP_32BIT, fd_offset); if (error != N_TXTADDR(ex)) @@ -211,8 +210,7 @@ static int load_aout_binary(struct linux_binprm *bprm) error = vm_mmap(bprm->file, N_DATADDR(ex), ex.a_data, PROT_READ | PROT_WRITE | PROT_EXEC, - MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | - MAP_32BIT, + MAP_FIXED | MAP_PRIVATE | MAP_32BIT, fd_offset + ex.a_text); if (error != N_DATADDR(ex)) return error; -- cgit From 577706de69c1e53bc23893953770d829a2242c38 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 2 Sep 2021 14:49:58 -0700 Subject: ia64: fix typo in a comment s/when when/when/ Link: https://lkml.kernel.org/r/20210817112500.12848-1-wangborong@cdjrlc.com Signed-off-by: Jason Wang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/kernel/acpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index e2af6b172200..96d13cb7c19f 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -906,6 +906,6 @@ EXPORT_SYMBOL(acpi_unregister_ioapic); /* * acpi_suspend_lowlevel() - save kernel state and suspend. * - * TBD when when IA64 starts to support suspend... + * TBD when IA64 starts to support suspend... */ int acpi_suspend_lowlevel(void) { return 0; } -- cgit From 1d1f4bf845d36d73b27ed37790cae0da3112ca77 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 2 Sep 2021 14:50:01 -0700 Subject: ia64: fix #endif comment for reserve_elfcorehdr() Patch series "ia64: Miscellaneous fixes and cleanups". This patch series contains some miscellaneous fixes and cleanups for ia64. The second patch fixes a naming conflict triggered by a patch for the FDT code. This patch (of 3): The definition of reserve_elfcorehdr() depends on CONFIG_CRASH_DUMP, not CONFIG_PROC_VMCORE.
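As a minimal illustration (a generic sketch, not code from the patch), the comment on an #endif should name the exact condition of the #ifdef it closes, so that long conditional blocks stay readable and greppable:

/* The #endif comment must match the guard it terminates. */
#ifdef CONFIG_CRASH_DUMP
extern int reserve_elfcorehdr(u64 *start, u64 *end);
#endif /* CONFIG_CRASH_DUMP */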
Link: https://lkml.kernel.org/r/cover.1629884459.git.geert+renesas@glider.be Link: https://lkml.kernel.org/r/77b4c0648f200cab7e1c2c5171c06763e09362aa.1629884459.git.geert+renesas@glider.be Fixes: d9a9855d0b06ca6d ("always reserve elfcore header memory in crash kernel") Fixes: 17c1f07ed70afa4f ("[IA64] Reserve elfcorehdr memory in CONFIG_CRASH_DUMP") Signed-off-by: Geert Uytterhoeven Cc: Simon Horman Cc: Tony Luck Cc: Jay Lan Cc: Magnus Damm Cc: Mike Rapoport Cc: Rob Herring Cc: Frank Rowand Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index dd595fbd8006..fbd931f1eb27 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -546,7 +546,7 @@ int __init reserve_elfcorehdr(u64 *start, u64 *end) return 0; } -#endif /* CONFIG_PROC_VMCORE */ +#endif /* CONFIG_CRASH_DUMP */ void __init setup_arch (char **cmdline_p) -- cgit From 70b2e9912a018e9fddb3a1e0cbb397d4d3c0e98f Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 2 Sep 2021 14:50:04 -0700 Subject: ia64: make reserve_elfcorehdr() static There never was a reason for reserve_elfcorehdr() to be global. Make the function static, and move it before its sole caller. Link: https://lkml.kernel.org/r/fe236cd73b64abc4abd03dd808cb015c907f4c8c.1629884459.git.geert+renesas@glider.be Fixes: cee87af2a5f75713 ("[IA64] kexec: Use EFI_LOADER_DATA for ELF core header") Signed-off-by: Geert Uytterhoeven Cc: Frank Rowand Cc: Jay Lan Cc: Magnus Damm Cc: Mike Rapoport Cc: Rob Herring Cc: Simon Horman Cc: Tony Luck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/include/asm/meminit.h | 1 - arch/ia64/kernel/setup.c | 51 ++++++++++++++++++++--------------------- 2 files changed, 25 insertions(+), 27 deletions(-) (limited to 'arch') diff --git a/arch/ia64/include/asm/meminit.h b/arch/ia64/include/asm/meminit.h index 6c47a239fc26..2738f62b5f98 100644 --- a/arch/ia64/include/asm/meminit.h +++ b/arch/ia64/include/asm/meminit.h @@ -40,7 +40,6 @@ extern unsigned long efi_memmap_init(u64 *s, u64 *e); extern int find_max_min_low_pfn (u64, u64, void *); extern unsigned long vmcore_find_descriptor_size(unsigned long address); -extern int reserve_elfcorehdr(u64 *start, u64 *end); /* * For rounding an address to the next IA64_GRANULE_SIZE or order diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index fbd931f1eb27..5e6ee8939092 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -325,6 +325,31 @@ static inline void __init setup_crashkernel(unsigned long total, int *n) {} #endif +#ifdef CONFIG_CRASH_DUMP +static int __init reserve_elfcorehdr(u64 *start, u64 *end) +{ + u64 length; + + /* We get the address using the kernel command line, + * but the size is extracted from the EFI tables. + * Both address and size are required for reservation + * to work properly. 
+ */ + + if (!is_vmcore_usable()) + return -EINVAL; + + if ((length = vmcore_find_descriptor_size(elfcorehdr_addr)) == 0) { + vmcore_unusable(); + return -EINVAL; + } + + *start = (unsigned long)__va(elfcorehdr_addr); + *end = *start + length; + return 0; +} +#endif /* CONFIG_CRASH_DUMP */ + /** * reserve_memory - setup reserved memory areas * @@ -522,32 +547,6 @@ static __init int setup_nomca(char *s) } early_param("nomca", setup_nomca); -#ifdef CONFIG_CRASH_DUMP -int __init reserve_elfcorehdr(u64 *start, u64 *end) -{ - u64 length; - - /* We get the address using the kernel command line, - * but the size is extracted from the EFI tables. - * Both address and size are required for reservation - * to work properly. - */ - - if (!is_vmcore_usable()) - return -EINVAL; - - if ((length = vmcore_find_descriptor_size(elfcorehdr_addr)) == 0) { - vmcore_unusable(); - return -EINVAL; - } - - *start = (unsigned long)__va(elfcorehdr_addr); - *end = *start + length; - return 0; -} - -#endif /* CONFIG_CRASH_DUMP */ - void __init setup_arch (char **cmdline_p) { -- cgit From 7e4265c88968d4e53b164944c05e12e79d8ff9c6 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 2 Sep 2021 14:50:08 -0700 Subject: ia64: make num_rsvd_regions static Commit f62800992e5917f2 ("ia64: switch to NO_BOOTMEM") removed the last user of num_rsvd_regions outside arch/ia64/kernel/setup.c. Link: https://lkml.kernel.org/r/a377b5437e3e9da93d02f996fe06a2b956cb0990.1629884459.git.geert+renesas@glider.be Signed-off-by: Geert Uytterhoeven Cc: Frank Rowand Cc: Jay Lan Cc: Magnus Damm Cc: Mike Rapoport Cc: Rob Herring Cc: Simon Horman Cc: Tony Luck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/include/asm/meminit.h | 1 - arch/ia64/kernel/setup.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/ia64/include/asm/meminit.h b/arch/ia64/include/asm/meminit.h index 2738f62b5f98..f1d5bf2ba847 100644 --- a/arch/ia64/include/asm/meminit.h +++ b/arch/ia64/include/asm/meminit.h @@ -29,7 +29,6 @@ struct rsvd_region { }; extern struct rsvd_region rsvd_region[IA64_MAX_RSVD_REGIONS + 1]; -extern int num_rsvd_regions; extern void find_memory (void); extern void reserve_memory (void); diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index 5e6ee8939092..31fb84de2d21 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -131,7 +131,7 @@ unsigned long ia64_cache_stride_shift = ~0; * We use a special marker for the end of memory and it uses the extra (+1) slot */ struct rsvd_region rsvd_region[IA64_MAX_RSVD_REGIONS + 1] __initdata; -int num_rsvd_regions __initdata; +static int num_rsvd_regions __initdata; /* -- cgit From 4bdffd2708d65e68ff254d90793bb167d828219f Mon Sep 17 00:00:00 2001 From: kernel test robot Date: Thu, 2 Sep 2021 14:50:20 -0700 Subject: arch/csky/kernel/probes/kprobes.c: fix bugon.cocci warnings Use BUG_ON instead of an if condition followed by BUG.
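The transformation is easiest to see on a generic before/after pair (cond stands for any boolean expression; this sketch is not the patch itself):

/* before: open-coded conditional BUG() */
if (!cond)
	BUG();

/* after: same semantics, one statement */
BUG_ON(!cond);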
Generated by: scripts/coccinelle/misc/bugon.cocci Link: https://lkml.kernel.org/r/alpine.DEB.2.22.394.2107061049150.7197@hadrien Fixes: 7d37cb2c912d ("lib: fix kconfig dependency on ARCH_WANT_FRAME_POINTERS") Signed-off-by: kernel test robot Signed-off-by: Julia Lawall Reported-by: kernel test robot Cc: Julian Braha Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/csky/kernel/probes/kprobes.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/csky/kernel/probes/kprobes.c b/arch/csky/kernel/probes/kprobes.c index 68b22b499aeb..8fffa34d4e1c 100644 --- a/arch/csky/kernel/probes/kprobes.c +++ b/arch/csky/kernel/probes/kprobes.c @@ -283,8 +283,7 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int trapnr) * normal page fault. */ regs->pc = (unsigned long) cur->addr; - if (!instruction_pointer(regs)) - BUG(); + BUG_ON(!instruction_pointer(regs)); if (kcb->kprobe_status == KPROBE_REENTER) restore_previous_kprobe(kcb); -- cgit From 9857a17f206ff374aea78bccfb687f145368be2e Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Thu, 2 Sep 2021 14:53:54 -0700 Subject: mm/gup: remove try_get_page(), call try_get_compound_head() directly try_get_page() is very similar to try_get_compound_head(), and in fact try_get_page() has fallen a little behind in terms of maintenance: try_get_compound_head() handles speculative page references more thoroughly. There are only two try_get_page() callsites, so just call try_get_compound_head() directly from those, and remove try_get_page() entirely. Also, seeing as how this changes try_get_compound_head() into a non-static function, provide some kerneldoc documentation for it. Link: https://lkml.kernel.org/r/20210813044133.1536842-4-jhubbard@nvidia.com Signed-off-by: John Hubbard Reviewed-by: Christoph Hellwig Cc: Matthew Wilcox Cc: Christian Borntraeger Cc: Heiko Carstens Cc: Vasily Gorbik Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/s390/mm/fault.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index e33c43b38afe..81d760749987 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -817,7 +817,7 @@ void do_secure_storage_access(struct pt_regs *regs) break; case KERNEL_FAULT: page = phys_to_page(addr); - if (unlikely(!try_get_page(page))) + if (unlikely(!try_get_compound_head(page, 1))) break; rc = arch_make_page_accessible(page); put_page(page); -- cgit From ec403e2ae0dfc85996aad6e944a98a16e6dfcc6d Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Thu, 2 Sep 2021 14:55:43 -0700 Subject: memcg: enable accounting for ldt_struct objects Each task can request its own LDT and force the kernel to allocate up to 64Kb of memory per mm. There are legitimate workloads with hundreds of processes and there can be hundreds of workloads running on large machines. The unaccounted memory can cause isolation issues between the workloads, particularly on highly utilized machines. It makes sense to account for these objects to restrict the host's memory consumption from inside the memcg-limited container. Link: https://lkml.kernel.org/r/38010594-50fe-c06d-7cb0-d1f77ca422f3@virtuozzo.com Signed-off-by: Vasily Averin Acked-by: Borislav Petkov Reviewed-by: Shakeel Butt Cc: Alexander Viro Cc: Alexey Dobriyan Cc: Andrei Vagin Cc: Borislav Petkov Cc: Christian Brauner Cc: Dmitry Safonov <0x7f454c46@gmail.com> Cc: "Eric W. Biederman" Cc: Greg Kroah-Hartman Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: "J.
Bruce Fields" Cc: Jeff Layton Cc: Jens Axboe Cc: Jiri Slaby Cc: Johannes Weiner Cc: Kirill Tkhai Cc: Michal Hocko Cc: Oleg Nesterov Cc: Roman Gushchin Cc: Serge Hallyn Cc: Tejun Heo Cc: Thomas Gleixner Cc: Vladimir Davydov Cc: Yutian Yang Cc: Zefan Li Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/ldt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index aa15132228da..525876e7b9f4 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -154,7 +154,7 @@ static struct ldt_struct *alloc_ldt_struct(unsigned int num_entries) if (num_entries > LDT_ENTRIES) return NULL; - new_ldt = kmalloc(sizeof(struct ldt_struct), GFP_KERNEL); + new_ldt = kmalloc(sizeof(struct ldt_struct), GFP_KERNEL_ACCOUNT); if (!new_ldt) return NULL; @@ -168,9 +168,9 @@ static struct ldt_struct *alloc_ldt_struct(unsigned int num_entries) * than PAGE_SIZE. */ if (alloc_size > PAGE_SIZE) - new_ldt->entries = vzalloc(alloc_size); + new_ldt->entries = __vmalloc(alloc_size, GFP_KERNEL_ACCOUNT | __GFP_ZERO); else - new_ldt->entries = (void *)get_zeroed_page(GFP_KERNEL); + new_ldt->entries = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT); if (!new_ldt->entries) { kfree(new_ldt); -- cgit From f358afc52c3066f4e8cd7b3a2d75b31e822519e9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 2 Sep 2021 14:56:36 -0700 Subject: mm: remove flush_kernel_dcache_page flush_kernel_dcache_page is a rather confusing interface that implements a subset of flush_dcache_page by not being able to properly handle page cache mapped pages. The only callers left are in the exec code as all other previous callers were incorrect as they could have dealt with page cache pages. Replace the calls to flush_kernel_dcache_page with calls to flush_dcache_page, which for all architectures does either exactly the same thing, can contains one or more of the following: 1) an optimization to defer the cache flush for page cache pages not mapped into userspace 2) additional flushing for mapped page cache pages if cache aliases are possible Link: https://lkml.kernel.org/r/20210712060928.4161649-7-hch@lst.de Signed-off-by: Christoph Hellwig Acked-by: Linus Torvalds Reviewed-by: Ira Weiny Cc: Alex Shi Cc: Geoff Levand Cc: Greentime Hu Cc: Guo Ren Cc: Helge Deller Cc: "James E.J. 
Bottomley" Cc: Nick Hu Cc: Paul Cercueil Cc: Rich Felker Cc: Russell King Cc: Thomas Bogendoerfer Cc: Ulf Hansson Cc: Vincent Chen Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/include/asm/cacheflush.h | 4 +--- arch/arm/mm/flush.c | 33 --------------------------------- arch/arm/mm/nommu.c | 6 ------ arch/csky/abiv1/cacheflush.c | 11 ----------- arch/csky/abiv1/inc/abi/cacheflush.h | 4 +--- arch/mips/include/asm/cacheflush.h | 8 +------- arch/nds32/include/asm/cacheflush.h | 3 +-- arch/nds32/mm/cacheflush.c | 9 --------- arch/parisc/include/asm/cacheflush.h | 8 ++------ arch/parisc/kernel/cache.c | 3 +-- arch/sh/include/asm/cacheflush.h | 8 ++------ 11 files changed, 9 insertions(+), 88 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index 2e24e765e6d3..5e56288e343b 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -291,6 +291,7 @@ extern void flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 extern void flush_dcache_page(struct page *); +#define ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE 1 static inline void flush_kernel_vmap_range(void *addr, int size) { if ((cache_is_vivt() || cache_is_vipt_aliasing())) @@ -312,9 +313,6 @@ static inline void flush_anon_page(struct vm_area_struct *vma, __flush_anon_page(vma, page, vmaddr); } -#define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE -extern void flush_kernel_dcache_page(struct page *); - #define flush_dcache_mmap_lock(mapping) xa_lock_irq(&mapping->i_pages) #define flush_dcache_mmap_unlock(mapping) xa_unlock_irq(&mapping->i_pages) diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c index 6d89db7895d1..7ff9feea13a6 100644 --- a/arch/arm/mm/flush.c +++ b/arch/arm/mm/flush.c @@ -345,39 +345,6 @@ void flush_dcache_page(struct page *page) } EXPORT_SYMBOL(flush_dcache_page); -/* - * Ensure cache coherency for the kernel mapping of this page. We can - * assume that the page is pinned via kmap. - * - * If the page only exists in the page cache and there are no user - * space mappings, this is a no-op since the page was already marked - * dirty at creation. Otherwise, we need to flush the dirty kernel - * cache lines directly. - */ -void flush_kernel_dcache_page(struct page *page) -{ - if (cache_is_vivt() || cache_is_vipt_aliasing()) { - struct address_space *mapping; - - mapping = page_mapping_file(page); - - if (!mapping || mapping_mapped(mapping)) { - void *addr; - - addr = page_address(page); - /* - * kmap_atomic() doesn't set the page virtual - * address for highmem pages, and - * kunmap_atomic() takes care of cache - * flushing already. - */ - if (!IS_ENABLED(CONFIG_HIGHMEM) || addr) - __cpuc_flush_dcache_area(addr, PAGE_SIZE); - } - } -} -EXPORT_SYMBOL(flush_kernel_dcache_page); - /* * Flush an anonymous page so that users of get_user_pages() * can safely access the data. 
The expected sequence is: diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c index 8b3d7191e2b8..2658f52903da 100644 --- a/arch/arm/mm/nommu.c +++ b/arch/arm/mm/nommu.c @@ -166,12 +166,6 @@ void flush_dcache_page(struct page *page) } EXPORT_SYMBOL(flush_dcache_page); -void flush_kernel_dcache_page(struct page *page) -{ - __cpuc_flush_dcache_area(page_address(page), PAGE_SIZE); -} -EXPORT_SYMBOL(flush_kernel_dcache_page); - void copy_to_user_page(struct vm_area_struct *vma, struct page *page, unsigned long uaddr, void *dst, const void *src, unsigned long len) diff --git a/arch/csky/abiv1/cacheflush.c b/arch/csky/abiv1/cacheflush.c index 07ff17ea33de..fb91b069dc69 100644 --- a/arch/csky/abiv1/cacheflush.c +++ b/arch/csky/abiv1/cacheflush.c @@ -56,17 +56,6 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr, } } -void flush_kernel_dcache_page(struct page *page) -{ - struct address_space *mapping; - - mapping = page_mapping_file(page); - - if (!mapping || mapping_mapped(mapping)) - dcache_wbinv_all(); -} -EXPORT_SYMBOL(flush_kernel_dcache_page); - void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { diff --git a/arch/csky/abiv1/inc/abi/cacheflush.h b/arch/csky/abiv1/inc/abi/cacheflush.h index 6cab7afae962..ed62e2066ba7 100644 --- a/arch/csky/abiv1/inc/abi/cacheflush.h +++ b/arch/csky/abiv1/inc/abi/cacheflush.h @@ -14,12 +14,10 @@ extern void flush_dcache_page(struct page *); #define flush_cache_page(vma, page, pfn) cache_wbinv_all() #define flush_cache_dup_mm(mm) cache_wbinv_all() -#define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE -extern void flush_kernel_dcache_page(struct page *); - #define flush_dcache_mmap_lock(mapping) xa_lock_irq(&mapping->i_pages) #define flush_dcache_mmap_unlock(mapping) xa_unlock_irq(&mapping->i_pages) +#define ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE 1 static inline void flush_kernel_vmap_range(void *addr, int size) { dcache_wbinv_all(); diff --git a/arch/mips/include/asm/cacheflush.h b/arch/mips/include/asm/cacheflush.h index d687b40b9fbb..b3dc9c589442 100644 --- a/arch/mips/include/asm/cacheflush.h +++ b/arch/mips/include/asm/cacheflush.h @@ -125,13 +125,7 @@ static inline void kunmap_noncoherent(void) kunmap_coherent(); } -#define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE -static inline void flush_kernel_dcache_page(struct page *page) -{ - BUG_ON(cpu_has_dc_aliases && PageHighMem(page)); - flush_dcache_page(page); -} - +#define ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE 1 /* * For now flush_kernel_vmap_range and invalidate_kernel_vmap_range both do a * cache writeback and invalidate operation. 
diff --git a/arch/nds32/include/asm/cacheflush.h b/arch/nds32/include/asm/cacheflush.h index 7d6824f7c0e8..c2a222ebfa2a 100644 --- a/arch/nds32/include/asm/cacheflush.h +++ b/arch/nds32/include/asm/cacheflush.h @@ -36,8 +36,7 @@ void copy_from_user_page(struct vm_area_struct *vma, struct page *page, void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vaddr); -#define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE -void flush_kernel_dcache_page(struct page *page); +#define ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE 1 void flush_kernel_vmap_range(void *addr, int size); void invalidate_kernel_vmap_range(void *addr, int size); #define flush_dcache_mmap_lock(mapping) xa_lock_irq(&(mapping)->i_pages) diff --git a/arch/nds32/mm/cacheflush.c b/arch/nds32/mm/cacheflush.c index ad5344ef5d33..07aac65d1cab 100644 --- a/arch/nds32/mm/cacheflush.c +++ b/arch/nds32/mm/cacheflush.c @@ -318,15 +318,6 @@ void flush_anon_page(struct vm_area_struct *vma, local_irq_restore(flags); } -void flush_kernel_dcache_page(struct page *page) -{ - unsigned long flags; - local_irq_save(flags); - cpu_dcache_wbinval_page((unsigned long)page_address(page)); - local_irq_restore(flags); -} -EXPORT_SYMBOL(flush_kernel_dcache_page); - void flush_kernel_vmap_range(void *addr, int size) { unsigned long flags; diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h index 99663fc1f997..eef0096db5f8 100644 --- a/arch/parisc/include/asm/cacheflush.h +++ b/arch/parisc/include/asm/cacheflush.h @@ -36,16 +36,12 @@ void flush_cache_all_local(void); void flush_cache_all(void); void flush_cache_mm(struct mm_struct *mm); -#define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE void flush_kernel_dcache_page_addr(void *addr); -static inline void flush_kernel_dcache_page(struct page *page) -{ - flush_kernel_dcache_page_addr(page_address(page)); -} #define flush_kernel_dcache_range(start,size) \ flush_kernel_dcache_range_asm((start), (start)+(size)); +#define ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE 1 void flush_kernel_vmap_range(void *vaddr, int size); void invalidate_kernel_vmap_range(void *vaddr, int size); @@ -59,7 +55,7 @@ extern void flush_dcache_page(struct page *page); #define flush_dcache_mmap_unlock(mapping) xa_unlock_irq(&mapping->i_pages) #define flush_icache_page(vma,page) do { \ - flush_kernel_dcache_page(page); \ + flush_kernel_dcache_page_addr(page_address(page)); \ flush_kernel_icache_page(page_address(page)); \ } while (0) diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 86a1a63563fd..39e02227e231 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -334,7 +334,7 @@ void flush_dcache_page(struct page *page) return; } - flush_kernel_dcache_page(page); + flush_kernel_dcache_page_addr(page_address(page)); if (!mapping) return; @@ -375,7 +375,6 @@ EXPORT_SYMBOL(flush_dcache_page); /* Defined in arch/parisc/kernel/pacache.S */ EXPORT_SYMBOL(flush_kernel_dcache_range_asm); -EXPORT_SYMBOL(flush_kernel_dcache_page_asm); EXPORT_SYMBOL(flush_data_cache_local); EXPORT_SYMBOL(flush_kernel_icache_range_asm); diff --git a/arch/sh/include/asm/cacheflush.h b/arch/sh/include/asm/cacheflush.h index 4486a865ff62..372afa82fee6 100644 --- a/arch/sh/include/asm/cacheflush.h +++ b/arch/sh/include/asm/cacheflush.h @@ -63,6 +63,8 @@ static inline void flush_anon_page(struct vm_area_struct *vma, if (boot_cpu_data.dcache.n_aliases && PageAnon(page)) __flush_anon_page(page, vmaddr); } + +#define ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE 1 static inline void 
flush_kernel_vmap_range(void *addr, int size) { __flush_wback_region(addr, size); @@ -72,12 +74,6 @@ static inline void invalidate_kernel_vmap_range(void *addr, int size) __flush_invalidate_region(addr, size); } -#define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE -static inline void flush_kernel_dcache_page(struct page *page) -{ - flush_dcache_page(page); -} - extern void copy_to_user_page(struct vm_area_struct *vma, struct page *page, unsigned long vaddr, void *dst, const void *src, unsigned long len); -- cgit From fc1f5e980a463325cf41d39ac6a69aa3cca73995 Mon Sep 17 00:00:00 2001 From: Ohhoon Kwon Date: Thu, 2 Sep 2021 14:57:01 -0700 Subject: mm: sparse: pass section_nr to find_memory_block With CONFIG_SPARSEMEM_EXTREME enabled, __section_nr(), which converts a mem_section to a section_nr, could be costly since it iterates all section roots to check if the given mem_section is in its range. On the other hand, __nr_to_section(), which converts a section_nr to a mem_section, can be done in O(1). Let's pass section_nr instead of a mem_section pointer to find_memory_block() in order to reduce needless iterations. Link: https://lkml.kernel.org/r/20210707150212.855-3-ohoono.kwon@samsung.com Signed-off-by: Ohhoon Kwon Acked-by: Michal Hocko Acked-by: Mike Rapoport Reviewed-by: David Hildenbrand Cc: Baoquan He Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/platforms/pseries/hotplug-memory.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 377d852f5a9a..d4f28ee4d5dc 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -211,13 +211,11 @@ static int update_lmb_associativity_index(struct drmem_lmb *lmb) static struct memory_block *lmb_to_memblock(struct drmem_lmb *lmb) { unsigned long section_nr; - struct mem_section *mem_sect; struct memory_block *mem_block; section_nr = pfn_to_section_nr(PFN_DOWN(lmb->base_addr)); - mem_sect = __nr_to_section(section_nr); - mem_block = find_memory_block(mem_sect); + mem_block = find_memory_block(section_nr); return mem_block; } -- cgit From c3ab6baf6a004eab7344a1d8880a971f2414e1b6 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 2 Sep 2021 14:57:56 -0700 Subject: mm/page_alloc: always initialize memory map for the holes Patch series "mm: ensure consistency of memory map poisoning". Currently memory map allocation for the FLATMEM case does not poison the struct pages regardless of the CONFIG_PAGE_POISONING setting. This happens because allocation of the memory map for FLATMEM and SPARSEMEM uses different memblock functions, and those used for the SPARSEMEM case (namely memblock_alloc_try_nid_raw() and memblock_alloc_exact_nid_raw()) implicitly poison the allocated memory. Another side effect of this implicit poisoning is that early setup code that uses the same functions to allocate memory burns cycles for the memory poisoning even if it was not intended. These patches introduce a memmap_alloc() wrapper that ensures that the memory map allocation is consistent for different memory models. This patch (of 4): Currently the memory map for holes is initialized only when the SPARSEMEM memory model is used. Yet, even with FLATMEM there could be holes in the physical memory layout that have memory map entries.
For instance, the memory reserved using the e820 API on i386, or "reserved-memory" nodes in the device tree, would not appear in memblock.memory, and hence the struct pages for such holes will be skipped during memory map initialization. These struct pages will be zeroed because the memory map for FLATMEM systems is allocated with memblock_alloc_node(), which clears the allocated memory. While zeroed struct pages do not cause immediate problems, the correct behaviour is to initialize every page using __init_single_page(). Besides, enabling page poisoning for the FLATMEM case will trigger PF_POISONED_CHECK() unless the memory map is properly initialized. Make sure init_unavailable_range() is called for both SPARSEMEM and FLATMEM so that struct pages representing memory holes would appear as PG_Reserved with any memory layout. [rppt@kernel.org: fix microblaze] Link: https://lkml.kernel.org/r/YQWW3RCE4eWBuMu/@kernel.org Link: https://lkml.kernel.org/r/20210714123739.16493-1-rppt@kernel.org Link: https://lkml.kernel.org/r/20210714123739.16493-2-rppt@kernel.org Signed-off-by: Mike Rapoport Acked-by: David Hildenbrand Tested-by: Guenter Roeck Cc: Michal Simek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/microblaze/include/asm/page.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/microblaze/include/asm/page.h b/arch/microblaze/include/asm/page.h index ce550978f4fc..4b8b2fa78fc5 100644 --- a/arch/microblaze/include/asm/page.h +++ b/arch/microblaze/include/asm/page.h @@ -112,8 +112,7 @@ extern int page_is_ram(unsigned long pfn); # define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT) # define ARCH_PFN_OFFSET (memory_start >> PAGE_SHIFT) -# define pfn_valid(pfn) ((pfn) < (max_mapnr + ARCH_PFN_OFFSET)) - +# define pfn_valid(pfn) ((pfn) >= ARCH_PFN_OFFSET && (pfn) < (max_mapnr + ARCH_PFN_OFFSET)) # endif /* __ASSEMBLY__ */ #define virt_addr_valid(vaddr) (pfn_valid(virt_to_pfn(vaddr))) -- cgit From 22e7878102f94a50e9a4c2c19f909a9a0898c4ce Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 2 Sep 2021 14:57:59 -0700 Subject: microblaze: simplify pte_alloc_one_kernel() The microblaze implementation of pte_alloc_one_kernel() used memblock_alloc_try_nid_raw() along with clear_page() to allocate a zeroed page during early setup. Replace calls of these functions with a call to memblock_alloc_try_nid(), which already returns a zeroed page and respects the same allocation limits as memblock_alloc_try_nid_raw(). While at it, drop the early_get_page() wrapper, which was only used in pte_alloc_one_kernel().
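A minimal sketch of the simplification (illustrative only; min_addr, max_addr and nid stand for the caller's allocation limits, and it relies on memblock_alloc_try_nid() returning already-zeroed memory):

/* before: raw allocation, the caller has to zero the page itself */
pte = memblock_alloc_try_nid_raw(PAGE_SIZE, PAGE_SIZE,
				 min_addr, max_addr, nid);
if (pte)
	clear_page(pte);

/* after: the allocator hands back zeroed memory, no clear_page() */
pte = memblock_alloc_try_nid(PAGE_SIZE, PAGE_SIZE,
			     min_addr, max_addr, nid);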
Link: https://lkml.kernel.org/r/20210714123739.16493-3-rppt@kernel.org Signed-off-by: Mike Rapoport Reviewed-by: David Hildenbrand Cc: Michal Simek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/microblaze/include/asm/pgtable.h | 2 -- arch/microblaze/mm/init.c | 12 ------------ arch/microblaze/mm/pgtable.c | 17 ++++++++--------- 3 files changed, 8 insertions(+), 23 deletions(-) (limited to 'arch') diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h index 71cd547655d9..c136a01e467e 100644 --- a/arch/microblaze/include/asm/pgtable.h +++ b/arch/microblaze/include/asm/pgtable.h @@ -443,8 +443,6 @@ extern int mem_init_done; asmlinkage void __init mmu_init(void); -void __init *early_get_page(void); - #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c index ab55c70380a5..952f35b335b2 100644 --- a/arch/microblaze/mm/init.c +++ b/arch/microblaze/mm/init.c @@ -265,18 +265,6 @@ asmlinkage void __init mmu_init(void) dma_contiguous_reserve(memory_start + lowmem_size - 1); } -/* This is only called until mem_init is done. */ -void __init *early_get_page(void) -{ - /* - * Mem start + kernel_tlb -> here is limit - * because of mem mapping from head.S - */ - return memblock_alloc_try_nid_raw(PAGE_SIZE, PAGE_SIZE, - MEMBLOCK_LOW_LIMIT, memory_start + kernel_tlb, - NUMA_NO_NODE); -} - void * __ref zalloc_maybe_bootmem(size_t size, gfp_t mask) { void *p; diff --git a/arch/microblaze/mm/pgtable.c b/arch/microblaze/mm/pgtable.c index 38ccb909bc9d..c1833b159d3b 100644 --- a/arch/microblaze/mm/pgtable.c +++ b/arch/microblaze/mm/pgtable.c @@ -33,6 +33,7 @@ #include #include #include +#include <linux/memblock.h> #include #include @@ -242,15 +243,13 @@ unsigned long iopa(unsigned long addr) __ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm) { - pte_t *pte; - if (mem_init_done) { - pte = (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); - } else { - pte = (pte_t *)early_get_page(); - if (pte) - clear_page(pte); - } - return pte; + if (mem_init_done) + return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); + else + return memblock_alloc_try_nid(PAGE_SIZE, PAGE_SIZE, + MEMBLOCK_LOW_LIMIT, + memory_start + kernel_tlb, + NUMA_NO_NODE); } void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags) -- cgit From a7259df7670240ee03b0cfce8a3e5d3773911e24 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 2 Sep 2021 15:00:26 -0700 Subject: memblock: make memblock_find_in_range method private There are a lot of uses of memblock_find_in_range() along with memblock_reserve() from the times when memblock allocation APIs did not exist. memblock_find_in_range() is the very core of memblock allocations, so any future changes to its internal behaviour would mandate updates of all the users outside memblock. Replace the calls to memblock_find_in_range() with equivalent calls to memblock_phys_alloc() and memblock_phys_alloc_range() and make memblock_find_in_range() a private method of memblock. This simplifies the callers, ensures that (unlikely) errors in memblock_reserve() are handled and improves maintainability of memblock_find_in_range(). Link: https://lkml.kernel.org/r/20210816122622.30279-1-rppt@kernel.org Signed-off-by: Mike Rapoport Reviewed-by: Catalin Marinas [arm64] Acked-by: Kirill A. Shutemov Acked-by: Rafael J.
Wysocki [ACPI] Acked-by: Russell King (Oracle) Acked-by: Nick Kossifidis [riscv] Tested-by: Guenter Roeck Acked-by: Rob Herring Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/kernel/setup.c | 20 +++++++----------- arch/arm64/kvm/hyp/reserved_mem.c | 9 +++----- arch/arm64/mm/init.c | 36 ++++++++++---------------------- arch/mips/kernel/setup.c | 14 ++++++------- arch/riscv/mm/init.c | 44 +++++++++++++-------------------------- arch/s390/kernel/setup.c | 9 +++++--- arch/x86/kernel/aperture_64.c | 5 ++--- arch/x86/mm/init.c | 23 +++++++++++++------- arch/x86/mm/numa.c | 5 ++--- arch/x86/mm/numa_emulation.c | 5 ++--- arch/x86/realmode/init.c | 2 +- 11 files changed, 69 insertions(+), 103 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index f97eb2371672..284a80c0b6e1 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -1012,31 +1012,25 @@ static void __init reserve_crashkernel(void) unsigned long long lowmem_max = __pa(high_memory - 1) + 1; if (crash_max > lowmem_max) crash_max = lowmem_max; - crash_base = memblock_find_in_range(CRASH_ALIGN, crash_max, - crash_size, CRASH_ALIGN); + + crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN, + CRASH_ALIGN, crash_max); if (!crash_base) { pr_err("crashkernel reservation failed - No suitable area found.\n"); return; } } else { + unsigned long long crash_max = crash_base + crash_size; unsigned long long start; - start = memblock_find_in_range(crash_base, - crash_base + crash_size, - crash_size, SECTION_SIZE); - if (start != crash_base) { + start = memblock_phys_alloc_range(crash_size, SECTION_SIZE, + crash_base, crash_max); + if (!start) { pr_err("crashkernel reservation failed - memory is in use.\n"); return; } } - ret = memblock_reserve(crash_base, crash_size); - if (ret < 0) { - pr_warn("crashkernel reservation failed - memory is in use (0x%lx)\n", - (unsigned long)crash_base); - return; - } - pr_info("Reserving %ldMB of memory at %ldMB for crashkernel (System RAM: %ldMB)\n", (unsigned long)(crash_size >> 20), (unsigned long)(crash_base >> 20), diff --git a/arch/arm64/kvm/hyp/reserved_mem.c b/arch/arm64/kvm/hyp/reserved_mem.c index d654921dd09b..578670e3f608 100644 --- a/arch/arm64/kvm/hyp/reserved_mem.c +++ b/arch/arm64/kvm/hyp/reserved_mem.c @@ -92,12 +92,10 @@ void __init kvm_hyp_reserve(void) * this is unmapped from the host stage-2, and fallback to PAGE_SIZE. 
*/ hyp_mem_size = hyp_mem_pages << PAGE_SHIFT; - hyp_mem_base = memblock_find_in_range(0, memblock_end_of_DRAM(), - ALIGN(hyp_mem_size, PMD_SIZE), - PMD_SIZE); + hyp_mem_base = memblock_phys_alloc(ALIGN(hyp_mem_size, PMD_SIZE), + PMD_SIZE); if (!hyp_mem_base) - hyp_mem_base = memblock_find_in_range(0, memblock_end_of_DRAM(), - hyp_mem_size, PAGE_SIZE); + hyp_mem_base = memblock_phys_alloc(hyp_mem_size, PAGE_SIZE); else hyp_mem_size = ALIGN(hyp_mem_size, PMD_SIZE); @@ -105,7 +103,6 @@ void __init kvm_hyp_reserve(void) kvm_err("Failed to reserve hyp memory\n"); return; } - memblock_reserve(hyp_mem_base, hyp_mem_size); kvm_info("Reserved %lld MiB at 0x%llx\n", hyp_mem_size >> 20, hyp_mem_base); diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 1fdb7bb7c198..bf5b8a5cd451 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -74,6 +74,7 @@ phys_addr_t arm64_dma_phys_limit __ro_after_init; static void __init reserve_crashkernel(void) { unsigned long long crash_base, crash_size; + unsigned long long crash_max = arm64_dma_phys_limit; int ret; ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), @@ -84,33 +85,18 @@ static void __init reserve_crashkernel(void) crash_size = PAGE_ALIGN(crash_size); - if (crash_base == 0) { - /* Current arm64 boot protocol requires 2MB alignment */ - crash_base = memblock_find_in_range(0, arm64_dma_phys_limit, - crash_size, SZ_2M); - if (crash_base == 0) { - pr_warn("cannot allocate crashkernel (size:0x%llx)\n", - crash_size); - return; - } - } else { - /* User specifies base address explicitly. */ - if (!memblock_is_region_memory(crash_base, crash_size)) { - pr_warn("cannot reserve crashkernel: region is not memory\n"); - return; - } + /* User specifies base address explicitly. */ + if (crash_base) + crash_max = crash_base + crash_size; - if (memblock_is_region_reserved(crash_base, crash_size)) { - pr_warn("cannot reserve crashkernel: region overlaps reserved memory\n"); - return; - } - - if (!IS_ALIGNED(crash_base, SZ_2M)) { - pr_warn("cannot reserve crashkernel: base address is not 2MB aligned\n"); - return; - } + /* Current arm64 boot protocol requires 2MB alignment */ + crash_base = memblock_phys_alloc_range(crash_size, SZ_2M, + crash_base, crash_max); + if (!crash_base) { + pr_warn("cannot allocate crashkernel (size:0x%llx)\n", + crash_size); + return; } - memblock_reserve(crash_base, crash_size); pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n", crash_base, crash_base + crash_size, crash_size >> 20); diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index 23a140327a0b..f979adfd4fc2 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -452,8 +452,9 @@ static void __init mips_parse_crashkernel(void) return; if (crash_base <= 0) { - crash_base = memblock_find_in_range(CRASH_ALIGN, CRASH_ADDR_MAX, - crash_size, CRASH_ALIGN); + crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN, + CRASH_ALIGN, + CRASH_ADDR_MAX); if (!crash_base) { pr_warn("crashkernel reservation failed - No suitable area found.\n"); return; @@ -461,8 +462,9 @@ static void __init mips_parse_crashkernel(void) } else { unsigned long long start; - start = memblock_find_in_range(crash_base, crash_base + crash_size, - crash_size, 1); + start = memblock_phys_alloc_range(crash_size, 1, + crash_base, + crash_base + crash_size); if (start != crash_base) { pr_warn("Invalid memory region reserved for crash kernel\n"); return; @@ -656,10 +658,6 @@ static void __init arch_mem_init(char **cmdline_p) 
mips_reserve_vmcore(); mips_parse_crashkernel(); -#ifdef CONFIG_KEXEC - if (crashk_res.start != crashk_res.end) - memblock_reserve(crashk_res.start, resource_size(&crashk_res)); -#endif device_tree_init(); /* diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 7cb4f391d106..e6cac495a9e8 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -819,38 +819,22 @@ static void __init reserve_crashkernel(void) crash_size = PAGE_ALIGN(crash_size); - if (crash_base == 0) { - /* - * Current riscv boot protocol requires 2MB alignment for - * RV64 and 4MB alignment for RV32 (hugepage size) - */ - crash_base = memblock_find_in_range(search_start, search_end, - crash_size, PMD_SIZE); - - if (crash_base == 0) { - pr_warn("crashkernel: couldn't allocate %lldKB\n", - crash_size >> 10); - return; - } - } else { - /* User specifies base address explicitly. */ - if (!memblock_is_region_memory(crash_base, crash_size)) { - pr_warn("crashkernel: requested region is not memory\n"); - return; - } - - if (memblock_is_region_reserved(crash_base, crash_size)) { - pr_warn("crashkernel: requested region is reserved\n"); - return; - } - + if (crash_base) { + search_start = crash_base; + search_end = crash_base + crash_size; + } - if (!IS_ALIGNED(crash_base, PMD_SIZE)) { - pr_warn("crashkernel: requested region is misaligned\n"); - return; - } + /* + * Current riscv boot protocol requires 2MB alignment for + * RV64 and 4MB alignment for RV32 (hugepage size) + */ + crash_base = memblock_phys_alloc_range(crash_size, PMD_SIZE, + search_start, search_end); + if (crash_base == 0) { + pr_warn("crashkernel: couldn't allocate %lldKB\n", + crash_size >> 10); + return; } - memblock_reserve(crash_base, crash_size); pr_info("crashkernel: reserved 0x%016llx - 0x%016llx (%lld MB)\n", crash_base, crash_base + crash_size, crash_size >> 20); diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index ff0f9e838916..0bab57d6413b 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -626,8 +626,9 @@ static void __init reserve_crashkernel(void) return; } low = crash_base ?: low; - crash_base = memblock_find_in_range(low, high, crash_size, - KEXEC_CRASH_MEM_ALIGN); + crash_base = memblock_phys_alloc_range(crash_size, + KEXEC_CRASH_MEM_ALIGN, + low, high); } if (!crash_base) { @@ -636,8 +637,10 @@ static void __init reserve_crashkernel(void) return; } - if (register_memory_notifier(&kdump_mem_nb)) + if (register_memory_notifier(&kdump_mem_nb)) { + memblock_free(crash_base, crash_size); return; + } if (!OLDMEM_BASE && MACHINE_IS_VM) diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size)); diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 294ed4392a0e..10562885f5fc 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -109,14 +109,13 @@ static u32 __init allocate_aperture(void) * memory. Unfortunately we cannot move it up because that would * make the IOMMU useless. 
*/ - addr = memblock_find_in_range(GART_MIN_ADDR, GART_MAX_ADDR, - aper_size, aper_size); + addr = memblock_phys_alloc_range(aper_size, aper_size, + GART_MIN_ADDR, GART_MAX_ADDR); if (!addr) { pr_err("Cannot allocate aperture memory hole [mem %#010lx-%#010lx] (%uKB)\n", addr, addr + aper_size - 1, aper_size >> 10); return 0; } - memblock_reserve(addr, aper_size); pr_info("Mapping aperture over RAM [mem %#010lx-%#010lx] (%uKB)\n", addr, addr + aper_size - 1, aper_size >> 10); register_nosave_region(addr >> PAGE_SHIFT, diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 75ef19aa8903..23a14d82e783 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -127,14 +127,12 @@ __ref void *alloc_low_pages(unsigned int num) unsigned long ret = 0; if (min_pfn_mapped < max_pfn_mapped) { - ret = memblock_find_in_range( + ret = memblock_phys_alloc_range( + PAGE_SIZE * num, PAGE_SIZE, min_pfn_mapped << PAGE_SHIFT, - max_pfn_mapped << PAGE_SHIFT, - PAGE_SIZE * num , PAGE_SIZE); + max_pfn_mapped << PAGE_SHIFT); } - if (ret) - memblock_reserve(ret, PAGE_SIZE * num); - else if (can_use_brk_pgt) + if (!ret && can_use_brk_pgt) ret = __pa(extend_brk(PAGE_SIZE * num, PAGE_SIZE)); if (!ret) @@ -610,8 +608,17 @@ static void __init memory_map_top_down(unsigned long map_start, unsigned long addr; unsigned long mapped_ram_size = 0; - /* xen has big range in reserved near end of ram, skip it at first.*/ - addr = memblock_find_in_range(map_start, map_end, PMD_SIZE, PMD_SIZE); + /* + * Systems that have many reserved areas near top of the memory, + * e.g. QEMU with less than 1G RAM and EFI enabled, or Xen, will + * require lots of 4K mappings which may exhaust pgt_buf. + * Start with top-most PMD_SIZE range aligned at PMD_SIZE to ensure + * there is enough mapped memory that can be allocated from + * memblock. 
+ */ + addr = memblock_phys_alloc_range(PMD_SIZE, PMD_SIZE, map_start, + map_end); + memblock_free(addr, PMD_SIZE); real_end = addr + PMD_SIZE; /* step_size need to be small so pgt_buf from BRK could cover it */ diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index e94da744386f..a1b5c71099e6 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -376,15 +376,14 @@ static int __init numa_alloc_distance(void) cnt++; size = cnt * cnt * sizeof(numa_distance[0]); - phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), - size, PAGE_SIZE); + phys = memblock_phys_alloc_range(size, PAGE_SIZE, 0, + PFN_PHYS(max_pfn_mapped)); if (!phys) { pr_warn("Warning: can't allocate distance table!\n"); /* don't retry until explicitly reset */ numa_distance = (void *)1LU; return -ENOMEM; } - memblock_reserve(phys, size); numa_distance = __va(phys); numa_distance_cnt = cnt; diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c index 87d77cc52f86..737491b13728 100644 --- a/arch/x86/mm/numa_emulation.c +++ b/arch/x86/mm/numa_emulation.c @@ -447,13 +447,12 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt) if (numa_dist_cnt) { u64 phys; - phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), - phys_size, PAGE_SIZE); + phys = memblock_phys_alloc_range(phys_size, PAGE_SIZE, 0, + PFN_PHYS(max_pfn_mapped)); if (!phys) { pr_warn("NUMA: Warning: can't allocate copy of distance table, disabling emulation\n"); goto no_emu; } - memblock_reserve(phys, phys_size); phys_dist = __va(phys); for (i = 0; i < numa_dist_cnt; i++) diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c index 6534c92d0f83..31b5856010cb 100644 --- a/arch/x86/realmode/init.c +++ b/arch/x86/realmode/init.c @@ -28,7 +28,7 @@ void __init reserve_real_mode(void) WARN_ON(slab_is_available()); /* Has to be under 1M so we can execute real-mode AP code. */ - mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE); + mem = memblock_phys_alloc_range(size, PAGE_SIZE, 0, 1<<20); if (!mem) pr_info("No sub-1M memory is available for the trampoline\n"); else -- cgit From dce49103962840dd61423d7627748d6c558d58c5 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Thu, 2 Sep 2021 15:00:33 -0700 Subject: mm: wire up syscall process_mrelease Split off from prev patch in the series that implements the syscall. 
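For illustration, a minimal user-space sketch of the call being wired up here (not part of this patch, which only touches the syscall tables; it assumes uapi headers new enough to define SYS_pidfd_open and SYS_process_mrelease, and that process_mrelease() is only valid for an exiting process, so SIGKILL is sent first):

#define _GNU_SOURCE
#include <signal.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/syscall.h>

/* Kill a process and eagerly reap its memory through its pidfd. */
static int kill_and_reap(pid_t pid)
{
	int pidfd, ret;

	pidfd = syscall(SYS_pidfd_open, pid, 0);	/* flags must be 0 */
	if (pidfd < 0)
		return -1;
	ret = kill(pid, SIGKILL);
	if (!ret)
		ret = syscall(SYS_process_mrelease, pidfd, 0);	/* flags must be 0 */
	close(pidfd);
	return ret;
}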
Link: https://lkml.kernel.org/r/20210809185259.405936-2-surenb@google.com Signed-off-by: Suren Baghdasaryan Acked-by: Geert Uytterhoeven Cc: Andy Lutomirski Cc: Christian Brauner Cc: Christoph Hellwig Cc: David Hildenbrand Cc: David Rientjes Cc: Florian Weimer Cc: Jan Engelhardt Cc: Jann Horn Cc: Johannes Weiner Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Minchan Kim Cc: Oleg Nesterov Cc: Rik van Riel Cc: Roman Gushchin Cc: Shakeel Butt Cc: Tim Murray Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/kernel/syscalls/syscall.tbl | 2 ++ arch/arm/tools/syscall.tbl | 2 ++ arch/arm64/include/asm/unistd.h | 2 +- arch/arm64/include/asm/unistd32.h | 2 ++ arch/ia64/kernel/syscalls/syscall.tbl | 2 ++ arch/m68k/kernel/syscalls/syscall.tbl | 2 ++ arch/microblaze/kernel/syscalls/syscall.tbl | 2 ++ arch/mips/kernel/syscalls/syscall_n32.tbl | 2 ++ arch/mips/kernel/syscalls/syscall_n64.tbl | 2 ++ arch/mips/kernel/syscalls/syscall_o32.tbl | 2 ++ arch/parisc/kernel/syscalls/syscall.tbl | 2 ++ arch/powerpc/kernel/syscalls/syscall.tbl | 2 ++ arch/s390/kernel/syscalls/syscall.tbl | 2 ++ arch/sh/kernel/syscalls/syscall.tbl | 2 ++ arch/sparc/kernel/syscalls/syscall.tbl | 2 ++ arch/x86/entry/syscalls/syscall_32.tbl | 1 + arch/x86/entry/syscalls/syscall_64.tbl | 1 + arch/xtensa/kernel/syscalls/syscall.tbl | 2 ++ 18 files changed, 33 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index a17687ed4b51..605645eae04c 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -486,3 +486,5 @@ 554 common landlock_create_ruleset sys_landlock_create_ruleset 555 common landlock_add_rule sys_landlock_add_rule 556 common landlock_restrict_self sys_landlock_restrict_self +# 557 reserved for memfd_secret +558 common process_mrelease sys_process_mrelease diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index c5df1179fc5d..2f32eb8beca8 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -460,3 +460,5 @@ 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self +# 447 reserved for memfd_secret +448 common process_mrelease sys_process_mrelease diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index 727bfc3be99b..3cb206aea3db 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -38,7 +38,7 @@ #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5) #define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800) -#define __NR_compat_syscalls 447 +#define __NR_compat_syscalls 449 #endif #define __ARCH_WANT_SYS_CLONE diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 99ffcafc736c..0f49cdb180dd 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -901,6 +901,8 @@ __SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset) __SYSCALL(__NR_landlock_add_rule, sys_landlock_add_rule) #define __NR_landlock_restrict_self 446 __SYSCALL(__NR_landlock_restrict_self, sys_landlock_restrict_self) +#define __NR_process_mrelease 448 +__SYSCALL(__NR_process_mrelease, sys_process_mrelease) /* * Please add new compat syscalls above this comment and update diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl index 6d07742c57b8..9bf45f2be966 100644 --- 
a/arch/ia64/kernel/syscalls/syscall.tbl +++ b/arch/ia64/kernel/syscalls/syscall.tbl @@ -367,3 +367,5 @@ 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self +# 447 reserved for memfd_secret +448 common process_mrelease sys_process_mrelease diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl index 541bc1b3a8f9..f1f98ee6c82d 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -446,3 +446,5 @@ 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self +# 447 reserved for memfd_secret +448 common process_mrelease sys_process_mrelease diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl index a176faca2927..da49ddd4bb54 100644 --- a/arch/microblaze/kernel/syscalls/syscall.tbl +++ b/arch/microblaze/kernel/syscalls/syscall.tbl @@ -452,3 +452,5 @@ 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self +# 447 reserved for memfd_secret +448 common process_mrelease sys_process_mrelease diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index c2d2e19abea8..56c8d3cf42ed 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -385,3 +385,5 @@ 444 n32 landlock_create_ruleset sys_landlock_create_ruleset 445 n32 landlock_add_rule sys_landlock_add_rule 446 n32 landlock_restrict_self sys_landlock_restrict_self +# 447 reserved for memfd_secret +448 n32 process_mrelease sys_process_mrelease diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index ac653d08b1ea..1ca7bc337932 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -361,3 +361,5 @@ 444 n64 landlock_create_ruleset sys_landlock_create_ruleset 445 n64 landlock_add_rule sys_landlock_add_rule 446 n64 landlock_restrict_self sys_landlock_restrict_self +# 447 reserved for memfd_secret +448 n64 process_mrelease sys_process_mrelease diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 253f2cd70b6b..fd3a9df60ec2 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -434,3 +434,5 @@ 444 o32 landlock_create_ruleset sys_landlock_create_ruleset 445 o32 landlock_add_rule sys_landlock_add_rule 446 o32 landlock_restrict_self sys_landlock_restrict_self +# 447 reserved for memfd_secret +448 o32 process_mrelease sys_process_mrelease diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index e26187b9ab87..040df1b7a589 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -444,3 +444,5 @@ 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self +# 447 reserved for memfd_secret +448 common process_mrelease sys_process_mrelease diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index aef2a290e71a..d8ebd7d37c0f 100644 --- 
a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -526,3 +526,5 @@ 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self +# 447 reserved for memfd_secret +448 common process_mrelease sys_process_mrelease diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index 64d51ab5a8b4..57233ace30cb 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -449,3 +449,5 @@ 444 common landlock_create_ruleset sys_landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self sys_landlock_restrict_self +# 447 reserved for memfd_secret +448 common process_mrelease sys_process_mrelease sys_process_mrelease diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl index e0a70be77d84..2f6e95eb4690 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -449,3 +449,5 @@ 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self +# 447 reserved for memfd_secret +448 common process_mrelease sys_process_mrelease diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index 603f5a821502..42fc2906215d 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -492,3 +492,5 @@ 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self +# 447 reserved for memfd_secret +448 common process_mrelease sys_process_mrelease diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index ce763a12311c..661a03bcfbd1 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -452,3 +452,4 @@ 445 i386 landlock_add_rule sys_landlock_add_rule 446 i386 landlock_restrict_self sys_landlock_restrict_self 447 i386 memfd_secret sys_memfd_secret +448 i386 process_mrelease sys_process_mrelease diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index f6b57799c1ea..807b6a1de8e8 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -369,6 +369,7 @@ 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self 447 common memfd_secret sys_memfd_secret +448 common process_mrelease sys_process_mrelease # # Due to a historical design error, certain syscalls are numbered differently diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl index 235d67d6ceb4..f4384951f393 100644 --- a/arch/xtensa/kernel/syscalls/syscall.tbl +++ b/arch/xtensa/kernel/syscalls/syscall.tbl @@ -417,3 +417,5 @@ 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self +# 447 reserved for memfd_secret +448 common process_mrelease sys_process_mrelease -- cgit From a717a780fc4e1dddd3fbf9ad08f180eec2a78194 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 23 Aug 2021 
17:58:24 -0700 Subject: KVM: x86/mmu: Don't freak out if pml5_root is NULL on 4-level host Include pml5_root in the set of special roots if and only if the host, and thus NPT, is using 5-level paging. mmu_alloc_special_roots() expects special roots to be allocated as a bundle, i.e. they're either all valid or all NULL. But for pml5_root, that expectation only holds true if the host uses 5-level paging, which causes KVM to WARN about pml5_root being NULL when the other special roots are valid. The silver lining of 4-level vs. 5-level NPT being tied to the host kernel's paging level is that KVM's shadow root level is constant; unlike VMX's EPT, KVM can't choose 4-level NPT based on guest.MAXPHYADDR. That means KVM can still expect pml5_root to be bundled with the other special roots, it just needs to be conditioned on the shadow root level. Fixes: cb0f722aff6e ("KVM: x86/mmu: Support shadowing NPT when 5-level paging is enabled in host") Reported-by: Maxim Levitsky Reviewed-by: Maxim Levitsky Signed-off-by: Sean Christopherson Message-Id: <20210824005824.205536-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index c04e30f6e0db..e7bac871747c 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3588,6 +3588,7 @@ out_unlock: static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu) { struct kvm_mmu *mmu = vcpu->arch.mmu; + bool need_pml5 = mmu->shadow_root_level > PT64_ROOT_4LEVEL; u64 *pml5_root = NULL; u64 *pml4_root = NULL; u64 *pae_root; @@ -3602,7 +3603,14 @@ static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu) mmu->shadow_root_level < PT64_ROOT_4LEVEL) return 0; - if (mmu->pae_root && mmu->pml4_root && mmu->pml5_root) + /* + * NPT, the only paging mode that uses this horror, uses a fixed number + * of levels for the shadow page tables, e.g. all MMUs are 4-level or + * all MMus are 5-level. Thus, this can safely require that pml5_root + * is allocated if the other roots are valid and pml5 is needed, as any + * prior MMU would also have required pml5. + */ + if (mmu->pae_root && mmu->pml4_root && (!need_pml5 || mmu->pml5_root)) return 0; /* @@ -3610,7 +3618,7 @@ static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu) * bail if KVM ends up in a state where only one of the roots is valid. */ if (WARN_ON_ONCE(!tdp_enabled || mmu->pae_root || mmu->pml4_root || - mmu->pml5_root)) + (need_pml5 && mmu->pml5_root))) return -EIO; /* @@ -3626,7 +3634,7 @@ static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu) if (!pml4_root) goto err_pml4; - if (mmu->shadow_root_level > PT64_ROOT_4LEVEL) { + if (need_pml5) { pml5_root = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT); if (!pml5_root) goto err_pml5; -- cgit From 81b4b56d4f8130bbb99cf4e2b48082e5b4cfccb9 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Thu, 26 Aug 2021 12:57:49 +0300 Subject: KVM: VMX: avoid running vmx_handle_exit_irqoff in case of emulation If we are emulating an invalid guest state, we don't have a correct exit reason, and thus we shouldn't do anything in this function. 
Signed-off-by: Maxim Levitsky Message-Id: <20210826095750.1650467-2-mlevitsk@redhat.com> Cc: stable@vger.kernel.org Fixes: 95b5a48c4f2b ("KVM: VMX: Handle NMIs, #MCs and async #PFs in common irqs-disabled fn", 2019-06-18) Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index fada1055f325..0c2c0d5ae873 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6382,6 +6382,9 @@ static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); + if (vmx->emulation_required) + return; + if (vmx->exit_reason.basic == EXIT_REASON_EXTERNAL_INTERRUPT) handle_external_interrupt_irqoff(vcpu); else if (vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI) -- cgit From 4ddacd525a2f16cebec8ba409fbce6b6fb45e987 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Fri, 3 Sep 2021 17:15:58 -0400 Subject: kvm: x86: Set KVM_MAX_VCPU_ID to 4*KVM_MAX_VCPUS Instead of requiring KVM_MAX_VCPU_ID to be manually increased every time we increase KVM_MAX_VCPUS, set it to 4*KVM_MAX_VCPUS. This should be enough for CPU topologies where Cores-per-Package and Packages-per-Socket are not powers of 2. In practice, this increases KVM_MAX_VCPU_ID from 1023 to 1152. The only side effect of this change is making some fields in struct kvm_ioapic larger, increasing the struct size from 1628 to 1780 bytes (in x86_64). Signed-off-by: Eduardo Habkost Message-Id: <20210903211600.2002377-2-ehabkost@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 0532f4e84308..59da1fbf5248 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -39,7 +39,19 @@ #define KVM_MAX_VCPUS 288 #define KVM_SOFT_MAX_VCPUS 240 -#define KVM_MAX_VCPU_ID 1023 + +/* + * In x86, the VCPU ID corresponds to the APIC ID, and APIC IDs + * might be larger than the actual number of VCPUs because the + * APIC ID encodes CPU topology information. + * + * In the worst case, we'll need less than one extra bit for the + * Core ID, and less than one extra bit for the Package (Die) ID, + * so ratio of 4 should be enough. + */ +#define KVM_VCPU_ID_RATIO 4 +#define KVM_MAX_VCPU_ID (KVM_MAX_VCPUS * KVM_VCPU_ID_RATIO) + /* memory slots that are not exposed to userspace */ #define KVM_PRIVATE_MEM_SLOTS 3 -- cgit From 074c82c8f7cf8a46c3b81965f122599e3a133450 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Fri, 3 Sep 2021 17:15:59 -0400 Subject: kvm: x86: Increase MAX_VCPUS to 1024 Increase KVM_MAX_VCPUS to 1024, so we can test larger VMs. I'm not changing KVM_SOFT_MAX_VCPUS yet because I'm afraid it might involve complicated questions around the meaning of "supported" and "recommended" in the upstream tree. KVM_SOFT_MAX_VCPUS will be changed in a separate patch. 
For reference, visible effects of this change are: - KVM_CAP_MAX_VCPUS will now return 1024 (of course) - Default value for CPUID[HYPERV_CPUID_IMPLEMENT_LIMITS (0x40000005)].EAX will now be 1024 - KVM_MAX_VCPU_ID will change from 1151 to 4096 - Size of struct kvm will increase from 19328 to 22272 bytes (in x86_64) - Size of struct kvm_ioapic will increase from 1780 to 5084 bytes (in x86_64) - Bitmap stack variables that will grow: - At kvm_hv_flush_tlb() and kvm_hv_send_ipi(), vp_bitmap[] and vcpu_bitmap[] will now be 128 bytes long - vcpu_bitmap at ioapic_write_indirect() will be 128 bytes long once patch "KVM: x86: Fix stack-out-of-bounds memory access from ioapic_write_indirect()" is applied Signed-off-by: Eduardo Habkost Message-Id: <20210903211600.2002377-3-ehabkost@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 59da1fbf5248..52ca8da4ab52 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -37,7 +37,7 @@ #define __KVM_HAVE_ARCH_VCPU_DEBUGFS -#define KVM_MAX_VCPUS 288 +#define KVM_MAX_VCPUS 1024 #define KVM_SOFT_MAX_VCPUS 240 /* -- cgit From 1dbaf04cb91b2b680d10f9a8f9f823d94c7087bf Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Fri, 3 Sep 2021 17:16:00 -0400 Subject: kvm: x86: Increase KVM_SOFT_MAX_VCPUS to 710 Support for 710 VCPUs was tested by Red Hat since RHEL-8.4, so increase KVM_SOFT_MAX_VCPUS to 710. Signed-off-by: Eduardo Habkost Message-Id: <20210903211600.2002377-4-ehabkost@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 52ca8da4ab52..f8f48a7ec577 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -38,7 +38,7 @@ #define __KVM_HAVE_ARCH_VCPU_DEBUGFS #define KVM_MAX_VCPUS 1024 -#define KVM_SOFT_MAX_VCPUS 240 +#define KVM_SOFT_MAX_VCPUS 710 /* * In x86, the VCPU ID corresponds to the APIC ID, and APIC IDs -- cgit From 678a305b85d95f288c12e3d69a32d3351b34f2bb Mon Sep 17 00:00:00 2001 From: Jia He Date: Mon, 30 Aug 2021 22:53:36 +0800 Subject: KVM: x86/mmu: Remove unused field mmio_cached in struct kvm_mmu_page After the fast tlb invalidation patch series was reverted and restored, the mmio_cached field was not removed. Hence an unused field is left in kvm_mmu_page. Cc: Sean Christopherson Signed-off-by: Jia He Message-Id: <20210830145336.27183-1-justin.he@arm.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu_internal.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h index 62bb8f758b3f..6b6f10895710 100644 --- a/arch/x86/kvm/mmu/mmu_internal.h +++ b/arch/x86/kvm/mmu/mmu_internal.h @@ -37,7 +37,6 @@ struct kvm_mmu_page { bool unsync; u8 mmu_valid_gen; - bool mmio_cached; bool lpage_disallowed; /* Can't be replaced by an equiv large page */ /* -- cgit From e7177339d7b5f9594b316842122b5fda9513d5e2 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 31 Aug 2021 09:42:22 -0700 Subject: Revert "KVM: x86: mmu: Add guest physical address check in translate_gpa()" Revert a misguided illegal GPA check when "translating" a non-nested GPA.
The check is woefully incomplete as it does not fill in @exception as expected by all callers, which leads to KVM attempting to inject a bogus exception, potentially exposing kernel stack information in the process. WARNING: CPU: 0 PID: 8469 at arch/x86/kvm/x86.c:525 exception_type+0x98/0xb0 arch/x86/kvm/x86.c:525 CPU: 1 PID: 8469 Comm: syz-executor531 Not tainted 5.14.0-rc7-syzkaller #0 RIP: 0010:exception_type+0x98/0xb0 arch/x86/kvm/x86.c:525 Call Trace: x86_emulate_instruction+0xef6/0x1460 arch/x86/kvm/x86.c:7853 kvm_mmu_page_fault+0x2f0/0x1810 arch/x86/kvm/mmu/mmu.c:5199 handle_ept_misconfig+0xdf/0x3e0 arch/x86/kvm/vmx/vmx.c:5336 __vmx_handle_exit arch/x86/kvm/vmx/vmx.c:6021 [inline] vmx_handle_exit+0x336/0x1800 arch/x86/kvm/vmx/vmx.c:6038 vcpu_enter_guest+0x2a1c/0x4430 arch/x86/kvm/x86.c:9712 vcpu_run arch/x86/kvm/x86.c:9779 [inline] kvm_arch_vcpu_ioctl_run+0x47d/0x1b20 arch/x86/kvm/x86.c:10010 kvm_vcpu_ioctl+0x49e/0xe50 arch/x86/kvm/../../../virt/kvm/kvm_main.c:3652 The bug has escaped notice because practically speaking the GPA check is useless. The GPA check in question only comes into play when KVM is walking guest page tables (or "translating" CR3), and KVM already handles illegal GPA checks by setting reserved bits in rsvd_bits_mask for each PxE, or in the case of CR3 for loading PTDPTRs, manually checks for an illegal CR3. This particular failure doesn't hit the existing reserved bits checks because syzbot sets guest.MAXPHYADDR=1, and IA32 architecture simply doesn't allow for such an absurd MAXPHYADDR, e.g. 32-bit paging doesn't define any reserved PA bits checks, which KVM emulates by only incorporating the reserved PA bits into the "high" bits, i.e. bits 63:32. Simply remove the bogus check. There is zero meaningful value and no architectural justification for supporting guest.MAXPHYADDR < 32, and properly filling the exception would introduce non-trivial complexity. This reverts commit ec7771ab471ba6a945350353617e2e3385d0e013. Fixes: ec7771ab471b ("KVM: x86: mmu: Add guest physical address check in translate_gpa()") Cc: stable@vger.kernel.org Reported-by: syzbot+200c08e88ae818f849ce@syzkaller.appspotmail.com Signed-off-by: Sean Christopherson Message-Id: <20210831164224.1119728-2-seanjc@google.com> Reviewed-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index e7bac871747c..9b0cdec8b62d 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -334,12 +334,6 @@ static bool check_mmio_spte(struct kvm_vcpu *vcpu, u64 spte) static gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access, struct x86_exception *exception) { - /* Check if guest physical address doesn't exceed guest maximum */ - if (kvm_vcpu_is_illegal_gpa(vcpu, gpa)) { - exception->error_code |= PFERR_RSVD_MASK; - return UNMAPPED_GVA; - } - return gpa; } -- cgit From ca41c34cab1f50f13ab5ac95739483871637a684 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 1 Sep 2021 15:10:22 -0700 Subject: KVM: x86/mmu: Relocate kvm_mmu_page.tdp_mmu_page for better cache locality Move "tdp_mmu_page" into the 1-byte void left by the recently removed "mmio_cached" so that it resides in the first 64 bytes of kvm_mmu_page, i.e. in the same cache line as the most commonly accessed fields. 
Don't bother wrapping tdp_mmu_page in CONFIG_X86_64, including the field in 32-bit builds doesn't affect the size of kvm_mmu_page, and a future patch can always wrap the field in the unlikely event KVM gains a 1-byte flag that is 32-bit specific. Note, the size of kvm_mmu_page is also unchanged on CONFIG_X86_64=y due to it previously sharing an 8-byte chunk with write_flooding_count. No functional change intended. Signed-off-by: Sean Christopherson Message-Id: <20210901221023.1303578-3-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu_internal.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h index 6b6f10895710..4e7b634efa38 100644 --- a/arch/x86/kvm/mmu/mmu_internal.h +++ b/arch/x86/kvm/mmu/mmu_internal.h @@ -35,6 +35,7 @@ struct kvm_mmu_page { struct hlist_node hash_link; struct list_head lpage_disallowed_link; + bool tdp_mmu_page; bool unsync; u8 mmu_valid_gen; bool lpage_disallowed; /* Can't be replaced by an equiv large page */ @@ -70,8 +71,6 @@ struct kvm_mmu_page { atomic_t write_flooding_count; #ifdef CONFIG_X86_64 - bool tdp_mmu_page; - /* Used for freeing the page asynchronously if it is a TDP MMU page. */ struct rcu_head rcu_head; #endif -- cgit From 1148bfc47be3f885a10945ecbc1e3789a0313603 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 1 Sep 2021 15:10:23 -0700 Subject: KVM: x86/mmu: Move lpage_disallowed_link further "down" in kvm_mmu_page Move "lpage_disallowed_link" out of the first 64 bytes, i.e. out of the first cache line, of kvm_mmu_page so that "spt" and to a lesser extent "gfns" land in the first cache line. "lpage_disallowed_link" is accessed relatively infrequently compared to "spt", which is accessed any time KVM is walking and/or manipulating the shadow page tables. No functional change intended. Signed-off-by: Sean Christopherson Message-Id: <20210901221023.1303578-4-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu_internal.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h index 4e7b634efa38..bf2bdbf333c2 100644 --- a/arch/x86/kvm/mmu/mmu_internal.h +++ b/arch/x86/kvm/mmu/mmu_internal.h @@ -31,9 +31,12 @@ extern bool dbg; #define IS_VALID_PAE_ROOT(x) (!!(x)) struct kvm_mmu_page { + /* + * Note, "link" through "spt" fit in a single 64 byte cache line on + * 64-bit kernels, keep it that way unless there's a reason not to. + */ struct list_head link; struct hlist_node hash_link; - struct list_head lpage_disallowed_link; bool tdp_mmu_page; bool unsync; @@ -59,6 +62,7 @@ struct kvm_mmu_page { struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */ DECLARE_BITMAP(unsync_child_bitmap, 512); + struct list_head lpage_disallowed_link; #ifdef CONFIG_X86_32 /* * Used out of the mmu-lock to avoid reading spte values while an -- cgit From 3cc4e148b96263313e3dce926eae569c942bb74e Mon Sep 17 00:00:00 2001 From: Jing Zhang Date: Tue, 17 Aug 2021 00:26:39 +0000 Subject: KVM: stats: Add VM stat for remote tlb flush requests Add a new stat that counts the number of times a remote TLB flush is requested, regardless of whether it kicks vCPUs out of guest mode. This allows us to look at how often flushes are initiated. Unlike remote_tlb_flush, this one applies to ARM's instruction-set-based TLB flush implementation, so apply it there too. 
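As a usage note (illustrative, not part of this patch): stats in kvm->stat.generic are exported per VM, so the new counter would be expected to be readable through the binary stats interface (KVM_GET_STATS_FD) or, where KVM debugfs is enabled, along the lines of

  cat /sys/kernel/debug/kvm/<pid>-<vm-fd>/remote_tlb_flush_requests

The exact debugfs path is an assumption based on KVM's usual <pid>-<fd> per-VM directory layout.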
Original-by: David Matlack Signed-off-by: Jing Zhang Message-Id: <20210817002639.3856694-1-jingzhangos@google.com> Signed-off-by: Paolo Bonzini --- arch/arm64/kvm/mmu.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 0625bf2353c2..f5bb235bbb59 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -80,6 +80,7 @@ static bool memslot_is_logging(struct kvm_memory_slot *memslot) */ void kvm_flush_remote_tlbs(struct kvm *kvm) { + ++kvm->stat.generic.remote_tlb_flush_requests; kvm_call_hyp(__kvm_tlb_flush_vmid, &kvm->arch.mmu); } -- cgit From a40b2fd064bb7c0425c7cbdca2120cf0609a90a2 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Sat, 14 Aug 2021 11:51:29 +0800 Subject: x86/kvm: Don't enable IRQ when IRQ enabled in kvm_wait Commit f4e61f0c9add3 ("x86/kvm: Fix broken irq restoration in kvm_wait") replaced "local_irq_restore() when IRQ enabled" with "local_irq_enable() when IRQ enabled" to suppress a warning. Although there is no similar debug warning for calling local_irq_enable() with IRQs already enabled, as there is for local_irq_restore() in the same situation, doing so is no less broken than doing local_irq_restore(), and we'd better avoid it. Cc: Mark Rutland Cc: Peter Zijlstra (Intel) Signed-off-by: Lai Jiangshan Message-Id: <20210814035129.154242-1-jiangshanlai@gmail.com> Signed-off-by: Paolo Bonzini --- arch/x86/kernel/kvm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index a26643dc6bd6..b656456c3a94 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -884,10 +884,11 @@ static void kvm_wait(u8 *ptr, u8 val) } else { local_irq_disable(); + /* safe_halt() will enable IRQ */ if (READ_ONCE(*ptr) == val) safe_halt(); - - local_irq_enable(); + else + local_irq_enable(); } } -- cgit From a3cf527e70bd1553500746ef2f38db41e2af1d9e Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 6 Apr 2021 10:49:11 +0800 Subject: KVM: MIPS: Remove a "set but not used" variable This fixes a build warning: arch/mips/kvm/vz.c: In function '_kvm_vz_restore_htimer': >> arch/mips/kvm/vz.c:392:10: warning: variable 'freeze_time' set but not used [-Wunused-but-set-variable] 392 | ktime_t freeze_time; | ^~~~~~~~~~~ Reported-by: kernel test robot Signed-off-by: Huacai Chen Message-Id: <20210406024911.2008046-1-chenhuacai@loongson.cn> Signed-off-by: Paolo Bonzini --- arch/mips/kvm/vz.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/mips/kvm/vz.c b/arch/mips/kvm/vz.c index 43cad10b877d..4adca5abbc72 100644 --- a/arch/mips/kvm/vz.c +++ b/arch/mips/kvm/vz.c @@ -388,7 +388,6 @@ static void _kvm_vz_restore_htimer(struct kvm_vcpu *vcpu, u32 compare, u32 cause) { u32 start_count, after_count; - ktime_t freeze_time; unsigned long flags; /* @@ -396,7 +395,7 @@ static void _kvm_vz_restore_htimer(struct kvm_vcpu *vcpu, * this with interrupts disabled to avoid latency.
 */ local_irq_save(flags); - freeze_time = kvm_mips_freeze_hrtimer(vcpu, &start_count); + kvm_mips_freeze_hrtimer(vcpu, &start_count); write_c0_gtoffset(start_count - read_c0_count()); local_irq_restore(flags); -- cgit From 4ac214574d2d83b7ef20c603d75f1937c912bfd6 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 6 Sep 2021 05:52:23 -0400 Subject: KVM: MMU: mark role_regs and role accessors as maybe unused It is reasonable for these functions to be used only in some configurations, for example only if the host is 64-bits (and therefore supports 64-bit guests). It is also reasonable to keep the role_regs and role accessors in sync even though some of the accessors may be used only for one of the two sets (as is the case currently for CR4.LA57). Because clang reports warnings for unused inlines declared in a .c file, mark both sets of accessors as __maybe_unused. Reported-by: kernel test robot Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 9b0cdec8b62d..2d7e61122af8 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -204,7 +204,7 @@ struct kvm_mmu_role_regs { * the single source of truth for the MMU's state. */ #define BUILD_MMU_ROLE_REGS_ACCESSOR(reg, name, flag) \ -static inline bool ____is_##reg##_##name(struct kvm_mmu_role_regs *regs)\ +static inline bool __maybe_unused ____is_##reg##_##name(struct kvm_mmu_role_regs *regs)\ { \ return !!(regs->reg & flag); \ } @@ -226,7 +226,7 @@ BUILD_MMU_ROLE_REGS_ACCESSOR(efer, lma, EFER_LMA); * and the vCPU may be incorrect/irrelevant. */ #define BUILD_MMU_ROLE_ACCESSOR(base_or_ext, reg, name) \ -static inline bool is_##reg##_##name(struct kvm_mmu *mmu) \ +static inline bool __maybe_unused is_##reg##_##name(struct kvm_mmu *mmu) \ { \ return !!(mmu->mmu_role. base_or_ext . reg##_##name); \ } -- cgit From d9130a2dfdd4b21736c91b818f87dbc0ccd1e757 Mon Sep 17 00:00:00 2001 From: Zelin Deng Date: Wed, 28 Apr 2021 10:22:01 +0800 Subject: KVM: x86: Update vCPU's hv_clock before back to guest when tsc_offset is adjusted When MSR_IA32_TSC_ADJUST is written by the guest via the TSC ADJUST feature and there is a big TSC warp (e.g. a new vCPU is hot-added into a VM which has been up for a long time), a large value is added to tsc_offset before returning to the guest. This makes the system time jump, since tsc_timestamp is not adjusted in the meantime, and breaks the monotonic character of pvclock. To fix this, notify kvm to update the vCPU's guest time before going back to the guest. Cc: stable@vger.kernel.org Signed-off-by: Zelin Deng Signed-off-by: Paolo Bonzini Message-Id: <1619576521-81399-2-git-send-email-zelin.deng@linux.alibaba.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1a00af1b076b..28ef14155726 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3321,6 +3321,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (!msr_info->host_initiated) { s64 adj = data - vcpu->arch.ia32_tsc_adjust_msr; adjust_tsc_offset_guest(vcpu, adj); + /* Before back to guest, tsc_timestamp must be adjusted * as well, otherwise guest's percpu pvclock time could jump.
+ */ + kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); } vcpu->arch.ia32_tsc_adjust_msr = data; } -- cgit From 88b604263f3d6eedae0b1c2c3bbd602d1e2e8775 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 1 Sep 2021 16:05:59 +0200 Subject: s390/unwind: use current_frame_address() to unwind current task current_stack_pointer() simply returns the current value of %r15. If the caller of current_stack_pointer() allocates a stack frame (which is the case in the unwind code), %r15 points to the stack frame allocated for callees, meaning the caller (e.g. stack_trace_save) will end up in the stacktrace. This is not expected by stack_trace_save*() callers and causes problems. current_frame_address(), on the other hand, returns the function's stack frame address, which matches %r15 upon function invocation. Using it in get_stack_pointer() aligns the code more closely with the x86 implementation (according to BACKTRACE_SELF_TEST output) and meets the expectations of stack_trace_save*() callers, notably KCSAN. Also make sure unwind_start is always inlined. Reported-by: Nathan Chancellor Suggested-by: Marco Elver Signed-off-by: Vasily Gorbik Tested-by: Marco Elver Tested-by: Nathan Chancellor Link: https://lore.kernel.org/r/patch.git-04dd26be3043.your-ad-here.call-01630504868-ext-6188@work.hours Signed-off-by: Heiko Carstens --- arch/s390/include/asm/stacktrace.h | 20 ++++++++++---------- arch/s390/include/asm/unwind.h | 8 ++++---- 2 files changed, 14 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h index 3d8a4b94c620..dd00d98804ec 100644 --- a/arch/s390/include/asm/stacktrace.h +++ b/arch/s390/include/asm/stacktrace.h @@ -34,16 +34,6 @@ static inline bool on_stack(struct stack_info *info, return addr >= info->begin && addr + len <= info->end; } -static __always_inline unsigned long get_stack_pointer(struct task_struct *task, - struct pt_regs *regs) -{ - if (regs) - return (unsigned long) kernel_stack_pointer(regs); - if (task == current) - return current_stack_pointer(); - return (unsigned long) task->thread.ksp; -} - /* * Stack layout of a C stack frame. */ @@ -74,6 +64,16 @@ struct stack_frame { ((unsigned long)__builtin_frame_address(0) - \ offsetof(struct stack_frame, back_chain)) +static __always_inline unsigned long get_stack_pointer(struct task_struct *task, + struct pt_regs *regs) +{ + if (regs) + return (unsigned long)kernel_stack_pointer(regs); + if (task == current) + return current_frame_address(); + return (unsigned long)task->thread.ksp; +} + /* * To keep this simple mark register 2-6 as being changed (volatile) * by the called function, even though register 6 is saved/nonvolatile.
diff --git a/arch/s390/include/asm/unwind.h b/arch/s390/include/asm/unwind.h index de9006b0cfeb..5ebf534ef753 100644 --- a/arch/s390/include/asm/unwind.h +++ b/arch/s390/include/asm/unwind.h @@ -55,10 +55,10 @@ static inline bool unwind_error(struct unwind_state *state) return state->error; } -static inline void unwind_start(struct unwind_state *state, - struct task_struct *task, - struct pt_regs *regs, - unsigned long first_frame) +static __always_inline void unwind_start(struct unwind_state *state, + struct task_struct *task, + struct pt_regs *regs, + unsigned long first_frame) { task = task ?: current; first_frame = first_frame ?: get_stack_pointer(task, regs); -- cgit From a052096bdd6809eeab809202726634d1ac975aa1 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Fri, 27 Aug 2021 20:21:05 +0200 Subject: s390/topology: fix topology information when calling cpu hotplug notifiers The cpu hotplug notifiers are called without updating the core/thread masks when a new CPU is added. This causes problems with code setting up data structures in a cpu hotplug notifier, and relying on that later in normal code. This caused a crash in the new core scheduling code (SCHED_CORE), where rq->core was set up in a notifier depending on cpu masks. To fix this, add a cpu_setup_mask which is used in update_cpu_masks() instead of the cpu_online_mask to determine whether the cpu masks should be set for a certain cpu. Also move update_cpu_masks() to update the masks before calling notify_cpu_starting() so that the notifiers are seeing the updated masks. Signed-off-by: Sven Schnelle Cc: [hca@linux.ibm.com: get rid of cpu_online_mask handling] Signed-off-by: Heiko Carstens --- arch/s390/include/asm/smp.h | 1 + arch/s390/kernel/smp.c | 9 +++++++-- arch/s390/kernel/topology.c | 13 +++++++------ 3 files changed, 15 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h index e317fd4866c1..f16f4d054ae2 100644 --- a/arch/s390/include/asm/smp.h +++ b/arch/s390/include/asm/smp.h @@ -18,6 +18,7 @@ extern struct mutex smp_cpu_state_mutex; extern unsigned int smp_cpu_mt_shift; extern unsigned int smp_cpu_mtid; extern __vector128 __initdata boot_cpu_vector_save_area[__NUM_VXRS]; +extern cpumask_t cpu_setup_mask; extern int __cpu_up(unsigned int cpu, struct task_struct *tidle); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 2a991e43ead3..1a04e5bdf655 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -95,6 +95,7 @@ __vector128 __initdata boot_cpu_vector_save_area[__NUM_VXRS]; #endif static unsigned int smp_max_threads __initdata = -1U; +cpumask_t cpu_setup_mask; static int __init early_nosmt(char *s) { @@ -902,13 +903,14 @@ static void smp_start_secondary(void *cpuvoid) vtime_init(); vdso_getcpu_init(); pfault_init(); + cpumask_set_cpu(cpu, &cpu_setup_mask); + update_cpu_masks(); notify_cpu_starting(cpu); if (topology_cpu_dedicated(cpu)) set_cpu_flag(CIF_DEDICATED_CPU); else clear_cpu_flag(CIF_DEDICATED_CPU); set_cpu_online(cpu, true); - update_cpu_masks(); inc_irq_stat(CPU_RST); local_irq_enable(); cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); @@ -950,10 +952,13 @@ early_param("possible_cpus", _setup_possible_cpus); int __cpu_disable(void) { unsigned long cregs[16]; + int cpu; /* Handle possible pending IPIs */ smp_handle_ext_call(); - set_cpu_online(smp_processor_id(), false); + cpu = smp_processor_id(); + set_cpu_online(cpu, false); + cpumask_clear_cpu(cpu, &cpu_setup_mask); update_cpu_masks(); /* Disable pseudo page faults 
on this cpu. */ pfault_fini(); diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index d2458a29618f..58f8291950cb 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -67,7 +67,7 @@ static void cpu_group_map(cpumask_t *dst, struct mask_info *info, unsigned int c static cpumask_t mask; cpumask_clear(&mask); - if (!cpu_online(cpu)) + if (!cpumask_test_cpu(cpu, &cpu_setup_mask)) goto out; cpumask_set_cpu(cpu, &mask); switch (topology_mode) { @@ -88,7 +88,7 @@ static void cpu_group_map(cpumask_t *dst, struct mask_info *info, unsigned int c case TOPOLOGY_MODE_SINGLE: break; } - cpumask_and(&mask, &mask, cpu_online_mask); + cpumask_and(&mask, &mask, &cpu_setup_mask); out: cpumask_copy(dst, &mask); } @@ -99,16 +99,16 @@ static void cpu_thread_map(cpumask_t *dst, unsigned int cpu) int i; cpumask_clear(&mask); - if (!cpu_online(cpu)) + if (!cpumask_test_cpu(cpu, &cpu_setup_mask)) goto out; cpumask_set_cpu(cpu, &mask); if (topology_mode != TOPOLOGY_MODE_HW) goto out; cpu -= cpu % (smp_cpu_mtid + 1); - for (i = 0; i <= smp_cpu_mtid; i++) - if (cpu_present(cpu + i)) + for (i = 0; i <= smp_cpu_mtid; i++) { + if (cpumask_test_cpu(cpu + i, &cpu_setup_mask)) cpumask_set_cpu(cpu + i, &mask); - cpumask_and(&mask, &mask, cpu_online_mask); + } out: cpumask_copy(dst, &mask); } @@ -569,6 +569,7 @@ void __init topology_init_early(void) alloc_masks(info, &book_info, 2); alloc_masks(info, &drawer_info, 3); out: + cpumask_set_cpu(0, &cpu_setup_mask); __arch_update_cpu_topology(); __arch_update_dedicated_flag(NULL); } -- cgit From 2e8275285a60868231eaf63abd9552cc27e512a2 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 2 Sep 2021 19:47:37 +0200 Subject: s390/mm: fix kernel doc comments Signed-off-by: Heiko Carstens --- arch/s390/mm/gmap.c | 11 +++++------ arch/s390/mm/pgtable.c | 4 ++-- 2 files changed, 7 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 9bb2c7512cd5..4d3b33ce81c6 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -27,7 +27,6 @@ /** * gmap_alloc - allocate and initialize a guest address space - * @mm: pointer to the parent mm_struct * @limit: maximum address of the gmap address space * * Returns a guest address space structure. 
@@ -504,7 +503,7 @@ EXPORT_SYMBOL_GPL(gmap_translate); /** * gmap_unlink - disconnect a page table from the gmap shadow tables - * @gmap: pointer to guest mapping meta data structure + * @mm: pointer to the parent mm_struct * @table: pointer to the host page table * @vmaddr: vm address associated with the host page table */ @@ -527,7 +526,7 @@ static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *old, pmd_t new, unsigned long gaddr); /** - * gmap_link - set up shadow page tables to connect a host to a guest address + * __gmap_link - set up shadow page tables to connect a host to a guest address * @gmap: pointer to guest mapping meta data structure * @gaddr: guest address * @vmaddr: vm address @@ -1971,7 +1970,7 @@ out_free: EXPORT_SYMBOL_GPL(gmap_shadow_sgt); /** - * gmap_shadow_lookup_pgtable - find a shadow page table + * gmap_shadow_pgt_lookup - find a shadow page table * @sg: pointer to the shadow guest address space structure * @saddr: the address in the shadow aguest address space * @pgt: parent gmap address of the page table to get shadowed @@ -2165,7 +2164,7 @@ int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte) } EXPORT_SYMBOL_GPL(gmap_shadow_page); -/** +/* * gmap_shadow_notify - handle notifications for shadow gmap * * Called with sg->parent->shadow_lock. @@ -2225,7 +2224,7 @@ static void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr, /** * ptep_notify - call all invalidation callbacks for a specific pte. * @mm: pointer to the process mm_struct - * @addr: virtual address in the process address space + * @vmaddr: virtual address in the process address space * @pte: pointer to the page table entry * @bits: bits from the pgste that caused the notify call * diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index eec3a9d7176e..034721a68d8f 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -834,7 +834,7 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, } EXPORT_SYMBOL(set_guest_storage_key); -/** +/* * Conditionally set a guest storage key (handling csske). * oldkey will be updated when either mr or mc is set and a pointer is given. * @@ -867,7 +867,7 @@ int cond_set_guest_storage_key(struct mm_struct *mm, unsigned long addr, } EXPORT_SYMBOL(cond_set_guest_storage_key); -/** +/* * Reset a guest reference bit (rrbe), returning the reference and changed bit. * * Returns < 0 in case of error, otherwise the cc to be reported to the guest. 
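The hunks above all enforce the same kernel-doc rule: only a comment that scripts/kernel-doc should parse starts with /** and must name the function's real parameters; everything else is demoted to a plain /* comment. A minimal well-formed kernel-doc header looks like this (a made-up example for illustration, not from this patch):

/**
 * example_translate - translate a guest address into a host address
 * @gmap: pointer to the guest mapping meta data structure
 * @gaddr: guest address to translate
 *
 * Returns the host address, or -EFAULT in case of an error.
 */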
-- cgit From 5dddfaac4c258f5eda7b3dba7f9d851262fcbbab Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 2 Sep 2021 19:51:59 +0200 Subject: s390/cpum_cf: move array from header to C file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the array from the header to a C file to avoid defining it in every C file that includes the header: In file included from arch/s390/kernel/perf_cpum_cf_common.c:19: ./arch/s390/include/asm/cpu_mcf.h:27:18: warning: ‘cpumf_ctr_ctl’ defined but not used [-Wunused-const-variable=] 27 | static const u64 cpumf_ctr_ctl[CPUMF_CTR_SET_MAX] = { | ^~~~~~~~~~~~~ Signed-off-by: Heiko Carstens --- arch/s390/include/asm/cpu_mcf.h | 7 ------- arch/s390/kernel/perf_cpum_cf.c | 8 ++++++++ 2 files changed, 8 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/cpu_mcf.h b/arch/s390/include/asm/cpu_mcf.h index ca0e0e5ddbc4..f87a4788c19c 100644 --- a/arch/s390/include/asm/cpu_mcf.h +++ b/arch/s390/include/asm/cpu_mcf.h @@ -24,13 +24,6 @@ enum cpumf_ctr_set { #define CPUMF_LCCTL_ENABLE_SHIFT 16 #define CPUMF_LCCTL_ACTCTL_SHIFT 0 -static const u64 cpumf_ctr_ctl[CPUMF_CTR_SET_MAX] = { - [CPUMF_CTR_SET_BASIC] = 0x02, - [CPUMF_CTR_SET_USER] = 0x04, - [CPUMF_CTR_SET_CRYPTO] = 0x08, - [CPUMF_CTR_SET_EXT] = 0x01, - [CPUMF_CTR_SET_MT_DIAG] = 0x20, -}; static inline void ctr_set_enable(u64 *state, u64 ctrsets) { diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 2e3bb633acf6..4a99154fe651 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -158,6 +158,14 @@ static size_t cfdiag_getctrset(struct cf_ctrset_entry *ctrdata, int ctrset, return need; } +static const u64 cpumf_ctr_ctl[CPUMF_CTR_SET_MAX] = { + [CPUMF_CTR_SET_BASIC] = 0x02, + [CPUMF_CTR_SET_USER] = 0x04, + [CPUMF_CTR_SET_CRYPTO] = 0x08, + [CPUMF_CTR_SET_EXT] = 0x01, + [CPUMF_CTR_SET_MT_DIAG] = 0x20, +}; /* Read out all counter sets and save them in the provided data buffer. * The last 64 byte host an artificial trailer entry. */ -- cgit From ebd9cc6593691e6bc8526e368cedbdfc8034f403 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Fri, 3 Sep 2021 18:27:01 +0200 Subject: s390/pci: fix clp_get_state() handling of -ENODEV With commit cc049eecfb7a ("s390/pci: simplify CLP List PCI handling") clp_get_state() was changed to make use of the new clp_find_pci() helper function to query a specific function. This however returns -ENODEV when the device is not found at all, and this error was passed to the caller. It was missed, however, that the callers actually expect a success return from clp_get_state() if the device is gone. Fix this by handling the -ENODEV return of clp_find_pci() explicitly in clp_get_state(): return success and set the state parameter to ZPCI_FN_STATE_RESERVED, matching the design concept that a PCI function that disappeared must have been reserved elsewhere. All other error returns continue to be passed on to the caller.
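For illustration, a sketch of the caller-visible contract after this fix (not code from the patch): a function that has vanished from the CLP list is now reported as success together with the RESERVED state rather than as an error:

	enum zpci_state state;
	int rc = clp_get_state(fid, &state);

	if (!rc && state == ZPCI_FN_STATE_RESERVED) {
		/* device is gone; tear it down rather than treating this as an error */
	}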
Reviewed-by: Matthew Rosato Fixes: cc049eecfb7a ("s390/pci: simplify CLP List PCI handling") Signed-off-by: Niklas Schnelle Signed-off-by: Heiko Carstens --- arch/s390/pci/pci_clp.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index 51dc2215a2b7..df895d98a56b 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -449,14 +449,17 @@ int clp_get_state(u32 fid, enum zpci_state *state) struct clp_fh_list_entry entry; int rc; - *state = ZPCI_FN_STATE_RESERVED; rrb = clp_alloc_block(GFP_ATOMIC); if (!rrb) return -ENOMEM; rc = clp_find_pci(rrb, fid, &entry); - if (!rc) + if (!rc) { *state = entry.config_state; + } else if (rc == -ENODEV) { + *state = ZPCI_FN_STATE_RESERVED; + rc = 0; + } clp_free_block(rrb); return rc; -- cgit From 85ad27215ca57d02bb7c43d76f65a865dc863b59 Mon Sep 17 00:00:00 2001 From: Pierre Morel Date: Mon, 6 Sep 2021 08:49:44 +0200 Subject: s390/pci: read clp_list_pci_req only once We do not have to reset the fh_list in the loop. Signed-off-by: Pierre Morel Reviewed-by: Niklas Schnelle Signed-off-by: Heiko Carstens --- arch/s390/pci/pci_clp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index df895d98a56b..be077b39da33 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -383,8 +383,8 @@ static int clp_find_pci(struct clp_req_rsp_list_pci *rrb, u32 fid, rc = clp_list_pci_req(rrb, &resume_token, &nentries); if (rc) return rc; + fh_list = rrb->response.fh_list; for (i = 0; i < nentries; i++) { - fh_list = rrb->response.fh_list; if (fh_list[i].fid == fid) { *entry = fh_list[i]; return 0; -- cgit From 68c32eb2707aed0a3be1a60b0f206943a25e8f34 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 6 Sep 2021 13:59:26 +0200 Subject: s390: remove xpram device driver Support for expanded storage was only available until z13 and z/VM 6.3 respectively. However, there have not been any use cases for this device driver for a long time, even before that. Therefore remove it. Acked-by: Christian Borntraeger Signed-off-by: Heiko Carstens --- arch/s390/configs/defconfig | 1 - arch/s390/configs/zfcpdump_defconfig | 1 - 2 files changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index e1642d2cba59..56a1cc85c5d7 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -397,7 +397,6 @@ CONFIG_BLK_DEV_DRBD=m CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=32768 -# CONFIG_BLK_DEV_XPRAM is not set CONFIG_VIRTIO_BLK=y CONFIG_BLK_DEV_RBD=m CONFIG_BLK_DEV_NVME=m diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index d576aaab27c9..aceccf3b9a88 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -35,7 +35,6 @@ CONFIG_NET=y # CONFIG_ETHTOOL_NETLINK is not set CONFIG_DEVTMPFS=y CONFIG_BLK_DEV_RAM=y -# CONFIG_BLK_DEV_XPRAM is not set # CONFIG_DCSSBLK is not set # CONFIG_DASD is not set CONFIG_ENCLOSURE_SERVICES=y -- cgit From cd1adf1b63a112d762832e9c64b0a886fbb840d6 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 7 Sep 2021 11:03:45 -0700 Subject: Revert "mm/gup: remove try_get_page(), call try_get_compound_head() directly" This reverts commit 9857a17f206ff374aea78bccfb687f145368be2e. That commit was completely broken, and I should have caught on to it earlier.
But happily, the kernel test robot noticed the breakage fairly quickly. The breakage is because "try_get_page()" is about avoiding the page reference count overflow case, but is otherwise the exact same as a plain "get_page()". In contrast, "try_get_compound_head()" is an entirely different beast, and uses __page_cache_add_speculative() because it's not just about the page reference count, but also about possibly racing with the underlying page going away. So all the commentary about how "try_get_page() has fallen a little behind in terms of maintenance, try_get_compound_head() handles speculative page references more thoroughly" was just completely wrong: yes, try_get_compound_head() handles speculative page references, but the point is that try_get_page() does not, and must not. So there's no lack of maintenance - there are fundamentally different semantics. A speculative page reference would be entirely wrong in "get_page()", and it's entirely wrong in "try_get_page()". It's not about speculation, it's purely about "uhhuh, you can't get this page because you've tried to increment the reference count too much already". The reason the kernel test robot noticed this bug was that it hit the VM_BUG_ON() in __page_cache_add_speculative(), which is all about verifying that the context of any speculative page access is correct. But since that isn't what try_get_page() is all about, the VM_BUG_ON() tests things that are not correct to test for try_get_page(). Reported-by: kernel test robot Cc: John Hubbard Cc: Christoph Hellwig Cc: Andrew Morton Signed-off-by: Linus Torvalds --- arch/s390/mm/fault.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index a834e4672f72..212632d57db9 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -822,7 +822,7 @@ void do_secure_storage_access(struct pt_regs *regs) break; case KERNEL_FAULT: page = phys_to_page(addr); - if (unlikely(!try_get_compound_head(page, 1))) + if (unlikely(!try_get_page(page))) break; rc = arch_make_page_accessible(page); put_page(page); -- cgit From 9652cb805c44b74ba935c84bfd59745cb1962de8 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 7 Sep 2021 20:37:59 +0200 Subject: s390/ftrace: remove incorrect __va usage The address of ftrace_graph_caller is already virtual. Using __va() to translate the address is wrong. Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens --- arch/s390/kernel/ftrace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 0a464d328467..1d94ffdf347b 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -341,13 +341,13 @@ NOKPROBE_SYMBOL(prepare_ftrace_return); */ int ftrace_enable_ftrace_graph_caller(void) { - brcl_disable(__va(ftrace_graph_caller)); + brcl_disable(ftrace_graph_caller); return 0; } int ftrace_disable_ftrace_graph_caller(void) { - brcl_enable(__va(ftrace_graph_caller)); + brcl_enable(ftrace_graph_caller); return 0; } -- cgit From bb9c14ad267d25dd77ceccbcfd69804bdb7240f5 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 8 Sep 2021 17:45:06 +0200 Subject: hugetlbfs: s390 is always 64bit No need to check for 64BIT. While at it, let's just select ARCH_SUPPORTS_HUGETLBFS from arch/s390/Kconfig.
Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20210908154506.20764-1-david@redhat.com Signed-off-by: Heiko Carstens --- arch/s390/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 92c0a1b4c528..d42c8161a0ed 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -110,6 +110,7 @@ config S390 select ARCH_STACKWALK select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_DEBUG_PAGEALLOC + select ARCH_SUPPORTS_HUGETLBFS select ARCH_SUPPORTS_NUMA_BALANCING select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF -- cgit From 65a2aa5f482ed0c1b5afb9e6b0b9e0b16bb8b616 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 7 Sep 2021 19:55:04 -0700 Subject: mm/memory_hotplug: remove nid parameter from arch_remove_memory() The parameter is unused, let's remove it. Link: https://lkml.kernel.org/r/20210712124052.26491-3-david@redhat.com Signed-off-by: David Hildenbrand Acked-by: Catalin Marinas Acked-by: Michael Ellerman [powerpc] Acked-by: Heiko Carstens [s390] Reviewed-by: Pankaj Gupta Reviewed-by: Oscar Salvador Cc: Catalin Marinas Cc: Will Deacon Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Christian Borntraeger Cc: Yoshinori Sato Cc: Rich Felker Cc: Dave Hansen Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Anshuman Khandual Cc: Ard Biesheuvel Cc: Mike Rapoport Cc: Nicholas Piggin Cc: Pavel Tatashin Cc: Baoquan He Cc: Laurent Dufour Cc: Sergei Trofimovich Cc: Kefeng Wang Cc: Michel Lespinasse Cc: Christophe Leroy Cc: "Aneesh Kumar K.V" Cc: Thiago Jung Bauermann Cc: Joe Perches Cc: Pierre Morel Cc: Jia He Cc: Anton Blanchard Cc: Dan Williams Cc: Dave Jiang Cc: Jason Wang Cc: Len Brown Cc: "Michael S. Tsirkin" Cc: Michal Hocko Cc: Nathan Lynch Cc: Pankaj Gupta Cc: "Rafael J. Wysocki" Cc: "Rafael J. 
Wysocki" Cc: Scott Cheloha Cc: Vishal Verma Cc: Vitaly Kuznetsov Cc: Vlastimil Babka Cc: Wei Yang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/mm/mmu.c | 3 +-- arch/ia64/mm/init.c | 3 +-- arch/powerpc/mm/mem.c | 3 +-- arch/s390/mm/init.c | 3 +-- arch/sh/mm/init.c | 3 +-- arch/x86/mm/init_32.c | 3 +-- arch/x86/mm/init_64.c | 3 +-- 7 files changed, 7 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 9ff0de1b2b93..cfd9deb347c3 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -1502,8 +1502,7 @@ int arch_add_memory(int nid, u64 start, u64 size, return ret; } -void arch_remove_memory(int nid, u64 start, u64 size, - struct vmem_altmap *altmap) +void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 064a967a7b6e..5c6da8d83c1a 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -484,8 +484,7 @@ int arch_add_memory(int nid, u64 start, u64 size, return ret; } -void arch_remove_memory(int nid, u64 start, u64 size, - struct vmem_altmap *altmap) +void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index ad198b439222..c3c4e31462ec 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -119,8 +119,7 @@ int __ref arch_add_memory(int nid, u64 start, u64 size, return rc; } -void __ref arch_remove_memory(int nid, u64 start, u64 size, - struct vmem_altmap *altmap) +void __ref arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 8ac710de1ab1..d85bd7f5d8dc 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -306,8 +306,7 @@ int arch_add_memory(int nid, u64 start, u64 size, return rc; } -void arch_remove_memory(int nid, u64 start, u64 size, - struct vmem_altmap *altmap) +void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index ce26c7f8950a..506784702430 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -414,8 +414,7 @@ int arch_add_memory(int nid, u64 start, u64 size, return ret; } -void arch_remove_memory(int nid, u64 start, u64 size, - struct vmem_altmap *altmap) +void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = PFN_DOWN(start); unsigned long nr_pages = size >> PAGE_SHIFT; diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 74b78840182d..bd90b8fe81e4 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -801,8 +801,7 @@ int arch_add_memory(int nid, u64 start, u64 size, return __add_pages(nid, start_pfn, nr_pages, params); } -void arch_remove_memory(int nid, u64 start, u64 size, - struct vmem_altmap *altmap) +void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index ddeaba947eb3..a6e11763763f 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c 
@@ -1255,8 +1255,7 @@ kernel_physical_mapping_remove(unsigned long start, unsigned long end) remove_pagetable(start, end, true, NULL); } -void __ref arch_remove_memory(int nid, u64 start, u64 size, - struct vmem_altmap *altmap) +void __ref arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; -- cgit From e1c158e4956612e7bada4c03dfb99210af4d6cde Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 7 Sep 2021 19:55:09 -0700 Subject: mm/memory_hotplug: remove nid parameter from remove_memory() and friends There is only a single user remaining. We can simply lookup the nid only used for node offlining purposes when walking our memory blocks. We don't expect to remove multi-nid ranges; and if we'd ever do, we most probably don't care about removing multi-nid ranges that actually result in empty nodes. If ever required, we can detect the "multi-nid" scenario and simply try offlining all online nodes. Link: https://lkml.kernel.org/r/20210712124052.26491-4-david@redhat.com Signed-off-by: David Hildenbrand Acked-by: Michael Ellerman (powerpc) Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: "Rafael J. Wysocki" Cc: Len Brown Cc: Dan Williams Cc: Vishal Verma Cc: Dave Jiang Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Nathan Lynch Cc: Laurent Dufour Cc: "Aneesh Kumar K.V" Cc: Scott Cheloha Cc: Anton Blanchard Cc: Andy Lutomirski Cc: Anshuman Khandual Cc: Ard Biesheuvel Cc: Baoquan He Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Christophe Leroy Cc: Dave Hansen Cc: Heiko Carstens Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jia He Cc: Joe Perches Cc: Kefeng Wang Cc: Michal Hocko Cc: Michel Lespinasse Cc: Mike Rapoport Cc: Nicholas Piggin Cc: Oscar Salvador Cc: Pankaj Gupta Cc: Pankaj Gupta Cc: Pavel Tatashin Cc: Peter Zijlstra Cc: Pierre Morel Cc: "Rafael J. 
Wysocki" Cc: Rich Felker Cc: Sergei Trofimovich Cc: Thiago Jung Bauermann Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Vitaly Kuznetsov Cc: Vlastimil Babka Cc: Wei Yang Cc: Will Deacon Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/platforms/pseries/hotplug-memory.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 377d852f5a9a..ef5c24b42cf1 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -286,7 +286,7 @@ static int pseries_remove_memblock(unsigned long base, unsigned long memblock_si { unsigned long block_sz, start_pfn; int sections_per_block; - int i, nid; + int i; start_pfn = base >> PAGE_SHIFT; @@ -297,10 +297,9 @@ static int pseries_remove_memblock(unsigned long base, unsigned long memblock_si block_sz = pseries_memory_block_size(); sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE; - nid = memory_add_physaddr_to_nid(base); for (i = 0; i < sections_per_block; i++) { - __remove_memory(nid, base, MIN_MEMORY_BLOCK_SIZE); + __remove_memory(base, MIN_MEMORY_BLOCK_SIZE); base += MIN_MEMORY_BLOCK_SIZE; } @@ -387,7 +386,7 @@ static int dlpar_remove_lmb(struct drmem_lmb *lmb) block_sz = pseries_memory_block_size(); - __remove_memory(mem_block->nid, lmb->base_addr, block_sz); + __remove_memory(lmb->base_addr, block_sz); put_device(&mem_block->dev); /* Update memory regions for memory remove */ @@ -660,7 +659,7 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb) rc = dlpar_online_lmb(lmb); if (rc) { - __remove_memory(nid, lmb->base_addr, block_sz); + __remove_memory(lmb->base_addr, block_sz); invalidate_lmb_associativity_index(lmb); } else { lmb->flags |= DRCONF_MEM_ASSIGNED; -- cgit From 8350229ffceb372621e277d4d36f1ffca6212135 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 7 Sep 2021 19:55:58 -0700 Subject: riscv: only select GENERIC_IOREMAP if MMU support is enabled nommu ioremap is an inline stub in asm-generic/io.h. Link: https://lkml.kernel.org/r/20210825072036.GA29161@lst.de Signed-off-by: Christoph Hellwig Cc: Nicholas Piggin Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/riscv/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 4f7b70ae7c31..8598e3d103a3 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -48,7 +48,7 @@ config RISCV select GENERIC_CLOCKEVENTS_BROADCAST if SMP select GENERIC_EARLY_IOREMAP select GENERIC_GETTIMEOFDAY if HAVE_GENERIC_VDSO - select GENERIC_IOREMAP + select GENERIC_IOREMAP if MMU select GENERIC_IRQ_MULTI_HANDLER select GENERIC_IRQ_SHOW select GENERIC_LIB_DEVMEM_IS_ALLOWED -- cgit From 5ecae8f6aafe6ae7c8fb10c3c175f85baf779814 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 7 Sep 2021 19:57:21 -0700 Subject: alpha: agp: make empty macros use do-while-0 style Copy these macros from ia64/include/asm/agp.h to avoid the "empty-body" in 'if' statment warning. 
drivers/char/agp/generic.c: In function 'agp_generic_destroy_page': ../drivers/char/agp/generic.c:1265:42: warning: suggest braces around empty body in an 'if' statement [-Wempty-body] 1265 | unmap_page_from_agp(page); Link: https://lkml.kernel.org/r/20210809030822.20658-1-rdunlap@infradead.org Signed-off-by: Randy Dunlap Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Cc: David Airlie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/agp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/alpha/include/asm/agp.h b/arch/alpha/include/asm/agp.h index 7173eada1567..7874f063d000 100644 --- a/arch/alpha/include/asm/agp.h +++ b/arch/alpha/include/asm/agp.h @@ -6,8 +6,8 @@ /* dummy for now */ -#define map_page_into_agp(page) -#define unmap_page_from_agp(page) +#define map_page_into_agp(page) do { } while (0) +#define unmap_page_from_agp(page) do { } while (0) #define flush_agp_cache() mb() /* GATT allocation. Returns/accepts GATT kernel virtual address. */ -- cgit From 0a9d991c424b4cda5db574b6355f0c627ae59109 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 7 Sep 2021 19:57:24 -0700 Subject: alpha: pci-sysfs: fix all kernel-doc warnings Fix all kernel-doc warnings in arch/alpha/kernel/pci-sysfs.c: arch/alpha/kernel/pci-sysfs.c:67: warning: No description found for return value of 'pci_mmap_resource' arch/alpha/kernel/pci-sysfs.c:115: warning: Function parameter or member 'pdev' not described in 'pci_remove_resource_files' arch/alpha/kernel/pci-sysfs.c:115: warning: Excess function parameter 'dev' description in 'pci_remove_resource_files' arch/alpha/kernel/pci-sysfs.c:230: warning: Function parameter or member 'pdev' not described in 'pci_create_resource_files' arch/alpha/kernel/pci-sysfs.c:230: warning: Excess function parameter 'dev' description in 'pci_create_resource_files' arch/alpha/kernel/pci-sysfs.c:232: warning: No description found for return value of 'pci_create_resource_files' arch/alpha/kernel/pci-sysfs.c:305: warning: Function parameter or member 'bus' not described in 'pci_adjust_legacy_attr' arch/alpha/kernel/pci-sysfs.c:305: warning: Excess function parameter 'b' description in 'pci_adjust_legacy_attr' Link: https://lkml.kernel.org/r/20210808185249.31442-1-rdunlap@infradead.org Signed-off-by: Randy Dunlap Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/kernel/pci-sysfs.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/alpha/kernel/pci-sysfs.c b/arch/alpha/kernel/pci-sysfs.c index 0021580d79ad..5808a66e2a81 100644 --- a/arch/alpha/kernel/pci-sysfs.c +++ b/arch/alpha/kernel/pci-sysfs.c @@ -60,6 +60,8 @@ static int __pci_mmap_fits(struct pci_dev *pdev, int num, * @sparse: address space type * * Use the bus mapping routines to map a PCI resource into userspace. + * + * Return: %0 on success, negative error code otherwise */ static int pci_mmap_resource(struct kobject *kobj, struct bin_attribute *attr, @@ -106,7 +108,7 @@ static int pci_mmap_resource_dense(struct file *filp, struct kobject *kobj, /** * pci_remove_resource_files - cleanup resource files - * @dev: dev to cleanup + * @pdev: pci_dev to cleanup * * If we created resource files for @dev, remove them from sysfs and * free their resources. 
@@ -221,10 +223,12 @@ static int pci_create_attr(struct pci_dev *pdev, int num) } /** - * pci_create_resource_files - create resource files in sysfs for @dev - * @dev: dev in question + * pci_create_resource_files - create resource files in sysfs for @pdev + * @pdev: pci_dev in question * * Walk the resources in @dev creating files for each resource available. + * + * Return: %0 on success, or negative error code */ int pci_create_resource_files(struct pci_dev *pdev) { @@ -296,7 +300,7 @@ int pci_mmap_legacy_page_range(struct pci_bus *bus, struct vm_area_struct *vma, /** * pci_adjust_legacy_attr - adjustment of legacy file attributes - * @b: bus to create files under + * @bus: bus to create files under * @mmap_type: I/O port or memory * * Adjust file name and size for sparse mappings. -- cgit From c226bc3cd99bd3535f6a7264c7c7fb0bce16db12 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 7 Sep 2021 19:57:38 -0700 Subject: arch: Kconfig: fix spelling mistake "seperate" -> "separate" There is a spelling mistake in the Kconfig text. Fix it. Link: https://lkml.kernel.org/r/20210704095207.37342-1-colin.king@canonical.com Signed-off-by: Colin Ian King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/Kconfig b/arch/Kconfig index 129df498a8e1..766e68b1e703 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -886,7 +886,7 @@ config HAVE_SOFTIRQ_ON_OWN_STACK bool help Architecture provides a function to run __do_softirq() on a - seperate stack. + separate stack. config PGTABLE_LEVELS int -- cgit From 8b097881b54cbc23dd78262ed88c9924d00ea457 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Tue, 7 Sep 2021 20:16:06 -0700 Subject: trap: cleanup trap_init() There are some empty trap_init() definitions in different ARCHs. Introduce a new weak trap_init() function to clean them up. Link: https://lkml.kernel.org/r/20210812123602.76356-1-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Acked-by: Russell King (Oracle) [arm32] Acked-by: Vineet Gupta [arc] Acked-by: Michael Ellerman [powerpc] Cc: Yoshinori Sato Cc: Ley Foon Tan Cc: Jonas Bonn Cc: Stefan Kristiansson Cc: Stafford Horne Cc: James E.J.
Bottomley Cc: Helge Deller Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Paul Walmsley Cc: Jeff Dike Cc: Richard Weinberger Cc: Anton Ivanov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arc/kernel/traps.c | 5 ----- arch/arm/kernel/traps.c | 5 ----- arch/h8300/kernel/traps.c | 4 ---- arch/hexagon/kernel/traps.c | 4 ---- arch/nds32/kernel/traps.c | 5 ----- arch/nios2/kernel/traps.c | 5 ----- arch/openrisc/kernel/traps.c | 5 ----- arch/parisc/kernel/traps.c | 4 ---- arch/powerpc/kernel/traps.c | 5 ----- arch/riscv/kernel/traps.c | 5 ----- arch/um/kernel/trap.c | 4 ---- 11 files changed, 51 deletions(-) (limited to 'arch') diff --git a/arch/arc/kernel/traps.c b/arch/arc/kernel/traps.c index 57235e5c0cea..6b83e3f2b41c 100644 --- a/arch/arc/kernel/traps.c +++ b/arch/arc/kernel/traps.c @@ -20,11 +20,6 @@ #include #include -void __init trap_init(void) -{ - return; -} - void die(const char *str, struct pt_regs *regs, unsigned long address) { show_kernel_fault_diag(str, regs, address); diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 64308e3a5d0c..e9b4f2b49bd8 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -781,11 +781,6 @@ void abort(void) panic("Oops failed to kill thread"); } -void __init trap_init(void) -{ - return; -} - #ifdef CONFIG_KUSER_HELPERS static void __init kuser_init(void *vectors) { diff --git a/arch/h8300/kernel/traps.c b/arch/h8300/kernel/traps.c index 5d8b969cd8f3..bdbe988d8dbc 100644 --- a/arch/h8300/kernel/traps.c +++ b/arch/h8300/kernel/traps.c @@ -39,10 +39,6 @@ void __init base_trap_init(void) { } -void __init trap_init(void) -{ -} - asmlinkage void set_esp0(unsigned long ssp) { current->thread.esp0 = ssp; diff --git a/arch/hexagon/kernel/traps.c b/arch/hexagon/kernel/traps.c index 904134b37232..edfc35dafeb1 100644 --- a/arch/hexagon/kernel/traps.c +++ b/arch/hexagon/kernel/traps.c @@ -28,10 +28,6 @@ #define TRAP_SYSCALL 1 #define TRAP_DEBUG 0xdb -void __init trap_init(void) -{ -} - #ifdef CONFIG_GENERIC_BUG /* Maybe should resemble arch/sh/kernel/traps.c ?? 
*/ int is_valid_bugaddr(unsigned long addr) diff --git a/arch/nds32/kernel/traps.c b/arch/nds32/kernel/traps.c index ee0d9ae192a5..f06421c645af 100644 --- a/arch/nds32/kernel/traps.c +++ b/arch/nds32/kernel/traps.c @@ -183,11 +183,6 @@ void __pgd_error(const char *file, int line, unsigned long val) } extern char *exception_vector, *exception_vector_end; -void __init trap_init(void) -{ - return; -} - void __init early_trap_init(void) { unsigned long ivb = 0; diff --git a/arch/nios2/kernel/traps.c b/arch/nios2/kernel/traps.c index b172da4eb1a9..596986a74a26 100644 --- a/arch/nios2/kernel/traps.c +++ b/arch/nios2/kernel/traps.c @@ -105,11 +105,6 @@ void show_stack(struct task_struct *task, unsigned long *stack, printk("%s\n", loglvl); } -void __init trap_init(void) -{ - /* Nothing to do here */ -} - /* Breakpoint handler */ asmlinkage void breakpoint_c(struct pt_regs *fp) { diff --git a/arch/openrisc/kernel/traps.c b/arch/openrisc/kernel/traps.c index 4d61333c2623..aa1e709405ac 100644 --- a/arch/openrisc/kernel/traps.c +++ b/arch/openrisc/kernel/traps.c @@ -231,11 +231,6 @@ void unhandled_exception(struct pt_regs *regs, int ea, int vector) die("Oops", regs, 9); } -void __init trap_init(void) -{ - /* Nothing needs to be done */ -} - asmlinkage void do_trap(struct pt_regs *regs, unsigned long address) { force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->pc); diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index 8d8441d4562a..747c328fb886 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -859,7 +859,3 @@ void __init early_trap_init(void) initialize_ivt(&fault_vector_20); } - -void __init trap_init(void) -{ -} diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index d56254f05e17..7819231242b2 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -2215,11 +2215,6 @@ DEFINE_INTERRUPT_HANDLER(kernel_bad_stack) die("Bad kernel stack pointer", regs, SIGABRT); } -void __init trap_init(void) -{ -} - - #ifdef CONFIG_PPC_EMULATED_STATS #define WARN_EMULATED_SETUP(type) .type = { .name = #type } diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 0a98fd0ddfe9..0daaa3e4630d 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -199,11 +199,6 @@ int is_valid_bugaddr(unsigned long pc) } #endif /* CONFIG_GENERIC_BUG */ -/* stvec & scratch is already set from head.S */ -void __init trap_init(void) -{ -} - #ifdef CONFIG_VMAP_STACK static DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack)__aligned(16); diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index ad12f78bda7e..3198c4767387 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -311,7 +311,3 @@ void winch(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) { do_IRQ(WINCH_IRQ, regs); } - -void trap_init(void) -{ -} -- cgit From 4cb398fe1bf127c1e9eff40969454b289f219c43 Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Tue, 7 Sep 2021 20:00:44 -0700 Subject: configs: remove the obsolete CONFIG_INPUT_POLLDEV This CONFIG option was removed in commit 278b13ce3a89 ("Input: remove input_polled_dev implementation") so there's no point to keep it in defconfigs any longer. Get rid of the leftover for all arches. 
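(A quick way to verify the cleanup is complete, as a hypothetical check rather than part of this patch: a search such as "git grep -n INPUT_POLLDEV arch/*/configs" should produce no hits once the patch is applied.)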
Link: https://lkml.kernel.org/r/20210726074741.1062-1-yuzenghui@huawei.com Signed-off-by: Zenghui Yu Cc: Dmitry Torokhov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/configs/dove_defconfig | 1 - arch/arm/configs/pxa_defconfig | 1 - arch/mips/configs/lemote2f_defconfig | 1 - arch/mips/configs/pic32mzda_defconfig | 1 - arch/mips/configs/rt305x_defconfig | 1 - arch/mips/configs/xway_defconfig | 1 - arch/parisc/configs/generic-32bit_defconfig | 1 - arch/x86/configs/i386_defconfig | 1 - arch/x86/configs/x86_64_defconfig | 1 - 9 files changed, 9 deletions(-) (limited to 'arch') diff --git a/arch/arm/configs/dove_defconfig b/arch/arm/configs/dove_defconfig index b935162a8bba..33074fdab2ea 100644 --- a/arch/arm/configs/dove_defconfig +++ b/arch/arm/configs/dove_defconfig @@ -56,7 +56,6 @@ CONFIG_ATA=y CONFIG_SATA_MV=y CONFIG_NETDEVICES=y CONFIG_MV643XX_ETH=y -CONFIG_INPUT_POLLDEV=y # CONFIG_INPUT_MOUSEDEV is not set CONFIG_INPUT_EVDEV=y # CONFIG_KEYBOARD_ATKBD is not set diff --git a/arch/arm/configs/pxa_defconfig b/arch/arm/configs/pxa_defconfig index 363f1b1b08e3..58f4834289e6 100644 --- a/arch/arm/configs/pxa_defconfig +++ b/arch/arm/configs/pxa_defconfig @@ -284,7 +284,6 @@ CONFIG_RT2800USB=m CONFIG_MWIFIEX=m CONFIG_MWIFIEX_SDIO=m CONFIG_INPUT_FF_MEMLESS=m -CONFIG_INPUT_POLLDEV=y CONFIG_INPUT_MATRIXKMAP=y CONFIG_INPUT_MOUSEDEV=m CONFIG_INPUT_MOUSEDEV_SCREEN_X=640 diff --git a/arch/mips/configs/lemote2f_defconfig b/arch/mips/configs/lemote2f_defconfig index aaf9d5e0aa2c..791894c4d8fb 100644 --- a/arch/mips/configs/lemote2f_defconfig +++ b/arch/mips/configs/lemote2f_defconfig @@ -116,7 +116,6 @@ CONFIG_8139TOO=y CONFIG_R8169=y CONFIG_USB_USBNET=m CONFIG_USB_NET_CDC_EEM=m -CONFIG_INPUT_POLLDEV=m CONFIG_INPUT_EVDEV=y # CONFIG_MOUSE_PS2_ALPS is not set # CONFIG_MOUSE_PS2_LOGIPS2PP is not set diff --git a/arch/mips/configs/pic32mzda_defconfig b/arch/mips/configs/pic32mzda_defconfig index 63fe2da1b37f..fd567247adc7 100644 --- a/arch/mips/configs/pic32mzda_defconfig +++ b/arch/mips/configs/pic32mzda_defconfig @@ -34,7 +34,6 @@ CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_SCAN_ASYNC=y # CONFIG_SCSI_LOWLEVEL is not set CONFIG_INPUT_LEDS=m -CONFIG_INPUT_POLLDEV=y CONFIG_INPUT_MOUSEDEV=m CONFIG_INPUT_EVDEV=y CONFIG_INPUT_EVBUG=m diff --git a/arch/mips/configs/rt305x_defconfig b/arch/mips/configs/rt305x_defconfig index fec5851c164b..eb359db15dba 100644 --- a/arch/mips/configs/rt305x_defconfig +++ b/arch/mips/configs/rt305x_defconfig @@ -90,7 +90,6 @@ CONFIG_PPPOE=m CONFIG_PPP_ASYNC=m CONFIG_ISDN=y CONFIG_INPUT=m -CONFIG_INPUT_POLLDEV=m # CONFIG_KEYBOARD_ATKBD is not set # CONFIG_INPUT_MOUSE is not set CONFIG_INPUT_MISC=y diff --git a/arch/mips/configs/xway_defconfig b/arch/mips/configs/xway_defconfig index 9abbc0debc2a..eeb689f715cb 100644 --- a/arch/mips/configs/xway_defconfig +++ b/arch/mips/configs/xway_defconfig @@ -96,7 +96,6 @@ CONFIG_PPPOE=m CONFIG_PPP_ASYNC=m CONFIG_ISDN=y CONFIG_INPUT=m -CONFIG_INPUT_POLLDEV=m # CONFIG_KEYBOARD_ATKBD is not set # CONFIG_INPUT_MOUSE is not set CONFIG_INPUT_MISC=y diff --git a/arch/parisc/configs/generic-32bit_defconfig b/arch/parisc/configs/generic-32bit_defconfig index 7611d48c599e..dd14e3131325 100644 --- a/arch/parisc/configs/generic-32bit_defconfig +++ b/arch/parisc/configs/generic-32bit_defconfig @@ -111,7 +111,6 @@ CONFIG_PPP_BSDCOMP=m CONFIG_PPP_DEFLATE=m CONFIG_PPPOE=m # CONFIG_WLAN is not set -CONFIG_INPUT_POLLDEV=y CONFIG_KEYBOARD_HIL_OLD=m CONFIG_KEYBOARD_HIL=m CONFIG_MOUSE_SERIAL=y diff --git a/arch/x86/configs/i386_defconfig 
b/arch/x86/configs/i386_defconfig index 9c9c4a888b1d..e81885384f60 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -156,7 +156,6 @@ CONFIG_FORCEDETH=y CONFIG_8139TOO=y # CONFIG_8139TOO_PIO is not set CONFIG_R8169=y -CONFIG_INPUT_POLLDEV=y CONFIG_INPUT_EVDEV=y CONFIG_INPUT_JOYSTICK=y CONFIG_INPUT_TABLET=y diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index b60bd2d86034..e8a7a0af2bda 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -148,7 +148,6 @@ CONFIG_SKY2=y CONFIG_FORCEDETH=y CONFIG_8139TOO=y CONFIG_R8169=y -CONFIG_INPUT_POLLDEV=y CONFIG_INPUT_EVDEV=y CONFIG_INPUT_JOYSTICK=y CONFIG_INPUT_TABLET=y -- cgit From 59ab844eed9c6b01d32dcb27b57accc23771b324 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 8 Sep 2021 15:18:25 -0700 Subject: compat: remove some compat entry points These are all handled correctly when calling the native system call entry point, so remove the special cases. Link: https://lkml.kernel.org/r/20210727144859.4150043-6-arnd@kernel.org Signed-off-by: Arnd Bergmann Reviewed-by: Christoph Hellwig Cc: Al Viro Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Christoph Hellwig Cc: "David S. Miller" Cc: Eric Biederman Cc: Feng Tang Cc: Heiko Carstens Cc: Helge Deller Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: "James E.J. Bottomley" Cc: Michael Ellerman Cc: Paul Mackerras Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/include/asm/unistd32.h | 10 +++++----- arch/mips/kernel/syscalls/syscall_n32.tbl | 10 +++++----- arch/mips/kernel/syscalls/syscall_o32.tbl | 10 +++++----- arch/parisc/kernel/syscalls/syscall.tbl | 8 ++++---- arch/powerpc/kernel/syscalls/syscall.tbl | 10 +++++----- arch/s390/kernel/syscalls/syscall.tbl | 10 +++++----- arch/sparc/kernel/syscalls/syscall.tbl | 10 +++++----- arch/x86/entry/syscalls/syscall_32.tbl | 4 ++-- arch/x86/entry/syscalls/syscall_64.tbl | 2 +- 9 files changed, 37 insertions(+), 37 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 4e99e4b912ef..844f6ae58662 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -649,11 +649,11 @@ __SYSCALL(__NR_inotify_add_watch, sys_inotify_add_watch) #define __NR_inotify_rm_watch 318 __SYSCALL(__NR_inotify_rm_watch, sys_inotify_rm_watch) #define __NR_mbind 319 -__SYSCALL(__NR_mbind, compat_sys_mbind) +__SYSCALL(__NR_mbind, sys_mbind) #define __NR_get_mempolicy 320 -__SYSCALL(__NR_get_mempolicy, compat_sys_get_mempolicy) +__SYSCALL(__NR_get_mempolicy, sys_get_mempolicy) #define __NR_set_mempolicy 321 -__SYSCALL(__NR_set_mempolicy, compat_sys_set_mempolicy) +__SYSCALL(__NR_set_mempolicy, sys_set_mempolicy) #define __NR_openat 322 __SYSCALL(__NR_openat, compat_sys_openat) #define __NR_mkdirat 323 @@ -699,7 +699,7 @@ __SYSCALL(__NR_tee, sys_tee) #define __NR_vmsplice 343 __SYSCALL(__NR_vmsplice, sys_vmsplice) #define __NR_move_pages 344 -__SYSCALL(__NR_move_pages, compat_sys_move_pages) +__SYSCALL(__NR_move_pages, sys_move_pages) #define __NR_getcpu 345 __SYSCALL(__NR_getcpu, sys_getcpu) #define __NR_epoll_pwait 346 @@ -811,7 +811,7 @@ __SYSCALL(__NR_rseq, sys_rseq) #define __NR_io_pgetevents 399 __SYSCALL(__NR_io_pgetevents, compat_sys_io_pgetevents) #define __NR_migrate_pages 400 -__SYSCALL(__NR_migrate_pages, compat_sys_migrate_pages) 
+__SYSCALL(__NR_migrate_pages, sys_migrate_pages) #define __NR_kexec_file_load 401 __SYSCALL(__NR_kexec_file_load, sys_kexec_file_load) /* 402 is unused */ diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index 56c8d3cf42ed..70e32de2bcaa 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -239,9 +239,9 @@ 228 n32 clock_nanosleep sys_clock_nanosleep_time32 229 n32 tgkill sys_tgkill 230 n32 utimes sys_utimes_time32 -231 n32 mbind compat_sys_mbind -232 n32 get_mempolicy compat_sys_get_mempolicy -233 n32 set_mempolicy compat_sys_set_mempolicy +231 n32 mbind sys_mbind +232 n32 get_mempolicy sys_get_mempolicy +233 n32 set_mempolicy sys_set_mempolicy 234 n32 mq_open compat_sys_mq_open 235 n32 mq_unlink sys_mq_unlink 236 n32 mq_timedsend sys_mq_timedsend_time32 @@ -258,7 +258,7 @@ 247 n32 inotify_init sys_inotify_init 248 n32 inotify_add_watch sys_inotify_add_watch 249 n32 inotify_rm_watch sys_inotify_rm_watch -250 n32 migrate_pages compat_sys_migrate_pages +250 n32 migrate_pages sys_migrate_pages 251 n32 openat sys_openat 252 n32 mkdirat sys_mkdirat 253 n32 mknodat sys_mknodat @@ -279,7 +279,7 @@ 268 n32 sync_file_range sys_sync_file_range 269 n32 tee sys_tee 270 n32 vmsplice sys_vmsplice -271 n32 move_pages compat_sys_move_pages +271 n32 move_pages sys_move_pages 272 n32 set_robust_list compat_sys_set_robust_list 273 n32 get_robust_list compat_sys_get_robust_list 274 n32 kexec_load compat_sys_kexec_load diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 201237fd0f43..a61c35edaa74 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -279,9 +279,9 @@ 265 o32 clock_nanosleep sys_clock_nanosleep_time32 266 o32 tgkill sys_tgkill 267 o32 utimes sys_utimes_time32 -268 o32 mbind sys_mbind compat_sys_mbind -269 o32 get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy -270 o32 set_mempolicy sys_set_mempolicy compat_sys_set_mempolicy +268 o32 mbind sys_mbind +269 o32 get_mempolicy sys_get_mempolicy +270 o32 set_mempolicy sys_set_mempolicy 271 o32 mq_open sys_mq_open compat_sys_mq_open 272 o32 mq_unlink sys_mq_unlink 273 o32 mq_timedsend sys_mq_timedsend_time32 @@ -298,7 +298,7 @@ 284 o32 inotify_init sys_inotify_init 285 o32 inotify_add_watch sys_inotify_add_watch 286 o32 inotify_rm_watch sys_inotify_rm_watch -287 o32 migrate_pages sys_migrate_pages compat_sys_migrate_pages +287 o32 migrate_pages sys_migrate_pages 288 o32 openat sys_openat compat_sys_openat 289 o32 mkdirat sys_mkdirat 290 o32 mknodat sys_mknodat @@ -319,7 +319,7 @@ 305 o32 sync_file_range sys_sync_file_range sys32_sync_file_range 306 o32 tee sys_tee 307 o32 vmsplice sys_vmsplice -308 o32 move_pages sys_move_pages compat_sys_move_pages +308 o32 move_pages sys_move_pages 309 o32 set_robust_list sys_set_robust_list compat_sys_set_robust_list 310 o32 get_robust_list sys_get_robust_list compat_sys_get_robust_list 311 o32 kexec_load sys_kexec_load compat_sys_kexec_load diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index 0bf854b70612..bf751e0732b7 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -292,9 +292,9 @@ 258 32 clock_nanosleep sys_clock_nanosleep_time32 258 64 clock_nanosleep sys_clock_nanosleep 259 common tgkill sys_tgkill -260 common mbind sys_mbind compat_sys_mbind -261 common get_mempolicy sys_get_mempolicy 
compat_sys_get_mempolicy -262 common set_mempolicy sys_set_mempolicy compat_sys_set_mempolicy +260 common mbind sys_mbind +261 common get_mempolicy sys_get_mempolicy +262 common set_mempolicy sys_set_mempolicy # 263 was vserver 264 common add_key sys_add_key 265 common request_key sys_request_key @@ -331,7 +331,7 @@ 292 64 sync_file_range sys_sync_file_range 293 common tee sys_tee 294 common vmsplice sys_vmsplice -295 common move_pages sys_move_pages compat_sys_move_pages +295 common move_pages sys_move_pages 296 common getcpu sys_getcpu 297 common epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait 298 common statfs64 sys_statfs64 compat_sys_statfs64 diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index 29b55e2e035c..7bef917cc84e 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -330,10 +330,10 @@ 256 64 sys_debug_setcontext sys_ni_syscall 256 spu sys_debug_setcontext sys_ni_syscall # 257 reserved for vserver -258 nospu migrate_pages sys_migrate_pages compat_sys_migrate_pages -259 nospu mbind sys_mbind compat_sys_mbind -260 nospu get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy -261 nospu set_mempolicy sys_set_mempolicy compat_sys_set_mempolicy +258 nospu migrate_pages sys_migrate_pages +259 nospu mbind sys_mbind +260 nospu get_mempolicy sys_get_mempolicy +261 nospu set_mempolicy sys_set_mempolicy 262 nospu mq_open sys_mq_open compat_sys_mq_open 263 nospu mq_unlink sys_mq_unlink 264 32 mq_timedsend sys_mq_timedsend_time32 @@ -381,7 +381,7 @@ 298 common faccessat sys_faccessat 299 common get_robust_list sys_get_robust_list compat_sys_get_robust_list 300 common set_robust_list sys_set_robust_list compat_sys_set_robust_list -301 common move_pages sys_move_pages compat_sys_move_pages +301 common move_pages sys_move_pages 302 common getcpu sys_getcpu 303 nospu epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait 304 32 utimensat sys_utimensat_time32 diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index aa9d68b8ee14..df5261e5cfe1 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -274,9 +274,9 @@ 265 common statfs64 sys_statfs64 compat_sys_statfs64 266 common fstatfs64 sys_fstatfs64 compat_sys_fstatfs64 267 common remap_file_pages sys_remap_file_pages sys_remap_file_pages -268 common mbind sys_mbind compat_sys_mbind -269 common get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy -270 common set_mempolicy sys_set_mempolicy compat_sys_set_mempolicy +268 common mbind sys_mbind sys_mbind +269 common get_mempolicy sys_get_mempolicy sys_get_mempolicy +270 common set_mempolicy sys_set_mempolicy sys_set_mempolicy 271 common mq_open sys_mq_open compat_sys_mq_open 272 common mq_unlink sys_mq_unlink sys_mq_unlink 273 common mq_timedsend sys_mq_timedsend sys_mq_timedsend_time32 @@ -293,7 +293,7 @@ 284 common inotify_init sys_inotify_init sys_inotify_init 285 common inotify_add_watch sys_inotify_add_watch sys_inotify_add_watch 286 common inotify_rm_watch sys_inotify_rm_watch sys_inotify_rm_watch -287 common migrate_pages sys_migrate_pages compat_sys_migrate_pages +287 common migrate_pages sys_migrate_pages sys_migrate_pages 288 common openat sys_openat compat_sys_openat 289 common mkdirat sys_mkdirat sys_mkdirat 290 common mknodat sys_mknodat sys_mknodat @@ -317,7 +317,7 @@ 307 common sync_file_range sys_sync_file_range compat_sys_s390_sync_file_range 308 common tee sys_tee sys_tee 309 common vmsplice 
sys_vmsplice sys_vmsplice -310 common move_pages sys_move_pages compat_sys_move_pages +310 common move_pages sys_move_pages sys_move_pages 311 common getcpu sys_getcpu sys_getcpu 312 common epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait 313 common utimes sys_utimes sys_utimes_time32 diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index 7893104718c2..c37764dc764d 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -365,12 +365,12 @@ 299 common unshare sys_unshare 300 common set_robust_list sys_set_robust_list compat_sys_set_robust_list 301 common get_robust_list sys_get_robust_list compat_sys_get_robust_list -302 common migrate_pages sys_migrate_pages compat_sys_migrate_pages -303 common mbind sys_mbind compat_sys_mbind -304 common get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy -305 common set_mempolicy sys_set_mempolicy compat_sys_set_mempolicy +302 common migrate_pages sys_migrate_pages +303 common mbind sys_mbind +304 common get_mempolicy sys_get_mempolicy +305 common set_mempolicy sys_set_mempolicy 306 common kexec_load sys_kexec_load compat_sys_kexec_load -307 common move_pages sys_move_pages compat_sys_move_pages +307 common move_pages sys_move_pages 308 common getcpu sys_getcpu 309 common epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait 310 32 utimensat sys_utimensat_time32 diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 61f18b72552b..960a021d543e 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -286,7 +286,7 @@ 272 i386 fadvise64_64 sys_ia32_fadvise64_64 273 i386 vserver 274 i386 mbind sys_mbind -275 i386 get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy +275 i386 get_mempolicy sys_get_mempolicy 276 i386 set_mempolicy sys_set_mempolicy 277 i386 mq_open sys_mq_open compat_sys_mq_open 278 i386 mq_unlink sys_mq_unlink @@ -328,7 +328,7 @@ 314 i386 sync_file_range sys_ia32_sync_file_range 315 i386 tee sys_tee 316 i386 vmsplice sys_vmsplice -317 i386 move_pages sys_move_pages compat_sys_move_pages +317 i386 move_pages sys_move_pages 318 i386 getcpu sys_getcpu 319 i386 epoll_pwait sys_epoll_pwait 320 i386 utimensat sys_utimensat_time32 diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 807b6a1de8e8..18b5500ea8bf 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -398,7 +398,7 @@ 530 x32 set_robust_list compat_sys_set_robust_list 531 x32 get_robust_list compat_sys_get_robust_list 532 x32 vmsplice sys_vmsplice -533 x32 move_pages compat_sys_move_pages +533 x32 move_pages sys_move_pages 534 x32 preadv compat_sys_preadv64 535 x32 pwritev compat_sys_pwritev64 536 x32 rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo -- cgit From a7a08b275a8bbade798c4bdaad07ade68fe7003c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 8 Sep 2021 15:18:29 -0700 Subject: arch: remove compat_alloc_user_space All users of compat_alloc_user_space() and copy_in_user() have been removed from the kernel, only a few functions in sparc remain that can be changed to calling arch_copy_in_user() instead. Link: https://lkml.kernel.org/r/20210727144859.4150043-7-arnd@kernel.org Signed-off-by: Arnd Bergmann Reviewed-by: Christoph Hellwig Cc: Al Viro Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Christoph Hellwig Cc: "David S. 
Miller" Cc: Eric Biederman Cc: Feng Tang Cc: Heiko Carstens Cc: Helge Deller Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: "James E.J. Bottomley" Cc: Michael Ellerman Cc: Paul Mackerras Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/include/asm/compat.h | 5 --- arch/arm64/include/asm/uaccess.h | 11 ----- arch/arm64/lib/Makefile | 2 +- arch/arm64/lib/copy_in_user.S | 77 --------------------------------- arch/mips/cavium-octeon/octeon-memcpy.S | 2 - arch/mips/include/asm/compat.h | 8 ---- arch/mips/include/asm/uaccess.h | 26 ----------- arch/mips/lib/memcpy.S | 11 ----- arch/parisc/include/asm/compat.h | 6 --- arch/parisc/include/asm/uaccess.h | 2 - arch/parisc/lib/memcpy.c | 9 ---- arch/powerpc/include/asm/compat.h | 16 ------- arch/s390/include/asm/compat.h | 10 ----- arch/s390/include/asm/uaccess.h | 3 -- arch/s390/lib/uaccess.c | 63 --------------------------- arch/sparc/include/asm/compat.h | 19 -------- arch/sparc/kernel/process_64.c | 2 +- arch/sparc/kernel/signal32.c | 12 ++--- arch/sparc/kernel/signal_64.c | 8 ++-- arch/x86/include/asm/compat.h | 13 ------ arch/x86/include/asm/uaccess_64.h | 7 --- 21 files changed, 12 insertions(+), 300 deletions(-) delete mode 100644 arch/arm64/lib/copy_in_user.S (limited to 'arch') diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h index 79c1a750e357..eaa6ca062d89 100644 --- a/arch/arm64/include/asm/compat.h +++ b/arch/arm64/include/asm/compat.h @@ -107,11 +107,6 @@ struct compat_statfs { #define compat_user_stack_pointer() (user_stack_pointer(task_pt_regs(current))) #define COMPAT_MINSIGSTKSZ 2048 -static inline void __user *arch_compat_alloc_user_space(long len) -{ - return (void __user *)compat_user_stack_pointer() - len; -} - struct compat_ipc64_perm { compat_key_t key; __compat_uid32_t uid; diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index b5f08621fa29..190b494e22ab 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -430,17 +430,6 @@ extern unsigned long __must_check __arch_copy_to_user(void __user *to, const voi __actu_ret; \ }) -extern unsigned long __must_check __arch_copy_in_user(void __user *to, const void __user *from, unsigned long n); -#define raw_copy_in_user(to, from, n) \ -({ \ - unsigned long __aciu_ret; \ - uaccess_ttbr0_enable(); \ - __aciu_ret = __arch_copy_in_user(__uaccess_mask_ptr(to), \ - __uaccess_mask_ptr(from), (n)); \ - uaccess_ttbr0_disable(); \ - __aciu_ret; \ -}) - #define INLINE_COPY_TO_USER #define INLINE_COPY_FROM_USER diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile index 6dd56a49790a..0941180a86d3 100644 --- a/arch/arm64/lib/Makefile +++ b/arch/arm64/lib/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 lib-y := clear_user.o delay.o copy_from_user.o \ - copy_to_user.o copy_in_user.o copy_page.o \ + copy_to_user.o copy_page.o \ clear_page.o csum.o insn.o memchr.o memcpy.o \ memset.o memcmp.o strcmp.o strncmp.o strlen.o \ strnlen.o strchr.o strrchr.o tishift.o diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S deleted file mode 100644 index dbea3799c3ef..000000000000 --- a/arch/arm64/lib/copy_in_user.S +++ /dev/null @@ -1,77 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copy from user space to user space - * - * Copyright (C) 2012 ARM Ltd. 
- */ - -#include - -#include -#include -#include - -/* - * Copy from user space to user space (alignment handled by the hardware) - * - * Parameters: - * x0 - to - * x1 - from - * x2 - n - * Returns: - * x0 - bytes not copied - */ - .macro ldrb1 reg, ptr, val - user_ldst 9998f, ldtrb, \reg, \ptr, \val - .endm - - .macro strb1 reg, ptr, val - user_ldst 9998f, sttrb, \reg, \ptr, \val - .endm - - .macro ldrh1 reg, ptr, val - user_ldst 9997f, ldtrh, \reg, \ptr, \val - .endm - - .macro strh1 reg, ptr, val - user_ldst 9997f, sttrh, \reg, \ptr, \val - .endm - - .macro ldr1 reg, ptr, val - user_ldst 9997f, ldtr, \reg, \ptr, \val - .endm - - .macro str1 reg, ptr, val - user_ldst 9997f, sttr, \reg, \ptr, \val - .endm - - .macro ldp1 reg1, reg2, ptr, val - user_ldp 9997f, \reg1, \reg2, \ptr, \val - .endm - - .macro stp1 reg1, reg2, ptr, val - user_stp 9997f, \reg1, \reg2, \ptr, \val - .endm - -end .req x5 -srcin .req x15 -SYM_FUNC_START(__arch_copy_in_user) - add end, x0, x2 - mov srcin, x1 -#include "copy_template.S" - mov x0, #0 - ret -SYM_FUNC_END(__arch_copy_in_user) -EXPORT_SYMBOL(__arch_copy_in_user) - - .section .fixup,"ax" - .align 2 -9997: cmp dst, dstin - b.ne 9998f - // Before being absolutely sure we couldn't copy anything, try harder -USER(9998f, ldtrb tmp1w, [srcin]) -USER(9998f, sttrb tmp1w, [dst]) - add dst, dst, #1 -9998: sub x0, end, dst // bytes not copied - ret - .previous diff --git a/arch/mips/cavium-octeon/octeon-memcpy.S b/arch/mips/cavium-octeon/octeon-memcpy.S index 600d018cf354..0a515cde1c18 100644 --- a/arch/mips/cavium-octeon/octeon-memcpy.S +++ b/arch/mips/cavium-octeon/octeon-memcpy.S @@ -154,8 +154,6 @@ FEXPORT(__raw_copy_from_user) EXPORT_SYMBOL(__raw_copy_from_user) FEXPORT(__raw_copy_to_user) EXPORT_SYMBOL(__raw_copy_to_user) -FEXPORT(__raw_copy_in_user) -EXPORT_SYMBOL(__raw_copy_in_user) /* * Note: dst & src may be unaligned, len may be 0 * Temps diff --git a/arch/mips/include/asm/compat.h b/arch/mips/include/asm/compat.h index 53f015a1b0a7..bbb3bc5a42fd 100644 --- a/arch/mips/include/asm/compat.h +++ b/arch/mips/include/asm/compat.h @@ -96,14 +96,6 @@ struct compat_statfs { #define COMPAT_OFF_T_MAX 0x7fffffff -static inline void __user *arch_compat_alloc_user_space(long len) -{ - struct pt_regs *regs = (struct pt_regs *) - ((unsigned long) current_thread_info() + THREAD_SIZE - 32) - 1; - - return (void __user *) (regs->regs[29] - len); -} - struct compat_ipc64_perm { compat_key_t key; __compat_uid32_t uid; diff --git a/arch/mips/include/asm/uaccess.h b/arch/mips/include/asm/uaccess.h index 783fecce65c8..f8f74f9f5883 100644 --- a/arch/mips/include/asm/uaccess.h +++ b/arch/mips/include/asm/uaccess.h @@ -428,7 +428,6 @@ do { \ extern size_t __raw_copy_from_user(void *__to, const void *__from, size_t __n); extern size_t __raw_copy_to_user(void *__to, const void *__from, size_t __n); -extern size_t __raw_copy_in_user(void *__to, const void *__from, size_t __n); static inline unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n) @@ -480,31 +479,6 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n) #define INLINE_COPY_FROM_USER #define INLINE_COPY_TO_USER -static inline unsigned long -raw_copy_in_user(void __user *to, const void __user *from, unsigned long n) -{ - register void __user *__cu_to_r __asm__("$4"); - register const void __user *__cu_from_r __asm__("$5"); - register long __cu_len_r __asm__("$6"); - - __cu_to_r = to; - __cu_from_r = from; - __cu_len_r = n; - - __asm__ __volatile__( - ".set\tnoreorder\n\t" - 
__MODULE_JAL(__raw_copy_in_user) - ".set\tnoat\n\t" - __UA_ADDU "\t$1, %1, %2\n\t" - ".set\tat\n\t" - ".set\treorder" - : "+r" (__cu_to_r), "+r" (__cu_from_r), "+r" (__cu_len_r) - : - : "$8", "$9", "$10", "$11", "$12", "$14", "$15", "$24", "$31", - DADDI_SCRATCH, "memory"); - return __cu_len_r; -} - extern __kernel_size_t __bzero(void __user *addr, __kernel_size_t size); /* diff --git a/arch/mips/lib/memcpy.S b/arch/mips/lib/memcpy.S index e19fb98b5d38..277c32296636 100644 --- a/arch/mips/lib/memcpy.S +++ b/arch/mips/lib/memcpy.S @@ -666,8 +666,6 @@ FEXPORT(__raw_copy_from_user) EXPORT_SYMBOL(__raw_copy_from_user) FEXPORT(__raw_copy_to_user) EXPORT_SYMBOL(__raw_copy_to_user) -FEXPORT(__raw_copy_in_user) -EXPORT_SYMBOL(__raw_copy_in_user) #endif /* Legacy Mode, user <-> user */ __BUILD_COPY_USER LEGACY_MODE USEROP USEROP @@ -703,13 +701,4 @@ EXPORT_SYMBOL(__raw_copy_to_user) __BUILD_COPY_USER EVA_MODE KERNELOP USEROP END(__raw_copy_to_user) -/* - * __copy_in_user (EVA) - */ - -LEAF(__raw_copy_in_user) -EXPORT_SYMBOL(__raw_copy_in_user) -__BUILD_COPY_USER EVA_MODE USEROP USEROP -END(__raw_copy_in_user) - #endif diff --git a/arch/parisc/include/asm/compat.h b/arch/parisc/include/asm/compat.h index b5d90e82b65d..c04f5a637c39 100644 --- a/arch/parisc/include/asm/compat.h +++ b/arch/parisc/include/asm/compat.h @@ -163,12 +163,6 @@ struct compat_shmid64_ds { #define COMPAT_ELF_NGREG 80 typedef compat_ulong_t compat_elf_gregset_t[COMPAT_ELF_NGREG]; -static __inline__ void __user *arch_compat_alloc_user_space(long len) -{ - struct pt_regs *regs = ¤t->thread.regs; - return (void __user *)regs->gr[30]; -} - static inline int __is_compat_task(struct task_struct *t) { return test_tsk_thread_flag(t, TIF_32BIT); diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h index ed2cd4fb479b..7c13314aae4a 100644 --- a/arch/parisc/include/asm/uaccess.h +++ b/arch/parisc/include/asm/uaccess.h @@ -215,8 +215,6 @@ unsigned long __must_check raw_copy_to_user(void __user *dst, const void *src, unsigned long len); unsigned long __must_check raw_copy_from_user(void *dst, const void __user *src, unsigned long len); -unsigned long __must_check raw_copy_in_user(void __user *dst, const void __user *src, - unsigned long len); #define INLINE_COPY_TO_USER #define INLINE_COPY_FROM_USER diff --git a/arch/parisc/lib/memcpy.c b/arch/parisc/lib/memcpy.c index 4b75388190b4..ea70a0e08321 100644 --- a/arch/parisc/lib/memcpy.c +++ b/arch/parisc/lib/memcpy.c @@ -38,14 +38,6 @@ unsigned long raw_copy_from_user(void *dst, const void __user *src, } EXPORT_SYMBOL(raw_copy_from_user); -unsigned long raw_copy_in_user(void __user *dst, const void __user *src, unsigned long len) -{ - mtsp(get_user_space(), 1); - mtsp(get_user_space(), 2); - return pa_memcpy((void __force *)dst, (void __force *)src, len); -} - - void * memcpy(void * dst,const void *src, size_t count) { mtsp(get_kernel_space(), 1); @@ -54,7 +46,6 @@ void * memcpy(void * dst,const void *src, size_t count) return dst; } -EXPORT_SYMBOL(raw_copy_in_user); EXPORT_SYMBOL(memcpy); bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) diff --git a/arch/powerpc/include/asm/compat.h b/arch/powerpc/include/asm/compat.h index e33dcf134cdd..7afc96fb6524 100644 --- a/arch/powerpc/include/asm/compat.h +++ b/arch/powerpc/include/asm/compat.h @@ -83,22 +83,6 @@ struct compat_statfs { #define COMPAT_OFF_T_MAX 0x7fffffff -static inline void __user *arch_compat_alloc_user_space(long len) -{ - struct pt_regs *regs = current->thread.regs; - unsigned 
long usp = regs->gpr[1]; - - /* - * We can't access below the stack pointer in the 32bit ABI and - * can access 288 bytes in the 64bit big-endian ABI, - * or 512 bytes with the new ELFv2 little-endian ABI. - */ - if (!is_32bit_task()) - usp -= USER_REDZONE_SIZE; - - return (void __user *) (usp - len); -} - /* * ipc64_perm is actually 32/64bit clean but since the compat layer refers to * it we may as well define it. diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h index 8d49505b4a43..cdc7ae72529d 100644 --- a/arch/s390/include/asm/compat.h +++ b/arch/s390/include/asm/compat.h @@ -176,16 +176,6 @@ static inline int is_compat_task(void) return test_thread_flag(TIF_31BIT); } -static inline void __user *arch_compat_alloc_user_space(long len) -{ - unsigned long stack; - - stack = KSTK_ESP(current); - if (is_compat_task()) - stack &= 0x7fffffffUL; - return (void __user *) (stack - len); -} - #endif struct compat_ipc64_perm { diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 9ed9aa37e836..ce550d06abc3 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -227,9 +227,6 @@ static inline int __get_user_fn(void *x, const void __user *ptr, unsigned long s __get_user(x, ptr); \ }) -unsigned long __must_check -raw_copy_in_user(void __user *to, const void __user *from, unsigned long n); - /* * Copy a null terminated string from userspace. */ diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index 94ca99bde59d..a596e69d3c47 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -204,69 +204,6 @@ unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long } EXPORT_SYMBOL(raw_copy_to_user); -static inline unsigned long copy_in_user_mvcos(void __user *to, const void __user *from, - unsigned long size) -{ - unsigned long tmp1, tmp2; - - tmp1 = -4096UL; - /* FIXME: copy with reduced length. 
*/ - asm volatile( - " lgr 0,%[spec]\n" - "0: .insn ss,0xc80000000000,0(%0,%1),0(%2),0\n" - " jz 2f\n" - "1: algr %0,%3\n" - " slgr %1,%3\n" - " slgr %2,%3\n" - " j 0b\n" - "2:slgr %0,%0\n" - "3: \n" - EX_TABLE(0b,3b) - : "+a" (size), "+a" (to), "+a" (from), "+a" (tmp1), "=a" (tmp2) - : [spec] "d" (0x810081UL) - : "cc", "memory", "0"); - return size; -} - -static inline unsigned long copy_in_user_mvc(void __user *to, const void __user *from, - unsigned long size) -{ - unsigned long tmp1; - - asm volatile( - " sacf 256\n" - " aghi %0,-1\n" - " jo 5f\n" - " bras %3,3f\n" - "0: aghi %0,257\n" - "1: mvc 0(1,%1),0(%2)\n" - " la %1,1(%1)\n" - " la %2,1(%2)\n" - " aghi %0,-1\n" - " jnz 1b\n" - " j 5f\n" - "2: mvc 0(256,%1),0(%2)\n" - " la %1,256(%1)\n" - " la %2,256(%2)\n" - "3: aghi %0,-256\n" - " jnm 2b\n" - "4: ex %0,1b-0b(%3)\n" - "5: slgr %0,%0\n" - "6: sacf 768\n" - EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b) - : "+a" (size), "+a" (to), "+a" (from), "=a" (tmp1) - : : "cc", "memory"); - return size; -} - -unsigned long raw_copy_in_user(void __user *to, const void __user *from, unsigned long n) -{ - if (copy_with_mvcos()) - return copy_in_user_mvcos(to, from, n); - return copy_in_user_mvc(to, from, n); -} -EXPORT_SYMBOL(raw_copy_in_user); - static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size) { unsigned long tmp1, tmp2; diff --git a/arch/sparc/include/asm/compat.h b/arch/sparc/include/asm/compat.h index 8b63410e830f..bd949fcf9d63 100644 --- a/arch/sparc/include/asm/compat.h +++ b/arch/sparc/include/asm/compat.h @@ -116,25 +116,6 @@ struct compat_statfs { #define COMPAT_OFF_T_MAX 0x7fffffff -#ifdef CONFIG_COMPAT -static inline void __user *arch_compat_alloc_user_space(long len) -{ - struct pt_regs *regs = current_thread_info()->kregs; - unsigned long usp = regs->u_regs[UREG_I6]; - - if (test_thread_64bit_stack(usp)) - usp += STACK_BIAS; - - if (test_thread_flag(TIF_32BIT)) - usp &= 0xffffffffUL; - - usp -= len; - usp &= ~0x7UL; - - return (void __user *) usp; -} -#endif - struct compat_ipc64_perm { compat_key_t key; __compat_uid32_t uid; diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index 093849bfda50..d1cc410d2f64 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -455,7 +455,7 @@ static unsigned long clone_stackframe(unsigned long csp, unsigned long psp) distance = fp - psp; rval = (csp - distance); - if (copy_in_user((void __user *) rval, (void __user *) psp, distance)) + if (raw_copy_in_user((void __user *)rval, (void __user *)psp, distance)) rval = 0; else if (!stack_64bit) { if (put_user(((u32)csp), diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c index 4276b9e003ca..6cc124a3bb98 100644 --- a/arch/sparc/kernel/signal32.c +++ b/arch/sparc/kernel/signal32.c @@ -435,9 +435,9 @@ static int setup_frame32(struct ksignal *ksig, struct pt_regs *regs, (_COMPAT_NSIG_WORDS - 1) * sizeof(unsigned int)); if (!wsaved) { - err |= copy_in_user((u32 __user *)sf, - (u32 __user *)(regs->u_regs[UREG_FP]), - sizeof(struct reg_window32)); + err |= raw_copy_in_user((u32 __user *)sf, + (u32 __user *)(regs->u_regs[UREG_FP]), + sizeof(struct reg_window32)); } else { struct reg_window *rp; @@ -567,9 +567,9 @@ static int setup_rt_frame32(struct ksignal *ksig, struct pt_regs *regs, err |= put_compat_sigset(&sf->mask, oldset, sizeof(compat_sigset_t)); if (!wsaved) { - err |= copy_in_user((u32 __user *)sf, - (u32 __user *)(regs->u_regs[UREG_FP]), - sizeof(struct reg_window32)); + err |= 
raw_copy_in_user((u32 __user *)sf, + (u32 __user *)(regs->u_regs[UREG_FP]), + sizeof(struct reg_window32)); } else { struct reg_window *rp; diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c index cea23cf95600..2a78d2af1265 100644 --- a/arch/sparc/kernel/signal_64.c +++ b/arch/sparc/kernel/signal_64.c @@ -406,10 +406,10 @@ setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs) err |= copy_to_user(&sf->mask, sigmask_to_save(), sizeof(sigset_t)); if (!wsaved) { - err |= copy_in_user((u64 __user *)sf, - (u64 __user *)(regs->u_regs[UREG_FP] + - STACK_BIAS), - sizeof(struct reg_window)); + err |= raw_copy_in_user((u64 __user *)sf, + (u64 __user *)(regs->u_regs[UREG_FP] + + STACK_BIAS), + sizeof(struct reg_window)); } else { struct reg_window *rp; diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h index 4ae01cdb99de..7516e4199b3c 100644 --- a/arch/x86/include/asm/compat.h +++ b/arch/x86/include/asm/compat.h @@ -156,19 +156,6 @@ struct compat_shmid64_ds { (!!(task_pt_regs(current)->orig_ax & __X32_SYSCALL_BIT)) #endif -static inline void __user *arch_compat_alloc_user_space(long len) -{ - compat_uptr_t sp = task_pt_regs(current)->sp; - - /* - * -128 for the x32 ABI redzone. For IA32, it is not strictly - * necessary, but not harmful. - */ - sp -= 128; - - return (void __user *)round_down(sp - len, 16); -} - static inline bool in_x32_syscall(void) { #ifdef CONFIG_X86_X32_ABI diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index e7265a552f4f..45697e04d771 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -58,13 +58,6 @@ raw_copy_to_user(void __user *dst, const void *src, unsigned long size) return copy_user_generic((__force void *)dst, src, size); } -static __always_inline __must_check -unsigned long raw_copy_in_user(void __user *dst, const void __user *src, unsigned long size) -{ - return copy_user_generic((__force void *)dst, - (__force void *)src, size); -} - extern long __copy_user_nocache(void *dst, const void __user *src, unsigned size, int zerorest); -- cgit From ddb13122aa7e988e15283701afb086e0950c405f Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 8 Sep 2021 18:10:23 -0700 Subject: nds32/setup: remove unused memblock_region variable in setup_memory() kernel test robot reports unused variable warning: arch/nds32/kernel/setup.c:247:26: warning: Unused variable: region [unusedVariable] struct memblock_region *region; ^ Remove the unused variable. 
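(The [unusedVariable] tag suggests the report comes from cppcheck rather than gcc; assuming cppcheck is available, a run along the lines of "cppcheck --enable=style arch/nds32/kernel/setup.c" should reproduce the warning on the pre-patch tree.)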
Link: https://lkml.kernel.org/r/20210712125218.28951-1-rppt@kernel.org Signed-off-by: Mike Rapoport Reported-by: kernel test robot Reviewed-by: Guenter Roeck Tested-by: Guenter Roeck Cc: Greentime Hu Cc: Nick Hu Cc: Vincent Chen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/nds32/kernel/setup.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/nds32/kernel/setup.c b/arch/nds32/kernel/setup.c index 41725eaf8bac..b3d34d646652 100644 --- a/arch/nds32/kernel/setup.c +++ b/arch/nds32/kernel/setup.c @@ -244,7 +244,6 @@ static void __init setup_memory(void) unsigned long ram_start_pfn; unsigned long free_ram_start_pfn; phys_addr_t memory_start, memory_end; - struct memblock_region *region; memory_end = memory_start = 0; -- cgit From 3da6379a6d865ef4faa369eebe2019dac75cda4b Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 5 Sep 2021 11:50:56 +0200 Subject: parisc: Add missing FORCE prerequisite in Makefile Signed-off-by: Helge Deller --- arch/parisc/boot/compressed/Makefile | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/parisc/boot/compressed/Makefile b/arch/parisc/boot/compressed/Makefile index dff453687530..9fe54878167d 100644 --- a/arch/parisc/boot/compressed/Makefile +++ b/arch/parisc/boot/compressed/Makefile @@ -26,7 +26,7 @@ endif OBJECTS += $(obj)/head.o $(obj)/real2.o $(obj)/firmware.o $(obj)/misc.o $(obj)/piggy.o LDFLAGS_vmlinux := -X -e startup --as-needed -T -$(obj)/vmlinux: $(obj)/vmlinux.lds $(OBJECTS) $(LIBGCC) +$(obj)/vmlinux: $(obj)/vmlinux.lds $(OBJECTS) $(LIBGCC) FORCE $(call if_changed,ld) sed-sizes := -e 's/^\([0-9a-fA-F]*\) . \(__bss_start\|_end\|parisc_kernel_start\)$$/\#define SZ\2 0x\1/p' @@ -34,7 +34,7 @@ sed-sizes := -e 's/^\([0-9a-fA-F]*\) . \(__bss_start\|_end\|parisc_kernel_start\ quiet_cmd_sizes = GEN $@ cmd_sizes = $(NM) $< | sed -n $(sed-sizes) > $@ -$(obj)/sizes.h: vmlinux +$(obj)/sizes.h: vmlinux FORCE $(call if_changed,sizes) AFLAGS_head.o += -I$(objtree)/$(obj) -DBOOTLOADER @@ -70,19 +70,19 @@ suffix-$(CONFIG_KERNEL_LZMA) := lzma suffix-$(CONFIG_KERNEL_LZO) := lzo suffix-$(CONFIG_KERNEL_XZ) := xz -$(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y) +$(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y) FORCE $(call if_changed,gzip) -$(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y) +$(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y) FORCE $(call if_changed,bzip2) -$(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y) +$(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y) FORCE $(call if_changed,lz4) -$(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y) +$(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y) FORCE $(call if_changed,lzma) -$(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y) +$(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y) FORCE $(call if_changed,lzo) -$(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y) +$(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y) FORCE $(call if_changed,xzkern) LDFLAGS_piggy.o := -r --format binary --oformat $(LD_BFD) -T -$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.$(suffix-y) +$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.$(suffix-y) FORCE $(call if_changed,ld) -- cgit From 1260dea6d2eb75706c978da828a36f0def590d3a Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 4 Sep 2021 23:49:26 +0200 Subject: parisc: Drop strnlen_user() in favour of generic version As suggested by Arnd Bergmann, drop the parisc version of strnlen_user() and switch to the generic version. 
Suggested-by: Arnd Bergmann Acked-by: Arnd Bergmann Signed-off-by: Helge Deller --- arch/parisc/Kconfig | 1 - arch/parisc/include/asm/uaccess.h | 3 +-- arch/parisc/kernel/parisc_ksyms.c | 1 - arch/parisc/lib/lusercopy.S | 34 ---------------------------------- 4 files changed, 1 insertion(+), 38 deletions(-) (limited to 'arch') diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 3001a7d8fc76..86e879608e2a 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -10,7 +10,6 @@ config PARISC select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_STRICT_KERNEL_RWX select ARCH_HAS_UBSAN_SANITIZE_ALL - select ARCH_HAS_STRNLEN_USER select ARCH_NO_SG_CHAIN select ARCH_SUPPORTS_HUGETLBFS if PA20 select ARCH_SUPPORTS_MEMORY_FAILURE diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h index 7c13314aae4a..cfb4ca7e39fe 100644 --- a/arch/parisc/include/asm/uaccess.h +++ b/arch/parisc/include/asm/uaccess.h @@ -201,13 +201,12 @@ struct exception_table_entry { extern long strncpy_from_user(char *, const char __user *, long); extern unsigned lclear_user(void __user *, unsigned long); -extern long lstrnlen_user(const char __user *, long); +extern __must_check long strnlen_user(const char __user *src, long n); /* * Complex access routines -- macros */ #define user_addr_max() (~0UL) -#define strnlen_user lstrnlen_user #define clear_user lclear_user #define __clear_user lclear_user diff --git a/arch/parisc/kernel/parisc_ksyms.c b/arch/parisc/kernel/parisc_ksyms.c index e8a6a751dfd8..00297e8e1c88 100644 --- a/arch/parisc/kernel/parisc_ksyms.c +++ b/arch/parisc/kernel/parisc_ksyms.c @@ -32,7 +32,6 @@ EXPORT_SYMBOL(__xchg64); #include EXPORT_SYMBOL(lclear_user); -EXPORT_SYMBOL(lstrnlen_user); #ifndef CONFIG_64BIT /* Needed so insmod can set dp value */ diff --git a/arch/parisc/lib/lusercopy.S b/arch/parisc/lib/lusercopy.S index 36d6a8638ead..0aad5ce89f4d 100644 --- a/arch/parisc/lib/lusercopy.S +++ b/arch/parisc/lib/lusercopy.S @@ -67,40 +67,6 @@ $lclu_done: ENDPROC_CFI(lclear_user) - /* - * long lstrnlen_user(char *s, long n) - * - * Returns 0 if exception before zero byte or reaching N, - * N+1 if N would be exceeded, - * else strlen + 1 (i.e. includes zero byte). 
- */ - -ENTRY_CFI(lstrnlen_user) - comib,= 0,%r25,$lslen_nzero - copy %r26,%r24 - get_sr -1: ldbs,ma 1(%sr1,%r26),%r1 -$lslen_loop: - comib,=,n 0,%r1,$lslen_done - addib,<> -1,%r25,$lslen_loop -2: ldbs,ma 1(%sr1,%r26),%r1 -$lslen_done: - bv %r0(%r2) - sub %r26,%r24,%r28 - -$lslen_nzero: - b $lslen_done - ldo 1(%r26),%r26 /* special case for N == 0 */ - -3: b $lslen_done - copy %r24,%r26 /* reset r26 so 0 is returned on fault */ - - ASM_EXCEPTIONTABLE_ENTRY(1b,3b) - ASM_EXCEPTIONTABLE_ENTRY(2b,3b) - -ENDPROC_CFI(lstrnlen_user) - - /* * unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len) * -- cgit From ea4b3fca18adc81e6a965b2ad0668f38d5d3485b Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 6 Sep 2021 22:45:16 +0200 Subject: parisc: Drop useless debug info and comments from signal.c Signed-off-by: Helge Deller --- arch/parisc/kernel/signal.c | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'arch') diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c index db1a47cf424d..77db707ce391 100644 --- a/arch/parisc/kernel/signal.c +++ b/arch/parisc/kernel/signal.c @@ -293,16 +293,6 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs, &frame->tramp[SIGRESTARTBLOCK_TRAMP+2]); err |= __put_user(INSN_NOP, &frame->tramp[SIGRESTARTBLOCK_TRAMP+3]); -#if DEBUG_SIG - /* Assert that we're flushing in the correct space... */ - { - unsigned long sid; - asm ("mfsp %%sr3,%0" : "=r" (sid)); - DBG(1,"setup_rt_frame: Flushing 64 bytes at space %#x offset %p\n", - sid, frame->tramp); - } -#endif - start = (unsigned long) &frame->tramp[0]; end = (unsigned long) &frame->tramp[TRAMP_SIZE]; flush_user_dcache_range_asm(start, end); @@ -501,7 +491,6 @@ syscall_restart(struct pt_regs *regs, struct k_sigaction *ka) DBG(1,"ERESTARTNOHAND: returning -EINTR\n"); regs->gr[28] = -EINTR; break; - case -ERESTARTSYS: if (!(ka->sa.sa_flags & SA_RESTART)) { DBG(1,"ERESTARTSYS: putting -EINTR\n"); @@ -569,10 +558,6 @@ insert_restart_trampoline(struct pt_regs *regs) } /* - * Note that 'init' is a special process: it doesn't get signals it doesn't - * want to handle. Thus you cannot kill init even with a SIGKILL even by - * mistake. - * * We need to be able to restore the syscall arguments (r21-r26) to * restart syscalls. Thus, the syscall path should save them in the * pt_regs structure (it's okay to do so since they are caller-save -- cgit From 3e4a1aff2a97cb4fd7f0268e4b69e8c9d3641277 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 5 Sep 2021 11:53:32 +0200 Subject: parisc: Check user signal stack trampoline is inside TASK_SIZE Add some additional checks to ensure the signal stack is inside userspace bounds. Signed-off-by: Helge Deller --- arch/parisc/kernel/signal.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c index 77db707ce391..46b1050640b8 100644 --- a/arch/parisc/kernel/signal.c +++ b/arch/parisc/kernel/signal.c @@ -237,18 +237,22 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs, #endif usp = (regs->gr[30] & ~(0x01UL)); + sigframe_size = PARISC_RT_SIGFRAME_SIZE; #ifdef CONFIG_64BIT if (is_compat_task()) { /* The gcc alloca implementation leaves garbage in the upper 32 bits of sp */ usp = (compat_uint_t)usp; + sigframe_size = PARISC_RT_SIGFRAME_SIZE32; } #endif - /*FIXME: frame_size parameter is unused, remove it. 
*/ - frame = get_sigframe(&ksig->ka, usp, sizeof(*frame)); + frame = get_sigframe(&ksig->ka, usp, sigframe_size); DBG(1,"SETUP_RT_FRAME: START\n"); DBG(1,"setup_rt_frame: frame %p info %p\n", frame, ksig->info); + start = (unsigned long) frame; + if (start >= user_addr_max() - sigframe_size) + return -EFAULT; #ifdef CONFIG_64BIT @@ -343,11 +347,6 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs, /* The syscall return path will create IAOQ values from r31. */ - sigframe_size = PARISC_RT_SIGFRAME_SIZE; -#ifdef CONFIG_64BIT - if (is_compat_task()) - sigframe_size = PARISC_RT_SIGFRAME_SIZE32; -#endif if (in_syscall) { regs->gr[31] = haddr; #ifdef CONFIG_64BIT @@ -518,6 +517,10 @@ insert_restart_trampoline(struct pt_regs *regs) unsigned long end = (unsigned long) &usp[5]; long err = 0; + /* check that we don't exceed the stack */ + if (A(&usp[0]) >= user_addr_max() - 5 * sizeof(int)) + return; + /* Setup a trampoline to restart the syscall * with __NR_restart_syscall * -- cgit From e4f2006f1287e7ea17660490569cff323772dac4 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Tue, 7 Sep 2021 05:03:29 +0200 Subject: parisc: Reduce sigreturn trampoline to 3 instructions We can move the INSN_LDI_R20 instruction into the branch delay slot. Signed-off-by: Helge Deller --- arch/parisc/include/asm/rt_sigframe.h | 2 +- arch/parisc/kernel/signal.c | 13 ++++++------- arch/parisc/kernel/signal32.h | 2 +- 3 files changed, 8 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/parisc/include/asm/rt_sigframe.h b/arch/parisc/include/asm/rt_sigframe.h index 2b3010ade00e..4b9e3d707571 100644 --- a/arch/parisc/include/asm/rt_sigframe.h +++ b/arch/parisc/include/asm/rt_sigframe.h @@ -2,7 +2,7 @@ #ifndef _ASM_PARISC_RT_SIGFRAME_H #define _ASM_PARISC_RT_SIGFRAME_H -#define SIGRETURN_TRAMP 4 +#define SIGRETURN_TRAMP 3 #define SIGRESTARTBLOCK_TRAMP 5 #define TRAMP_SIZE (SIGRETURN_TRAMP + SIGRESTARTBLOCK_TRAMP) diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c index 46b1050640b8..bbfe23c40c01 100644 --- a/arch/parisc/kernel/signal.c +++ b/arch/parisc/kernel/signal.c @@ -288,22 +288,21 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs, already in userspace. The first words of tramp are used to save the previous sigrestartblock trampoline that might be on the stack. We start the sigreturn trampoline at - SIGRESTARTBLOCK_TRAMP+X. */ + SIGRESTARTBLOCK_TRAMP. */ err |= __put_user(in_syscall ? 
INSN_LDI_R25_1 : INSN_LDI_R25_0,
 &frame->tramp[SIGRESTARTBLOCK_TRAMP+0]);
- err |= __put_user(INSN_LDI_R20,
- &frame->tramp[SIGRESTARTBLOCK_TRAMP+1]);
 err |= __put_user(INSN_BLE_SR2_R0,
+ &frame->tramp[SIGRESTARTBLOCK_TRAMP+1]);
+ err |= __put_user(INSN_LDI_R20,
 &frame->tramp[SIGRESTARTBLOCK_TRAMP+2]);
- err |= __put_user(INSN_NOP, &frame->tramp[SIGRESTARTBLOCK_TRAMP+3]);
- start = (unsigned long) &frame->tramp[0];
- end = (unsigned long) &frame->tramp[TRAMP_SIZE];
+ start = (unsigned long) &frame->tramp[SIGRESTARTBLOCK_TRAMP+0];
+ end = (unsigned long) &frame->tramp[SIGRESTARTBLOCK_TRAMP+3];
 flush_user_dcache_range_asm(start, end);
 flush_user_icache_range_asm(start, end);

 /* TRAMP Words 0-4, Length 5 = SIGRESTARTBLOCK_TRAMP
- * TRAMP Words 5-9, Length 4 = SIGRETURN_TRAMP
+ * TRAMP Words 5-7, Length 3 = SIGRETURN_TRAMP
 * So the SIGRETURN_TRAMP is at the end of SIGRESTARTBLOCK_TRAMP
 */
 rp = (unsigned long) &frame->tramp[SIGRESTARTBLOCK_TRAMP];
diff --git a/arch/parisc/kernel/signal32.h b/arch/parisc/kernel/signal32.h
index f166250f2d06..a5bdbb5678b7 100644
--- a/arch/parisc/kernel/signal32.h
+++ b/arch/parisc/kernel/signal32.h
@@ -36,7 +36,7 @@ struct compat_regfile {
 compat_int_t rf_sar;
 };

-#define COMPAT_SIGRETURN_TRAMP 4
+#define COMPAT_SIGRETURN_TRAMP 3
 #define COMPAT_SIGRESTARTBLOCK_TRAMP 5
 #define COMPAT_TRAMP_SIZE (COMPAT_SIGRETURN_TRAMP + \
 COMPAT_SIGRESTARTBLOCK_TRAMP)
-- cgit

From d97180ad68bdb7ee10f327205a649bc2f558741d Mon Sep 17 00:00:00 2001
From: Helge Deller
Date: Wed, 8 Sep 2021 23:27:00 +0200
Subject: parisc: Mark sched_clock unstable only if clocks are not synchronized

We check at runtime if the cr16 clocks are stable across CPUs. Only mark
the sched_clock unstable by calling clear_sched_clock_stable() if we know
that we run on a system which isn't synchronized across CPUs.

Signed-off-by: Helge Deller
---
 arch/parisc/kernel/setup.c | 2 --
 arch/parisc/kernel/time.c | 7 +++----
 2 files changed, 3 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c
index 3fb86ee507dd..cceb09855e03 100644
--- a/arch/parisc/kernel/setup.c
+++ b/arch/parisc/kernel/setup.c
@@ -150,8 +150,6 @@ void __init setup_arch(char **cmdline_p)
 #ifdef CONFIG_PA11
 dma_ops_init();
 #endif
-
- clear_sched_clock_stable();
 }

 /*
diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c
index 08e4d480abe1..9fb1e794831b 100644
--- a/arch/parisc/kernel/time.c
+++ b/arch/parisc/kernel/time.c
@@ -265,6 +265,9 @@ static int __init init_cr16_clocksource(void)
 (cpu0_loc == per_cpu(cpu_data, cpu).cpu_loc))
 continue;

+ /* mark sched_clock unstable */
+ clear_sched_clock_stable();
+
 clocksource_cr16.name = "cr16_unstable";
 clocksource_cr16.flags = CLOCK_SOURCE_UNSTABLE;
 clocksource_cr16.rating = 0;
@@ -272,10 +275,6 @@ static int __init init_cr16_clocksource(void)
 }
 }

- /* XXX: We may want to mark sched_clock stable here if cr16 clocks are
- * in sync:
- * (clocksource_cr16.flags == CLOCK_SOURCE_IS_CONTINUOUS) */
-
 /* register at clocksource framework */
 clocksource_register_hz(&clocksource_cr16,
 100 * PAGE0->mem_10msec);
-- cgit

From 88053ec8cb1b91df566353cd3116470193797e00 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel
Date: Thu, 26 Aug 2021 18:56:13 +0200
Subject: arm64: mm: limit linear region to 51 bits for KVM in nVHE mode

KVM in nVHE mode divides up its VA space into two equal halves, and
picks the half that does not conflict with the HYP ID map to map its
linear region.
This worked fine when the kernel's linear map itself was guaranteed to cover precisely as many bits of VA space, but this was changed by commit f4693c2716b35d08 ("arm64: mm: extend linear region for 52-bit VA configurations"). The result is that, depending on the placement of the ID map, kernel-VA to hyp-VA translations may produce addresses that either conflict with other HYP mappings (including the ID map itself) or generate addresses outside of the 52-bit addressable range, neither of which is likely to lead to anything useful. Given that 52-bit capable cores are guaranteed to implement VHE, this only affects configurations such as pKVM where we opt into non-VHE mode even if the hardware is VHE capable. So just for these configurations, let's limit the kernel linear map to 51 bits and work around the problem. Fixes: f4693c2716b3 ("arm64: mm: extend linear region for 52-bit VA configurations") Signed-off-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20210826165613.60774-1-ardb@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/mm/init.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 8490ed2917ff..2b28d77e29a0 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -282,7 +282,21 @@ static void __init fdt_enforce_memory_region(void) void __init arm64_memblock_init(void) { - const s64 linear_region_size = PAGE_END - _PAGE_OFFSET(vabits_actual); + s64 linear_region_size = PAGE_END - _PAGE_OFFSET(vabits_actual); + + /* + * Corner case: 52-bit VA capable systems running KVM in nVHE mode may + * be limited in their ability to support a linear map that exceeds 51 + * bits of VA space, depending on the placement of the ID map. Given + * that the placement of the ID map may be randomized, let's simply + * limit the kernel's linear map to 51 bits as well if we detect this + * configuration. + */ + if (IS_ENABLED(CONFIG_KVM) && vabits_actual == 52 && + is_hyp_mode_available() && !is_kernel_in_hyp_mode()) { + pr_info("Capping linear region to 51 bits for KVM in nVHE mode on LVA capable hardware.\n"); + linear_region_size = min_t(u64, linear_region_size, BIT(51)); + } /* Handle linux,usable-memory-range property */ fdt_enforce_memory_region(); -- cgit From 671028728083e856e9919221b109e3b2cd2ccc49 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 9 Sep 2021 12:47:00 +0200 Subject: parisc: Implement __get/put_kernel_nofault() Remove CONFIG_SET_FS from parisc, so we need to add __get_kernel_nofault() and __put_kernel_nofault(), define HAVE_GET_KERNEL_NOFAULT and remove set_fs(), get_fs(), load_sr2(), thread_info->addr_limit, KERNEL_DS and USER_DS. The nice side-effect of this patch is that we now can directly access userspace via sr3 without the need to use a temporary sr2 which is either copied from sr3 or set to zero (for kernel space). 
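For context on how these hooks get consumed: once HAVE_GET_KERNEL_NOFAULT is
defined, the generic copy_from_kernel_nofault() in mm/maccess.c expands
__get_kernel_nofault() once per access size. The following is a hedged,
simplified sketch of that pattern, not the upstream code; the function name is
invented, and the real version additionally checks
copy_from_kernel_nofault_allowed():

    /* Illustrative sketch: copy 'size' bytes from a kernel address that
     * may fault, returning -EFAULT instead of oopsing. */
    static long copy_from_kernel_nofault_sketch(void *dst, const void *src,
    					    size_t size)
    {
    	unsigned long align = (unsigned long)src | (unsigned long)dst;

    	pagefault_disable();
    	if (!(align & (sizeof(long) - 1))) {
    		while (size >= sizeof(long)) {
    			/* jumps to 'fault' if the access faults */
    			__get_kernel_nofault(dst, src, unsigned long, fault);
    			dst += sizeof(long);
    			src += sizeof(long);
    			size -= sizeof(long);
    		}
    	}
    	while (size) {
    		__get_kernel_nofault(dst, src, u8, fault);
    		dst++;
    		src++;
    		size--;
    	}
    	pagefault_enable();
    	return 0;
    fault:
    	pagefault_enable();
    	return -EFAULT;
    }

On parisc the kernel case simply selects space register %sr0 instead of %sr3,
which is what makes the set_fs()/load_sr2() machinery removable below.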
Signed-off-by: Helge Deller Suggested-by: Arnd Bergmann --- arch/parisc/Kconfig | 1 - arch/parisc/include/asm/processor.h | 4 -- arch/parisc/include/asm/thread_info.h | 2 - arch/parisc/include/asm/uaccess.h | 122 +++++++++++++++++----------------- arch/parisc/kernel/asm-offsets.c | 1 - arch/parisc/lib/lusercopy.S | 18 +---- 6 files changed, 62 insertions(+), 86 deletions(-) (limited to 'arch') diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 86e879608e2a..4742b6f169b7 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -64,7 +64,6 @@ config PARISC select HAVE_KPROBES_ON_FTRACE select HAVE_DYNAMIC_FTRACE_WITH_REGS select HAVE_SOFTIRQ_ON_OWN_STACK if IRQSTACKS - select SET_FS select TRACE_IRQFLAGS_SUPPORT help diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h index b5fbcd2c1780..eeb7da064289 100644 --- a/arch/parisc/include/asm/processor.h +++ b/arch/parisc/include/asm/processor.h @@ -101,10 +101,6 @@ DECLARE_PER_CPU(struct cpuinfo_parisc, cpu_data); #define CPU_HVERSION ((boot_cpu_data.hversion >> 4) & 0x0FFF) -typedef struct { - int seg; -} mm_segment_t; - #define ARCH_MIN_TASKALIGN 8 struct thread_struct { diff --git a/arch/parisc/include/asm/thread_info.h b/arch/parisc/include/asm/thread_info.h index 0bd38a972cea..00ad50fef769 100644 --- a/arch/parisc/include/asm/thread_info.h +++ b/arch/parisc/include/asm/thread_info.h @@ -11,7 +11,6 @@ struct thread_info { struct task_struct *task; /* main task structure */ unsigned long flags; /* thread_info flags (see TIF_*) */ - mm_segment_t addr_limit; /* user-level address space limit */ __u32 cpu; /* current CPU */ int preempt_count; /* 0=premptable, <0=BUG; will also serve as bh-counter */ }; @@ -21,7 +20,6 @@ struct thread_info { .task = &tsk, \ .flags = 0, \ .cpu = 0, \ - .addr_limit = KERNEL_DS, \ .preempt_count = INIT_PREEMPT_COUNT, \ } diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h index cfb4ca7e39fe..192ad9e11b25 100644 --- a/arch/parisc/include/asm/uaccess.h +++ b/arch/parisc/include/asm/uaccess.h @@ -11,14 +11,6 @@ #include #include -#define KERNEL_DS ((mm_segment_t){0}) -#define USER_DS ((mm_segment_t){1}) - -#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) - -#define get_fs() (current_thread_info()->addr_limit) -#define set_fs(x) (current_thread_info()->addr_limit = (x)) - /* * Note that since kernel addresses are in a separate address space on * parisc, we don't need to do anything for access_ok(). @@ -33,11 +25,11 @@ #define get_user __get_user #if !defined(CONFIG_64BIT) -#define LDD_USER(val, ptr) __get_user_asm64(val, ptr) -#define STD_USER(x, ptr) __put_user_asm64(x, ptr) +#define LDD_USER(sr, val, ptr) __get_user_asm64(sr, val, ptr) +#define STD_USER(sr, x, ptr) __put_user_asm64(sr, x, ptr) #else -#define LDD_USER(val, ptr) __get_user_asm(val, "ldd", ptr) -#define STD_USER(x, ptr) __put_user_asm("std", x, ptr) +#define LDD_USER(sr, val, ptr) __get_user_asm(sr, val, "ldd", ptr) +#define STD_USER(sr, x, ptr) __put_user_asm(sr, "std", x, ptr) #endif /* @@ -67,28 +59,15 @@ struct exception_table_entry { #define ASM_EXCEPTIONTABLE_ENTRY_EFAULT( fault_addr, except_addr )\ ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr + 1) -/* - * load_sr2() preloads the space register %%sr2 - based on the value of - * get_fs() - with either a value of 0 to access kernel space (KERNEL_DS which - * is 0), or with the current value of %%sr3 to access user space (USER_DS) - * memory. 
The following __get_user_asm() and __put_user_asm() functions have - * %%sr2 hard-coded to access the requested memory. - */ -#define load_sr2() \ - __asm__(" or,= %0,%%r0,%%r0\n\t" \ - " mfsp %%sr3,%0\n\t" \ - " mtsp %0,%%sr2\n\t" \ - : : "r"(get_fs()) : ) - -#define __get_user_internal(val, ptr) \ +#define __get_user_internal(sr, val, ptr) \ ({ \ register long __gu_err __asm__ ("r8") = 0; \ \ switch (sizeof(*(ptr))) { \ - case 1: __get_user_asm(val, "ldb", ptr); break; \ - case 2: __get_user_asm(val, "ldh", ptr); break; \ - case 4: __get_user_asm(val, "ldw", ptr); break; \ - case 8: LDD_USER(val, ptr); break; \ + case 1: __get_user_asm(sr, val, "ldb", ptr); break; \ + case 2: __get_user_asm(sr, val, "ldh", ptr); break; \ + case 4: __get_user_asm(sr, val, "ldw", ptr); break; \ + case 8: LDD_USER(sr, val, ptr); break; \ default: BUILD_BUG(); \ } \ \ @@ -97,15 +76,14 @@ struct exception_table_entry { #define __get_user(val, ptr) \ ({ \ - load_sr2(); \ - __get_user_internal(val, ptr); \ + __get_user_internal("%%sr3,", val, ptr); \ }) -#define __get_user_asm(val, ldx, ptr) \ +#define __get_user_asm(sr, val, ldx, ptr) \ { \ register long __gu_val; \ \ - __asm__("1: " ldx " 0(%%sr2,%2),%0\n" \ + __asm__("1: " ldx " 0(" sr "%2),%0\n" \ "9:\n" \ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ : "=r"(__gu_val), "=r"(__gu_err) \ @@ -114,9 +92,22 @@ struct exception_table_entry { (val) = (__force __typeof__(*(ptr))) __gu_val; \ } +#define HAVE_GET_KERNEL_NOFAULT +#define __get_kernel_nofault(dst, src, type, err_label) \ +{ \ + type __z; \ + long __err; \ + __err = __get_user_internal("%%sr0,", __z, (type *)(src)); \ + if (unlikely(__err)) \ + goto err_label; \ + else \ + *(type *)(dst) = __z; \ +} + + #if !defined(CONFIG_64BIT) -#define __get_user_asm64(val, ptr) \ +#define __get_user_asm64(sr, val, ptr) \ { \ union { \ unsigned long long l; \ @@ -124,8 +115,8 @@ struct exception_table_entry { } __gu_tmp; \ \ __asm__(" copy %%r0,%R0\n" \ - "1: ldw 0(%%sr2,%2),%0\n" \ - "2: ldw 4(%%sr2,%2),%R0\n" \ + "1: ldw 0(" sr "%2),%0\n" \ + "2: ldw 4(" sr "%2),%R0\n" \ "9:\n" \ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \ @@ -138,16 +129,16 @@ struct exception_table_entry { #endif /* !defined(CONFIG_64BIT) */ -#define __put_user_internal(x, ptr) \ +#define __put_user_internal(sr, x, ptr) \ ({ \ register long __pu_err __asm__ ("r8") = 0; \ __typeof__(*(ptr)) __x = (__typeof__(*(ptr)))(x); \ \ switch (sizeof(*(ptr))) { \ - case 1: __put_user_asm("stb", __x, ptr); break; \ - case 2: __put_user_asm("sth", __x, ptr); break; \ - case 4: __put_user_asm("stw", __x, ptr); break; \ - case 8: STD_USER(__x, ptr); break; \ + case 1: __put_user_asm(sr, "stb", __x, ptr); break; \ + case 2: __put_user_asm(sr, "sth", __x, ptr); break; \ + case 4: __put_user_asm(sr, "stw", __x, ptr); break; \ + case 8: STD_USER(sr, __x, ptr); break; \ default: BUILD_BUG(); \ } \ \ @@ -156,10 +147,20 @@ struct exception_table_entry { #define __put_user(x, ptr) \ ({ \ - load_sr2(); \ - __put_user_internal(x, ptr); \ + __put_user_internal("%%sr3,", x, ptr); \ }) +#define __put_kernel_nofault(dst, src, type, err_label) \ +{ \ + type __z = *(type *)(src); \ + long __err; \ + __err = __put_user_internal("%%sr0,", __z, (type *)(dst)); \ + if (unlikely(__err)) \ + goto err_label; \ +} + + + /* * The "__put_user/kernel_asm()" macros tell gcc they read from memory @@ -170,26 +171,26 @@ struct exception_table_entry { * r8 is already listed as err. 
*/ -#define __put_user_asm(stx, x, ptr) \ - __asm__ __volatile__ ( \ - "1: " stx " %2,0(%%sr2,%1)\n" \ - "9:\n" \ - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ - : "=r"(__pu_err) \ +#define __put_user_asm(sr, stx, x, ptr) \ + __asm__ __volatile__ ( \ + "1: " stx " %2,0(" sr "%1)\n" \ + "9:\n" \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ + : "=r"(__pu_err) \ : "r"(ptr), "r"(x), "0"(__pu_err)) #if !defined(CONFIG_64BIT) -#define __put_user_asm64(__val, ptr) do { \ - __asm__ __volatile__ ( \ - "1: stw %2,0(%%sr2,%1)\n" \ - "2: stw %R2,4(%%sr2,%1)\n" \ - "9:\n" \ - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \ - : "=r"(__pu_err) \ - : "r"(ptr), "r"(__val), "0"(__pu_err)); \ +#define __put_user_asm64(sr, __val, ptr) do { \ + __asm__ __volatile__ ( \ + "1: stw %2,0(" sr "%1)\n" \ + "2: stw %R2,4(" sr "%1)\n" \ + "9:\n" \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \ + : "=r"(__pu_err) \ + : "r"(ptr), "r"(__val), "0"(__pu_err)); \ } while (0) #endif /* !defined(CONFIG_64BIT) */ @@ -200,12 +201,11 @@ struct exception_table_entry { */ extern long strncpy_from_user(char *, const char __user *, long); -extern unsigned lclear_user(void __user *, unsigned long); +extern __must_check unsigned lclear_user(void __user *, unsigned long); extern __must_check long strnlen_user(const char __user *src, long n); /* * Complex access routines -- macros */ -#define user_addr_max() (~0UL) #define clear_user lclear_user #define __clear_user lclear_user diff --git a/arch/parisc/kernel/asm-offsets.c b/arch/parisc/kernel/asm-offsets.c index 33113ba24054..22924a3f1728 100644 --- a/arch/parisc/kernel/asm-offsets.c +++ b/arch/parisc/kernel/asm-offsets.c @@ -230,7 +230,6 @@ int main(void) DEFINE(TI_TASK, offsetof(struct thread_info, task)); DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); - DEFINE(TI_SEGMENT, offsetof(struct thread_info, addr_limit)); DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count)); DEFINE(THREAD_SZ, sizeof(struct thread_info)); /* THREAD_SZ_ALGN includes space for a stack frame. */ diff --git a/arch/parisc/lib/lusercopy.S b/arch/parisc/lib/lusercopy.S index 0aad5ce89f4d..b428d29e45fb 100644 --- a/arch/parisc/lib/lusercopy.S +++ b/arch/parisc/lib/lusercopy.S @@ -27,21 +27,6 @@ #include #include - /* - * get_sr gets the appropriate space value into - * sr1 for kernel/user space access, depending - * on the flag stored in the task structure. 
- */
-
- .macro get_sr
- mfctl %cr30,%r1
- ldw TI_SEGMENT(%r1),%r22
- mfsp %sr3,%r1
- or,<> %r22,%r0,%r0
- copy %r0,%r1
- mtsp %r1,%sr1
- .endm
-
 /*
 * unsigned long lclear_user(void *to, unsigned long n)
 *
@@ -51,10 +36,9 @@ ENTRY_CFI(lclear_user)
 comib,=,n 0,%r25,$lclu_done
- get_sr
$lclu_loop:
 addib,<> -1,%r25,$lclu_loop
-1: stbs,ma %r0,1(%sr1,%r26)
+1: stbs,ma %r0,1(%sr3,%r26)
$lclu_done:
 bv %r0(%r2)
-- cgit

From 85f58eb1889826b9745737718723a80b639e0fbd Mon Sep 17 00:00:00 2001
From: Chen Wandun
Date: Fri, 10 Sep 2021 14:48:44 +0800
Subject: arm64: kdump: Skip kmemleak scan of reserved memory for kdump

When booting with kdump + kmemleak, the following command results in a
crash:

"echo scan > /sys/kernel/debug/kmemleak"

crashkernel reserved: 0x0000000007c00000 - 0x0000000027c00000 (512 MB)
Kernel command line: BOOT_IMAGE=(hd1,gpt2)/vmlinuz-5.14.0-rc5-next-20210809+ root=/dev/mapper/ao-root ro rd.lvm.lv=ao/root rd.lvm.lv=ao/swap crashkernel=512M
Unable to handle kernel paging request at virtual address ffff000007c00000
Mem abort info:
 ESR = 0x96000007
 EC = 0x25: DABT (current EL), IL = 32 bits
 SET = 0, FnV = 0
 EA = 0, S1PTW = 0
 FSC = 0x07: level 3 translation fault
Data abort info:
 ISV = 0, ISS = 0x00000007
 CM = 0, WnR = 0
swapper pgtable: 64k pages, 48-bit VAs, pgdp=00002024f0d80000
[ffff000007c00000] pgd=1800205ffffd0003, p4d=1800205ffffd0003, pud=1800205ffffd0003, pmd=1800205ffffc0003, pte=0068000007c00f06
Internal error: Oops: 96000007 [#1] SMP
pstate: 804000c9 (Nzcv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
pc : scan_block+0x98/0x230
lr : scan_block+0x94/0x230
sp : ffff80008d6cfb70
x29: ffff80008d6cfb70 x28: 0000000000000000 x27: 0000000000000000
x26: 00000000000000c0 x25: 0000000000000001 x24: 0000000000000000
x23: ffffa88a6b18b398 x22: ffff000007c00ff9 x21: ffffa88a6ac7fc40
x20: ffffa88a6af6a830 x19: ffff000007c00000 x18: 0000000000000000
x17: 0000000000000000 x16: 0000000000000000 x15: ffffffffffffffff
x14: ffffffff00000000 x13: ffffffffffffffff x12: 0000000000000020
x11: 0000000000000000 x10: 0000000001080000 x9 : ffffa88a6951c77c
x8 : ffffa88a6a893988 x7 : ffff203ff6cfb3c0 x6 : ffffa88a6a52b3c0
x5 : ffff203ff6cfb3c0 x4 : 0000000000000000 x3 : 0000000000000000
x2 : 0000000000000001 x1 : ffff20226cb56a40 x0 : 0000000000000000
Call trace:
 scan_block+0x98/0x230
 scan_gray_list+0x120/0x270
 kmemleak_scan+0x3a0/0x648
 kmemleak_write+0x3ac/0x4c8
 full_proxy_write+0x6c/0xa0
 vfs_write+0xc8/0x2b8
 ksys_write+0x70/0xf8
 __arm64_sys_write+0x24/0x30
 invoke_syscall+0x4c/0x110
 el0_svc_common+0x9c/0x190
 do_el0_svc+0x30/0x98
 el0_svc+0x28/0xd8
 el0t_64_sync_handler+0x90/0xb8
 el0t_64_sync+0x180/0x184

The memory reserved for kdump is looked up by kmemleak, but this area is
invalidated when the kdump service is brought up, so a subsequent
kmemleak scan of it crashes the kernel.
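The fix, shown in full in the diff below, pairs the crashkernel
reservation with a kmemleak opt-out. Condensed to its essence (a sketch
only; 'search_limit' is a placeholder for the real upper bound used by
the surrounding arm64 code):

    /* Reserve the crashkernel window, then tell kmemleak to stay away,
     * since this range is later removed from the kernel linear map. */
    crash_base = memblock_phys_alloc_range(crash_size, SZ_2M,
    				       0, search_limit);
    if (crash_base)
    	kmemleak_ignore_phys(crash_base);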
Fixes: a7259df76702 ("memblock: make memblock_find_in_range method private")
Signed-off-by: Chen Wandun
Reviewed-by: Kefeng Wang
Reviewed-by: Mike Rapoport
Reviewed-by: Catalin Marinas
Link: https://lore.kernel.org/r/20210910064844.3827813-1-chenwandun@huawei.com
Signed-off-by: Catalin Marinas
---
 arch/arm64/mm/init.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'arch')

diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 2b28d77e29a0..5a16f5715716 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -30,6 +30,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
@@ -115,6 +116,11 @@ static void __init reserve_crashkernel(void)
 pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
 crash_base, crash_base + crash_size, crash_size >> 20);

+ /*
+ * The crashkernel memory will be removed from the kernel linear
+ * map. Inform kmemleak so that it won't try to access it.
+ */
+ kmemleak_ignore_phys(crash_base);
 crashk_res.start = crash_base;
 crashk_res.end = crash_base + crash_size - 1;
 }
-- cgit

From 3a87ff891290e1b9ef3f03396c22fd3b2f6eb955 Mon Sep 17 00:00:00 2001
From: Heinrich Schuchardt
Date: Thu, 12 Aug 2021 10:10:26 +0200
Subject: riscv: defconfig: enable BLK_DEV_NVME

NVMe is a non-volatile storage medium attached via PCIe. As NVMe has
much higher throughput than other block devices like SATA, it is a
must-have for RISC-V. Enable CONFIG_BLK_DEV_NVME.

The HiFive Unmatched is a board providing M.2 slots for NVMe drives.
Enable CONFIG_PCIE_FU740.

Signed-off-by: Heinrich Schuchardt
Reviewed-by: Anup Patel
Signed-off-by: Palmer Dabbelt
---
 arch/riscv/configs/defconfig | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig
index bc68231a8fb7..a7d915a5932e 100644
--- a/arch/riscv/configs/defconfig
+++ b/arch/riscv/configs/defconfig
@@ -39,10 +39,12 @@ CONFIG_PCI=y
 CONFIG_PCIEPORTBUS=y
 CONFIG_PCI_HOST_GENERIC=y
 CONFIG_PCIE_XILINX=y
+CONFIG_PCIE_FU740=y
 CONFIG_DEVTMPFS=y
 CONFIG_DEVTMPFS_MOUNT=y
 CONFIG_BLK_DEV_LOOP=y
 CONFIG_VIRTIO_BLK=y
+CONFIG_BLK_DEV_NVME=m
 CONFIG_BLK_DEV_SD=y
 CONFIG_BLK_DEV_SR=y
 CONFIG_SCSI_VIRTIO=y
-- cgit

From efe1e08bca9a1fab2b0ad886be4fb5335dcbf29c Mon Sep 17 00:00:00 2001
From: Heinrich Schuchardt
Date: Thu, 12 Aug 2021 10:10:27 +0200
Subject: riscv: defconfig: enable NLS_CODEPAGE_437, NLS_ISO8859_1

The EFI system partition uses the FAT file system. Many distributions
add an entry in /etc/fstab for the ESP. We must ensure that mounting
does not fail.

The default code page for FAT is 437 (cf. CONFIG_FAT_DEFAULT_CODEPAGE).
The default IO character set is "iso8859-1" (cf. CONFIG_NLS_ISO8859_1).

So let's enable NLS_CODEPAGE_437 and NLS_ISO8859_1 in defconfig.
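To illustrate the /etc/fstab point, an ESP entry of the kind this change
keeps working might look like the following (a hedged example: the UUID
and mount point are made up, and the codepage/iocharset options merely
spell out the defaults named above):

    UUID=ABCD-1234  /boot/efi  vfat  umask=0077,codepage=437,iocharset=iso8859-1  0  2

Without these NLS options available in the kernel, such a mount fails at
boot time.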
Signed-off-by: Heinrich Schuchardt
Signed-off-by: Palmer Dabbelt
---
 arch/riscv/configs/defconfig | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig
index a7d915a5932e..4ebc80315f01 100644
--- a/arch/riscv/configs/defconfig
+++ b/arch/riscv/configs/defconfig
@@ -110,6 +110,8 @@ CONFIG_NFS_V4_1=y
 CONFIG_NFS_V4_2=y
 CONFIG_ROOT_NFS=y
 CONFIG_9P_FS=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ISO8859_1=m
 CONFIG_CRYPTO_USER_API_HASH=y
 CONFIG_CRYPTO_DEV_VIRTIO=y
 CONFIG_PRINTK_TIME=y
-- cgit

From d5935537c8256fc63c77d5f4914dfd6e3ef43241 Mon Sep 17 00:00:00 2001
From: Kefeng Wang
Date: Thu, 12 Aug 2021 19:47:02 +0800
Subject: riscv: Improve stack randomisation on RV64

This enlarges the bits available for stack randomisation on RV64 from
the default of 8MiB to 1GiB, to match arm64 and x86. Also, update the
documentation to reflect our support for stack randomisation.

Signed-off-by: Kefeng Wang
[Palmer: commit text]
Signed-off-by: Palmer Dabbelt
---
 arch/riscv/include/asm/elf.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch')

diff --git a/arch/riscv/include/asm/elf.h b/arch/riscv/include/asm/elf.h
index f4b490cd0e5d..f53c40026c7a 100644
--- a/arch/riscv/include/asm/elf.h
+++ b/arch/riscv/include/asm/elf.h
@@ -42,6 +42,9 @@
 */
 #define ELF_ET_DYN_BASE ((TASK_SIZE / 3) * 2)

+#ifdef CONFIG_64BIT
+#define STACK_RND_MASK (0x3ffff >> (PAGE_SHIFT - 12))
+#endif
 /*
 * This yields a mask that user programs can use to figure out what
 * instruction set this CPU supports. This could be done in user space,
-- cgit

From 399c1ec8467c563ae9db4c19a40a9d5e728b1e72 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada
Date: Thu, 29 Jul 2021 23:21:47 +0900
Subject: riscv: move the (z)install rules to arch/riscv/Makefile

Currently, the (z)install targets in arch/riscv/Makefile descend into
arch/riscv/boot/Makefile to invoke the shell script, but there is no
good reason to do so. arch/riscv/Makefile can run the shell script
directly.
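The user-visible interface is unchanged by this move; an example
invocation (install path and cross-compiler prefix are illustrative):

    $ make ARCH=riscv CROSS_COMPILE=riscv64-linux-gnu- install INSTALL_PATH=/mnt/boot
    $ make ARCH=riscv CROSS_COMPILE=riscv64-linux-gnu- zinstall INSTALL_PATH=/mnt/boot

Both targets now run boot/install.sh straight from arch/riscv/Makefile,
passing the kernel release, Image or Image.gz, System.map and the
install path, exactly as the script was invoked before.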
Signed-off-by: Masahiro Yamada Signed-off-by: Palmer Dabbelt --- arch/riscv/Makefile | 7 +++++-- arch/riscv/boot/Makefile | 8 -------- 2 files changed, 5 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 01906a90c501..0eb4568fbd29 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -132,8 +132,11 @@ $(BOOT_TARGETS): vmlinux Image.%: Image $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ -zinstall install: - $(Q)$(MAKE) $(build)=$(boot) $@ +install: install-image = Image +zinstall: install-image = Image.gz +install zinstall: + $(CONFIG_SHELL) $(srctree)/$(boot)/install.sh $(KERNELRELEASE) \ + $(boot)/$(install-image) System.map "$(INSTALL_PATH)" archclean: $(Q)$(MAKE) $(clean)=$(boot) diff --git a/arch/riscv/boot/Makefile b/arch/riscv/boot/Makefile index 6bf299f70c27..becd0621071c 100644 --- a/arch/riscv/boot/Makefile +++ b/arch/riscv/boot/Makefile @@ -58,11 +58,3 @@ $(obj)/Image.lzo: $(obj)/Image FORCE $(obj)/loader.bin: $(obj)/loader FORCE $(call if_changed,objcopy) - -install: - $(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \ - $(obj)/Image System.map "$(INSTALL_PATH)" - -zinstall: - $(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \ - $(obj)/Image.gz System.map "$(INSTALL_PATH)" -- cgit From cbba17870881cd17bca24673ccb72859431da5bd Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 26 Aug 2021 15:19:39 +0200 Subject: riscv: dts: microchip: mpfs-icicle: Fix serial console Currently, nothing is output on the serial console, unless "console=ttyS0,115200n8" or "earlycon" are appended to the kernel command line. Enable automatic console selection using chosen/stdout-path by adding a proper alias, and configure the expected serial rate. While at it, add aliases for the other three serial ports, which are provided on the same micro-USB connector as the first one. Fixes: 0fa6107eca4186ad ("RISC-V: Initial DTS for Microchip ICICLE board") Signed-off-by: Geert Uytterhoeven Reviewed-by: Bin Meng Reviewed-by: Conor Dooley Signed-off-by: Palmer Dabbelt --- arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts b/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts index baea7d204639..b254c60589a1 100644 --- a/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts +++ b/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts @@ -16,10 +16,14 @@ aliases { ethernet0 = &emac1; + serial0 = &serial0; + serial1 = &serial1; + serial2 = &serial2; + serial3 = &serial3; }; chosen { - stdout-path = &serial0; + stdout-path = "serial0:115200n8"; }; cpus { -- cgit From 54fed35fd3939398be292e4090b0b1c5ff2238b4 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Thu, 26 Aug 2021 22:10:29 +0800 Subject: riscv: Enable BUILDTIME_TABLE_SORT Enable BUILDTIME_TABLE_SORT to sort the exception table at build time rather than during boot. 
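The point of sorting ahead of time is that exception-table lookups at
fault time can binary-search the table directly. A simplified sketch of
such a lookup follows (illustrative only: real entries in lib/extable.c
use relative offsets via ex_to_insn(), which this ignores, and the
function name is invented):

    struct exception_table_entry {
    	unsigned long insn;	/* address of faulting instruction */
    	unsigned long fixup;	/* address of fixup code */
    };

    /* Binary search; valid only because the table is sorted by 'insn',
     * which BUILDTIME_TABLE_SORT now guarantees before boot. */
    static const struct exception_table_entry *
    search_extable_sketch(const struct exception_table_entry *base,
    		      unsigned int num, unsigned long addr)
    {
    	unsigned int lo = 0, hi = num;

    	while (lo < hi) {
    		unsigned int mid = lo + (hi - lo) / 2;

    		if (base[mid].insn < addr)
    			lo = mid + 1;
    		else
    			hi = mid;
    	}
    	return (lo < num && base[lo].insn == addr) ? &base[lo] : NULL;
    }

Without a pre-sorted table, sort_main_extable() has to perform the sort
during early boot instead.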
Signed-off-by: Jisheng Zhang Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index baf60fc350a4..c1abbc876e5b 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -41,6 +41,7 @@ config RISCV select ARCH_WANT_FRAME_POINTERS select ARCH_WANT_HUGE_PMD_SHARE if 64BIT select BINFMT_FLAT_NO_DATA_START_OFFSET if !MMU + select BUILDTIME_TABLE_SORT if MMU select CLONE_BACKWARDS select CLINT_TIMER if !MMU select COMMON_CLK -- cgit From 6f55ab36bef505b449723300a5a445ddc76a94d7 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Thu, 26 Aug 2021 22:11:18 +0800 Subject: riscv: Move EXCEPTION_TABLE to RO_DATA segment _ex_table section is read-only, so move it to RO_DATA. Signed-off-by: Jisheng Zhang Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/vmlinux-xip.lds.S | 1 - arch/riscv/kernel/vmlinux.lds.S | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/riscv/kernel/vmlinux-xip.lds.S b/arch/riscv/kernel/vmlinux-xip.lds.S index af776555ded9..9c9f35091ef0 100644 --- a/arch/riscv/kernel/vmlinux-xip.lds.S +++ b/arch/riscv/kernel/vmlinux-xip.lds.S @@ -121,7 +121,6 @@ SECTIONS } BSS_SECTION(PAGE_SIZE, PAGE_SIZE, 0) - EXCEPTION_TABLE(0x10) .rel.dyn : AT(ADDR(.rel.dyn) - LOAD_OFFSET) { *(.rel.dyn*) diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S index 502d0826ecb1..5104f3a871e3 100644 --- a/arch/riscv/kernel/vmlinux.lds.S +++ b/arch/riscv/kernel/vmlinux.lds.S @@ -4,6 +4,8 @@ * Copyright (C) 2017 SiFive */ +#define RO_EXCEPTION_TABLE_ALIGN 16 + #ifdef CONFIG_XIP_KERNEL #include "vmlinux-xip.lds.S" #else @@ -112,8 +114,6 @@ SECTIONS *(.srodata*) } - EXCEPTION_TABLE(0x10) - . = ALIGN(SECTION_ALIGN); _data = .; -- cgit
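A closing note on the mechanism: defining RO_EXCEPTION_TABLE_ALIGN
before including the generic linker script is what moves the table,
since include/asm-generic/vmlinux.lds.h emits the exception table inside
RO_DATA when that macro is defined. Roughly (abbreviated from the
generic header; consult it for the exact form):

    #ifdef RO_EXCEPTION_TABLE_ALIGN
    #define RO_EXCEPTION_TABLE	EXCEPTION_TABLE(RO_EXCEPTION_TABLE_ALIGN)
    #else
    #define RO_EXCEPTION_TABLE
    #endif

    /* RO_DATA() then expands RO_EXCEPTION_TABLE within the read-only
     * segment, so __ex_table lands alongside .rodata. */

This is why the standalone EXCEPTION_TABLE(0x10) invocations can simply
be deleted from both riscv linker scripts: the define sits ahead of the
CONFIG_XIP_KERNEL include, so the XIP layout picks it up as well.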