From 08245672cdc6505550d1a5020603b0a8d4a6dcc7 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 2 Dec 2022 13:51:49 +0000 Subject: perf/x86/amd: fix potential integer overflow on shift of a int The left shift of int 32 bit integer constant 1 is evaluated using 32 bit arithmetic and then passed as a 64 bit function argument. In the case where i is 32 or more this can lead to an overflow. Avoid this by shifting using the BIT_ULL macro instead. Fixes: 471af006a747 ("perf/x86/amd: Constrain Large Increment per Cycle events") Signed-off-by: Colin Ian King Signed-off-by: Peter Zijlstra (Intel) Acked-by: Ian Rogers Acked-by: Kim Phillips Link: https://lore.kernel.org/r/20221202135149.1797974-1-colin.i.king@gmail.com --- arch/x86/events/amd/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index d6f3703e4119..4386b10682ce 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -1387,7 +1387,7 @@ static int __init amd_core_pmu_init(void) * numbered counter following it. */ for (i = 0; i < x86_pmu.num_counters - 1; i += 2) - even_ctr_mask |= 1 << i; + even_ctr_mask |= BIT_ULL(i); pair_constraint = (struct event_constraint) __EVENT_CONSTRAINT(0, even_ctr_mask, 0, -- cgit From ade8c20847fcc3f4de08b35f730ec04ef29ddbdc Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 15 Dec 2022 17:43:23 +0100 Subject: x86/calldepth: Fix incorrect init section references The addition of callthunks_translate_call_dest means that skip_addr() and patch_dest() can no longer be discarded as part of the __init section freeing: WARNING: modpost: vmlinux.o: section mismatch in reference: callthunks_translate_call_dest.cold (section: .text.unlikely) -> skip_addr (section: .init.text) WARNING: modpost: vmlinux.o: section mismatch in reference: callthunks_translate_call_dest.cold (section: .text.unlikely) -> patch_dest (section: .init.text) WARNING: modpost: vmlinux.o: section mismatch in reference: is_callthunk.cold (section: .text.unlikely) -> skip_addr (section: .init.text) ERROR: modpost: Section mismatches detected. Set CONFIG_SECTION_MISMATCH_WARN_ONLY=y to allow them. Fixes: b2e9dfe54be4 ("x86/bpf: Emit call depth accounting if required") Signed-off-by: Arnd Bergmann Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20221215164334.968863-1-arnd@kernel.org --- arch/x86/kernel/callthunks.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c index 7d2c75ec9a8c..ffea98f9064b 100644 --- a/arch/x86/kernel/callthunks.c +++ b/arch/x86/kernel/callthunks.c @@ -119,7 +119,7 @@ static bool is_coretext(const struct core_text *ct, void *addr) return within_module_coretext(addr); } -static __init_or_module bool skip_addr(void *dest) +static bool skip_addr(void *dest) { if (dest == error_entry) return true; @@ -181,7 +181,7 @@ static const u8 nops[] = { 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, }; -static __init_or_module void *patch_dest(void *dest, bool direct) +static void *patch_dest(void *dest, bool direct) { unsigned int tsize = SKL_TMPL_SIZE; u8 *pad = dest - tsize; -- cgit From 1993bf97992df2d560287f3c4120eda57426843d Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Mon, 19 Dec 2022 23:35:10 +0900 Subject: x86/kprobes: Fix kprobes instruction boudary check with CONFIG_RETHUNK Since the CONFIG_RETHUNK and CONFIG_SLS will use INT3 for stopping speculative execution after RET instruction, kprobes always failes to check the probed instruction boundary by decoding the function body if the probed address is after such sequence. (Note that some conditional code blocks will be placed after function return, if compiler decides it is not on the hot path.) This is because kprobes expects kgdb puts the INT3 as a software breakpoint and it will replace the original instruction. But these INT3 are not such purpose, it doesn't need to recover the original instruction. To avoid this issue, kprobes checks whether the INT3 is owned by kgdb or not, and if so, stop decoding and make it fail. The other INT3 will come from CONFIG_RETHUNK/CONFIG_SLS and those can be treated as a one-byte instruction. Fixes: e463a09af2f0 ("x86: Add straight-line-speculation mitigation") Suggested-by: Peter Zijlstra Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/167146051026.1374301.392728975473572291.stgit@devnote3 --- arch/x86/kernel/kprobes/core.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 66299682b6b7..b36f3c367cb2 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -281,12 +282,15 @@ static int can_probe(unsigned long paddr) if (ret < 0) return 0; +#ifdef CONFIG_KGDB /* - * Another debugging subsystem might insert this breakpoint. - * In that case, we can't recover it. + * If there is a dynamically installed kgdb sw breakpoint, + * this function should not be probed. */ - if (insn.opcode.bytes[0] == INT3_INSN_OPCODE) + if (insn.opcode.bytes[0] == INT3_INSN_OPCODE && + kgdb_has_hit_break(addr)) return 0; +#endif addr += insn.length; } -- cgit From 63dc6325ff41ee9e570bde705ac34a39c5dbeb44 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Mon, 19 Dec 2022 23:35:19 +0900 Subject: x86/kprobes: Fix optprobe optimization check with CONFIG_RETHUNK Since the CONFIG_RETHUNK and CONFIG_SLS will use INT3 for stopping speculative execution after function return, kprobe jump optimization always fails on the functions with such INT3 inside the function body. (It already checks the INT3 padding between functions, but not inside the function) To avoid this issue, as same as kprobes, check whether the INT3 comes from kgdb or not, and if so, stop decoding and make it fail. The other INT3 will come from CONFIG_RETHUNK/CONFIG_SLS and those can be treated as a one-byte instruction. Fixes: e463a09af2f0 ("x86: Add straight-line-speculation mitigation") Suggested-by: Peter Zijlstra Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/167146051929.1374301.7419382929328081706.stgit@devnote3 --- arch/x86/kernel/kprobes/opt.c | 28 ++++++++-------------------- 1 file changed, 8 insertions(+), 20 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index e6b8c5362b94..e57e07b0edb6 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -279,19 +280,6 @@ static int insn_is_indirect_jump(struct insn *insn) return ret; } -static bool is_padding_int3(unsigned long addr, unsigned long eaddr) -{ - unsigned char ops; - - for (; addr < eaddr; addr++) { - if (get_kernel_nofault(ops, (void *)addr) < 0 || - ops != INT3_INSN_OPCODE) - return false; - } - - return true; -} - /* Decode whole function to ensure any instructions don't jump into target */ static int can_optimize(unsigned long paddr) { @@ -334,15 +322,15 @@ static int can_optimize(unsigned long paddr) ret = insn_decode_kernel(&insn, (void *)recovered_insn); if (ret < 0) return 0; - +#ifdef CONFIG_KGDB /* - * In the case of detecting unknown breakpoint, this could be - * a padding INT3 between functions. Let's check that all the - * rest of the bytes are also INT3. + * If there is a dynamically installed kgdb sw breakpoint, + * this function should not be probed. */ - if (insn.opcode.bytes[0] == INT3_INSN_OPCODE) - return is_padding_int3(addr, paddr - offset + size) ? 1 : 0; - + if (insn.opcode.bytes[0] == INT3_INSN_OPCODE && + kgdb_has_hit_break(addr)) + return 0; +#endif /* Recover address */ insn.kaddr = (void *)addr; insn.next_byte = (void *)(addr + insn.length); -- cgit From d00dd2f2645dca04cf399d8fc692f3f69b6dd996 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 22 Nov 2022 12:51:22 +0100 Subject: x86/kexec: Fix double-free of elf header buffer After b3e34a47f989 ("x86/kexec: fix memory leak of elf header buffer"), freeing image->elf_headers in the error path of crash_load_segments() is not needed because kimage_file_post_load_cleanup() will take care of that later. And not clearing it could result in a double-free. Drop the superfluous vfree() call at the error path of crash_load_segments(). Fixes: b3e34a47f989 ("x86/kexec: fix memory leak of elf header buffer") Signed-off-by: Takashi Iwai Signed-off-by: Borislav Petkov (AMD) Acked-by: Baoquan He Acked-by: Vlastimil Babka Cc: Link: https://lore.kernel.org/r/20221122115122.13937-1-tiwai@suse.de --- arch/x86/kernel/crash.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 9730c88530fc..305514431f26 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -401,10 +401,8 @@ int crash_load_segments(struct kimage *image) kbuf.buf_align = ELF_CORE_HEADER_ALIGN; kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; ret = kexec_add_buffer(&kbuf); - if (ret) { - vfree((void *)image->elf_headers); + if (ret) return ret; - } image->elf_load_addr = kbuf.mem; pr_debug("Loaded ELF headers at 0x%lx bufsz=0x%lx memsz=0x%lx\n", image->elf_load_addr, kbuf.bufsz, kbuf.memsz); -- cgit From 55d235361fccef573990dfa5724ab453866e7816 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 3 Jan 2023 10:24:11 -0500 Subject: x86/asm: Fix an assembler warning with current binutils Fix a warning: "found `movsd'; assuming `movsl' was meant" Signed-off-by: Mikulas Patocka Signed-off-by: Ingo Molnar Cc: linux-kernel@vger.kernel.org --- arch/x86/lib/iomap_copy_64.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/lib/iomap_copy_64.S b/arch/x86/lib/iomap_copy_64.S index a1f9416bf67a..6ff2f56cb0f7 100644 --- a/arch/x86/lib/iomap_copy_64.S +++ b/arch/x86/lib/iomap_copy_64.S @@ -10,6 +10,6 @@ */ SYM_FUNC_START(__iowrite32_copy) movl %edx,%ecx - rep movsd + rep movsl RET SYM_FUNC_END(__iowrite32_copy) -- cgit From 72bb8f8cc088730c4d84117a6906f458c2fc64bb Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sun, 1 Jan 2023 17:29:04 +0100 Subject: x86/insn: Avoid namespace clash by separating instruction decoder MMIO type from MMIO trace type Both and define various MMIO_ enum constants, whose namespace overlaps. Rename the ones to have a INSN_ prefix, so that the headers can be used from the same source file. Signed-off-by: Jason A. Donenfeld Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230101162910.710293-2-Jason@zx2c4.com --- arch/x86/coco/tdx/tdx.c | 26 +++++++++++++------------- arch/x86/include/asm/insn-eval.h | 18 +++++++++--------- arch/x86/kernel/sev.c | 18 +++++++++--------- arch/x86/lib/insn-eval.c | 20 ++++++++++---------- 4 files changed, 41 insertions(+), 41 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index cfd4c95b9f04..669d9e4f2901 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -386,8 +386,8 @@ static int handle_mmio(struct pt_regs *regs, struct ve_info *ve) { unsigned long *reg, val, vaddr; char buffer[MAX_INSN_SIZE]; + enum insn_mmio_type mmio; struct insn insn = {}; - enum mmio_type mmio; int size, extend_size; u8 extend_val = 0; @@ -402,10 +402,10 @@ static int handle_mmio(struct pt_regs *regs, struct ve_info *ve) return -EINVAL; mmio = insn_decode_mmio(&insn, &size); - if (WARN_ON_ONCE(mmio == MMIO_DECODE_FAILED)) + if (WARN_ON_ONCE(mmio == INSN_MMIO_DECODE_FAILED)) return -EINVAL; - if (mmio != MMIO_WRITE_IMM && mmio != MMIO_MOVS) { + if (mmio != INSN_MMIO_WRITE_IMM && mmio != INSN_MMIO_MOVS) { reg = insn_get_modrm_reg_ptr(&insn, regs); if (!reg) return -EINVAL; @@ -426,23 +426,23 @@ static int handle_mmio(struct pt_regs *regs, struct ve_info *ve) /* Handle writes first */ switch (mmio) { - case MMIO_WRITE: + case INSN_MMIO_WRITE: memcpy(&val, reg, size); if (!mmio_write(size, ve->gpa, val)) return -EIO; return insn.length; - case MMIO_WRITE_IMM: + case INSN_MMIO_WRITE_IMM: val = insn.immediate.value; if (!mmio_write(size, ve->gpa, val)) return -EIO; return insn.length; - case MMIO_READ: - case MMIO_READ_ZERO_EXTEND: - case MMIO_READ_SIGN_EXTEND: + case INSN_MMIO_READ: + case INSN_MMIO_READ_ZERO_EXTEND: + case INSN_MMIO_READ_SIGN_EXTEND: /* Reads are handled below */ break; - case MMIO_MOVS: - case MMIO_DECODE_FAILED: + case INSN_MMIO_MOVS: + case INSN_MMIO_DECODE_FAILED: /* * MMIO was accessed with an instruction that could not be * decoded or handled properly. It was likely not using io.h @@ -459,15 +459,15 @@ static int handle_mmio(struct pt_regs *regs, struct ve_info *ve) return -EIO; switch (mmio) { - case MMIO_READ: + case INSN_MMIO_READ: /* Zero-extend for 32-bit operation */ extend_size = size == 4 ? sizeof(*reg) : 0; break; - case MMIO_READ_ZERO_EXTEND: + case INSN_MMIO_READ_ZERO_EXTEND: /* Zero extend based on operand size */ extend_size = insn.opnd_bytes; break; - case MMIO_READ_SIGN_EXTEND: + case INSN_MMIO_READ_SIGN_EXTEND: /* Sign extend based on operand size */ extend_size = insn.opnd_bytes; if (size == 1 && val & BIT(7)) diff --git a/arch/x86/include/asm/insn-eval.h b/arch/x86/include/asm/insn-eval.h index f07faa61c7f3..54368a43abf6 100644 --- a/arch/x86/include/asm/insn-eval.h +++ b/arch/x86/include/asm/insn-eval.h @@ -32,16 +32,16 @@ int insn_fetch_from_user_inatomic(struct pt_regs *regs, bool insn_decode_from_regs(struct insn *insn, struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE], int buf_size); -enum mmio_type { - MMIO_DECODE_FAILED, - MMIO_WRITE, - MMIO_WRITE_IMM, - MMIO_READ, - MMIO_READ_ZERO_EXTEND, - MMIO_READ_SIGN_EXTEND, - MMIO_MOVS, +enum insn_mmio_type { + INSN_MMIO_DECODE_FAILED, + INSN_MMIO_WRITE, + INSN_MMIO_WRITE_IMM, + INSN_MMIO_READ, + INSN_MMIO_READ_ZERO_EXTEND, + INSN_MMIO_READ_SIGN_EXTEND, + INSN_MMIO_MOVS, }; -enum mmio_type insn_decode_mmio(struct insn *insn, int *bytes); +enum insn_mmio_type insn_decode_mmio(struct insn *insn, int *bytes); #endif /* _ASM_X86_INSN_EVAL_H */ diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index a428c62330d3..679026a640ef 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -1536,32 +1536,32 @@ static enum es_result vc_handle_mmio_movs(struct es_em_ctxt *ctxt, static enum es_result vc_handle_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt) { struct insn *insn = &ctxt->insn; + enum insn_mmio_type mmio; unsigned int bytes = 0; - enum mmio_type mmio; enum es_result ret; u8 sign_byte; long *reg_data; mmio = insn_decode_mmio(insn, &bytes); - if (mmio == MMIO_DECODE_FAILED) + if (mmio == INSN_MMIO_DECODE_FAILED) return ES_DECODE_FAILED; - if (mmio != MMIO_WRITE_IMM && mmio != MMIO_MOVS) { + if (mmio != INSN_MMIO_WRITE_IMM && mmio != INSN_MMIO_MOVS) { reg_data = insn_get_modrm_reg_ptr(insn, ctxt->regs); if (!reg_data) return ES_DECODE_FAILED; } switch (mmio) { - case MMIO_WRITE: + case INSN_MMIO_WRITE: memcpy(ghcb->shared_buffer, reg_data, bytes); ret = vc_do_mmio(ghcb, ctxt, bytes, false); break; - case MMIO_WRITE_IMM: + case INSN_MMIO_WRITE_IMM: memcpy(ghcb->shared_buffer, insn->immediate1.bytes, bytes); ret = vc_do_mmio(ghcb, ctxt, bytes, false); break; - case MMIO_READ: + case INSN_MMIO_READ: ret = vc_do_mmio(ghcb, ctxt, bytes, true); if (ret) break; @@ -1572,7 +1572,7 @@ static enum es_result vc_handle_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt) memcpy(reg_data, ghcb->shared_buffer, bytes); break; - case MMIO_READ_ZERO_EXTEND: + case INSN_MMIO_READ_ZERO_EXTEND: ret = vc_do_mmio(ghcb, ctxt, bytes, true); if (ret) break; @@ -1581,7 +1581,7 @@ static enum es_result vc_handle_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt) memset(reg_data, 0, insn->opnd_bytes); memcpy(reg_data, ghcb->shared_buffer, bytes); break; - case MMIO_READ_SIGN_EXTEND: + case INSN_MMIO_READ_SIGN_EXTEND: ret = vc_do_mmio(ghcb, ctxt, bytes, true); if (ret) break; @@ -1600,7 +1600,7 @@ static enum es_result vc_handle_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt) memset(reg_data, sign_byte, insn->opnd_bytes); memcpy(reg_data, ghcb->shared_buffer, bytes); break; - case MMIO_MOVS: + case INSN_MMIO_MOVS: ret = vc_handle_mmio_movs(ctxt, bytes); break; default: diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c index 21104c41cba0..558a605929db 100644 --- a/arch/x86/lib/insn-eval.c +++ b/arch/x86/lib/insn-eval.c @@ -1595,16 +1595,16 @@ bool insn_decode_from_regs(struct insn *insn, struct pt_regs *regs, * Returns: * * Type of the instruction. Size of the memory operand is stored in - * @bytes. If decode failed, MMIO_DECODE_FAILED returned. + * @bytes. If decode failed, INSN_MMIO_DECODE_FAILED returned. */ -enum mmio_type insn_decode_mmio(struct insn *insn, int *bytes) +enum insn_mmio_type insn_decode_mmio(struct insn *insn, int *bytes) { - enum mmio_type type = MMIO_DECODE_FAILED; + enum insn_mmio_type type = INSN_MMIO_DECODE_FAILED; *bytes = 0; if (insn_get_opcode(insn)) - return MMIO_DECODE_FAILED; + return INSN_MMIO_DECODE_FAILED; switch (insn->opcode.bytes[0]) { case 0x88: /* MOV m8,r8 */ @@ -1613,7 +1613,7 @@ enum mmio_type insn_decode_mmio(struct insn *insn, int *bytes) case 0x89: /* MOV m16/m32/m64, r16/m32/m64 */ if (!*bytes) *bytes = insn->opnd_bytes; - type = MMIO_WRITE; + type = INSN_MMIO_WRITE; break; case 0xc6: /* MOV m8, imm8 */ @@ -1622,7 +1622,7 @@ enum mmio_type insn_decode_mmio(struct insn *insn, int *bytes) case 0xc7: /* MOV m16/m32/m64, imm16/imm32/imm64 */ if (!*bytes) *bytes = insn->opnd_bytes; - type = MMIO_WRITE_IMM; + type = INSN_MMIO_WRITE_IMM; break; case 0x8a: /* MOV r8, m8 */ @@ -1631,7 +1631,7 @@ enum mmio_type insn_decode_mmio(struct insn *insn, int *bytes) case 0x8b: /* MOV r16/r32/r64, m16/m32/m64 */ if (!*bytes) *bytes = insn->opnd_bytes; - type = MMIO_READ; + type = INSN_MMIO_READ; break; case 0xa4: /* MOVS m8, m8 */ @@ -1640,7 +1640,7 @@ enum mmio_type insn_decode_mmio(struct insn *insn, int *bytes) case 0xa5: /* MOVS m16/m32/m64, m16/m32/m64 */ if (!*bytes) *bytes = insn->opnd_bytes; - type = MMIO_MOVS; + type = INSN_MMIO_MOVS; break; case 0x0f: /* Two-byte instruction */ @@ -1651,7 +1651,7 @@ enum mmio_type insn_decode_mmio(struct insn *insn, int *bytes) case 0xb7: /* MOVZX r32/r64, m16 */ if (!*bytes) *bytes = 2; - type = MMIO_READ_ZERO_EXTEND; + type = INSN_MMIO_READ_ZERO_EXTEND; break; case 0xbe: /* MOVSX r16/r32/r64, m8 */ @@ -1660,7 +1660,7 @@ enum mmio_type insn_decode_mmio(struct insn *insn, int *bytes) case 0xbf: /* MOVSX r32/r64, m16 */ if (!*bytes) *bytes = 2; - type = MMIO_READ_SIGN_EXTEND; + type = INSN_MMIO_READ_SIGN_EXTEND; break; } break; -- cgit From c07311b5509f6035f1dd828db3e90ff4859cf3b9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 28 Dec 2022 06:34:54 -0500 Subject: perf/x86/rapl: Treat Tigerlake like Icelake Since Tigerlake seems to have inherited its cstates and other RAPL power caps from Icelake, assume it also follows Icelake for its RAPL events. Signed-off-by: Chris Wilson Signed-off-by: Rodrigo Vivi Signed-off-by: Ingo Molnar Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Zhang Rui Link: https://lore.kernel.org/r/20221228113454.1199118-1-rodrigo.vivi@intel.com --- arch/x86/events/rapl.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c index a829492bca4c..ae5779ea4417 100644 --- a/arch/x86/events/rapl.c +++ b/arch/x86/events/rapl.c @@ -800,6 +800,8 @@ static const struct x86_cpu_id rapl_model_match[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &model_hsx), X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L, &model_skl), X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, &model_skl), + X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &model_skl), + X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, &model_skl), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &model_skl), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &model_skl), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, &model_skl), -- cgit From a664ec9158eeddd75121d39c9a0758016097fa96 Mon Sep 17 00:00:00 2001 From: Rodrigo Branco Date: Tue, 3 Jan 2023 14:17:51 -0600 Subject: x86/bugs: Flush IBP in ib_prctl_set() We missed the window between the TIF flag update and the next reschedule. Signed-off-by: Rodrigo Branco Reviewed-by: Borislav Petkov (AMD) Signed-off-by: Ingo Molnar Cc: --- arch/x86/kernel/cpu/bugs.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index d970ddb0cc65..bca0bd8f4846 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1981,6 +1981,8 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl) if (ctrl == PR_SPEC_FORCE_DISABLE) task_set_spec_ib_force_disable(task); task_update_spec_tif(task); + if (task == current) + indirect_branch_prediction_barrier(); break; default: return -ERANGE; -- cgit From f52853a668bfeddd79f319d536a506f68cc2b478 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Wed, 4 Jan 2023 22:58:30 +0800 Subject: perf/x86/rapl: Add support for Intel Meteor Lake Meteor Lake RAPL support is the same as previous Sky Lake. Add Meteor Lake model for RAPL. Signed-off-by: Zhang Rui Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230104145831.25498-1-rui.zhang@intel.com --- arch/x86/events/rapl.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c index ae5779ea4417..589c6885560d 100644 --- a/arch/x86/events/rapl.c +++ b/arch/x86/events/rapl.c @@ -809,6 +809,8 @@ static const struct x86_cpu_id rapl_model_match[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &model_skl), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &model_skl), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &model_skl), + X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE, &model_skl), + X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, &model_skl), {}, }; MODULE_DEVICE_TABLE(x86cpu, rapl_model_match); -- cgit From 57512b57dcfaf63c52d8ad2fb35321328cde31b0 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Wed, 4 Jan 2023 22:58:31 +0800 Subject: perf/x86/rapl: Add support for Intel Emerald Rapids Emerald Rapids RAPL support is the same as previous Sapphire Rapids. Add Emerald Rapids model for RAPL. Signed-off-by: Zhang Rui Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230104145831.25498-2-rui.zhang@intel.com --- arch/x86/events/rapl.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c index 589c6885560d..52e6e7ed4f78 100644 --- a/arch/x86/events/rapl.c +++ b/arch/x86/events/rapl.c @@ -806,6 +806,7 @@ static const struct x86_cpu_id rapl_model_match[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &model_skl), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, &model_skl), X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &model_spr), + X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &model_spr), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &model_skl), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &model_skl), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &model_skl), -- cgit From 74905e3de8adf0e6b5d7f455dcd32cdec13dfb6c Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 9 Nov 2022 06:03:03 -0500 Subject: KVM: nSVM: clarify recalc_intercepts() wrt CR8 The mysterious comment "We only want the cr8 intercept bits of L1" dates back to basically the introduction of nested SVM, back when the handling of "less typical" hypervisors was very haphazard. With the development of kvm-unit-tests for interrupt handling, the same code grew another vmcb_clr_intercept for the interrupt window (VINTR) vmexit, this time with a comment that is at least decent. It turns out however that the same comment applies to the CR8 write intercept, which is also a "recheck if an interrupt should be injected" intercept. The CR8 read intercept instead has not been used by KVM for 14 years (commit 649d68643ebf, "KVM: SVM: sync TPR value to V_TPR field in the VMCB"), so do not bother clearing it and let one comment describe both CR8 write and VINTR handling. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/nested.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index bc9cd7086fa9..add65dd59756 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -138,15 +138,13 @@ void recalc_intercepts(struct vcpu_svm *svm) c->intercepts[i] = h->intercepts[i]; if (g->int_ctl & V_INTR_MASKING_MASK) { - /* We only want the cr8 intercept bits of L1 */ - vmcb_clr_intercept(c, INTERCEPT_CR8_READ); - vmcb_clr_intercept(c, INTERCEPT_CR8_WRITE); - /* - * Once running L2 with HF_VINTR_MASK, EFLAGS.IF does not - * affect any interrupt we may want to inject; therefore, - * interrupt window vmexits are irrelevant to L0. + * Once running L2 with HF_VINTR_MASK, EFLAGS.IF and CR8 + * does not affect any interrupt we may want to inject; + * therefore, writes to CR8 are irrelevant to L0, as are + * interrupt window vmexits. */ + vmcb_clr_intercept(c, INTERCEPT_CR8_WRITE); vmcb_clr_intercept(c, INTERCEPT_VINTR); } -- cgit From 45e966fcca03ecdcccac7cb236e16eea38cc18af Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sat, 22 Oct 2022 04:17:53 -0400 Subject: KVM: x86: Do not return host topology information from KVM_GET_SUPPORTED_CPUID Passing the host topology to the guest is almost certainly wrong and will confuse the scheduler. In addition, several fields of these CPUID leaves vary on each processor; it is simply impossible to return the right values from KVM_GET_SUPPORTED_CPUID in such a way that they can be passed to KVM_SET_CPUID2. The values that will most likely prevent confusion are all zeroes. Userspace will have to override it anyway if it wishes to present a specific topology to the guest. Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index b14653b61470..596061c1610e 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -770,16 +770,22 @@ struct kvm_cpuid_array { int nent; }; +static struct kvm_cpuid_entry2 *get_next_cpuid(struct kvm_cpuid_array *array) +{ + if (array->nent >= array->maxnent) + return NULL; + + return &array->entries[array->nent++]; +} + static struct kvm_cpuid_entry2 *do_host_cpuid(struct kvm_cpuid_array *array, u32 function, u32 index) { - struct kvm_cpuid_entry2 *entry; + struct kvm_cpuid_entry2 *entry = get_next_cpuid(array); - if (array->nent >= array->maxnent) + if (!entry) return NULL; - entry = &array->entries[array->nent++]; - memset(entry, 0, sizeof(*entry)); entry->function = function; entry->index = index; @@ -956,22 +962,13 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) entry->edx = edx.full; break; } - /* - * Per Intel's SDM, the 0x1f is a superset of 0xb, - * thus they can be handled by common code. - */ case 0x1f: case 0xb: /* - * Populate entries until the level type (ECX[15:8]) of the - * previous entry is zero. Note, CPUID EAX.{0x1f,0xb}.0 is - * the starting entry, filled by the primary do_host_cpuid(). + * No topology; a valid topology is indicated by the presence + * of subleaf 1. */ - for (i = 1; entry->ecx & 0xff00; ++i) { - entry = do_host_cpuid(array, function, i); - if (!entry) - goto out; - } + entry->eax = entry->ebx = entry->ecx = 0; break; case 0xd: { u64 permitted_xcr0 = kvm_caps.supported_xcr0 & xstate_get_guest_group_perm(); @@ -1202,6 +1199,9 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) entry->ebx = entry->ecx = entry->edx = 0; break; case 0x8000001e: + /* Do not return host topology information. */ + entry->eax = entry->ebx = entry->ecx = 0; + entry->edx = 0; /* reserved */ break; case 0x8000001F: if (!kvm_cpu_cap_has(X86_FEATURE_SEV)) { -- cgit From 23e60258aeafb04e5dd813f03cb0c8ab7b01462a Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 11 Jan 2023 18:06:48 +0000 Subject: KVM: x86/xen: Fix lockdep warning on "recursive" gpc locking In commit 5ec3289b31 ("KVM: x86/xen: Compatibility fixes for shared runstate area") we declared it safe to obtain two gfn_to_pfn_cache locks at the same time: /* * The guest's runstate_info is split across two pages and we * need to hold and validate both GPCs simultaneously. We can * declare a lock ordering GPC1 > GPC2 because nothing else * takes them more than one at a time. */ However, we forgot to tell lockdep. Do so, by setting a subclass on the first lock before taking the second. Fixes: 5ec3289b31 ("KVM: x86/xen: Compatibility fixes for shared runstate area") Suggested-by: Peter Zijlstra Signed-off-by: David Woodhouse Message-Id: <20230111180651.14394-1-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/xen.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index 2e29bdc2949c..bfa9809721b5 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -304,8 +304,10 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic) * The guest's runstate_info is split across two pages and we * need to hold and validate both GPCs simultaneously. We can * declare a lock ordering GPC1 > GPC2 because nothing else - * takes them more than one at a time. + * takes them more than one at a time. Set a subclass on the + * gpc1 lock to make lockdep shut up about it. */ + lock_set_subclass(&gpc1->lock.dep_map, 1, _THIS_IP_); read_lock(&gpc2->lock); if (!kvm_gpc_check(gpc2, user_len2)) { -- cgit From bbe17c625d6843e9cdf14d81fbece1b0f0c3fb2f Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 11 Jan 2023 18:06:49 +0000 Subject: KVM: x86/xen: Fix potential deadlock in kvm_xen_update_runstate_guest() The kvm_xen_update_runstate_guest() function can be called when the vCPU is being scheduled out, from a preempt notifier. It *opportunistically* updates the runstate area in the guest memory, if the gfn_to_pfn_cache which caches the appropriate address is still valid. If there is *contention* when it attempts to obtain gpc->lock, then locking inside the priority inheritance checks may cause a deadlock. Lockdep reports: [13890.148997] Chain exists of: &gpc->lock --> &p->pi_lock --> &rq->__lock [13890.149002] Possible unsafe locking scenario: [13890.149003] CPU0 CPU1 [13890.149004] ---- ---- [13890.149005] lock(&rq->__lock); [13890.149007] lock(&p->pi_lock); [13890.149009] lock(&rq->__lock); [13890.149011] lock(&gpc->lock); [13890.149013] *** DEADLOCK *** In the general case, if there's contention for a read lock on gpc->lock, that's going to be because something else is either invalidating or revalidating the cache. Either way, we've raced with seeing it in an invalid state, in which case we would have aborted the opportunistic update anyway. So in the 'atomic' case when called from the preempt notifier, just switch to using read_trylock() and avoid the PI handling altogether. Signed-off-by: David Woodhouse Message-Id: <20230111180651.14394-2-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/xen.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index bfa9809721b5..651f9c5b873d 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -271,7 +271,15 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic) * Attempt to obtain the GPC lock on *both* (if there are two) * gfn_to_pfn caches that cover the region. */ - read_lock_irqsave(&gpc1->lock, flags); + if (atomic) { + local_irq_save(flags); + if (!read_trylock(&gpc1->lock)) { + local_irq_restore(flags); + return; + } + } else { + read_lock_irqsave(&gpc1->lock, flags); + } while (!kvm_gpc_check(gpc1, user_len1)) { read_unlock_irqrestore(&gpc1->lock, flags); @@ -308,7 +316,14 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic) * gpc1 lock to make lockdep shut up about it. */ lock_set_subclass(&gpc1->lock.dep_map, 1, _THIS_IP_); - read_lock(&gpc2->lock); + if (atomic) { + if (!read_trylock(&gpc2->lock)) { + read_unlock_irqrestore(&gpc1->lock, flags); + return; + } + } else { + read_lock(&gpc2->lock); + } if (!kvm_gpc_check(gpc2, user_len2)) { read_unlock(&gpc2->lock); -- cgit From 310bc39546a435c83cc27a0eba878afac0d74714 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 11 Jan 2023 18:06:51 +0000 Subject: KVM: x86/xen: Avoid deadlock by adding kvm->arch.xen.xen_lock leaf node lock In commit 14243b387137a ("KVM: x86/xen: Add KVM_IRQ_ROUTING_XEN_EVTCHN and event channel delivery") the clever version of me left some helpful notes for those who would come after him: /* * For the irqfd workqueue, using the main kvm->lock mutex is * fine since this function is invoked from kvm_set_irq() with * no other lock held, no srcu. In future if it will be called * directly from a vCPU thread (e.g. on hypercall for an IPI) * then it may need to switch to using a leaf-node mutex for * serializing the shared_info mapping. */ mutex_lock(&kvm->lock); In commit 2fd6df2f2b47 ("KVM: x86/xen: intercept EVTCHNOP_send from guests") the other version of me ran straight past that comment without reading it, and introduced a potential deadlock by taking vcpu->mutex and kvm->lock in the wrong order. Solve this as originally suggested, by adding a leaf-node lock in the Xen state rather than using kvm->lock for it. Fixes: 2fd6df2f2b47 ("KVM: x86/xen: intercept EVTCHNOP_send from guests") Signed-off-by: David Woodhouse Message-Id: <20230111180651.14394-4-dwmw2@infradead.org> [Rebase, add docs. - Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/xen.c | 67 ++++++++++++++++++----------------------- 2 files changed, 31 insertions(+), 37 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f35f1ff4427b..6aaae18f1854 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1111,6 +1111,7 @@ struct msr_bitmap_range { /* Xen emulation context */ struct kvm_xen { + struct mutex xen_lock; u32 xen_version; bool long_mode; bool runstate_update_flag; diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index 651f9c5b873d..8fd41f5deae3 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -607,26 +607,26 @@ int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data) if (!IS_ENABLED(CONFIG_64BIT) && data->u.long_mode) { r = -EINVAL; } else { - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.xen.xen_lock); kvm->arch.xen.long_mode = !!data->u.long_mode; - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.xen.xen_lock); r = 0; } break; case KVM_XEN_ATTR_TYPE_SHARED_INFO: - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.xen.xen_lock); r = kvm_xen_shared_info_init(kvm, data->u.shared_info.gfn); - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.xen.xen_lock); break; case KVM_XEN_ATTR_TYPE_UPCALL_VECTOR: if (data->u.vector && data->u.vector < 0x10) r = -EINVAL; else { - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.xen.xen_lock); kvm->arch.xen.upcall_vector = data->u.vector; - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.xen.xen_lock); r = 0; } break; @@ -636,9 +636,9 @@ int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data) break; case KVM_XEN_ATTR_TYPE_XEN_VERSION: - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.xen.xen_lock); kvm->arch.xen.xen_version = data->u.xen_version; - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.xen.xen_lock); r = 0; break; @@ -647,9 +647,9 @@ int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data) r = -EOPNOTSUPP; break; } - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.xen.xen_lock); kvm->arch.xen.runstate_update_flag = !!data->u.runstate_update_flag; - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.xen.xen_lock); r = 0; break; @@ -664,7 +664,7 @@ int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data) { int r = -ENOENT; - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.xen.xen_lock); switch (data->type) { case KVM_XEN_ATTR_TYPE_LONG_MODE: @@ -703,7 +703,7 @@ int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data) break; } - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.xen.xen_lock); return r; } @@ -711,7 +711,7 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data) { int idx, r = -ENOENT; - mutex_lock(&vcpu->kvm->lock); + mutex_lock(&vcpu->kvm->arch.xen.xen_lock); idx = srcu_read_lock(&vcpu->kvm->srcu); switch (data->type) { @@ -939,7 +939,7 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data) } srcu_read_unlock(&vcpu->kvm->srcu, idx); - mutex_unlock(&vcpu->kvm->lock); + mutex_unlock(&vcpu->kvm->arch.xen.xen_lock); return r; } @@ -947,7 +947,7 @@ int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data) { int r = -ENOENT; - mutex_lock(&vcpu->kvm->lock); + mutex_lock(&vcpu->kvm->arch.xen.xen_lock); switch (data->type) { case KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO: @@ -1030,7 +1030,7 @@ int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data) break; } - mutex_unlock(&vcpu->kvm->lock); + mutex_unlock(&vcpu->kvm->arch.xen.xen_lock); return r; } @@ -1123,7 +1123,7 @@ int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc) xhc->blob_size_32 || xhc->blob_size_64)) return -EINVAL; - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.xen.xen_lock); if (xhc->msr && !kvm->arch.xen_hvm_config.msr) static_branch_inc(&kvm_xen_enabled.key); @@ -1132,7 +1132,7 @@ int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc) memcpy(&kvm->arch.xen_hvm_config, xhc, sizeof(*xhc)); - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.xen.xen_lock); return 0; } @@ -1675,15 +1675,7 @@ static int kvm_xen_set_evtchn(struct kvm_xen_evtchn *xe, struct kvm *kvm) mm_borrowed = true; } - /* - * For the irqfd workqueue, using the main kvm->lock mutex is - * fine since this function is invoked from kvm_set_irq() with - * no other lock held, no srcu. In future if it will be called - * directly from a vCPU thread (e.g. on hypercall for an IPI) - * then it may need to switch to using a leaf-node mutex for - * serializing the shared_info mapping. - */ - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.xen.xen_lock); /* * It is theoretically possible for the page to be unmapped @@ -1712,7 +1704,7 @@ static int kvm_xen_set_evtchn(struct kvm_xen_evtchn *xe, struct kvm *kvm) srcu_read_unlock(&kvm->srcu, idx); } while(!rc); - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.xen.xen_lock); if (mm_borrowed) kthread_unuse_mm(kvm->mm); @@ -1828,7 +1820,7 @@ static int kvm_xen_eventfd_update(struct kvm *kvm, int ret; /* Protect writes to evtchnfd as well as the idr lookup. */ - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.xen.xen_lock); evtchnfd = idr_find(&kvm->arch.xen.evtchn_ports, port); ret = -ENOENT; @@ -1859,7 +1851,7 @@ static int kvm_xen_eventfd_update(struct kvm *kvm, } ret = 0; out_unlock: - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.xen.xen_lock); return ret; } @@ -1922,10 +1914,10 @@ static int kvm_xen_eventfd_assign(struct kvm *kvm, evtchnfd->deliver.port.priority = data->u.evtchn.deliver.port.priority; } - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.xen.xen_lock); ret = idr_alloc(&kvm->arch.xen.evtchn_ports, evtchnfd, port, port + 1, GFP_KERNEL); - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.xen.xen_lock); if (ret >= 0) return 0; @@ -1943,9 +1935,9 @@ static int kvm_xen_eventfd_deassign(struct kvm *kvm, u32 port) { struct evtchnfd *evtchnfd; - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.xen.xen_lock); evtchnfd = idr_remove(&kvm->arch.xen.evtchn_ports, port); - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.xen.xen_lock); if (!evtchnfd) return -ENOENT; @@ -1963,7 +1955,7 @@ static int kvm_xen_eventfd_reset(struct kvm *kvm) int i; int n = 0; - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.xen.xen_lock); /* * Because synchronize_srcu() cannot be called inside the @@ -1975,7 +1967,7 @@ static int kvm_xen_eventfd_reset(struct kvm *kvm) all_evtchnfds = kmalloc_array(n, sizeof(struct evtchnfd *), GFP_KERNEL); if (!all_evtchnfds) { - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.xen.xen_lock); return -ENOMEM; } @@ -1984,7 +1976,7 @@ static int kvm_xen_eventfd_reset(struct kvm *kvm) all_evtchnfds[n++] = evtchnfd; idr_remove(&kvm->arch.xen.evtchn_ports, evtchnfd->send_port); } - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.xen.xen_lock); synchronize_srcu(&kvm->srcu); @@ -2086,6 +2078,7 @@ void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu) void kvm_xen_init_vm(struct kvm *kvm) { + mutex_init(&kvm->arch.xen.xen_lock); idr_init(&kvm->arch.xen.evtchn_ports); kvm_gpc_init(&kvm->arch.xen.shinfo_cache, kvm, NULL, KVM_HOST_USES_PFN); } -- cgit