diff options
Diffstat (limited to 'arch/x86/kernel/kprobes/core.c')
| -rw-r--r-- | arch/x86/kernel/kprobes/core.c | 241 |
1 files changed, 129 insertions, 112 deletions
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index eb8bc82846b9..c1fac3a9fecc 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -37,12 +37,15 @@ #include <linux/extable.h> #include <linux/kdebug.h> #include <linux/kallsyms.h> +#include <linux/kgdb.h> #include <linux/ftrace.h> #include <linux/kasan.h> -#include <linux/moduleloader.h> #include <linux/objtool.h> #include <linux/vmalloc.h> #include <linux/pgtable.h> +#include <linux/set_memory.h> +#include <linux/cfi.h> +#include <linux/execmem.h> #include <asm/text-patching.h> #include <asm/cacheflush.h> @@ -51,7 +54,6 @@ #include <asm/alternative.h> #include <asm/insn.h> #include <asm/debugreg.h> -#include <asm/set_memory.h> #include <asm/ibt.h> #include "common.h" @@ -135,14 +137,13 @@ NOKPROBE_SYMBOL(synthesize_relcall); * Returns non-zero if INSN is boostable. * RIP relative instructions are adjusted at copying time in 64 bits mode */ -int can_boost(struct insn *insn, void *addr) +bool can_boost(struct insn *insn, void *addr) { kprobe_opcode_t opcode; insn_byte_t prefix; - int i; if (search_exception_tables((unsigned long)addr)) - return 0; /* Page fault may occur on this address. */ + return false; /* Page fault may occur on this address. */ /* 2nd-byte opcode */ if (insn->opcode.nbytes == 2) @@ -150,15 +151,15 @@ int can_boost(struct insn *insn, void *addr) (unsigned long *)twobyte_is_boostable); if (insn->opcode.nbytes != 1) - return 0; + return false; - for_each_insn_prefix(insn, i, prefix) { + for_each_insn_prefix(insn, prefix) { insn_attr_t attr; attr = inat_get_opcode_attribute(prefix); /* Can't boost Address-size override prefix and CS override prefix */ if (prefix == 0x2e || inat_is_address_size_prefix(attr)) - return 0; + return false; } opcode = insn->opcode.bytes[0]; @@ -167,24 +168,35 @@ int can_boost(struct insn *insn, void *addr) case 0x62: /* bound */ case 0x70 ... 0x7f: /* Conditional jumps */ case 0x9a: /* Call far */ - case 0xc0 ... 0xc1: /* Grp2 */ case 0xcc ... 0xce: /* software exceptions */ - case 0xd0 ... 0xd3: /* Grp2 */ case 0xd6: /* (UD) */ case 0xd8 ... 0xdf: /* ESC */ case 0xe0 ... 0xe3: /* LOOP*, JCXZ */ case 0xe8 ... 0xe9: /* near Call, JMP */ case 0xeb: /* Short JMP */ case 0xf0 ... 0xf4: /* LOCK/REP, HLT */ + /* ... are not boostable */ + return false; + case 0xc0 ... 0xc1: /* Grp2 */ + case 0xd0 ... 0xd3: /* Grp2 */ + /* + * AMD uses nnn == 110 as SHL/SAL, but Intel makes it reserved. + */ + return X86_MODRM_REG(insn->modrm.bytes[0]) != 0b110; case 0xf6 ... 0xf7: /* Grp3 */ + /* AMD uses nnn == 001 as TEST, but Intel makes it reserved. */ + return X86_MODRM_REG(insn->modrm.bytes[0]) != 0b001; case 0xfe: /* Grp4 */ - /* ... are not boostable */ - return 0; + /* Only INC and DEC are boostable */ + return X86_MODRM_REG(insn->modrm.bytes[0]) == 0b000 || + X86_MODRM_REG(insn->modrm.bytes[0]) == 0b001; case 0xff: /* Grp5 */ - /* Only indirect jmp is boostable */ - return X86_MODRM_REG(insn->modrm.bytes[0]) == 4; + /* Only INC, DEC, and indirect JMP are boostable */ + return X86_MODRM_REG(insn->modrm.bytes[0]) == 0b000 || + X86_MODRM_REG(insn->modrm.bytes[0]) == 0b001 || + X86_MODRM_REG(insn->modrm.bytes[0]) == 0b100; default: - return 1; + return true; } } @@ -250,21 +262,40 @@ unsigned long recover_probed_instruction(kprobe_opcode_t *buf, unsigned long add return __recover_probed_insn(buf, addr); } -/* Check if paddr is at an instruction boundary */ -static int can_probe(unsigned long paddr) +/* Check if insn is INT or UD */ +static inline bool is_exception_insn(struct insn *insn) +{ + /* UD uses 0f escape */ + if (insn->opcode.bytes[0] == 0x0f) { + /* UD0 / UD1 / UD2 */ + return insn->opcode.bytes[1] == 0xff || + insn->opcode.bytes[1] == 0xb9 || + insn->opcode.bytes[1] == 0x0b; + } + + /* INT3 / INT n / INTO / INT1 */ + return insn->opcode.bytes[0] == 0xcc || + insn->opcode.bytes[0] == 0xcd || + insn->opcode.bytes[0] == 0xce || + insn->opcode.bytes[0] == 0xf1; +} + +/* + * Check if paddr is at an instruction boundary and that instruction can + * be probed + */ +static bool can_probe(unsigned long paddr) { unsigned long addr, __addr, offset = 0; struct insn insn; kprobe_opcode_t buf[MAX_INSN_SIZE]; if (!kallsyms_lookup_size_offset(paddr, NULL, &offset)) - return 0; + return false; /* Decode instructions */ addr = paddr - offset; while (addr < paddr) { - int ret; - /* * Check if the instruction has been modified by another * kprobe, in which case we replace the breakpoint by the @@ -275,29 +306,73 @@ static int can_probe(unsigned long paddr) */ __addr = recover_probed_instruction(buf, addr); if (!__addr) - return 0; + return false; - ret = insn_decode_kernel(&insn, (void *)__addr); - if (ret < 0) - return 0; + if (insn_decode_kernel(&insn, (void *)__addr) < 0) + return false; +#ifdef CONFIG_KGDB /* - * Another debugging subsystem might insert this breakpoint. - * In that case, we can't recover it. + * If there is a dynamically installed kgdb sw breakpoint, + * this function should not be probed. */ - if (insn.opcode.bytes[0] == INT3_INSN_OPCODE) - return 0; + if (insn.opcode.bytes[0] == INT3_INSN_OPCODE && + kgdb_has_hit_break(addr)) + return false; +#endif addr += insn.length; } - return (addr == paddr); + /* Check if paddr is at an instruction boundary */ + if (addr != paddr) + return false; + + __addr = recover_probed_instruction(buf, addr); + if (!__addr) + return false; + + if (insn_decode_kernel(&insn, (void *)__addr) < 0) + return false; + + /* INT and UD are special and should not be kprobed */ + if (is_exception_insn(&insn)) + return false; + + if (IS_ENABLED(CONFIG_CFI)) { + /* + * The compiler generates the following instruction sequence + * for indirect call checks and cfi.c decodes this; + * + * movl -<id>, %r10d ; 6 bytes + * addl -4(%reg), %r10d ; 4 bytes + * je .Ltmp1 ; 2 bytes + * ud2 ; <- regs->ip + * .Ltmp1: + * + * Also, these movl and addl are used for showing expected + * type. So those must not be touched. + */ + if (insn.opcode.value == 0xBA) + offset = 12; + else if (insn.opcode.value == 0x3) + offset = 6; + else + goto out; + + /* This movl/addl is used for decoding CFI. */ + if (is_cfi_trap(addr + offset)) + return false; + } + +out: + return true; } /* If x86 supports IBT (ENDBR) it must be skipped. */ kprobe_opcode_t *arch_adjust_kprobe_addr(unsigned long addr, unsigned long offset, bool *on_func_entry) { - if (is_endbr(*(u32 *)addr)) { + if (is_endbr((u32 *)addr)) { *on_func_entry = !offset || offset == 4; if (*on_func_entry) offset = 4; @@ -405,31 +480,6 @@ static int prepare_singlestep(kprobe_opcode_t *buf, struct kprobe *p, return len; } -/* Make page to RO mode when allocate it */ -void *alloc_insn_page(void) -{ - void *page; - - page = module_alloc(PAGE_SIZE); - if (!page) - return NULL; - - set_vm_flush_reset_perms(page); - /* - * First make the page read-only, and only then make it executable to - * prevent it from being W+X in between. - */ - set_memory_ro((unsigned long)page, 1); - - /* - * TODO: Once additional kernel code protection mechanisms are set, ensure - * that the page was not maliciously altered and it is still zeroed. - */ - set_memory_x((unsigned long)page, 1); - - return page; -} - /* Kprobe x86 instruction emulation - only regs->ip or IF flag modifiers */ static void kprobe_emulate_ifmodifiers(struct kprobe *p, struct pt_regs *regs) @@ -467,50 +517,26 @@ static void kprobe_emulate_call(struct kprobe *p, struct pt_regs *regs) } NOKPROBE_SYMBOL(kprobe_emulate_call); -static nokprobe_inline -void __kprobe_emulate_jmp(struct kprobe *p, struct pt_regs *regs, bool cond) +static void kprobe_emulate_jmp(struct kprobe *p, struct pt_regs *regs) { unsigned long ip = regs->ip - INT3_INSN_SIZE + p->ainsn.size; - if (cond) - ip += p->ainsn.rel32; + ip += p->ainsn.rel32; int3_emulate_jmp(regs, ip); } - -static void kprobe_emulate_jmp(struct kprobe *p, struct pt_regs *regs) -{ - __kprobe_emulate_jmp(p, regs, true); -} NOKPROBE_SYMBOL(kprobe_emulate_jmp); -static const unsigned long jcc_mask[6] = { - [0] = X86_EFLAGS_OF, - [1] = X86_EFLAGS_CF, - [2] = X86_EFLAGS_ZF, - [3] = X86_EFLAGS_CF | X86_EFLAGS_ZF, - [4] = X86_EFLAGS_SF, - [5] = X86_EFLAGS_PF, -}; - static void kprobe_emulate_jcc(struct kprobe *p, struct pt_regs *regs) { - bool invert = p->ainsn.jcc.type & 1; - bool match; + unsigned long ip = regs->ip - INT3_INSN_SIZE + p->ainsn.size; - if (p->ainsn.jcc.type < 0xc) { - match = regs->flags & jcc_mask[p->ainsn.jcc.type >> 1]; - } else { - match = ((regs->flags & X86_EFLAGS_SF) >> X86_EFLAGS_SF_BIT) ^ - ((regs->flags & X86_EFLAGS_OF) >> X86_EFLAGS_OF_BIT); - if (p->ainsn.jcc.type >= 0xe) - match = match || (regs->flags & X86_EFLAGS_ZF); - } - __kprobe_emulate_jmp(p, regs, (match && !invert) || (!match && invert)); + int3_emulate_jcc(regs, p->ainsn.jcc.type, ip, p->ainsn.rel32); } NOKPROBE_SYMBOL(kprobe_emulate_jcc); static void kprobe_emulate_loop(struct kprobe *p, struct pt_regs *regs) { + unsigned long ip = regs->ip - INT3_INSN_SIZE + p->ainsn.size; bool match; if (p->ainsn.loop.type != 3) { /* LOOP* */ @@ -538,7 +564,9 @@ static void kprobe_emulate_loop(struct kprobe *p, struct pt_regs *regs) else if (p->ainsn.loop.type == 1) /* LOOPE */ match = match && (regs->flags & X86_EFLAGS_ZF); - __kprobe_emulate_jmp(p, regs, match); + if (match) + ip += p->ainsn.rel32; + int3_emulate_jmp(regs, ip); } NOKPROBE_SYMBOL(kprobe_emulate_loop); @@ -567,7 +595,8 @@ static void kprobe_emulate_call_indirect(struct kprobe *p, struct pt_regs *regs) { unsigned long offs = addrmode_regoffs[p->ainsn.indirect.reg]; - int3_emulate_call(regs, regs_get_register(regs, offs)); + int3_emulate_push(regs, regs->ip - INT3_INSN_SIZE + p->ainsn.size); + int3_emulate_jmp(regs, regs_get_register(regs, offs)); } NOKPROBE_SYMBOL(kprobe_emulate_call_indirect); @@ -628,7 +657,7 @@ static int prepare_emulation(struct kprobe *p, struct insn *insn) /* 1 byte conditional jump */ p->ainsn.emulate_op = kprobe_emulate_jcc; p->ainsn.jcc.type = opcode & 0xf; - p->ainsn.rel32 = *(char *)insn->immediate.bytes; + p->ainsn.rel32 = insn->immediate.value; break; case 0x0f: opcode = insn->opcode.bytes[1]; @@ -662,17 +691,19 @@ static int prepare_emulation(struct kprobe *p, struct insn *insn) * is determined by the MOD/RM byte. */ opcode = insn->modrm.bytes[0]; - if ((opcode & 0x30) == 0x10) { - if ((opcode & 0x8) == 0x8) - return -EOPNOTSUPP; /* far call */ - /* call absolute, indirect */ + switch (X86_MODRM_REG(opcode)) { + case 0b010: /* FF /2, call near, absolute indirect */ p->ainsn.emulate_op = kprobe_emulate_call_indirect; - } else if ((opcode & 0x30) == 0x20) { - if ((opcode & 0x8) == 0x8) - return -EOPNOTSUPP; /* far jmp */ - /* jmp near absolute indirect */ + break; + case 0b100: /* FF /4, jmp near, absolute indirect */ p->ainsn.emulate_op = kprobe_emulate_jmp_indirect; - } else + break; + case 0b011: /* FF /3, call far, absolute indirect */ + case 0b101: /* FF /5, jmp far, absolute indirect */ + return -EOPNOTSUPP; + } + + if (!p->ainsn.emulate_op) break; if (insn->addr_bytes != sizeof(unsigned long)) @@ -758,7 +789,7 @@ void arch_arm_kprobe(struct kprobe *p) u8 int3 = INT3_INSN_OPCODE; text_poke(p->addr, &int3, 1); - text_poke_sync(); + smp_text_poke_sync_each_cpu(); perf_event_text_poke(p->addr, &p->opcode, 1, &int3, 1); } @@ -768,7 +799,7 @@ void arch_disarm_kprobe(struct kprobe *p) perf_event_text_poke(p->addr, &int3, 1, &p->opcode, 1); text_poke(p->addr, &p->opcode, 1); - text_poke_sync(); + smp_text_poke_sync_each_cpu(); } void arch_remove_kprobe(struct kprobe *p) @@ -993,20 +1024,6 @@ int kprobe_int3_handler(struct pt_regs *regs) kprobe_post_process(p, regs, kcb); return 1; } - } - - if (*addr != INT3_INSN_OPCODE) { - /* - * The breakpoint instruction was removed right - * after we hit it. Another cpu has removed - * either a probepoint or a debugger breakpoint - * at this address. In either case, no further - * handling of this interrupt is appropriate. - * Back up over the (now missing) int3 and run - * the original instruction. - */ - regs->ip = (unsigned long)addr; - return 1; } /* else: not a kprobe fault; let the kernel handle it */ return 0; |
