From 93281c4a96572a34504244969b938e035204778d Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 3 Nov 2020 17:28:30 +0100 Subject: x86/insn: Add an insn_decode() API Users of the instruction decoder should use this to decode instruction bytes. For that, have insn*() helpers return an int value to denote success/failure. When there's an error fetching the next insn byte and the insn falls short, return -ENODATA to denote that. While at it, make insn_get_opcode() more stricter as to whether what has seen so far is a valid insn and if not. Copy linux/kconfig.h for the tools-version of the decoder so that it can use IS_ENABLED(). Also, cast the INSN_MODE_KERN dummy define value to (enum insn_mode) for tools use of the decoder because perf tool builds with -Werror and errors out with -Werror=sign-compare otherwise. Signed-off-by: Borislav Petkov Acked-by: Masami Hiramatsu Link: https://lkml.kernel.org/r/20210304174237.31945-5-bp@alien8.de --- tools/arch/x86/include/asm/insn.h | 24 +++-- tools/arch/x86/lib/insn.c | 222 +++++++++++++++++++++++++++++--------- 2 files changed, 191 insertions(+), 55 deletions(-) (limited to 'tools/arch') diff --git a/tools/arch/x86/include/asm/insn.h b/tools/arch/x86/include/asm/insn.h index 7e1239aa7dd9..2fe19b1a8765 100644 --- a/tools/arch/x86/include/asm/insn.h +++ b/tools/arch/x86/include/asm/insn.h @@ -132,13 +132,23 @@ struct insn { #define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */ extern void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64); -extern void insn_get_prefixes(struct insn *insn); -extern void insn_get_opcode(struct insn *insn); -extern void insn_get_modrm(struct insn *insn); -extern void insn_get_sib(struct insn *insn); -extern void insn_get_displacement(struct insn *insn); -extern void insn_get_immediate(struct insn *insn); -extern void insn_get_length(struct insn *insn); +extern int insn_get_prefixes(struct insn *insn); +extern int insn_get_opcode(struct insn *insn); +extern int insn_get_modrm(struct insn *insn); +extern int insn_get_sib(struct insn *insn); +extern int insn_get_displacement(struct insn *insn); +extern int insn_get_immediate(struct insn *insn); +extern int insn_get_length(struct insn *insn); + +enum insn_mode { + INSN_MODE_32, + INSN_MODE_64, + /* Mode is determined by the current kernel build. */ + INSN_MODE_KERN, + INSN_NUM_MODES, +}; + +extern int insn_decode(struct insn *insn, const void *kaddr, int buf_len, enum insn_mode m); /* Attribute will be determined after getting ModRM (for opcode groups) */ static inline void insn_get_attribute(struct insn *insn) diff --git a/tools/arch/x86/lib/insn.c b/tools/arch/x86/lib/insn.c index 1174c43b3944..be2b057f91d4 100644 --- a/tools/arch/x86/lib/insn.c +++ b/tools/arch/x86/lib/insn.c @@ -11,10 +11,13 @@ #else #include #endif -#include "../include/asm/inat.h" /* __ignore_sync_check__ */ -#include "../include/asm/insn.h" /* __ignore_sync_check__ */ +#include /*__ignore_sync_check__ */ +#include /* __ignore_sync_check__ */ -#include "../include/asm/emulate_prefix.h" /* __ignore_sync_check__ */ +#include +#include + +#include /* __ignore_sync_check__ */ #define leXX_to_cpu(t, r) \ ({ \ @@ -112,8 +115,12 @@ static void insn_get_emulate_prefix(struct insn *insn) * Populates the @insn->prefixes bitmap, and updates @insn->next_byte * to point to the (first) opcode. No effect if @insn->prefixes.got * is already set. + * + * * Returns: + * 0: on success + * < 0: on error */ -void insn_get_prefixes(struct insn *insn) +int insn_get_prefixes(struct insn *insn) { struct insn_field *prefixes = &insn->prefixes; insn_attr_t attr; @@ -121,7 +128,7 @@ void insn_get_prefixes(struct insn *insn) int i, nb; if (prefixes->got) - return; + return 0; insn_get_emulate_prefix(insn); @@ -231,8 +238,10 @@ vex_end: prefixes->got = 1; + return 0; + err_out: - return; + return -ENODATA; } /** @@ -244,16 +253,25 @@ err_out: * If necessary, first collects any preceding (prefix) bytes. * Sets @insn->opcode.value = opcode1. No effect if @insn->opcode.got * is already 1. + * + * Returns: + * 0: on success + * < 0: on error */ -void insn_get_opcode(struct insn *insn) +int insn_get_opcode(struct insn *insn) { struct insn_field *opcode = &insn->opcode; + int pfx_id, ret; insn_byte_t op; - int pfx_id; + if (opcode->got) - return; - if (!insn->prefixes.got) - insn_get_prefixes(insn); + return 0; + + if (!insn->prefixes.got) { + ret = insn_get_prefixes(insn); + if (ret) + return ret; + } /* Get first opcode */ op = get_next(insn_byte_t, insn); @@ -268,9 +286,13 @@ void insn_get_opcode(struct insn *insn) insn->attr = inat_get_avx_attribute(op, m, p); if ((inat_must_evex(insn->attr) && !insn_is_evex(insn)) || (!inat_accept_vex(insn->attr) && - !inat_is_group(insn->attr))) - insn->attr = 0; /* This instruction is bad */ - goto end; /* VEX has only 1 byte for opcode */ + !inat_is_group(insn->attr))) { + /* This instruction is bad */ + insn->attr = 0; + return -EINVAL; + } + /* VEX has only 1 byte for opcode */ + goto end; } insn->attr = inat_get_opcode_attribute(op); @@ -281,13 +303,18 @@ void insn_get_opcode(struct insn *insn) pfx_id = insn_last_prefix_id(insn); insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr); } - if (inat_must_vex(insn->attr)) - insn->attr = 0; /* This instruction is bad */ + + if (inat_must_vex(insn->attr)) { + /* This instruction is bad */ + insn->attr = 0; + return -EINVAL; + } end: opcode->got = 1; + return 0; err_out: - return; + return -ENODATA; } /** @@ -297,15 +324,25 @@ err_out: * Populates @insn->modrm and updates @insn->next_byte to point past the * ModRM byte, if any. If necessary, first collects the preceding bytes * (prefixes and opcode(s)). No effect if @insn->modrm.got is already 1. + * + * Returns: + * 0: on success + * < 0: on error */ -void insn_get_modrm(struct insn *insn) +int insn_get_modrm(struct insn *insn) { struct insn_field *modrm = &insn->modrm; insn_byte_t pfx_id, mod; + int ret; + if (modrm->got) - return; - if (!insn->opcode.got) - insn_get_opcode(insn); + return 0; + + if (!insn->opcode.got) { + ret = insn_get_opcode(insn); + if (ret) + return ret; + } if (inat_has_modrm(insn->attr)) { mod = get_next(insn_byte_t, insn); @@ -314,17 +351,22 @@ void insn_get_modrm(struct insn *insn) pfx_id = insn_last_prefix_id(insn); insn->attr = inat_get_group_attribute(mod, pfx_id, insn->attr); - if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) - insn->attr = 0; /* This is bad */ + if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) { + /* Bad insn */ + insn->attr = 0; + return -EINVAL; + } } } if (insn->x86_64 && inat_is_force64(insn->attr)) insn->opnd_bytes = 8; + modrm->got = 1; + return 0; err_out: - return; + return -ENODATA; } @@ -338,11 +380,16 @@ err_out: int insn_rip_relative(struct insn *insn) { struct insn_field *modrm = &insn->modrm; + int ret; if (!insn->x86_64) return 0; - if (!modrm->got) - insn_get_modrm(insn); + + if (!modrm->got) { + ret = insn_get_modrm(insn); + if (ret) + return 0; + } /* * For rip-relative instructions, the mod field (top 2 bits) * is zero and the r/m field (bottom 3 bits) is 0x5. @@ -356,15 +403,25 @@ int insn_rip_relative(struct insn *insn) * * If necessary, first collects the instruction up to and including the * ModRM byte. + * + * Returns: + * 0: if decoding succeeded + * < 0: otherwise. */ -void insn_get_sib(struct insn *insn) +int insn_get_sib(struct insn *insn) { insn_byte_t modrm; + int ret; if (insn->sib.got) - return; - if (!insn->modrm.got) - insn_get_modrm(insn); + return 0; + + if (!insn->modrm.got) { + ret = insn_get_modrm(insn); + if (ret) + return ret; + } + if (insn->modrm.nbytes) { modrm = insn->modrm.bytes[0]; if (insn->addr_bytes != 2 && @@ -375,8 +432,10 @@ void insn_get_sib(struct insn *insn) } insn->sib.got = 1; + return 0; + err_out: - return; + return -ENODATA; } @@ -387,15 +446,25 @@ err_out: * If necessary, first collects the instruction up to and including the * SIB byte. * Displacement value is sign-expanded. + * + * * Returns: + * 0: if decoding succeeded + * < 0: otherwise. */ -void insn_get_displacement(struct insn *insn) +int insn_get_displacement(struct insn *insn) { insn_byte_t mod, rm, base; + int ret; if (insn->displacement.got) - return; - if (!insn->sib.got) - insn_get_sib(insn); + return 0; + + if (!insn->sib.got) { + ret = insn_get_sib(insn); + if (ret) + return ret; + } + if (insn->modrm.nbytes) { /* * Interpreting the modrm byte: @@ -437,9 +506,10 @@ void insn_get_displacement(struct insn *insn) } out: insn->displacement.got = 1; + return 0; err_out: - return; + return -ENODATA; } /* Decode moffset16/32/64. Return 0 if failed */ @@ -538,20 +608,30 @@ err_out: } /** - * insn_get_immediate() - Get the immediates of instruction + * insn_get_immediate() - Get the immediate in an instruction * @insn: &struct insn containing instruction * * If necessary, first collects the instruction up to and including the * displacement bytes. * Basically, most of immediates are sign-expanded. Unsigned-value can be - * get by bit masking with ((1 << (nbytes * 8)) - 1) + * computed by bit masking with ((1 << (nbytes * 8)) - 1) + * + * Returns: + * 0: on success + * < 0: on error */ -void insn_get_immediate(struct insn *insn) +int insn_get_immediate(struct insn *insn) { + int ret; + if (insn->immediate.got) - return; - if (!insn->displacement.got) - insn_get_displacement(insn); + return 0; + + if (!insn->displacement.got) { + ret = insn_get_displacement(insn); + if (ret) + return ret; + } if (inat_has_moffset(insn->attr)) { if (!__get_moffset(insn)) @@ -598,9 +678,10 @@ void insn_get_immediate(struct insn *insn) } done: insn->immediate.got = 1; + return 0; err_out: - return; + return -ENODATA; } /** @@ -609,13 +690,58 @@ err_out: * * If necessary, first collects the instruction up to and including the * immediates bytes. - */ -void insn_get_length(struct insn *insn) + * + * Returns: + * - 0 on success + * - < 0 on error +*/ +int insn_get_length(struct insn *insn) { + int ret; + if (insn->length) - return; - if (!insn->immediate.got) - insn_get_immediate(insn); + return 0; + + if (!insn->immediate.got) { + ret = insn_get_immediate(insn); + if (ret) + return ret; + } + insn->length = (unsigned char)((unsigned long)insn->next_byte - (unsigned long)insn->kaddr); + + return 0; +} + +/** + * insn_decode() - Decode an x86 instruction + * @insn: &struct insn to be initialized + * @kaddr: address (in kernel memory) of instruction (or copy thereof) + * @buf_len: length of the insn buffer at @kaddr + * @m: insn mode, see enum insn_mode + * + * Returns: + * 0: if decoding succeeded + * < 0: otherwise. + */ +int insn_decode(struct insn *insn, const void *kaddr, int buf_len, enum insn_mode m) +{ + int ret; + +#define INSN_MODE_KERN (enum insn_mode)-1 /* __ignore_sync_check__ mode is only valid in the kernel */ + + if (m == INSN_MODE_KERN) + insn_init(insn, kaddr, buf_len, IS_ENABLED(CONFIG_X86_64)); + else + insn_init(insn, kaddr, buf_len, m == INSN_MODE_64); + + ret = insn_get_length(insn); + if (ret) + return ret; + + if (insn_complete(insn)) + return 0; + + return -EINVAL; } -- cgit