From d955189ae42796621fb439e5e778ccaeebc2a1e7 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 30 Aug 2017 14:12:38 +1000 Subject: powerpc: Handle opposite-endian processes in emulation code This adds code to the load and store emulation code to byte-swap the data appropriately when the process being emulated is set to the opposite endianness to that of the kernel. This also enables the emulation for the multiple-register loads and stores (lmw, stmw, lswi, stswi, lswx, stswx) to work for little-endian. In little-endian mode, the partial word at the end of a transfer for lsw*/stsw* (when the byte count is not a multiple of 4) is loaded/stored at the least-significant end of the register. Additionally, this fixes a bug in the previous code in that it could call read_mem/write_mem with a byte count that was not 1, 2, 4 or 8. Note that this only works correctly on processors with "true" little-endian mode, such as IBM POWER processors from POWER6 on, not the so-called "PowerPC" little-endian mode that uses address swizzling as implemented on the old 32-bit 603, 604, 740/750, 74xx CPUs. 
Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/sstep.h | 7 +- arch/powerpc/lib/sstep.c | 184 +++++++++++++++++++++++++++------------ 2 files changed, 131 insertions(+), 60 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h index 793639a5aa5e..958c2c55bcfe 100644 --- a/arch/powerpc/include/asm/sstep.h +++ b/arch/powerpc/include/asm/sstep.h @@ -153,7 +153,8 @@ void emulate_update_regs(struct pt_regs *reg, struct instruction_op *op); extern int emulate_step(struct pt_regs *regs, unsigned int instr); extern void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg, - const void *mem); -extern void emulate_vsx_store(struct instruction_op *op, const union vsx_reg *reg, - void *mem); + const void *mem, bool cross_endian); +extern void emulate_vsx_store(struct instruction_op *op, + const union vsx_reg *reg, void *mem, + bool cross_endian); extern int emulate_dcbz(unsigned long ea, struct pt_regs *regs); diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 04ffab970aec..5d8284938898 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -217,6 +217,33 @@ static nokprobe_inline unsigned long byterev_8(unsigned long x) } #endif +static nokprobe_inline void do_byte_reverse(void *ptr, int nb) +{ + switch (nb) { + case 2: + *(u16 *)ptr = byterev_2(*(u16 *)ptr); + break; + case 4: + *(u32 *)ptr = byterev_4(*(u32 *)ptr); + break; +#ifdef __powerpc64__ + case 8: + *(unsigned long *)ptr = byterev_8(*(unsigned long *)ptr); + break; + case 16: { + unsigned long *up = (unsigned long *)ptr; + unsigned long tmp; + tmp = byterev_8(up[0]); + up[0] = byterev_8(up[1]); + up[1] = tmp; + break; + } +#endif + default: + WARN_ON_ONCE(1); + } +} + static nokprobe_inline int read_mem_aligned(unsigned long *dest, unsigned long ea, int nb, struct pt_regs *regs) @@ -430,7 +457,8 @@ NOKPROBE_SYMBOL(write_mem); * These access either the real FP 
register or the image in the * thread_struct, depending on regs->msr & MSR_FP. */ -static int do_fp_load(int rn, unsigned long ea, int nb, struct pt_regs *regs) +static int do_fp_load(int rn, unsigned long ea, int nb, struct pt_regs *regs, + bool cross_endian) { int err; union { @@ -445,6 +473,11 @@ static int do_fp_load(int rn, unsigned long ea, int nb, struct pt_regs *regs) err = copy_mem_in(u.b, ea, nb, regs); if (err) return err; + if (unlikely(cross_endian)) { + do_byte_reverse(u.b, min(nb, 8)); + if (nb == 16) + do_byte_reverse(&u.b[8], 8); + } preempt_disable(); if (nb == 4) conv_sp_to_dp(&u.f, &u.d[0]); @@ -465,7 +498,8 @@ static int do_fp_load(int rn, unsigned long ea, int nb, struct pt_regs *regs) } NOKPROBE_SYMBOL(do_fp_load); -static int do_fp_store(int rn, unsigned long ea, int nb, struct pt_regs *regs) +static int do_fp_store(int rn, unsigned long ea, int nb, struct pt_regs *regs, + bool cross_endian) { union { float f; @@ -491,6 +525,11 @@ static int do_fp_store(int rn, unsigned long ea, int nb, struct pt_regs *regs) u.l[1] = current->thread.TS_FPR(rn); } preempt_enable(); + if (unlikely(cross_endian)) { + do_byte_reverse(u.b, min(nb, 8)); + if (nb == 16) + do_byte_reverse(&u.b[8], 8); + } return copy_mem_out(u.b, ea, nb, regs); } NOKPROBE_SYMBOL(do_fp_store); @@ -499,7 +538,8 @@ NOKPROBE_SYMBOL(do_fp_store); #ifdef CONFIG_ALTIVEC /* For Altivec/VMX, no need to worry about alignment */ static nokprobe_inline int do_vec_load(int rn, unsigned long ea, - int size, struct pt_regs *regs) + int size, struct pt_regs *regs, + bool cross_endian) { int err; union { @@ -514,7 +554,8 @@ static nokprobe_inline int do_vec_load(int rn, unsigned long ea, err = copy_mem_in(&u.b[ea & 0xf], ea, size, regs); if (err) return err; - + if (unlikely(cross_endian)) + do_byte_reverse(&u.b[ea & 0xf], size); preempt_disable(); if (regs->msr & MSR_VEC) put_vr(rn, &u.v); @@ -525,7 +566,8 @@ static nokprobe_inline int do_vec_load(int rn, unsigned long ea, } static nokprobe_inline 
int do_vec_store(int rn, unsigned long ea, - int size, struct pt_regs *regs) + int size, struct pt_regs *regs, + bool cross_endian) { union { __vector128 v; @@ -543,49 +585,60 @@ static nokprobe_inline int do_vec_store(int rn, unsigned long ea, else u.v = current->thread.vr_state.vr[rn]; preempt_enable(); + if (unlikely(cross_endian)) + do_byte_reverse(&u.b[ea & 0xf], size); return copy_mem_out(&u.b[ea & 0xf], ea, size, regs); } #endif /* CONFIG_ALTIVEC */ #ifdef __powerpc64__ static nokprobe_inline int emulate_lq(struct pt_regs *regs, unsigned long ea, - int reg) + int reg, bool cross_endian) { int err; if (!address_ok(regs, ea, 16)) return -EFAULT; /* if aligned, should be atomic */ - if ((ea & 0xf) == 0) - return do_lq(ea, &regs->gpr[reg]); - - err = read_mem(&regs->gpr[reg + IS_LE], ea, 8, regs); - if (!err) - err = read_mem(&regs->gpr[reg + IS_BE], ea + 8, 8, regs); + if ((ea & 0xf) == 0) { + err = do_lq(ea, &regs->gpr[reg]); + } else { + err = read_mem(&regs->gpr[reg + IS_LE], ea, 8, regs); + if (!err) + err = read_mem(&regs->gpr[reg + IS_BE], ea + 8, 8, regs); + } + if (!err && unlikely(cross_endian)) + do_byte_reverse(&regs->gpr[reg], 16); return err; } static nokprobe_inline int emulate_stq(struct pt_regs *regs, unsigned long ea, - int reg) + int reg, bool cross_endian) { int err; + unsigned long vals[2]; if (!address_ok(regs, ea, 16)) return -EFAULT; + vals[0] = regs->gpr[reg]; + vals[1] = regs->gpr[reg + 1]; + if (unlikely(cross_endian)) + do_byte_reverse(vals, 16); + /* if aligned, should be atomic */ if ((ea & 0xf) == 0) - return do_stq(ea, regs->gpr[reg], regs->gpr[reg + 1]); + return do_stq(ea, vals[0], vals[1]); - err = write_mem(regs->gpr[reg + IS_LE], ea, 8, regs); + err = write_mem(vals[IS_LE], ea, 8, regs); if (!err) - err = write_mem(regs->gpr[reg + IS_BE], ea + 8, 8, regs); + err = write_mem(vals[IS_BE], ea + 8, 8, regs); return err; } #endif /* __powerpc64 */ #ifdef CONFIG_VSX void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg, - const void *mem) 
+ const void *mem, bool rev) { int size, read_size; int i, j; @@ -602,19 +655,18 @@ void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg, if (size == 0) break; memcpy(reg, mem, size); - if (IS_LE && (op->vsx_flags & VSX_LDLEFT)) { - /* reverse 16 bytes */ - unsigned long tmp; - tmp = byterev_8(reg->d[0]); - reg->d[0] = byterev_8(reg->d[1]); - reg->d[1] = tmp; - } + if (IS_LE && (op->vsx_flags & VSX_LDLEFT)) + rev = !rev; + if (rev) + do_byte_reverse(reg, 16); break; case 8: /* scalar loads, lxvd2x, lxvdsx */ read_size = (size >= 8) ? 8 : size; i = IS_LE ? 8 : 8 - read_size; memcpy(&reg->b[i], mem, read_size); + if (rev) + do_byte_reverse(&reg->b[i], 8); if (size < 8) { if (op->type & SIGNEXT) { /* size == 4 is the only case here */ @@ -626,9 +678,10 @@ void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg, preempt_enable(); } } else { - if (size == 16) - reg->d[IS_BE] = *(unsigned long *)(mem + 8); - else if (op->vsx_flags & VSX_SPLAT) + if (size == 16) { + unsigned long v = *(unsigned long *)(mem + 8); + reg->d[IS_BE] = !rev ? v : byterev_8(v); + } else if (op->vsx_flags & VSX_SPLAT) reg->d[IS_BE] = reg->d[IS_LE]; } break; @@ -637,7 +690,7 @@ void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg, wp = mem; for (j = 0; j < size / 4; ++j) { i = IS_LE ? 3 - j : j; - reg->w[i] = *wp++; + reg->w[i] = !rev ? *wp++ : byterev_4(*wp++); } if (op->vsx_flags & VSX_SPLAT) { u32 val = reg->w[IS_LE ? 3 : 0]; @@ -652,7 +705,7 @@ void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg, hp = mem; for (j = 0; j < size / 2; ++j) { i = IS_LE ? 7 - j : j; - reg->h[i] = *hp++; + reg->h[i] = !rev ? 
*hp++ : byterev_2(*hp++); } break; case 1: @@ -669,7 +722,7 @@ EXPORT_SYMBOL_GPL(emulate_vsx_load); NOKPROBE_SYMBOL(emulate_vsx_load); void emulate_vsx_store(struct instruction_op *op, const union vsx_reg *reg, - void *mem) + void *mem, bool rev) { int size, write_size; int i, j; @@ -685,7 +738,9 @@ void emulate_vsx_store(struct instruction_op *op, const union vsx_reg *reg, /* stxv, stxvx, stxvl, stxvll */ if (size == 0) break; - if (IS_LE && (op->vsx_flags & VSX_LDLEFT)) { + if (IS_LE && (op->vsx_flags & VSX_LDLEFT)) + rev = !rev; + if (rev) { /* reverse 16 bytes */ buf.d[0] = byterev_8(reg->d[1]); buf.d[1] = byterev_8(reg->d[0]); @@ -707,13 +762,18 @@ void emulate_vsx_store(struct instruction_op *op, const union vsx_reg *reg, memcpy(mem, &reg->b[i], write_size); if (size == 16) memcpy(mem + 8, &reg->d[IS_BE], 8); + if (unlikely(rev)) { + do_byte_reverse(mem, write_size); + if (size == 16) + do_byte_reverse(mem + 8, 8); + } break; case 4: /* stxvw4x */ wp = mem; for (j = 0; j < size / 4; ++j) { i = IS_LE ? 3 - j : j; - *wp++ = reg->w[i]; + *wp++ = !rev ? reg->w[i] : byterev_4(reg->w[i]); } break; case 2: @@ -721,7 +781,7 @@ void emulate_vsx_store(struct instruction_op *op, const union vsx_reg *reg, hp = mem; for (j = 0; j < size / 2; ++j) { i = IS_LE ? 7 - j : j; - *hp++ = reg->h[i]; + *hp++ = !rev ? 
reg->h[i] : byterev_2(reg->h[i]); } break; case 1: @@ -738,7 +798,8 @@ EXPORT_SYMBOL_GPL(emulate_vsx_store); NOKPROBE_SYMBOL(emulate_vsx_store); static nokprobe_inline int do_vsx_load(struct instruction_op *op, - unsigned long ea, struct pt_regs *regs) + unsigned long ea, struct pt_regs *regs, + bool cross_endian) { int reg = op->reg; u8 mem[16]; @@ -748,7 +809,7 @@ static nokprobe_inline int do_vsx_load(struct instruction_op *op, if (!address_ok(regs, ea, size) || copy_mem_in(mem, ea, size, regs)) return -EFAULT; - emulate_vsx_load(op, &buf, mem); + emulate_vsx_load(op, &buf, mem, cross_endian); preempt_disable(); if (reg < 32) { /* FP regs + extensions */ @@ -769,7 +830,8 @@ static nokprobe_inline int do_vsx_load(struct instruction_op *op, } static nokprobe_inline int do_vsx_store(struct instruction_op *op, - unsigned long ea, struct pt_regs *regs) + unsigned long ea, struct pt_regs *regs, + bool cross_endian) { int reg = op->reg; u8 mem[16]; @@ -795,7 +857,7 @@ static nokprobe_inline int do_vsx_store(struct instruction_op *op, buf.v = current->thread.vr_state.vr[reg - 32]; } preempt_enable(); - emulate_vsx_store(op, &buf, mem); + emulate_vsx_store(op, &buf, mem, cross_endian); return copy_mem_out(mem, ea, size, regs); } #endif /* CONFIG_VSX */ @@ -2619,6 +2681,7 @@ int emulate_step(struct pt_regs *regs, unsigned int instr) unsigned int cr; int i, rd, nb; unsigned long ea; + bool cross_endian; r = analyse_instr(&op, regs, instr); if (r < 0) @@ -2631,6 +2694,7 @@ int emulate_step(struct pt_regs *regs, unsigned int instr) err = 0; size = GETSIZE(op.type); type = op.type & INSTR_TYPE_MASK; + cross_endian = (regs->msr & MSR_LE) != (MSR_KERNEL & MSR_LE); ea = op.ea; if (OP_IS_LOAD_STORE(type) || type == CACHEOP) @@ -2746,7 +2810,7 @@ int emulate_step(struct pt_regs *regs, unsigned int instr) case LOAD: #ifdef __powerpc64__ if (size == 16) { - err = emulate_lq(regs, ea, op.reg); + err = emulate_lq(regs, ea, op.reg, cross_endian); goto ldst_done; } #endif @@ -2754,7 
+2818,7 @@ int emulate_step(struct pt_regs *regs, unsigned int instr) if (!err) { if (op.type & SIGNEXT) do_signext(&regs->gpr[op.reg], size); - if (op.type & BYTEREV) + if ((op.type & BYTEREV) == (cross_endian ? 0 : BYTEREV)) do_byterev(&regs->gpr[op.reg], size); } goto ldst_done; @@ -2769,14 +2833,14 @@ int emulate_step(struct pt_regs *regs, unsigned int instr) */ if (!(regs->msr & MSR_PR) && !(regs->msr & MSR_FP)) return 0; - err = do_fp_load(op.reg, ea, size, regs); + err = do_fp_load(op.reg, ea, size, regs, cross_endian); goto ldst_done; #endif #ifdef CONFIG_ALTIVEC case LOAD_VMX: if (!(regs->msr & MSR_PR) && !(regs->msr & MSR_VEC)) return 0; - err = do_vec_load(op.reg, ea, size, regs); + err = do_vec_load(op.reg, ea, size, regs, cross_endian); goto ldst_done; #endif #ifdef CONFIG_VSX @@ -2791,23 +2855,26 @@ int emulate_step(struct pt_regs *regs, unsigned int instr) msrbit = MSR_VEC; if (!(regs->msr & MSR_PR) && !(regs->msr & msrbit)) return 0; - err = do_vsx_load(&op, ea, regs); + err = do_vsx_load(&op, ea, regs, cross_endian); goto ldst_done; } #endif case LOAD_MULTI: - if (regs->msr & MSR_LE) - return 0; + if (!address_ok(regs, ea, size)) + return -EFAULT; rd = op.reg; for (i = 0; i < size; i += 4) { + unsigned int v32 = 0; + nb = size - i; if (nb > 4) nb = 4; - err = read_mem(&regs->gpr[rd], ea, nb, regs); + err = copy_mem_in((u8 *) &v32, ea, nb, regs); if (err) return 0; - if (nb < 4) /* left-justify last bytes */ - regs->gpr[rd] <<= 32 - 8 * nb; + if (unlikely(cross_endian)) + v32 = byterev_4(v32); + regs->gpr[rd] = v32; ea += 4; ++rd; } @@ -2816,7 +2883,7 @@ int emulate_step(struct pt_regs *regs, unsigned int instr) case STORE: #ifdef __powerpc64__ if (size == 16) { - err = emulate_stq(regs, ea, op.reg); + err = emulate_stq(regs, ea, op.reg, cross_endian); goto ldst_done; } #endif @@ -2827,6 +2894,8 @@ int emulate_step(struct pt_regs *regs, unsigned int instr) err = handle_stack_update(ea, regs); goto ldst_done; } + if (unlikely(cross_endian)) + 
do_byterev(&op.val, size); err = write_mem(op.val, ea, size, regs); goto ldst_done; @@ -2834,14 +2903,14 @@ int emulate_step(struct pt_regs *regs, unsigned int instr) case STORE_FP: if (!(regs->msr & MSR_PR) && !(regs->msr & MSR_FP)) return 0; - err = do_fp_store(op.reg, ea, size, regs); + err = do_fp_store(op.reg, ea, size, regs, cross_endian); goto ldst_done; #endif #ifdef CONFIG_ALTIVEC case STORE_VMX: if (!(regs->msr & MSR_PR) && !(regs->msr & MSR_VEC)) return 0; - err = do_vec_store(op.reg, ea, size, regs); + err = do_vec_store(op.reg, ea, size, regs, cross_endian); goto ldst_done; #endif #ifdef CONFIG_VSX @@ -2856,22 +2925,23 @@ int emulate_step(struct pt_regs *regs, unsigned int instr) msrbit = MSR_VEC; if (!(regs->msr & MSR_PR) && !(regs->msr & msrbit)) return 0; - err = do_vsx_store(&op, ea, regs); + err = do_vsx_store(&op, ea, regs, cross_endian); goto ldst_done; } #endif case STORE_MULTI: - if (regs->msr & MSR_LE) - return 0; + if (!address_ok(regs, ea, size)) + return -EFAULT; rd = op.reg; for (i = 0; i < size; i += 4) { - val = regs->gpr[rd]; + unsigned int v32 = regs->gpr[rd]; + nb = size - i; if (nb > 4) nb = 4; - else - val >>= 32 - 8 * nb; - err = write_mem(val, ea, nb, regs); + if (unlikely(cross_endian)) + v32 = byterev_4(v32); + err = copy_mem_out((u8 *) &v32, ea, nb, regs); if (err) return 0; ea += 4; -- cgit