summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDimitris Papastamos <dimitris.papastamos@arm.com>2018-01-11 15:29:36 +0000
committerKostya Porotchkin <kostap@marvell.com>2018-04-16 14:06:21 +0300
commit80bb363257d27f9c454481fcec78e6460d9b4191 (patch)
tree24ca3d990cf51ad12eedb139ba5262c4bcb2f5a0
parent1186130161bc536f99f1429801ea6efa2aff6aa0 (diff)
Optimize/cleanup BPIALL workaround
In the initial implementation of this workaround we used a dedicated workaround context to save/restore state. This patch reduces the footprint as no additional context is needed. Additionally, this patch reduces the memory loads and stores by 20%, reduces the instruction count and exploits static branch prediction to optimize the SMC path. Change-Id: Ia9f6bf06fbf8a9037cfe7f1f1fb32e8aec38ec7d Signed-off-by: Dimitris Papastamos <dimitris.papastamos@arm.com> Reviewed-on: http://vgitil04.il.marvell.com:8080/53233 Tested-by: iSoC Platform CI <ykjenk@marvell.com> Reviewed-by: Kostya Porotchkin <kostap@marvell.com>
-rw-r--r--include/lib/el3_runtime/aarch64/context.h28
-rw-r--r--lib/cpus/aarch64/workaround_cve_2017_5715_bpiall.S208
2 files changed, 89 insertions, 147 deletions
diff --git a/include/lib/el3_runtime/aarch64/context.h b/include/lib/el3_runtime/aarch64/context.h
index 35628167..31e25689 100644
--- a/include/lib/el3_runtime/aarch64/context.h
+++ b/include/lib/el3_runtime/aarch64/context.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2018, ARM Limited and Contributors. All rights reserved.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
@@ -46,26 +46,12 @@
#define CTX_GPREG_SP_EL0 U(0xf8)
#define CTX_GPREGS_END U(0x100)
-#if WORKAROUND_CVE_2017_5715
-#define CTX_CVE_2017_5715_OFFSET (CTX_GPREGS_OFFSET + CTX_GPREGS_END)
-#define CTX_CVE_2017_5715_QUAD0 U(0x0)
-#define CTX_CVE_2017_5715_QUAD1 U(0x8)
-#define CTX_CVE_2017_5715_QUAD2 U(0x10)
-#define CTX_CVE_2017_5715_QUAD3 U(0x18)
-#define CTX_CVE_2017_5715_QUAD4 U(0x20)
-#define CTX_CVE_2017_5715_QUAD5 U(0x28)
-#define CTX_CVE_2017_5715_END U(0x30)
-#else
-#define CTX_CVE_2017_5715_OFFSET CTX_GPREGS_OFFSET
-#define CTX_CVE_2017_5715_END CTX_GPREGS_END
-#endif
-
/*******************************************************************************
* Constants that allow assembler code to access members of and the 'el3_state'
* structure at their correct offsets. Note that some of the registers are only
* 32-bits wide but are stored as 64-bit values for convenience
******************************************************************************/
-#define CTX_EL3STATE_OFFSET (CTX_CVE_2017_5715_OFFSET + CTX_CVE_2017_5715_END)
+#define CTX_EL3STATE_OFFSET (CTX_GPREGS_OFFSET + CTX_GPREGS_END)
#define CTX_SCR_EL3 U(0x0)
#define CTX_RUNTIME_SP U(0x8)
#define CTX_SPSR_EL3 U(0x10)
@@ -195,9 +181,6 @@
/* Constants to determine the size of individual context structures */
#define CTX_GPREG_ALL (CTX_GPREGS_END >> DWORD_SHIFT)
-#if WORKAROUND_CVE_2017_5715
-#define CTX_CVE_2017_5715_ALL (CTX_CVE_2017_5715_END >> DWORD_SHIFT)
-#endif
#define CTX_SYSREG_ALL (CTX_SYSREGS_END >> DWORD_SHIFT)
#if CTX_INCLUDE_FPREGS
#define CTX_FPREG_ALL (CTX_FPREGS_END >> DWORD_SHIFT)
@@ -213,10 +196,6 @@
*/
DEFINE_REG_STRUCT(gp_regs, CTX_GPREG_ALL);
-#if WORKAROUND_CVE_2017_5715
-DEFINE_REG_STRUCT(cve_2017_5715_regs, CTX_CVE_2017_5715_ALL);
-#endif
-
/*
* AArch64 EL1 system register context structure for preserving the
* architectural state during switches from one security state to
@@ -258,9 +237,6 @@ DEFINE_REG_STRUCT(el3_state, CTX_EL3STATE_ALL);
*/
typedef struct cpu_context {
gp_regs_t gpregs_ctx;
-#if WORKAROUND_CVE_2017_5715
- cve_2017_5715_regs_t cve_2017_5715_regs_ctx;
-#endif
el3_state_t el3state_ctx;
el1_sys_regs_t sysregs_ctx;
#if CTX_INCLUDE_FPREGS
diff --git a/lib/cpus/aarch64/workaround_cve_2017_5715_bpiall.S b/lib/cpus/aarch64/workaround_cve_2017_5715_bpiall.S
index cd29266e..9677e2e0 100644
--- a/lib/cpus/aarch64/workaround_cve_2017_5715_bpiall.S
+++ b/lib/cpus/aarch64/workaround_cve_2017_5715_bpiall.S
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2017-2018, ARM Limited and Contributors. All rights reserved.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
@@ -11,10 +11,15 @@
.globl workaround_bpiall_vbar0_runtime_exceptions
#define EMIT_BPIALL 0xee070fd5
-#define EMIT_MOV_R0_IMM(v) 0xe3a0000##v
#define EMIT_SMC 0xe1600070
- .macro enter_workaround _stub_name
+ .macro enter_workaround _from_vector
+ /*
+ * Save register state to enable a call to AArch32 S-EL1 and return
+ * Identify the original calling vector in w2 (==_from_vector)
+ * Use w3-w6 for additional register state preservation while in S-EL1
+ */
+
/* Save GP regs */
stp x0, x1, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X0]
stp x2, x3, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X2]
@@ -32,47 +37,50 @@
stp x26, x27, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X26]
stp x28, x29, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X28]
- adr x4, \_stub_name
+ /* Identify the original exception vector */
+ mov w2, \_from_vector
+
+ /* Preserve 32-bit system registers in GP registers through the workaround */
+ mrs x3, esr_el3
+ mrs x4, spsr_el3
+ mrs x5, scr_el3
+ mrs x6, sctlr_el1
/*
- * Load SPSR_EL3 and VBAR_EL3. SPSR_EL3 is set up to have
- * all interrupts masked in preparation to running the workaround
- * stub in S-EL1. VBAR_EL3 points to the vector table that
- * will handle the SMC back from the workaround stub.
+ * Preserve LR and ELR_EL3 registers in the GP regs context.
+ * Temporarily use the CTX_GPREG_SP_EL0 slot to preserve ELR_EL3
+ * through the workaround. This is OK because at this point the
+ * current state for this context's SP_EL0 is in the live system
+ * register, which is unmodified by the workaround.
*/
- ldp x0, x1, [x4, #0]
+ mrs x7, elr_el3
+ stp x30, x7, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_LR]
/*
- * Load SCTLR_EL1 and ELR_EL3. SCTLR_EL1 is configured to disable
- * the MMU in S-EL1. ELR_EL3 points to the appropriate stub in S-EL1.
+ * Load system registers for entry to S-EL1.
*/
- ldp x2, x3, [x4, #16]
- mrs x4, scr_el3
- mrs x5, spsr_el3
- mrs x6, elr_el3
- mrs x7, sctlr_el1
- mrs x8, esr_el3
+ /* Mask all interrupts and set AArch32 Supervisor mode */
+ movz w8, SPSR_MODE32(MODE32_svc, SPSR_T_ARM, SPSR_E_LITTLE, SPSR_AIF_MASK)
+
+ /* Switch EL3 exception vectors while the workaround is executing. */
+ adr x9, workaround_bpiall_vbar1_runtime_exceptions
+
+ /* Setup SCTLR_EL1 with MMU off and I$ on */
+ ldr x10, stub_sel1_sctlr
- /* Preserve system registers in the workaround context */
- stp x4, x5, [sp, #CTX_CVE_2017_5715_OFFSET + CTX_CVE_2017_5715_QUAD0]
- stp x6, x7, [sp, #CTX_CVE_2017_5715_OFFSET + CTX_CVE_2017_5715_QUAD2]
- stp x8, x30, [sp, #CTX_CVE_2017_5715_OFFSET + CTX_CVE_2017_5715_QUAD4]
+ /* Land at the S-EL1 workaround stub */
+ adr x11, aarch32_stub
/*
* Setting SCR_EL3 to all zeroes means that the NS, RW
* and SMD bits are configured as expected.
*/
msr scr_el3, xzr
-
- /*
- * Reload system registers with the crafted values
- * in preparation for entry in S-EL1.
- */
- msr spsr_el3, x0
- msr vbar_el3, x1
- msr sctlr_el1, x2
- msr elr_el3, x3
+ msr spsr_el3, x8
+ msr vbar_el3, x9
+ msr sctlr_el1, x10
+ msr elr_el3, x11
eret
.endm
@@ -91,76 +99,31 @@ vector_base workaround_bpiall_vbar0_runtime_exceptions
*/
vector_entry workaround_bpiall_vbar0_sync_exception_sp_el0
b sync_exception_sp_el0
+ nop /* to force 8 byte alignment for the following stub */
+
/*
* Since each vector table entry is 128 bytes, we can store the
* stub context in the unused space to minimize memory footprint.
*/
-aarch32_stub_smc:
+stub_sel1_sctlr:
+ .quad SCTLR_AARCH32_EL1_RES1 | SCTLR_I_BIT
+
+aarch32_stub:
.word EMIT_BPIALL
- .word EMIT_MOV_R0_IMM(1)
.word EMIT_SMC
-aarch32_stub_ctx_smc:
- /* Mask all interrupts and set AArch32 Supervisor mode */
- .quad (SPSR_AIF_MASK << SPSR_AIF_SHIFT | \
- SPSR_M_AARCH32 << SPSR_M_SHIFT | \
- MODE32_svc << MODE32_SHIFT)
- /*
- * VBAR_EL3 points to vbar1 which is the vector table
- * used while the workaround is executing.
- */
- .quad workaround_bpiall_vbar1_runtime_exceptions
-
- /* Setup SCTLR_EL1 with MMU off and I$ on */
- .quad SCTLR_AARCH32_EL1_RES1 | SCTLR_I_BIT
-
- /* ELR_EL3 is setup to point to the sync exception stub in AArch32 */
- .quad aarch32_stub_smc
check_vector_size workaround_bpiall_vbar0_sync_exception_sp_el0
vector_entry workaround_bpiall_vbar0_irq_sp_el0
b irq_sp_el0
-aarch32_stub_irq:
- .word EMIT_BPIALL
- .word EMIT_MOV_R0_IMM(2)
- .word EMIT_SMC
-aarch32_stub_ctx_irq:
- .quad (SPSR_AIF_MASK << SPSR_AIF_SHIFT | \
- SPSR_M_AARCH32 << SPSR_M_SHIFT | \
- MODE32_svc << MODE32_SHIFT)
- .quad workaround_bpiall_vbar1_runtime_exceptions
- .quad SCTLR_AARCH32_EL1_RES1 | SCTLR_I_BIT
- .quad aarch32_stub_irq
check_vector_size workaround_bpiall_vbar0_irq_sp_el0
vector_entry workaround_bpiall_vbar0_fiq_sp_el0
b fiq_sp_el0
-aarch32_stub_fiq:
- .word EMIT_BPIALL
- .word EMIT_MOV_R0_IMM(4)
- .word EMIT_SMC
-aarch32_stub_ctx_fiq:
- .quad (SPSR_AIF_MASK << SPSR_AIF_SHIFT | \
- SPSR_M_AARCH32 << SPSR_M_SHIFT | \
- MODE32_svc << MODE32_SHIFT)
- .quad workaround_bpiall_vbar1_runtime_exceptions
- .quad SCTLR_AARCH32_EL1_RES1 | SCTLR_I_BIT
- .quad aarch32_stub_fiq
check_vector_size workaround_bpiall_vbar0_fiq_sp_el0
vector_entry workaround_bpiall_vbar0_serror_sp_el0
b serror_sp_el0
-aarch32_stub_serror:
- .word EMIT_BPIALL
- .word EMIT_MOV_R0_IMM(8)
- .word EMIT_SMC
-aarch32_stub_ctx_serror:
- .quad (SPSR_AIF_MASK << SPSR_AIF_SHIFT | \
- SPSR_M_AARCH32 << SPSR_M_SHIFT | \
- MODE32_svc << MODE32_SHIFT)
- .quad workaround_bpiall_vbar1_runtime_exceptions
- .quad SCTLR_AARCH32_EL1_RES1 | SCTLR_I_BIT
- .quad aarch32_stub_serror
check_vector_size workaround_bpiall_vbar0_serror_sp_el0
/* ---------------------------------------------------------------------
@@ -188,19 +151,19 @@ vector_entry workaround_bpiall_vbar0_serror_sp_elx
* ---------------------------------------------------------------------
*/
vector_entry workaround_bpiall_vbar0_sync_exception_aarch64
- enter_workaround aarch32_stub_ctx_smc
+ enter_workaround 1
check_vector_size workaround_bpiall_vbar0_sync_exception_aarch64
vector_entry workaround_bpiall_vbar0_irq_aarch64
- enter_workaround aarch32_stub_ctx_irq
+ enter_workaround 2
check_vector_size workaround_bpiall_vbar0_irq_aarch64
vector_entry workaround_bpiall_vbar0_fiq_aarch64
- enter_workaround aarch32_stub_ctx_fiq
+ enter_workaround 4
check_vector_size workaround_bpiall_vbar0_fiq_aarch64
vector_entry workaround_bpiall_vbar0_serror_aarch64
- enter_workaround aarch32_stub_ctx_serror
+ enter_workaround 8
check_vector_size workaround_bpiall_vbar0_serror_aarch64
/* ---------------------------------------------------------------------
@@ -208,19 +171,19 @@ vector_entry workaround_bpiall_vbar0_serror_aarch64
* ---------------------------------------------------------------------
*/
vector_entry workaround_bpiall_vbar0_sync_exception_aarch32
- enter_workaround aarch32_stub_ctx_smc
+ enter_workaround 1
check_vector_size workaround_bpiall_vbar0_sync_exception_aarch32
vector_entry workaround_bpiall_vbar0_irq_aarch32
- enter_workaround aarch32_stub_ctx_irq
+ enter_workaround 2
check_vector_size workaround_bpiall_vbar0_irq_aarch32
vector_entry workaround_bpiall_vbar0_fiq_aarch32
- enter_workaround aarch32_stub_ctx_fiq
+ enter_workaround 4
check_vector_size workaround_bpiall_vbar0_fiq_aarch32
vector_entry workaround_bpiall_vbar0_serror_aarch32
- enter_workaround aarch32_stub_ctx_serror
+ enter_workaround 8
check_vector_size workaround_bpiall_vbar0_serror_aarch32
/* ---------------------------------------------------------------------
@@ -297,31 +260,33 @@ vector_entry workaround_bpiall_vbar1_serror_aarch64
* ---------------------------------------------------------------------
*/
vector_entry workaround_bpiall_vbar1_sync_exception_aarch32
- /* Restore register state from the workaround context */
- ldp x2, x3, [sp, #CTX_CVE_2017_5715_OFFSET + CTX_CVE_2017_5715_QUAD0]
- ldp x4, x5, [sp, #CTX_CVE_2017_5715_OFFSET + CTX_CVE_2017_5715_QUAD2]
- ldp x6, x30, [sp, #CTX_CVE_2017_5715_OFFSET + CTX_CVE_2017_5715_QUAD4]
+ /*
+ * w2 indicates which SEL1 stub was run and thus which original vector was used
+ * w3-w6 contain saved system register state (esr_el3 in w3)
+ * Restore LR and ELR_EL3 register state from the GP regs context
+ */
+ ldp x30, x7, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_LR]
/* Apply the restored system register state */
- msr scr_el3, x2
- msr spsr_el3, x3
- msr elr_el3, x4
- msr sctlr_el1, x5
- msr esr_el3, x6
+ msr esr_el3, x3
+ msr spsr_el3, x4
+ msr scr_el3, x5
+ msr sctlr_el1, x6
+ msr elr_el3, x7
/*
* Workaround is complete, so swap VBAR_EL3 to point
* to workaround entry table in preparation for subsequent
* Sync/IRQ/FIQ/SError exceptions.
*/
- adr x2, workaround_bpiall_vbar0_runtime_exceptions
- msr vbar_el3, x2
+ adr x0, workaround_bpiall_vbar0_runtime_exceptions
+ msr vbar_el3, x0
/*
- * Restore all GP regs except x0 and x1. The value in x0
+ * Restore all GP regs except x2 and x3 (esr). The value in x2
* indicates the type of the original exception.
*/
- ldp x2, x3, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X2]
+ ldp x0, x1, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X0]
ldp x4, x5, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X4]
ldp x6, x7, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X6]
ldp x8, x9, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X8]
@@ -336,37 +301,38 @@ vector_entry workaround_bpiall_vbar1_sync_exception_aarch32
ldp x26, x27, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X26]
ldp x28, x29, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X28]
- /*
- * Each of these handlers will first restore x0 and x1 from
- * the context and the branch to the common implementation for
- * each of the exception types.
- */
- tbnz x0, #1, workaround_bpiall_vbar1_irq
- tbnz x0, #2, workaround_bpiall_vbar1_fiq
- tbnz x0, #3, workaround_bpiall_vbar1_serror
-
- /* Fallthrough case for Sync exception */
- ldp x0, x1, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X0]
+ /* Fast path Sync exceptions. Static predictor will fall through. */
+ tbz w2, #0, workaround_not_sync
+ ldp x2, x3, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X2]
b sync_exception_aarch64
check_vector_size workaround_bpiall_vbar1_sync_exception_aarch32
vector_entry workaround_bpiall_vbar1_irq_aarch32
b report_unhandled_interrupt
-workaround_bpiall_vbar1_irq:
- ldp x0, x1, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X0]
+
+ /*
+ * Post-workaround fan-out for non-sync exceptions
+ */
+workaround_not_sync:
+ tbnz w2, #3, workaround_bpiall_vbar1_serror
+ tbnz w2, #2, workaround_bpiall_vbar1_fiq
+ /* IRQ */
+ ldp x2, x3, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X2]
b irq_aarch64
+
+workaround_bpiall_vbar1_fiq:
+ ldp x2, x3, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X2]
+ b fiq_aarch64
+
+workaround_bpiall_vbar1_serror:
+ ldp x2, x3, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X2]
+ b serror_aarch64
check_vector_size workaround_bpiall_vbar1_irq_aarch32
vector_entry workaround_bpiall_vbar1_fiq_aarch32
b report_unhandled_interrupt
-workaround_bpiall_vbar1_fiq:
- ldp x0, x1, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X0]
- b fiq_aarch64
check_vector_size workaround_bpiall_vbar1_fiq_aarch32
vector_entry workaround_bpiall_vbar1_serror_aarch32
b report_unhandled_exception
-workaround_bpiall_vbar1_serror:
- ldp x0, x1, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X0]
- b serror_aarch64
check_vector_size workaround_bpiall_vbar1_serror_aarch32