summaryrefslogtreecommitdiff
path: root/arch/x86/kernel/relocate_kernel_64.S
blob: b44d8863e57f622aff3f20d98ef4a2b716b01251 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * relocate_kernel.S - put the kernel image in place to boot
 * Copyright (C) 2002-2005 Eric Biederman  <ebiederm@xmission.com>
 */

#include <linux/linkage.h>
#include <linux/stringify.h>
#include <asm/alternative.h>
#include <asm/page_types.h>
#include <asm/kexec.h>
#include <asm/processor-flags.h>
#include <asm/pgtable_types.h>
#include <asm/nospec-branch.h>
#include <asm/unwind_hints.h>
#include <asm/asm-offsets.h>

/*
 * Must be relocatable PIC code callable as a C function; in particular,
 * it must return with a plain RET rather than a jump to the return thunk.
 */

#define PTR(x) (x << 3)
#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)

/*
 * The .text..relocate_kernel and .data..relocate_kernel sections are copied
 * into the control page, and the remainder of the page is used as the stack.
 */

	.section .data..relocate_kernel,"a";
/*
 * Minimal CPU state.
 * Stored by relocate_kernel() on entry and reloaded by virtual_mapped()
 * on the way back to the original caller.
 */
SYM_DATA_LOCAL(saved_rsp, .quad 0)
SYM_DATA_LOCAL(saved_cr0, .quad 0)
SYM_DATA_LOCAL(saved_cr3, .quad 0)
SYM_DATA_LOCAL(saved_cr4, .quad 0)
	/*
	 * Other data.
	 *
	 * The three kexec_* quads are only ever read by the code in this
	 * file (control page virtual address, page-table page and swap page
	 * respectively), so they are presumably filled in from C before
	 * relocate_kernel() is called -- confirm against the caller.
	 *
	 * pa_backup_pages_map holds the indirection page passed in %rdi; it
	 * is stored by relocate_kernel() and reloaded by identity_mapped()
	 * before the second swap_pages call.
	 */
SYM_DATA(kexec_va_control_page, .quad 0)
SYM_DATA(kexec_pa_table_page, .quad 0)
SYM_DATA(kexec_pa_swap_page, .quad 0)
SYM_DATA_LOCAL(pa_backup_pages_map, .quad 0)

	.section .text..relocate_kernel,"ax";
	.code64
SYM_CODE_START_NOALIGN(relocate_kernel)
	UNWIND_HINT_END_OF_STACK
	ANNOTATE_NOENDBR
	/*
	 * Entry point: save the caller's context, switch to the identity
	 * mapped page tables and a stack inside the control page, then jump
	 * to identity_mapped in the physical copy of this code.
	 *
	 * In:
	 * %rdi indirection_page
	 * %rsi pa_control_page
	 * %rdx start address
	 * %rcx preserve_context
	 * %r8  host_mem_enc_active
	 *
	 * Handed on to identity_mapped:
	 * %rdi, %rdx, %r8 unchanged
	 * %r9  page table page (value loaded into CR3)
	 * %r11 preserve_context
	 * %r13 CR4 as it was on entry
	 */

	/*
	 * Save the CPU context, used for jumping back.
	 * These go on the caller's stack; virtual_mapped pops them in the
	 * reverse order after reloading saved_rsp.
	 */
	pushq %rbx
	pushq %rbp
	pushq %r12
	pushq %r13
	pushq %r14
	pushq %r15
	pushf

	/* zero out flags, and disable interrupts */
	pushq $0
	popfq

	/*
	 * Switch to the identity mapped page tables.
	 * The old CR3 is kept in %rax until it is stored to saved_cr3 below.
	 */
	movq	%cr3, %rax
	movq	kexec_pa_table_page(%rip), %r9
	movq	%r9, %cr3

	/* Leave CR4 in %r13 to enable the right paging mode later. */
	movq	%cr4, %r13

	/*
	 * Disable global pages immediately to ensure this mapping is RWX.
	 * %r12 is a scratch copy so that %r13 keeps the unmodified CR4.
	 */
	movq	%r13, %r12
	andq	$~(X86_CR4_PGE), %r12
	movq	%r12, %cr4

	/*
	 * Save %rsp and CRs.
	 * The stores happen only after the CR3/CR4 writes above.
	 * saved_cr4 receives the original CR4 (%r13), not the PGE-cleared copy.
	 */
	movq	%r13, saved_cr4(%rip)
	movq    %rsp, saved_rsp(%rip)
	movq	%rax, saved_cr3(%rip)
	movq	%cr0, %rax
	movq	%rax, saved_cr0(%rip)

	/* save indirection list for jumping back */
	movq	%rdi, pa_backup_pages_map(%rip)

	/* Save the preserve_context to %r11 as swap_pages clobbers %rcx. */
	movq	%rcx, %r11

	/* setup a new stack at the end of the physical control page */
	lea	PAGE_SIZE(%rsi), %rsp

	/*
	 * jump to identity mapped page
	 *
	 * Net effect of the add/sub pair (both relative to local label 0):
	 *   %rsi = pa_control_page + (identity_mapped - __relocate_kernel_start)
	 * i.e. the address of identity_mapped inside the control page,
	 * assuming the copy starts at __relocate_kernel_start.
	 */
0:	addq	$identity_mapped - 0b, %rsi
	subq	$__relocate_kernel_start - 0b, %rsi
	ANNOTATE_RETPOLINE_SAFE
	jmp	*%rsi
SYM_CODE_END(relocate_kernel)

SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
	UNWIND_HINT_END_OF_STACK
	/*
	 * Reached by the indirect jump at the end of relocate_kernel, with
	 * %rsp already pointing at the end of the physical control page.
	 * Puts CR0/CR4 into a known state, runs swap_pages and then either
	 * jumps to the start address for good (preserve_context == 0) or
	 * calls it and arranges the return to virtual_mapped.
	 *
	 * %rdi	indirection page
	 * %rdx start address
	 * %r8 host_mem_enc_active
	 * %r9 page table page
	 * %r11 preserve_context
	 * %r13 original CR4 when relocate_kernel() was invoked
	 */

	/*
	 * store the start address on the stack
	 * It is consumed either by the RET on the non-preserving path or by
	 * the POP at .Lrelocate.
	 */
	pushq   %rdx

	/*
	 * Clear X86_CR4_CET (if it was set) such that we can clear CR0_WP
	 * below.
	 */
	movq	%cr4, %rax
	andq	$~(X86_CR4_CET), %rax
	movq	%rax, %cr4

	/*
	 * Set cr0 to a known state:
	 *  - Paging enabled
	 *  - Alignment check disabled
	 *  - Write protect disabled
	 *  - No task switch
	 *  - Don't do FP software emulation.
	 *  - Protected mode enabled
	 *
	 * The OR is a 32-bit operation, so it also clears bits 63:32 of %rax.
	 */
	movq	%cr0, %rax
	andq	$~(X86_CR0_AM | X86_CR0_WP | X86_CR0_TS | X86_CR0_EM), %rax
	orl	$(X86_CR0_PG | X86_CR0_PE), %eax
	movq	%rax, %cr0

	/*
	 * Set cr4 to a known state:
	 *  - physical address extension enabled
	 *  - 5-level paging, if it was enabled before
	 *  - Machine check exception on TDX guest, if it was enabled before.
	 *    Clearing MCE might not be allowed in TDX guests, depending on setup.
	 *
	 * Use R13 that contains the original CR4 value, read in relocate_kernel().
	 * PAE is always set in the original CR4.
	 *
	 * The AND keeps only PAE and LA57 from the original value; every
	 * other CR4 bit ends up clear unless the alternative below sets MCE.
	 */
	andl	$(X86_CR4_PAE | X86_CR4_LA57), %r13d
	ALTERNATIVE "", __stringify(orl $X86_CR4_MCE, %r13d), X86_FEATURE_TDX_GUEST
	movq	%r13, %cr4

	/* Flush the TLB (needed?) */
	movq	%r9, %cr3

	/*
	 * If SME is active, there could be old encrypted cache line
	 * entries that will conflict with the now unencrypted memory
	 * used by kexec. Flush the caches before copying the kernel.
	 */
	testq	%r8, %r8
	jz .Lsme_off
	wbinvd
.Lsme_off:

	/* %rdi = indirection page, %r11 = preserve_context */
	call	swap_pages

	/*
	 * To be certain of avoiding problems with self-modifying code
	 * I need to execute a serializing instruction here.
	 * So I flush the TLB by reloading %cr3 here, it's handy,
	 * and not processor dependent.
	 */
	movq	%cr3, %rax
	movq	%rax, %cr3

	testq	%r11, %r11	/* preserve_context */
	jnz .Lrelocate

	/*
	 * set all of the registers to known values
	 * leave %rsp alone
	 */

	xorl	%eax, %eax
	xorl	%ebx, %ebx
	xorl    %ecx, %ecx
	xorl    %edx, %edx
	xorl    %esi, %esi
	xorl    %edi, %edi
	xorl    %ebp, %ebp
	xorl	%r8d, %r8d
	xorl	%r9d, %r9d
	xorl	%r10d, %r10d
	xorl	%r11d, %r11d
	xorl	%r12d, %r12d
	xorl	%r13d, %r13d
	xorl	%r14d, %r14d
	xorl	%r15d, %r15d

	/*
	 * The only thing left on the stack is the start address pushed at
	 * the top of this function, so this RET jumps to it.
	 */
	ANNOTATE_UNRET_SAFE
	ret
	int3

.Lrelocate:
	/* preserve_context path: fetch the start address pushed on entry */
	popq	%rdx

	/* Use the swap page for the callee's stack */
	movq	kexec_pa_swap_page(%rip), %r10
	leaq	PAGE_SIZE(%r10), %rsp

	/* push the existing entry point onto the callee's stack */
	pushq	%rdx

	ANNOTATE_RETPOLINE_SAFE
	call	*%rdx

	/*
	 * get the re-entry point of the peer system
	 * (popped from the top of the stack the callee returned on;
	 * presumably the peer stored it there -- confirm against the
	 * kexec jump protocol)
	 *
	 * NOTE(review): nothing visible in this file reads %r10 after the
	 * load below (swap_pages reads kexec_pa_swap_page directly), so the
	 * load looks like a leftover -- confirm before removing.
	 */
	popq	%rbp
	movq	kexec_pa_swap_page(%rip), %r10
	movq	pa_backup_pages_map(%rip), %rdi
	movq	kexec_pa_table_page(%rip), %rax
	movq	%rax, %cr3

	/*
	 * Find start (and end) of this physical mapping of control page
	 *
	 * %r8 = address of the instruction after the LEA, rounded down to a
	 * page boundary; the stack is then placed at the end of that page.
	 */
	leaq	(%rip), %r8
	ANNOTATE_NOENDBR
	andq	$PAGE_MASK, %r8
	lea	PAGE_SIZE(%r8), %rsp
	movl	$1, %r11d	/* Ensure preserve_context flag is set */
	call	swap_pages

	/*
	 * %rax = kexec_va_control_page +
	 *        (virtual_mapped - __relocate_kernel_start)
	 * Push it and RET to it, i.e. continue at virtual_mapped through the
	 * virtual address of the control page.
	 */
	movq	kexec_va_control_page(%rip), %rax
0:	addq	$virtual_mapped - 0b, %rax
	subq	$__relocate_kernel_start - 0b, %rax
	pushq	%rax
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_CODE_END(identity_mapped)

SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
	UNWIND_HINT_END_OF_STACK
	ANNOTATE_NOENDBR // RET target, above
	/*
	 * Return path for preserve_context: reload the state stored by
	 * relocate_kernel() and return to its caller.
	 *
	 * In:  %rbp re-entry point obtained in identity_mapped
	 * Out: %rax re-entry point
	 */

	/* Back onto the caller's stack first, then CR4, CR3 and CR0. */
	movq	saved_rsp(%rip), %rsp
	movq	saved_cr4(%rip), %rax
	movq	%rax, %cr4
	movq	saved_cr3(%rip), %rax
	/*
	 * saved_cr0 is fetched before CR3 is written, so no saved_* load
	 * happens after the page-table switch.
	 */
	movq	saved_cr0(%rip), %r8
	movq	%rax, %cr3
	movq	%r8, %cr0

#ifdef CONFIG_KEXEC_JUMP
	/* Saved in save_processor_state. */
	movq    $saved_context, %rax
	lgdt    saved_context_gdt_desc(%rax)
#endif

	/* relocate_kernel() returns the re-entry point for next time */
	movq	%rbp, %rax

	/*
	 * Undo the pushes made at the top of relocate_kernel(), in reverse
	 * order; the RET then uses relocate_kernel()'s own return address.
	 */
	popf
	popq	%r15
	popq	%r14
	popq	%r13
	popq	%r12
	popq	%rbp
	popq	%rbx
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_CODE_END(virtual_mapped)

	/* Do the copies */
SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
	UNWIND_HINT_END_OF_STACK
	/*
	 * Walk the indirection list and copy every source page to the
	 * current destination. When preserve_context is set, the two pages
	 * are exchanged through the swap page instead of being overwritten.
	 *
	 * In:
	 *   %rdi  first indirection entry
	 *   %r11  preserve_context (non-zero: swap, zero: plain copy)
	 *
	 * Register roles inside the loop:
	 *   %rcx  entry being decoded, then REP count
	 *   %rbx  cursor into the current indirection page
	 *   %rdi  running destination address
	 *   %rsi  running source address
	 *   %rax  start of the current source page
	 *   %rdx  start of the current destination page
	 *
	 * Entry type is encoded in the low bits of each entry:
	 *   0x1 destination, 0x2 indirection, 0x4 done, 0x8 source
	 *
	 * Clobbers: %rax, %rbx, %rcx, %rdx, %rsi, %rdi, flags.
	 */
	movq	%rdi, %rcx		/* first entry to decode */
	xorl	%esi, %esi
	xorl	%edi, %edi
	jmp	.Ldecode_entry		/* should be an indirection record */

.Lnext_entry:
	/* fetch the next 8-byte entry and advance the cursor */
	movq	(%rbx), %rcx
	addq	$8, %rbx

.Ldecode_entry:
	/* destination entry: set the running destination address */
	testb	$0x1, %cl
	jz	.Lcheck_indirection
	movq	%rcx, %rdi
	andq	$PAGE_MASK, %rdi
	jmp	.Lnext_entry

.Lcheck_indirection:
	/* indirection entry: continue reading from another page */
	testb	$0x2, %cl
	jz	.Lcheck_done
	movq	%rcx, %rbx
	andq	$PAGE_MASK, %rbx
	jmp	.Lnext_entry

.Lcheck_done:
	/* done entry: end of the list */
	testb	$0x4, %cl
	jnz	.Lswap_done

	/* anything that is not a source entry is skipped */
	testb	$0x8, %cl
	jz	.Lnext_entry

	/* source entry: one page gets copied (or swapped) */
	movq	%rcx, %rsi
	andq	$PAGE_MASK, %rsi

	movq	%rdi, %rdx		/* remember destination page */
	movq	%rsi, %rax		/* remember source page */

	/* plain copy unless ::preserve_context asks for a real swap */
	testq	%r11, %r11
	jz	.Lcopy_to_dest

	/* step 1: source page -> swap page */
	movq	kexec_pa_swap_page(%rip), %rdi
	movl	$(PAGE_SIZE >> 3), %ecx
	rep movsq

	/* step 2: destination page -> source page */
	movq	%rdx, %rsi
	movq	%rax, %rdi
	movl	$(PAGE_SIZE >> 3), %ecx
	rep movsq

	/* step 3: swap page -> destination page */
	movq	kexec_pa_swap_page(%rip), %rsi
	movq	%rdx, %rdi

.Lcopy_to_dest:
	/*
	 * Final (or only) copy into the destination page. It leaves %rdi
	 * one page past the destination, so a following source entry lands
	 * in the next destination page.
	 */
	movl	$(PAGE_SIZE >> 3), %ecx
	rep movsq

	/* point %rsi just past the source page that was handled */
	leaq	PAGE_SIZE(%rax), %rsi
	jmp	.Lnext_entry

.Lswap_done:
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_CODE_END(swap_pages)