// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2022, Oracle and/or its affiliates.
*/
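
/*
 * Replicated kernel text for NUMA systems: each node gets its own copy
 * of the kernel text so it can execute from node-local memory. The
 * replicas are kept in sync with the node 0 image whenever the kernel
 * text is written or patched.
 */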
#include <linux/kallsyms.h>
#include <linux/kernel.h>
#include <linux/memblock.h>
#include <linux/mm.h>
#include <linux/numa.h>
#include <linux/pgtable.h>
#include <linux/string.h>
#include <asm/cacheflush.h>
#include <asm/ktext.h>
#include <asm/memory.h>
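
/*
 * Per-node copies of the kernel text, indexed by node ID. The node 0
 * entry is never allocated; node 0 always runs the original image.
 */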
static void *kernel_texts[MAX_NUMNODES];
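
/*
 * Mirror a write to the kernel text into every node's replica. Writes
 * to addresses outside the core kernel text are ignored.
 */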
noinstr void ktext_replication_write(void *addr, void *data, size_t size)
{
	unsigned long offset;
	void *ptr;
	int nid;

	if (!is_kernel_text((unsigned long)addr))
		return;

	offset = (unsigned long)addr - (unsigned long)_stext;

	for_each_node(nid) {
		if (!kernel_texts[nid] || !nid)
			continue;

		ptr = kernel_texts[nid] + offset;
		memcpy(ptr, data, size);
	}
}
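
/*
 * Patch a single instruction in every node's replica, mirroring the
 * update that aarch64_insn_patch_text_nosync() makes to the node 0
 * image. Cache maintenance by virtual address operates on the local
 * node's copy of the text, so when patching from a node other than
 * node 0, the node 0 copy is cleaned/invalidated to the PoU via its
 * linear map alias here.
 */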
void __kprobes ktext_replication_patch(u32 *tp, __le32 insn)
{
	unsigned long offset;
	int nid, this_nid;
	__le32 *p;

	if (!is_kernel_text((unsigned long)tp))
		return;

	offset = (unsigned long)tp - (unsigned long)_stext;

	this_nid = numa_node_id();
	if (this_nid) {
		/*
		 * The cache maintenance by aarch64_insn_patch_text_nosync()
		 * will occur on this node. We need it to occur on node 0.
		 */
		p = (void *)lm_alias(_stext) + offset;
		caches_clean_inval_pou((u64)p, (u64)p + AARCH64_INSN_SIZE);
	}

	for_each_node(nid) {
		if (!kernel_texts[nid])
			continue;

		p = kernel_texts[nid] + offset;
		WRITE_ONCE(*p, insn);
		caches_clean_inval_pou((u64)p, (u64)p + AARCH64_INSN_SIZE);
	}
}

/*
 * Copy the patched alternative from the node 0 image to the other
 * nodes. @src is the node 0 linear-mapping address.
 */
void ktext_replication_patch_alternative(__le32 *src, int nr_inst)
{
	unsigned long offset;
	size_t size;
	int nid;
	__le32 *p;

	offset = (unsigned long)src - (unsigned long)lm_alias(_stext);
	if (offset >= _etext - _stext)
		return;

	size = AARCH64_INSN_SIZE * nr_inst;

	for_each_node(nid) {
		if (!kernel_texts[nid])
			continue;

		p = kernel_texts[nid] + offset;
		memcpy(p, src, size);
		clean_dcache_range_nopatch((u64)p, (u64)p + size);
	}
}

/* Allocate memory for the replicated kernel texts. */
void __init ktext_replication_init(void)
{
	size_t size = _etext - _stext;
	int kidx = pgd_index((phys_addr_t)KERNEL_START);
	int nid;

	/*
	 * If we've messed up and the kernel shares an L0 entry with the
	 * module or vmalloc area, then don't even attempt to use text
	 * replication.
	 */
	if (pgd_index(MODULES_VADDR) == kidx) {
		pr_warn("Kernel is located in the same L0 index as modules - text replication disabled\n");
		return;
	}
	if (pgd_index(VMALLOC_START) == kidx) {
		pr_warn("Kernel is located in the same L0 index as vmalloc - text replication disabled\n");
		return;
	}

	for_each_node(nid) {
		/* Nothing to do for node 0 */
		if (!nid)
			continue;

		/* Allocate and copy initial kernel text for this node */
		kernel_texts[nid] = memblock_alloc_node(size, PAGE_SIZE, nid);
		if (!kernel_texts[nid]) {
			/* Leave the entry NULL; the patching paths skip it */
			pr_warn("ktext: allocation of %zu bytes for node %d failed\n",
				size, nid);
			continue;
		}
		memcpy(kernel_texts[nid], _stext, size);
		caches_clean_inval_pou((u64)kernel_texts[nid],
				       (u64)kernel_texts[nid] + size);
	}
}