// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de)
* Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
*/
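
/*
 * SKAS MMU context management: allocates the per-mm stub data pages,
 * keeps every mm_context on a global list, and tracks the host child
 * processes that back each address space.
 */
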
#include <linux/mm.h>
#include <linux/sched/signal.h>
#include <linux/slab.h>

#include <shared/irq_kern.h>
#include <asm/pgalloc.h>
#include <asm/sections.h>
#include <asm/mmu_context.h>
#include <as-layout.h>
#include <os.h>
#include <skas.h>
#include <stub-data.h>

/* Ensure the stub_data struct covers the allocated area */
static_assert(sizeof(struct stub_data) == STUB_DATA_PAGES * UM_KERN_PAGE_SIZE);
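
/*
 * All mm_contexts are linked into mm_list so that mm_sigchld_irq() can
 * find the context belonging to a host child that died. mm_list_lock
 * protects the list and is taken from both task and IRQ context.
 */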
spinlock_t mm_list_lock;
struct list_head mm_list;
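
/*
 * init_new_context() - set up the SKAS state for a new address space:
 * allocate the stub data pages shared with the stub process, link the
 * context into mm_list, and start the host child that will run this
 * mm's userspace.
 */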
int init_new_context(struct task_struct *task, struct mm_struct *mm)
{
	struct mm_id *new_id = &mm->context.id;
	unsigned long stack = 0;
	int ret = -ENOMEM;

	stack = __get_free_pages(GFP_KERNEL | __GFP_ZERO,
				 ilog2(STUB_DATA_PAGES));
	if (stack == 0)
		goto out;

	new_id->stack = stack;

	scoped_guard(spinlock_irqsave, &mm_list_lock) {
		/* Insert into list, used for lookups when the child dies */
		list_add(&mm->context.list, &mm_list);
	}

	ret = start_userspace(new_id);
	if (ret < 0)
		goto out_free;

	/* Ensure the new MM is clean and nothing unwanted is mapped */
	unmap(new_id, 0, STUB_START);

	return 0;

out_free:
	/* Unlink from mm_list before the stack pages go away */
	scoped_guard(spinlock_irqsave, &mm_list_lock)
		list_del(&mm->context.list);
	free_pages(new_id->stack, ilog2(STUB_DATA_PAGES));
out:
	return ret;
}
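
/*
 * destroy_context() - undo init_new_context(): kill the host child if
 * it is still alive, close the seccomp socket, free the stub data
 * pages, and unlink the context from mm_list.
 */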
void destroy_context(struct mm_struct *mm)
{
	struct mm_context *mmu = &mm->context;

	/*
	 * If init_new_context wasn't called, the pid will be zero, and
	 * kill(0) would signal the whole process group and take all of
	 * UML down with it. Pid 1 should never belong to a UML child
	 * either, so treat both as corruption. A negative pid just
	 * means the child already died unexpectedly (see
	 * mm_sigchld_irq()) and the kill below is skipped.
	 */
	if (mmu->id.pid >= 0 && mmu->id.pid < 2) {
		printk(KERN_ERR "corrupt mm_context - pid = %d\n",
		       mmu->id.pid);
		return;
	}

if (mmu->id.pid > 0) {
os_kill_ptraced_process(mmu->id.pid, 1);
mmu->id.pid = -1;
}
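
	/*
	 * In seccomp mode the kernel side holds a socket to the stub
	 * process; release it now that the mm is going away.
	 */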
	if (using_seccomp && mmu->id.sock)
		os_close_file(mmu->id.sock);

	free_pages(mmu->id.stack, ilog2(STUB_DATA_PAGES));

guard(spinlock_irqsave)(&mm_list_lock);
list_del(&mm->context.list);
}
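
/*
 * SIGCHLD IRQ handler: reap any dead host children and mark the
 * mm_context of a reaped stub process as dead, so that affected tasks
 * fail visibly instead of talking to a vanished child.
 */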
static irqreturn_t mm_sigchld_irq(int irq, void *dev)
{
struct mm_context *mm_context;
pid_t pid;
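
	/*
	 * This runs in IRQ context with interrupts disabled, so a
	 * plain spinlock suffices here; all other users of
	 * mm_list_lock must take it with interrupts disabled.
	 */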
	guard(spinlock)(&mm_list_lock);

while ((pid = os_reap_child()) > 0) {
/*
* A child died, check if we have an MM with the PID. This is
* only relevant in SECCOMP mode (as ptrace will fail anyway).
*
* See wait_stub_done_seccomp for more details.
*/
list_for_each_entry(mm_context, &mm_list, list) {
			if (mm_context->id.pid == pid) {
				struct stub_data *stub_data;

				printk(KERN_ERR "Unexpectedly lost MM child! Affected tasks will segfault.\n");

				/* Marks the MM as dead */
				mm_context->id.pid = -1;

/*
* NOTE: If SMP is implemented, a futex_wake
* needs to be added here.
*/
stub_data = (void *)mm_context->id.stack;
stub_data->futex = FUTEX_IN_KERN;
/*
* NOTE: Currently executing syscalls by
* affected tasks may finish normally.
*/
break;
}
}
}
return IRQ_HANDLED;
}
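
/*
 * Claim the SIGCHLD IRQ early, before the first userspace child can
 * be started, so no child death goes unnoticed.
 */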
static int __init init_child_tracking(void)
{
	int err;

	spin_lock_init(&mm_list_lock);
	INIT_LIST_HEAD(&mm_list);

	err = request_irq(SIGCHLD_IRQ, mm_sigchld_irq, 0, "SIGCHLD", NULL);
	if (err < 0)
		panic("Failed to register SIGCHLD IRQ: %d", err);

	return 0;
}
early_initcall(init_child_tracking);