summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile1
-rw-r--r--kernel/audit.c100
-rw-r--r--kernel/audit.h2
-rw-r--r--kernel/auditfilter.c16
-rw-r--r--kernel/auditsc.c31
-rw-r--r--kernel/bpf/core.c24
-rw-r--r--kernel/bpf/syscall.c42
-rw-r--r--kernel/bpf/verifier.c34
-rw-r--r--kernel/cgroup/rstat.c16
-rw-r--r--kernel/compat.c12
-rw-r--r--kernel/cpu.c20
-rw-r--r--kernel/crash_dump.c6
-rw-r--r--kernel/dma/remap.c48
-rw-r--r--kernel/events/callchain.c2
-rw-r--r--kernel/events/core.c29
-rw-r--r--kernel/events/internal.h2
-rw-r--r--kernel/exit.c12
-rw-r--r--kernel/fork.c9
-rw-r--r--kernel/futex.c3
-rw-r--r--kernel/groups.c2
-rw-r--r--kernel/irq/Kconfig1
-rw-r--r--kernel/irq/irq_sim.c267
-rw-r--r--kernel/irq/irqdomain.c53
-rw-r--r--kernel/irq/manage.c4
-rw-r--r--kernel/kprobes.c85
-rw-r--r--kernel/locking/lockdep.c2
-rw-r--r--kernel/locking/rtmutex.c2
-rw-r--r--kernel/module.c22
-rw-r--r--kernel/notifier.c1
-rw-r--r--kernel/padata.c14
-rw-r--r--kernel/power/Kconfig12
-rw-r--r--kernel/power/Makefile3
-rw-r--r--kernel/power/hibernate.c20
-rw-r--r--kernel/power/power.h4
-rw-r--r--kernel/power/user.c22
-rw-r--r--kernel/printk/console_cmdline.h1
-rw-r--r--kernel/printk/printk.c177
-rw-r--r--kernel/rcu/tree.c100
-rw-r--r--kernel/reboot.c6
-rw-r--r--kernel/relay.c2
-rw-r--r--kernel/sched/core.c5
-rw-r--r--kernel/scs.c104
-rw-r--r--kernel/signal.c106
-rw-r--r--kernel/smp.c2
-rw-r--r--kernel/sys.c35
-rw-r--r--kernel/trace/blktrace.c4
-rw-r--r--kernel/trace/bpf_trace.c2
-rw-r--r--kernel/trace/trace.c12
48 files changed, 980 insertions, 499 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 4cb4130ced32..c332eb9d4841 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -103,6 +103,7 @@ obj-$(CONFIG_TRACEPOINTS) += trace/
obj-$(CONFIG_IRQ_WORK) += irq_work.o
obj-$(CONFIG_CPU_PM) += cpu_pm.o
obj-$(CONFIG_BPF) += bpf/
+obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o
obj-$(CONFIG_PERF_EVENTS) += events/
diff --git a/kernel/audit.c b/kernel/audit.c
index 87f31bf1f0a0..8c201f414226 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -880,7 +880,7 @@ main_queue:
return 0;
}
-int audit_send_list(void *_dest)
+int audit_send_list_thread(void *_dest)
{
struct audit_netlink_list *dest = _dest;
struct sk_buff *skb;
@@ -924,19 +924,30 @@ out_kfree_skb:
return NULL;
}
+static void audit_free_reply(struct audit_reply *reply)
+{
+ if (!reply)
+ return;
+
+ if (reply->skb)
+ kfree_skb(reply->skb);
+ if (reply->net)
+ put_net(reply->net);
+ kfree(reply);
+}
+
static int audit_send_reply_thread(void *arg)
{
struct audit_reply *reply = (struct audit_reply *)arg;
- struct sock *sk = audit_get_sk(reply->net);
audit_ctl_lock();
audit_ctl_unlock();
/* Ignore failure. It'll only happen if the sender goes away,
because our timeout is set to infinite. */
- netlink_unicast(sk, reply->skb, reply->portid, 0);
- put_net(reply->net);
- kfree(reply);
+ netlink_unicast(audit_get_sk(reply->net), reply->skb, reply->portid, 0);
+ reply->skb = NULL;
+ audit_free_reply(reply);
return 0;
}
@@ -950,35 +961,32 @@ static int audit_send_reply_thread(void *arg)
* @payload: payload data
* @size: payload size
*
- * Allocates an skb, builds the netlink message, and sends it to the port id.
- * No failure notifications.
+ * Allocates a skb, builds the netlink message, and sends it to the port id.
*/
static void audit_send_reply(struct sk_buff *request_skb, int seq, int type, int done,
int multi, const void *payload, int size)
{
- struct net *net = sock_net(NETLINK_CB(request_skb).sk);
- struct sk_buff *skb;
struct task_struct *tsk;
- struct audit_reply *reply = kmalloc(sizeof(struct audit_reply),
- GFP_KERNEL);
+ struct audit_reply *reply;
+ reply = kzalloc(sizeof(*reply), GFP_KERNEL);
if (!reply)
return;
- skb = audit_make_reply(seq, type, done, multi, payload, size);
- if (!skb)
- goto out;
-
- reply->net = get_net(net);
+ reply->skb = audit_make_reply(seq, type, done, multi, payload, size);
+ if (!reply->skb)
+ goto err;
+ reply->net = get_net(sock_net(NETLINK_CB(request_skb).sk));
reply->portid = NETLINK_CB(request_skb).portid;
- reply->skb = skb;
tsk = kthread_run(audit_send_reply_thread, reply, "audit_send_reply");
- if (!IS_ERR(tsk))
- return;
- kfree_skb(skb);
-out:
- kfree(reply);
+ if (IS_ERR(tsk))
+ goto err;
+
+ return;
+
+err:
+ audit_free_reply(reply);
}
/*
@@ -1525,20 +1533,60 @@ static void audit_receive(struct sk_buff *skb)
audit_ctl_unlock();
}
+/* Log information about who is connecting to the audit multicast socket */
+static void audit_log_multicast(int group, const char *op, int err)
+{
+ const struct cred *cred;
+ struct tty_struct *tty;
+ char comm[sizeof(current->comm)];
+ struct audit_buffer *ab;
+
+ if (!audit_enabled)
+ return;
+
+ ab = audit_log_start(audit_context(), GFP_KERNEL, AUDIT_EVENT_LISTENER);
+ if (!ab)
+ return;
+
+ cred = current_cred();
+ tty = audit_get_tty();
+ audit_log_format(ab, "pid=%u uid=%u auid=%u tty=%s ses=%u",
+ task_pid_nr(current),
+ from_kuid(&init_user_ns, cred->uid),
+ from_kuid(&init_user_ns, audit_get_loginuid(current)),
+ tty ? tty_name(tty) : "(none)",
+ audit_get_sessionid(current));
+ audit_put_tty(tty);
+ audit_log_task_context(ab); /* subj= */
+ audit_log_format(ab, " comm=");
+ audit_log_untrustedstring(ab, get_task_comm(comm, current));
+ audit_log_d_path_exe(ab, current->mm); /* exe= */
+ audit_log_format(ab, " nl-mcgrp=%d op=%s res=%d", group, op, !err);
+ audit_log_end(ab);
+}
+
/* Run custom bind function on netlink socket group connect or bind requests. */
-static int audit_bind(struct net *net, int group)
+static int audit_multicast_bind(struct net *net, int group)
{
+ int err = 0;
+
if (!capable(CAP_AUDIT_READ))
- return -EPERM;
+ err = -EPERM;
+ audit_log_multicast(group, "connect", err);
+ return err;
+}
- return 0;
+static void audit_multicast_unbind(struct net *net, int group)
+{
+ audit_log_multicast(group, "disconnect", 0);
}
static int __net_init audit_net_init(struct net *net)
{
struct netlink_kernel_cfg cfg = {
.input = audit_receive,
- .bind = audit_bind,
+ .bind = audit_multicast_bind,
+ .unbind = audit_multicast_unbind,
.flags = NL_CFG_F_NONROOT_RECV,
.groups = AUDIT_NLGRP_MAX,
};
diff --git a/kernel/audit.h b/kernel/audit.h
index 2eed4d231624..f0233dc40b17 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -229,7 +229,7 @@ struct audit_netlink_list {
struct sk_buff_head q;
};
-int audit_send_list(void *_dest);
+int audit_send_list_thread(void *_dest);
extern int selinux_audit_rule_update(void);
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 026e34da4ace..a10e2997aa6c 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -1161,11 +1161,8 @@ int audit_rule_change(int type, int seq, void *data, size_t datasz)
*/
int audit_list_rules_send(struct sk_buff *request_skb, int seq)
{
- u32 portid = NETLINK_CB(request_skb).portid;
- struct net *net = sock_net(NETLINK_CB(request_skb).sk);
struct task_struct *tsk;
struct audit_netlink_list *dest;
- int err = 0;
/* We can't just spew out the rules here because we might fill
* the available socket buffer space and deadlock waiting for
@@ -1173,25 +1170,26 @@ int audit_list_rules_send(struct sk_buff *request_skb, int seq)
* happen if we're actually running in the context of auditctl
* trying to _send_ the stuff */
- dest = kmalloc(sizeof(struct audit_netlink_list), GFP_KERNEL);
+ dest = kmalloc(sizeof(*dest), GFP_KERNEL);
if (!dest)
return -ENOMEM;
- dest->net = get_net(net);
- dest->portid = portid;
+ dest->net = get_net(sock_net(NETLINK_CB(request_skb).sk));
+ dest->portid = NETLINK_CB(request_skb).portid;
skb_queue_head_init(&dest->q);
mutex_lock(&audit_filter_mutex);
audit_list_rules(seq, &dest->q);
mutex_unlock(&audit_filter_mutex);
- tsk = kthread_run(audit_send_list, dest, "audit_send_list");
+ tsk = kthread_run(audit_send_list_thread, dest, "audit_send_list");
if (IS_ERR(tsk)) {
skb_queue_purge(&dest->q);
+ put_net(dest->net);
kfree(dest);
- err = PTR_ERR(tsk);
+ return PTR_ERR(tsk);
}
- return err;
+ return 0;
}
int audit_comparator(u32 left, u32 op, u32 right)
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 814406a35db1..468a23390457 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -130,6 +130,17 @@ struct audit_tree_refs {
struct audit_chunk *c[31];
};
+struct audit_nfcfgop_tab {
+ enum audit_nfcfgop op;
+ const char *s;
+};
+
+static const struct audit_nfcfgop_tab audit_nfcfgs[] = {
+ { AUDIT_XT_OP_REGISTER, "register" },
+ { AUDIT_XT_OP_REPLACE, "replace" },
+ { AUDIT_XT_OP_UNREGISTER, "unregister" },
+};
+
static int audit_match_perm(struct audit_context *ctx, int mask)
{
unsigned n;
@@ -2542,6 +2553,26 @@ void __audit_ntp_log(const struct audit_ntp_data *ad)
audit_log_ntp_val(ad, "adjust", AUDIT_NTP_ADJUST);
}
+void __audit_log_nfcfg(const char *name, u8 af, unsigned int nentries,
+ enum audit_nfcfgop op)
+{
+ struct audit_buffer *ab;
+ char comm[sizeof(current->comm)];
+
+ ab = audit_log_start(audit_context(), GFP_KERNEL, AUDIT_NETFILTER_CFG);
+ if (!ab)
+ return;
+ audit_log_format(ab, "table=%s family=%u entries=%u op=%s",
+ name, af, nentries, audit_nfcfgs[op].s);
+
+ audit_log_format(ab, " pid=%u", task_pid_nr(current));
+ audit_log_task_context(ab); /* subj= */
+ audit_log_format(ab, " comm=");
+ audit_log_untrustedstring(ab, get_task_comm(comm, current));
+ audit_log_end(ab);
+}
+EXPORT_SYMBOL_GPL(__audit_log_nfcfg);
+
static void audit_log_task(struct audit_buffer *ab)
{
kuid_t auid, uid;
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 916f5132a984..cf6fe9107f5c 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -82,7 +82,7 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag
struct bpf_prog *fp;
size = round_up(size, PAGE_SIZE);
- fp = __vmalloc(size, gfp_flags, PAGE_KERNEL);
+ fp = __vmalloc(size, gfp_flags);
if (fp == NULL)
return NULL;
@@ -232,7 +232,7 @@ struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
if (ret)
return NULL;
- fp = __vmalloc(size, gfp_flags, PAGE_KERNEL);
+ fp = __vmalloc(size, gfp_flags);
if (fp == NULL) {
__bpf_prog_uncharge(fp_old->aux->user, delta);
} else {
@@ -262,10 +262,10 @@ void __bpf_prog_free(struct bpf_prog *fp)
int bpf_prog_calc_tag(struct bpf_prog *fp)
{
- const u32 bits_offset = SHA_MESSAGE_BYTES - sizeof(__be64);
+ const u32 bits_offset = SHA1_BLOCK_SIZE - sizeof(__be64);
u32 raw_size = bpf_prog_tag_scratch_size(fp);
- u32 digest[SHA_DIGEST_WORDS];
- u32 ws[SHA_WORKSPACE_WORDS];
+ u32 digest[SHA1_DIGEST_WORDS];
+ u32 ws[SHA1_WORKSPACE_WORDS];
u32 i, bsize, psize, blocks;
struct bpf_insn *dst;
bool was_ld_map;
@@ -277,7 +277,7 @@ int bpf_prog_calc_tag(struct bpf_prog *fp)
if (!raw)
return -ENOMEM;
- sha_init(digest);
+ sha1_init(digest);
memset(ws, 0, sizeof(ws));
/* We need to take out the map fd for the digest calculation
@@ -308,8 +308,8 @@ int bpf_prog_calc_tag(struct bpf_prog *fp)
memset(&raw[psize], 0, raw_size - psize);
raw[psize++] = 0x80;
- bsize = round_up(psize, SHA_MESSAGE_BYTES);
- blocks = bsize / SHA_MESSAGE_BYTES;
+ bsize = round_up(psize, SHA1_BLOCK_SIZE);
+ blocks = bsize / SHA1_BLOCK_SIZE;
todo = raw;
if (bsize - psize >= sizeof(__be64)) {
bits = (__be64 *)(todo + bsize - sizeof(__be64));
@@ -320,12 +320,12 @@ int bpf_prog_calc_tag(struct bpf_prog *fp)
*bits = cpu_to_be64((psize - 1) << 3);
while (blocks--) {
- sha_transform(digest, todo, ws);
- todo += SHA_MESSAGE_BYTES;
+ sha1_transform(digest, todo, ws);
+ todo += SHA1_BLOCK_SIZE;
}
result = (__force __be32 *)digest;
- for (i = 0; i < SHA_DIGEST_WORDS; i++)
+ for (i = 0; i < SHA1_DIGEST_WORDS; i++)
result[i] = cpu_to_be32(digest[i]);
memcpy(fp->tag, result, sizeof(fp->tag));
@@ -1089,7 +1089,7 @@ static struct bpf_prog *bpf_prog_clone_create(struct bpf_prog *fp_other,
gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags;
struct bpf_prog *fp;
- fp = __vmalloc(fp_other->pages * PAGE_SIZE, gfp_flags, PAGE_KERNEL);
+ fp = __vmalloc(fp_other->pages * PAGE_SIZE, gfp_flags);
if (fp != NULL) {
/* aux->prog still points to the fp_other one, so
* when promoting the clone to the real program,
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 2843bbba9ca1..42c7a42fc9c8 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -25,6 +25,7 @@
#include <linux/nospec.h>
#include <linux/audit.h>
#include <uapi/linux/btf.h>
+#include <asm/pgtable.h>
#include <linux/bpf_lsm.h>
#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
@@ -281,27 +282,29 @@ static void *__bpf_map_area_alloc(u64 size, int numa_node, bool mmapable)
* __GFP_RETRY_MAYFAIL to avoid such situations.
*/
- const gfp_t flags = __GFP_NOWARN | __GFP_ZERO;
+ const gfp_t gfp = __GFP_NOWARN | __GFP_ZERO;
+ unsigned int flags = 0;
+ unsigned long align = 1;
void *area;
if (size >= SIZE_MAX)
return NULL;
/* kmalloc()'ed memory can't be mmap()'ed */
- if (!mmapable && size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
- area = kmalloc_node(size, GFP_USER | __GFP_NORETRY | flags,
+ if (mmapable) {
+ BUG_ON(!PAGE_ALIGNED(size));
+ align = SHMLBA;
+ flags = VM_USERMAP;
+ } else if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
+ area = kmalloc_node(size, gfp | GFP_USER | __GFP_NORETRY,
numa_node);
if (area != NULL)
return area;
}
- if (mmapable) {
- BUG_ON(!PAGE_ALIGNED(size));
- return vmalloc_user_node_flags(size, numa_node, GFP_KERNEL |
- __GFP_RETRY_MAYFAIL | flags);
- }
- return __vmalloc_node_flags_caller(size, numa_node,
- GFP_KERNEL | __GFP_RETRY_MAYFAIL |
- flags, __builtin_return_address(0));
+
+ return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END,
+ gfp | GFP_KERNEL | __GFP_RETRY_MAYFAIL, PAGE_KERNEL,
+ flags, numa_node, __builtin_return_address(0));
}
void *bpf_map_area_alloc(u64 size, int numa_node)
@@ -623,9 +626,20 @@ static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma)
mutex_lock(&map->freeze_mutex);
- if ((vma->vm_flags & VM_WRITE) && map->frozen) {
- err = -EPERM;
- goto out;
+ if (vma->vm_flags & VM_WRITE) {
+ if (map->frozen) {
+ err = -EPERM;
+ goto out;
+ }
+ /* map is meant to be read-only, so do not allow mapping as
+ * writable, because it's possible to leak a writable page
+ * reference and allows user-space to still modify it after
+ * freezing, while verifier will assume contents do not change
+ */
+ if (map->map_flags & BPF_F_RDONLY_PROG) {
+ err = -EACCES;
+ goto out;
+ }
}
/* set default open/close callbacks */
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 8d7ee40e2748..efe14cf24bc6 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1168,14 +1168,14 @@ static void __reg_assign_32_into_64(struct bpf_reg_state *reg)
* but must be positive otherwise set to worse case bounds
* and refine later from tnum.
*/
- if (reg->s32_min_value > 0)
- reg->smin_value = reg->s32_min_value;
- else
- reg->smin_value = 0;
- if (reg->s32_max_value > 0)
+ if (reg->s32_min_value >= 0 && reg->s32_max_value >= 0)
reg->smax_value = reg->s32_max_value;
else
reg->smax_value = U32_MAX;
+ if (reg->s32_min_value >= 0)
+ reg->smin_value = reg->s32_min_value;
+ else
+ reg->smin_value = 0;
}
static void __reg_combine_32_into_64(struct bpf_reg_state *reg)
@@ -10428,22 +10428,13 @@ static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
}
#define SECURITY_PREFIX "security_"
-static int check_attach_modify_return(struct bpf_verifier_env *env)
+static int check_attach_modify_return(struct bpf_prog *prog, unsigned long addr)
{
- struct bpf_prog *prog = env->prog;
- unsigned long addr = (unsigned long) prog->aux->trampoline->func.addr;
-
- /* This is expected to be cleaned up in the future with the KRSI effort
- * introducing the LSM_HOOK macro for cleaning up lsm_hooks.h.
- */
if (within_error_injection_list(addr) ||
!strncmp(SECURITY_PREFIX, prog->aux->attach_func_name,
sizeof(SECURITY_PREFIX) - 1))
return 0;
- verbose(env, "fmod_ret attach_btf_id %u (%s) is not modifiable\n",
- prog->aux->attach_btf_id, prog->aux->attach_func_name);
-
return -EINVAL;
}
@@ -10654,11 +10645,18 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
goto out;
}
}
+
+ if (prog->expected_attach_type == BPF_MODIFY_RETURN) {
+ ret = check_attach_modify_return(prog, addr);
+ if (ret)
+ verbose(env, "%s() is not modifiable\n",
+ prog->aux->attach_func_name);
+ }
+
+ if (ret)
+ goto out;
tr->func.addr = (void *)addr;
prog->aux->trampoline = tr;
-
- if (prog->expected_attach_type == BPF_MODIFY_RETURN)
- ret = check_attach_modify_return(env);
out:
mutex_unlock(&tr->mutex);
if (ret)
diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
index 6f87352f8219..41ca996568df 100644
--- a/kernel/cgroup/rstat.c
+++ b/kernel/cgroup/rstat.c
@@ -33,12 +33,9 @@ void cgroup_rstat_updated(struct cgroup *cgrp, int cpu)
return;
/*
- * Paired with the one in cgroup_rstat_cpu_pop_updated(). Either we
- * see NULL updated_next or they see our updated stat.
- */
- smp_mb();
-
- /*
+ * Speculative already-on-list test. This may race leading to
+ * temporary inaccuracies, which is fine.
+ *
* Because @parent's updated_children is terminated with @parent
* instead of NULL, we can tell whether @cgrp is on the list by
* testing the next pointer for NULL.
@@ -134,13 +131,6 @@ static struct cgroup *cgroup_rstat_cpu_pop_updated(struct cgroup *pos,
*nextp = rstatc->updated_next;
rstatc->updated_next = NULL;
- /*
- * Paired with the one in cgroup_rstat_cpu_updated().
- * Either they see NULL updated_next or we see their
- * updated stat.
- */
- smp_mb();
-
return pos;
}
diff --git a/kernel/compat.c b/kernel/compat.c
index 843dd17e6078..b8d2800bb4b7 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -199,7 +199,7 @@ long compat_get_bitmap(unsigned long *mask, const compat_ulong_t __user *umask,
bitmap_size = ALIGN(bitmap_size, BITS_PER_COMPAT_LONG);
nr_compat_longs = BITS_TO_COMPAT_LONGS(bitmap_size);
- if (!user_access_begin(umask, bitmap_size / 8))
+ if (!user_read_access_begin(umask, bitmap_size / 8))
return -EFAULT;
while (nr_compat_longs > 1) {
@@ -211,11 +211,11 @@ long compat_get_bitmap(unsigned long *mask, const compat_ulong_t __user *umask,
}
if (nr_compat_longs)
unsafe_get_user(*mask, umask++, Efault);
- user_access_end();
+ user_read_access_end();
return 0;
Efault:
- user_access_end();
+ user_read_access_end();
return -EFAULT;
}
@@ -228,7 +228,7 @@ long compat_put_bitmap(compat_ulong_t __user *umask, unsigned long *mask,
bitmap_size = ALIGN(bitmap_size, BITS_PER_COMPAT_LONG);
nr_compat_longs = BITS_TO_COMPAT_LONGS(bitmap_size);
- if (!user_access_begin(umask, bitmap_size / 8))
+ if (!user_write_access_begin(umask, bitmap_size / 8))
return -EFAULT;
while (nr_compat_longs > 1) {
@@ -239,10 +239,10 @@ long compat_put_bitmap(compat_ulong_t __user *umask, unsigned long *mask,
}
if (nr_compat_longs)
unsafe_put_user((compat_ulong_t)*mask, umask++, Efault);
- user_access_end();
+ user_write_access_end();
return 0;
Efault:
- user_access_end();
+ user_write_access_end();
return -EFAULT;
}
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 244d30544377..6ff2578ecf17 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -433,7 +433,7 @@ static inline bool cpu_smt_allowed(unsigned int cpu)
/*
* On x86 it's required to boot all logical CPUs at least once so
* that the init code can get a chance to set CR4.MCE on each
- * CPU. Otherwise, a broadacasted MCE observing CR4.MCE=0b on any
+ * CPU. Otherwise, a broadcasted MCE observing CR4.MCE=0b on any
* core will shutdown the machine.
*/
return !cpumask_test_cpu(cpu, &cpus_booted_once_mask);
@@ -1343,7 +1343,7 @@ void bringup_nonboot_cpus(unsigned int setup_max_cpus)
#ifdef CONFIG_PM_SLEEP_SMP
static cpumask_var_t frozen_cpus;
-int __freeze_secondary_cpus(int primary, bool suspend)
+int freeze_secondary_cpus(int primary)
{
int cpu, error = 0;
@@ -1368,7 +1368,7 @@ int __freeze_secondary_cpus(int primary, bool suspend)
if (cpu == primary)
continue;
- if (suspend && pm_wakeup_pending()) {
+ if (pm_wakeup_pending()) {
pr_info("Wakeup pending. Abort CPU freeze\n");
error = -EBUSY;
break;
@@ -1392,8 +1392,8 @@ int __freeze_secondary_cpus(int primary, bool suspend)
/*
* Make sure the CPUs won't be enabled by someone else. We need to do
- * this even in case of failure as all disable_nonboot_cpus() users are
- * supposed to do enable_nonboot_cpus() on the failure path.
+ * this even in case of failure as all freeze_secondary_cpus() users are
+ * supposed to do thaw_secondary_cpus() on the failure path.
*/
cpu_hotplug_disabled++;
@@ -1401,15 +1401,15 @@ int __freeze_secondary_cpus(int primary, bool suspend)
return error;
}
-void __weak arch_enable_nonboot_cpus_begin(void)
+void __weak arch_thaw_secondary_cpus_begin(void)
{
}
-void __weak arch_enable_nonboot_cpus_end(void)
+void __weak arch_thaw_secondary_cpus_end(void)
{
}
-void enable_nonboot_cpus(void)
+void thaw_secondary_cpus(void)
{
int cpu, error;
@@ -1421,7 +1421,7 @@ void enable_nonboot_cpus(void)
pr_info("Enabling non-boot CPUs ...\n");
- arch_enable_nonboot_cpus_begin();
+ arch_thaw_secondary_cpus_begin();
for_each_cpu(cpu, frozen_cpus) {
trace_suspend_resume(TPS("CPU_ON"), cpu, true);
@@ -1434,7 +1434,7 @@ void enable_nonboot_cpus(void)
pr_warn("Error taking CPU%d up: %d\n", cpu, error);
}
- arch_enable_nonboot_cpus_end();
+ arch_thaw_secondary_cpus_end();
cpumask_clear(frozen_cpus);
out:
diff --git a/kernel/crash_dump.c b/kernel/crash_dump.c
index 9c23ae074b40..92da32275af5 100644
--- a/kernel/crash_dump.c
+++ b/kernel/crash_dump.c
@@ -6,12 +6,6 @@
#include <linux/export.h>
/*
- * If we have booted due to a crash, max_pfn will be a very low value. We need
- * to know the amount of memory that the previous kernel used.
- */
-unsigned long saved_max_pfn;
-
-/*
* stores the physical address of elf header of crash image
*
* Note: elfcorehdr_addr is not just limited to vmcore. It is also used by
diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c
index d14cbc83986a..914ff5a58dd5 100644
--- a/kernel/dma/remap.c
+++ b/kernel/dma/remap.c
@@ -20,23 +20,6 @@ struct page **dma_common_find_pages(void *cpu_addr)
return area->pages;
}
-static struct vm_struct *__dma_common_pages_remap(struct page **pages,
- size_t size, pgprot_t prot, const void *caller)
-{
- struct vm_struct *area;
-
- area = get_vm_area_caller(size, VM_DMA_COHERENT, caller);
- if (!area)
- return NULL;
-
- if (map_vm_area(area, prot, pages)) {
- vunmap(area->addr);
- return NULL;
- }
-
- return area;
-}
-
/*
* Remaps an array of PAGE_SIZE pages into another vm_area.
* Cannot be used in non-sleeping contexts
@@ -44,15 +27,12 @@ static struct vm_struct *__dma_common_pages_remap(struct page **pages,
void *dma_common_pages_remap(struct page **pages, size_t size,
pgprot_t prot, const void *caller)
{
- struct vm_struct *area;
+ void *vaddr;
- area = __dma_common_pages_remap(pages, size, prot, caller);
- if (!area)
- return NULL;
-
- area->pages = pages;
-
- return area->addr;
+ vaddr = vmap(pages, size >> PAGE_SHIFT, VM_DMA_COHERENT, prot);
+ if (vaddr)
+ find_vm_area(vaddr)->pages = pages;
+ return vaddr;
}
/*
@@ -62,24 +42,20 @@ void *dma_common_pages_remap(struct page **pages, size_t size,
void *dma_common_contiguous_remap(struct page *page, size_t size,
pgprot_t prot, const void *caller)
{
- int i;
+ int count = size >> PAGE_SHIFT;
struct page **pages;
- struct vm_struct *area;
+ void *vaddr;
+ int i;
- pages = kmalloc(sizeof(struct page *) << get_order(size), GFP_KERNEL);
+ pages = kmalloc_array(count, sizeof(struct page *), GFP_KERNEL);
if (!pages)
return NULL;
-
- for (i = 0; i < (size >> PAGE_SHIFT); i++)
+ for (i = 0; i < count; i++)
pages[i] = nth_page(page, i);
-
- area = __dma_common_pages_remap(pages, size, prot, caller);
-
+ vaddr = vmap(pages, count, VM_DMA_COHERENT, prot);
kfree(pages);
- if (!area)
- return NULL;
- return area->addr;
+ return vaddr;
}
/*
diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
index c2b41a263166..b1991043b7d8 100644
--- a/kernel/events/callchain.c
+++ b/kernel/events/callchain.c
@@ -16,7 +16,7 @@
struct callchain_cpus_entries {
struct rcu_head rcu_head;
- struct perf_callchain_entry *cpu_entries[0];
+ struct perf_callchain_entry *cpu_entries[];
};
int sysctl_perf_event_max_stack __read_mostly = PERF_MAX_STACK_DEPTH;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 633b4ae72ed5..e296c5c59c6f 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -95,11 +95,11 @@ static void remote_function(void *data)
* @info: the function call argument
*
* Calls the function @func when the task is currently running. This might
- * be on the current CPU, which just calls the function directly
+ * be on the current CPU, which just calls the function directly. This will
+ * retry due to any failures in smp_call_function_single(), such as if the
+ * task_cpu() goes offline concurrently.
*
- * returns: @func return value, or
- * -ESRCH - when the process isn't running
- * -EAGAIN - when the process moved away
+ * returns @func return value or -ESRCH when the process isn't running
*/
static int
task_function_call(struct task_struct *p, remote_function_f func, void *info)
@@ -112,11 +112,16 @@ task_function_call(struct task_struct *p, remote_function_f func, void *info)
};
int ret;
- do {
- ret = smp_call_function_single(task_cpu(p), remote_function, &data, 1);
- if (!ret)
- ret = data.ret;
- } while (ret == -EAGAIN);
+ for (;;) {
+ ret = smp_call_function_single(task_cpu(p), remote_function,
+ &data, 1);
+ ret = !ret ? data.ret : -EAGAIN;
+
+ if (ret != -EAGAIN)
+ break;
+
+ cond_resched();
+ }
return ret;
}
@@ -9404,7 +9409,7 @@ static int perf_kprobe_event_init(struct perf_event *event)
if (event->attr.type != perf_kprobe.type)
return -ENOENT;
- if (!capable(CAP_SYS_ADMIN))
+ if (!perfmon_capable())
return -EACCES;
/*
@@ -9464,7 +9469,7 @@ static int perf_uprobe_event_init(struct perf_event *event)
if (event->attr.type != perf_uprobe.type)
return -ENOENT;
- if (!capable(CAP_SYS_ADMIN))
+ if (!perfmon_capable())
return -EACCES;
/*
@@ -11511,7 +11516,7 @@ SYSCALL_DEFINE5(perf_event_open,
}
if (attr.namespaces) {
- if (!capable(CAP_SYS_ADMIN))
+ if (!perfmon_capable())
return -EACCES;
}
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index f16f66b6b655..fcbf5616a441 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -55,7 +55,7 @@ struct perf_buffer {
void *aux_priv;
struct perf_event_mmap_page *user_page;
- void *data_pages[0];
+ void *data_pages[];
};
extern void rb_free(struct perf_buffer *rb);
diff --git a/kernel/exit.c b/kernel/exit.c
index d56fe51bdf07..c81805a6e03b 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1565,7 +1565,7 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
if (!infop)
return err;
- if (!user_access_begin(infop, sizeof(*infop)))
+ if (!user_write_access_begin(infop, sizeof(*infop)))
return -EFAULT;
unsafe_put_user(signo, &infop->si_signo, Efault);
@@ -1574,10 +1574,10 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
unsafe_put_user(info.pid, &infop->si_pid, Efault);
unsafe_put_user(info.uid, &infop->si_uid, Efault);
unsafe_put_user(info.status, &infop->si_status, Efault);
- user_access_end();
+ user_write_access_end();
return err;
Efault:
- user_access_end();
+ user_write_access_end();
return -EFAULT;
}
@@ -1692,7 +1692,7 @@ COMPAT_SYSCALL_DEFINE5(waitid,
if (!infop)
return err;
- if (!user_access_begin(infop, sizeof(*infop)))
+ if (!user_write_access_begin(infop, sizeof(*infop)))
return -EFAULT;
unsafe_put_user(signo, &infop->si_signo, Efault);
@@ -1701,10 +1701,10 @@ COMPAT_SYSCALL_DEFINE5(waitid,
unsafe_put_user(info.pid, &infop->si_pid, Efault);
unsafe_put_user(info.uid, &infop->si_uid, Efault);
unsafe_put_user(info.status, &infop->si_status, Efault);
- user_access_end();
+ user_write_access_end();
return err;
Efault:
- user_access_end();
+ user_write_access_end();
return -EFAULT;
}
#endif
diff --git a/kernel/fork.c b/kernel/fork.c
index c40478e749a7..be98e94cb3cc 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -94,6 +94,7 @@
#include <linux/thread_info.h>
#include <linux/stackleak.h>
#include <linux/kasan.h>
+#include <linux/scs.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -456,6 +457,8 @@ void put_task_stack(struct task_struct *tsk)
void free_task(struct task_struct *tsk)
{
+ scs_release(tsk);
+
#ifndef CONFIG_THREAD_INFO_IN_TASK
/*
* The task is finally done with both the stack and thread_info,
@@ -840,6 +843,8 @@ void __init fork_init(void)
NULL, free_vm_stack_cache);
#endif
+ scs_init();
+
lockdep_init_task(&init_task);
uprobes_init();
}
@@ -899,6 +904,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
if (err)
goto free_stack;
+ err = scs_prepare(tsk, node);
+ if (err)
+ goto free_stack;
+
#ifdef CONFIG_SECCOMP
/*
* We must handle setting up seccomp filters once we're under
diff --git a/kernel/futex.c b/kernel/futex.c
index b59532862bc0..b4b9f960b610 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -486,10 +486,13 @@ static u64 get_inode_sequence_number(struct inode *inode)
* The key words are stored in @key on success.
*
* For shared mappings (when @fshared), the key is:
+ *
* ( inode->i_sequence, page->index, offset_within_page )
+ *
* [ also see get_inode_sequence_number() ]
*
* For private mappings (or when !@fshared), the key is:
+ *
* ( current->mm, address, 0 )
*
* This allows (cross process, where applicable) identification of the futex
diff --git a/kernel/groups.c b/kernel/groups.c
index daae2f2dc6d4..6ee6691f6839 100644
--- a/kernel/groups.c
+++ b/kernel/groups.c
@@ -20,7 +20,7 @@ struct group_info *groups_alloc(int gidsetsize)
len = sizeof(struct group_info) + sizeof(kgid_t) * gidsetsize;
gi = kmalloc(len, GFP_KERNEL_ACCOUNT|__GFP_NOWARN|__GFP_NORETRY);
if (!gi)
- gi = __vmalloc(len, GFP_KERNEL_ACCOUNT, PAGE_KERNEL);
+ gi = __vmalloc(len, GFP_KERNEL_ACCOUNT);
if (!gi)
return NULL;
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
index 20d501af4f2e..d63c324895ea 100644
--- a/kernel/irq/Kconfig
+++ b/kernel/irq/Kconfig
@@ -72,6 +72,7 @@ config IRQ_DOMAIN
config IRQ_SIM
bool
select IRQ_WORK
+ select IRQ_DOMAIN
# Support for hierarchical irq domains
config IRQ_DOMAIN_HIERARCHY
diff --git a/kernel/irq/irq_sim.c b/kernel/irq/irq_sim.c
index b992f88c5613..48006608baf0 100644
--- a/kernel/irq/irq_sim.c
+++ b/kernel/irq/irq_sim.c
@@ -1,14 +1,31 @@
// SPDX-License-Identifier: GPL-2.0+
/*
* Copyright (C) 2017-2018 Bartosz Golaszewski <brgl@bgdev.pl>
+ * Copyright (C) 2020 Bartosz Golaszewski <bgolaszewski@baylibre.com>
*/
-#include <linux/slab.h>
-#include <linux/irq_sim.h>
#include <linux/irq.h>
+#include <linux/irq_sim.h>
+#include <linux/irq_work.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+
+struct irq_sim_work_ctx {
+ struct irq_work work;
+ int irq_base;
+ unsigned int irq_count;
+ unsigned long *pending;
+ struct irq_domain *domain;
+};
+
+struct irq_sim_irq_ctx {
+ int irqnum;
+ bool enabled;
+ struct irq_sim_work_ctx *work_ctx;
+};
struct irq_sim_devres {
- struct irq_sim *sim;
+ struct irq_domain *domain;
};
static void irq_sim_irqmask(struct irq_data *data)
@@ -36,159 +53,205 @@ static int irq_sim_set_type(struct irq_data *data, unsigned int type)
return 0;
}
+static int irq_sim_get_irqchip_state(struct irq_data *data,
+ enum irqchip_irq_state which, bool *state)
+{
+ struct irq_sim_irq_ctx *irq_ctx = irq_data_get_irq_chip_data(data);
+ irq_hw_number_t hwirq = irqd_to_hwirq(data);
+
+ switch (which) {
+ case IRQCHIP_STATE_PENDING:
+ if (irq_ctx->enabled)
+ *state = test_bit(hwirq, irq_ctx->work_ctx->pending);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int irq_sim_set_irqchip_state(struct irq_data *data,
+ enum irqchip_irq_state which, bool state)
+{
+ struct irq_sim_irq_ctx *irq_ctx = irq_data_get_irq_chip_data(data);
+ irq_hw_number_t hwirq = irqd_to_hwirq(data);
+
+ switch (which) {
+ case IRQCHIP_STATE_PENDING:
+ if (irq_ctx->enabled) {
+ assign_bit(hwirq, irq_ctx->work_ctx->pending, state);
+ if (state)
+ irq_work_queue(&irq_ctx->work_ctx->work);
+ }
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static struct irq_chip irq_sim_irqchip = {
- .name = "irq_sim",
- .irq_mask = irq_sim_irqmask,
- .irq_unmask = irq_sim_irqunmask,
- .irq_set_type = irq_sim_set_type,
+ .name = "irq_sim",
+ .irq_mask = irq_sim_irqmask,
+ .irq_unmask = irq_sim_irqunmask,
+ .irq_set_type = irq_sim_set_type,
+ .irq_get_irqchip_state = irq_sim_get_irqchip_state,
+ .irq_set_irqchip_state = irq_sim_set_irqchip_state,
};
static void irq_sim_handle_irq(struct irq_work *work)
{
struct irq_sim_work_ctx *work_ctx;
unsigned int offset = 0;
- struct irq_sim *sim;
int irqnum;
work_ctx = container_of(work, struct irq_sim_work_ctx, work);
- sim = container_of(work_ctx, struct irq_sim, work_ctx);
- while (!bitmap_empty(work_ctx->pending, sim->irq_count)) {
+ while (!bitmap_empty(work_ctx->pending, work_ctx->irq_count)) {
offset = find_next_bit(work_ctx->pending,
- sim->irq_count, offset);
+ work_ctx->irq_count, offset);
clear_bit(offset, work_ctx->pending);
- irqnum = irq_sim_irqnum(sim, offset);
+ irqnum = irq_find_mapping(work_ctx->domain, offset);
handle_simple_irq(irq_to_desc(irqnum));
}
}
+static int irq_sim_domain_map(struct irq_domain *domain,
+ unsigned int virq, irq_hw_number_t hw)
+{
+ struct irq_sim_work_ctx *work_ctx = domain->host_data;
+ struct irq_sim_irq_ctx *irq_ctx;
+
+ irq_ctx = kzalloc(sizeof(*irq_ctx), GFP_KERNEL);
+ if (!irq_ctx)
+ return -ENOMEM;
+
+ irq_set_chip(virq, &irq_sim_irqchip);
+ irq_set_chip_data(virq, irq_ctx);
+ irq_set_handler(virq, handle_simple_irq);
+ irq_modify_status(virq, IRQ_NOREQUEST | IRQ_NOAUTOEN, IRQ_NOPROBE);
+ irq_ctx->work_ctx = work_ctx;
+
+ return 0;
+}
+
+static void irq_sim_domain_unmap(struct irq_domain *domain, unsigned int virq)
+{
+ struct irq_sim_irq_ctx *irq_ctx;
+ struct irq_data *irqd;
+
+ irqd = irq_domain_get_irq_data(domain, virq);
+ irq_ctx = irq_data_get_irq_chip_data(irqd);
+
+ irq_set_handler(virq, NULL);
+ irq_domain_reset_irq_data(irqd);
+ kfree(irq_ctx);
+}
+
+static const struct irq_domain_ops irq_sim_domain_ops = {
+ .map = irq_sim_domain_map,
+ .unmap = irq_sim_domain_unmap,
+};
+
/**
- * irq_sim_init - Initialize the interrupt simulator: allocate a range of
- * dummy interrupts.
+ * irq_domain_create_sim - Create a new interrupt simulator irq_domain and
+ * allocate a range of dummy interrupts.
*
- * @sim: The interrupt simulator object to initialize.
- * @num_irqs: Number of interrupts to allocate
+ * @fwnode: struct fwnode_handle to be associated with this domain.
+ * @num_irqs: Number of interrupts to allocate.
*
- * On success: return the base of the allocated interrupt range.
- * On failure: a negative errno.
+ * On success: return a new irq_domain object.
+ * On failure: a negative errno wrapped with ERR_PTR().
*/
-int irq_sim_init(struct irq_sim *sim, unsigned int num_irqs)
+struct irq_domain *irq_domain_create_sim(struct fwnode_handle *fwnode,
+ unsigned int num_irqs)
{
- int i;
+ struct irq_sim_work_ctx *work_ctx;
- sim->irqs = kmalloc_array(num_irqs, sizeof(*sim->irqs), GFP_KERNEL);
- if (!sim->irqs)
- return -ENOMEM;
+ work_ctx = kmalloc(sizeof(*work_ctx), GFP_KERNEL);
+ if (!work_ctx)
+ goto err_out;
- sim->irq_base = irq_alloc_descs(-1, 0, num_irqs, 0);
- if (sim->irq_base < 0) {
- kfree(sim->irqs);
- return sim->irq_base;
- }
+ work_ctx->pending = bitmap_zalloc(num_irqs, GFP_KERNEL);
+ if (!work_ctx->pending)
+ goto err_free_work_ctx;
- sim->work_ctx.pending = bitmap_zalloc(num_irqs, GFP_KERNEL);
- if (!sim->work_ctx.pending) {
- kfree(sim->irqs);
- irq_free_descs(sim->irq_base, num_irqs);
- return -ENOMEM;
- }
+ work_ctx->domain = irq_domain_create_linear(fwnode, num_irqs,
+ &irq_sim_domain_ops,
+ work_ctx);
+ if (!work_ctx->domain)
+ goto err_free_bitmap;
- for (i = 0; i < num_irqs; i++) {
- sim->irqs[i].irqnum = sim->irq_base + i;
- sim->irqs[i].enabled = false;
- irq_set_chip(sim->irq_base + i, &irq_sim_irqchip);
- irq_set_chip_data(sim->irq_base + i, &sim->irqs[i]);
- irq_set_handler(sim->irq_base + i, &handle_simple_irq);
- irq_modify_status(sim->irq_base + i,
- IRQ_NOREQUEST | IRQ_NOAUTOEN, IRQ_NOPROBE);
- }
+ work_ctx->irq_count = num_irqs;
+ init_irq_work(&work_ctx->work, irq_sim_handle_irq);
- init_irq_work(&sim->work_ctx.work, irq_sim_handle_irq);
- sim->irq_count = num_irqs;
+ return work_ctx->domain;
- return sim->irq_base;
+err_free_bitmap:
+ bitmap_free(work_ctx->pending);
+err_free_work_ctx:
+ kfree(work_ctx);
+err_out:
+ return ERR_PTR(-ENOMEM);
}
-EXPORT_SYMBOL_GPL(irq_sim_init);
+EXPORT_SYMBOL_GPL(irq_domain_create_sim);
/**
- * irq_sim_fini - Deinitialize the interrupt simulator: free the interrupt
- * descriptors and allocated memory.
+ * irq_domain_remove_sim - Deinitialize the interrupt simulator domain: free
+ * the interrupt descriptors and allocated memory.
*
- * @sim: The interrupt simulator to tear down.
+ * @domain: The interrupt simulator domain to tear down.
*/
-void irq_sim_fini(struct irq_sim *sim)
+void irq_domain_remove_sim(struct irq_domain *domain)
{
- irq_work_sync(&sim->work_ctx.work);
- bitmap_free(sim->work_ctx.pending);
- irq_free_descs(sim->irq_base, sim->irq_count);
- kfree(sim->irqs);
+ struct irq_sim_work_ctx *work_ctx = domain->host_data;
+
+ irq_work_sync(&work_ctx->work);
+ bitmap_free(work_ctx->pending);
+ kfree(work_ctx);
+
+ irq_domain_remove(domain);
}
-EXPORT_SYMBOL_GPL(irq_sim_fini);
+EXPORT_SYMBOL_GPL(irq_domain_remove_sim);
-static void devm_irq_sim_release(struct device *dev, void *res)
+static void devm_irq_domain_release_sim(struct device *dev, void *res)
{
struct irq_sim_devres *this = res;
- irq_sim_fini(this->sim);
+ irq_domain_remove_sim(this->domain);
}
/**
- * irq_sim_init - Initialize the interrupt simulator for a managed device.
+ * devm_irq_domain_create_sim - Create a new interrupt simulator for
+ * a managed device.
*
* @dev: Device to initialize the simulator object for.
- * @sim: The interrupt simulator object to initialize.
+ * @fwnode: struct fwnode_handle to be associated with this domain.
* @num_irqs: Number of interrupts to allocate
*
- * On success: return the base of the allocated interrupt range.
- * On failure: a negative errno.
+ * On success: return a new irq_domain object.
+ * On failure: a negative errno wrapped with ERR_PTR().
*/
-int devm_irq_sim_init(struct device *dev, struct irq_sim *sim,
- unsigned int num_irqs)
+struct irq_domain *devm_irq_domain_create_sim(struct device *dev,
+ struct fwnode_handle *fwnode,
+ unsigned int num_irqs)
{
struct irq_sim_devres *dr;
- int rv;
- dr = devres_alloc(devm_irq_sim_release, sizeof(*dr), GFP_KERNEL);
+ dr = devres_alloc(devm_irq_domain_release_sim,
+ sizeof(*dr), GFP_KERNEL);
if (!dr)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
- rv = irq_sim_init(sim, num_irqs);
- if (rv < 0) {
+ dr->domain = irq_domain_create_sim(fwnode, num_irqs);
+ if (IS_ERR(dr->domain)) {
devres_free(dr);
- return rv;
+ return dr->domain;
}
- dr->sim = sim;
devres_add(dev, dr);
-
- return rv;
-}
-EXPORT_SYMBOL_GPL(devm_irq_sim_init);
-
-/**
- * irq_sim_fire - Enqueue an interrupt.
- *
- * @sim: The interrupt simulator object.
- * @offset: Offset of the simulated interrupt which should be fired.
- */
-void irq_sim_fire(struct irq_sim *sim, unsigned int offset)
-{
- if (sim->irqs[offset].enabled) {
- set_bit(offset, sim->work_ctx.pending);
- irq_work_queue(&sim->work_ctx.work);
- }
-}
-EXPORT_SYMBOL_GPL(irq_sim_fire);
-
-/**
- * irq_sim_irqnum - Get the allocated number of a dummy interrupt.
- *
- * @sim: The interrupt simulator object.
- * @offset: Offset of the simulated interrupt for which to retrieve
- * the number.
- */
-int irq_sim_irqnum(struct irq_sim *sim, unsigned int offset)
-{
- return sim->irqs[offset].irqnum;
+ return dr->domain;
}
-EXPORT_SYMBOL_GPL(irq_sim_irqnum);
+EXPORT_SYMBOL_GPL(devm_irq_domain_create_sim);
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 35b8d97c3a1d..a4c2c915511d 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -132,14 +132,13 @@ struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size,
const struct irq_domain_ops *ops,
void *host_data)
{
- struct device_node *of_node = to_of_node(fwnode);
struct irqchip_fwid *fwid;
struct irq_domain *domain;
static atomic_t unknown_domains;
domain = kzalloc_node(sizeof(*domain) + (sizeof(unsigned int) * size),
- GFP_KERNEL, of_node_to_nid(of_node));
+ GFP_KERNEL, of_node_to_nid(to_of_node(fwnode)));
if (!domain)
return NULL;
@@ -162,30 +161,16 @@ struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size,
domain->name = fwid->name;
break;
}
-#ifdef CONFIG_ACPI
- } else if (is_acpi_device_node(fwnode)) {
- struct acpi_buffer buf = {
- .length = ACPI_ALLOCATE_BUFFER,
- };
- acpi_handle handle;
-
- handle = acpi_device_handle(to_acpi_device_node(fwnode));
- if (acpi_get_name(handle, ACPI_FULL_PATHNAME, &buf) == AE_OK) {
- domain->name = buf.pointer;
- domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED;
- }
-
- domain->fwnode = fwnode;
-#endif
- } else if (of_node) {
+ } else if (is_of_node(fwnode) || is_acpi_device_node(fwnode) ||
+ is_software_node(fwnode)) {
char *name;
/*
- * DT paths contain '/', which debugfs is legitimately
+ * fwnode paths contain '/', which debugfs is legitimately
* unhappy about. Replace them with ':', which does
* the trick and is not as offensive as '\'...
*/
- name = kasprintf(GFP_KERNEL, "%pOF", of_node);
+ name = kasprintf(GFP_KERNEL, "%pfw", fwnode);
if (!name) {
kfree(domain);
return NULL;
@@ -210,7 +195,7 @@ struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size,
domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED;
}
- of_node_get(of_node);
+ fwnode_handle_get(fwnode);
/* Fill structure */
INIT_RADIX_TREE(&domain->revmap_tree, GFP_KERNEL);
@@ -259,7 +244,7 @@ void irq_domain_remove(struct irq_domain *domain)
pr_debug("Removed domain %s\n", domain->name);
- of_node_put(irq_domain_get_of_node(domain));
+ fwnode_handle_put(domain->fwnode);
if (domain->flags & IRQ_DOMAIN_NAME_ALLOCATED)
kfree(domain->name);
kfree(domain);
@@ -1047,6 +1032,18 @@ int irq_domain_alloc_descs(int virq, unsigned int cnt, irq_hw_number_t hwirq,
return virq;
}
+/**
+ * irq_domain_reset_irq_data - Clear hwirq, chip and chip_data in @irq_data
+ * @irq_data: The pointer to irq_data
+ */
+void irq_domain_reset_irq_data(struct irq_data *irq_data)
+{
+ irq_data->hwirq = 0;
+ irq_data->chip = &no_irq_chip;
+ irq_data->chip_data = NULL;
+}
+EXPORT_SYMBOL_GPL(irq_domain_reset_irq_data);
+
#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
/**
* irq_domain_create_hierarchy - Add a irqdomain into the hierarchy
@@ -1248,18 +1245,6 @@ void irq_domain_set_info(struct irq_domain *domain, unsigned int virq,
EXPORT_SYMBOL(irq_domain_set_info);
/**
- * irq_domain_reset_irq_data - Clear hwirq, chip and chip_data in @irq_data
- * @irq_data: The pointer to irq_data
- */
-void irq_domain_reset_irq_data(struct irq_data *irq_data)
-{
- irq_data->hwirq = 0;
- irq_data->chip = &no_irq_chip;
- irq_data->chip_data = NULL;
-}
-EXPORT_SYMBOL_GPL(irq_domain_reset_irq_data);
-
-/**
* irq_domain_free_irqs_common - Clear irq_data and free the parent
* @domain: Interrupt domain to match
* @virq: IRQ number to start with
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 453a8a0f4804..761911168438 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -2619,6 +2619,8 @@ int __irq_get_irqchip_state(struct irq_data *data, enum irqchip_irq_state which,
do {
chip = irq_data_get_irq_chip(data);
+ if (WARN_ON_ONCE(!chip))
+ return -ENODEV;
if (chip->irq_get_irqchip_state)
break;
#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
@@ -2696,6 +2698,8 @@ int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which,
do {
chip = irq_data_get_irq_chip(data);
+ if (WARN_ON_ONCE(!chip))
+ return -ENODEV;
if (chip->irq_set_irqchip_state)
break;
#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 2625c241ac00..3f310df4a693 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -2179,6 +2179,24 @@ int kprobe_add_area_blacklist(unsigned long start, unsigned long end)
return 0;
}
+/* Remove all symbols in given area from kprobe blacklist */
+static void kprobe_remove_area_blacklist(unsigned long start, unsigned long end)
+{
+ struct kprobe_blacklist_entry *ent, *n;
+
+ list_for_each_entry_safe(ent, n, &kprobe_blacklist, list) {
+ if (ent->start_addr < start || ent->start_addr >= end)
+ continue;
+ list_del(&ent->list);
+ kfree(ent);
+ }
+}
+
+static void kprobe_remove_ksym_blacklist(unsigned long entry)
+{
+ kprobe_remove_area_blacklist(entry, entry + 1);
+}
+
int __init __weak arch_populate_kprobe_blacklist(void)
{
return 0;
@@ -2211,10 +2229,62 @@ static int __init populate_kprobe_blacklist(unsigned long *start,
/* Symbols in __kprobes_text are blacklisted */
ret = kprobe_add_area_blacklist((unsigned long)__kprobes_text_start,
(unsigned long)__kprobes_text_end);
+ if (ret)
+ return ret;
+
+ /* Symbols in noinstr section are blacklisted */
+ ret = kprobe_add_area_blacklist((unsigned long)__noinstr_text_start,
+ (unsigned long)__noinstr_text_end);
return ret ? : arch_populate_kprobe_blacklist();
}
+static void add_module_kprobe_blacklist(struct module *mod)
+{
+ unsigned long start, end;
+ int i;
+
+ if (mod->kprobe_blacklist) {
+ for (i = 0; i < mod->num_kprobe_blacklist; i++)
+ kprobe_add_ksym_blacklist(mod->kprobe_blacklist[i]);
+ }
+
+ start = (unsigned long)mod->kprobes_text_start;
+ if (start) {
+ end = start + mod->kprobes_text_size;
+ kprobe_add_area_blacklist(start, end);
+ }
+
+ start = (unsigned long)mod->noinstr_text_start;
+ if (start) {
+ end = start + mod->noinstr_text_size;
+ kprobe_add_area_blacklist(start, end);
+ }
+}
+
+static void remove_module_kprobe_blacklist(struct module *mod)
+{
+ unsigned long start, end;
+ int i;
+
+ if (mod->kprobe_blacklist) {
+ for (i = 0; i < mod->num_kprobe_blacklist; i++)
+ kprobe_remove_ksym_blacklist(mod->kprobe_blacklist[i]);
+ }
+
+ start = (unsigned long)mod->kprobes_text_start;
+ if (start) {
+ end = start + mod->kprobes_text_size;
+ kprobe_remove_area_blacklist(start, end);
+ }
+
+ start = (unsigned long)mod->noinstr_text_start;
+ if (start) {
+ end = start + mod->noinstr_text_size;
+ kprobe_remove_area_blacklist(start, end);
+ }
+}
+
/* Module notifier call back, checking kprobes on the module */
static int kprobes_module_callback(struct notifier_block *nb,
unsigned long val, void *data)
@@ -2225,6 +2295,11 @@ static int kprobes_module_callback(struct notifier_block *nb,
unsigned int i;
int checkcore = (val == MODULE_STATE_GOING);
+ if (val == MODULE_STATE_COMING) {
+ mutex_lock(&kprobe_mutex);
+ add_module_kprobe_blacklist(mod);
+ mutex_unlock(&kprobe_mutex);
+ }
if (val != MODULE_STATE_GOING && val != MODULE_STATE_LIVE)
return NOTIFY_DONE;
@@ -2255,6 +2330,8 @@ static int kprobes_module_callback(struct notifier_block *nb,
kill_kprobe(p);
}
}
+ if (val == MODULE_STATE_GOING)
+ remove_module_kprobe_blacklist(mod);
mutex_unlock(&kprobe_mutex);
return NOTIFY_DONE;
}
@@ -2420,6 +2497,7 @@ static const struct file_operations debugfs_kprobes_operations = {
/* kprobes/blacklist -- shows which functions can not be probed */
static void *kprobe_blacklist_seq_start(struct seq_file *m, loff_t *pos)
{
+ mutex_lock(&kprobe_mutex);
return seq_list_start(&kprobe_blacklist, *pos);
}
@@ -2446,10 +2524,15 @@ static int kprobe_blacklist_seq_show(struct seq_file *m, void *v)
return 0;
}
+static void kprobe_blacklist_seq_stop(struct seq_file *f, void *v)
+{
+ mutex_unlock(&kprobe_mutex);
+}
+
static const struct seq_operations kprobe_blacklist_seq_ops = {
.start = kprobe_blacklist_seq_start,
.next = kprobe_blacklist_seq_next,
- .stop = kprobe_seq_stop, /* Reuse void function */
+ .stop = kprobe_blacklist_seq_stop,
.show = kprobe_blacklist_seq_show,
};
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 6f1c8cba09c6..dd3cc0854c32 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -470,7 +470,7 @@ struct lock_trace {
struct hlist_node hash_entry;
u32 hash;
u32 nr_entries;
- unsigned long entries[0] __aligned(sizeof(unsigned long));
+ unsigned long entries[] __aligned(sizeof(unsigned long));
};
#define LOCK_TRACE_SIZE_IN_LONGS \
(sizeof(struct lock_trace) / sizeof(unsigned long))
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index c9f090d64f00..cfdd5b93264d 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -141,7 +141,6 @@ static void fixup_rt_mutex_waiters(struct rt_mutex *lock)
* set up.
*/
#ifndef CONFIG_DEBUG_RT_MUTEXES
-# define rt_mutex_cmpxchg_relaxed(l,c,n) (cmpxchg_relaxed(&l->owner, c, n) == c)
# define rt_mutex_cmpxchg_acquire(l,c,n) (cmpxchg_acquire(&l->owner, c, n) == c)
# define rt_mutex_cmpxchg_release(l,c,n) (cmpxchg_release(&l->owner, c, n) == c)
@@ -202,7 +201,6 @@ static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
}
#else
-# define rt_mutex_cmpxchg_relaxed(l,c,n) (0)
# define rt_mutex_cmpxchg_acquire(l,c,n) (0)
# define rt_mutex_cmpxchg_release(l,c,n) (0)
diff --git a/kernel/module.c b/kernel/module.c
index 646f1e2330d2..a0f201d2e184 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2400,7 +2400,7 @@ static void layout_sections(struct module *mod, struct load_info *info)
if ((s->sh_flags & masks[m][0]) != masks[m][0]
|| (s->sh_flags & masks[m][1])
|| s->sh_entsize != ~0UL
- || strstarts(sname, ".init"))
+ || module_init_section(sname))
continue;
s->sh_entsize = get_offset(mod, &mod->core_layout.size, s, i);
pr_debug("\t%s\n", sname);
@@ -2433,7 +2433,7 @@ static void layout_sections(struct module *mod, struct load_info *info)
if ((s->sh_flags & masks[m][0]) != masks[m][0]
|| (s->sh_flags & masks[m][1])
|| s->sh_entsize != ~0UL
- || !strstarts(sname, ".init"))
+ || !module_init_section(sname))
continue;
s->sh_entsize = (get_offset(mod, &mod->init_layout.size, s, i)
| INIT_OFFSET_MASK);
@@ -2768,6 +2768,11 @@ void * __weak module_alloc(unsigned long size)
return vmalloc_exec(size);
}
+bool __weak module_init_section(const char *name)
+{
+ return strstarts(name, ".init");
+}
+
bool __weak module_exit_section(const char *name)
{
return strstarts(name, ".exit");
@@ -2946,8 +2951,7 @@ static int copy_module_from_user(const void __user *umod, unsigned long len,
return err;
/* Suck in entire file: we'll want most of it. */
- info->hdr = __vmalloc(info->len,
- GFP_KERNEL | __GFP_NOWARN, PAGE_KERNEL);
+ info->hdr = __vmalloc(info->len, GFP_KERNEL | __GFP_NOWARN);
if (!info->hdr)
return -ENOMEM;
@@ -3150,6 +3154,9 @@ static int find_module_sections(struct module *mod, struct load_info *info)
}
#endif
+ mod->noinstr_text_start = section_objs(info, ".noinstr.text", 1,
+ &mod->noinstr_text_size);
+
#ifdef CONFIG_TRACEPOINTS
mod->tracepoints_ptrs = section_objs(info, "__tracepoints_ptrs",
sizeof(*mod->tracepoints_ptrs),
@@ -3194,6 +3201,13 @@ static int find_module_sections(struct module *mod, struct load_info *info)
sizeof(*mod->ei_funcs),
&mod->num_ei_funcs);
#endif
+#ifdef CONFIG_KPROBES
+ mod->kprobes_text_start = section_objs(info, ".kprobes.text", 1,
+ &mod->kprobes_text_size);
+ mod->kprobe_blacklist = section_objs(info, "_kprobe_blacklist",
+ sizeof(unsigned long),
+ &mod->num_kprobe_blacklist);
+#endif
mod->extable = section_objs(info, "__ex_table",
sizeof(*mod->extable), &mod->num_exentries);
diff --git a/kernel/notifier.c b/kernel/notifier.c
index 5989bbb93039..84c987dfbe03 100644
--- a/kernel/notifier.c
+++ b/kernel/notifier.c
@@ -519,7 +519,6 @@ NOKPROBE_SYMBOL(notify_die);
int register_die_notifier(struct notifier_block *nb)
{
- vmalloc_sync_mappings();
return atomic_notifier_chain_register(&die_chain, nb);
}
EXPORT_SYMBOL_GPL(register_die_notifier);
diff --git a/kernel/padata.c b/kernel/padata.c
index a6afa12fb75e..aae789896616 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -703,7 +703,7 @@ static int padata_cpu_online(unsigned int cpu, struct hlist_node *node)
struct padata_instance *pinst;
int ret;
- pinst = hlist_entry_safe(node, struct padata_instance, node);
+ pinst = hlist_entry_safe(node, struct padata_instance, cpu_online_node);
if (!pinst_has_cpu(pinst, cpu))
return 0;
@@ -718,7 +718,7 @@ static int padata_cpu_dead(unsigned int cpu, struct hlist_node *node)
struct padata_instance *pinst;
int ret;
- pinst = hlist_entry_safe(node, struct padata_instance, node);
+ pinst = hlist_entry_safe(node, struct padata_instance, cpu_dead_node);
if (!pinst_has_cpu(pinst, cpu))
return 0;
@@ -734,8 +734,9 @@ static enum cpuhp_state hp_online;
static void __padata_free(struct padata_instance *pinst)
{
#ifdef CONFIG_HOTPLUG_CPU
- cpuhp_state_remove_instance_nocalls(CPUHP_PADATA_DEAD, &pinst->node);
- cpuhp_state_remove_instance_nocalls(hp_online, &pinst->node);
+ cpuhp_state_remove_instance_nocalls(CPUHP_PADATA_DEAD,
+ &pinst->cpu_dead_node);
+ cpuhp_state_remove_instance_nocalls(hp_online, &pinst->cpu_online_node);
#endif
WARN_ON(!list_empty(&pinst->pslist));
@@ -939,9 +940,10 @@ static struct padata_instance *padata_alloc(const char *name,
mutex_init(&pinst->lock);
#ifdef CONFIG_HOTPLUG_CPU
- cpuhp_state_add_instance_nocalls_cpuslocked(hp_online, &pinst->node);
+ cpuhp_state_add_instance_nocalls_cpuslocked(hp_online,
+ &pinst->cpu_online_node);
cpuhp_state_add_instance_nocalls_cpuslocked(CPUHP_PADATA_DEAD,
- &pinst->node);
+ &pinst->cpu_dead_node);
#endif
put_online_cpus();
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index c208566c844b..4d0e6e815a2b 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -80,6 +80,18 @@ config HIBERNATION
For more information take a look at <file:Documentation/power/swsusp.rst>.
+config HIBERNATION_SNAPSHOT_DEV
+ bool "Userspace snapshot device"
+ depends on HIBERNATION
+ default y
+ ---help---
+ Device used by the uswsusp tools.
+
+ Say N if no snapshotting from userspace is needed, this also
+ reduces the attack surface of the kernel.
+
+ If in doubt, say Y.
+
config PM_STD_PARTITION
string "Default resume partition"
depends on HIBERNATION
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index e7e47d9be1e5..5899260a8bef 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -10,7 +10,8 @@ obj-$(CONFIG_VT_CONSOLE_SLEEP) += console.o
obj-$(CONFIG_FREEZER) += process.o
obj-$(CONFIG_SUSPEND) += suspend.o
obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o
-obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o
+obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o
+obj-$(CONFIG_HIBERNATION_SNAPSHOT_DEV) += user.o
obj-$(CONFIG_PM_AUTOSLEEP) += autosleep.o
obj-$(CONFIG_PM_WAKELOCKS) += wakelock.o
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 30bd28d1d418..02ec716a4927 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -67,6 +67,18 @@ bool freezer_test_done;
static const struct platform_hibernation_ops *hibernation_ops;
+static atomic_t hibernate_atomic = ATOMIC_INIT(1);
+
+bool hibernate_acquire(void)
+{
+ return atomic_add_unless(&hibernate_atomic, -1, 0);
+}
+
+void hibernate_release(void)
+{
+ atomic_inc(&hibernate_atomic);
+}
+
bool hibernation_available(void)
{
return nohibernate == 0 && !security_locked_down(LOCKDOWN_HIBERNATION);
@@ -704,7 +716,7 @@ int hibernate(void)
lock_system_sleep();
/* The snapshot device should not be opened while we're running */
- if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
+ if (!hibernate_acquire()) {
error = -EBUSY;
goto Unlock;
}
@@ -775,7 +787,7 @@ int hibernate(void)
Exit:
__pm_notifier_call_chain(PM_POST_HIBERNATION, nr_calls, NULL);
pm_restore_console();
- atomic_inc(&snapshot_device_available);
+ hibernate_release();
Unlock:
unlock_system_sleep();
pr_info("hibernation exit\n");
@@ -880,7 +892,7 @@ static int software_resume(void)
goto Unlock;
/* The snapshot device should not be opened while we're running */
- if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
+ if (!hibernate_acquire()) {
error = -EBUSY;
swsusp_close(FMODE_READ);
goto Unlock;
@@ -911,7 +923,7 @@ static int software_resume(void)
__pm_notifier_call_chain(PM_POST_RESTORE, nr_calls, NULL);
pm_restore_console();
pr_info("resume failed (%d)\n", error);
- atomic_inc(&snapshot_device_available);
+ hibernate_release();
/* For success case, the suspend path will release the lock */
Unlock:
mutex_unlock(&system_transition_mutex);
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 7cdc64dc2373..ba2094db6294 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -154,8 +154,8 @@ extern int snapshot_write_next(struct snapshot_handle *handle);
extern void snapshot_write_finalize(struct snapshot_handle *handle);
extern int snapshot_image_loaded(struct snapshot_handle *handle);
-/* If unset, the snapshot device cannot be open. */
-extern atomic_t snapshot_device_available;
+extern bool hibernate_acquire(void);
+extern void hibernate_release(void);
extern sector_t alloc_swapdev_block(int swap);
extern void free_all_swap_pages(int swap);
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 7959449765d9..d5eedc2baa2a 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -35,9 +35,13 @@ static struct snapshot_data {
bool ready;
bool platform_support;
bool free_bitmaps;
+ struct inode *bd_inode;
} snapshot_state;
-atomic_t snapshot_device_available = ATOMIC_INIT(1);
+int is_hibernate_resume_dev(const struct inode *bd_inode)
+{
+ return hibernation_available() && snapshot_state.bd_inode == bd_inode;
+}
static int snapshot_open(struct inode *inode, struct file *filp)
{
@@ -49,13 +53,13 @@ static int snapshot_open(struct inode *inode, struct file *filp)
lock_system_sleep();
- if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
+ if (!hibernate_acquire()) {
error = -EBUSY;
goto Unlock;
}
if ((filp->f_flags & O_ACCMODE) == O_RDWR) {
- atomic_inc(&snapshot_device_available);
+ hibernate_release();
error = -ENOSYS;
goto Unlock;
}
@@ -92,11 +96,12 @@ static int snapshot_open(struct inode *inode, struct file *filp)
__pm_notifier_call_chain(PM_POST_RESTORE, nr_calls, NULL);
}
if (error)
- atomic_inc(&snapshot_device_available);
+ hibernate_release();
data->frozen = false;
data->ready = false;
data->platform_support = false;
+ data->bd_inode = NULL;
Unlock:
unlock_system_sleep();
@@ -112,6 +117,7 @@ static int snapshot_release(struct inode *inode, struct file *filp)
swsusp_free();
data = filp->private_data;
+ data->bd_inode = NULL;
free_all_swap_pages(data->swap);
if (data->frozen) {
pm_restore_gfp_mask();
@@ -122,7 +128,7 @@ static int snapshot_release(struct inode *inode, struct file *filp)
}
pm_notifier_call_chain(data->mode == O_RDONLY ?
PM_POST_HIBERNATION : PM_POST_RESTORE);
- atomic_inc(&snapshot_device_available);
+ hibernate_release();
unlock_system_sleep();
@@ -204,6 +210,7 @@ struct compat_resume_swap_area {
static int snapshot_set_swap_area(struct snapshot_data *data,
void __user *argp)
{
+ struct block_device *bdev;
sector_t offset;
dev_t swdev;
@@ -234,9 +241,12 @@ static int snapshot_set_swap_area(struct snapshot_data *data,
data->swap = -1;
return -EINVAL;
}
- data->swap = swap_type_of(swdev, offset, NULL);
+ data->swap = swap_type_of(swdev, offset, &bdev);
if (data->swap < 0)
return -ENODEV;
+
+ data->bd_inode = bdev->bd_inode;
+ bdput(bdev);
return 0;
}
diff --git a/kernel/printk/console_cmdline.h b/kernel/printk/console_cmdline.h
index 11f19c466af5..3ca74ad391d6 100644
--- a/kernel/printk/console_cmdline.h
+++ b/kernel/printk/console_cmdline.h
@@ -6,6 +6,7 @@ struct console_cmdline
{
char name[16]; /* Name of the driver */
int index; /* Minor dev. to use */
+ bool user_specified; /* Specified by command line vs. platform */
char *options; /* Options for the driver */
#ifdef CONFIG_A11Y_BRAILLE_CONSOLE
char *brl_options; /* Options for braille driver */
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 9a9b6156270b..9fdd6a42ad6a 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -280,6 +280,7 @@ static struct console *exclusive_console;
static struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES];
static int preferred_console = -1;
+static bool has_preferred_console;
int console_set_on_cmdline;
EXPORT_SYMBOL(console_set_on_cmdline);
@@ -974,6 +975,16 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence)
user->idx = log_next_idx;
user->seq = log_next_seq;
break;
+ case SEEK_CUR:
+ /*
+ * It isn't supported due to the record nature of this
+ * interface: _SET _DATA and _END point to very specific
+ * record positions, while _CUR would be more useful in case
+ * of a byte-based log. Because of that, return the default
+ * errno value for invalid seek operation.
+ */
+ ret = -ESPIPE;
+ break;
default:
ret = -EINVAL;
}
@@ -2140,7 +2151,7 @@ asmlinkage __visible void early_printk(const char *fmt, ...)
#endif
static int __add_preferred_console(char *name, int idx, char *options,
- char *brl_options)
+ char *brl_options, bool user_specified)
{
struct console_cmdline *c;
int i;
@@ -2155,6 +2166,8 @@ static int __add_preferred_console(char *name, int idx, char *options,
if (strcmp(c->name, name) == 0 && c->index == idx) {
if (!brl_options)
preferred_console = i;
+ if (user_specified)
+ c->user_specified = true;
return 0;
}
}
@@ -2164,6 +2177,7 @@ static int __add_preferred_console(char *name, int idx, char *options,
preferred_console = i;
strlcpy(c->name, name, sizeof(c->name));
c->options = options;
+ c->user_specified = user_specified;
braille_set_options(c, brl_options);
c->index = idx;
@@ -2190,6 +2204,9 @@ static int __init console_setup(char *str)
char *s, *options, *brl_options = NULL;
int idx;
+ if (str[0] == 0)
+ return 1;
+
if (_braille_console_setup(&str, &brl_options))
return 1;
@@ -2218,7 +2235,7 @@ static int __init console_setup(char *str)
idx = simple_strtoul(s, NULL, 10);
*s = 0;
- __add_preferred_console(buf, idx, options, brl_options);
+ __add_preferred_console(buf, idx, options, brl_options, true);
console_set_on_cmdline = 1;
return 1;
}
@@ -2239,7 +2256,7 @@ __setup("console=", console_setup);
*/
int add_preferred_console(char *name, int idx, char *options)
{
- return __add_preferred_console(name, idx, options, NULL);
+ return __add_preferred_console(name, idx, options, NULL, false);
}
bool console_suspend_enabled = true;
@@ -2438,9 +2455,9 @@ again:
printk_safe_enter_irqsave(flags);
raw_spin_lock(&logbuf_lock);
if (console_seq < log_first_seq) {
- len = sprintf(text,
- "** %llu printk messages dropped **\n",
- log_first_seq - console_seq);
+ len = snprintf(text, sizeof(text),
+ "** %llu printk messages dropped **\n",
+ log_first_seq - console_seq);
/* messages are gone, move to first one */
console_seq = log_first_seq;
@@ -2652,6 +2669,63 @@ static int __init keep_bootcon_setup(char *str)
early_param("keep_bootcon", keep_bootcon_setup);
/*
+ * This is called by register_console() to try to match
+ * the newly registered console with any of the ones selected
+ * by either the command line or add_preferred_console() and
+ * setup/enable it.
+ *
+ * Care needs to be taken with consoles that are statically
+ * enabled such as netconsole
+ */
+static int try_enable_new_console(struct console *newcon, bool user_specified)
+{
+ struct console_cmdline *c;
+ int i;
+
+ for (i = 0, c = console_cmdline;
+ i < MAX_CMDLINECONSOLES && c->name[0];
+ i++, c++) {
+ if (c->user_specified != user_specified)
+ continue;
+ if (!newcon->match ||
+ newcon->match(newcon, c->name, c->index, c->options) != 0) {
+ /* default matching */
+ BUILD_BUG_ON(sizeof(c->name) != sizeof(newcon->name));
+ if (strcmp(c->name, newcon->name) != 0)
+ continue;
+ if (newcon->index >= 0 &&
+ newcon->index != c->index)
+ continue;
+ if (newcon->index < 0)
+ newcon->index = c->index;
+
+ if (_braille_register_console(newcon, c))
+ return 0;
+
+ if (newcon->setup &&
+ newcon->setup(newcon, c->options) != 0)
+ return -EIO;
+ }
+ newcon->flags |= CON_ENABLED;
+ if (i == preferred_console) {
+ newcon->flags |= CON_CONSDEV;
+ has_preferred_console = true;
+ }
+ return 0;
+ }
+
+ /*
+ * Some consoles, such as pstore and netconsole, can be enabled even
+ * without matching. Accept the pre-enabled consoles only when match()
+ * and setup() had a chance to be called.
+ */
+ if (newcon->flags & CON_ENABLED && c->user_specified == user_specified)
+ return 0;
+
+ return -ENOENT;
+}
+
+/*
* The console driver calls this routine during kernel initialization
* to register the console printing procedure with printk() and to
* print any messages that were printed by the kernel before the
@@ -2672,11 +2746,9 @@ early_param("keep_bootcon", keep_bootcon_setup);
*/
void register_console(struct console *newcon)
{
- int i;
unsigned long flags;
struct console *bcon = NULL;
- struct console_cmdline *c;
- static bool has_preferred;
+ int err;
for_each_console(bcon) {
if (WARN(bcon == newcon, "console '%s%d' already registered\n",
@@ -2701,15 +2773,15 @@ void register_console(struct console *newcon)
if (console_drivers && console_drivers->flags & CON_BOOT)
bcon = console_drivers;
- if (!has_preferred || bcon || !console_drivers)
- has_preferred = preferred_console >= 0;
+ if (!has_preferred_console || bcon || !console_drivers)
+ has_preferred_console = preferred_console >= 0;
/*
* See if we want to use this console driver. If we
* didn't select a console we take the first one
* that registers here.
*/
- if (!has_preferred) {
+ if (!has_preferred_console) {
if (newcon->index < 0)
newcon->index = 0;
if (newcon->setup == NULL ||
@@ -2717,47 +2789,20 @@ void register_console(struct console *newcon)
newcon->flags |= CON_ENABLED;
if (newcon->device) {
newcon->flags |= CON_CONSDEV;
- has_preferred = true;
+ has_preferred_console = true;
}
}
}
- /*
- * See if this console matches one we selected on
- * the command line.
- */
- for (i = 0, c = console_cmdline;
- i < MAX_CMDLINECONSOLES && c->name[0];
- i++, c++) {
- if (!newcon->match ||
- newcon->match(newcon, c->name, c->index, c->options) != 0) {
- /* default matching */
- BUILD_BUG_ON(sizeof(c->name) != sizeof(newcon->name));
- if (strcmp(c->name, newcon->name) != 0)
- continue;
- if (newcon->index >= 0 &&
- newcon->index != c->index)
- continue;
- if (newcon->index < 0)
- newcon->index = c->index;
-
- if (_braille_register_console(newcon, c))
- return;
-
- if (newcon->setup &&
- newcon->setup(newcon, c->options) != 0)
- break;
- }
+ /* See if this console matches one we selected on the command line */
+ err = try_enable_new_console(newcon, true);
- newcon->flags |= CON_ENABLED;
- if (i == preferred_console) {
- newcon->flags |= CON_CONSDEV;
- has_preferred = true;
- }
- break;
- }
+ /* If not, try to match against the platform default(s) */
+ if (err == -ENOENT)
+ err = try_enable_new_console(newcon, false);
- if (!(newcon->flags & CON_ENABLED))
+ /* printk() messages are not printed to the Braille console. */
+ if (err || newcon->flags & CON_BRL)
return;
/*
@@ -2779,6 +2824,8 @@ void register_console(struct console *newcon)
console_drivers = newcon;
if (newcon->next)
newcon->next->flags &= ~CON_CONSDEV;
+ /* Ensure this flag is always set for the head of the list */
+ newcon->flags |= CON_CONSDEV;
} else {
newcon->next = console_drivers->next;
console_drivers->next = newcon;
@@ -3144,6 +3191,23 @@ EXPORT_SYMBOL_GPL(kmsg_dump_unregister);
static bool always_kmsg_dump;
module_param_named(always_kmsg_dump, always_kmsg_dump, bool, S_IRUGO | S_IWUSR);
+/**
+ * kmsg_dump_reason_str - map a kmsg dump reason to a printable name
+ * @reason: the reason value to describe
+ *
+ * Return: a constant string naming @reason, or "Unknown" for any value
+ * that has no dedicated name.
+ */
+const char *kmsg_dump_reason_str(enum kmsg_dump_reason reason)
+{
+	/* Fallback used for every reason not handled below. */
+	const char *name = "Unknown";
+
+	switch (reason) {
+	case KMSG_DUMP_PANIC:
+		name = "Panic";
+		break;
+	case KMSG_DUMP_OOPS:
+		name = "Oops";
+		break;
+	case KMSG_DUMP_EMERG:
+		name = "Emergency";
+		break;
+	case KMSG_DUMP_SHUTDOWN:
+		name = "Shutdown";
+		break;
+	default:
+		break;
+	}
+
+	return name;
+}
+EXPORT_SYMBOL_GPL(kmsg_dump_reason_str);
+
/**
* kmsg_dump - dump kernel log to kernel message dumpers.
* @reason: the reason (oops, panic etc) for dumping
@@ -3157,12 +3221,19 @@ void kmsg_dump(enum kmsg_dump_reason reason)
struct kmsg_dumper *dumper;
unsigned long flags;
- if ((reason > KMSG_DUMP_OOPS) && !always_kmsg_dump)
- return;
-
rcu_read_lock();
list_for_each_entry_rcu(dumper, &dump_list, list) {
- if (dumper->max_reason && reason > dumper->max_reason)
+ enum kmsg_dump_reason max_reason = dumper->max_reason;
+
+ /*
+ * If client has not provided a specific max_reason, default
+ * to KMSG_DUMP_OOPS, unless always_kmsg_dump was set.
+ */
+ if (max_reason == KMSG_DUMP_UNDEF) {
+ max_reason = always_kmsg_dump ? KMSG_DUMP_MAX :
+ KMSG_DUMP_OOPS;
+ }
+ if (reason > max_reason)
continue;
/* initialize iterator with data about the stored records */
@@ -3360,7 +3431,7 @@ out:
EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer);
/**
- * kmsg_dump_rewind_nolock - reset the interator (unlocked version)
+ * kmsg_dump_rewind_nolock - reset the iterator (unlocked version)
* @dumper: registered kmsg dumper
*
* Reset the dumper's iterator so that kmsg_dump_get_line() and
@@ -3378,7 +3449,7 @@ void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper)
}
/**
- * kmsg_dump_rewind - reset the interator
+ * kmsg_dump_rewind - reset the iterator
* @dumper: registered kmsg dumper
*
* Reset the dumper's iterator so that kmsg_dump_get_line() and
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index f51385b86ea3..c716eadc7617 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -778,6 +778,24 @@ void rcu_irq_exit_preempt(void)
"RCU in extended quiescent state!");
}
+#ifdef CONFIG_PROVE_RCU
+/**
+ * rcu_irq_exit_check_preempt - Validate that scheduling is possible
+ *
+ * Asserts via lockdep that interrupts are disabled, then issues an
+ * RCU_LOCKDEP_WARN() if this CPU's dynticks_nesting counter is not
+ * positive, if its dynticks_nmi_nesting counter differs from
+ * DYNTICK_IRQ_NONIDLE, or if rcu_dynticks_curr_cpu_in_eqs() reports
+ * that the CPU is in an extended quiescent state.
+ */
+void rcu_irq_exit_check_preempt(void)
+{
+ lockdep_assert_irqs_disabled();
+
+ /* Each check below only warns; none of them changes RCU state. */
+ RCU_LOCKDEP_WARN(__this_cpu_read(rcu_data.dynticks_nesting) <= 0,
+ "RCU dynticks_nesting counter underflow/zero!");
+ RCU_LOCKDEP_WARN(__this_cpu_read(rcu_data.dynticks_nmi_nesting) !=
+ DYNTICK_IRQ_NONIDLE,
+ "Bad RCU dynticks_nmi_nesting counter\n");
+ RCU_LOCKDEP_WARN(rcu_dynticks_curr_cpu_in_eqs(),
+ "RCU in extended quiescent state!");
+}
+#endif /* #ifdef CONFIG_PROVE_RCU */
+
/*
* Wrapper for rcu_irq_exit() where interrupts are enabled.
*
@@ -861,6 +879,67 @@ void noinstr rcu_user_exit(void)
{
rcu_eqs_exit(1);
}
+
+/**
+ * __rcu_irq_enter_check_tick - Enable scheduler tick on CPU if RCU needs it.
+ *
+ * The scheduler tick is not normally enabled when CPUs enter the kernel
+ * from nohz_full userspace execution. After all, nohz_full userspace
+ * execution is an RCU quiescent state and the time executing in the kernel
+ * is quite short. Except of course when it isn't. And it is not hard to
+ * cause a large system to spend tens of seconds or even minutes looping
+ * in the kernel, which can cause a number of problems, including RCU CPU
+ * stall warnings.
+ *
+ * Therefore, if a nohz_full CPU fails to report a quiescent state
+ * in a timely manner, the RCU grace-period kthread sets that CPU's
+ * ->rcu_urgent_qs flag with the expectation that the next interrupt or
+ * exception will invoke this function, which will turn on the scheduler
+ * tick, which will enable RCU to detect that CPU's quiescent states,
+ * for example, due to cond_resched() calls in CONFIG_PREEMPT=n kernels.
+ * The tick will be disabled once a quiescent state is reported for
+ * this CPU.
+ *
+ * Of course, in carefully tuned systems, there might never be an
+ * interrupt or exception. In that case, the RCU grace-period kthread
+ * will eventually cause one to happen. However, in less carefully
+ * controlled environments, this function allows RCU to get what it
+ * needs without creating otherwise useless interruptions.
+ */
+void __rcu_irq_enter_check_tick(void)
+{
+ // rcu_data instance of the CPU executing this function.
+ struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
+
+ // Enabling the tick is unsafe in NMI handlers.
+ if (WARN_ON_ONCE(in_nmi()))
+ return;
+
+ RCU_LOCKDEP_WARN(rcu_dynticks_curr_cpu_in_eqs(),
+ "Illegal rcu_irq_enter_check_tick() from extended quiescent state");
+
+ // Lockless check first; ->rcu_urgent_qs and ->rcu_forced_tick are
+ // rechecked below once the rcu_node lock is held.
+ if (!tick_nohz_full_cpu(rdp->cpu) ||
+ !READ_ONCE(rdp->rcu_urgent_qs) ||
+ READ_ONCE(rdp->rcu_forced_tick)) {
+ // RCU doesn't need nohz_full help from this CPU, or it is
+ // already getting that help.
+ return;
+ }
+
+ // We get here only when not in an extended quiescent state and
+ // from interrupts (as opposed to NMIs). Therefore, (1) RCU is
+ // already watching and (2) The fact that we are in an interrupt
+ // handler and that the rcu_node lock is an irq-disabled lock
+ // prevents self-deadlock. So we can safely recheck under the lock.
+ // Note that the nohz_full state currently cannot change.
+ raw_spin_lock_rcu_node(rdp->mynode);
+ if (rdp->rcu_urgent_qs && !rdp->rcu_forced_tick) {
+ // A nohz_full CPU is in the kernel and RCU needs a
+ // quiescent state. Turn on the tick!
+ WRITE_ONCE(rdp->rcu_forced_tick, true);
+ tick_dep_set_cpu(rdp->cpu, TICK_DEP_BIT_RCU);
+ }
+ raw_spin_unlock_rcu_node(rdp->mynode);
+}
#endif /* CONFIG_NO_HZ_FULL */
/**
@@ -907,26 +986,7 @@ noinstr void rcu_nmi_enter(void)
incby = 1;
} else if (!in_nmi()) {
instrumentation_begin();
- if (tick_nohz_full_cpu(rdp->cpu) &&
- rdp->dynticks_nmi_nesting == DYNTICK_IRQ_NONIDLE &&
- READ_ONCE(rdp->rcu_urgent_qs) &&
- !READ_ONCE(rdp->rcu_forced_tick)) {
- // We get here only if we had already exited the
- // extended quiescent state and this was an
- // interrupt (not an NMI). Therefore, (1) RCU is
- // already watching and (2) The fact that we are in
- // an interrupt handler and that the rcu_node lock
- // is an irq-disabled lock prevents self-deadlock.
- // So we can safely recheck under the lock.
- raw_spin_lock_rcu_node(rdp->mynode);
- if (rdp->rcu_urgent_qs && !rdp->rcu_forced_tick) {
- // A nohz_full CPU is in the kernel and RCU
- // needs a quiescent state. Turn on the tick!
- WRITE_ONCE(rdp->rcu_forced_tick, true);
- tick_dep_set_cpu(rdp->cpu, TICK_DEP_BIT_RCU);
- }
- raw_spin_unlock_rcu_node(rdp->mynode);
- }
+ rcu_irq_enter_check_tick();
instrumentation_end();
}
instrumentation_begin();
diff --git a/kernel/reboot.c b/kernel/reboot.c
index c4d472b7f1b4..491f1347bf43 100644
--- a/kernel/reboot.c
+++ b/kernel/reboot.c
@@ -250,7 +250,7 @@ void kernel_restart(char *cmd)
pr_emerg("Restarting system\n");
else
pr_emerg("Restarting system with command '%s'\n", cmd);
- kmsg_dump(KMSG_DUMP_RESTART);
+ kmsg_dump(KMSG_DUMP_SHUTDOWN);
machine_restart(cmd);
}
EXPORT_SYMBOL_GPL(kernel_restart);
@@ -274,7 +274,7 @@ void kernel_halt(void)
migrate_to_reboot_cpu();
syscore_shutdown();
pr_emerg("System halted\n");
- kmsg_dump(KMSG_DUMP_HALT);
+ kmsg_dump(KMSG_DUMP_SHUTDOWN);
machine_halt();
}
EXPORT_SYMBOL_GPL(kernel_halt);
@@ -292,7 +292,7 @@ void kernel_power_off(void)
migrate_to_reboot_cpu();
syscore_shutdown();
pr_emerg("Power down\n");
- kmsg_dump(KMSG_DUMP_POWEROFF);
+ kmsg_dump(KMSG_DUMP_SHUTDOWN);
machine_power_off();
}
EXPORT_SYMBOL_GPL(kernel_power_off);
diff --git a/kernel/relay.c b/kernel/relay.c
index ade14fb7ce2e..d0c9c287680a 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -1,7 +1,7 @@
/*
* Public API and common code for kernel->userspace relay file support.
*
- * See Documentation/filesystems/relay.txt for an overview.
+ * See Documentation/filesystems/relay.rst for an overview.
*
* Copyright (C) 2002-2005 - Tom Zanussi (zanussi@us.ibm.com), IBM Corp
* Copyright (C) 1999-2005 - Karim Yaghmour (karim@opersys.com)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 43ba2d4a8eca..d7669027aede 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -11,6 +11,7 @@
#include <linux/nospec.h>
#include <linux/kcov.h>
+#include <linux/scs.h>
#include <asm/switch_to.h>
#include <asm/tlb.h>
@@ -3940,6 +3941,9 @@ static inline void schedule_debug(struct task_struct *prev, bool preempt)
#ifdef CONFIG_SCHED_STACK_END_CHECK
if (task_stack_end_corrupted(prev))
panic("corrupted stack end detected inside scheduler\n");
+
+ if (task_scs_end_corrupted(prev))
+ panic("corrupted shadow stack detected inside scheduler\n");
#endif
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
@@ -6110,6 +6114,7 @@ void init_idle(struct task_struct *idle, int cpu)
idle->se.exec_start = sched_clock();
idle->flags |= PF_IDLE;
+ scs_task_reset(idle);
kasan_unpoison_task_stack(idle);
#ifdef CONFIG_SMP
diff --git a/kernel/scs.c b/kernel/scs.c
new file mode 100644
index 000000000000..222a7a9ad543
--- /dev/null
+++ b/kernel/scs.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Shadow Call Stack support.
+ *
+ * Copyright (C) 2019 Google LLC
+ */
+
+#include <linux/kasan.h>
+#include <linux/mm.h>
+#include <linux/scs.h>
+#include <linux/slab.h>
+#include <linux/vmstat.h>
+
+static struct kmem_cache *scs_cache;
+
+/*
+ * Adjust the NR_KERNEL_SCS_KB counter of the zone containing @s by
+ * @account shadow stacks, expressed in kilobytes. Callers in this file
+ * pass 1 after allocating a stack and -1 before freeing one.
+ */
+static void __scs_account(void *s, int account)
+{
+	mod_zone_page_state(page_zone(virt_to_page(s)), NR_KERNEL_SCS_KB,
+			    account * (SCS_SIZE / SZ_1K));
+}
+
+/*
+ * Allocate one shadow call stack from scs_cache on @node.
+ *
+ * Returns the new stack, or NULL when the slab allocation failed.
+ */
+static void *scs_alloc(int node)
+{
+ void *s = kmem_cache_alloc_node(scs_cache, GFP_SCS, node);
+
+ if (!s)
+ return NULL;
+
+ /*
+ * Write SCS_END_MAGIC into the slot returned by __scs_magic(); this
+ * is done before the object is poisoned below.
+ */
+ *__scs_magic(s) = SCS_END_MAGIC;
+
+ /*
+ * Poison the allocation to catch unintentional accesses to
+ * the shadow stack when KASAN is enabled.
+ */
+ kasan_poison_object_data(scs_cache, s);
+ /* Add this stack to the NR_KERNEL_SCS_KB accounting. */
+ __scs_account(s, 1);
+ return s;
+}
+
+/*
+ * Give shadow call stack @s back to scs_cache, reversing the steps taken
+ * by scs_alloc(): remove it from the accounting, unpoison the object,
+ * then free it.
+ */
+static void scs_free(void *s)
+{
+ __scs_account(s, -1);
+ kasan_unpoison_object_data(scs_cache, s);
+ kmem_cache_free(scs_cache, s);
+}
+
+/*
+ * Create the slab cache, with SCS_SIZE-byte objects, that scs_alloc()
+ * takes shadow call stacks from.
+ *
+ * NOTE(review): the kmem_cache_create() result is stored unchecked --
+ * confirm that scs_alloc() cannot run while scs_cache is NULL.
+ */
+void __init scs_init(void)
+{
+ scs_cache = kmem_cache_create("scs_cache", SCS_SIZE, 0, 0, NULL);
+}
+
+/*
+ * Allocate a shadow call stack on @node for @tsk and point both the
+ * task's stack base and its current shadow stack pointer at it.
+ *
+ * Returns 0 on success or -ENOMEM when no stack could be allocated.
+ */
+int scs_prepare(struct task_struct *tsk, int node)
+{
+	void *stack = scs_alloc(node);
+
+	if (stack == NULL)
+		return -ENOMEM;
+
+	/* A fresh stack is empty, so base and stack pointer coincide. */
+	task_scs_sp(tsk) = stack;
+	task_scs(tsk) = stack;
+
+	return 0;
+}
+
+/*
+ * Track and report the largest shadow call stack usage seen so far.
+ *
+ * Does nothing unless CONFIG_DEBUG_STACK_USAGE is enabled. Otherwise it
+ * walks @tsk's shadow stack from its base towards the end-of-stack magic
+ * slot, stopping at the first zero entry, and treats everything before
+ * that point as used. If the resulting byte count exceeds the recorded
+ * maximum, the maximum is updated with a cmpxchg loop and the new value
+ * is logged.
+ */
+static void scs_check_usage(struct task_struct *tsk)
+{
+	static unsigned long highest;
+
+	unsigned long *p, prev, curr = highest, used = 0;
+
+	if (!IS_ENABLED(CONFIG_DEBUG_STACK_USAGE))
+		return;
+
+	/*
+	 * __scs_magic() expects the base of a shadow stack (see its use in
+	 * scs_alloc()), not a task pointer, so derive the upper bound from
+	 * task_scs(tsk).
+	 */
+	for (p = task_scs(tsk); p < __scs_magic(task_scs(tsk)); ++p) {
+		if (!READ_ONCE_NOCHECK(*p))
+			break;
+		/* The message below reports bytes, so count bytes. */
+		used += sizeof(*p);
+	}
+
+	while (used > curr) {
+		prev = cmpxchg_relaxed(&highest, curr, used);
+
+		if (prev == curr) {
+			pr_info("%s (%d): highest shadow stack usage: %lu bytes\n",
+				tsk->comm, task_pid_nr(tsk), used);
+			break;
+		}
+
+		/* highest changed under us; retry against the new value. */
+		curr = prev;
+	}
+}
+
+/*
+ * Release the shadow call stack of @tsk, if it has one: warn when the
+ * end-of-stack marker is corrupted, run the usage check, then free the
+ * stack.
+ */
+void scs_release(struct task_struct *tsk)
+{
+	void *stack = task_scs(tsk);
+
+	if (stack) {
+		WARN(task_scs_end_corrupted(tsk),
+		     "corrupted shadow stack detected when freeing task\n");
+		scs_check_usage(tsk);
+		scs_free(stack);
+	}
+}
diff --git a/kernel/signal.c b/kernel/signal.c
index 284fc1600063..5ca48cc5da76 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3235,94 +3235,94 @@ int copy_siginfo_from_user(kernel_siginfo_t *to, const siginfo_t __user *from)
}
#ifdef CONFIG_COMPAT
-int copy_siginfo_to_user32(struct compat_siginfo __user *to,
- const struct kernel_siginfo *from)
-#if defined(CONFIG_X86_X32_ABI) || defined(CONFIG_IA32_EMULATION)
-{
- return __copy_siginfo_to_user32(to, from, in_x32_syscall());
-}
-int __copy_siginfo_to_user32(struct compat_siginfo __user *to,
- const struct kernel_siginfo *from, bool x32_ABI)
-#endif
+/**
+ * copy_siginfo_to_external32 - copy a kernel siginfo into a compat user siginfo
+ * @to: compat siginfo destination
+ * @from: kernel siginfo source
+ *
+ * Note: This function does not work properly for SIGCHLD on x32, but
+ * fortunately it doesn't have to. The only valid callers for this function are
+ * copy_siginfo_to_user32, which is overridden for x32, and the coredump code.
+ * The latter does not care because SIGCHLD will never cause a coredump.
+ */
+void copy_siginfo_to_external32(struct compat_siginfo *to,
+ const struct kernel_siginfo *from)
{
- struct compat_siginfo new;
- memset(&new, 0, sizeof(new));
+ memset(to, 0, sizeof(*to));
- new.si_signo = from->si_signo;
- new.si_errno = from->si_errno;
- new.si_code = from->si_code;
+ to->si_signo = from->si_signo;
+ to->si_errno = from->si_errno;
+ to->si_code = from->si_code;
switch(siginfo_layout(from->si_signo, from->si_code)) {
case SIL_KILL:
- new.si_pid = from->si_pid;
- new.si_uid = from->si_uid;
+ to->si_pid = from->si_pid;
+ to->si_uid = from->si_uid;
break;
case SIL_TIMER:
- new.si_tid = from->si_tid;
- new.si_overrun = from->si_overrun;
- new.si_int = from->si_int;
+ to->si_tid = from->si_tid;
+ to->si_overrun = from->si_overrun;
+ to->si_int = from->si_int;
break;
case SIL_POLL:
- new.si_band = from->si_band;
- new.si_fd = from->si_fd;
+ to->si_band = from->si_band;
+ to->si_fd = from->si_fd;
break;
case SIL_FAULT:
- new.si_addr = ptr_to_compat(from->si_addr);
+ to->si_addr = ptr_to_compat(from->si_addr);
#ifdef __ARCH_SI_TRAPNO
- new.si_trapno = from->si_trapno;
+ to->si_trapno = from->si_trapno;
#endif
break;
case SIL_FAULT_MCEERR:
- new.si_addr = ptr_to_compat(from->si_addr);
+ to->si_addr = ptr_to_compat(from->si_addr);
#ifdef __ARCH_SI_TRAPNO
- new.si_trapno = from->si_trapno;
+ to->si_trapno = from->si_trapno;
#endif
- new.si_addr_lsb = from->si_addr_lsb;
+ to->si_addr_lsb = from->si_addr_lsb;
break;
case SIL_FAULT_BNDERR:
- new.si_addr = ptr_to_compat(from->si_addr);
+ to->si_addr = ptr_to_compat(from->si_addr);
#ifdef __ARCH_SI_TRAPNO
- new.si_trapno = from->si_trapno;
+ to->si_trapno = from->si_trapno;
#endif
- new.si_lower = ptr_to_compat(from->si_lower);
- new.si_upper = ptr_to_compat(from->si_upper);
+ to->si_lower = ptr_to_compat(from->si_lower);
+ to->si_upper = ptr_to_compat(from->si_upper);
break;
case SIL_FAULT_PKUERR:
- new.si_addr = ptr_to_compat(from->si_addr);
+ to->si_addr = ptr_to_compat(from->si_addr);
#ifdef __ARCH_SI_TRAPNO
- new.si_trapno = from->si_trapno;
+ to->si_trapno = from->si_trapno;
#endif
- new.si_pkey = from->si_pkey;
+ to->si_pkey = from->si_pkey;
break;
case SIL_CHLD:
- new.si_pid = from->si_pid;
- new.si_uid = from->si_uid;
- new.si_status = from->si_status;
-#ifdef CONFIG_X86_X32_ABI
- if (x32_ABI) {
- new._sifields._sigchld_x32._utime = from->si_utime;
- new._sifields._sigchld_x32._stime = from->si_stime;
- } else
-#endif
- {
- new.si_utime = from->si_utime;
- new.si_stime = from->si_stime;
- }
+ to->si_pid = from->si_pid;
+ to->si_uid = from->si_uid;
+ to->si_status = from->si_status;
+ to->si_utime = from->si_utime;
+ to->si_stime = from->si_stime;
break;
case SIL_RT:
- new.si_pid = from->si_pid;
- new.si_uid = from->si_uid;
- new.si_int = from->si_int;
+ to->si_pid = from->si_pid;
+ to->si_uid = from->si_uid;
+ to->si_int = from->si_int;
break;
case SIL_SYS:
- new.si_call_addr = ptr_to_compat(from->si_call_addr);
- new.si_syscall = from->si_syscall;
- new.si_arch = from->si_arch;
+ to->si_call_addr = ptr_to_compat(from->si_call_addr);
+ to->si_syscall = from->si_syscall;
+ to->si_arch = from->si_arch;
break;
}
+}
+int __copy_siginfo_to_user32(struct compat_siginfo __user *to,
+ const struct kernel_siginfo *from)
+{
+ struct compat_siginfo new;
+
+ copy_siginfo_to_external32(&new, from);
if (copy_to_user(to, &new, sizeof(struct compat_siginfo)))
return -EFAULT;
-
return 0;
}
diff --git a/kernel/smp.c b/kernel/smp.c
index c80486a7e3b8..472c2b274c65 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -709,7 +709,7 @@ void __init smp_init(void)
* early_boot_irqs_disabled is set. Use local_irq_save/restore() instead
* of local_irq_disable/enable().
*/
-void on_each_cpu(void (*func) (void *info), void *info, int wait)
+void on_each_cpu(smp_call_func_t func, void *info, int wait)
{
unsigned long flags;
diff --git a/kernel/sys.c b/kernel/sys.c
index d325f3ab624a..891667a49bb7 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2262,7 +2262,7 @@ int __weak arch_prctl_spec_ctrl_set(struct task_struct *t, unsigned long which,
return -EINVAL;
}
-#define PR_IO_FLUSHER (PF_MEMALLOC_NOIO | PF_LESS_THROTTLE)
+#define PR_IO_FLUSHER (PF_MEMALLOC_NOIO | PF_LOCAL_THROTTLE)
SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
unsigned long, arg4, unsigned long, arg5)
@@ -2634,6 +2634,7 @@ struct compat_sysinfo {
COMPAT_SYSCALL_DEFINE1(sysinfo, struct compat_sysinfo __user *, info)
{
struct sysinfo s;
+ struct compat_sysinfo s_32;
do_sysinfo(&s);
@@ -2658,23 +2659,23 @@ COMPAT_SYSCALL_DEFINE1(sysinfo, struct compat_sysinfo __user *, info)
s.freehigh >>= bitcount;
}
- if (!access_ok(info, sizeof(struct compat_sysinfo)) ||
- __put_user(s.uptime, &info->uptime) ||
- __put_user(s.loads[0], &info->loads[0]) ||
- __put_user(s.loads[1], &info->loads[1]) ||
- __put_user(s.loads[2], &info->loads[2]) ||
- __put_user(s.totalram, &info->totalram) ||
- __put_user(s.freeram, &info->freeram) ||
- __put_user(s.sharedram, &info->sharedram) ||
- __put_user(s.bufferram, &info->bufferram) ||
- __put_user(s.totalswap, &info->totalswap) ||
- __put_user(s.freeswap, &info->freeswap) ||
- __put_user(s.procs, &info->procs) ||
- __put_user(s.totalhigh, &info->totalhigh) ||
- __put_user(s.freehigh, &info->freehigh) ||
- __put_user(s.mem_unit, &info->mem_unit))
+ memset(&s_32, 0, sizeof(s_32));
+ s_32.uptime = s.uptime;
+ s_32.loads[0] = s.loads[0];
+ s_32.loads[1] = s.loads[1];
+ s_32.loads[2] = s.loads[2];
+ s_32.totalram = s.totalram;
+ s_32.freeram = s.freeram;
+ s_32.sharedram = s.sharedram;
+ s_32.bufferram = s.bufferram;
+ s_32.totalswap = s.totalswap;
+ s_32.freeswap = s.freeswap;
+ s_32.procs = s.procs;
+ s_32.totalhigh = s.totalhigh;
+ s_32.freehigh = s.freehigh;
+ s_32.mem_unit = s.mem_unit;
+ if (copy_to_user(info, &s_32, sizeof(s_32)))
return -EFAULT;
-
return 0;
}
#endif /* CONFIG_COMPAT */
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index ca39dc3230cb..ea47f2084087 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -170,10 +170,10 @@ void __trace_note_message(struct blk_trace *bt, struct blkcg *blkcg,
if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP))
blkcg = NULL;
#ifdef CONFIG_BLK_CGROUP
- trace_note(bt, 0, BLK_TN_MESSAGE, buf, n,
+ trace_note(bt, current->pid, BLK_TN_MESSAGE, buf, n,
blkcg ? cgroup_id(blkcg->css.cgroup) : 1);
#else
- trace_note(bt, 0, BLK_TN_MESSAGE, buf, n, 0);
+ trace_note(bt, current->pid, BLK_TN_MESSAGE, buf, n, 0);
#endif
local_irq_restore(flags);
}
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index a010edc37ee0..92ba69b716dc 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1500,7 +1500,7 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info)
u32 *ids, prog_cnt, ids_len;
int ret;
- if (!capable(CAP_SYS_ADMIN))
+ if (!perfmon_capable())
return -EPERM;
if (event->attr.type != PERF_TYPE_TRACEPOINT)
return -EINVAL;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 29615f15a820..f12e99b387b2 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -8527,18 +8527,6 @@ static int allocate_trace_buffers(struct trace_array *tr, int size)
allocate_snapshot = false;
#endif
- /*
- * Because of some magic with the way alloc_percpu() works on
- * x86_64, we need to synchronize the pgd of all the tables,
- * otherwise the trace events that happen in x86_64 page fault
- * handlers can't cope with accessing the chance that a
- * alloc_percpu()'d memory might be touched in the page fault trace
- * event. Oh, and we need to audit all other alloc_percpu() and vmalloc()
- * calls in tracing, because something might get triggered within a
- * page fault trace event!
- */
- vmalloc_sync_mappings();
-
return 0;
}