summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/audit.c67
-rw-r--r--kernel/audit.h8
-rw-r--r--kernel/auditsc.c25
-rw-r--r--kernel/bpf/core.c12
-rw-r--r--kernel/bpf/verifier.c64
-rw-r--r--kernel/cgroup/cgroup-v1.c2
-rw-r--r--kernel/cgroup/cgroup.c9
-rw-r--r--kernel/irq/affinity.c20
-rw-r--r--kernel/kthread.c3
-rw-r--r--kernel/ptrace.c14
-rw-r--r--kernel/sysctl.c3
11 files changed, 127 insertions, 100 deletions
diff --git a/kernel/audit.c b/kernel/audit.c
index 2f4964cfde0b..a871bf80fde1 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -160,7 +160,6 @@ static LIST_HEAD(audit_freelist);
/* queue msgs to send via kauditd_task */
static struct sk_buff_head audit_queue;
-static void kauditd_hold_skb(struct sk_buff *skb);
/* queue msgs due to temporary unicast send problems */
static struct sk_buff_head audit_retry_queue;
/* queue msgs waiting for new auditd connection */
@@ -454,30 +453,6 @@ static void auditd_set(int pid, u32 portid, struct net *net)
}
/**
- * auditd_reset - Disconnect the auditd connection
- *
- * Description:
- * Break the auditd/kauditd connection and move all the queued records into the
- * hold queue in case auditd reconnects.
- */
-static void auditd_reset(void)
-{
- struct sk_buff *skb;
-
- /* if it isn't already broken, break the connection */
- rcu_read_lock();
- if (auditd_conn.pid)
- auditd_set(0, 0, NULL);
- rcu_read_unlock();
-
- /* flush all of the main and retry queues to the hold queue */
- while ((skb = skb_dequeue(&audit_retry_queue)))
- kauditd_hold_skb(skb);
- while ((skb = skb_dequeue(&audit_queue)))
- kauditd_hold_skb(skb);
-}
-
-/**
* kauditd_print_skb - Print the audit record to the ring buffer
* @skb: audit record
*
@@ -505,9 +480,6 @@ static void kauditd_rehold_skb(struct sk_buff *skb)
{
/* put the record back in the queue at the same place */
skb_queue_head(&audit_hold_queue, skb);
-
- /* fail the auditd connection */
- auditd_reset();
}
/**
@@ -544,9 +516,6 @@ static void kauditd_hold_skb(struct sk_buff *skb)
/* we have no other options - drop the message */
audit_log_lost("kauditd hold queue overflow");
kfree_skb(skb);
-
- /* fail the auditd connection */
- auditd_reset();
}
/**
@@ -567,6 +536,30 @@ static void kauditd_retry_skb(struct sk_buff *skb)
}
/**
+ * auditd_reset - Disconnect the auditd connection
+ *
+ * Description:
+ * Break the auditd/kauditd connection and move all the queued records into the
+ * hold queue in case auditd reconnects.
+ */
+static void auditd_reset(void)
+{
+ struct sk_buff *skb;
+
+ /* if it isn't already broken, break the connection */
+ rcu_read_lock();
+ if (auditd_conn.pid)
+ auditd_set(0, 0, NULL);
+ rcu_read_unlock();
+
+ /* flush all of the main and retry queues to the hold queue */
+ while ((skb = skb_dequeue(&audit_retry_queue)))
+ kauditd_hold_skb(skb);
+ while ((skb = skb_dequeue(&audit_queue)))
+ kauditd_hold_skb(skb);
+}
+
+/**
* auditd_send_unicast_skb - Send a record via unicast to auditd
* @skb: audit record
*
@@ -758,6 +751,7 @@ static int kauditd_thread(void *dummy)
NULL, kauditd_rehold_skb);
if (rc < 0) {
sk = NULL;
+ auditd_reset();
goto main_queue;
}
@@ -767,6 +761,7 @@ static int kauditd_thread(void *dummy)
NULL, kauditd_hold_skb);
if (rc < 0) {
sk = NULL;
+ auditd_reset();
goto main_queue;
}
@@ -775,16 +770,18 @@ main_queue:
* unicast, dump failed record sends to the retry queue; if
* sk == NULL due to previous failures we will just do the
* multicast send and move the record to the retry queue */
- kauditd_send_queue(sk, portid, &audit_queue, 1,
- kauditd_send_multicast_skb,
- kauditd_retry_skb);
+ rc = kauditd_send_queue(sk, portid, &audit_queue, 1,
+ kauditd_send_multicast_skb,
+ kauditd_retry_skb);
+ if (sk == NULL || rc < 0)
+ auditd_reset();
+ sk = NULL;
/* drop our netns reference, no auditd sends past this line */
if (net) {
put_net(net);
net = NULL;
}
- sk = NULL;
/* we have processed all the queues so wake everyone */
wake_up(&audit_backlog_wait);
diff --git a/kernel/audit.h b/kernel/audit.h
index 0f1cf6d1878a..0d87f8ab8778 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -333,13 +333,7 @@ extern u32 audit_sig_sid;
extern int audit_filter(int msgtype, unsigned int listtype);
#ifdef CONFIG_AUDITSYSCALL
-extern int __audit_signal_info(int sig, struct task_struct *t);
-static inline int audit_signal_info(int sig, struct task_struct *t)
-{
- if (auditd_test_task(t) || (audit_signals && !audit_dummy_context()))
- return __audit_signal_info(sig, t);
- return 0;
-}
+extern int audit_signal_info(int sig, struct task_struct *t);
extern void audit_filter_inodes(struct task_struct *, struct audit_context *);
extern struct list_head *audit_killed_trees(void);
#else
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index e59ffc7fc522..1c2333155893 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -2249,26 +2249,27 @@ void __audit_ptrace(struct task_struct *t)
* If the audit subsystem is being terminated, record the task (pid)
* and uid that is doing that.
*/
-int __audit_signal_info(int sig, struct task_struct *t)
+int audit_signal_info(int sig, struct task_struct *t)
{
struct audit_aux_data_pids *axp;
struct task_struct *tsk = current;
struct audit_context *ctx = tsk->audit_context;
kuid_t uid = current_uid(), t_uid = task_uid(t);
- if (auditd_test_task(t)) {
- if (sig == SIGTERM || sig == SIGHUP || sig == SIGUSR1 || sig == SIGUSR2) {
- audit_sig_pid = task_tgid_nr(tsk);
- if (uid_valid(tsk->loginuid))
- audit_sig_uid = tsk->loginuid;
- else
- audit_sig_uid = uid;
- security_task_getsecid(tsk, &audit_sig_sid);
- }
- if (!audit_signals || audit_dummy_context())
- return 0;
+ if (auditd_test_task(t) &&
+ (sig == SIGTERM || sig == SIGHUP ||
+ sig == SIGUSR1 || sig == SIGUSR2)) {
+ audit_sig_pid = task_tgid_nr(tsk);
+ if (uid_valid(tsk->loginuid))
+ audit_sig_uid = tsk->loginuid;
+ else
+ audit_sig_uid = uid;
+ security_task_getsecid(tsk, &audit_sig_sid);
}
+ if (!audit_signals || audit_dummy_context())
+ return 0;
+
/* optimize the common case by putting first signal recipient directly
* in audit_context */
if (!ctx->target_pid) {
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index f45827e205d3..b4f1cb0c5ac7 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1162,12 +1162,12 @@ out:
LD_ABS_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + imm32)) */
off = IMM;
load_word:
- /* BPF_LD + BPD_ABS and BPF_LD + BPF_IND insns are
- * only appearing in the programs where ctx ==
- * skb. All programs keep 'ctx' in regs[BPF_REG_CTX]
- * == BPF_R6, bpf_convert_filter() saves it in BPF_R6,
- * internal BPF verifier will check that BPF_R6 ==
- * ctx.
+ /* BPF_LD + BPD_ABS and BPF_LD + BPF_IND insns are only
+ * appearing in the programs where ctx == skb
+ * (see may_access_skb() in the verifier). All programs
+ * keep 'ctx' in regs[BPF_REG_CTX] == BPF_R6,
+ * bpf_convert_filter() saves it in BPF_R6, internal BPF
+ * verifier will check that BPF_R6 == ctx.
*
* BPF_ABS and BPF_IND are wrappers of function calls,
* so they scratch BPF_R1-BPF_R5 registers, preserve
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 796b68d00119..a834068a400e 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -765,38 +765,56 @@ static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
}
}
-static int check_ptr_alignment(struct bpf_verifier_env *env,
- struct bpf_reg_state *reg, int off, int size)
+static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg,
+ int off, int size)
{
- if (reg->type != PTR_TO_PACKET && reg->type != PTR_TO_MAP_VALUE_ADJ) {
- if (off % size != 0) {
- verbose("misaligned access off %d size %d\n",
- off, size);
- return -EACCES;
- } else {
- return 0;
- }
- }
-
- if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
- /* misaligned access to packet is ok on x86,arm,arm64 */
- return 0;
-
if (reg->id && size != 1) {
- verbose("Unknown packet alignment. Only byte-sized access allowed\n");
+ verbose("Unknown alignment. Only byte-sized access allowed in packet access.\n");
return -EACCES;
}
/* skb->data is NET_IP_ALIGN-ed */
- if (reg->type == PTR_TO_PACKET &&
- (NET_IP_ALIGN + reg->off + off) % size != 0) {
+ if ((NET_IP_ALIGN + reg->off + off) % size != 0) {
verbose("misaligned packet access off %d+%d+%d size %d\n",
NET_IP_ALIGN, reg->off, off, size);
return -EACCES;
}
+
return 0;
}
+static int check_val_ptr_alignment(const struct bpf_reg_state *reg,
+ int size)
+{
+ if (size != 1) {
+ verbose("Unknown alignment. Only byte-sized access allowed in value access.\n");
+ return -EACCES;
+ }
+
+ return 0;
+}
+
+static int check_ptr_alignment(const struct bpf_reg_state *reg,
+ int off, int size)
+{
+ switch (reg->type) {
+ case PTR_TO_PACKET:
+ return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ? 0 :
+ check_pkt_ptr_alignment(reg, off, size);
+ case PTR_TO_MAP_VALUE_ADJ:
+ return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ? 0 :
+ check_val_ptr_alignment(reg, size);
+ default:
+ if (off % size != 0) {
+ verbose("misaligned access off %d size %d\n",
+ off, size);
+ return -EACCES;
+ }
+
+ return 0;
+ }
+}
+
/* check whether memory at (regno + off) is accessible for t = (read | write)
* if t==write, value_regno is a register which value is stored into memory
* if t==read, value_regno is a register which will receive the value from memory
@@ -818,7 +836,7 @@ static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off,
if (size < 0)
return size;
- err = check_ptr_alignment(env, reg, off, size);
+ err = check_ptr_alignment(reg, off, size);
if (err)
return err;
@@ -1925,6 +1943,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
* register as unknown.
*/
if (env->allow_ptr_leaks &&
+ BPF_CLASS(insn->code) == BPF_ALU64 && opcode == BPF_ADD &&
(dst_reg->type == PTR_TO_MAP_VALUE ||
dst_reg->type == PTR_TO_MAP_VALUE_ADJ))
dst_reg->type = PTR_TO_MAP_VALUE_ADJ;
@@ -1973,14 +1992,15 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state,
for (i = 0; i < MAX_BPF_REG; i++)
if (regs[i].type == PTR_TO_PACKET && regs[i].id == dst_reg->id)
- regs[i].range = dst_reg->off;
+ /* keep the maximum range already checked */
+ regs[i].range = max(regs[i].range, dst_reg->off);
for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
if (state->stack_slot_type[i] != STACK_SPILL)
continue;
reg = &state->spilled_regs[i / BPF_REG_SIZE];
if (reg->type == PTR_TO_PACKET && reg->id == dst_reg->id)
- reg->range = dst_reg->off;
+ reg->range = max(reg->range, dst_reg->off);
}
}
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index 1dc22f6b49f5..12e19f0636ea 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -1146,7 +1146,7 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags,
* path is super cold. Let's just sleep a bit and retry.
*/
pinned_sb = kernfs_pin_sb(root->kf_root, NULL);
- if (IS_ERR(pinned_sb) ||
+ if (IS_ERR_OR_NULL(pinned_sb) ||
!percpu_ref_tryget_live(&root->cgrp.self.refcnt)) {
mutex_unlock(&cgroup_mutex);
if (!IS_ERR_OR_NULL(pinned_sb))
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 48851327a15e..687f5e0194ef 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -2425,11 +2425,12 @@ ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
tsk = tsk->group_leader;
/*
- * Workqueue threads may acquire PF_NO_SETAFFINITY and become
- * trapped in a cpuset, or RT worker may be born in a cgroup
- * with no rt_runtime allocated. Just say no.
+ * kthreads may acquire PF_NO_SETAFFINITY during initialization.
+ * If userland migrates such a kthread to a non-root cgroup, it can
+ * become trapped in a cpuset, or RT kthread may be born in a
+ * cgroup with no rt_runtime allocated. Just say no.
*/
- if (tsk == kthreadd_task || (tsk->flags & PF_NO_SETAFFINITY)) {
+ if (tsk->no_cgroup_migration || (tsk->flags & PF_NO_SETAFFINITY)) {
ret = -EINVAL;
goto out_unlock_rcu;
}
diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c
index 4544b115f5eb..d052947fe785 100644
--- a/kernel/irq/affinity.c
+++ b/kernel/irq/affinity.c
@@ -59,7 +59,7 @@ static int get_nodes_in_cpumask(const struct cpumask *mask, nodemask_t *nodemsk)
struct cpumask *
irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd)
{
- int n, nodes, vecs_per_node, cpus_per_vec, extra_vecs, curvec;
+ int n, nodes, cpus_per_vec, extra_vecs, curvec;
int affv = nvecs - affd->pre_vectors - affd->post_vectors;
int last_affv = affv + affd->pre_vectors;
nodemask_t nodemsk = NODE_MASK_NONE;
@@ -94,19 +94,21 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd)
goto done;
}
- /* Spread the vectors per node */
- vecs_per_node = affv / nodes;
- /* Account for rounding errors */
- extra_vecs = affv - (nodes * vecs_per_node);
-
for_each_node_mask(n, nodemsk) {
- int ncpus, v, vecs_to_assign = vecs_per_node;
+ int ncpus, v, vecs_to_assign, vecs_per_node;
+
+ /* Spread the vectors per node */
+ vecs_per_node = (affv - curvec) / nodes;
/* Get the cpus on this node which are in the mask */
cpumask_and(nmsk, cpu_online_mask, cpumask_of_node(n));
/* Calculate the number of cpus per vector */
ncpus = cpumask_weight(nmsk);
+ vecs_to_assign = min(vecs_per_node, ncpus);
+
+ /* Account for rounding errors */
+ extra_vecs = ncpus - vecs_to_assign * (ncpus / vecs_to_assign);
for (v = 0; curvec < last_affv && v < vecs_to_assign;
curvec++, v++) {
@@ -115,14 +117,14 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd)
/* Account for extra vectors to compensate rounding errors */
if (extra_vecs) {
cpus_per_vec++;
- if (!--extra_vecs)
- vecs_per_node++;
+ --extra_vecs;
}
irq_spread_init_one(masks + curvec, nmsk, cpus_per_vec);
}
if (curvec >= last_affv)
break;
+ --nodes;
}
done:
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 2f26adea0f84..26db528c1d88 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -20,6 +20,7 @@
#include <linux/freezer.h>
#include <linux/ptrace.h>
#include <linux/uaccess.h>
+#include <linux/cgroup.h>
#include <trace/events/sched.h>
static DEFINE_SPINLOCK(kthread_create_lock);
@@ -225,6 +226,7 @@ static int kthread(void *_create)
ret = -EINTR;
if (!test_bit(KTHREAD_SHOULD_STOP, &self->flags)) {
+ cgroup_kthread_ready();
__kthread_parkme(self);
ret = threadfn(data);
}
@@ -538,6 +540,7 @@ int kthreadd(void *unused)
set_mems_allowed(node_states[N_MEMORY]);
current->flags |= PF_NOFREEZE;
+ cgroup_init_kthreadd();
for (;;) {
set_current_state(TASK_INTERRUPTIBLE);
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 0af928712174..266ddcc1d8bb 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -184,11 +184,17 @@ static void ptrace_unfreeze_traced(struct task_struct *task)
WARN_ON(!task->ptrace || task->parent != current);
+ /*
+ * PTRACE_LISTEN can allow ptrace_trap_notify to wake us up remotely.
+ * Recheck state under the lock to close this race.
+ */
spin_lock_irq(&task->sighand->siglock);
- if (__fatal_signal_pending(task))
- wake_up_state(task, __TASK_TRACED);
- else
- task->state = TASK_TRACED;
+ if (task->state == __TASK_TRACED) {
+ if (__fatal_signal_pending(task))
+ wake_up_state(task, __TASK_TRACED);
+ else
+ task->state = TASK_TRACED;
+ }
spin_unlock_irq(&task->sighand->siglock);
}
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index acf0a5a06da7..8c8714fcb53c 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2133,9 +2133,12 @@ static int do_proc_douintvec_conv(bool *negp, unsigned long *lvalp,
if (write) {
if (*negp)
return -EINVAL;
+ if (*lvalp > UINT_MAX)
+ return -EINVAL;
*valp = *lvalp;
} else {
unsigned int val = *valp;
+ *negp = false;
*lvalp = (unsigned long)val;
}
return 0;