summary | refs | log | tree | commit | diff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/Makefile 2
-rw-r--r-- kernel/audit.c 532
-rw-r--r-- kernel/audit_fsnotify.c 5
-rw-r--r-- kernel/audit_tree.c 3
-rw-r--r-- kernel/audit_watch.c 5
-rw-r--r-- kernel/auditfilter.c 5
-rw-r--r-- kernel/auditsc.c 12
-rw-r--r-- kernel/capability.c 36
-rw-r--r-- kernel/fork.c 9
-rw-r--r-- kernel/padata.c 4
-rw-r--r-- kernel/ptrace.c 70
-rw-r--r-- kernel/seccomp.c 7
12 files changed, 433 insertions, 257 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 314e7d62f5f0..12c679f769c6 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -116,8 +116,6 @@ obj-$(CONFIG_HAS_IOMEM) += memremap.o
$(obj)/configs.o: $(obj)/config_data.h
-# config_data.h contains the same information as ikconfig.h but gzipped.
-# Info from config_data can be extracted from /proc/config*
targets += config_data.gz
$(obj)/config_data.gz: $(KCONFIG_CONFIG) FORCE
$(call if_changed,gzip)
diff --git a/kernel/audit.c b/kernel/audit.c
index 67b9fbd871be..91bff3c0b368 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -107,7 +107,6 @@ static u32 audit_rate_limit;
* When set to zero, this means unlimited. */
static u32 audit_backlog_limit = 64;
#define AUDIT_BACKLOG_WAIT_TIME (60 * HZ)
-static u32 audit_backlog_wait_time_master = AUDIT_BACKLOG_WAIT_TIME;
static u32 audit_backlog_wait_time = AUDIT_BACKLOG_WAIT_TIME;
/* The identity of the user shutting down the audit system. */
@@ -138,11 +137,18 @@ static DEFINE_SPINLOCK(audit_freelist_lock);
static int audit_freelist_count;
static LIST_HEAD(audit_freelist);
-static struct sk_buff_head audit_skb_queue;
-/* queue of skbs to send to auditd when/if it comes back */
-static struct sk_buff_head audit_skb_hold_queue;
+/* queue msgs to send via kauditd_task */
+static struct sk_buff_head audit_queue;
+/* queue msgs due to temporary unicast send problems */
+static struct sk_buff_head audit_retry_queue;
+/* queue msgs waiting for new auditd connection */
+static struct sk_buff_head audit_hold_queue;
+
+/* queue servicing thread */
static struct task_struct *kauditd_task;
static DECLARE_WAIT_QUEUE_HEAD(kauditd_wait);
+
+/* waitqueue for callers who are blocked on the audit backlog */
static DECLARE_WAIT_QUEUE_HEAD(audit_backlog_wait);
static struct audit_features af = {.vers = AUDIT_FEATURE_VERSION,
@@ -338,7 +344,7 @@ static int audit_set_backlog_limit(u32 limit)
static int audit_set_backlog_wait_time(u32 timeout)
{
return audit_do_config_change("audit_backlog_wait_time",
- &audit_backlog_wait_time_master, timeout);
+ &audit_backlog_wait_time, timeout);
}
static int audit_set_enabled(u32 state)
@@ -365,29 +371,10 @@ static int audit_set_failure(u32 state)
}
/*
- * Queue skbs to be sent to auditd when/if it comes back. These skbs should
- * already have been sent via prink/syslog and so if these messages are dropped
- * it is not a huge concern since we already passed the audit_log_lost()
- * notification and stuff. This is just nice to get audit messages during
- * boot before auditd is running or messages generated while auditd is stopped.
- * This only holds messages is audit_default is set, aka booting with audit=1
- * or building your kernel that way.
- */
-static void audit_hold_skb(struct sk_buff *skb)
-{
- if (audit_default &&
- (!audit_backlog_limit ||
- skb_queue_len(&audit_skb_hold_queue) < audit_backlog_limit))
- skb_queue_tail(&audit_skb_hold_queue, skb);
- else
- kfree_skb(skb);
-}
-
-/*
* For one reason or another this nlh isn't getting delivered to the userspace
* audit daemon, just send it to printk.
*/
-static void audit_printk_skb(struct sk_buff *skb)
+static void kauditd_printk_skb(struct sk_buff *skb)
{
struct nlmsghdr *nlh = nlmsg_hdr(skb);
char *data = nlmsg_data(nlh);
@@ -398,58 +385,123 @@ static void audit_printk_skb(struct sk_buff *skb)
else
audit_log_lost("printk limit exceeded");
}
+}
+
+/**
+ * kauditd_hold_skb - Queue an audit record, waiting for auditd
+ * @skb: audit record
+ *
+ * Description:
+ * Queue the audit record, waiting for an instance of auditd. When this
+ * function is called we haven't given up yet on sending the record, but things
+ * are not looking good. The first thing we want to do is try to write the
+ * record via printk and then see if we want to try and hold on to the record
+ * and queue it, if we have room. If we want to hold on to the record, but we
+ * don't have room, record a record lost message.
+ */
+static void kauditd_hold_skb(struct sk_buff *skb)
+{
+ /* at this point it is uncertain if we will ever send this to auditd so
+ * try to send the message via printk before we go any further */
+ kauditd_printk_skb(skb);
+
+ /* can we just silently drop the message? */
+ if (!audit_default) {
+ kfree_skb(skb);
+ return;
+ }
+
+ /* if we have room, queue the message */
+ if (!audit_backlog_limit ||
+ skb_queue_len(&audit_hold_queue) < audit_backlog_limit) {
+ skb_queue_tail(&audit_hold_queue, skb);
+ return;
+ }
- audit_hold_skb(skb);
+ /* we have no other options - drop the message */
+ audit_log_lost("kauditd hold queue overflow");
+ kfree_skb(skb);
}
-static void kauditd_send_skb(struct sk_buff *skb)
+/**
+ * kauditd_retry_skb - Queue an audit record, attempt to send again to auditd
+ * @skb: audit record
+ *
+ * Description:
+ * Not as serious as kauditd_hold_skb() as we still have a connected auditd,
+ * but for some reason we are having problems sending it audit records so
+ * queue the given record and attempt to resend.
+ */
+static void kauditd_retry_skb(struct sk_buff *skb)
{
- int err;
- int attempts = 0;
-#define AUDITD_RETRIES 5
+ /* NOTE: because records should only live in the retry queue for a
+ * short period of time, before either being sent or moved to the hold
+ * queue, we don't currently enforce a limit on this queue */
+ skb_queue_tail(&audit_retry_queue, skb);
+}
+
+/**
+ * auditd_reset - Disconnect the auditd connection
+ *
+ * Description:
+ * Break the auditd/kauditd connection and move all the records in the retry
+ * queue into the hold queue in case auditd reconnects. The audit_cmd_mutex
+ * must be held when calling this function.
+ */
+static void auditd_reset(void)
+{
+ struct sk_buff *skb;
+
+ /* break the connection */
+ if (audit_sock) {
+ sock_put(audit_sock);
+ audit_sock = NULL;
+ }
+ audit_pid = 0;
+ audit_nlk_portid = 0;
+
+ /* flush all of the retry queue to the hold queue */
+ while ((skb = skb_dequeue(&audit_retry_queue)))
+ kauditd_hold_skb(skb);
+}
+
+/**
+ * kauditd_send_unicast_skb - Send a record via unicast to auditd
+ * @skb: audit record
+ */
+static int kauditd_send_unicast_skb(struct sk_buff *skb)
+{
+ int rc;
-restart:
- /* take a reference in case we can't send it and we want to hold it */
+ /* if we know nothing is connected, don't even try the netlink call */
+ if (!audit_pid)
+ return -ECONNREFUSED;
+
+ /* get an extra skb reference in case we fail to send */
skb_get(skb);
- err = netlink_unicast(audit_sock, skb, audit_nlk_portid, 0);
- if (err < 0) {
- pr_err("netlink_unicast sending to audit_pid=%d returned error: %d\n",
- audit_pid, err);
- if (audit_pid) {
- if (err == -ECONNREFUSED || err == -EPERM
- || ++attempts >= AUDITD_RETRIES) {
- char s[32];
-
- snprintf(s, sizeof(s), "audit_pid=%d reset", audit_pid);
- audit_log_lost(s);
- audit_pid = 0;
- audit_sock = NULL;
- } else {
- pr_warn("re-scheduling(#%d) write to audit_pid=%d\n",
- attempts, audit_pid);
- set_current_state(TASK_INTERRUPTIBLE);
- schedule();
- goto restart;
- }
- }
- /* we might get lucky and get this in the next auditd */
- audit_hold_skb(skb);
- } else
- /* drop the extra reference if sent ok */
+ rc = netlink_unicast(audit_sock, skb, audit_nlk_portid, 0);
+ if (rc >= 0) {
consume_skb(skb);
+ rc = 0;
+ }
+
+ return rc;
}
/*
- * kauditd_send_multicast_skb - send the skb to multicast userspace listeners
+ * kauditd_send_multicast_skb - Send a record to any multicast listeners
+ * @skb: audit record
*
+ * Description:
* This function doesn't consume an skb as might be expected since it has to
* copy it anyways.
*/
-static void kauditd_send_multicast_skb(struct sk_buff *skb, gfp_t gfp_mask)
+static void kauditd_send_multicast_skb(struct sk_buff *skb)
{
- struct sk_buff *copy;
- struct audit_net *aunet = net_generic(&init_net, audit_net_id);
- struct sock *sock = aunet->nlsk;
+ struct sk_buff *copy;
+ struct audit_net *aunet = net_generic(&init_net, audit_net_id);
+ struct sock *sock = aunet->nlsk;
+ struct nlmsghdr *nlh;
if (!netlink_has_listeners(sock, AUDIT_NLGRP_READLOG))
return;
@@ -464,74 +516,161 @@ static void kauditd_send_multicast_skb(struct sk_buff *skb, gfp_t gfp_mask)
* no reason for new multicast clients to continue with this
* non-compliance.
*/
- copy = skb_copy(skb, gfp_mask);
+ copy = skb_copy(skb, GFP_KERNEL);
if (!copy)
return;
+ nlh = nlmsg_hdr(copy);
+ nlh->nlmsg_len = skb->len;
- nlmsg_multicast(sock, copy, 0, AUDIT_NLGRP_READLOG, gfp_mask);
+ nlmsg_multicast(sock, copy, 0, AUDIT_NLGRP_READLOG, GFP_KERNEL);
}
-/*
- * flush_hold_queue - empty the hold queue if auditd appears
- *
- * If auditd just started, drain the queue of messages already
- * sent to syslog/printk. Remember loss here is ok. We already
- * called audit_log_lost() if it didn't go out normally. so the
- * race between the skb_dequeue and the next check for audit_pid
- * doesn't matter.
+/**
+ * kauditd_wake_condition - Return true when it is time to wake kauditd_thread
*
- * If you ever find kauditd to be too slow we can get a perf win
- * by doing our own locking and keeping better track if there
- * are messages in this queue. I don't see the need now, but
- * in 5 years when I want to play with this again I'll see this
- * note and still have no friggin idea what i'm thinking today.
+ * Description:
+ * This function is for use by the wait_event_freezable() call in
+ * kauditd_thread().
*/
-static void flush_hold_queue(void)
+static int kauditd_wake_condition(void)
{
- struct sk_buff *skb;
-
- if (!audit_default || !audit_pid)
- return;
-
- skb = skb_dequeue(&audit_skb_hold_queue);
- if (likely(!skb))
- return;
+ static int pid_last = 0;
+ int rc;
+ int pid = audit_pid;
- while (skb && audit_pid) {
- kauditd_send_skb(skb);
- skb = skb_dequeue(&audit_skb_hold_queue);
- }
+ /* wake on new messages or a change in the connected auditd */
+ rc = skb_queue_len(&audit_queue) || (pid && pid != pid_last);
+ if (rc)
+ pid_last = pid;
- /*
- * if auditd just disappeared but we
- * dequeued an skb we need to drop ref
- */
- consume_skb(skb);
+ return rc;
}
static int kauditd_thread(void *dummy)
{
+ int rc;
+ int auditd = 0;
+ int reschedule = 0;
+ struct sk_buff *skb;
+ struct nlmsghdr *nlh;
+
+#define UNICAST_RETRIES 5
+#define AUDITD_BAD(x,y) \
+ ((x) == -ECONNREFUSED || (x) == -EPERM || ++(y) >= UNICAST_RETRIES)
+
+ /* NOTE: we do invalidate the auditd connection flag on any sending
+ * errors, but we only "restore" the connection flag at specific places
+ * in the loop in order to help ensure proper ordering of audit
+ * records */
+
set_freezable();
while (!kthread_should_stop()) {
- struct sk_buff *skb;
-
- flush_hold_queue();
+ /* NOTE: possible area for future improvement is to look at
+ * the hold and retry queues, since only this thread
+ * has access to these queues we might be able to do
+ * our own queuing and skip some/all of the locking */
+
+ /* NOTE: it might be a fun experiment to split the hold and
+ * retry queue handling to another thread, but the
+ * synchronization issues and other overhead might kill
+ * any performance gains */
+
+ /* attempt to flush the hold queue */
+ while (auditd && (skb = skb_dequeue(&audit_hold_queue))) {
+ rc = kauditd_send_unicast_skb(skb);
+ if (rc) {
+ /* requeue to the same spot */
+ skb_queue_head(&audit_hold_queue, skb);
+
+ auditd = 0;
+ if (AUDITD_BAD(rc, reschedule)) {
+ mutex_lock(&audit_cmd_mutex);
+ auditd_reset();
+ mutex_unlock(&audit_cmd_mutex);
+ reschedule = 0;
+ }
+ } else
+ /* we were able to send successfully */
+ reschedule = 0;
+ }
- skb = skb_dequeue(&audit_skb_queue);
+ /* attempt to flush the retry queue */
+ while (auditd && (skb = skb_dequeue(&audit_retry_queue))) {
+ rc = kauditd_send_unicast_skb(skb);
+ if (rc) {
+ auditd = 0;
+ if (AUDITD_BAD(rc, reschedule)) {
+ kauditd_hold_skb(skb);
+ mutex_lock(&audit_cmd_mutex);
+ auditd_reset();
+ mutex_unlock(&audit_cmd_mutex);
+ reschedule = 0;
+ } else
+ /* temporary problem (we hope), queue
+ * to the same spot and retry */
+ skb_queue_head(&audit_retry_queue, skb);
+ } else
+ /* we were able to send successfully */
+ reschedule = 0;
+ }
+ /* standard queue processing, try to be as quick as possible */
+quick_loop:
+ skb = skb_dequeue(&audit_queue);
if (skb) {
- if (!audit_backlog_limit ||
- (skb_queue_len(&audit_skb_queue) <= audit_backlog_limit))
- wake_up(&audit_backlog_wait);
- if (audit_pid)
- kauditd_send_skb(skb);
+ /* setup the netlink header, see the comments in
+ * kauditd_send_multicast_skb() for length quirks */
+ nlh = nlmsg_hdr(skb);
+ nlh->nlmsg_len = skb->len - NLMSG_HDRLEN;
+
+ /* attempt to send to any multicast listeners */
+ kauditd_send_multicast_skb(skb);
+
+ /* attempt to send to auditd, queue on failure */
+ if (auditd) {
+ rc = kauditd_send_unicast_skb(skb);
+ if (rc) {
+ auditd = 0;
+ if (AUDITD_BAD(rc, reschedule)) {
+ mutex_lock(&audit_cmd_mutex);
+ auditd_reset();
+ mutex_unlock(&audit_cmd_mutex);
+ reschedule = 0;
+ }
+
+ /* move to the retry queue */
+ kauditd_retry_skb(skb);
+ } else
+ /* everything is working so go fast! */
+ goto quick_loop;
+ } else if (reschedule)
+ /* we are currently having problems, move to
+ * the retry queue */
+ kauditd_retry_skb(skb);
else
- audit_printk_skb(skb);
- continue;
- }
+ /* dump the message via printk and hold it */
+ kauditd_hold_skb(skb);
+ } else {
+ /* we have flushed the backlog so wake everyone */
+ wake_up(&audit_backlog_wait);
+
+ /* if everything is okay with auditd (if present), go
+ * to sleep until there is something new in the queue
+ * or we have a change in the connected auditd;
+ * otherwise simply reschedule to give things a chance
+ * to recover */
+ if (reschedule) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule();
+ } else
+ wait_event_freezable(kauditd_wait,
+ kauditd_wake_condition());
- wait_event_freezable(kauditd_wait, skb_queue_len(&audit_skb_queue));
+ /* update the auditd connection status */
+ auditd = (audit_pid ? 1 : 0);
+ }
}
+
return 0;
}
@@ -596,6 +735,7 @@ static int audit_send_reply_thread(void *arg)
kfree(reply);
return 0;
}
+
/**
* audit_send_reply - send an audit reply message via netlink
* @request_skb: skb of request we are replying to (used to target the reply)
@@ -832,16 +972,6 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
if (err)
return err;
- /* As soon as there's any sign of userspace auditd,
- * start kauditd to talk to it */
- if (!kauditd_task) {
- kauditd_task = kthread_run(kauditd_thread, NULL, "kauditd");
- if (IS_ERR(kauditd_task)) {
- err = PTR_ERR(kauditd_task);
- kauditd_task = NULL;
- return err;
- }
- }
seq = nlh->nlmsg_seq;
data = nlmsg_data(nlh);
@@ -855,9 +985,9 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
s.rate_limit = audit_rate_limit;
s.backlog_limit = audit_backlog_limit;
s.lost = atomic_read(&audit_lost);
- s.backlog = skb_queue_len(&audit_skb_queue);
+ s.backlog = skb_queue_len(&audit_queue);
s.feature_bitmap = AUDIT_FEATURE_BITMAP_ALL;
- s.backlog_wait_time = audit_backlog_wait_time_master;
+ s.backlog_wait_time = audit_backlog_wait_time;
audit_send_reply(skb, seq, AUDIT_GET, 0, 0, &s, sizeof(s));
break;
}
@@ -897,9 +1027,17 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
}
if (audit_enabled != AUDIT_OFF)
audit_log_config_change("audit_pid", new_pid, audit_pid, 1);
- audit_pid = new_pid;
- audit_nlk_portid = NETLINK_CB(skb).portid;
- audit_sock = skb->sk;
+ if (new_pid) {
+ if (audit_sock)
+ sock_put(audit_sock);
+ audit_pid = new_pid;
+ audit_nlk_portid = NETLINK_CB(skb).portid;
+ sock_hold(skb->sk);
+ audit_sock = skb->sk;
+ } else {
+ auditd_reset();
+ }
+ wake_up_interruptible(&kauditd_wait);
}
if (s.mask & AUDIT_STATUS_RATE_LIMIT) {
err = audit_set_rate_limit(s.rate_limit);
@@ -1167,10 +1305,10 @@ static void __net_exit audit_net_exit(struct net *net)
{
struct audit_net *aunet = net_generic(net, audit_net_id);
struct sock *sock = aunet->nlsk;
- if (sock == audit_sock) {
- audit_pid = 0;
- audit_sock = NULL;
- }
+ mutex_lock(&audit_cmd_mutex);
+ if (sock == audit_sock)
+ auditd_reset();
+ mutex_unlock(&audit_cmd_mutex);
netlink_kernel_release(sock);
aunet->nlsk = NULL;
@@ -1195,17 +1333,24 @@ static int __init audit_init(void)
audit_default ? "enabled" : "disabled");
register_pernet_subsys(&audit_net_ops);
- skb_queue_head_init(&audit_skb_queue);
- skb_queue_head_init(&audit_skb_hold_queue);
+ skb_queue_head_init(&audit_queue);
+ skb_queue_head_init(&audit_retry_queue);
+ skb_queue_head_init(&audit_hold_queue);
audit_initialized = AUDIT_INITIALIZED;
audit_enabled = audit_default;
audit_ever_enabled |= !!audit_default;
- audit_log(NULL, GFP_KERNEL, AUDIT_KERNEL, "initialized");
-
for (i = 0; i < AUDIT_INODE_BUCKETS; i++)
INIT_LIST_HEAD(&audit_inode_hash[i]);
+ kauditd_task = kthread_run(kauditd_thread, NULL, "kauditd");
+ if (IS_ERR(kauditd_task)) {
+ int err = PTR_ERR(kauditd_task);
+ panic("audit: failed to start the kauditd thread (%d)\n", err);
+ }
+
+ audit_log(NULL, GFP_KERNEL, AUDIT_KERNEL, "initialized");
+
return 0;
}
__initcall(audit_init);
@@ -1338,24 +1483,6 @@ static inline void audit_get_stamp(struct audit_context *ctx,
}
}
-/*
- * Wait for auditd to drain the queue a little
- */
-static long wait_for_auditd(long sleep_time)
-{
- DECLARE_WAITQUEUE(wait, current);
-
- if (audit_backlog_limit &&
- skb_queue_len(&audit_skb_queue) > audit_backlog_limit) {
- add_wait_queue_exclusive(&audit_backlog_wait, &wait);
- set_current_state(TASK_UNINTERRUPTIBLE);
- sleep_time = schedule_timeout(sleep_time);
- remove_wait_queue(&audit_backlog_wait, &wait);
- }
-
- return sleep_time;
-}
-
/**
* audit_log_start - obtain an audit buffer
* @ctx: audit_context (may be NULL)
@@ -1374,12 +1501,9 @@ static long wait_for_auditd(long sleep_time)
struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
int type)
{
- struct audit_buffer *ab = NULL;
- struct timespec t;
- unsigned int uninitialized_var(serial);
- int reserve = 5; /* Allow atomic callers to go up to five
- entries over the normal backlog limit */
- unsigned long timeout_start = jiffies;
+ struct audit_buffer *ab;
+ struct timespec t;
+ unsigned int uninitialized_var(serial);
if (audit_initialized != AUDIT_INITIALIZED)
return NULL;
@@ -1387,38 +1511,48 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
if (unlikely(!audit_filter(type, AUDIT_FILTER_TYPE)))
return NULL;
- if (gfp_mask & __GFP_DIRECT_RECLAIM) {
- if (audit_pid && audit_pid == current->tgid)
- gfp_mask &= ~__GFP_DIRECT_RECLAIM;
- else
- reserve = 0;
- }
-
- while (audit_backlog_limit
- && skb_queue_len(&audit_skb_queue) > audit_backlog_limit + reserve) {
- if (gfp_mask & __GFP_DIRECT_RECLAIM && audit_backlog_wait_time) {
- long sleep_time;
+ /* don't ever fail/sleep on these two conditions:
+ * 1. auditd generated record - since we need auditd to drain the
+ * queue; also, when we are checking for auditd, compare PIDs using
+ * task_tgid_vnr() since auditd_pid is set in audit_receive_msg()
+ * using a PID anchored in the caller's namespace
+ * 2. audit command message - record types 1000 through 1099 inclusive
+ * are command messages/records used to manage the kernel subsystem
+ * and the audit userspace, blocking on these messages could cause
+ * problems under load so don't do it (note: not all of these
+ * command types are valid as record types, but it is quicker to
+ * just check two ints than a series of ints in a if/switch stmt) */
+ if (!((audit_pid && audit_pid == task_tgid_vnr(current)) ||
+ (type >= 1000 && type <= 1099))) {
+ long sleep_time = audit_backlog_wait_time;
+
+ while (audit_backlog_limit &&
+ (skb_queue_len(&audit_queue) > audit_backlog_limit)) {
+ /* wake kauditd to try and flush the queue */
+ wake_up_interruptible(&kauditd_wait);
- sleep_time = timeout_start + audit_backlog_wait_time - jiffies;
- if (sleep_time > 0) {
- sleep_time = wait_for_auditd(sleep_time);
- if (sleep_time > 0)
- continue;
+ /* sleep if we are allowed and we haven't exhausted our
+ * backlog wait limit */
+ if ((gfp_mask & __GFP_DIRECT_RECLAIM) &&
+ (sleep_time > 0)) {
+ DECLARE_WAITQUEUE(wait, current);
+
+ add_wait_queue_exclusive(&audit_backlog_wait,
+ &wait);
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ sleep_time = schedule_timeout(sleep_time);
+ remove_wait_queue(&audit_backlog_wait, &wait);
+ } else {
+ if (audit_rate_check() && printk_ratelimit())
+ pr_warn("audit_backlog=%d > audit_backlog_limit=%d\n",
+ skb_queue_len(&audit_queue),
+ audit_backlog_limit);
+ audit_log_lost("backlog limit exceeded");
+ return NULL;
}
}
- if (audit_rate_check() && printk_ratelimit())
- pr_warn("audit_backlog=%d > audit_backlog_limit=%d\n",
- skb_queue_len(&audit_skb_queue),
- audit_backlog_limit);
- audit_log_lost("backlog limit exceeded");
- audit_backlog_wait_time = 0;
- wake_up(&audit_backlog_wait);
- return NULL;
}
- if (!reserve && !audit_backlog_wait_time)
- audit_backlog_wait_time = audit_backlog_wait_time_master;
-
ab = audit_buffer_alloc(ctx, gfp_mask, type);
if (!ab) {
audit_log_lost("out of memory in audit_log_start");
@@ -1426,9 +1560,9 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
}
audit_get_stamp(ab->ctx, &t, &serial);
-
audit_log_format(ab, "audit(%lu.%03lu:%u): ",
t.tv_sec, t.tv_nsec/1000000, serial);
+
return ab;
}
@@ -1978,10 +2112,10 @@ out:
* audit_log_end - end one audit record
* @ab: the audit_buffer
*
- * netlink_unicast() cannot be called inside an irq context because it blocks
- * (last arg, flags, is not set to MSG_DONTWAIT), so the audit buffer is placed
- * on a queue and a tasklet is scheduled to remove them from the queue outside
- * the irq context. May be called in any context.
+ * We can not do a netlink send inside an irq context because it blocks (last
+ * arg, flags, is not set to MSG_DONTWAIT), so the audit buffer is placed on a
+ * queue and a tasklet is scheduled to remove them from the queue outside the
+ * irq context. May be called in any context.
*/
void audit_log_end(struct audit_buffer *ab)
{
@@ -1990,28 +2124,8 @@ void audit_log_end(struct audit_buffer *ab)
if (!audit_rate_check()) {
audit_log_lost("rate limit exceeded");
} else {
- struct nlmsghdr *nlh = nlmsg_hdr(ab->skb);
-
- nlh->nlmsg_len = ab->skb->len;
- kauditd_send_multicast_skb(ab->skb, ab->gfp_mask);
-
- /*
- * The original kaudit unicast socket sends up messages with
- * nlmsg_len set to the payload length rather than the entire
- * message length. This breaks the standard set by netlink.
- * The existing auditd daemon assumes this breakage. Fixing
- * this would require co-ordinating a change in the established
- * protocol between the kaudit kernel subsystem and the auditd
- * userspace code.
- */
- nlh->nlmsg_len -= NLMSG_HDRLEN;
-
- if (audit_pid) {
- skb_queue_tail(&audit_skb_queue, ab->skb);
- wake_up_interruptible(&kauditd_wait);
- } else {
- audit_printk_skb(ab->skb);
- }
+ skb_queue_tail(&audit_queue, ab->skb);
+ wake_up_interruptible(&kauditd_wait);
ab->skb = NULL;
}
audit_buffer_free(ab);
diff --git a/kernel/audit_fsnotify.c b/kernel/audit_fsnotify.c
index f84f8d06e1f6..f75154889aa9 100644
--- a/kernel/audit_fsnotify.c
+++ b/kernel/audit_fsnotify.c
@@ -130,10 +130,9 @@ static void audit_mark_log_rule_change(struct audit_fsnotify_mark *audit_mark, c
ab = audit_log_start(NULL, GFP_NOFS, AUDIT_CONFIG_CHANGE);
if (unlikely(!ab))
return;
- audit_log_format(ab, "auid=%u ses=%u op=",
+ audit_log_format(ab, "auid=%u ses=%u op=%s",
from_kuid(&init_user_ns, audit_get_loginuid(current)),
- audit_get_sessionid(current));
- audit_log_string(ab, op);
+ audit_get_sessionid(current), op);
audit_log_format(ab, " path=");
audit_log_untrustedstring(ab, audit_mark->path);
audit_log_key(ab, rule->filterkey);
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 25772476fa4a..055f11b0a50f 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -458,8 +458,7 @@ static void audit_tree_log_remove_rule(struct audit_krule *rule)
ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
if (unlikely(!ab))
return;
- audit_log_format(ab, "op=");
- audit_log_string(ab, "remove_rule");
+ audit_log_format(ab, "op=remove_rule");
audit_log_format(ab, " dir=");
audit_log_untrustedstring(ab, rule->tree->pathname);
audit_log_key(ab, rule->filterkey);
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index 0d302a87f21b..686e068ec3da 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -242,10 +242,9 @@ static void audit_watch_log_rule_change(struct audit_krule *r, struct audit_watc
ab = audit_log_start(NULL, GFP_NOFS, AUDIT_CONFIG_CHANGE);
if (unlikely(!ab))
return;
- audit_log_format(ab, "auid=%u ses=%u op=",
+ audit_log_format(ab, "auid=%u ses=%u op=%s",
from_kuid(&init_user_ns, audit_get_loginuid(current)),
- audit_get_sessionid(current));
- audit_log_string(ab, op);
+ audit_get_sessionid(current), op);
audit_log_format(ab, " path=");
audit_log_untrustedstring(ab, w->path);
audit_log_key(ab, r->filterkey);
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 85d9cac497e4..880519d6cf2a 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -363,6 +363,7 @@ static int audit_field_valid(struct audit_entry *entry, struct audit_field *f)
case AUDIT_EXIT:
case AUDIT_SUCCESS:
case AUDIT_INODE:
+ case AUDIT_SESSIONID:
/* bit ops are only useful on syscall args */
if (f->op == Audit_bitmask || f->op == Audit_bittest)
return -EINVAL;
@@ -476,6 +477,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
if (!gid_valid(f->gid))
goto exit_free;
break;
+ case AUDIT_SESSIONID:
case AUDIT_ARCH:
entry->rule.arch_f = f;
break;
@@ -1074,8 +1076,7 @@ static void audit_log_rule_change(char *action, struct audit_krule *rule, int re
return;
audit_log_format(ab, "auid=%u ses=%u" ,loginuid, sessionid);
audit_log_task_context(ab);
- audit_log_format(ab, " op=");
- audit_log_string(ab, action);
+ audit_log_format(ab, " op=%s", action);
audit_log_key(ab, rule->filterkey);
audit_log_format(ab, " list=%d res=%d", rule->listnr, res);
audit_log_end(ab);
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 2cd5256dbff7..cf1fa43512c1 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -446,6 +446,7 @@ static int audit_filter_rules(struct task_struct *tsk,
const struct cred *cred;
int i, need_sid = 1;
u32 sid;
+ unsigned int sessionid;
cred = rcu_dereference_check(tsk->cred, tsk == current || task_creation);
@@ -508,6 +509,10 @@ static int audit_filter_rules(struct task_struct *tsk,
case AUDIT_FSGID:
result = audit_gid_comparator(cred->fsgid, f->op, f->gid);
break;
+ case AUDIT_SESSIONID:
+ sessionid = audit_get_sessionid(current);
+ result = audit_comparator(sessionid, f->op, f->val);
+ break;
case AUDIT_PERS:
result = audit_comparator(tsk->personality, f->op, f->val);
break;
@@ -1000,7 +1005,7 @@ static void audit_log_execve_info(struct audit_context *context,
long len_rem;
long len_full;
long len_buf;
- long len_abuf;
+ long len_abuf = 0;
long len_tmp;
bool require_data;
bool encode;
@@ -2025,8 +2030,11 @@ int audit_set_loginuid(kuid_t loginuid)
goto out;
/* are we setting or clearing? */
- if (uid_valid(loginuid))
+ if (uid_valid(loginuid)) {
sessionid = (unsigned int)atomic_inc_return(&session_id);
+ if (unlikely(sessionid == (unsigned int)-1))
+ sessionid = (unsigned int)atomic_inc_return(&session_id);
+ }
task->sessionid = sessionid;
task->loginuid = loginuid;
diff --git a/kernel/capability.c b/kernel/capability.c
index 00411c82dac5..4984e1f552eb 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -457,6 +457,19 @@ bool file_ns_capable(const struct file *file, struct user_namespace *ns,
EXPORT_SYMBOL(file_ns_capable);
/**
+ * privileged_wrt_inode_uidgid - Do capabilities in the namespace work over the inode?
+ * @ns: The user namespace in question
+ * @inode: The inode in question
+ *
+ * Return true if the inode uid and gid are within the namespace.
+ */
+bool privileged_wrt_inode_uidgid(struct user_namespace *ns, const struct inode *inode)
+{
+ return kuid_has_mapping(ns, inode->i_uid) &&
+ kgid_has_mapping(ns, inode->i_gid);
+}
+
+/**
* capable_wrt_inode_uidgid - Check nsown_capable and uid and gid mapped
* @inode: The inode in question
* @cap: The capability in question
@@ -469,7 +482,26 @@ bool capable_wrt_inode_uidgid(const struct inode *inode, int cap)
{
struct user_namespace *ns = current_user_ns();
- return ns_capable(ns, cap) && kuid_has_mapping(ns, inode->i_uid) &&
- kgid_has_mapping(ns, inode->i_gid);
+ return ns_capable(ns, cap) && privileged_wrt_inode_uidgid(ns, inode);
}
EXPORT_SYMBOL(capable_wrt_inode_uidgid);
+
+/**
+ * ptracer_capable - Determine if the ptracer holds CAP_SYS_PTRACE in the namespace
+ * @tsk: The task that may be ptraced
+ * @ns: The user namespace to search for CAP_SYS_PTRACE in
+ *
+ * Return true if the task that is ptracing the current task had CAP_SYS_PTRACE
+ * in the specified user namespace.
+ */
+bool ptracer_capable(struct task_struct *tsk, struct user_namespace *ns)
+{
+ int ret = 0; /* An absent tracer adds no restrictions */
+ const struct cred *cred;
+ rcu_read_lock();
+ cred = rcu_dereference(tsk->ptracer_cred);
+ if (cred)
+ ret = security_capable_noaudit(cred, ns, CAP_SYS_PTRACE);
+ rcu_read_unlock();
+ return (ret == 0);
+}
diff --git a/kernel/fork.c b/kernel/fork.c
index a439ac429669..869b8ccc00bf 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -747,7 +747,8 @@ static void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
#endif
}
-static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
+static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
+ struct user_namespace *user_ns)
{
mm->mmap = NULL;
mm->mm_rb = RB_ROOT;
@@ -787,6 +788,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
if (init_new_context(p, mm))
goto fail_nocontext;
+ mm->user_ns = get_user_ns(user_ns);
return mm;
fail_nocontext:
@@ -832,7 +834,7 @@ struct mm_struct *mm_alloc(void)
return NULL;
memset(mm, 0, sizeof(*mm));
- return mm_init(mm, current);
+ return mm_init(mm, current, current_user_ns());
}
/*
@@ -847,6 +849,7 @@ void __mmdrop(struct mm_struct *mm)
destroy_context(mm);
mmu_notifier_mm_destroy(mm);
check_mm(mm);
+ put_user_ns(mm->user_ns);
free_mm(mm);
}
EXPORT_SYMBOL_GPL(__mmdrop);
@@ -1128,7 +1131,7 @@ static struct mm_struct *dup_mm(struct task_struct *tsk)
memcpy(mm, oldmm, sizeof(*mm));
- if (!mm_init(mm, tsk))
+ if (!mm_init(mm, tsk, mm->user_ns))
goto fail_nomem;
err = dup_mmap(mm, oldmm);
diff --git a/kernel/padata.c b/kernel/padata.c
index 7848f0566403..05316c9f32da 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -64,15 +64,11 @@ static int padata_cpu_hash(struct parallel_data *pd)
static void padata_parallel_worker(struct work_struct *parallel_work)
{
struct padata_parallel_queue *pqueue;
- struct parallel_data *pd;
- struct padata_instance *pinst;
LIST_HEAD(local_list);
local_bh_disable();
pqueue = container_of(parallel_work,
struct padata_parallel_queue, work);
- pd = pqueue->pd;
- pinst = pd->pinst;
spin_lock(&pqueue->parallel.lock);
list_replace_init(&pqueue->parallel.list, &local_list);
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index e6474f7272ec..49ba7c1ade9d 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -27,6 +27,35 @@
#include <linux/cn_proc.h>
#include <linux/compat.h>
+/*
+ * Access another process' address space via ptrace.
+ * Source/target buffer must be in kernel space.
+ * Do not walk the page table directly; use get_user_pages.
+ */
+int ptrace_access_vm(struct task_struct *tsk, unsigned long addr,
+ void *buf, int len, unsigned int gup_flags)
+{
+ struct mm_struct *mm;
+ int ret;
+
+ mm = get_task_mm(tsk);
+ if (!mm)
+ return 0;
+
+ if (!tsk->ptrace ||
+ (current != tsk->parent) ||
+ ((get_dumpable(mm) != SUID_DUMP_USER) &&
+ !ptracer_capable(tsk, mm->user_ns))) {
+ mmput(mm);
+ return 0;
+ }
+
+ ret = __access_remote_vm(tsk, mm, addr, buf, len, gup_flags);
+ mmput(mm);
+
+ return ret;
+}
+
/*
* ptrace a task: make the debugger its new parent and
@@ -39,6 +68,9 @@ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent)
BUG_ON(!list_empty(&child->ptrace_entry));
list_add(&child->ptrace_entry, &new_parent->ptraced);
child->parent = new_parent;
+ rcu_read_lock();
+ child->ptracer_cred = get_cred(__task_cred(new_parent));
+ rcu_read_unlock();
}
/**
@@ -71,12 +103,16 @@ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent)
*/
void __ptrace_unlink(struct task_struct *child)
{
+ const struct cred *old_cred;
BUG_ON(!child->ptrace);
clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
child->parent = child->real_parent;
list_del_init(&child->ptrace_entry);
+ old_cred = child->ptracer_cred;
+ child->ptracer_cred = NULL;
+ put_cred(old_cred);
spin_lock(&child->sighand->siglock);
child->ptrace = 0;
@@ -220,7 +256,7 @@ static int ptrace_has_cap(struct user_namespace *ns, unsigned int mode)
static int __ptrace_may_access(struct task_struct *task, unsigned int mode)
{
const struct cred *cred = current_cred(), *tcred;
- int dumpable = 0;
+ struct mm_struct *mm;
kuid_t caller_uid;
kgid_t caller_gid;
@@ -271,16 +307,11 @@ static int __ptrace_may_access(struct task_struct *task, unsigned int mode)
return -EPERM;
ok:
rcu_read_unlock();
- smp_rmb();
- if (task->mm)
- dumpable = get_dumpable(task->mm);
- rcu_read_lock();
- if (dumpable != SUID_DUMP_USER &&
- !ptrace_has_cap(__task_cred(task)->user_ns, mode)) {
- rcu_read_unlock();
- return -EPERM;
- }
- rcu_read_unlock();
+ mm = task->mm;
+ if (mm &&
+ ((get_dumpable(mm) != SUID_DUMP_USER) &&
+ !ptrace_has_cap(mm->user_ns, mode)))
+ return -EPERM;
return security_ptrace_access_check(task, mode);
}
@@ -344,10 +375,6 @@ static int ptrace_attach(struct task_struct *task, long request,
if (seize)
flags |= PT_SEIZED;
- rcu_read_lock();
- if (ns_capable(__task_cred(task)->user_ns, CAP_SYS_PTRACE))
- flags |= PT_PTRACE_CAP;
- rcu_read_unlock();
task->ptrace = flags;
__ptrace_link(task, current);
@@ -537,7 +564,8 @@ int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst
int this_len, retval;
this_len = (len > sizeof(buf)) ? sizeof(buf) : len;
- retval = access_process_vm(tsk, src, buf, this_len, FOLL_FORCE);
+ retval = ptrace_access_vm(tsk, src, buf, this_len, FOLL_FORCE);
+
if (!retval) {
if (copied)
break;
@@ -564,7 +592,7 @@ int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long ds
this_len = (len > sizeof(buf)) ? sizeof(buf) : len;
if (copy_from_user(buf, src, this_len))
return -EFAULT;
- retval = access_process_vm(tsk, dst, buf, this_len,
+ retval = ptrace_access_vm(tsk, dst, buf, this_len,
FOLL_FORCE | FOLL_WRITE);
if (!retval) {
if (copied)
@@ -1128,7 +1156,7 @@ int generic_ptrace_peekdata(struct task_struct *tsk, unsigned long addr,
unsigned long tmp;
int copied;
- copied = access_process_vm(tsk, addr, &tmp, sizeof(tmp), FOLL_FORCE);
+ copied = ptrace_access_vm(tsk, addr, &tmp, sizeof(tmp), FOLL_FORCE);
if (copied != sizeof(tmp))
return -EIO;
return put_user(tmp, (unsigned long __user *)data);
@@ -1139,7 +1167,7 @@ int generic_ptrace_pokedata(struct task_struct *tsk, unsigned long addr,
{
int copied;
- copied = access_process_vm(tsk, addr, &data, sizeof(data),
+ copied = ptrace_access_vm(tsk, addr, &data, sizeof(data),
FOLL_FORCE | FOLL_WRITE);
return (copied == sizeof(data)) ? 0 : -EIO;
}
@@ -1157,7 +1185,7 @@ int compat_ptrace_request(struct task_struct *child, compat_long_t request,
switch (request) {
case PTRACE_PEEKTEXT:
case PTRACE_PEEKDATA:
- ret = access_process_vm(child, addr, &word, sizeof(word),
+ ret = ptrace_access_vm(child, addr, &word, sizeof(word),
FOLL_FORCE);
if (ret != sizeof(word))
ret = -EIO;
@@ -1167,7 +1195,7 @@ int compat_ptrace_request(struct task_struct *child, compat_long_t request,
case PTRACE_POKETEXT:
case PTRACE_POKEDATA:
- ret = access_process_vm(child, addr, &data, sizeof(data),
+ ret = ptrace_access_vm(child, addr, &data, sizeof(data),
FOLL_FORCE | FOLL_WRITE);
ret = (ret != sizeof(data) ? -EIO : 0);
break;
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index bff9c774987a..f7ce79a46050 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -41,8 +41,7 @@
* outside of a lifetime-guarded section. In general, this
* is only needed for handling filters shared across tasks.
* @prev: points to a previously installed, or inherited, filter
- * @len: the number of instructions in the program
- * @insnsi: the BPF program instructions to evaluate
+ * @prog: the BPF program to evaluate
*
* seccomp_filter objects are organized in a tree linked via the @prev
* pointer. For any task, it appears to be a singly-linked list starting
@@ -168,8 +167,8 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
}
/**
- * seccomp_run_filters - evaluates all seccomp filters against @syscall
- * @syscall: number of the current system call
+ * seccomp_run_filters - evaluates all seccomp filters against @sd
+ * @sd: optional seccomp data to be passed to filters
*
* Returns valid seccomp BPF response codes.
*/