From b4eb4f7f1a979552e0e9f54d0cef4abd0140beef Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 29 Oct 2016 19:04:39 +0300 Subject: audit: less stack usage for /proc/*/loginuid %u requires 10 characters at most not 20. Signed-off-by: Alexey Dobriyan Acked-by: Richard Guy Briggs Signed-off-by: Paul Moore --- fs/proc/base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index ac0df4dde823..9cdb3e40899a 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1228,7 +1228,7 @@ static const struct file_operations proc_oom_score_adj_operations = { }; #ifdef CONFIG_AUDITSYSCALL -#define TMPBUFLEN 21 +#define TMPBUFLEN 11 static ssize_t proc_loginuid_read(struct file * file, char __user * buf, size_t count, loff_t *ppos) { -- cgit From 8443075eacb51df8539916c4170d2fdfe7c81433 Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Thu, 10 Nov 2016 01:39:49 -0500 Subject: audit: tame initialization warning len_abuf in audit_log_execve_info Tame initialization warning of len_abuf in audit_log_execve_info even though there isn't presently a bug introduced by commit 43761473c254 ("audit: fix a double fetch in audit_log_single_execve_arg()"). Using UNINITIALIZED_VAR instead may mask future bugs. Signed-off-by: Richard Guy Briggs Signed-off-by: Paul Moore --- kernel/auditsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 5abf1dc1f91c..8c434318ec8d 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1000,7 +1000,7 @@ static void audit_log_execve_info(struct audit_context *context, long len_rem; long len_full; long len_buf; - long len_abuf; + long len_abuf = 0; long len_tmp; bool require_data; bool encode; -- cgit From 833fc48d18ce3595990b405ae82a160b33028994 Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Thu, 10 Nov 2016 01:41:14 -0500 Subject: audit: skip sessionid sentinel value when auto-incrementing The value (unsigned int)-1 is used as a sentinel to indicate the sessionID is unset. Skip this value when the session_id value wraps. Signed-off-by: Richard Guy Briggs Signed-off-by: Paul Moore --- kernel/auditsc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 8c434318ec8d..d161b17ce8ce 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -2025,8 +2025,11 @@ int audit_set_loginuid(kuid_t loginuid) goto out; /* are we setting or clearing? */ - if (uid_valid(loginuid)) + if (uid_valid(loginuid)) { sessionid = (unsigned int)atomic_inc_return(&session_id); + if (unlikely(sessionid == (unsigned int)-1)) + sessionid = (unsigned int)atomic_inc_return(&session_id); + } task->sessionid = sessionid; task->loginuid = loginuid; -- cgit From c1e8f06d7a0eea232ce0767471e1b4756ccab70a Mon Sep 17 00:00:00 2001 From: Steve Grubb Date: Wed, 16 Nov 2016 16:14:33 -0500 Subject: audit: fix formatting of AUDIT_CONFIG_CHANGE events The AUDIT_CONFIG_CHANGE events sometimes use a op= field. The current code logs the value of the field with quotes. This field is documented to not be encoded, so it should not have quotes. Signed-off-by: Steve Grubb Reviewed-by: Richard Guy Briggs [PM: reformatted commit description to make checkpatch.pl happy] Signed-off-by: Paul Moore --- kernel/audit_fsnotify.c | 5 ++--- kernel/audit_tree.c | 3 +-- kernel/audit_watch.c | 5 ++--- kernel/auditfilter.c | 3 +-- 4 files changed, 6 insertions(+), 10 deletions(-) diff --git a/kernel/audit_fsnotify.c b/kernel/audit_fsnotify.c index f84f8d06e1f6..f75154889aa9 100644 --- a/kernel/audit_fsnotify.c +++ b/kernel/audit_fsnotify.c @@ -130,10 +130,9 @@ static void audit_mark_log_rule_change(struct audit_fsnotify_mark *audit_mark, c ab = audit_log_start(NULL, GFP_NOFS, AUDIT_CONFIG_CHANGE); if (unlikely(!ab)) return; - audit_log_format(ab, "auid=%u ses=%u op=", + audit_log_format(ab, "auid=%u ses=%u op=%s", from_kuid(&init_user_ns, audit_get_loginuid(current)), - audit_get_sessionid(current)); - audit_log_string(ab, op); + audit_get_sessionid(current), op); audit_log_format(ab, " path="); audit_log_untrustedstring(ab, audit_mark->path); audit_log_key(ab, rule->filterkey); diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index 25772476fa4a..055f11b0a50f 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -458,8 +458,7 @@ static void audit_tree_log_remove_rule(struct audit_krule *rule) ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE); if (unlikely(!ab)) return; - audit_log_format(ab, "op="); - audit_log_string(ab, "remove_rule"); + audit_log_format(ab, "op=remove_rule"); audit_log_format(ab, " dir="); audit_log_untrustedstring(ab, rule->tree->pathname); audit_log_key(ab, rule->filterkey); diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index 0d302a87f21b..686e068ec3da 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -242,10 +242,9 @@ static void audit_watch_log_rule_change(struct audit_krule *r, struct audit_watc ab = audit_log_start(NULL, GFP_NOFS, AUDIT_CONFIG_CHANGE); if (unlikely(!ab)) return; - audit_log_format(ab, "auid=%u ses=%u op=", + audit_log_format(ab, "auid=%u ses=%u op=%s", from_kuid(&init_user_ns, audit_get_loginuid(current)), - audit_get_sessionid(current)); - audit_log_string(ab, op); + audit_get_sessionid(current), op); audit_log_format(ab, " path="); audit_log_untrustedstring(ab, w->path); audit_log_key(ab, r->filterkey); diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index 85d9cac497e4..632e90d1005f 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -1074,8 +1074,7 @@ static void audit_log_rule_change(char *action, struct audit_krule *rule, int re return; audit_log_format(ab, "auid=%u ses=%u" ,loginuid, sessionid); audit_log_task_context(ab); - audit_log_format(ab, " op="); - audit_log_string(ab, action); + audit_log_format(ab, " op=%s", action); audit_log_key(ab, rule->filterkey); audit_log_format(ab, " list=%d res=%d", rule->listnr, res); audit_log_end(ab); -- cgit From 8fae47705685fcaa75a1fe4c8c3e18300a702979 Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Sun, 20 Nov 2016 16:47:55 -0500 Subject: audit: add support for session ID user filter Define AUDIT_SESSIONID in the uapi and add support for specifying user filters based on the session ID. Also add the new session ID filter to the feature bitmap so userspace knows it is available. https://github.com/linux-audit/audit-kernel/issues/4 RFE: add a session ID filter to the kernel's user filter Signed-off-by: Richard Guy Briggs [PM: combine multiple patches from Richard into this one] Signed-off-by: Paul Moore --- include/uapi/linux/audit.h | 5 ++++- kernel/auditfilter.c | 2 ++ kernel/auditsc.c | 5 +++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index 82e8aa59446b..c8dc97bc2c1b 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -254,6 +254,7 @@ #define AUDIT_OBJ_LEV_LOW 22 #define AUDIT_OBJ_LEV_HIGH 23 #define AUDIT_LOGINUID_SET 24 +#define AUDIT_SESSIONID 25 /* Session ID */ /* These are ONLY useful when checking * at syscall exit time (AUDIT_AT_EXIT). */ @@ -329,9 +330,11 @@ enum { #define AUDIT_FEATURE_BITMAP_BACKLOG_LIMIT 0x00000001 #define AUDIT_FEATURE_BITMAP_BACKLOG_WAIT_TIME 0x00000002 #define AUDIT_FEATURE_BITMAP_EXECUTABLE_PATH 0x00000004 +#define AUDIT_FEATURE_BITMAP_SESSIONID_FILTER 0x00000010 #define AUDIT_FEATURE_BITMAP_ALL (AUDIT_FEATURE_BITMAP_BACKLOG_LIMIT | \ AUDIT_FEATURE_BITMAP_BACKLOG_WAIT_TIME | \ - AUDIT_FEATURE_BITMAP_EXECUTABLE_PATH) + AUDIT_FEATURE_BITMAP_EXECUTABLE_PATH | \ + AUDIT_FEATURE_BITMAP_SESSIONID_FILTER) /* deprecated: AUDIT_VERSION_* */ #define AUDIT_VERSION_LATEST AUDIT_FEATURE_BITMAP_ALL diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index 632e90d1005f..880519d6cf2a 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -363,6 +363,7 @@ static int audit_field_valid(struct audit_entry *entry, struct audit_field *f) case AUDIT_EXIT: case AUDIT_SUCCESS: case AUDIT_INODE: + case AUDIT_SESSIONID: /* bit ops are only useful on syscall args */ if (f->op == Audit_bitmask || f->op == Audit_bittest) return -EINVAL; @@ -476,6 +477,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, if (!gid_valid(f->gid)) goto exit_free; break; + case AUDIT_SESSIONID: case AUDIT_ARCH: entry->rule.arch_f = f; break; diff --git a/kernel/auditsc.c b/kernel/auditsc.c index d161b17ce8ce..f78cb1b3fa74 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -446,6 +446,7 @@ static int audit_filter_rules(struct task_struct *tsk, const struct cred *cred; int i, need_sid = 1; u32 sid; + unsigned int sessionid; cred = rcu_dereference_check(tsk->cred, tsk == current || task_creation); @@ -508,6 +509,10 @@ static int audit_filter_rules(struct task_struct *tsk, case AUDIT_FSGID: result = audit_gid_comparator(cred->fsgid, f->op, f->gid); break; + case AUDIT_SESSIONID: + sessionid = audit_get_sessionid(current); + result = audit_comparator(sessionid, f->op, f->val); + break; case AUDIT_PERS: result = audit_comparator(tsk->personality, f->op, f->val); break; -- cgit From 55a6f170a413cd8dc7a3a52e5a326e1a87579b4f Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Tue, 29 Nov 2016 16:53:23 -0500 Subject: audit: move kaudit thread start from auditd registration to kaudit init (#2) Richard made this change some time ago but Eric backed it out because the rest of the supporting code wasn't ready. In order to move the netlink multicast send to kauditd_thread we need to ensure the kauditd_thread is always running, so restore commit 6ff5e459 ("audit: move kaudit thread start from auditd registration to kaudit init"). Signed-off-by: Richard Guy Briggs [PM: brought forward and merged based on Richard's old patch] Signed-off-by: Paul Moore --- kernel/audit.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/kernel/audit.c b/kernel/audit.c index a8a91bd2b2a9..d4c78ba5c4f9 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -832,16 +832,6 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (err) return err; - /* As soon as there's any sign of userspace auditd, - * start kauditd to talk to it */ - if (!kauditd_task) { - kauditd_task = kthread_run(kauditd_thread, NULL, "kauditd"); - if (IS_ERR(kauditd_task)) { - err = PTR_ERR(kauditd_task); - kauditd_task = NULL; - return err; - } - } seq = nlh->nlmsg_seq; data = nlmsg_data(nlh); @@ -1190,6 +1180,10 @@ static int __init audit_init(void) audit_default ? "enabled" : "disabled"); register_pernet_subsys(&audit_net_ops); + kauditd_task = kthread_run(kauditd_thread, NULL, "kauditd"); + if (IS_ERR(kauditd_task)) + return PTR_ERR(kauditd_task); + skb_queue_head_init(&audit_skb_queue); skb_queue_head_init(&audit_skb_hold_queue); audit_initialized = AUDIT_INITIALIZED; -- cgit From 6c9255645350ce2aecb7c3cd032d0e93d4a7a71a Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Tue, 29 Nov 2016 16:53:23 -0500 Subject: audit: fixup audit_init() Make sure everything is initialized before we start the kauditd_thread and don't emit the "initialized" record until everything is finished. We also panic with a descriptive message if we can't start the kauditd_thread. Signed-off-by: Paul Moore --- kernel/audit.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/kernel/audit.c b/kernel/audit.c index d4c78ba5c4f9..b61642b1934f 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1180,21 +1180,23 @@ static int __init audit_init(void) audit_default ? "enabled" : "disabled"); register_pernet_subsys(&audit_net_ops); - kauditd_task = kthread_run(kauditd_thread, NULL, "kauditd"); - if (IS_ERR(kauditd_task)) - return PTR_ERR(kauditd_task); - skb_queue_head_init(&audit_skb_queue); skb_queue_head_init(&audit_skb_hold_queue); audit_initialized = AUDIT_INITIALIZED; audit_enabled = audit_default; audit_ever_enabled |= !!audit_default; - audit_log(NULL, GFP_KERNEL, AUDIT_KERNEL, "initialized"); - for (i = 0; i < AUDIT_INODE_BUCKETS; i++) INIT_LIST_HEAD(&audit_inode_hash[i]); + kauditd_task = kthread_run(kauditd_thread, NULL, "kauditd"); + if (IS_ERR(kauditd_task)) { + int err = PTR_ERR(kauditd_task); + panic("audit: failed to start the kauditd thread (%d)\n", err); + } + + audit_log(NULL, GFP_KERNEL, AUDIT_KERNEL, "initialized"); + return 0; } __initcall(audit_init); -- cgit From 4aa83872d346806d9b54768aa0d1329050542bad Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Tue, 29 Nov 2016 16:53:24 -0500 Subject: audit: queue netlink multicast sends just like we do for unicast sends Sending audit netlink multicast messages is bad for all the same reasons that sending audit netlink unicast messages is bad, so this patch reworks things so that we don't do the multicast send in audit_log_end(), we do it from the dedicated kauditd_thread thread just as we do for unicast messages. See the GitHub issues below for more information/history: * https://github.com/linux-audit/audit-kernel/issues/23 * https://github.com/linux-audit/audit-kernel/issues/22 Signed-off-by: Paul Moore --- kernel/audit.c | 70 +++++++++++++++++++++++++++++----------------------------- 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/kernel/audit.c b/kernel/audit.c index b61642b1934f..801247a6c9e5 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -511,26 +511,46 @@ static void flush_hold_queue(void) static int kauditd_thread(void *dummy) { + struct sk_buff *skb; + struct nlmsghdr *nlh; + set_freezable(); while (!kthread_should_stop()) { - struct sk_buff *skb; - flush_hold_queue(); skb = skb_dequeue(&audit_skb_queue); - if (skb) { - if (!audit_backlog_limit || - (skb_queue_len(&audit_skb_queue) <= audit_backlog_limit)) - wake_up(&audit_backlog_wait); + nlh = nlmsg_hdr(skb); + + /* if nlh->nlmsg_len is zero then we haven't attempted + * to send the message to userspace yet, if nlmsg_len + * is non-zero then we have attempted to send it to + * the multicast listeners as well as auditd; keep + * trying to send to auditd but don't repeat the + * multicast send */ + if (nlh->nlmsg_len == 0) { + nlh->nlmsg_len = skb->len; + kauditd_send_multicast_skb(skb, GFP_KERNEL); + + /* see the note in kauditd_send_multicast_skb + * regarding the nlh->nlmsg_len value and why + * it differs between the multicast and unicast + * clients */ + nlh->nlmsg_len -= NLMSG_HDRLEN; + } + if (audit_pid) kauditd_send_skb(skb); else audit_printk_skb(skb); - continue; + } else { + /* we have flushed the backlog so wake everyone up who + * is blocked and go to sleep until we have something + * in the backlog again */ + wake_up(&audit_backlog_wait); + wait_event_freezable(kauditd_wait, + skb_queue_len(&audit_skb_queue)); } - - wait_event_freezable(kauditd_wait, skb_queue_len(&audit_skb_queue)); } return 0; } @@ -1969,10 +1989,10 @@ out: * audit_log_end - end one audit record * @ab: the audit_buffer * - * netlink_unicast() cannot be called inside an irq context because it blocks - * (last arg, flags, is not set to MSG_DONTWAIT), so the audit buffer is placed - * on a queue and a tasklet is scheduled to remove them from the queue outside - * the irq context. May be called in any context. + * We can not do a netlink send inside an irq context because it blocks (last + * arg, flags, is not set to MSG_DONTWAIT), so the audit buffer is placed on a + * queue and a tasklet is scheduled to remove them from the queue outside the + * irq context. May be called in any context. */ void audit_log_end(struct audit_buffer *ab) { @@ -1981,28 +2001,8 @@ void audit_log_end(struct audit_buffer *ab) if (!audit_rate_check()) { audit_log_lost("rate limit exceeded"); } else { - struct nlmsghdr *nlh = nlmsg_hdr(ab->skb); - - nlh->nlmsg_len = ab->skb->len; - kauditd_send_multicast_skb(ab->skb, ab->gfp_mask); - - /* - * The original kaudit unicast socket sends up messages with - * nlmsg_len set to the payload length rather than the entire - * message length. This breaks the standard set by netlink. - * The existing auditd daemon assumes this breakage. Fixing - * this would require co-ordinating a change in the established - * protocol between the kaudit kernel subsystem and the auditd - * userspace code. - */ - nlh->nlmsg_len -= NLMSG_HDRLEN; - - if (audit_pid) { - skb_queue_tail(&audit_skb_queue, ab->skb); - wake_up_interruptible(&kauditd_wait); - } else { - audit_printk_skb(ab->skb); - } + skb_queue_tail(&audit_skb_queue, ab->skb); + wake_up_interruptible(&kauditd_wait); ab->skb = NULL; } audit_buffer_free(ab); -- cgit From af8b824f283de5acc9b9ae8dbb60e4adacff721b Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Tue, 29 Nov 2016 16:53:24 -0500 Subject: audit: rename the queues and kauditd related functions The audit queue names can be shortened and the record sending helpers associated with the kauditd task could be named better, do these small cleanups now to make life easier once we start reworking the queues and kauditd code. Signed-off-by: Paul Moore --- kernel/audit.c | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/kernel/audit.c b/kernel/audit.c index 801247a6c9e5..6ac1df116c0b 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -138,9 +138,9 @@ static DEFINE_SPINLOCK(audit_freelist_lock); static int audit_freelist_count; static LIST_HEAD(audit_freelist); -static struct sk_buff_head audit_skb_queue; +static struct sk_buff_head audit_queue; /* queue of skbs to send to auditd when/if it comes back */ -static struct sk_buff_head audit_skb_hold_queue; +static struct sk_buff_head audit_hold_queue; static struct task_struct *kauditd_task; static DECLARE_WAIT_QUEUE_HEAD(kauditd_wait); static DECLARE_WAIT_QUEUE_HEAD(audit_backlog_wait); @@ -377,8 +377,8 @@ static void audit_hold_skb(struct sk_buff *skb) { if (audit_default && (!audit_backlog_limit || - skb_queue_len(&audit_skb_hold_queue) < audit_backlog_limit)) - skb_queue_tail(&audit_skb_hold_queue, skb); + skb_queue_len(&audit_hold_queue) < audit_backlog_limit)) + skb_queue_tail(&audit_hold_queue, skb); else kfree_skb(skb); } @@ -387,7 +387,7 @@ static void audit_hold_skb(struct sk_buff *skb) * For one reason or another this nlh isn't getting delivered to the userspace * audit daemon, just send it to printk. */ -static void audit_printk_skb(struct sk_buff *skb) +static void kauditd_printk_skb(struct sk_buff *skb) { struct nlmsghdr *nlh = nlmsg_hdr(skb); char *data = nlmsg_data(nlh); @@ -402,7 +402,7 @@ static void audit_printk_skb(struct sk_buff *skb) audit_hold_skb(skb); } -static void kauditd_send_skb(struct sk_buff *skb) +static void kauditd_send_unicast_skb(struct sk_buff *skb) { int err; int attempts = 0; @@ -493,13 +493,13 @@ static void flush_hold_queue(void) if (!audit_default || !audit_pid) return; - skb = skb_dequeue(&audit_skb_hold_queue); + skb = skb_dequeue(&audit_hold_queue); if (likely(!skb)) return; while (skb && audit_pid) { - kauditd_send_skb(skb); - skb = skb_dequeue(&audit_skb_hold_queue); + kauditd_send_unicast_skb(skb); + skb = skb_dequeue(&audit_hold_queue); } /* @@ -518,7 +518,7 @@ static int kauditd_thread(void *dummy) while (!kthread_should_stop()) { flush_hold_queue(); - skb = skb_dequeue(&audit_skb_queue); + skb = skb_dequeue(&audit_queue); if (skb) { nlh = nlmsg_hdr(skb); @@ -540,16 +540,16 @@ static int kauditd_thread(void *dummy) } if (audit_pid) - kauditd_send_skb(skb); + kauditd_send_unicast_skb(skb); else - audit_printk_skb(skb); + kauditd_printk_skb(skb); } else { /* we have flushed the backlog so wake everyone up who * is blocked and go to sleep until we have something * in the backlog again */ wake_up(&audit_backlog_wait); wait_event_freezable(kauditd_wait, - skb_queue_len(&audit_skb_queue)); + skb_queue_len(&audit_queue)); } } return 0; @@ -865,7 +865,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) s.rate_limit = audit_rate_limit; s.backlog_limit = audit_backlog_limit; s.lost = atomic_read(&audit_lost); - s.backlog = skb_queue_len(&audit_skb_queue); + s.backlog = skb_queue_len(&audit_queue); s.feature_bitmap = AUDIT_FEATURE_BITMAP_ALL; s.backlog_wait_time = audit_backlog_wait_time_master; audit_send_reply(skb, seq, AUDIT_GET, 0, 0, &s, sizeof(s)); @@ -1200,8 +1200,8 @@ static int __init audit_init(void) audit_default ? "enabled" : "disabled"); register_pernet_subsys(&audit_net_ops); - skb_queue_head_init(&audit_skb_queue); - skb_queue_head_init(&audit_skb_hold_queue); + skb_queue_head_init(&audit_queue); + skb_queue_head_init(&audit_hold_queue); audit_initialized = AUDIT_INITIALIZED; audit_enabled = audit_default; audit_ever_enabled |= !!audit_default; @@ -1357,7 +1357,7 @@ static long wait_for_auditd(long sleep_time) DECLARE_WAITQUEUE(wait, current); if (audit_backlog_limit && - skb_queue_len(&audit_skb_queue) > audit_backlog_limit) { + skb_queue_len(&audit_queue) > audit_backlog_limit) { add_wait_queue_exclusive(&audit_backlog_wait, &wait); set_current_state(TASK_UNINTERRUPTIBLE); sleep_time = schedule_timeout(sleep_time); @@ -1406,7 +1406,7 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, } while (audit_backlog_limit - && skb_queue_len(&audit_skb_queue) > audit_backlog_limit + reserve) { + && skb_queue_len(&audit_queue) > audit_backlog_limit + reserve) { if (gfp_mask & __GFP_DIRECT_RECLAIM && audit_backlog_wait_time) { long sleep_time; @@ -1419,7 +1419,7 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, } if (audit_rate_check() && printk_ratelimit()) pr_warn("audit_backlog=%d > audit_backlog_limit=%d\n", - skb_queue_len(&audit_skb_queue), + skb_queue_len(&audit_queue), audit_backlog_limit); audit_log_lost("backlog limit exceeded"); audit_backlog_wait_time = 0; @@ -2001,7 +2001,7 @@ void audit_log_end(struct audit_buffer *ab) if (!audit_rate_check()) { audit_log_lost("rate limit exceeded"); } else { - skb_queue_tail(&audit_skb_queue, ab->skb); + skb_queue_tail(&audit_queue, ab->skb); wake_up_interruptible(&kauditd_wait); ab->skb = NULL; } -- cgit From c6480207fdf7b61de216ee23e93eac0a6878fa74 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Tue, 29 Nov 2016 16:53:25 -0500 Subject: audit: rework the audit queue handling The audit record backlog queue has always been a bit of a mess, and the moving the multicast send into kauditd_thread() from audit_log_end() only makes things worse. This patch attempts to fix the backlog queue with a better design that should hold up better under load and have less of a performance impact at syscall invocation time. While it looks like there is a log going on in this patch, the main change is the move from a single backlog queue to three queues: * A queue for holding records generated from audit_log_end() that haven't been consumed by kauditd_thread() (audit_queue). * A queue for holding records that have been sent via multicast but had a temporary failure when sending via unicast and need a resend (audit_retry_queue). * A queue for holding records that haven't been sent via unicast because no one is listening (audit_hold_queue). Special care is taken in this patch to ensure that the proper record ordering is preserved, e.g. we send everything in the hold queue first, then the retry queue, and finally the main queue. Signed-off-by: Paul Moore --- kernel/audit.c | 347 +++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 226 insertions(+), 121 deletions(-) diff --git a/kernel/audit.c b/kernel/audit.c index 6ac1df116c0b..f4056bc331fc 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -138,11 +138,18 @@ static DEFINE_SPINLOCK(audit_freelist_lock); static int audit_freelist_count; static LIST_HEAD(audit_freelist); +/* queue msgs to send via kauditd_task */ static struct sk_buff_head audit_queue; -/* queue of skbs to send to auditd when/if it comes back */ +/* queue msgs due to temporary unicast send problems */ +static struct sk_buff_head audit_retry_queue; +/* queue msgs waiting for new auditd connection */ static struct sk_buff_head audit_hold_queue; + +/* queue servicing thread */ static struct task_struct *kauditd_task; static DECLARE_WAIT_QUEUE_HEAD(kauditd_wait); + +/* waitqueue for callers who are blocked on the audit backlog */ static DECLARE_WAIT_QUEUE_HEAD(audit_backlog_wait); static struct audit_features af = {.vers = AUDIT_FEATURE_VERSION, @@ -364,25 +371,6 @@ static int audit_set_failure(u32 state) return audit_do_config_change("audit_failure", &audit_failure, state); } -/* - * Queue skbs to be sent to auditd when/if it comes back. These skbs should - * already have been sent via prink/syslog and so if these messages are dropped - * it is not a huge concern since we already passed the audit_log_lost() - * notification and stuff. This is just nice to get audit messages during - * boot before auditd is running or messages generated while auditd is stopped. - * This only holds messages is audit_default is set, aka booting with audit=1 - * or building your kernel that way. - */ -static void audit_hold_skb(struct sk_buff *skb) -{ - if (audit_default && - (!audit_backlog_limit || - skb_queue_len(&audit_hold_queue) < audit_backlog_limit)) - skb_queue_tail(&audit_hold_queue, skb); - else - kfree_skb(skb); -} - /* * For one reason or another this nlh isn't getting delivered to the userspace * audit daemon, just send it to printk. @@ -398,58 +386,114 @@ static void kauditd_printk_skb(struct sk_buff *skb) else audit_log_lost("printk limit exceeded"); } +} + +/** + * kauditd_hold_skb - Queue an audit record, waiting for auditd + * @skb: audit record + * + * Description: + * Queue the audit record, waiting for an instance of auditd. When this + * function is called we haven't given up yet on sending the record, but things + * are not looking good. The first thing we want to do is try to write the + * record via printk and then see if we want to try and hold on to the record + * and queue it, if we have room. If we want to hold on to the record, but we + * don't have room, record a record lost message. + */ +static void kauditd_hold_skb(struct sk_buff *skb) +{ + /* at this point it is uncertain if we will ever send this to auditd so + * try to send the message via printk before we go any further */ + kauditd_printk_skb(skb); + + /* can we just silently drop the message? */ + if (!audit_default) { + kfree_skb(skb); + return; + } + + /* if we have room, queue the message */ + if (!audit_backlog_limit || + skb_queue_len(&audit_hold_queue) < audit_backlog_limit) { + skb_queue_tail(&audit_hold_queue, skb); + return; + } - audit_hold_skb(skb); + /* we have no other options - drop the message */ + audit_log_lost("kauditd hold queue overflow"); + kfree_skb(skb); } -static void kauditd_send_unicast_skb(struct sk_buff *skb) +/** + * kauditd_retry_skb - Queue an audit record, attempt to send again to auditd + * @skb: audit record + * + * Description: + * Not as serious as kauditd_hold_skb() as we still have a connected auditd, + * but for some reason we are having problems sending it audit records so + * queue the given record and attempt to resend. + */ +static void kauditd_retry_skb(struct sk_buff *skb) { - int err; - int attempts = 0; -#define AUDITD_RETRIES 5 + /* NOTE: because records should only live in the retry queue for a + * short period of time, before either being sent or moved to the hold + * queue, we don't currently enforce a limit on this queue */ + skb_queue_tail(&audit_retry_queue, skb); +} + +/** + * auditd_reset - Disconnect the auditd connection + * + * Description: + * Break the auditd/kauditd connection and move all the records in the retry + * queue into the hold queue in case auditd reconnects. + */ +static void auditd_reset(void) +{ + struct sk_buff *skb; + + /* break the connection */ + audit_pid = 0; + audit_sock = NULL; + + /* flush all of the retry queue to the hold queue */ + while ((skb = skb_dequeue(&audit_retry_queue))) + kauditd_hold_skb(skb); +} + +/** + * kauditd_send_unicast_skb - Send a record via unicast to auditd + * @skb: audit record + */ +static int kauditd_send_unicast_skb(struct sk_buff *skb) +{ + int rc; -restart: - /* take a reference in case we can't send it and we want to hold it */ + /* get an extra skb reference in case we fail to send */ skb_get(skb); - err = netlink_unicast(audit_sock, skb, audit_nlk_portid, 0); - if (err < 0) { - pr_err("netlink_unicast sending to audit_pid=%d returned error: %d\n", - audit_pid, err); - if (audit_pid) { - if (err == -ECONNREFUSED || err == -EPERM - || ++attempts >= AUDITD_RETRIES) { - char s[32]; - - snprintf(s, sizeof(s), "audit_pid=%d reset", audit_pid); - audit_log_lost(s); - audit_pid = 0; - audit_sock = NULL; - } else { - pr_warn("re-scheduling(#%d) write to audit_pid=%d\n", - attempts, audit_pid); - set_current_state(TASK_INTERRUPTIBLE); - schedule(); - goto restart; - } - } - /* we might get lucky and get this in the next auditd */ - audit_hold_skb(skb); - } else - /* drop the extra reference if sent ok */ + rc = netlink_unicast(audit_sock, skb, audit_nlk_portid, 0); + if (rc >= 0) { consume_skb(skb); + rc = 0; + } + + return rc; } /* - * kauditd_send_multicast_skb - send the skb to multicast userspace listeners + * kauditd_send_multicast_skb - Send a record to any multicast listeners + * @skb: audit record * + * Description: * This function doesn't consume an skb as might be expected since it has to * copy it anyways. */ -static void kauditd_send_multicast_skb(struct sk_buff *skb, gfp_t gfp_mask) +static void kauditd_send_multicast_skb(struct sk_buff *skb) { - struct sk_buff *copy; - struct audit_net *aunet = net_generic(&init_net, audit_net_id); - struct sock *sock = aunet->nlsk; + struct sk_buff *copy; + struct audit_net *aunet = net_generic(&init_net, audit_net_id); + struct sock *sock = aunet->nlsk; + struct nlmsghdr *nlh; if (!netlink_has_listeners(sock, AUDIT_NLGRP_READLOG)) return; @@ -464,94 +508,155 @@ static void kauditd_send_multicast_skb(struct sk_buff *skb, gfp_t gfp_mask) * no reason for new multicast clients to continue with this * non-compliance. */ - copy = skb_copy(skb, gfp_mask); + copy = skb_copy(skb, GFP_KERNEL); if (!copy) return; + nlh = nlmsg_hdr(copy); + nlh->nlmsg_len = skb->len; - nlmsg_multicast(sock, copy, 0, AUDIT_NLGRP_READLOG, gfp_mask); + nlmsg_multicast(sock, copy, 0, AUDIT_NLGRP_READLOG, GFP_KERNEL); } -/* - * flush_hold_queue - empty the hold queue if auditd appears - * - * If auditd just started, drain the queue of messages already - * sent to syslog/printk. Remember loss here is ok. We already - * called audit_log_lost() if it didn't go out normally. so the - * race between the skb_dequeue and the next check for audit_pid - * doesn't matter. +/** + * kauditd_wake_condition - Return true when it is time to wake kauditd_thread * - * If you ever find kauditd to be too slow we can get a perf win - * by doing our own locking and keeping better track if there - * are messages in this queue. I don't see the need now, but - * in 5 years when I want to play with this again I'll see this - * note and still have no friggin idea what i'm thinking today. + * Description: + * This function is for use by the wait_event_freezable() call in + * kauditd_thread(). */ -static void flush_hold_queue(void) +static int kauditd_wake_condition(void) { - struct sk_buff *skb; - - if (!audit_default || !audit_pid) - return; - - skb = skb_dequeue(&audit_hold_queue); - if (likely(!skb)) - return; + static int pid_last = 0; + int rc; + int pid = audit_pid; - while (skb && audit_pid) { - kauditd_send_unicast_skb(skb); - skb = skb_dequeue(&audit_hold_queue); - } + /* wake on new messages or a change in the connected auditd */ + rc = skb_queue_len(&audit_queue) || (pid && pid != pid_last); + if (rc) + pid_last = pid; - /* - * if auditd just disappeared but we - * dequeued an skb we need to drop ref - */ - consume_skb(skb); + return rc; } static int kauditd_thread(void *dummy) { + int rc; + int auditd = 0; + int reschedule = 0; struct sk_buff *skb; struct nlmsghdr *nlh; +#define UNICAST_RETRIES 5 +#define AUDITD_BAD(x,y) \ + ((x) == -ECONNREFUSED || (x) == -EPERM || ++(y) >= UNICAST_RETRIES) + + /* NOTE: we do invalidate the auditd connection flag on any sending + * errors, but we only "restore" the connection flag at specific places + * in the loop in order to help ensure proper ordering of audit + * records */ + set_freezable(); while (!kthread_should_stop()) { - flush_hold_queue(); + /* NOTE: possible area for future improvement is to look at + * the hold and retry queues, since only this thread + * has access to these queues we might be able to do + * our own queuing and skip some/all of the locking */ + + /* NOTE: it might be a fun experiment to split the hold and + * retry queue handling to another thread, but the + * synchronization issues and other overhead might kill + * any performance gains */ + + /* attempt to flush the hold queue */ + while (auditd && (skb = skb_dequeue(&audit_hold_queue))) { + rc = kauditd_send_unicast_skb(skb); + if (rc) { + /* requeue to the same spot */ + skb_queue_head(&audit_hold_queue, skb); + + auditd = 0; + if (AUDITD_BAD(rc, reschedule)) { + auditd_reset(); + reschedule = 0; + } + } else + /* we were able to send successfully */ + reschedule = 0; + } + + /* attempt to flush the retry queue */ + while (auditd && (skb = skb_dequeue(&audit_retry_queue))) { + rc = kauditd_send_unicast_skb(skb); + if (rc) { + auditd = 0; + if (AUDITD_BAD(rc, reschedule)) { + kauditd_hold_skb(skb); + auditd_reset(); + reschedule = 0; + } else + /* temporary problem (we hope), queue + * to the same spot and retry */ + skb_queue_head(&audit_retry_queue, skb); + } else + /* we were able to send successfully */ + reschedule = 0; + } + /* standard queue processing, try to be as quick as possible */ +quick_loop: skb = skb_dequeue(&audit_queue); if (skb) { + /* setup the netlink header, see the comments in + * kauditd_send_multicast_skb() for length quirks */ nlh = nlmsg_hdr(skb); - - /* if nlh->nlmsg_len is zero then we haven't attempted - * to send the message to userspace yet, if nlmsg_len - * is non-zero then we have attempted to send it to - * the multicast listeners as well as auditd; keep - * trying to send to auditd but don't repeat the - * multicast send */ - if (nlh->nlmsg_len == 0) { - nlh->nlmsg_len = skb->len; - kauditd_send_multicast_skb(skb, GFP_KERNEL); - - /* see the note in kauditd_send_multicast_skb - * regarding the nlh->nlmsg_len value and why - * it differs between the multicast and unicast - * clients */ - nlh->nlmsg_len -= NLMSG_HDRLEN; - } - - if (audit_pid) - kauditd_send_unicast_skb(skb); + nlh->nlmsg_len = skb->len - NLMSG_HDRLEN; + + /* attempt to send to any multicast listeners */ + kauditd_send_multicast_skb(skb); + + /* attempt to send to auditd, queue on failure */ + if (auditd) { + rc = kauditd_send_unicast_skb(skb); + if (rc) { + auditd = 0; + if (AUDITD_BAD(rc, reschedule)) { + auditd_reset(); + reschedule = 0; + } + + /* move to the retry queue */ + kauditd_retry_skb(skb); + } else + /* everything is working so go fast! */ + goto quick_loop; + } else if (reschedule) + /* we are currently having problems, move to + * the retry queue */ + kauditd_retry_skb(skb); else - kauditd_printk_skb(skb); + /* dump the message via printk and hold it */ + kauditd_hold_skb(skb); } else { - /* we have flushed the backlog so wake everyone up who - * is blocked and go to sleep until we have something - * in the backlog again */ + /* we have flushed the backlog so wake everyone */ wake_up(&audit_backlog_wait); - wait_event_freezable(kauditd_wait, - skb_queue_len(&audit_queue)); + + /* if everything is okay with auditd (if present), go + * to sleep until there is something new in the queue + * or we have a change in the connected auditd; + * otherwise simply reschedule to give things a chance + * to recover */ + if (reschedule) { + set_current_state(TASK_INTERRUPTIBLE); + schedule(); + } else + wait_event_freezable(kauditd_wait, + kauditd_wake_condition()); + + /* update the auditd connection status */ + auditd = (audit_pid ? 1 : 0); } } + return 0; } @@ -616,6 +721,7 @@ static int audit_send_reply_thread(void *arg) kfree(reply); return 0; } + /** * audit_send_reply - send an audit reply message via netlink * @request_skb: skb of request we are replying to (used to target the reply) @@ -1171,10 +1277,8 @@ static void __net_exit audit_net_exit(struct net *net) { struct audit_net *aunet = net_generic(net, audit_net_id); struct sock *sock = aunet->nlsk; - if (sock == audit_sock) { - audit_pid = 0; - audit_sock = NULL; - } + if (sock == audit_sock) + auditd_reset(); RCU_INIT_POINTER(aunet->nlsk, NULL); synchronize_net(); @@ -1201,6 +1305,7 @@ static int __init audit_init(void) register_pernet_subsys(&audit_net_ops); skb_queue_head_init(&audit_queue); + skb_queue_head_init(&audit_retry_queue); skb_queue_head_init(&audit_hold_queue); audit_initialized = AUDIT_INITIALIZED; audit_enabled = audit_default; -- cgit From 3197542482df22c2a131d4a813280bd7c54cedf5 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Tue, 29 Nov 2016 16:53:25 -0500 Subject: audit: rework audit_log_start() The backlog queue handling in audit_log_start() is a little odd with some questionable design decisions, this patch attempts to rectify this with the following changes: * Never make auditd wait, ignore any backlog limits as we need auditd awake so it can drain the backlog queue. * When we hit a backlog limit and start dropping records, don't wake all the tasks sleeping on the backlog, that's silly. Instead, let kauditd_thread() take care of waking everyone once it has had a chance to drain the backlog queue. * Don't keep a global backlog timeout countdown, make it per-task. A per-task timer means we won't have all the sleeping tasks waking at the same time and hammering on an already stressed backlog queue. Signed-off-by: Paul Moore --- kernel/audit.c | 92 +++++++++++++++++++++++----------------------------------- 1 file changed, 36 insertions(+), 56 deletions(-) diff --git a/kernel/audit.c b/kernel/audit.c index f4056bc331fc..e23ce6ce101f 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -107,7 +107,6 @@ static u32 audit_rate_limit; * When set to zero, this means unlimited. */ static u32 audit_backlog_limit = 64; #define AUDIT_BACKLOG_WAIT_TIME (60 * HZ) -static u32 audit_backlog_wait_time_master = AUDIT_BACKLOG_WAIT_TIME; static u32 audit_backlog_wait_time = AUDIT_BACKLOG_WAIT_TIME; /* The identity of the user shutting down the audit system. */ @@ -345,7 +344,7 @@ static int audit_set_backlog_limit(u32 limit) static int audit_set_backlog_wait_time(u32 timeout) { return audit_do_config_change("audit_backlog_wait_time", - &audit_backlog_wait_time_master, timeout); + &audit_backlog_wait_time, timeout); } static int audit_set_enabled(u32 state) @@ -973,7 +972,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) s.lost = atomic_read(&audit_lost); s.backlog = skb_queue_len(&audit_queue); s.feature_bitmap = AUDIT_FEATURE_BITMAP_ALL; - s.backlog_wait_time = audit_backlog_wait_time_master; + s.backlog_wait_time = audit_backlog_wait_time; audit_send_reply(skb, seq, AUDIT_GET, 0, 0, &s, sizeof(s)); break; } @@ -1454,24 +1453,6 @@ static inline void audit_get_stamp(struct audit_context *ctx, } } -/* - * Wait for auditd to drain the queue a little - */ -static long wait_for_auditd(long sleep_time) -{ - DECLARE_WAITQUEUE(wait, current); - - if (audit_backlog_limit && - skb_queue_len(&audit_queue) > audit_backlog_limit) { - add_wait_queue_exclusive(&audit_backlog_wait, &wait); - set_current_state(TASK_UNINTERRUPTIBLE); - sleep_time = schedule_timeout(sleep_time); - remove_wait_queue(&audit_backlog_wait, &wait); - } - - return sleep_time; -} - /** * audit_log_start - obtain an audit buffer * @ctx: audit_context (may be NULL) @@ -1490,12 +1471,9 @@ static long wait_for_auditd(long sleep_time) struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, int type) { - struct audit_buffer *ab = NULL; - struct timespec t; - unsigned int uninitialized_var(serial); - int reserve = 5; /* Allow atomic callers to go up to five - entries over the normal backlog limit */ - unsigned long timeout_start = jiffies; + struct audit_buffer *ab; + struct timespec t; + unsigned int uninitialized_var(serial); if (audit_initialized != AUDIT_INITIALIZED) return NULL; @@ -1503,38 +1481,40 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, if (unlikely(!audit_filter(type, AUDIT_FILTER_TYPE))) return NULL; - if (gfp_mask & __GFP_DIRECT_RECLAIM) { - if (audit_pid && audit_pid == current->tgid) - gfp_mask &= ~__GFP_DIRECT_RECLAIM; - else - reserve = 0; - } - - while (audit_backlog_limit - && skb_queue_len(&audit_queue) > audit_backlog_limit + reserve) { - if (gfp_mask & __GFP_DIRECT_RECLAIM && audit_backlog_wait_time) { - long sleep_time; - - sleep_time = timeout_start + audit_backlog_wait_time - jiffies; - if (sleep_time > 0) { - sleep_time = wait_for_auditd(sleep_time); - if (sleep_time > 0) - continue; + /* don't ever fail/sleep on auditd since we need auditd to drain the + * queue; also, when we are checking for auditd, compare PIDs using + * task_tgid_vnr() since auditd_pid is set in audit_receive_msg() using + * a PID anchored in the caller's namespace */ + if (!(audit_pid && audit_pid == task_tgid_vnr(current))) { + long sleep_time = audit_backlog_wait_time; + + while (audit_backlog_limit && + (skb_queue_len(&audit_queue) > audit_backlog_limit)) { + /* wake kauditd to try and flush the queue */ + wake_up_interruptible(&kauditd_wait); + + /* sleep if we are allowed and we haven't exhausted our + * backlog wait limit */ + if ((gfp_mask & __GFP_DIRECT_RECLAIM) && + (sleep_time > 0)) { + DECLARE_WAITQUEUE(wait, current); + + add_wait_queue_exclusive(&audit_backlog_wait, + &wait); + set_current_state(TASK_UNINTERRUPTIBLE); + sleep_time = schedule_timeout(sleep_time); + remove_wait_queue(&audit_backlog_wait, &wait); + } else { + if (audit_rate_check() && printk_ratelimit()) + pr_warn("audit_backlog=%d > audit_backlog_limit=%d\n", + skb_queue_len(&audit_queue), + audit_backlog_limit); + audit_log_lost("backlog limit exceeded"); + return NULL; } } - if (audit_rate_check() && printk_ratelimit()) - pr_warn("audit_backlog=%d > audit_backlog_limit=%d\n", - skb_queue_len(&audit_queue), - audit_backlog_limit); - audit_log_lost("backlog limit exceeded"); - audit_backlog_wait_time = 0; - wake_up(&audit_backlog_wait); - return NULL; } - if (!reserve && !audit_backlog_wait_time) - audit_backlog_wait_time = audit_backlog_wait_time_master; - ab = audit_buffer_alloc(ctx, gfp_mask, type); if (!ab) { audit_log_lost("out of memory in audit_log_start"); @@ -1542,9 +1522,9 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, } audit_get_stamp(ab->ctx, &t, &serial); - audit_log_format(ab, "audit(%lu.%03lu:%u): ", t.tv_sec, t.tv_nsec/1000000, serial); + return ab; } -- cgit From e1d166212894d9d959a601c4802882b877bb420a Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Tue, 29 Nov 2016 16:53:25 -0500 Subject: audit: wake up kauditd_thread after auditd registers This patch was suggested by Richard Briggs back in 2015, see the link to the mail archive below. Unfortunately, that patch is no longer even remotely valid due to other changes to the code. * https://www.redhat.com/archives/linux-audit/2015-October/msg00075.html Suggested-by: Richard Guy Briggs Signed-off-by: Paul Moore --- kernel/audit.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/audit.c b/kernel/audit.c index e23ce6ce101f..0572e5dcfda7 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1009,6 +1009,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) audit_pid = new_pid; audit_nlk_portid = NETLINK_CB(skb).portid; audit_sock = skb->sk; + wake_up_interruptible(&kauditd_wait); } if (s.mask & AUDIT_STATUS_RATE_LIMIT) { err = audit_set_rate_limit(s.rate_limit); -- cgit From 6c54e7899693dee3db67ea996e9be0e10f67920f Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Tue, 29 Nov 2016 16:53:26 -0500 Subject: audit: handle a clean auditd shutdown with grace When auditd stops cleanly it sets 'auditd_pid' to 0 with an AUDIT_SET message, in this case we should reset our backlog queues via the auditd_reset() function. This patch also adds a 'auditd_pid' check to the top of kauditd_send_unicast_skb() so we can fail quicker. Signed-off-by: Paul Moore --- kernel/audit.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/audit.c b/kernel/audit.c index 0572e5dcfda7..b447a6b1fdc8 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -468,6 +468,10 @@ static int kauditd_send_unicast_skb(struct sk_buff *skb) { int rc; + /* if we know nothing is connected, don't even try the netlink call */ + if (!audit_pid) + return -ECONNREFUSED; + /* get an extra skb reference in case we fail to send */ skb_get(skb); rc = netlink_unicast(audit_sock, skb, audit_nlk_portid, 0); @@ -1009,6 +1013,8 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) audit_pid = new_pid; audit_nlk_portid = NETLINK_CB(skb).portid; audit_sock = skb->sk; + if (!new_pid) + auditd_reset(); wake_up_interruptible(&kauditd_wait); } if (s.mask & AUDIT_STATUS_RATE_LIMIT) { -- cgit From a09cfa470817ac086cf68418da13a2b91c2744ef Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Tue, 29 Nov 2016 16:53:26 -0500 Subject: audit: don't ever sleep on a command record/message Sleeping on a command record/message in audit_log_start() could slow something, e.g. auditd, from doing something important, e.g. clean shutdown, which could present problems on a heavily loaded system. This patch allows tasks to bypass any queue restrictions if they are logging a command record/message. Signed-off-by: Paul Moore --- kernel/audit.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/kernel/audit.c b/kernel/audit.c index b447a6b1fdc8..f20eee0db7e6 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1488,11 +1488,19 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, if (unlikely(!audit_filter(type, AUDIT_FILTER_TYPE))) return NULL; - /* don't ever fail/sleep on auditd since we need auditd to drain the - * queue; also, when we are checking for auditd, compare PIDs using - * task_tgid_vnr() since auditd_pid is set in audit_receive_msg() using - * a PID anchored in the caller's namespace */ - if (!(audit_pid && audit_pid == task_tgid_vnr(current))) { + /* don't ever fail/sleep on these two conditions: + * 1. auditd generated record - since we need auditd to drain the + * queue; also, when we are checking for auditd, compare PIDs using + * task_tgid_vnr() since auditd_pid is set in audit_receive_msg() + * using a PID anchored in the caller's namespace + * 2. audit command message - record types 1000 through 1099 inclusive + * are command messages/records used to manage the kernel subsystem + * and the audit userspace, blocking on these messages could cause + * problems under load so don't do it (note: not all of these + * command types are valid as record types, but it is quicker to + * just check two ints than a series of ints in a if/switch stmt) */ + if (!((audit_pid && audit_pid == task_tgid_vnr(current)) || + (type >= 1000 && type <= 1099))) { long sleep_time = audit_backlog_wait_time; while (audit_backlog_limit && -- cgit From fba143c66abb81307a450679f38ab953fe96a413 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Tue, 29 Nov 2016 16:57:48 -0500 Subject: netns: avoid disabling irq for netns id Bring back commit bc51dddf98c9 ("netns: avoid disabling irq for netns id") now that we've fixed some audit multicast issues that caused problems with original attempt. Additional information, and history, can be found in the links below: * https://github.com/linux-audit/audit-kernel/issues/22 * https://github.com/linux-audit/audit-kernel/issues/23 Signed-off-by: Cong Wang Signed-off-by: Paul Moore --- net/core/net_namespace.c | 35 +++++++++++++++-------------------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 2c2eb1b629b1..10608dd1489f 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -213,14 +213,13 @@ static void rtnl_net_notifyid(struct net *net, int cmd, int id); */ int peernet2id_alloc(struct net *net, struct net *peer) { - unsigned long flags; bool alloc; int id; - spin_lock_irqsave(&net->nsid_lock, flags); + spin_lock_bh(&net->nsid_lock); alloc = atomic_read(&peer->count) == 0 ? false : true; id = __peernet2id_alloc(net, peer, &alloc); - spin_unlock_irqrestore(&net->nsid_lock, flags); + spin_unlock_bh(&net->nsid_lock); if (alloc && id >= 0) rtnl_net_notifyid(net, RTM_NEWNSID, id); return id; @@ -230,12 +229,11 @@ EXPORT_SYMBOL(peernet2id_alloc); /* This function returns, if assigned, the id of a peer netns. */ int peernet2id(struct net *net, struct net *peer) { - unsigned long flags; int id; - spin_lock_irqsave(&net->nsid_lock, flags); + spin_lock_bh(&net->nsid_lock); id = __peernet2id(net, peer); - spin_unlock_irqrestore(&net->nsid_lock, flags); + spin_unlock_bh(&net->nsid_lock); return id; } @@ -249,18 +247,17 @@ bool peernet_has_id(struct net *net, struct net *peer) struct net *get_net_ns_by_id(struct net *net, int id) { - unsigned long flags; struct net *peer; if (id < 0) return NULL; rcu_read_lock(); - spin_lock_irqsave(&net->nsid_lock, flags); + spin_lock_bh(&net->nsid_lock); peer = idr_find(&net->netns_ids, id); if (peer) get_net(peer); - spin_unlock_irqrestore(&net->nsid_lock, flags); + spin_unlock_bh(&net->nsid_lock); rcu_read_unlock(); return peer; @@ -404,17 +401,17 @@ static void cleanup_net(struct work_struct *work) for_each_net(tmp) { int id; - spin_lock_irq(&tmp->nsid_lock); + spin_lock_bh(&tmp->nsid_lock); id = __peernet2id(tmp, net); if (id >= 0) idr_remove(&tmp->netns_ids, id); - spin_unlock_irq(&tmp->nsid_lock); + spin_unlock_bh(&tmp->nsid_lock); if (id >= 0) rtnl_net_notifyid(tmp, RTM_DELNSID, id); } - spin_lock_irq(&net->nsid_lock); + spin_lock_bh(&net->nsid_lock); idr_destroy(&net->netns_ids); - spin_unlock_irq(&net->nsid_lock); + spin_unlock_bh(&net->nsid_lock); } rtnl_unlock(); @@ -542,7 +539,6 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct nlattr *tb[NETNSA_MAX + 1]; - unsigned long flags; struct net *peer; int nsid, err; @@ -563,15 +559,15 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh) if (IS_ERR(peer)) return PTR_ERR(peer); - spin_lock_irqsave(&net->nsid_lock, flags); + spin_lock_bh(&net->nsid_lock); if (__peernet2id(net, peer) >= 0) { - spin_unlock_irqrestore(&net->nsid_lock, flags); + spin_unlock_bh(&net->nsid_lock); err = -EEXIST; goto out; } err = alloc_netid(net, peer, nsid); - spin_unlock_irqrestore(&net->nsid_lock, flags); + spin_unlock_bh(&net->nsid_lock); if (err >= 0) { rtnl_net_notifyid(net, RTM_NEWNSID, err); err = 0; @@ -693,11 +689,10 @@ static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb) .idx = 0, .s_idx = cb->args[0], }; - unsigned long flags; - spin_lock_irqsave(&net->nsid_lock, flags); + spin_lock_bh(&net->nsid_lock); idr_for_each(&net->netns_ids, rtnl_net_dumpid_one, &net_cb); - spin_unlock_irqrestore(&net->nsid_lock, flags); + spin_unlock_bh(&net->nsid_lock); cb->args[0] = net_cb.idx; return skb->len; -- cgit From 533c7b69c764ad5febb3e716899f43a75564fcab Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Tue, 13 Dec 2016 10:03:01 -0500 Subject: audit: use proper refcount locking on audit_sock Resetting audit_sock appears to be racy. audit_sock was being copied and dereferenced without using a refcount on the source sock. Bump the refcount on the underlying sock when we store a refrence in audit_sock and release it when we reset audit_sock. audit_sock modification needs the audit_cmd_mutex. See: https://lkml.org/lkml/2016/11/26/232 Thanks to Eric Dumazet and Cong Wang on ideas how to fix it. Signed-off-by: Richard Guy Briggs Reviewed-by: Cong Wang [PM: fixed the comment block text formatting for auditd_reset()] Signed-off-by: Paul Moore --- kernel/audit.c | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/kernel/audit.c b/kernel/audit.c index f20eee0db7e6..41017685f9f2 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -445,15 +445,20 @@ static void kauditd_retry_skb(struct sk_buff *skb) * * Description: * Break the auditd/kauditd connection and move all the records in the retry - * queue into the hold queue in case auditd reconnects. + * queue into the hold queue in case auditd reconnects. The audit_cmd_mutex + * must be held when calling this function. */ static void auditd_reset(void) { struct sk_buff *skb; /* break the connection */ + if (audit_sock) { + sock_put(audit_sock); + audit_sock = NULL; + } audit_pid = 0; - audit_sock = NULL; + audit_nlk_portid = 0; /* flush all of the retry queue to the hold queue */ while ((skb = skb_dequeue(&audit_retry_queue))) @@ -579,7 +584,9 @@ static int kauditd_thread(void *dummy) auditd = 0; if (AUDITD_BAD(rc, reschedule)) { + mutex_lock(&audit_cmd_mutex); auditd_reset(); + mutex_unlock(&audit_cmd_mutex); reschedule = 0; } } else @@ -594,7 +601,9 @@ static int kauditd_thread(void *dummy) auditd = 0; if (AUDITD_BAD(rc, reschedule)) { kauditd_hold_skb(skb); + mutex_lock(&audit_cmd_mutex); auditd_reset(); + mutex_unlock(&audit_cmd_mutex); reschedule = 0; } else /* temporary problem (we hope), queue @@ -623,7 +632,9 @@ quick_loop: if (rc) { auditd = 0; if (AUDITD_BAD(rc, reschedule)) { + mutex_lock(&audit_cmd_mutex); auditd_reset(); + mutex_unlock(&audit_cmd_mutex); reschedule = 0; } @@ -1010,11 +1021,16 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) } if (audit_enabled != AUDIT_OFF) audit_log_config_change("audit_pid", new_pid, audit_pid, 1); - audit_pid = new_pid; - audit_nlk_portid = NETLINK_CB(skb).portid; - audit_sock = skb->sk; - if (!new_pid) + if (new_pid) { + if (audit_sock) + sock_put(audit_sock); + audit_pid = new_pid; + audit_nlk_portid = NETLINK_CB(skb).portid; + sock_hold(skb->sk); + audit_sock = skb->sk; + } else { auditd_reset(); + } wake_up_interruptible(&kauditd_wait); } if (s.mask & AUDIT_STATUS_RATE_LIMIT) { @@ -1283,8 +1299,10 @@ static void __net_exit audit_net_exit(struct net *net) { struct audit_net *aunet = net_generic(net, audit_net_id); struct sock *sock = aunet->nlsk; + mutex_lock(&audit_cmd_mutex); if (sock == audit_sock) auditd_reset(); + mutex_unlock(&audit_cmd_mutex); RCU_INIT_POINTER(aunet->nlsk, NULL); synchronize_net(); -- cgit