From 1c97be677f72b3c338312aecd36d8fff20322f32 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 20 Sep 2015 22:02:17 -0700 Subject: list: Use WRITE_ONCE() when adding to lists and hlists Code that does lockless emptiness testing of non-RCU lists is relying on the list-addition code to write the list head's ->next pointer atomically. This commit therefore adds WRITE_ONCE() to list-addition pointer stores that could affect the head's ->next pointer. Reported-by: Dmitry Vyukov Signed-off-by: Paul E. McKenney --- include/linux/list.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/list.h b/include/linux/list.h index 993395a2e55c..d7e31fe398b3 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -42,7 +42,7 @@ static inline void __list_add(struct list_head *new, next->prev = new; new->next = next; new->prev = prev; - prev->next = new; + WRITE_ONCE(prev->next, new); } #else extern void __list_add(struct list_head *new, @@ -642,7 +642,7 @@ static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h) n->next = first; if (first) first->pprev = &n->next; - h->first = n; + WRITE_ONCE(h->first, n); n->pprev = &h->first; } @@ -653,14 +653,14 @@ static inline void hlist_add_before(struct hlist_node *n, n->pprev = next->pprev; n->next = next; next->pprev = &n->next; - *(n->pprev) = n; + WRITE_ONCE(*(n->pprev), n); } static inline void hlist_add_behind(struct hlist_node *n, struct hlist_node *prev) { n->next = prev->next; - prev->next = n; + WRITE_ONCE(prev->next, n); n->pprev = &prev->next; if (n->next) -- cgit From 1658d35ead5d8dd76f2b2d6ad0e32c08d123faa2 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 20 Sep 2015 17:03:16 -0700 Subject: list: Use READ_ONCE() when testing for empty lists Most of the list-empty-check macros (list_empty(), hlist_empty(), hlist_bl_empty(), hlist_nulls_empty(), and hlist_nulls_empty()) use an unadorned load to check the list header. Given that these macros are sometimes invoked without the protection of a lock, this is not sufficient. This commit therefore adds READ_ONCE() calls to them. This commit does not touch llist_empty() because it already has the needed ACCESS_ONCE(). Reported-by: Dmitry Vyukov Signed-off-by: Paul E. McKenney --- include/linux/list.h | 4 ++-- include/linux/list_bl.h | 2 +- include/linux/list_nulls.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/list.h b/include/linux/list.h index d7e31fe398b3..06c2d887a918 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -186,7 +186,7 @@ static inline int list_is_last(const struct list_head *list, */ static inline int list_empty(const struct list_head *head) { - return head->next == head; + return READ_ONCE(head->next) == head; } /** @@ -608,7 +608,7 @@ static inline int hlist_unhashed(const struct hlist_node *h) static inline int hlist_empty(const struct hlist_head *h) { - return !h->first; + return !READ_ONCE(h->first); } static inline void __hlist_del(struct hlist_node *n) diff --git a/include/linux/list_bl.h b/include/linux/list_bl.h index 8132214e8efd..ee7229a6c06a 100644 --- a/include/linux/list_bl.h +++ b/include/linux/list_bl.h @@ -70,7 +70,7 @@ static inline void hlist_bl_set_first(struct hlist_bl_head *h, static inline int hlist_bl_empty(const struct hlist_bl_head *h) { - return !((unsigned long)h->first & ~LIST_BL_LOCKMASK); + return !((unsigned long)READ_ONCE(h->first) & ~LIST_BL_LOCKMASK); } static inline void hlist_bl_add_head(struct hlist_bl_node *n, diff --git a/include/linux/list_nulls.h b/include/linux/list_nulls.h index 444d2b1313bd..b01fe1009084 100644 --- a/include/linux/list_nulls.h +++ b/include/linux/list_nulls.h @@ -57,7 +57,7 @@ static inline int hlist_nulls_unhashed(const struct hlist_nulls_node *h) static inline int hlist_nulls_empty(const struct hlist_nulls_head *h) { - return is_a_nulls(h->first); + return is_a_nulls(READ_ONCE(h->first)); } static inline void hlist_nulls_add_head(struct hlist_nulls_node *n, -- cgit From 5a9be7c628c5273f84abacebf7faf2488376e0f0 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 24 Nov 2015 15:44:06 -0800 Subject: rcu: Add rcu_normal kernel parameter to suppress expediting Although expedited grace periods can be quite useful, and although their OS jitter has been greatly reduced, they can still pose problems for extreme real-time workloads. This commit therefore adds a rcu_normal kernel boot parameter (which can also be manipulated via sysfs) to suppress expedited grace periods, that is, to treat requests for expedited grace periods as if they were requests for normal grace periods. If both rcu_expedited and rcu_normal are specified, rcu_normal wins. This means that if you are relying on expedited grace periods to speed up boot, you will want to specify rcu_expedited on the kernel command line, and then specify rcu_normal via sysfs once boot completes. Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index a0189ba67fde..98d9f30c02d4 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -49,9 +49,14 @@ #include extern int rcu_expedited; /* for sysctl */ +extern int rcu_normal; /* also for sysctl */ #ifdef CONFIG_TINY_RCU /* Tiny RCU doesn't expedite, as its purpose in life is instead to be tiny. */ +static inline bool rcu_gp_is_normal(void) /* Internal RCU use. */ +{ + return true; +} static inline bool rcu_gp_is_expedited(void) /* Internal RCU use. */ { return false; @@ -65,6 +70,7 @@ static inline void rcu_unexpedite_gp(void) { } #else /* #ifdef CONFIG_TINY_RCU */ +bool rcu_gp_is_normal(void); /* Internal RCU use. */ bool rcu_gp_is_expedited(void); /* Internal RCU use. */ void rcu_expedite_gp(void); void rcu_unexpedite_gp(void); -- cgit From 46a5d164db53ba6066b11889abb7fa6bddbe5cf7 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 7 Oct 2015 09:10:48 -0700 Subject: rcu: Stop disabling interrupts in scheduler fastpaths We need the scheduler's fastpaths to be, well, fast, and unnecessarily disabling and re-enabling interrupts is not necessarily consistent with this goal. Especially given that there are regions of the scheduler that already have interrupts disabled. This commit therefore moves the call to rcu_note_context_switch() to one of the interrupts-disabled regions of the scheduler, and removes the now-redundant disabling and re-enabling of interrupts from rcu_note_context_switch() and the functions it calls. Reported-by: Peter Zijlstra Signed-off-by: Paul E. McKenney [ paulmck: Shift rcu_note_context_switch() to avoid deadlock, as suggested by Peter Zijlstra. ] --- include/linux/rcutree.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 60d15a080d7c..9d3eda39bcd2 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -37,7 +37,7 @@ void rcu_cpu_stall_reset(void); /* * Note a virtualization-based context switch. This is simply a * wrapper around rcu_note_context_switch(), which allows TINY_RCU - * to save a few bytes. + * to save a few bytes. The caller must have disabled interrupts. */ static inline void rcu_virt_note_context_switch(int cpu) { -- cgit From 7d86dccf28a3ae2f790f399fc82d4c82521fd078 Mon Sep 17 00:00:00 2001 From: Petko Manolov Date: Mon, 12 Oct 2015 18:23:51 +0300 Subject: list: Introduces generic list_splice_tail_init_rcu() The list_splice_init_rcu() can be used as a stack onto which full lists are pushed, but queue-like behavior is now needed by some security policies. This requires a list_splice_tail_init_rcu(). This commit therefore supplies a list_splice_tail_init_rcu() by pulling code common it and to list_splice_init_rcu() into a new __list_splice_init_rcu() function. This new function is based on the existing list_splice_init_rcu() implementation. Signed-off-by: Petko Manolov Cc: Mimi Zohar Signed-off-by: Paul E. McKenney --- include/linux/rculist.h | 69 +++++++++++++++++++++++++++++++++++-------------- 1 file changed, 49 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 5ed540986019..e99d834545b6 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -179,32 +179,31 @@ static inline void list_replace_rcu(struct list_head *old, } /** - * list_splice_init_rcu - splice an RCU-protected list into an existing list. + * __list_splice_init_rcu - join an RCU-protected list into an existing list. * @list: the RCU-protected list to splice - * @head: the place in the list to splice the first list into + * @prev: points to the last element of the existing list + * @next: points to the first element of the existing list * @sync: function to sync: synchronize_rcu(), synchronize_sched(), ... * - * @head can be RCU-read traversed concurrently with this function. + * The list pointed to by @prev and @next can be RCU-read traversed + * concurrently with this function. * * Note that this function blocks. * - * Important note: the caller must take whatever action is necessary to - * prevent any other updates to @head. In principle, it is possible - * to modify the list as soon as sync() begins execution. - * If this sort of thing becomes necessary, an alternative version - * based on call_rcu() could be created. But only if -really- - * needed -- there is no shortage of RCU API members. + * Important note: the caller must take whatever action is necessary to prevent + * any other updates to the existing list. In principle, it is possible to + * modify the list as soon as sync() begins execution. If this sort of thing + * becomes necessary, an alternative version based on call_rcu() could be + * created. But only if -really- needed -- there is no shortage of RCU API + * members. */ -static inline void list_splice_init_rcu(struct list_head *list, - struct list_head *head, - void (*sync)(void)) +static inline void __list_splice_init_rcu(struct list_head *list, + struct list_head *prev, + struct list_head *next, + void (*sync)(void)) { struct list_head *first = list->next; struct list_head *last = list->prev; - struct list_head *at = head->next; - - if (list_empty(list)) - return; /* * "first" and "last" tracking list, so initialize it. RCU readers @@ -231,10 +230,40 @@ static inline void list_splice_init_rcu(struct list_head *list, * this function. */ - last->next = at; - rcu_assign_pointer(list_next_rcu(head), first); - first->prev = head; - at->prev = last; + last->next = next; + rcu_assign_pointer(list_next_rcu(prev), first); + first->prev = prev; + next->prev = last; +} + +/** + * list_splice_init_rcu - splice an RCU-protected list into an existing list, + * designed for stacks. + * @list: the RCU-protected list to splice + * @head: the place in the existing list to splice the first list into + * @sync: function to sync: synchronize_rcu(), synchronize_sched(), ... + */ +static inline void list_splice_init_rcu(struct list_head *list, + struct list_head *head, + void (*sync)(void)) +{ + if (!list_empty(list)) + __list_splice_init_rcu(list, head, head->next, sync); +} + +/** + * list_splice_tail_init_rcu - splice an RCU-protected list into an existing + * list, designed for queues. + * @list: the RCU-protected list to splice + * @head: the place in the existing list to splice the first list into + * @sync: function to sync: synchronize_rcu(), synchronize_sched(), ... + */ +static inline void list_splice_tail_init_rcu(struct list_head *list, + struct list_head *head, + void (*sync)(void)) +{ + if (!list_empty(list)) + __list_splice_init_rcu(list, head->prev, head, sync); } /** -- cgit From 2f073848c3cc8aff2655ab7c46d8c0de90cf4e50 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 12 Oct 2015 16:56:42 -0700 Subject: list: Use WRITE_ONCE() when initializing list_head structures Code that does lockless emptiness testing of non-RCU lists is relying on INIT_LIST_HEAD() to write the list head's ->next pointer atomically, particularly when INIT_LIST_HEAD() is invoked from list_del_init(). This commit therefore adds WRITE_ONCE() to this function's pointer stores that could affect the head's ->next pointer. Reported-by: Andrey Konovalov Signed-off-by: Paul E. McKenney --- include/linux/list.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/list.h b/include/linux/list.h index 06c2d887a918..5356f4d661a7 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -24,7 +24,7 @@ static inline void INIT_LIST_HEAD(struct list_head *list) { - list->next = list; + WRITE_ONCE(list->next, list); list->prev = list; } -- cgit From 79cfea0273876d9c438f3227b8f68c8c7ae31583 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 7 Dec 2015 13:09:52 -0800 Subject: rcu: Remove TINY_RCU bloat from pointless boot parameters The rcu_expedited, rcu_normal, and rcu_normal_after_boot kernel boot parameters are pointless in the case of TINY_RCU because in that case synchronous grace periods, both expedited and normal, are no-ops. However, these three symbols contribute several hundred bytes of bloat. This commit therefore uses CPP directives to avoid compiling this code in TINY_RCU kernels. Reported-by: kbuild test robot Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcupdate.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 98d9f30c02d4..47e95b80bebd 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -48,8 +48,10 @@ #include +#ifndef CONFIG_TINY_RCU extern int rcu_expedited; /* for sysctl */ extern int rcu_normal; /* also for sysctl */ +#endif /* #ifndef CONFIG_TINY_RCU */ #ifdef CONFIG_TINY_RCU /* Tiny RCU doesn't expedite, as its purpose in life is instead to be tiny. */ @@ -327,7 +329,6 @@ static inline int rcu_preempt_depth(void) /* Internal to kernel */ void rcu_init(void); -void rcu_end_inkernel_boot(void); void rcu_sched_qs(void); void rcu_bh_qs(void); void rcu_check_callbacks(int user); @@ -335,6 +336,12 @@ struct notifier_block; int rcu_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu); +#ifndef CONFIG_TINY_RCU +void rcu_end_inkernel_boot(void); +#else /* #ifndef CONFIG_TINY_RCU */ +static inline void rcu_end_inkernel_boot(void) { } +#endif /* #ifndef CONFIG_TINY_RCU */ + #ifdef CONFIG_RCU_STALL_COMMON void rcu_sysrq_start(void); void rcu_sysrq_end(void); -- cgit From 7c9906ca5e582a773fff696975e312cef58a7386 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 31 Oct 2015 00:59:01 -0700 Subject: rcu: Don't redundantly disable irqs in rcu_irq_{enter,exit}() This commit replaces a local_irq_save()/local_irq_restore() pair with a lockdep assertion that interrupts are already disabled. This should remove the corresponding overhead from the interrupt entry/exit fastpaths. This change was inspired by the fact that Iftekhar Ahmed's mutation testing showed that removing rcu_irq_enter()'s call to local_ird_restore() had no effect, which might indicate that interrupts were always enabled anyway. Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 4 ++-- include/linux/rcutiny.h | 8 ++++++++ include/linux/rcutree.h | 2 ++ include/linux/tracepoint.h | 4 ++-- 4 files changed, 14 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index a0189ba67fde..f2b667df1131 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -379,9 +379,9 @@ static inline void rcu_init_nohz(void) */ #define RCU_NONIDLE(a) \ do { \ - rcu_irq_enter(); \ + rcu_irq_enter_irqson(); \ do { a; } while (0); \ - rcu_irq_exit(); \ + rcu_irq_exit_irqson(); \ } while (0) /* diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 4c1aaf9cce7b..64809aea661c 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -181,6 +181,14 @@ static inline void rcu_irq_enter(void) { } +static inline void rcu_irq_exit_irqson(void) +{ +} + +static inline void rcu_irq_enter_irqson(void) +{ +} + static inline void rcu_irq_exit(void) { } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 9d3eda39bcd2..ad1eda9fa4da 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -97,6 +97,8 @@ void rcu_idle_enter(void); void rcu_idle_exit(void); void rcu_irq_enter(void); void rcu_irq_exit(void); +void rcu_irq_enter_irqson(void); +void rcu_irq_exit_irqson(void); void exit_rcu(void); diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 696a339c592c..7834a8a8bf1e 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -171,8 +171,8 @@ extern void syscall_unregfunc(void); TP_PROTO(data_proto), \ TP_ARGS(data_args), \ TP_CONDITION(cond), \ - rcu_irq_enter(), \ - rcu_irq_exit()); \ + rcu_irq_enter_irqson(), \ + rcu_irq_exit_irqson()); \ } #else #define __DECLARE_TRACE_RCU(name, proto, args, cond, data_proto, data_args) -- cgit From f039f0af081746933d5dec3229637a18fab791ed Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Mon, 2 Nov 2015 13:21:47 +1100 Subject: rcu: Fix comment for rcu_dereference_raw_notrace rcu_dereference_raw() calls indirectly rcu_read_lock_held() while rcu_dereference_raw_notrace() does not so fix the comment about the latter. Signed-off-by: Alexey Kardashevskiy Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index f2b667df1131..85aabcd8b564 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -741,7 +741,7 @@ static inline void rcu_preempt_sleep_check(void) * The tracing infrastructure traces RCU (we want that), but unfortunately * some of the RCU checks causes tracing to lock up the system. * - * The tracing version of rcu_dereference_raw() must not call + * The no-tracing version of rcu_dereference_raw() must not call * rcu_read_lock_held(). */ #define rcu_dereference_raw_notrace(p) __rcu_dereference_check((p), 1, __rcu) -- cgit From 69b907297f4edf13182e3fa3adc0160df077746c Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Sat, 5 Dec 2015 18:14:19 -0800 Subject: list: Add lockless list traversal primitives Although list_for_each_entry_rcu() can in theory be used anywhere preemption is disabled, it can result in calls to lockdep, which cannot be used in certain constrained execution environments, such as exception handlers that do not map the entire kernel into their address spaces. This commit therefore adds list_entry_lockless() and list_for_each_entry_lockless(), which never invoke lockdep and can therefore safely be used from these constrained environments, but only as long as those environments are non-preemptible (or items are never deleted from the list). Use synchronize_sched(), call_rcu_sched(), or synchronize_sched_expedited() in updates for the needed grace periods. Of course, if items are never deleted from the list, there is no need to wait for grace periods. Signed-off-by: Alexey Kardashevskiy Signed-off-by: Paul E. McKenney --- include/linux/rculist.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 5ed540986019..1fad79861e14 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -304,6 +304,42 @@ static inline void list_splice_init_rcu(struct list_head *list, &pos->member != (head); \ pos = list_entry_rcu(pos->member.next, typeof(*pos), member)) +/** + * list_entry_lockless - get the struct for this entry + * @ptr: the &struct list_head pointer. + * @type: the type of the struct this is embedded in. + * @member: the name of the list_head within the struct. + * + * This primitive may safely run concurrently with the _rcu list-mutation + * primitives such as list_add_rcu(), but requires some implicit RCU + * read-side guarding. One example is running within a special + * exception-time environment where preemption is disabled and where + * lockdep cannot be invoked (in which case updaters must use RCU-sched, + * as in synchronize_sched(), call_rcu_sched(), and friends). Another + * example is when items are added to the list, but never deleted. + */ +#define list_entry_lockless(ptr, type, member) \ + container_of((typeof(ptr))lockless_dereference(ptr), type, member) + +/** + * list_for_each_entry_lockless - iterate over rcu list of given type + * @pos: the type * to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the list_struct within the struct. + * + * This primitive may safely run concurrently with the _rcu list-mutation + * primitives such as list_add_rcu(), but requires some implicit RCU + * read-side guarding. One example is running within a special + * exception-time environment where preemption is disabled and where + * lockdep cannot be invoked (in which case updaters must use RCU-sched, + * as in synchronize_sched(), call_rcu_sched(), and friends). Another + * example is when items are added to the list, but never deleted. + */ +#define list_for_each_entry_lockless(pos, head, member) \ + for (pos = list_entry_lockless((head)->next, typeof(*pos), member); \ + &pos->member != (head); \ + pos = list_entry_lockless(pos->member.next, typeof(*pos), member)) + /** * list_for_each_entry_continue_rcu - continue iteration over list of given type * @pos: the type * to use as a loop cursor. -- cgit