From 6a4e24518c8a10f78f44da219835239cb5aca90d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Jul 2016 17:16:03 +0000 Subject: cpu/hotplug: Handle early registration gracefully We switched the hotplug machinery to smpboot threads. Early registration of hotplug callbacks, i.e. from do_pre_smp_initcalls(), happens before the threads are initialized. Instead of moving the thread init, we simply handle it in the hotplug code itself and invoke the function directly. Signed-off-by: Thomas Gleixner Signed-off-by: Anna-Maria Gleixner Cc: Linus Torvalds Cc: Peter Zijlstra Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160713153332.896450738@linutronix.de Signed-off-by: Ingo Molnar --- kernel/cpu.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'kernel') diff --git a/kernel/cpu.c b/kernel/cpu.c index 7b61887f7ccd..fe71ce4e60f1 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -517,6 +517,13 @@ static int cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, if (!cpu_online(cpu)) return 0; + /* + * If we are up and running, use the hotplug thread. For early calls + * we invoke the thread function directly. + */ + if (!st->thread) + return cpuhp_invoke_callback(cpu, state, cb); + st->cb_state = state; st->cb = cb; /* -- cgit From 00e16c3d68fce504e880f59c9bdf23b2a4759d6d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Jul 2016 17:16:09 +0000 Subject: perf/core: Convert to hotplug state machine Actually a nice symmetric startup/teardown pair which fits properly into the state machine concept. In the long run we should be able to invoke the startup callback for the boot CPU via the state machine and get rid of the init function which invokes it on the boot CPU. Note: This comes actually before the perf hardware callbacks. In the notifier model the hardware callbacks have a higher priority than the core callback. But that's solely for CPU offline so that hardware migration of events happens before the core is notified about the outgoing CPU. With the symetric state array model we have the following ordering: UP: core -> hardware DOWN: hardware -> core Signed-off-by: Thomas Gleixner Signed-off-by: Anna-Maria Gleixner Reviewed-by: Sebastian Siewior Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rasmus Villemoes Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160713153333.587514098@linutronix.de Signed-off-by: Ingo Molnar --- kernel/cpu.c | 11 +++++++++++ kernel/events/core.c | 56 ++++++++++------------------------------------------ 2 files changed, 21 insertions(+), 46 deletions(-) (limited to 'kernel') diff --git a/kernel/cpu.c b/kernel/cpu.c index fe71ce4e60f1..3705d9043c08 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1180,6 +1180,11 @@ static struct cpuhp_step cpuhp_bp_states[] = { .teardown = NULL, .cant_stop = true, }, + [CPUHP_PERF_PREPARE] = { + .name = "perf prepare", + .startup = perf_event_init_cpu, + .teardown = perf_event_exit_cpu, + }, /* * Preparatory and dead notifiers. Will be replaced once the notifiers * are converted to states. @@ -1257,6 +1262,12 @@ static struct cpuhp_step cpuhp_ap_states[] = { .startup = smpboot_unpark_threads, .teardown = NULL, }, + [CPUHP_AP_PERF_ONLINE] = { + .name = "perf online", + .startup = perf_event_init_cpu, + .teardown = perf_event_exit_cpu, + }, + /* * Online/down_prepare notifiers. Will be removed once the notifiers * are converted to states. diff --git a/kernel/events/core.c b/kernel/events/core.c index 43d43a2d5811..f3ef1c29a7c9 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -10255,7 +10255,7 @@ static void __init perf_event_init_all_cpus(void) } } -static void perf_event_init_cpu(int cpu) +int perf_event_init_cpu(unsigned int cpu) { struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); @@ -10268,6 +10268,7 @@ static void perf_event_init_cpu(int cpu) rcu_assign_pointer(swhash->swevent_hlist, hlist); } mutex_unlock(&swhash->hlist_mutex); + return 0; } #if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE @@ -10299,14 +10300,17 @@ static void perf_event_exit_cpu_context(int cpu) } srcu_read_unlock(&pmus_srcu, idx); } +#else + +static void perf_event_exit_cpu_context(int cpu) { } + +#endif -static void perf_event_exit_cpu(int cpu) +int perf_event_exit_cpu(unsigned int cpu) { perf_event_exit_cpu_context(cpu); + return 0; } -#else -static inline void perf_event_exit_cpu(int cpu) { } -#endif static int perf_reboot(struct notifier_block *notifier, unsigned long val, void *v) @@ -10328,46 +10332,6 @@ static struct notifier_block perf_reboot_notifier = { .priority = INT_MIN, }; -static int -perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) -{ - unsigned int cpu = (long)hcpu; - - switch (action & ~CPU_TASKS_FROZEN) { - - case CPU_UP_PREPARE: - /* - * This must be done before the CPU comes alive, because the - * moment we can run tasks we can encounter (software) events. - * - * Specifically, someone can have inherited events on kthreadd - * or a pre-existing worker thread that gets re-bound. - */ - perf_event_init_cpu(cpu); - break; - - case CPU_DOWN_PREPARE: - /* - * This must be done before the CPU dies because after that an - * active event might want to IPI the CPU and that'll not work - * so great for dead CPUs. - * - * XXX smp_call_function_single() return -ENXIO without a warn - * so we could possibly deal with this. - * - * This is safe against new events arriving because - * sys_perf_event_open() serializes against hotplug using - * get_online_cpus(). - */ - perf_event_exit_cpu(cpu); - break; - default: - break; - } - - return NOTIFY_OK; -} - void __init perf_event_init(void) { int ret; @@ -10380,7 +10344,7 @@ void __init perf_event_init(void) perf_pmu_register(&perf_cpu_clock, NULL, -1); perf_pmu_register(&perf_task_clock, NULL, -1); perf_tp_register(); - perf_cpu_notifier(perf_cpu_notify); + perf_event_init_cpu(smp_processor_id()); register_reboot_notifier(&perf_reboot_notifier); ret = init_hw_breakpoint(); -- cgit From 7ee681b25284782ecf380bf5ccf55f13c52fd0ce Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Jul 2016 17:16:29 +0000 Subject: workqueue: Convert to state machine callbacks Get rid of the prio ordering of the separate notifiers and use a proper state callback pair. Signed-off-by: Thomas Gleixner Signed-off-by: Anna-Maria Gleixner Reviewed-by: Sebastian Andrzej Siewior Acked-by: Tejun Heo Cc: Andrew Morton Cc: Lai Jiangshan Cc: Linus Torvalds Cc: Nicolas Iooss Cc: Oleg Nesterov Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rasmus Villemoes Cc: Rusty Russell Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160713153335.197083890@linutronix.de Signed-off-by: Ingo Molnar --- kernel/cpu.c | 10 +++++ kernel/workqueue.c | 108 +++++++++++++++++++++-------------------------------- 2 files changed, 53 insertions(+), 65 deletions(-) (limited to 'kernel') diff --git a/kernel/cpu.c b/kernel/cpu.c index 3705d9043c08..af53f820fec9 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1185,6 +1185,11 @@ static struct cpuhp_step cpuhp_bp_states[] = { .startup = perf_event_init_cpu, .teardown = perf_event_exit_cpu, }, + [CPUHP_WORKQUEUE_PREP] = { + .name = "workqueue prepare", + .startup = workqueue_prepare_cpu, + .teardown = NULL, + }, /* * Preparatory and dead notifiers. Will be replaced once the notifiers * are converted to states. @@ -1267,6 +1272,11 @@ static struct cpuhp_step cpuhp_ap_states[] = { .startup = perf_event_init_cpu, .teardown = perf_event_exit_cpu, }, + [CPUHP_AP_WORKQUEUE_ONLINE] = { + .name = "workqueue online", + .startup = workqueue_online_cpu, + .teardown = workqueue_offline_cpu, + }, /* * Online/down_prepare notifiers. Will be removed once the notifiers diff --git a/kernel/workqueue.c b/kernel/workqueue.c index e1c0e996b5ae..c9dd5fbdbf33 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -4611,84 +4611,65 @@ static void restore_unbound_workers_cpumask(struct worker_pool *pool, int cpu) pool->attrs->cpumask) < 0); } -/* - * Workqueues should be brought up before normal priority CPU notifiers. - * This will be registered high priority CPU notifier. - */ -static int workqueue_cpu_up_callback(struct notifier_block *nfb, - unsigned long action, - void *hcpu) +int workqueue_prepare_cpu(unsigned int cpu) +{ + struct worker_pool *pool; + + for_each_cpu_worker_pool(pool, cpu) { + if (pool->nr_workers) + continue; + if (!create_worker(pool)) + return -ENOMEM; + } + return 0; +} + +int workqueue_online_cpu(unsigned int cpu) { - int cpu = (unsigned long)hcpu; struct worker_pool *pool; struct workqueue_struct *wq; int pi; - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_UP_PREPARE: - for_each_cpu_worker_pool(pool, cpu) { - if (pool->nr_workers) - continue; - if (!create_worker(pool)) - return NOTIFY_BAD; - } - break; - - case CPU_DOWN_FAILED: - case CPU_ONLINE: - mutex_lock(&wq_pool_mutex); + mutex_lock(&wq_pool_mutex); - for_each_pool(pool, pi) { - mutex_lock(&pool->attach_mutex); + for_each_pool(pool, pi) { + mutex_lock(&pool->attach_mutex); - if (pool->cpu == cpu) - rebind_workers(pool); - else if (pool->cpu < 0) - restore_unbound_workers_cpumask(pool, cpu); + if (pool->cpu == cpu) + rebind_workers(pool); + else if (pool->cpu < 0) + restore_unbound_workers_cpumask(pool, cpu); - mutex_unlock(&pool->attach_mutex); - } + mutex_unlock(&pool->attach_mutex); + } - /* update NUMA affinity of unbound workqueues */ - list_for_each_entry(wq, &workqueues, list) - wq_update_unbound_numa(wq, cpu, true); + /* update NUMA affinity of unbound workqueues */ + list_for_each_entry(wq, &workqueues, list) + wq_update_unbound_numa(wq, cpu, true); - mutex_unlock(&wq_pool_mutex); - break; - } - return NOTIFY_OK; + mutex_unlock(&wq_pool_mutex); + return 0; } -/* - * Workqueues should be brought down after normal priority CPU notifiers. - * This will be registered as low priority CPU notifier. - */ -static int workqueue_cpu_down_callback(struct notifier_block *nfb, - unsigned long action, - void *hcpu) +int workqueue_offline_cpu(unsigned int cpu) { - int cpu = (unsigned long)hcpu; struct work_struct unbind_work; struct workqueue_struct *wq; - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_DOWN_PREPARE: - /* unbinding per-cpu workers should happen on the local CPU */ - INIT_WORK_ONSTACK(&unbind_work, wq_unbind_fn); - queue_work_on(cpu, system_highpri_wq, &unbind_work); - - /* update NUMA affinity of unbound workqueues */ - mutex_lock(&wq_pool_mutex); - list_for_each_entry(wq, &workqueues, list) - wq_update_unbound_numa(wq, cpu, false); - mutex_unlock(&wq_pool_mutex); - - /* wait for per-cpu unbinding to finish */ - flush_work(&unbind_work); - destroy_work_on_stack(&unbind_work); - break; - } - return NOTIFY_OK; + /* unbinding per-cpu workers should happen on the local CPU */ + INIT_WORK_ONSTACK(&unbind_work, wq_unbind_fn); + queue_work_on(cpu, system_highpri_wq, &unbind_work); + + /* update NUMA affinity of unbound workqueues */ + mutex_lock(&wq_pool_mutex); + list_for_each_entry(wq, &workqueues, list) + wq_update_unbound_numa(wq, cpu, false); + mutex_unlock(&wq_pool_mutex); + + /* wait for per-cpu unbinding to finish */ + flush_work(&unbind_work); + destroy_work_on_stack(&unbind_work); + return 0; } #ifdef CONFIG_SMP @@ -5490,9 +5471,6 @@ static int __init init_workqueues(void) pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC); - cpu_notifier(workqueue_cpu_up_callback, CPU_PRI_WORKQUEUE_UP); - hotcpu_notifier(workqueue_cpu_down_callback, CPU_PRI_WORKQUEUE_DOWN); - wq_numa_init(); /* initialize CPU pools */ -- cgit From 27590dc17b34aedc4f3e14bd107ee59b9db9b0a6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 15 Jul 2016 10:41:04 +0200 Subject: hrtimer: Convert to hotplug state machine Split out the clockevents callbacks instead of piggybacking them on hrtimers. This gets rid of a POST_DEAD user. See commit: 54e88fad223c ("sched: Make sure timers have migrated before killing the migration_thread") We just move the callback state to the proper place in the state machine. Signed-off-by: Thomas Gleixner Signed-off-by: Anna-Maria Gleixner Reviewed-by: Sebastian Andrzej Siewior Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rasmus Villemoes Cc: Rusty Russell Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160713153337.485419196@linutronix.de Signed-off-by: Ingo Molnar --- kernel/cpu.c | 5 +++++ kernel/time/hrtimer.c | 40 +++++----------------------------------- 2 files changed, 10 insertions(+), 35 deletions(-) (limited to 'kernel') diff --git a/kernel/cpu.c b/kernel/cpu.c index af53f820fec9..85500e7fe238 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1190,6 +1190,11 @@ static struct cpuhp_step cpuhp_bp_states[] = { .startup = workqueue_prepare_cpu, .teardown = NULL, }, + [CPUHP_HRTIMERS_PREPARE] = { + .name = "hrtimers prepare", + .startup = hrtimers_prepare_cpu, + .teardown = hrtimers_dead_cpu, + }, /* * Preparatory and dead notifiers. Will be replaced once the notifiers * are converted to states. diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index d13c9aebf7a3..9ba7c820fc23 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1590,7 +1590,7 @@ SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp, /* * Functions related to boot-time initialization: */ -static void init_hrtimers_cpu(int cpu) +int hrtimers_prepare_cpu(unsigned int cpu) { struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu); int i; @@ -1602,6 +1602,7 @@ static void init_hrtimers_cpu(int cpu) cpu_base->cpu = cpu; hrtimer_init_hres(cpu_base); + return 0; } #ifdef CONFIG_HOTPLUG_CPU @@ -1636,7 +1637,7 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base, } } -static void migrate_hrtimers(int scpu) +int hrtimers_dead_cpu(unsigned int scpu) { struct hrtimer_cpu_base *old_base, *new_base; int i; @@ -1665,45 +1666,14 @@ static void migrate_hrtimers(int scpu) /* Check, if we got expired work to do */ __hrtimer_peek_ahead_timers(); local_irq_enable(); + return 0; } #endif /* CONFIG_HOTPLUG_CPU */ -static int hrtimer_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) -{ - int scpu = (long)hcpu; - - switch (action) { - - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - init_hrtimers_cpu(scpu); - break; - -#ifdef CONFIG_HOTPLUG_CPU - case CPU_DEAD: - case CPU_DEAD_FROZEN: - migrate_hrtimers(scpu); - break; -#endif - - default: - break; - } - - return NOTIFY_OK; -} - -static struct notifier_block hrtimers_nb = { - .notifier_call = hrtimer_cpu_notify, -}; - void __init hrtimers_init(void) { - hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE, - (void *)(long)smp_processor_id()); - register_cpu_notifier(&hrtimers_nb); + hrtimers_prepare_cpu(smp_processor_id()); } /** -- cgit From 24f73b99716a9cd8cbb345c41ced6b3b5ed94006 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Wed, 13 Jul 2016 17:16:59 +0000 Subject: timers/core: Convert to hotplug state machine When tearing down, call timers_dead_cpu() before notify_dead(). There is a hidden dependency between: - timers - block multiqueue - rcutree If timers_dead_cpu() comes later than blk_mq_queue_reinit_notify() that latter function causes a RCU stall. Signed-off-by: Richard Cochran Signed-off-by: Anna-Maria Gleixner Reviewed-by: Sebastian Andrzej Siewior Cc: John Stultz Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rasmus Villemoes Cc: Thomas Gleixner Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160713153337.566790058@linutronix.de Signed-off-by: Ingo Molnar --- kernel/cpu.c | 5 +++++ kernel/time/timer.c | 25 ++----------------------- 2 files changed, 7 insertions(+), 23 deletions(-) (limited to 'kernel') diff --git a/kernel/cpu.c b/kernel/cpu.c index 85500e7fe238..e1017d92d308 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1195,6 +1195,11 @@ static struct cpuhp_step cpuhp_bp_states[] = { .startup = hrtimers_prepare_cpu, .teardown = hrtimers_dead_cpu, }, + [CPUHP_TIMERS_DEAD] = { + .name = "timers dead", + .startup = NULL, + .teardown = timers_dead_cpu, + }, /* * Preparatory and dead notifiers. Will be replaced once the notifiers * are converted to states. diff --git a/kernel/time/timer.c b/kernel/time/timer.c index cb9ab401e2d9..555670a5143c 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1804,7 +1804,7 @@ static void migrate_timer_list(struct timer_base *new_base, struct hlist_head *h } } -static void migrate_timers(int cpu) +int timers_dead_cpu(unsigned int cpu) { struct timer_base *old_base; struct timer_base *new_base; @@ -1831,29 +1831,9 @@ static void migrate_timers(int cpu) spin_unlock_irq(&new_base->lock); put_cpu_ptr(&timer_bases); } + return 0; } -static int timer_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) -{ - switch (action) { - case CPU_DEAD: - case CPU_DEAD_FROZEN: - migrate_timers((long)hcpu); - break; - default: - break; - } - - return NOTIFY_OK; -} - -static inline void timer_register_cpu_notifier(void) -{ - cpu_notifier(timer_cpu_notify, 0); -} -#else -static inline void timer_register_cpu_notifier(void) { } #endif /* CONFIG_HOTPLUG_CPU */ static void __init init_timer_cpu(int cpu) @@ -1881,7 +1861,6 @@ void __init init_timers(void) { init_timer_cpus(); init_timer_stats(); - timer_register_cpu_notifier(); open_softirq(TIMER_SOFTIRQ, run_timer_softirq); } -- cgit From e722d8daafb974b9ad1bbaf42f384a5ea5929f5f Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 13 Jul 2016 17:16:59 +0000 Subject: profile: Convert to hotplug state machine Install the callbacks via the state machine and let the core invoke the callbacks on the already online CPUs. A lot of code is removed because the for-loop is used and create_hash_tables() is removed since its purpose is covered by the startup / teardown hooks. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Anna-Maria Gleixner Cc: Andrew Morton Cc: Arnd Bergmann Cc: Linus Torvalds Cc: Mel Gorman Cc: Michal Hocko Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vlastimil Babka Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160713153337.649867675@linutronix.de Signed-off-by: Ingo Molnar --- kernel/profile.c | 181 ++++++++++++++++++++----------------------------------- 1 file changed, 65 insertions(+), 116 deletions(-) (limited to 'kernel') diff --git a/kernel/profile.c b/kernel/profile.c index c2199e9901c9..2dbccf2d806c 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -328,68 +328,57 @@ out: put_cpu(); } -static int profile_cpu_callback(struct notifier_block *info, - unsigned long action, void *__cpu) +static int profile_dead_cpu(unsigned int cpu) { - int node, cpu = (unsigned long)__cpu; struct page *page; + int i; - switch (action) { - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - node = cpu_to_mem(cpu); - per_cpu(cpu_profile_flip, cpu) = 0; - if (!per_cpu(cpu_profile_hits, cpu)[1]) { - page = __alloc_pages_node(node, - GFP_KERNEL | __GFP_ZERO, - 0); - if (!page) - return notifier_from_errno(-ENOMEM); - per_cpu(cpu_profile_hits, cpu)[1] = page_address(page); - } - if (!per_cpu(cpu_profile_hits, cpu)[0]) { - page = __alloc_pages_node(node, - GFP_KERNEL | __GFP_ZERO, - 0); - if (!page) - goto out_free; - per_cpu(cpu_profile_hits, cpu)[0] = page_address(page); - } - break; -out_free: - page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[1]); - per_cpu(cpu_profile_hits, cpu)[1] = NULL; - __free_page(page); - return notifier_from_errno(-ENOMEM); - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - if (prof_cpu_mask != NULL) - cpumask_set_cpu(cpu, prof_cpu_mask); - break; - case CPU_UP_CANCELED: - case CPU_UP_CANCELED_FROZEN: - case CPU_DEAD: - case CPU_DEAD_FROZEN: - if (prof_cpu_mask != NULL) - cpumask_clear_cpu(cpu, prof_cpu_mask); - if (per_cpu(cpu_profile_hits, cpu)[0]) { - page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[0]); - per_cpu(cpu_profile_hits, cpu)[0] = NULL; + if (prof_cpu_mask != NULL) + cpumask_clear_cpu(cpu, prof_cpu_mask); + + for (i = 0; i < 2; i++) { + if (per_cpu(cpu_profile_hits, cpu)[i]) { + page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[i]); + per_cpu(cpu_profile_hits, cpu)[i] = NULL; __free_page(page); } - if (per_cpu(cpu_profile_hits, cpu)[1]) { - page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[1]); - per_cpu(cpu_profile_hits, cpu)[1] = NULL; - __free_page(page); + } + return 0; +} + +static int profile_prepare_cpu(unsigned int cpu) +{ + int i, node = cpu_to_mem(cpu); + struct page *page; + + per_cpu(cpu_profile_flip, cpu) = 0; + + for (i = 0; i < 2; i++) { + if (per_cpu(cpu_profile_hits, cpu)[i]) + continue; + + page = __alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0); + if (!page) { + profile_dead_cpu(cpu); + return -ENOMEM; } - break; + per_cpu(cpu_profile_hits, cpu)[i] = page_address(page); + } - return NOTIFY_OK; + return 0; +} + +static int profile_online_cpu(unsigned int cpu) +{ + if (prof_cpu_mask != NULL) + cpumask_set_cpu(cpu, prof_cpu_mask); + + return 0; } + #else /* !CONFIG_SMP */ #define profile_flip_buffers() do { } while (0) #define profile_discard_flip_buffers() do { } while (0) -#define profile_cpu_callback NULL static void do_profile_hits(int type, void *__pc, unsigned int nr_hits) { @@ -531,83 +520,43 @@ static const struct file_operations proc_profile_operations = { .llseek = default_llseek, }; -#ifdef CONFIG_SMP -static void profile_nop(void *unused) -{ -} - -static int create_hash_tables(void) +int __ref create_proc_profile(void) { - int cpu; - - for_each_online_cpu(cpu) { - int node = cpu_to_mem(cpu); - struct page *page; - - page = __alloc_pages_node(node, - GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE, - 0); - if (!page) - goto out_cleanup; - per_cpu(cpu_profile_hits, cpu)[1] - = (struct profile_hit *)page_address(page); - page = __alloc_pages_node(node, - GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE, - 0); - if (!page) - goto out_cleanup; - per_cpu(cpu_profile_hits, cpu)[0] - = (struct profile_hit *)page_address(page); - } - return 0; -out_cleanup: - prof_on = 0; - smp_mb(); - on_each_cpu(profile_nop, NULL, 1); - for_each_online_cpu(cpu) { - struct page *page; - - if (per_cpu(cpu_profile_hits, cpu)[0]) { - page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[0]); - per_cpu(cpu_profile_hits, cpu)[0] = NULL; - __free_page(page); - } - if (per_cpu(cpu_profile_hits, cpu)[1]) { - page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[1]); - per_cpu(cpu_profile_hits, cpu)[1] = NULL; - __free_page(page); - } - } - return -1; -} -#else -#define create_hash_tables() ({ 0; }) + struct proc_dir_entry *entry; +#ifdef CONFIG_SMP + enum cpuhp_state online_state; #endif -int __ref create_proc_profile(void) /* false positive from hotcpu_notifier */ -{ - struct proc_dir_entry *entry; int err = 0; if (!prof_on) return 0; - - cpu_notifier_register_begin(); - - if (create_hash_tables()) { - err = -ENOMEM; - goto out; - } - +#ifdef CONFIG_SMP + err = cpuhp_setup_state(CPUHP_PROFILE_PREPARE, "PROFILE_PREPARE", + profile_prepare_cpu, profile_dead_cpu); + if (err) + return err; + + err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "AP_PROFILE_ONLINE", + profile_online_cpu, NULL); + if (err < 0) + goto err_state_prep; + online_state = err; + err = 0; +#endif entry = proc_create("profile", S_IWUSR | S_IRUGO, NULL, &proc_profile_operations); if (!entry) - goto out; + goto err_state_onl; proc_set_size(entry, (1 + prof_len) * sizeof(atomic_t)); - __hotcpu_notifier(profile_cpu_callback, 0); -out: - cpu_notifier_register_done(); + return err; +err_state_onl: +#ifdef CONFIG_SMP + cpuhp_remove_state(online_state); +err_state_prep: + cpuhp_remove_state(CPUHP_PROFILE_PREPARE); +#endif return err; } subsys_initcall(create_proc_profile); -- cgit From 31487f8328f20fdb302430b020a5d6e8446c1971 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Wed, 13 Jul 2016 17:17:01 +0000 Subject: smp/cfd: Convert core to hotplug state machine Install the callbacks via the state machine. They are installed at runtime so smpcfd_prepare_cpu() needs to be invoked by the boot-CPU. Signed-off-by: Richard Weinberger [ Added the dropped CPU dying case back in. ] Signed-off-by: Richard Cochran Signed-off-by: Anna-Maria Gleixner Reviewed-by: Sebastian Andrzej Siewior Cc: Davidlohr Bueso Cc: Linus Torvalds Cc: Mel Gorman Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rasmus Villemoes Cc: Thomas Gleixner Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160713153337.818376366@linutronix.de Signed-off-by: Ingo Molnar --- kernel/cpu.c | 9 +++++++ kernel/smp.c | 79 ++++++++++++++++++++++++------------------------------------ 2 files changed, 41 insertions(+), 47 deletions(-) (limited to 'kernel') diff --git a/kernel/cpu.c b/kernel/cpu.c index e1017d92d308..008e2fd40cb1 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1195,6 +1195,11 @@ static struct cpuhp_step cpuhp_bp_states[] = { .startup = hrtimers_prepare_cpu, .teardown = hrtimers_dead_cpu, }, + [CPUHP_SMPCFD_PREPARE] = { + .name = "SMPCFD prepare", + .startup = smpcfd_prepare_cpu, + .teardown = smpcfd_dead_cpu, + }, [CPUHP_TIMERS_DEAD] = { .name = "timers dead", .startup = NULL, @@ -1218,6 +1223,10 @@ static struct cpuhp_step cpuhp_bp_states[] = { .teardown = NULL, .cant_stop = true, }, + [CPUHP_AP_SMPCFD_DYING] = { + .startup = NULL, + .teardown = smpcfd_dying_cpu, + }, /* * Handled on controll processor until the plugged processor manages * this itself. diff --git a/kernel/smp.c b/kernel/smp.c index 74165443c240..7180491c9678 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -33,69 +33,54 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct llist_head, call_single_queue); static void flush_smp_call_function_queue(bool warn_cpu_offline); -static int -hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu) +int smpcfd_prepare_cpu(unsigned int cpu) { - long cpu = (long)hcpu; struct call_function_data *cfd = &per_cpu(cfd_data, cpu); - switch (action) { - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - if (!zalloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL, - cpu_to_node(cpu))) - return notifier_from_errno(-ENOMEM); - cfd->csd = alloc_percpu(struct call_single_data); - if (!cfd->csd) { - free_cpumask_var(cfd->cpumask); - return notifier_from_errno(-ENOMEM); - } - break; - -#ifdef CONFIG_HOTPLUG_CPU - case CPU_UP_CANCELED: - case CPU_UP_CANCELED_FROZEN: - /* Fall-through to the CPU_DEAD[_FROZEN] case. */ - - case CPU_DEAD: - case CPU_DEAD_FROZEN: + if (!zalloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL, + cpu_to_node(cpu))) + return -ENOMEM; + cfd->csd = alloc_percpu(struct call_single_data); + if (!cfd->csd) { free_cpumask_var(cfd->cpumask); - free_percpu(cfd->csd); - break; + return -ENOMEM; + } - case CPU_DYING: - case CPU_DYING_FROZEN: - /* - * The IPIs for the smp-call-function callbacks queued by other - * CPUs might arrive late, either due to hardware latencies or - * because this CPU disabled interrupts (inside stop-machine) - * before the IPIs were sent. So flush out any pending callbacks - * explicitly (without waiting for the IPIs to arrive), to - * ensure that the outgoing CPU doesn't go offline with work - * still pending. - */ - flush_smp_call_function_queue(false); - break; -#endif - }; + return 0; +} + +int smpcfd_dead_cpu(unsigned int cpu) +{ + struct call_function_data *cfd = &per_cpu(cfd_data, cpu); - return NOTIFY_OK; + free_cpumask_var(cfd->cpumask); + free_percpu(cfd->csd); + return 0; } -static struct notifier_block hotplug_cfd_notifier = { - .notifier_call = hotplug_cfd, -}; +int smpcfd_dying_cpu(unsigned int cpu) +{ + /* + * The IPIs for the smp-call-function callbacks queued by other + * CPUs might arrive late, either due to hardware latencies or + * because this CPU disabled interrupts (inside stop-machine) + * before the IPIs were sent. So flush out any pending callbacks + * explicitly (without waiting for the IPIs to arrive), to + * ensure that the outgoing CPU doesn't go offline with work + * still pending. + */ + flush_smp_call_function_queue(false); + return 0; +} void __init call_function_init(void) { - void *cpu = (void *)(long)smp_processor_id(); int i; for_each_possible_cpu(i) init_llist_head(&per_cpu(call_single_queue, i)); - hotplug_cfd(&hotplug_cfd_notifier, CPU_UP_PREPARE, cpu); - register_cpu_notifier(&hotplug_cfd_notifier); + smpcfd_prepare_cpu(smp_processor_id()); } /* -- cgit From 4df8374254ea9294dfe4b8c447a1b7eddc543dbf Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Jul 2016 17:17:03 +0000 Subject: rcu: Convert rcutree to hotplug state machine Straight forward conversion to the state machine. Though the question arises whether this needs really all these state transitions to work. Signed-off-by: Thomas Gleixner Signed-off-by: Anna-Maria Gleixner Reviewed-by: Sebastian Andrzej Siewior Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160713153337.982013161@linutronix.de Signed-off-by: Ingo Molnar --- kernel/cpu.c | 14 ++++++++ kernel/rcu/tree.c | 105 +++++++++++++++++++++++++++--------------------------- 2 files changed, 66 insertions(+), 53 deletions(-) (limited to 'kernel') diff --git a/kernel/cpu.c b/kernel/cpu.c index 008e2fd40cb1..f24f45915b54 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1205,6 +1205,11 @@ static struct cpuhp_step cpuhp_bp_states[] = { .startup = NULL, .teardown = timers_dead_cpu, }, + [CPUHP_RCUTREE_PREP] = { + .name = "RCU-tree prepare", + .startup = rcutree_prepare_cpu, + .teardown = rcutree_dead_cpu, + }, /* * Preparatory and dead notifiers. Will be replaced once the notifiers * are converted to states. @@ -1263,6 +1268,10 @@ static struct cpuhp_step cpuhp_ap_states[] = { .startup = sched_cpu_starting, .teardown = sched_cpu_dying, }, + [CPUHP_AP_RCUTREE_DYING] = { + .startup = NULL, + .teardown = rcutree_dying_cpu, + }, /* * Low level startup/teardown notifiers. Run with interrupts * disabled. Will be removed once the notifiers are converted to @@ -1296,6 +1305,11 @@ static struct cpuhp_step cpuhp_ap_states[] = { .startup = workqueue_online_cpu, .teardown = workqueue_offline_cpu, }, + [CPUHP_AP_RCUTREE_ONLINE] = { + .name = "RCU-tree online", + .startup = rcutree_online_cpu, + .teardown = rcutree_offline_cpu, + }, /* * Online/down_prepare notifiers. Will be removed once the notifiers diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index f433959e9322..5d80925e7fc8 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1073,11 +1073,11 @@ EXPORT_SYMBOL_GPL(rcu_is_watching); * offline to continue to use RCU for one jiffy after marking itself * offline in the cpu_online_mask. This leniency is necessary given the * non-atomic nature of the online and offline processing, for example, - * the fact that a CPU enters the scheduler after completing the CPU_DYING - * notifiers. + * the fact that a CPU enters the scheduler after completing the teardown + * of the CPU. * - * This is also why RCU internally marks CPUs online during the - * CPU_UP_PREPARE phase and offline during the CPU_DEAD phase. + * This is also why RCU internally marks CPUs online during in the + * preparation phase and offline after the CPU has been taken down. * * Disable checking if in an NMI handler because we cannot safely report * errors from NMI handlers anyway. @@ -3806,12 +3806,58 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp) raw_spin_unlock_irqrestore_rcu_node(rnp, flags); } -static void rcu_prepare_cpu(int cpu) +int rcutree_prepare_cpu(unsigned int cpu) { struct rcu_state *rsp; for_each_rcu_flavor(rsp) rcu_init_percpu_data(cpu, rsp); + + rcu_prepare_kthreads(cpu); + rcu_spawn_all_nocb_kthreads(cpu); + + return 0; +} + +static void rcutree_affinity_setting(unsigned int cpu, int outgoing) +{ + struct rcu_data *rdp = per_cpu_ptr(rcu_state_p->rda, cpu); + + rcu_boost_kthread_setaffinity(rdp->mynode, outgoing); +} + +int rcutree_online_cpu(unsigned int cpu) +{ + sync_sched_exp_online_cleanup(cpu); + rcutree_affinity_setting(cpu, -1); + return 0; +} + +int rcutree_offline_cpu(unsigned int cpu) +{ + rcutree_affinity_setting(cpu, cpu); + return 0; +} + + +int rcutree_dying_cpu(unsigned int cpu) +{ + struct rcu_state *rsp; + + for_each_rcu_flavor(rsp) + rcu_cleanup_dying_cpu(rsp); + return 0; +} + +int rcutree_dead_cpu(unsigned int cpu) +{ + struct rcu_state *rsp; + + for_each_rcu_flavor(rsp) { + rcu_cleanup_dead_cpu(cpu, rsp); + do_nocb_deferred_wakeup(per_cpu_ptr(rsp->rda, cpu)); + } + return 0; } #ifdef CONFIG_HOTPLUG_CPU @@ -3851,52 +3897,6 @@ void rcu_report_dead(unsigned int cpu) } #endif -/* - * Handle CPU online/offline notification events. - */ -int rcu_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) -{ - long cpu = (long)hcpu; - struct rcu_data *rdp = per_cpu_ptr(rcu_state_p->rda, cpu); - struct rcu_node *rnp = rdp->mynode; - struct rcu_state *rsp; - - switch (action) { - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - rcu_prepare_cpu(cpu); - rcu_prepare_kthreads(cpu); - rcu_spawn_all_nocb_kthreads(cpu); - break; - case CPU_ONLINE: - case CPU_DOWN_FAILED: - sync_sched_exp_online_cleanup(cpu); - rcu_boost_kthread_setaffinity(rnp, -1); - break; - case CPU_DOWN_PREPARE: - rcu_boost_kthread_setaffinity(rnp, cpu); - break; - case CPU_DYING: - case CPU_DYING_FROZEN: - for_each_rcu_flavor(rsp) - rcu_cleanup_dying_cpu(rsp); - break; - case CPU_DEAD: - case CPU_DEAD_FROZEN: - case CPU_UP_CANCELED: - case CPU_UP_CANCELED_FROZEN: - for_each_rcu_flavor(rsp) { - rcu_cleanup_dead_cpu(cpu, rsp); - do_nocb_deferred_wakeup(per_cpu_ptr(rsp->rda, cpu)); - } - break; - default: - break; - } - return NOTIFY_OK; -} - static int rcu_pm_notify(struct notifier_block *self, unsigned long action, void *hcpu) { @@ -4208,10 +4208,9 @@ void __init rcu_init(void) * this is called early in boot, before either interrupts * or the scheduler are operational. */ - cpu_notifier(rcu_cpu_notify, 0); pm_notifier(rcu_pm_notify, 0); for_each_online_cpu(cpu) - rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu); + rcutree_prepare_cpu(cpu); } #include "tree_exp.h" -- cgit From 4fae16dffb812f0e0d98a0b2b0856ca48ca63e6c Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Wed, 27 Jul 2016 11:08:18 +0200 Subject: timers/core: Correct callback order during CPU hot plug On the tear-down path, the dead CPU callback for the timers was misplaced within the 'cpuhp_state' enumeration. There is a hidden dependency between the timers and block multiqueue. The timers callback must happen before the block multiqueue callback otherwise a RCU stall occurs. Move the timers callback to the proper place in the state machine. Reported-and-tested-by: Jon Hunter Reported-by: kbuild test robot Fixes: 24f73b99716a ("timers/core: Convert to hotplug state machine") Signed-off-by: Richard Cochran Cc: Peter Zijlstra Cc: Sebastian Andrzej Siewior Cc: Rasmus Villemoes Cc: John Stultz Cc: rt@linutronix.de Cc: Oleg Nesterov Cc: Linus Torvalds Link: http://lkml.kernel.org/r/1469610498-25914-1-git-send-email-rcochran@linutronix.de Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- kernel/cpu.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/cpu.c b/kernel/cpu.c index f24f45915b54..341bf80f80bd 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1200,11 +1200,6 @@ static struct cpuhp_step cpuhp_bp_states[] = { .startup = smpcfd_prepare_cpu, .teardown = smpcfd_dead_cpu, }, - [CPUHP_TIMERS_DEAD] = { - .name = "timers dead", - .startup = NULL, - .teardown = timers_dead_cpu, - }, [CPUHP_RCUTREE_PREP] = { .name = "RCU-tree prepare", .startup = rcutree_prepare_cpu, @@ -1221,6 +1216,16 @@ static struct cpuhp_step cpuhp_bp_states[] = { .skip_onerr = true, .cant_stop = true, }, + /* + * On the tear-down path, timers_dead_cpu() must be invoked + * before blk_mq_queue_reinit_notify() from notify_dead(), + * otherwise a RCU stall occurs. + */ + [CPUHP_TIMERS_DEAD] = { + .name = "timers dead", + .startup = NULL, + .teardown = timers_dead_cpu, + }, /* Kicks the plugged cpu into life */ [CPUHP_BRINGUP_CPU] = { .name = "cpu:bringup", -- cgit