summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/events/core.c23
-rw-r--r--kernel/fork.c4
-rw-r--r--kernel/kcov.c9
-rw-r--r--kernel/power/suspend.c4
-rw-r--r--kernel/sched/core.c28
-rw-r--r--kernel/sched/fair.c3
-rw-r--r--kernel/sched/wait.c10
-rw-r--r--kernel/softirq.c2
-rw-r--r--kernel/time/timer.c74
9 files changed, 95 insertions, 62 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c
index c6e47e97b33f..0e292132efac 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1960,6 +1960,12 @@ void perf_event_disable(struct perf_event *event)
}
EXPORT_SYMBOL_GPL(perf_event_disable);
+void perf_event_disable_inatomic(struct perf_event *event)
+{
+ event->pending_disable = 1;
+ irq_work_queue(&event->pending);
+}
+
static void perf_set_shadow_time(struct perf_event *event,
struct perf_event_context *ctx,
u64 tstamp)
@@ -7075,8 +7081,8 @@ static int __perf_event_overflow(struct perf_event *event,
if (events && atomic_dec_and_test(&event->event_limit)) {
ret = 1;
event->pending_kill = POLL_HUP;
- event->pending_disable = 1;
- irq_work_queue(&event->pending);
+
+ perf_event_disable_inatomic(event);
}
READ_ONCE(event->overflow_handler)(event, data, regs);
@@ -8855,7 +8861,10 @@ EXPORT_SYMBOL_GPL(perf_pmu_register);
void perf_pmu_unregister(struct pmu *pmu)
{
+ int remove_device;
+
mutex_lock(&pmus_lock);
+ remove_device = pmu_bus_running;
list_del_rcu(&pmu->entry);
mutex_unlock(&pmus_lock);
@@ -8869,10 +8878,12 @@ void perf_pmu_unregister(struct pmu *pmu)
free_percpu(pmu->pmu_disable_count);
if (pmu->type >= PERF_TYPE_MAX)
idr_remove(&pmu_idr, pmu->type);
- if (pmu->nr_addr_filters)
- device_remove_file(pmu->dev, &dev_attr_nr_addr_filters);
- device_del(pmu->dev);
- put_device(pmu->dev);
+ if (remove_device) {
+ if (pmu->nr_addr_filters)
+ device_remove_file(pmu->dev, &dev_attr_nr_addr_filters);
+ device_del(pmu->dev);
+ put_device(pmu->dev);
+ }
free_pmu_context(pmu);
}
EXPORT_SYMBOL_GPL(perf_pmu_unregister);
diff --git a/kernel/fork.c b/kernel/fork.c
index 623259fc794d..997ac1d584f7 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -315,6 +315,9 @@ static void account_kernel_stack(struct task_struct *tsk, int account)
static void release_task_stack(struct task_struct *tsk)
{
+ if (WARN_ON(tsk->state != TASK_DEAD))
+ return; /* Better to leak the stack than to free prematurely */
+
account_kernel_stack(tsk, -1);
arch_release_thread_stack(tsk->stack);
free_thread_stack(tsk);
@@ -1862,6 +1865,7 @@ bad_fork_cleanup_count:
atomic_dec(&p->cred->user->processes);
exit_creds(p);
bad_fork_free:
+ p->state = TASK_DEAD;
put_task_stack(p);
free_task(p);
fork_out:
diff --git a/kernel/kcov.c b/kernel/kcov.c
index 8d44b3fea9d0..30e6d05aa5a9 100644
--- a/kernel/kcov.c
+++ b/kernel/kcov.c
@@ -53,8 +53,15 @@ void notrace __sanitizer_cov_trace_pc(void)
/*
* We are interested in code coverage as a function of a syscall inputs,
* so we ignore code executed in interrupts.
+ * The checks for whether we are in an interrupt are open-coded, because
+ * 1. We can't use in_interrupt() here, since it also returns true
+ * when we are inside local_bh_disable() section.
+ * 2. We don't want to use (in_irq() | in_serving_softirq() | in_nmi()),
+ * since that leads to slower generated code (three separate tests,
+ * one for each of the flags).
*/
- if (!t || in_interrupt())
+ if (!t || (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_OFFSET
+ | NMI_MASK)))
return;
mode = READ_ONCE(t->kcov_mode);
if (mode == KCOV_MODE_TRACE) {
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 1e7f5da648d9..6ccb08f57fcb 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -498,9 +498,9 @@ static int enter_state(suspend_state_t state)
#ifndef CONFIG_SUSPEND_SKIP_SYNC
trace_suspend_resume(TPS("sync_filesystems"), 0, true);
- printk(KERN_INFO "PM: Syncing filesystems ... ");
+ pr_info("PM: Syncing filesystems ... ");
sys_sync();
- printk("done.\n");
+ pr_cont("done.\n");
trace_suspend_resume(TPS("sync_filesystems"), 0, false);
#endif
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 8912aafd09e1..8b08fb257856 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5192,21 +5192,14 @@ void sched_show_task(struct task_struct *p)
int ppid;
unsigned long state = p->state;
+ if (!try_get_task_stack(p))
+ return;
if (state)
state = __ffs(state) + 1;
printk(KERN_INFO "%-15.15s %c", p->comm,
state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?');
-#if BITS_PER_LONG == 32
- if (state == TASK_RUNNING)
- printk(KERN_CONT " running ");
- else
- printk(KERN_CONT " %08lx ", thread_saved_pc(p));
-#else
if (state == TASK_RUNNING)
printk(KERN_CONT " running task ");
- else
- printk(KERN_CONT " %016lx ", thread_saved_pc(p));
-#endif
#ifdef CONFIG_DEBUG_STACK_USAGE
free = stack_not_used(p);
#endif
@@ -5221,6 +5214,7 @@ void sched_show_task(struct task_struct *p)
print_worker_info(KERN_INFO, p);
show_stack(p, NULL);
+ put_task_stack(p);
}
void show_state_filter(unsigned long state_filter)
@@ -7515,11 +7509,27 @@ static struct kmem_cache *task_group_cache __read_mostly;
DECLARE_PER_CPU(cpumask_var_t, load_balance_mask);
DECLARE_PER_CPU(cpumask_var_t, select_idle_mask);
+#define WAIT_TABLE_BITS 8
+#define WAIT_TABLE_SIZE (1 << WAIT_TABLE_BITS)
+static wait_queue_head_t bit_wait_table[WAIT_TABLE_SIZE] __cacheline_aligned;
+
+wait_queue_head_t *bit_waitqueue(void *word, int bit)
+{
+ const int shift = BITS_PER_LONG == 32 ? 5 : 6;
+ unsigned long val = (unsigned long)word << shift | bit;
+
+ return bit_wait_table + hash_long(val, WAIT_TABLE_BITS);
+}
+EXPORT_SYMBOL(bit_waitqueue);
+
void __init sched_init(void)
{
int i, j;
unsigned long alloc_size = 0, ptr;
+ for (i = 0; i < WAIT_TABLE_SIZE; i++)
+ init_waitqueue_head(bit_wait_table + i);
+
#ifdef CONFIG_FAIR_GROUP_SCHED
alloc_size += 2 * nr_cpu_ids * sizeof(void **);
#endif
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index d941c97dfbc3..c242944f5cbd 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8839,7 +8839,6 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
{
struct sched_entity *se;
struct cfs_rq *cfs_rq;
- struct rq *rq;
int i;
tg->cfs_rq = kzalloc(sizeof(cfs_rq) * nr_cpu_ids, GFP_KERNEL);
@@ -8854,8 +8853,6 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
init_cfs_bandwidth(tg_cfs_bandwidth(tg));
for_each_possible_cpu(i) {
- rq = cpu_rq(i);
-
cfs_rq = kzalloc_node(sizeof(struct cfs_rq),
GFP_KERNEL, cpu_to_node(i));
if (!cfs_rq)
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index 4f7053579fe3..9453efe9b25a 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -480,16 +480,6 @@ void wake_up_bit(void *word, int bit)
}
EXPORT_SYMBOL(wake_up_bit);
-wait_queue_head_t *bit_waitqueue(void *word, int bit)
-{
- const int shift = BITS_PER_LONG == 32 ? 5 : 6;
- const struct zone *zone = page_zone(virt_to_page(word));
- unsigned long val = (unsigned long)word << shift | bit;
-
- return &zone->wait_table[hash_long(val, zone->wait_table_bits)];
-}
-EXPORT_SYMBOL(bit_waitqueue);
-
/*
* Manipulate the atomic_t address to produce a better bit waitqueue table hash
* index (we're keying off bit -1, but that would produce a horrible hash
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 1bf81ef91375..744fa611cae0 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -58,7 +58,7 @@ static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp
DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
const char * const softirq_to_name[NR_SOFTIRQS] = {
- "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL",
+ "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "IRQ_POLL",
"TASKLET", "SCHED", "HRTIMER", "RCU"
};
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 2d47980a1bc4..c611c47de884 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -878,7 +878,7 @@ static inline struct timer_base *get_timer_base(u32 tflags)
#ifdef CONFIG_NO_HZ_COMMON
static inline struct timer_base *
-__get_target_base(struct timer_base *base, unsigned tflags)
+get_target_base(struct timer_base *base, unsigned tflags)
{
#ifdef CONFIG_SMP
if ((tflags & TIMER_PINNED) || !base->migration_enabled)
@@ -891,25 +891,27 @@ __get_target_base(struct timer_base *base, unsigned tflags)
static inline void forward_timer_base(struct timer_base *base)
{
+ unsigned long jnow = READ_ONCE(jiffies);
+
/*
* We only forward the base when it's idle and we have a delta between
* base clock and jiffies.
*/
- if (!base->is_idle || (long) (jiffies - base->clk) < 2)
+ if (!base->is_idle || (long) (jnow - base->clk) < 2)
return;
/*
* If the next expiry value is > jiffies, then we fast forward to
* jiffies otherwise we forward to the next expiry value.
*/
- if (time_after(base->next_expiry, jiffies))
- base->clk = jiffies;
+ if (time_after(base->next_expiry, jnow))
+ base->clk = jnow;
else
base->clk = base->next_expiry;
}
#else
static inline struct timer_base *
-__get_target_base(struct timer_base *base, unsigned tflags)
+get_target_base(struct timer_base *base, unsigned tflags)
{
return get_timer_this_cpu_base(tflags);
}
@@ -917,14 +919,6 @@ __get_target_base(struct timer_base *base, unsigned tflags)
static inline void forward_timer_base(struct timer_base *base) { }
#endif
-static inline struct timer_base *
-get_target_base(struct timer_base *base, unsigned tflags)
-{
- struct timer_base *target = __get_target_base(base, tflags);
-
- forward_timer_base(target);
- return target;
-}
/*
* We are using hashed locking: Holding per_cpu(timer_bases[x]).lock means
@@ -943,7 +937,14 @@ static struct timer_base *lock_timer_base(struct timer_list *timer,
{
for (;;) {
struct timer_base *base;
- u32 tf = timer->flags;
+ u32 tf;
+
+ /*
+ * We need to use READ_ONCE() here, otherwise the compiler
+ * might re-read @tf between the check for TIMER_MIGRATING
+ * and spin_lock().
+ */
+ tf = READ_ONCE(timer->flags);
if (!(tf & TIMER_MIGRATING)) {
base = get_timer_base(tf);
@@ -964,6 +965,8 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
unsigned long clk = 0, flags;
int ret = 0;
+ BUG_ON(!timer->function);
+
/*
* This is a common optimization triggered by the networking code - if
* the timer is re-modified to have the same timeout or ends up in the
@@ -972,13 +975,16 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
if (timer_pending(timer)) {
if (timer->expires == expires)
return 1;
+
/*
- * Take the current timer_jiffies of base, but without holding
- * the lock!
+ * We lock timer base and calculate the bucket index right
+ * here. If the timer ends up in the same bucket, then we
+ * just update the expiry time and avoid the whole
+ * dequeue/enqueue dance.
*/
- base = get_timer_base(timer->flags);
- clk = base->clk;
+ base = lock_timer_base(timer, &flags);
+ clk = base->clk;
idx = calc_wheel_index(expires, clk);
/*
@@ -988,14 +994,14 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
*/
if (idx == timer_get_idx(timer)) {
timer->expires = expires;
- return 1;
+ ret = 1;
+ goto out_unlock;
}
+ } else {
+ base = lock_timer_base(timer, &flags);
}
timer_stats_timer_set_start_info(timer);
- BUG_ON(!timer->function);
-
- base = lock_timer_base(timer, &flags);
ret = detach_if_pending(timer, base, false);
if (!ret && pending_only)
@@ -1025,12 +1031,16 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
}
}
+ /* Try to forward a stale timer base clock */
+ forward_timer_base(base);
+
timer->expires = expires;
/*
* If 'idx' was calculated above and the base time did not advance
- * between calculating 'idx' and taking the lock, only enqueue_timer()
- * and trigger_dyntick_cpu() is required. Otherwise we need to
- * (re)calculate the wheel index via internal_add_timer().
+ * between calculating 'idx' and possibly switching the base, only
+ * enqueue_timer() and trigger_dyntick_cpu() is required. Otherwise
+ * we need to (re)calculate the wheel index via
+ * internal_add_timer().
*/
if (idx != UINT_MAX && clk == base->clk) {
enqueue_timer(base, timer, idx);
@@ -1510,12 +1520,16 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
is_max_delta = (nextevt == base->clk + NEXT_TIMER_MAX_DELTA);
base->next_expiry = nextevt;
/*
- * We have a fresh next event. Check whether we can forward the base:
+ * We have a fresh next event. Check whether we can forward the
+ * base. We can only do that when @basej is past base->clk
+ * otherwise we might rewind base->clk.
*/
- if (time_after(nextevt, jiffies))
- base->clk = jiffies;
- else if (time_after(nextevt, base->clk))
- base->clk = nextevt;
+ if (time_after(basej, base->clk)) {
+ if (time_after(nextevt, basej))
+ base->clk = basej;
+ else if (time_after(nextevt, base->clk))
+ base->clk = nextevt;
+ }
if (time_before_eq(nextevt, basej)) {
expires = basem;