summaryrefslogtreecommitdiff
path: root/kernel/timer.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/timer.c')
-rw-r--r--kernel/timer.c730
1 files changed, 376 insertions, 354 deletions
diff --git a/kernel/timer.c b/kernel/timer.c
index 5db5a8d26811..4296d13db3d1 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1,7 +1,7 @@
/*
* linux/kernel/timer.c
*
- * Kernel internal timers, basic process system calls
+ * Kernel internal timers
*
* Copyright (C) 1991, 1992 Linus Torvalds
*
@@ -20,7 +20,7 @@
*/
#include <linux/kernel_stat.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/init.h>
@@ -37,8 +37,11 @@
#include <linux/delay.h>
#include <linux/tick.h>
#include <linux/kallsyms.h>
-#include <linux/perf_event.h>
+#include <linux/irq_work.h>
#include <linux/sched.h>
+#include <linux/sched/sysctl.h>
+#include <linux/slab.h>
+#include <linux/compat.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
@@ -62,6 +65,7 @@ EXPORT_SYMBOL(jiffies_64);
#define TVR_SIZE (1 << TVR_BITS)
#define TVN_MASK (TVN_SIZE - 1)
#define TVR_MASK (TVR_SIZE - 1)
+#define MAX_TVAL ((unsigned long)((1ULL << (TVR_BITS + 4*TVN_BITS)) - 1))
struct tvec {
struct list_head vec[TVN_SIZE];
@@ -76,6 +80,7 @@ struct tvec_base {
struct timer_list *running_timer;
unsigned long timer_jiffies;
unsigned long next_timer;
+ unsigned long active_timers;
struct tvec_root tv1;
struct tvec tv2;
struct tvec tv3;
@@ -87,35 +92,28 @@ struct tvec_base boot_tvec_bases;
EXPORT_SYMBOL(boot_tvec_bases);
static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases;
-/*
- * Note that all tvec_bases are 2 byte aligned and lower bit of
- * base in timer_list is guaranteed to be zero. Use the LSB for
- * the new flag to indicate whether the timer is deferrable
- */
-#define TBASE_DEFERRABLE_FLAG (0x1)
-
/* Functions below help us manage 'deferrable' flag */
static inline unsigned int tbase_get_deferrable(struct tvec_base *base)
{
- return ((unsigned int)(unsigned long)base & TBASE_DEFERRABLE_FLAG);
+ return ((unsigned int)(unsigned long)base & TIMER_DEFERRABLE);
}
-static inline struct tvec_base *tbase_get_base(struct tvec_base *base)
+static inline unsigned int tbase_get_irqsafe(struct tvec_base *base)
{
- return ((struct tvec_base *)((unsigned long)base & ~TBASE_DEFERRABLE_FLAG));
+ return ((unsigned int)(unsigned long)base & TIMER_IRQSAFE);
}
-static inline void timer_set_deferrable(struct timer_list *timer)
+static inline struct tvec_base *tbase_get_base(struct tvec_base *base)
{
- timer->base = ((struct tvec_base *)((unsigned long)(timer->base) |
- TBASE_DEFERRABLE_FLAG));
+ return ((struct tvec_base *)((unsigned long)base & ~TIMER_FLAG_MASK));
}
static inline void
timer_set_base(struct timer_list *timer, struct tvec_base *new_base)
{
- timer->base = (struct tvec_base *)((unsigned long)(new_base) |
- tbase_get_deferrable(timer->base));
+ unsigned long flags = (unsigned long)timer->base & TIMER_FLAG_MASK;
+
+ timer->base = (struct tvec_base *)((unsigned long)(new_base) | flags);
}
static unsigned long round_jiffies_common(unsigned long j, int cpu,
@@ -151,9 +149,11 @@ static unsigned long round_jiffies_common(unsigned long j, int cpu,
/* now that we have rounded, subtract the extra skew again */
j -= cpu * 3;
- if (j <= jiffies) /* rounding ate our timeout entirely; */
- return original;
- return j;
+ /*
+ * Make sure j is still in the future. Otherwise return the
+ * unmodified value.
+ */
+ return time_is_after_jiffies(j) ? j : original;
}
/**
@@ -318,16 +318,27 @@ unsigned long round_jiffies_up_relative(unsigned long j)
}
EXPORT_SYMBOL_GPL(round_jiffies_up_relative);
-
-static inline void set_running_timer(struct tvec_base *base,
- struct timer_list *timer)
+/**
+ * set_timer_slack - set the allowed slack for a timer
+ * @timer: the timer to be modified
+ * @slack_hz: the amount of time (in jiffies) allowed for rounding
+ *
+ * Set the amount of time, in jiffies, that a certain timer has
+ * in terms of slack. By setting this value, the timer subsystem
+ * will schedule the actual timer somewhere between
+ * the time mod_timer() asks for, and that time plus the slack.
+ *
+ * By setting the slack to -1, a percentage of the delay is used
+ * instead.
+ */
+void set_timer_slack(struct timer_list *timer, int slack_hz)
{
-#ifdef CONFIG_SMP
- base->running_timer = timer;
-#endif
+ timer->slack = slack_hz;
}
+EXPORT_SYMBOL_GPL(set_timer_slack);
-static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
+static void
+__internal_add_timer(struct tvec_base *base, struct timer_list *timer)
{
unsigned long expires = timer->expires;
unsigned long idx = expires - base->timer_jiffies;
@@ -353,11 +364,12 @@ static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
vec = base->tv1.vec + (base->timer_jiffies & TVR_MASK);
} else {
int i;
- /* If the timeout is larger than 0xffffffff on 64-bit
- * architectures then we use the maximum timeout:
+ /* If the timeout is larger than MAX_TVAL (on 64-bit
+ * architectures or with CONFIG_BASE_SMALL=1) then we
+ * use the maximum timeout.
*/
- if (idx > 0xffffffffUL) {
- idx = 0xffffffffUL;
+ if (idx > MAX_TVAL) {
+ idx = MAX_TVAL;
expires = idx + base->timer_jiffies;
}
i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
@@ -369,6 +381,19 @@ static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
list_add_tail(&timer->entry, vec);
}
+static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
+{
+ __internal_add_timer(base, timer);
+ /*
+ * Update base->active_timers and base->next_timer
+ */
+ if (!tbase_get_deferrable(timer->base)) {
+ if (time_before(timer->expires, base->next_timer))
+ base->next_timer = timer->expires;
+ base->active_timers++;
+ }
+}
+
#ifdef CONFIG_TIMER_STATS
void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr)
{
@@ -401,6 +426,11 @@ static void timer_stats_account_timer(struct timer_list *timer) {}
static struct debug_obj_descr timer_debug_descr;
+static void *timer_debug_hint(void *addr)
+{
+ return ((struct timer_list *) addr)->function;
+}
+
/*
* fixup_init is called when:
* - an active object is initialized
@@ -419,6 +449,12 @@ static int timer_fixup_init(void *addr, enum debug_obj_state state)
}
}
+/* Stub timer callback for improperly used timers. */
+static void stub_timer(unsigned long data)
+{
+ WARN_ON(1);
+}
+
/*
* fixup_activate is called when:
* - an active object is activated
@@ -442,7 +478,8 @@ static int timer_fixup_activate(void *addr, enum debug_obj_state state)
debug_object_activate(timer, &timer_debug_descr);
return 0;
} else {
- WARN_ON_ONCE(1);
+ setup_timer(timer, stub_timer, 0);
+ return 1;
}
return 0;
@@ -472,11 +509,40 @@ static int timer_fixup_free(void *addr, enum debug_obj_state state)
}
}
+/*
+ * fixup_assert_init is called when:
+ * - an untracked/uninit-ed object is found
+ */
+static int timer_fixup_assert_init(void *addr, enum debug_obj_state state)
+{
+ struct timer_list *timer = addr;
+
+ switch (state) {
+ case ODEBUG_STATE_NOTAVAILABLE:
+ if (timer->entry.prev == TIMER_ENTRY_STATIC) {
+ /*
+ * This is not really a fixup. The timer was
+ * statically initialized. We just make sure that it
+ * is tracked in the object tracker.
+ */
+ debug_object_init(timer, &timer_debug_descr);
+ return 0;
+ } else {
+ setup_timer(timer, stub_timer, 0);
+ return 1;
+ }
+ default:
+ return 0;
+ }
+}
+
static struct debug_obj_descr timer_debug_descr = {
- .name = "timer_list",
- .fixup_init = timer_fixup_init,
- .fixup_activate = timer_fixup_activate,
- .fixup_free = timer_fixup_free,
+ .name = "timer_list",
+ .debug_hint = timer_debug_hint,
+ .fixup_init = timer_fixup_init,
+ .fixup_activate = timer_fixup_activate,
+ .fixup_free = timer_fixup_free,
+ .fixup_assert_init = timer_fixup_assert_init,
};
static inline void debug_timer_init(struct timer_list *timer)
@@ -499,16 +565,19 @@ static inline void debug_timer_free(struct timer_list *timer)
debug_object_free(timer, &timer_debug_descr);
}
-static void __init_timer(struct timer_list *timer,
- const char *name,
- struct lock_class_key *key);
+static inline void debug_timer_assert_init(struct timer_list *timer)
+{
+ debug_object_assert_init(timer, &timer_debug_descr);
+}
+
+static void do_init_timer(struct timer_list *timer, unsigned int flags,
+ const char *name, struct lock_class_key *key);
-void init_timer_on_stack_key(struct timer_list *timer,
- const char *name,
- struct lock_class_key *key)
+void init_timer_on_stack_key(struct timer_list *timer, unsigned int flags,
+ const char *name, struct lock_class_key *key)
{
debug_object_init_on_stack(timer, &timer_debug_descr);
- __init_timer(timer, name, key);
+ do_init_timer(timer, flags, name, key);
}
EXPORT_SYMBOL_GPL(init_timer_on_stack_key);
@@ -522,6 +591,7 @@ EXPORT_SYMBOL_GPL(destroy_timer_on_stack);
static inline void debug_timer_init(struct timer_list *timer) { }
static inline void debug_timer_activate(struct timer_list *timer) { }
static inline void debug_timer_deactivate(struct timer_list *timer) { }
+static inline void debug_timer_assert_init(struct timer_list *timer) { }
#endif
static inline void debug_init(struct timer_list *timer)
@@ -543,12 +613,19 @@ static inline void debug_deactivate(struct timer_list *timer)
trace_timer_cancel(timer);
}
-static void __init_timer(struct timer_list *timer,
- const char *name,
- struct lock_class_key *key)
+static inline void debug_assert_init(struct timer_list *timer)
{
+ debug_timer_assert_init(timer);
+}
+
+static void do_init_timer(struct timer_list *timer, unsigned int flags,
+ const char *name, struct lock_class_key *key)
+{
+ struct tvec_base *base = __raw_get_cpu_var(tvec_bases);
+
timer->entry.next = NULL;
- timer->base = __raw_get_cpu_var(tvec_bases);
+ timer->base = (void *)((unsigned long)base | flags);
+ timer->slack = -1;
#ifdef CONFIG_TIMER_STATS
timer->start_site = NULL;
timer->start_pid = -1;
@@ -560,6 +637,7 @@ static void __init_timer(struct timer_list *timer,
/**
* init_timer_key - initialize a timer
* @timer: the timer to be initialized
+ * @flags: timer flags
* @name: name of the timer
* @key: lockdep class key of the fake lock used for tracking timer
* sync lock dependencies
@@ -567,26 +645,15 @@ static void __init_timer(struct timer_list *timer,
* init_timer_key() must be done to a timer prior calling *any* of the
* other timer functions.
*/
-void init_timer_key(struct timer_list *timer,
- const char *name,
- struct lock_class_key *key)
+void init_timer_key(struct timer_list *timer, unsigned int flags,
+ const char *name, struct lock_class_key *key)
{
debug_init(timer);
- __init_timer(timer, name, key);
+ do_init_timer(timer, flags, name, key);
}
EXPORT_SYMBOL(init_timer_key);
-void init_timer_deferrable_key(struct timer_list *timer,
- const char *name,
- struct lock_class_key *key)
-{
- init_timer_key(timer, name, key);
- timer_set_deferrable(timer);
-}
-EXPORT_SYMBOL(init_timer_deferrable_key);
-
-static inline void detach_timer(struct timer_list *timer,
- int clear_pending)
+static inline void detach_timer(struct timer_list *timer, bool clear_pending)
{
struct list_head *entry = &timer->entry;
@@ -598,6 +665,29 @@ static inline void detach_timer(struct timer_list *timer,
entry->prev = LIST_POISON2;
}
+static inline void
+detach_expired_timer(struct timer_list *timer, struct tvec_base *base)
+{
+ detach_timer(timer, true);
+ if (!tbase_get_deferrable(timer->base))
+ base->active_timers--;
+}
+
+static int detach_if_pending(struct timer_list *timer, struct tvec_base *base,
+ bool clear_pending)
+{
+ if (!timer_pending(timer))
+ return 0;
+
+ detach_timer(timer, clear_pending);
+ if (!tbase_get_deferrable(timer->base)) {
+ base->active_timers--;
+ if (timer->expires == base->next_timer)
+ base->next_timer = base->timer_jiffies;
+ }
+ return 1;
+}
+
/*
* We are using hashed locking: holding per_cpu(tvec_bases).lock
* means that all timers which are tied to this base via timer->base are
@@ -643,30 +733,17 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
base = lock_timer_base(timer, &flags);
- if (timer_pending(timer)) {
- detach_timer(timer, 0);
- if (timer->expires == base->next_timer &&
- !tbase_get_deferrable(timer->base))
- base->next_timer = base->timer_jiffies;
- ret = 1;
- } else {
- if (pending_only)
- goto out_unlock;
- }
+ ret = detach_if_pending(timer, base, false);
+ if (!ret && pending_only)
+ goto out_unlock;
debug_activate(timer, expires);
- new_base = __get_cpu_var(tvec_bases);
-
cpu = smp_processor_id();
-#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
- if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) {
- int preferred_cpu = get_nohz_load_balancer();
-
- if (preferred_cpu >= 0)
- cpu = preferred_cpu;
- }
+#if defined(CONFIG_NO_HZ_COMMON) && defined(CONFIG_SMP)
+ if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu))
+ cpu = get_nohz_timer_target();
#endif
new_base = per_cpu(tvec_bases, cpu);
@@ -689,9 +766,6 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
}
timer->expires = expires;
- if (time_before(timer->expires, base->next_timer) &&
- !tbase_get_deferrable(timer->base))
- base->next_timer = timer->expires;
internal_add_timer(base, timer);
out_unlock:
@@ -716,6 +790,45 @@ int mod_timer_pending(struct timer_list *timer, unsigned long expires)
}
EXPORT_SYMBOL(mod_timer_pending);
+/*
+ * Decide where to put the timer while taking the slack into account
+ *
+ * Algorithm:
+ * 1) calculate the maximum (absolute) time
+ * 2) calculate the highest bit where the expires and new max are different
+ * 3) use this bit to make a mask
+ * 4) use the bitmask to round down the maximum time, so that all last
+ * bits are zeros
+ */
+static inline
+unsigned long apply_slack(struct timer_list *timer, unsigned long expires)
+{
+ unsigned long expires_limit, mask;
+ int bit;
+
+ if (timer->slack >= 0) {
+ expires_limit = expires + timer->slack;
+ } else {
+ long delta = expires - jiffies;
+
+ if (delta < 256)
+ return expires;
+
+ expires_limit = expires + delta / 256;
+ }
+ mask = expires ^ expires_limit;
+ if (mask == 0)
+ return expires;
+
+ bit = find_last_bit(&mask, BITS_PER_LONG);
+
+ mask = (1 << bit) - 1;
+
+ expires_limit = expires_limit & ~(mask);
+
+ return expires_limit;
+}
+
/**
* mod_timer - modify a timer's timeout
* @timer: the timer to be modified
@@ -738,6 +851,8 @@ EXPORT_SYMBOL(mod_timer_pending);
*/
int mod_timer(struct timer_list *timer, unsigned long expires)
{
+ expires = apply_slack(timer, expires);
+
/*
* This is a common optimization triggered by the
* networking code - if the timer is re-modified
@@ -757,7 +872,13 @@ EXPORT_SYMBOL(mod_timer);
*
* mod_timer_pinned() is a way to update the expire field of an
* active timer (if the timer is inactive it will be activated)
- * and not allow the timer to be migrated to a different CPU.
+ * and to ensure that the timer is scheduled on the current CPU.
+ *
+ * Note that this does not prevent the timer from being migrated
+ * when the current CPU goes offline. If this is a problem for
+ * you, use CPU-hotplug notifiers to handle it correctly, for
+ * example, cancelling the timer when the corresponding CPU goes
+ * offline.
*
* mod_timer_pinned(timer, expires) is equivalent to:
*
@@ -810,19 +931,16 @@ void add_timer_on(struct timer_list *timer, int cpu)
spin_lock_irqsave(&base->lock, flags);
timer_set_base(timer, base);
debug_activate(timer, timer->expires);
- if (time_before(timer->expires, base->next_timer) &&
- !tbase_get_deferrable(timer->base))
- base->next_timer = timer->expires;
internal_add_timer(base, timer);
/*
- * Check whether the other CPU is idle and needs to be
- * triggered to reevaluate the timer wheel when nohz is
- * active. We are protected against the other CPU fiddling
+ * Check whether the other CPU is in dynticks mode and needs
+ * to be triggered to reevaluate the timer wheel.
+ * We are protected against the other CPU fiddling
* with the timer by holding the timer base lock. This also
- * makes sure that a CPU on the way to idle can not evaluate
- * the timer wheel.
+ * makes sure that a CPU on the way to stop its tick can not
+ * evaluate the timer wheel.
*/
- wake_up_idle_cpu(cpu);
+ wake_up_nohz_cpu(cpu);
spin_unlock_irqrestore(&base->lock, flags);
}
EXPORT_SYMBOL_GPL(add_timer_on);
@@ -844,16 +962,12 @@ int del_timer(struct timer_list *timer)
unsigned long flags;
int ret = 0;
+ debug_assert_init(timer);
+
timer_stats_timer_clear_start_info(timer);
if (timer_pending(timer)) {
base = lock_timer_base(timer, &flags);
- if (timer_pending(timer)) {
- detach_timer(timer, 1);
- if (timer->expires == base->next_timer &&
- !tbase_get_deferrable(timer->base))
- base->next_timer = base->timer_jiffies;
- ret = 1;
- }
+ ret = detach_if_pending(timer, base, true);
spin_unlock_irqrestore(&base->lock, flags);
}
@@ -861,15 +975,12 @@ int del_timer(struct timer_list *timer)
}
EXPORT_SYMBOL(del_timer);
-#ifdef CONFIG_SMP
/**
* try_to_del_timer_sync - Try to deactivate a timer
* @timer: timer do del
*
* This function tries to deactivate a timer. Upon successful (ret >= 0)
* exit the timer is not queued and the handler is not running on any CPU.
- *
- * It must not be called from interrupt contexts.
*/
int try_to_del_timer_sync(struct timer_list *timer)
{
@@ -877,26 +988,21 @@ int try_to_del_timer_sync(struct timer_list *timer)
unsigned long flags;
int ret = -1;
- base = lock_timer_base(timer, &flags);
+ debug_assert_init(timer);
- if (base->running_timer == timer)
- goto out;
+ base = lock_timer_base(timer, &flags);
- ret = 0;
- if (timer_pending(timer)) {
- detach_timer(timer, 1);
- if (timer->expires == base->next_timer &&
- !tbase_get_deferrable(timer->base))
- base->next_timer = base->timer_jiffies;
- ret = 1;
+ if (base->running_timer != timer) {
+ timer_stats_timer_clear_start_info(timer);
+ ret = detach_if_pending(timer, base, true);
}
-out:
spin_unlock_irqrestore(&base->lock, flags);
return ret;
}
EXPORT_SYMBOL(try_to_del_timer_sync);
+#ifdef CONFIG_SMP
/**
* del_timer_sync - deactivate a timer and wait for the handler to finish.
* @timer: the timer to be deactivated
@@ -907,10 +1013,29 @@ EXPORT_SYMBOL(try_to_del_timer_sync);
*
* Synchronization rules: Callers must prevent restarting of the timer,
* otherwise this function is meaningless. It must not be called from
- * interrupt contexts. The caller must not hold locks which would prevent
- * completion of the timer's handler. The timer's handler must not call
- * add_timer_on(). Upon exit the timer is not queued and the handler is
- * not running on any CPU.
+ * interrupt contexts unless the timer is an irqsafe one. The caller must
+ * not hold locks which would prevent completion of the timer's
+ * handler. The timer's handler must not call add_timer_on(). Upon exit the
+ * timer is not queued and the handler is not running on any CPU.
+ *
+ * Note: For !irqsafe timers, you must not hold locks that are held in
+ * interrupt context while calling this function. Even if the lock has
+ * nothing to do with the timer in question. Here's why:
+ *
+ * CPU0 CPU1
+ * ---- ----
+ * <SOFTIRQ>
+ * call_timer_fn();
+ * base->running_timer = mytimer;
+ * spin_lock_irq(somelock);
+ * <IRQ>
+ * spin_lock(somelock);
+ * del_timer_sync(mytimer);
+ * while (base->running_timer == mytimer);
+ *
+ * Now del_timer_sync() will never return and never release somelock.
+ * The interrupt on the other CPU is waiting to grab somelock but
+ * it has interrupted the softirq that CPU0 is waiting to finish.
*
* The function returns whether it has deactivated a pending timer or not.
*/
@@ -919,12 +1044,20 @@ int del_timer_sync(struct timer_list *timer)
#ifdef CONFIG_LOCKDEP
unsigned long flags;
+ /*
+ * If lockdep gives a backtrace here, please reference
+ * the synchronization rules above.
+ */
local_irq_save(flags);
lock_map_acquire(&timer->lockdep_map);
lock_map_release(&timer->lockdep_map);
local_irq_restore(flags);
#endif
-
+ /*
+ * don't use it in hardirq context, because it
+ * could lead to deadlock.
+ */
+ WARN_ON(in_irq() && !tbase_get_irqsafe(timer->base));
for (;;) {
int ret = try_to_del_timer_sync(timer);
if (ret >= 0)
@@ -949,12 +1082,56 @@ static int cascade(struct tvec_base *base, struct tvec *tv, int index)
*/
list_for_each_entry_safe(timer, tmp, &tv_list, entry) {
BUG_ON(tbase_get_base(timer->base) != base);
- internal_add_timer(base, timer);
+ /* No accounting, while moving them */
+ __internal_add_timer(base, timer);
}
return index;
}
+static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long),
+ unsigned long data)
+{
+ int preempt_count = preempt_count();
+
+#ifdef CONFIG_LOCKDEP
+ /*
+ * It is permissible to free the timer from inside the
+ * function that is called from it, this we need to take into
+ * account for lockdep too. To avoid bogus "held lock freed"
+ * warnings as well as problems when looking into
+ * timer->lockdep_map, make a copy and use that here.
+ */
+ struct lockdep_map lockdep_map;
+
+ lockdep_copy_map(&lockdep_map, &timer->lockdep_map);
+#endif
+ /*
+ * Couple the lock chain with the lock chain at
+ * del_timer_sync() by acquiring the lock_map around the fn()
+ * call here and in del_timer_sync().
+ */
+ lock_map_acquire(&lockdep_map);
+
+ trace_timer_expire_entry(timer);
+ fn(data);
+ trace_timer_expire_exit(timer);
+
+ lock_map_release(&lockdep_map);
+
+ if (preempt_count != preempt_count()) {
+ WARN_ONCE(1, "timer: %pF preempt leak: %08x -> %08x\n",
+ fn, preempt_count, preempt_count());
+ /*
+ * Restore the preempt count. That gives us a decent
+ * chance to survive and extract information. If the
+ * callback kept a lock held, bad luck, but not worse
+ * than the BUG() we had.
+ */
+ preempt_count() = preempt_count;
+ }
+}
+
#define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK)
/**
@@ -987,64 +1164,34 @@ static inline void __run_timers(struct tvec_base *base)
while (!list_empty(head)) {
void (*fn)(unsigned long);
unsigned long data;
+ bool irqsafe;
timer = list_first_entry(head, struct timer_list,entry);
fn = timer->function;
data = timer->data;
+ irqsafe = tbase_get_irqsafe(timer->base);
timer_stats_account_timer(timer);
- set_running_timer(base, timer);
- detach_timer(timer, 1);
-
- spin_unlock_irq(&base->lock);
- {
- int preempt_count = preempt_count();
-
-#ifdef CONFIG_LOCKDEP
- /*
- * It is permissible to free the timer from
- * inside the function that is called from
- * it, this we need to take into account for
- * lockdep too. To avoid bogus "held lock
- * freed" warnings as well as problems when
- * looking into timer->lockdep_map, make a
- * copy and use that here.
- */
- struct lockdep_map lockdep_map =
- timer->lockdep_map;
-#endif
- /*
- * Couple the lock chain with the lock chain at
- * del_timer_sync() by acquiring the lock_map
- * around the fn() call here and in
- * del_timer_sync().
- */
- lock_map_acquire(&lockdep_map);
-
- trace_timer_expire_entry(timer);
- fn(data);
- trace_timer_expire_exit(timer);
-
- lock_map_release(&lockdep_map);
-
- if (preempt_count != preempt_count()) {
- printk(KERN_ERR "huh, entered %p "
- "with preempt_count %08x, exited"
- " with %08x?\n",
- fn, preempt_count,
- preempt_count());
- BUG();
- }
+ base->running_timer = timer;
+ detach_expired_timer(timer, base);
+
+ if (irqsafe) {
+ spin_unlock(&base->lock);
+ call_timer_fn(timer, fn, data);
+ spin_lock(&base->lock);
+ } else {
+ spin_unlock_irq(&base->lock);
+ call_timer_fn(timer, fn, data);
+ spin_lock_irq(&base->lock);
}
- spin_lock_irq(&base->lock);
}
}
- set_running_timer(base, NULL);
+ base->running_timer = NULL;
spin_unlock_irq(&base->lock);
}
-#ifdef CONFIG_NO_HZ
+#ifdef CONFIG_NO_HZ_COMMON
/*
* Find out when the next timer event is due to happen. This
* is used on S/390 to stop all activity when a CPU is idle.
@@ -1170,13 +1317,22 @@ static unsigned long cmp_next_hrtimer_event(unsigned long now,
*/
unsigned long get_next_timer_interrupt(unsigned long now)
{
- struct tvec_base *base = __get_cpu_var(tvec_bases);
- unsigned long expires;
+ struct tvec_base *base = __this_cpu_read(tvec_bases);
+ unsigned long expires = now + NEXT_TIMER_MAX_DELTA;
+
+ /*
+ * Pretend that there is no timer pending if the cpu is offline.
+ * Possible pending timers will be migrated later to an active cpu.
+ */
+ if (cpu_is_offline(smp_processor_id()))
+ return expires;
spin_lock(&base->lock);
- if (time_before_eq(base->next_timer, base->timer_jiffies))
- base->next_timer = __next_timer_interrupt(base);
- expires = base->next_timer;
+ if (base->active_timers) {
+ if (time_before_eq(base->next_timer, base->timer_jiffies))
+ base->next_timer = __next_timer_interrupt(base);
+ expires = base->next_timer;
+ }
spin_unlock(&base->lock);
if (time_before_eq(expires, now))
@@ -1199,7 +1355,10 @@ void update_process_times(int user_tick)
account_process_tick(p, user_tick);
run_local_timers();
rcu_check_callbacks(cpu, user_tick);
- printk_tick();
+#ifdef CONFIG_IRQ_WORK
+ if (in_irq())
+ irq_work_run();
+#endif
scheduler_tick();
run_posix_cpu_timers(p);
}
@@ -1209,9 +1368,7 @@ void update_process_times(int user_tick)
*/
static void run_timer_softirq(struct softirq_action *h)
{
- struct tvec_base *base = __get_cpu_var(tvec_bases);
-
- perf_event_do_pending();
+ struct tvec_base *base = __this_cpu_read(tvec_bases);
hrtimer_run_pending();
@@ -1226,20 +1383,6 @@ void run_local_timers(void)
{
hrtimer_run_queues();
raise_softirq(TIMER_SOFTIRQ);
- softlockup_tick();
-}
-
-/*
- * The 64-bit jiffies value is not atomic - you MUST NOT read it
- * without sampling the sequence number in xtime_lock.
- * jiffies is defined in the linker script...
- */
-
-void do_timer(unsigned long ticks)
-{
- jiffies_64 += ticks;
- update_wall_time();
- calc_global_load();
}
#ifdef __ARCH_WANT_SYS_ALARM
@@ -1255,70 +1398,6 @@ SYSCALL_DEFINE1(alarm, unsigned int, seconds)
#endif
-#ifndef __alpha__
-
-/*
- * The Alpha uses getxpid, getxuid, and getxgid instead. Maybe this
- * should be moved into arch/i386 instead?
- */
-
-/**
- * sys_getpid - return the thread group id of the current process
- *
- * Note, despite the name, this returns the tgid not the pid. The tgid and
- * the pid are identical unless CLONE_THREAD was specified on clone() in
- * which case the tgid is the same in all threads of the same group.
- *
- * This is SMP safe as current->tgid does not change.
- */
-SYSCALL_DEFINE0(getpid)
-{
- return task_tgid_vnr(current);
-}
-
-/*
- * Accessing ->real_parent is not SMP-safe, it could
- * change from under us. However, we can use a stale
- * value of ->real_parent under rcu_read_lock(), see
- * release_task()->call_rcu(delayed_put_task_struct).
- */
-SYSCALL_DEFINE0(getppid)
-{
- int pid;
-
- rcu_read_lock();
- pid = task_tgid_vnr(current->real_parent);
- rcu_read_unlock();
-
- return pid;
-}
-
-SYSCALL_DEFINE0(getuid)
-{
- /* Only we change this so SMP safe */
- return current_uid();
-}
-
-SYSCALL_DEFINE0(geteuid)
-{
- /* Only we change this so SMP safe */
- return current_euid();
-}
-
-SYSCALL_DEFINE0(getgid)
-{
- /* Only we change this so SMP safe */
- return current_gid();
-}
-
-SYSCALL_DEFINE0(getegid)
-{
- /* Only we change this so SMP safe */
- return current_egid();
-}
-
-#endif
-
static void process_timeout(unsigned long __data)
{
wake_up_process((struct task_struct *)__data);
@@ -1426,96 +1505,11 @@ signed long __sched schedule_timeout_uninterruptible(signed long timeout)
}
EXPORT_SYMBOL(schedule_timeout_uninterruptible);
-/* Thread ID - the internal kernel "pid" */
-SYSCALL_DEFINE0(gettid)
-{
- return task_pid_vnr(current);
-}
-
-/**
- * do_sysinfo - fill in sysinfo struct
- * @info: pointer to buffer to fill
- */
-int do_sysinfo(struct sysinfo *info)
-{
- unsigned long mem_total, sav_total;
- unsigned int mem_unit, bitcount;
- struct timespec tp;
-
- memset(info, 0, sizeof(struct sysinfo));
-
- ktime_get_ts(&tp);
- monotonic_to_bootbased(&tp);
- info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
-
- get_avenrun(info->loads, 0, SI_LOAD_SHIFT - FSHIFT);
-
- info->procs = nr_threads;
-
- si_meminfo(info);
- si_swapinfo(info);
-
- /*
- * If the sum of all the available memory (i.e. ram + swap)
- * is less than can be stored in a 32 bit unsigned long then
- * we can be binary compatible with 2.2.x kernels. If not,
- * well, in that case 2.2.x was broken anyways...
- *
- * -Erik Andersen <andersee@debian.org>
- */
-
- mem_total = info->totalram + info->totalswap;
- if (mem_total < info->totalram || mem_total < info->totalswap)
- goto out;
- bitcount = 0;
- mem_unit = info->mem_unit;
- while (mem_unit > 1) {
- bitcount++;
- mem_unit >>= 1;
- sav_total = mem_total;
- mem_total <<= 1;
- if (mem_total < sav_total)
- goto out;
- }
-
- /*
- * If mem_total did not overflow, multiply all memory values by
- * info->mem_unit and set it to 1. This leaves things compatible
- * with 2.2.x, and also retains compatibility with earlier 2.4.x
- * kernels...
- */
-
- info->mem_unit = 1;
- info->totalram <<= bitcount;
- info->freeram <<= bitcount;
- info->sharedram <<= bitcount;
- info->bufferram <<= bitcount;
- info->totalswap <<= bitcount;
- info->freeswap <<= bitcount;
- info->totalhigh <<= bitcount;
- info->freehigh <<= bitcount;
-
-out:
- return 0;
-}
-
-SYSCALL_DEFINE1(sysinfo, struct sysinfo __user *, info)
-{
- struct sysinfo val;
-
- do_sysinfo(&val);
-
- if (copy_to_user(info, &val, sizeof(struct sysinfo)))
- return -EFAULT;
-
- return 0;
-}
-
-static int __cpuinit init_timers_cpu(int cpu)
+static int init_timers_cpu(int cpu)
{
int j;
struct tvec_base *base;
- static char __cpuinitdata tvec_base_done[NR_CPUS];
+ static char tvec_base_done[NR_CPUS];
if (!tvec_base_done[cpu]) {
static char boot_done;
@@ -1547,12 +1541,12 @@ static int __cpuinit init_timers_cpu(int cpu)
boot_done = 1;
base = &boot_tvec_bases;
}
+ spin_lock_init(&base->lock);
tvec_base_done[cpu] = 1;
} else {
base = per_cpu(tvec_bases, cpu);
}
- spin_lock_init(&base->lock);
for (j = 0; j < TVN_SIZE; j++) {
INIT_LIST_HEAD(base->tv5.vec + j);
@@ -1565,6 +1559,7 @@ static int __cpuinit init_timers_cpu(int cpu)
base->timer_jiffies = jiffies;
base->next_timer = base->timer_jiffies;
+ base->active_timers = 0;
return 0;
}
@@ -1575,16 +1570,14 @@ static void migrate_timer_list(struct tvec_base *new_base, struct list_head *hea
while (!list_empty(head)) {
timer = list_first_entry(head, struct timer_list, entry);
- detach_timer(timer, 0);
+ /* We ignore the accounting on the dying cpu */
+ detach_timer(timer, false);
timer_set_base(timer, new_base);
- if (time_before(timer->expires, new_base->next_timer) &&
- !tbase_get_deferrable(timer->base))
- new_base->next_timer = timer->expires;
internal_add_timer(new_base, timer);
}
}
-static void __cpuinit migrate_timers(int cpu)
+static void migrate_timers(int cpu)
{
struct tvec_base *old_base;
struct tvec_base *new_base;
@@ -1617,15 +1610,18 @@ static void __cpuinit migrate_timers(int cpu)
}
#endif /* CONFIG_HOTPLUG_CPU */
-static int __cpuinit timer_cpu_notify(struct notifier_block *self,
+static int timer_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
{
long cpu = (long)hcpu;
+ int err;
+
switch(action) {
case CPU_UP_PREPARE:
case CPU_UP_PREPARE_FROZEN:
- if (init_timers_cpu(cpu) < 0)
- return NOTIFY_BAD;
+ err = init_timers_cpu(cpu);
+ if (err < 0)
+ return notifier_from_errno(err);
break;
#ifdef CONFIG_HOTPLUG_CPU
case CPU_DEAD:
@@ -1639,19 +1635,23 @@ static int __cpuinit timer_cpu_notify(struct notifier_block *self,
return NOTIFY_OK;
}
-static struct notifier_block __cpuinitdata timers_nb = {
+static struct notifier_block timers_nb = {
.notifier_call = timer_cpu_notify,
};
void __init init_timers(void)
{
- int err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE,
- (void *)(long)smp_processor_id());
+ int err;
+ /* ensure there are enough low bits for flags in timer->base pointer */
+ BUILD_BUG_ON(__alignof__(struct tvec_base) & TIMER_FLAG_MASK);
+
+ err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE,
+ (void *)(long)smp_processor_id());
init_timer_stats();
- BUG_ON(err == NOTIFY_BAD);
+ BUG_ON(err != NOTIFY_OK);
register_cpu_notifier(&timers_nb);
open_softirq(TIMER_SOFTIRQ, run_timer_softirq);
}
@@ -1684,3 +1684,25 @@ unsigned long msleep_interruptible(unsigned int msecs)
}
EXPORT_SYMBOL(msleep_interruptible);
+
+static int __sched do_usleep_range(unsigned long min, unsigned long max)
+{
+ ktime_t kmin;
+ unsigned long delta;
+
+ kmin = ktime_set(0, min * NSEC_PER_USEC);
+ delta = (max - min) * NSEC_PER_USEC;
+ return schedule_hrtimeout_range(&kmin, delta, HRTIMER_MODE_REL);
+}
+
+/**
+ * usleep_range - Drop in replacement for udelay where wakeup is flexible
+ * @min: Minimum time in usecs to sleep
+ * @max: Maximum time in usecs to sleep
+ */
+void usleep_range(unsigned long min, unsigned long max)
+{
+ __set_current_state(TASK_UNINTERRUPTIBLE);
+ do_usleep_range(min, max);
+}
+EXPORT_SYMBOL(usleep_range);