diff options
Diffstat (limited to 'kernel/time/timer_migration.h')
-rw-r--r-- | kernel/time/timer_migration.h | 146 |
1 files changed, 146 insertions, 0 deletions
diff --git a/kernel/time/timer_migration.h b/kernel/time/timer_migration.h new file mode 100644 index 000000000000..ae19f70f8170 --- /dev/null +++ b/kernel/time/timer_migration.h @@ -0,0 +1,146 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _KERNEL_TIME_MIGRATION_H +#define _KERNEL_TIME_MIGRATION_H + +/* Per group capacity. Must be a power of 2! */ +#define TMIGR_CHILDREN_PER_GROUP 8 + +/** + * struct tmigr_event - a timer event associated to a CPU + * @nextevt: The node to enqueue an event in the parent group queue + * @cpu: The CPU to which this event belongs + * @ignore: Hint whether the event could be ignored; it is set when + * CPU or group is active; + */ +struct tmigr_event { + struct timerqueue_node nextevt; + unsigned int cpu; + bool ignore; +}; + +/** + * struct tmigr_group - timer migration hierarchy group + * @lock: Lock protecting the event information and group hierarchy + * information during setup + * @parent: Pointer to the parent group. Pointer is updated when a + * new hierarchy level is added because of a CPU coming + * online the first time. Once it is set, the pointer will + * not be removed or updated. When accessing parent pointer + * lock less to decide whether to abort a propagation or + * not, it is not a problem. The worst outcome is an + * unnecessary/early CPU wake up. But do not access parent + * pointer several times in the same 'action' (like + * activation, deactivation, check for remote expiry,...) + * without holding the lock as it is not ensured that value + * will not change. + * @groupevt: Next event of the group which is only used when the + * group is !active. The group event is then queued into + * the parent timer queue. + * Ignore bit of @groupevt is set when the group is active. + * @next_expiry: Base monotonic expiry time of the next event of the + * group; It is used for the racy lockless check whether a + * remote expiry is required; it is always reliable + * @events: Timer queue for child events queued in the group + * @migr_state: State of the group (see union tmigr_state) + * @level: Hierarchy level of the group; Required during setup + * @numa_node: Required for setup only to make sure CPU and low level + * group information is NUMA local. It is set to NUMA node + * as long as the group level is per NUMA node (level < + * tmigr_crossnode_level); otherwise it is set to + * NUMA_NO_NODE + * @num_children: Counter of group children to make sure the group is only + * filled with TMIGR_CHILDREN_PER_GROUP; Required for setup + * only + * @groupmask: mask of the group in the parent group; is set during + * setup and will never change; can be read lockless + * @list: List head that is added to the per level + * tmigr_level_list; is required during setup when a + * new group needs to be connected to the existing + * hierarchy groups + */ +struct tmigr_group { + raw_spinlock_t lock; + struct tmigr_group *parent; + struct tmigr_event groupevt; + u64 next_expiry; + struct timerqueue_head events; + atomic_t migr_state; + unsigned int level; + int numa_node; + unsigned int num_children; + u8 groupmask; + struct list_head list; +}; + +/** + * struct tmigr_cpu - timer migration per CPU group + * @lock: Lock protecting the tmigr_cpu group information + * @online: Indicates whether the CPU is online; In deactivate path + * it is required to know whether the migrator in the top + * level group is to be set offline, while a timer is + * pending. Then another online CPU needs to be notified to + * take over the migrator role. Furthermore the information + * is required in CPU hotplug path as the CPU is able to go + * idle before the timer migration hierarchy hotplug AP is + * reached. During this phase, the CPU has to handle the + * global timers on its own and must not act as a migrator. + * @idle: Indicates whether the CPU is idle in the timer migration + * hierarchy + * @remote: Is set when timers of the CPU are expired remotely + * @tmgroup: Pointer to the parent group + * @groupmask: mask of tmigr_cpu in the parent group + * @wakeup: Stores the first timer when the timer migration + * hierarchy is completely idle and remote expiry was done; + * is returned to timer code in the idle path and is only + * used in idle path. + * @cpuevt: CPU event which could be enqueued into the parent group + */ +struct tmigr_cpu { + raw_spinlock_t lock; + bool online; + bool idle; + bool remote; + struct tmigr_group *tmgroup; + u8 groupmask; + u64 wakeup; + struct tmigr_event cpuevt; +}; + +/** + * union tmigr_state - state of tmigr_group + * @state: Combined version of the state - only used for atomic + * read/cmpxchg function + * &anon struct: Split version of the state - only use the struct members to + * update information to stay independent of endianness + * @active: Contains each mask bit of the active children + * @migrator: Contains mask of the child which is migrator + * @seq: Sequence counter needs to be increased when an update + * to the tmigr_state is done. It prevents a race when + * updates in the child groups are propagated in changed + * order. Detailed information about the scenario is + * given in the documentation at the begin of + * timer_migration.c. + */ +union tmigr_state { + u32 state; + struct { + u8 active; + u8 migrator; + u16 seq; + } __packed; +}; + +#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) +extern void tmigr_handle_remote(void); +extern bool tmigr_requires_handle_remote(void); +extern void tmigr_cpu_activate(void); +extern u64 tmigr_cpu_deactivate(u64 nextevt); +extern u64 tmigr_cpu_new_timer(u64 nextevt); +extern u64 tmigr_quick_check(u64 nextevt); +#else +static inline void tmigr_handle_remote(void) { } +static inline bool tmigr_requires_handle_remote(void) { return false; } +static inline void tmigr_cpu_activate(void) { } +#endif + +#endif |