From 052f1dc7eb02300b05170ae341ccd03b76207778 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 13 Feb 2008 15:45:40 +0100 Subject: sched: rt-group: make rt groups scheduling configurable Make the rt group scheduler compile time configurable. Keep it experimental for now. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/sched.c | 148 ++++++++++++++++++++++++++++++++++++++---------------- kernel/sched_rt.c | 12 ++--- kernel/user.c | 22 +++++--- 3 files changed, 126 insertions(+), 56 deletions(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index 85a5fbff2b00..5edc549edae8 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -155,7 +155,7 @@ struct rt_prio_array { struct list_head queue[MAX_RT_PRIO]; }; -#ifdef CONFIG_FAIR_GROUP_SCHED +#ifdef CONFIG_GROUP_SCHED #include @@ -165,19 +165,16 @@ static LIST_HEAD(task_groups); /* task group related information */ struct task_group { -#ifdef CONFIG_FAIR_CGROUP_SCHED +#ifdef CONFIG_CGROUP_SCHED struct cgroup_subsys_state css; #endif + +#ifdef CONFIG_FAIR_GROUP_SCHED /* schedulable entities of this group on each cpu */ struct sched_entity **se; /* runqueue "owned" by this group on each cpu */ struct cfs_rq **cfs_rq; - struct sched_rt_entity **rt_se; - struct rt_rq **rt_rq; - - u64 rt_runtime; - /* * shares assigned to a task group governs how much of cpu bandwidth * is allocated to the group. The more shares a group has, the more is @@ -213,24 +210,36 @@ struct task_group { * */ unsigned long shares; +#endif + +#ifdef CONFIG_RT_GROUP_SCHED + struct sched_rt_entity **rt_se; + struct rt_rq **rt_rq; + + u64 rt_runtime; +#endif struct rcu_head rcu; struct list_head list; }; +#ifdef CONFIG_FAIR_GROUP_SCHED /* Default task group's sched entity on each cpu */ static DEFINE_PER_CPU(struct sched_entity, init_sched_entity); /* Default task group's cfs_rq on each cpu */ static DEFINE_PER_CPU(struct cfs_rq, init_cfs_rq) ____cacheline_aligned_in_smp; -static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity); -static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp; - static struct sched_entity *init_sched_entity_p[NR_CPUS]; static struct cfs_rq *init_cfs_rq_p[NR_CPUS]; +#endif + +#ifdef CONFIG_RT_GROUP_SCHED +static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity); +static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp; static struct sched_rt_entity *init_sched_rt_entity_p[NR_CPUS]; static struct rt_rq *init_rt_rq_p[NR_CPUS]; +#endif /* task_group_lock serializes add/remove of task groups and also changes to * a task group's cpu shares. @@ -240,6 +249,7 @@ static DEFINE_SPINLOCK(task_group_lock); /* doms_cur_mutex serializes access to doms_cur[] array */ static DEFINE_MUTEX(doms_cur_mutex); +#ifdef CONFIG_FAIR_GROUP_SCHED #ifdef CONFIG_SMP /* kernel thread that runs rebalance_shares() periodically */ static struct task_struct *lb_monitor_task; @@ -248,35 +258,40 @@ static int load_balance_monitor(void *unused); static void set_se_shares(struct sched_entity *se, unsigned long shares); +#ifdef CONFIG_USER_SCHED +# define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD) +#else +# define INIT_TASK_GROUP_LOAD NICE_0_LOAD +#endif + +#define MIN_GROUP_SHARES 2 + +static int init_task_group_load = INIT_TASK_GROUP_LOAD; +#endif + /* Default task group. * Every task in system belong to this group at bootup. */ struct task_group init_task_group = { +#ifdef CONFIG_FAIR_GROUP_SCHED .se = init_sched_entity_p, .cfs_rq = init_cfs_rq_p, +#endif +#ifdef CONFIG_RT_GROUP_SCHED .rt_se = init_sched_rt_entity_p, .rt_rq = init_rt_rq_p, -}; - -#ifdef CONFIG_FAIR_USER_SCHED -# define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD) -#else -# define INIT_TASK_GROUP_LOAD NICE_0_LOAD #endif - -#define MIN_GROUP_SHARES 2 - -static int init_task_group_load = INIT_TASK_GROUP_LOAD; +}; /* return group to which a task belongs */ static inline struct task_group *task_group(struct task_struct *p) { struct task_group *tg; -#ifdef CONFIG_FAIR_USER_SCHED +#ifdef CONFIG_USER_SCHED tg = p->user->tg; -#elif defined(CONFIG_FAIR_CGROUP_SCHED) +#elif defined(CONFIG_CGROUP_SCHED) tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id), struct task_group, css); #else @@ -288,11 +303,15 @@ static inline struct task_group *task_group(struct task_struct *p) /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */ static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { +#ifdef CONFIG_FAIR_GROUP_SCHED p->se.cfs_rq = task_group(p)->cfs_rq[cpu]; p->se.parent = task_group(p)->se[cpu]; +#endif +#ifdef CONFIG_RT_GROUP_SCHED p->rt.rt_rq = task_group(p)->rt_rq[cpu]; p->rt.parent = task_group(p)->rt_se[cpu]; +#endif } static inline void lock_doms_cur(void) @@ -311,7 +330,7 @@ static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { } static inline void lock_doms_cur(void) { } static inline void unlock_doms_cur(void) { } -#endif /* CONFIG_FAIR_GROUP_SCHED */ +#endif /* CONFIG_GROUP_SCHED */ /* CFS-related fields in a runqueue */ struct cfs_rq { @@ -351,7 +370,7 @@ struct cfs_rq { struct rt_rq { struct rt_prio_array active; unsigned long rt_nr_running; -#if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED +#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED int highest_prio; /* highest queued rt task prio */ #endif #ifdef CONFIG_SMP @@ -361,7 +380,7 @@ struct rt_rq { int rt_throttled; u64 rt_time; -#ifdef CONFIG_FAIR_GROUP_SCHED +#ifdef CONFIG_RT_GROUP_SCHED unsigned long rt_nr_boosted; struct rq *rq; @@ -437,6 +456,8 @@ struct rq { #ifdef CONFIG_FAIR_GROUP_SCHED /* list of leaf cfs_rq on this cpu: */ struct list_head leaf_cfs_rq_list; +#endif +#ifdef CONFIG_RT_GROUP_SCHED struct list_head leaf_rt_rq_list; #endif @@ -7104,7 +7125,7 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq) /* delimiter for bitsearch: */ __set_bit(MAX_RT_PRIO, array->bitmap); -#if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED +#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED rt_rq->highest_prio = MAX_RT_PRIO; #endif #ifdef CONFIG_SMP @@ -7115,7 +7136,7 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq) rt_rq->rt_time = 0; rt_rq->rt_throttled = 0; -#ifdef CONFIG_FAIR_GROUP_SCHED +#ifdef CONFIG_RT_GROUP_SCHED rt_rq->rt_nr_boosted = 0; rt_rq->rq = rq; #endif @@ -7139,7 +7160,9 @@ static void init_tg_cfs_entry(struct rq *rq, struct task_group *tg, se->load.inv_weight = div64_64(1ULL<<32, se->load.weight); se->parent = NULL; } +#endif +#ifdef CONFIG_RT_GROUP_SCHED static void init_tg_rt_entry(struct rq *rq, struct task_group *tg, struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, int cpu, int add) @@ -7168,7 +7191,7 @@ void __init sched_init(void) init_defrootdomain(); #endif -#ifdef CONFIG_FAIR_GROUP_SCHED +#ifdef CONFIG_GROUP_SCHED list_add(&init_task_group.list, &task_groups); #endif @@ -7189,6 +7212,8 @@ void __init sched_init(void) &per_cpu(init_cfs_rq, i), &per_cpu(init_sched_entity, i), i, 1); +#endif +#ifdef CONFIG_RT_GROUP_SCHED init_task_group.rt_runtime = sysctl_sched_rt_runtime * NSEC_PER_USEC; INIT_LIST_HEAD(&rq->leaf_rt_rq_list); @@ -7381,9 +7406,9 @@ void set_curr_task(int cpu, struct task_struct *p) #endif -#ifdef CONFIG_FAIR_GROUP_SCHED +#ifdef CONFIG_GROUP_SCHED -#ifdef CONFIG_SMP +#if defined CONFIG_FAIR_GROUP_SCHED && defined CONFIG_SMP /* * distribute shares of all task groups among their schedulable entities, * to reflect load distribution across cpus. @@ -7539,20 +7564,28 @@ static void free_sched_group(struct task_group *tg) int i; for_each_possible_cpu(i) { +#ifdef CONFIG_FAIR_GROUP_SCHED if (tg->cfs_rq) kfree(tg->cfs_rq[i]); if (tg->se) kfree(tg->se[i]); +#endif +#ifdef CONFIG_RT_GROUP_SCHED if (tg->rt_rq) kfree(tg->rt_rq[i]); if (tg->rt_se) kfree(tg->rt_se[i]); +#endif } +#ifdef CONFIG_FAIR_GROUP_SCHED kfree(tg->cfs_rq); kfree(tg->se); +#endif +#ifdef CONFIG_RT_GROUP_SCHED kfree(tg->rt_rq); kfree(tg->rt_se); +#endif kfree(tg); } @@ -7560,10 +7593,14 @@ static void free_sched_group(struct task_group *tg) struct task_group *sched_create_group(void) { struct task_group *tg; +#ifdef CONFIG_FAIR_GROUP_SCHED struct cfs_rq *cfs_rq; struct sched_entity *se; +#endif +#ifdef CONFIG_RT_GROUP_SCHED struct rt_rq *rt_rq; struct sched_rt_entity *rt_se; +#endif struct rq *rq; unsigned long flags; int i; @@ -7572,12 +7609,18 @@ struct task_group *sched_create_group(void) if (!tg) return ERR_PTR(-ENOMEM); +#ifdef CONFIG_FAIR_GROUP_SCHED tg->cfs_rq = kzalloc(sizeof(cfs_rq) * NR_CPUS, GFP_KERNEL); if (!tg->cfs_rq) goto err; tg->se = kzalloc(sizeof(se) * NR_CPUS, GFP_KERNEL); if (!tg->se) goto err; + + tg->shares = NICE_0_LOAD; +#endif + +#ifdef CONFIG_RT_GROUP_SCHED tg->rt_rq = kzalloc(sizeof(rt_rq) * NR_CPUS, GFP_KERNEL); if (!tg->rt_rq) goto err; @@ -7585,12 +7628,13 @@ struct task_group *sched_create_group(void) if (!tg->rt_se) goto err; - tg->shares = NICE_0_LOAD; tg->rt_runtime = 0; +#endif for_each_possible_cpu(i) { rq = cpu_rq(i); +#ifdef CONFIG_FAIR_GROUP_SCHED cfs_rq = kmalloc_node(sizeof(struct cfs_rq), GFP_KERNEL|__GFP_ZERO, cpu_to_node(i)); if (!cfs_rq) @@ -7601,6 +7645,10 @@ struct task_group *sched_create_group(void) if (!se) goto err; + init_tg_cfs_entry(rq, tg, cfs_rq, se, i, 0); +#endif + +#ifdef CONFIG_RT_GROUP_SCHED rt_rq = kmalloc_node(sizeof(struct rt_rq), GFP_KERNEL|__GFP_ZERO, cpu_to_node(i)); if (!rt_rq) @@ -7611,17 +7659,21 @@ struct task_group *sched_create_group(void) if (!rt_se) goto err; - init_tg_cfs_entry(rq, tg, cfs_rq, se, i, 0); init_tg_rt_entry(rq, tg, rt_rq, rt_se, i, 0); +#endif } spin_lock_irqsave(&task_group_lock, flags); for_each_possible_cpu(i) { rq = cpu_rq(i); +#ifdef CONFIG_FAIR_GROUP_SCHED cfs_rq = tg->cfs_rq[i]; list_add_rcu(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list); +#endif +#ifdef CONFIG_RT_GROUP_SCHED rt_rq = tg->rt_rq[i]; list_add_rcu(&rt_rq->leaf_rt_rq_list, &rq->leaf_rt_rq_list); +#endif } list_add_rcu(&tg->list, &task_groups); spin_unlock_irqrestore(&task_group_lock, flags); @@ -7643,23 +7695,21 @@ static void free_sched_group_rcu(struct rcu_head *rhp) /* Destroy runqueue etc associated with a task group */ void sched_destroy_group(struct task_group *tg) { - struct cfs_rq *cfs_rq = NULL; - struct rt_rq *rt_rq = NULL; unsigned long flags; int i; spin_lock_irqsave(&task_group_lock, flags); for_each_possible_cpu(i) { - cfs_rq = tg->cfs_rq[i]; - list_del_rcu(&cfs_rq->leaf_cfs_rq_list); - rt_rq = tg->rt_rq[i]; - list_del_rcu(&rt_rq->leaf_rt_rq_list); +#ifdef CONFIG_FAIR_GROUP_SCHED + list_del_rcu(&tg->cfs_rq[i]->leaf_cfs_rq_list); +#endif +#ifdef CONFIG_RT_GROUP_SCHED + list_del_rcu(&tg->rt_rq[i]->leaf_rt_rq_list); +#endif } list_del_rcu(&tg->list); spin_unlock_irqrestore(&task_group_lock, flags); - BUG_ON(!cfs_rq); - /* wait for possible concurrent references to cfs_rqs complete */ call_rcu(&tg->rcu, free_sched_group_rcu); } @@ -7699,6 +7749,7 @@ void sched_move_task(struct task_struct *tsk) task_rq_unlock(rq, &flags); } +#ifdef CONFIG_FAIR_GROUP_SCHED /* rq->lock to be locked by caller */ static void set_se_shares(struct sched_entity *se, unsigned long shares) { @@ -7786,7 +7837,9 @@ unsigned long sched_group_shares(struct task_group *tg) { return tg->shares; } +#endif +#ifdef CONFIG_RT_GROUP_SCHED /* * Ensure that the real time constraints are schedulable. */ @@ -7858,9 +7911,10 @@ long sched_group_rt_runtime(struct task_group *tg) do_div(rt_runtime_us, NSEC_PER_USEC); return rt_runtime_us; } -#endif /* CONFIG_FAIR_GROUP_SCHED */ +#endif +#endif /* CONFIG_GROUP_SCHED */ -#ifdef CONFIG_FAIR_CGROUP_SCHED +#ifdef CONFIG_CGROUP_SCHED /* return corresponding task_group object of a cgroup */ static inline struct task_group *cgroup_tg(struct cgroup *cgrp) @@ -7920,6 +7974,7 @@ cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, sched_move_task(tsk); } +#ifdef CONFIG_FAIR_GROUP_SCHED static int cpu_shares_write_uint(struct cgroup *cgrp, struct cftype *cftype, u64 shareval) { @@ -7932,7 +7987,9 @@ static u64 cpu_shares_read_uint(struct cgroup *cgrp, struct cftype *cft) return (u64) tg->shares; } +#endif +#ifdef CONFIG_RT_GROUP_SCHED static int cpu_rt_runtime_write(struct cgroup *cgrp, struct cftype *cft, struct file *file, const char __user *userbuf, @@ -7977,18 +8034,23 @@ static ssize_t cpu_rt_runtime_read(struct cgroup *cgrp, struct cftype *cft, return simple_read_from_buffer(buf, nbytes, ppos, tmp, len); } +#endif static struct cftype cpu_files[] = { +#ifdef CONFIG_FAIR_GROUP_SCHED { .name = "shares", .read_uint = cpu_shares_read_uint, .write_uint = cpu_shares_write_uint, }, +#endif +#ifdef CONFIG_RT_GROUP_SCHED { .name = "rt_runtime_us", .read = cpu_rt_runtime_read, .write = cpu_rt_runtime_write, }, +#endif }; static int cpu_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cont) @@ -8007,7 +8069,7 @@ struct cgroup_subsys cpu_cgroup_subsys = { .early_init = 1, }; -#endif /* CONFIG_FAIR_CGROUP_SCHED */ +#endif /* CONFIG_CGROUP_SCHED */ #ifdef CONFIG_CGROUP_CPUACCT diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 35825b28e429..f54792b175b2 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -55,7 +55,7 @@ static inline int on_rt_rq(struct sched_rt_entity *rt_se) return !list_empty(&rt_se->run_list); } -#ifdef CONFIG_FAIR_GROUP_SCHED +#ifdef CONFIG_RT_GROUP_SCHED static inline u64 sched_rt_runtime(struct rt_rq *rt_rq) { @@ -177,7 +177,7 @@ static inline int rt_rq_throttled(struct rt_rq *rt_rq) static inline int rt_se_prio(struct sched_rt_entity *rt_se) { -#ifdef CONFIG_FAIR_GROUP_SCHED +#ifdef CONFIG_RT_GROUP_SCHED struct rt_rq *rt_rq = group_rt_rq(rt_se); if (rt_rq) @@ -269,7 +269,7 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) { WARN_ON(!rt_prio(rt_se_prio(rt_se))); rt_rq->rt_nr_running++; -#if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED +#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED if (rt_se_prio(rt_se) < rt_rq->highest_prio) rt_rq->highest_prio = rt_se_prio(rt_se); #endif @@ -281,7 +281,7 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) update_rt_migration(rq_of_rt_rq(rt_rq)); #endif -#ifdef CONFIG_FAIR_GROUP_SCHED +#ifdef CONFIG_RT_GROUP_SCHED if (rt_se_boosted(rt_se)) rt_rq->rt_nr_boosted++; #endif @@ -293,7 +293,7 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) WARN_ON(!rt_prio(rt_se_prio(rt_se))); WARN_ON(!rt_rq->rt_nr_running); rt_rq->rt_nr_running--; -#if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED +#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED if (rt_rq->rt_nr_running) { struct rt_prio_array *array; @@ -315,7 +315,7 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) update_rt_migration(rq_of_rt_rq(rt_rq)); #endif /* CONFIG_SMP */ -#ifdef CONFIG_FAIR_GROUP_SCHED +#ifdef CONFIG_RT_GROUP_SCHED if (rt_se_boosted(rt_se)) rt_rq->rt_nr_boosted--; diff --git a/kernel/user.c b/kernel/user.c index 9f6d471bfd03..7132022a040c 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -57,7 +57,7 @@ struct user_struct root_user = { .uid_keyring = &root_user_keyring, .session_keyring = &root_session_keyring, #endif -#ifdef CONFIG_FAIR_USER_SCHED +#ifdef CONFIG_USER_SCHED .tg = &init_task_group, #endif }; @@ -90,7 +90,7 @@ static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent) return NULL; } -#ifdef CONFIG_FAIR_USER_SCHED +#ifdef CONFIG_USER_SCHED static void sched_destroy_user(struct user_struct *up) { @@ -113,15 +113,15 @@ static void sched_switch_user(struct task_struct *p) sched_move_task(p); } -#else /* CONFIG_FAIR_USER_SCHED */ +#else /* CONFIG_USER_SCHED */ static void sched_destroy_user(struct user_struct *up) { } static int sched_create_user(struct user_struct *up) { return 0; } static void sched_switch_user(struct task_struct *p) { } -#endif /* CONFIG_FAIR_USER_SCHED */ +#endif /* CONFIG_USER_SCHED */ -#if defined(CONFIG_FAIR_USER_SCHED) && defined(CONFIG_SYSFS) +#if defined(CONFIG_USER_SCHED) && defined(CONFIG_SYSFS) static struct kset *uids_kset; /* represents the /sys/kernel/uids/ directory */ static DEFINE_MUTEX(uids_mutex); @@ -137,6 +137,7 @@ static inline void uids_mutex_unlock(void) } /* uid directory attributes */ +#ifdef CONFIG_FAIR_GROUP_SCHED static ssize_t cpu_shares_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) @@ -163,7 +164,9 @@ static ssize_t cpu_shares_store(struct kobject *kobj, static struct kobj_attribute cpu_share_attr = __ATTR(cpu_share, 0644, cpu_shares_show, cpu_shares_store); +#endif +#ifdef CONFIG_RT_GROUP_SCHED static ssize_t cpu_rt_runtime_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) @@ -190,11 +193,16 @@ static ssize_t cpu_rt_runtime_store(struct kobject *kobj, static struct kobj_attribute cpu_rt_runtime_attr = __ATTR(cpu_rt_runtime, 0644, cpu_rt_runtime_show, cpu_rt_runtime_store); +#endif /* default attributes per uid directory */ static struct attribute *uids_attributes[] = { +#ifdef CONFIG_FAIR_GROUP_SCHED &cpu_share_attr.attr, +#endif +#ifdef CONFIG_RT_GROUP_SCHED &cpu_rt_runtime_attr.attr, +#endif NULL }; @@ -297,7 +305,7 @@ static inline void free_user(struct user_struct *up, unsigned long flags) schedule_work(&up->work); } -#else /* CONFIG_FAIR_USER_SCHED && CONFIG_SYSFS */ +#else /* CONFIG_USER_SCHED && CONFIG_SYSFS */ int uids_sysfs_init(void) { return 0; } static inline int uids_user_create(struct user_struct *up) { return 0; } @@ -401,7 +409,7 @@ struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid) spin_lock_irq(&uidhash_lock); up = uid_hash_find(uid, hashent); if (up) { - /* This case is not possible when CONFIG_FAIR_USER_SCHED + /* This case is not possible when CONFIG_USER_SCHED * is defined, since we serialize alloc_uid() using * uids_mutex. Hence no need to call * sched_destroy_user() or remove_user_sysfs_dir(). -- cgit