Diffstat (limited to 'kernel/sched')
-rw-r--r--  kernel/sched/core.c               210
-rw-r--r--  kernel/sched/cpuacct.h             18
-rw-r--r--  kernel/sched/cpufreq_schedutil.c    6
-rw-r--r--  kernel/sched/cputime.c             14
-rw-r--r--  kernel/sched/deadline.c             2
-rw-r--r--  kernel/sched/fair.c                 2
-rw-r--r--  kernel/sched/rt.c                   2
-rw-r--r--  kernel/sched/sched.h                2
-rw-r--r--  kernel/sched/stop_task.c            2
-rw-r--r--  kernel/sched/wait_bit.c            18
10 files changed, 234 insertions, 42 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5b82a0073532..75554f366fd3 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -16,6 +16,7 @@
#include <linux/init_task.h>
#include <linux/context_tracking.h>
#include <linux/rcupdate_wait.h>
+#include <linux/compat.h>
#include <linux/blkdev.h>
#include <linux/kprobes.h>
@@ -5107,13 +5108,11 @@ SYSCALL_DEFINE1(sched_get_priority_min, int, policy)
* Return: On success, 0 and the timeslice is in @interval. Otherwise,
* an error code.
*/
-SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
- struct timespec __user *, interval)
+static int sched_rr_get_interval(pid_t pid, struct timespec64 *t)
{
struct task_struct *p;
unsigned int time_slice;
struct rq_flags rf;
- struct timespec t;
struct rq *rq;
int retval;
@@ -5137,15 +5136,40 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
task_rq_unlock(rq, p, &rf);
rcu_read_unlock();
- jiffies_to_timespec(time_slice, &t);
- retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0;
- return retval;
+ jiffies_to_timespec64(time_slice, t);
+ return 0;
out_unlock:
rcu_read_unlock();
return retval;
}
+SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
+ struct timespec __user *, interval)
+{
+ struct timespec64 t;
+ int retval = sched_rr_get_interval(pid, &t);
+
+ if (retval == 0)
+ retval = put_timespec64(&t, interval);
+
+ return retval;
+}
+
+#ifdef CONFIG_COMPAT
+COMPAT_SYSCALL_DEFINE2(sched_rr_get_interval,
+ compat_pid_t, pid,
+ struct compat_timespec __user *, interval)
+{
+ struct timespec64 t;
+ int retval = sched_rr_get_interval(pid, &t);
+
+ if (retval == 0)
+ retval = compat_put_timespec64(&t, interval);
+ return retval;
+}
+#endif
+
void sched_show_task(struct task_struct *p)
{
unsigned long free = 0;
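
The split above keeps the userspace ABI unchanged: native and compat callers still pass their own timespec layout, and the kernel copies the shared timespec64 result back out through put_timespec64() or compat_put_timespec64(). A minimal userspace sketch of the syscall via its standard glibc wrapper (nothing beyond POSIX is assumed):

/* Query the SCHED_RR timeslice of the calling task.
 * Build: cc -o rr rr.c */
#include <sched.h>
#include <stdio.h>
#include <time.h>

int main(void)
{
	struct timespec ts;

	/* pid 0 means "the calling task" */
	if (sched_rr_get_interval(0, &ts) != 0) {
		perror("sched_rr_get_interval");
		return 1;
	}
	printf("timeslice: %ld.%09ld s\n", (long)ts.tv_sec, ts.tv_nsec);
	return 0;
}
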
@@ -6620,7 +6644,7 @@ static int __cfs_schedulable(struct task_group *tg, u64 period, u64 quota)
return ret;
}
-static int cpu_stats_show(struct seq_file *sf, void *v)
+static int cpu_cfs_stat_show(struct seq_file *sf, void *v)
{
struct task_group *tg = css_tg(seq_css(sf));
struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
@@ -6660,7 +6684,7 @@ static u64 cpu_rt_period_read_uint(struct cgroup_subsys_state *css,
}
#endif /* CONFIG_RT_GROUP_SCHED */
-static struct cftype cpu_files[] = {
+static struct cftype cpu_legacy_files[] = {
#ifdef CONFIG_FAIR_GROUP_SCHED
{
.name = "shares",
@@ -6681,7 +6705,7 @@ static struct cftype cpu_files[] = {
},
{
.name = "stat",
- .seq_show = cpu_stats_show,
+ .seq_show = cpu_cfs_stat_show,
},
#endif
#ifdef CONFIG_RT_GROUP_SCHED
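
cpu_cfs_stat_show() keeps serving the legacy cpu.stat file on cgroup-v1 hierarchies; only its name changes, to make room for the v2 handlers added below. A hedged snippet that dumps that file, assuming a v1 cpu controller mounted at /sys/fs/cgroup/cpu (the mount point is an assumption; adjust to your system):

/* Dump the cgroup-v1 cpu.stat file rendered by cpu_cfs_stat_show(). */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/fs/cgroup/cpu/cpu.stat", "r");
	char line[128];

	if (!f) {
		perror("fopen");
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);	/* nr_periods / nr_throttled / throttled_time */
	fclose(f);
	return 0;
}
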
@@ -6699,16 +6723,182 @@ static struct cftype cpu_files[] = {
{ } /* Terminate */
};
+static int cpu_extra_stat_show(struct seq_file *sf,
+ struct cgroup_subsys_state *css)
+{
+#ifdef CONFIG_CFS_BANDWIDTH
+ {
+ struct task_group *tg = css_tg(css);
+ struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
+ u64 throttled_usec;
+
+ throttled_usec = cfs_b->throttled_time;
+ do_div(throttled_usec, NSEC_PER_USEC);
+
+ seq_printf(sf, "nr_periods %d\n"
+ "nr_throttled %d\n"
+ "throttled_usec %llu\n",
+ cfs_b->nr_periods, cfs_b->nr_throttled,
+ throttled_usec);
+ }
+#endif
+ return 0;
+}
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+static u64 cpu_weight_read_u64(struct cgroup_subsys_state *css,
+ struct cftype *cft)
+{
+ struct task_group *tg = css_tg(css);
+ u64 weight = scale_load_down(tg->shares);
+
+ return DIV_ROUND_CLOSEST_ULL(weight * CGROUP_WEIGHT_DFL, 1024);
+}
+
+static int cpu_weight_write_u64(struct cgroup_subsys_state *css,
+ struct cftype *cft, u64 weight)
+{
+ /*
+ * cgroup weight knobs should use the common MIN, DFL and MAX
+ * values which are 1, 100 and 10000 respectively. While it loses
+ * a bit of range on both ends, it maps pretty well onto the shares
+ * value used by scheduler and the round-trip conversions preserve
+ * the original value over the entire range.
+ */
+ if (weight < CGROUP_WEIGHT_MIN || weight > CGROUP_WEIGHT_MAX)
+ return -ERANGE;
+
+ weight = DIV_ROUND_CLOSEST_ULL(weight * 1024, CGROUP_WEIGHT_DFL);
+
+ return sched_group_set_shares(css_tg(css), scale_load(weight));
+}
+
+static s64 cpu_weight_nice_read_s64(struct cgroup_subsys_state *css,
+ struct cftype *cft)
+{
+ unsigned long weight = scale_load_down(css_tg(css)->shares);
+ int last_delta = INT_MAX;
+ int prio, delta;
+
+ /* find the closest nice value to the current weight */
+ for (prio = 0; prio < ARRAY_SIZE(sched_prio_to_weight); prio++) {
+ delta = abs(sched_prio_to_weight[prio] - weight);
+ if (delta >= last_delta)
+ break;
+ last_delta = delta;
+ }
+
+ return PRIO_TO_NICE(prio - 1 + MAX_RT_PRIO);
+}
+
+static int cpu_weight_nice_write_s64(struct cgroup_subsys_state *css,
+ struct cftype *cft, s64 nice)
+{
+ unsigned long weight;
+
+ if (nice < MIN_NICE || nice > MAX_NICE)
+ return -ERANGE;
+
+ weight = sched_prio_to_weight[NICE_TO_PRIO(nice) - MAX_RT_PRIO];
+ return sched_group_set_shares(css_tg(css), scale_load(weight));
+}
+#endif
+
+static void __maybe_unused cpu_period_quota_print(struct seq_file *sf,
+ long period, long quota)
+{
+ if (quota < 0)
+ seq_puts(sf, "max");
+ else
+ seq_printf(sf, "%ld", quota);
+
+ seq_printf(sf, " %ld\n", period);
+}
+
+/* caller should put the current value in *@periodp before calling */
+static int __maybe_unused cpu_period_quota_parse(char *buf,
+ u64 *periodp, u64 *quotap)
+{
+ char tok[21]; /* U64_MAX */
+
+ if (!sscanf(buf, "%s %llu", tok, periodp))
+ return -EINVAL;
+
+ *periodp *= NSEC_PER_USEC;
+
+ if (sscanf(tok, "%llu", quotap))
+ *quotap *= NSEC_PER_USEC;
+ else if (!strcmp(tok, "max"))
+ *quotap = RUNTIME_INF;
+ else
+ return -EINVAL;
+
+ return 0;
+}
+
+#ifdef CONFIG_CFS_BANDWIDTH
+static int cpu_max_show(struct seq_file *sf, void *v)
+{
+ struct task_group *tg = css_tg(seq_css(sf));
+
+ cpu_period_quota_print(sf, tg_get_cfs_period(tg), tg_get_cfs_quota(tg));
+ return 0;
+}
+
+static ssize_t cpu_max_write(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off)
+{
+ struct task_group *tg = css_tg(of_css(of));
+ u64 period = tg_get_cfs_period(tg);
+ u64 quota;
+ int ret;
+
+ ret = cpu_period_quota_parse(buf, &period, &quota);
+ if (!ret)
+ ret = tg_set_cfs_bandwidth(tg, period, quota);
+ return ret ?: nbytes;
+}
+#endif
+
+static struct cftype cpu_files[] = {
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ {
+ .name = "weight",
+ .flags = CFTYPE_NOT_ON_ROOT,
+ .read_u64 = cpu_weight_read_u64,
+ .write_u64 = cpu_weight_write_u64,
+ },
+ {
+ .name = "weight.nice",
+ .flags = CFTYPE_NOT_ON_ROOT,
+ .read_s64 = cpu_weight_nice_read_s64,
+ .write_s64 = cpu_weight_nice_write_s64,
+ },
+#endif
+#ifdef CONFIG_CFS_BANDWIDTH
+ {
+ .name = "max",
+ .flags = CFTYPE_NOT_ON_ROOT,
+ .seq_show = cpu_max_show,
+ .write = cpu_max_write,
+ },
+#endif
+ { } /* terminate */
+};
+
struct cgroup_subsys cpu_cgrp_subsys = {
.css_alloc = cpu_cgroup_css_alloc,
.css_online = cpu_cgroup_css_online,
.css_released = cpu_cgroup_css_released,
.css_free = cpu_cgroup_css_free,
+ .css_extra_stat_show = cpu_extra_stat_show,
.fork = cpu_cgroup_fork,
.can_attach = cpu_cgroup_can_attach,
.attach = cpu_cgroup_attach,
- .legacy_cftypes = cpu_files,
+ .legacy_cftypes = cpu_legacy_files,
+ .dfl_cftypes = cpu_files,
.early_init = true,
+ .threaded = true,
};
#endif /* CONFIG_CGROUP_SCHED */
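
The new weight knobs translate between the v2 weight range (1..10000, default 100) and the scheduler's shares scale (default 1024). A standalone sketch of the round-trip arithmetic from cpu_weight_read_u64()/cpu_weight_write_u64(), with the rounding division written out (constants copied from the hunk above; this is an illustration, not kernel code):

/* Round-trip between cgroup-v2 cpu.weight and scheduler shares,
 * mirroring the DIV_ROUND_CLOSEST_ULL() arithmetic above. */
#include <stdint.h>
#include <stdio.h>

#define CGROUP_WEIGHT_DFL 100ULL	/* default cpu.weight */
#define SHARES_DFL        1024ULL	/* default tg->shares, scaled down */

static uint64_t div_round_closest(uint64_t x, uint64_t d)
{
	return (x + d / 2) / d;
}

static uint64_t weight_to_shares(uint64_t weight)
{
	return div_round_closest(weight * SHARES_DFL, CGROUP_WEIGHT_DFL);
}

static uint64_t shares_to_weight(uint64_t shares)
{
	return div_round_closest(shares * CGROUP_WEIGHT_DFL, SHARES_DFL);
}

int main(void)
{
	/* Every valid weight survives the round trip, as the comment in
	 * cpu_weight_write_u64() promises. */
	for (uint64_t w = 1; w <= 10000; w++)
		if (shares_to_weight(weight_to_shares(w)) != w)
			printf("round-trip broke at weight %llu\n",
			       (unsigned long long)w);
	printf("weight 100 -> shares %llu\n",
	       (unsigned long long)weight_to_shares(100));
	return 0;
}
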
diff --git a/kernel/sched/cpuacct.h b/kernel/sched/cpuacct.h
deleted file mode 100644
index a8358a57a316..000000000000
--- a/kernel/sched/cpuacct.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifdef CONFIG_CGROUP_CPUACCT
-
-extern void cpuacct_charge(struct task_struct *tsk, u64 cputime);
-extern void cpuacct_account_field(struct task_struct *tsk, int index, u64 val);
-
-#else
-
-static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime)
-{
-}
-
-static inline void
-cpuacct_account_field(struct task_struct *tsk, int index, u64 val)
-{
-}
-
-#endif
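
The deleted no-op fallbacks reappear under the cgroup_account_cputime*() names used by the converted call sites below. An illustrative sketch of their shape (the real declarations live in <linux/cgroup.h>; the exact config guard here is an assumption):

/* Illustrative sketch only; see <linux/cgroup.h> for the real helpers. */
#ifdef CONFIG_CGROUPS
void cgroup_account_cputime(struct task_struct *task, u64 delta_exec);
void cgroup_account_cputime_field(struct task_struct *task,
				  enum cpu_usage_stat index, u64 delta_exec);
#else
static inline void cgroup_account_cputime(struct task_struct *task,
					  u64 delta_exec) {}
static inline void cgroup_account_cputime_field(struct task_struct *task,
						enum cpu_usage_stat index,
						u64 delta_exec) {}
#endif
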
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index ba0da243fdd8..2f52ec0f1539 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -282,8 +282,12 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
* Do not reduce the frequency if the CPU has not been idle
* recently, as the reduction is likely to be premature then.
*/
- if (busy && next_f < sg_policy->next_freq)
+ if (busy && next_f < sg_policy->next_freq) {
next_f = sg_policy->next_freq;
+
+ /* Reset cached freq as next_freq has changed */
+ sg_policy->cached_raw_freq = 0;
+ }
}
sugov_update_commit(sg_policy, time, next_f);
}
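
The reset matters because get_next_freq() in this file short-circuits on cached_raw_freq: without clearing it, the next update that computes the same raw value would return the next_freq that was just overridden above instead of re-resolving. A condensed, illustrative fragment of that fast path (map_util_to_freq() stands in for the util-to-frequency arithmetic and is not a real helper):

/* Condensed illustration of the cached_raw_freq fast path in
 * get_next_freq(); a stale cache here would hand back the value that
 * sugov_update_single() just overrode. */
static unsigned int get_next_freq_sketch(struct sugov_policy *sg_policy,
					 struct cpufreq_policy *policy,
					 unsigned long util, unsigned long max)
{
	unsigned int freq = map_util_to_freq(util, max);	/* hypothetical */

	if (freq == sg_policy->cached_raw_freq && sg_policy->next_freq != UINT_MAX)
		return sg_policy->next_freq;	/* cache hit: skip resolve */

	sg_policy->cached_raw_freq = freq;
	return cpufreq_driver_resolve_freq(policy, freq);
}
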
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 9be8b68a66da..bac6ac9a4ec7 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -109,7 +109,7 @@ static inline void task_group_account_field(struct task_struct *p, int index,
*/
__this_cpu_add(kernel_cpustat.cpustat[index], tmp);
- cpuacct_account_field(p, index, tmp);
+ cgroup_account_cputime_field(p, index, tmp);
}
/*
@@ -446,6 +446,13 @@ void vtime_account_irq_enter(struct task_struct *tsk)
EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
#endif /* __ARCH_HAS_VTIME_ACCOUNT */
+void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev,
+ u64 *ut, u64 *st)
+{
+ *ut = curr->utime;
+ *st = curr->stime;
+}
+
void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st)
{
*ut = p->utime;
@@ -584,9 +591,8 @@ drop_precision:
*
* Assuming that rtime_i+1 >= rtime_i.
*/
-static void cputime_adjust(struct task_cputime *curr,
- struct prev_cputime *prev,
- u64 *ut, u64 *st)
+void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev,
+ u64 *ut, u64 *st)
{
u64 rtime, stime, utime;
unsigned long flags;
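
The first hunk adds a pass-through cputime_adjust() for configurations whose sampled utime/stime are already exact, while the second drops the static from the general variant, which scales the samples against rtime and keeps the reported values monotonic via prev_cputime. A userspace toy of just that monotonicity rule (the rtime scaling is omitted; purely illustrative):

/* Toy model of cputime_adjust()'s monotonicity guarantee: reported
 * utime/stime never go backwards between samples. */
#include <stdint.h>
#include <stdio.h>

struct prev_cputime { uint64_t utime, stime; };

static void adjust(struct prev_cputime *prev, uint64_t utime, uint64_t stime,
		   uint64_t *ut, uint64_t *st)
{
	if (utime < prev->utime)	/* never report less than last time */
		utime = prev->utime;
	if (stime < prev->stime)
		stime = prev->stime;
	prev->utime = utime;
	prev->stime = stime;
	*ut = utime;
	*st = stime;
}

int main(void)
{
	struct prev_cputime prev = { 0, 0 };
	uint64_t ut, st;

	adjust(&prev, 100, 50, &ut, &st);
	adjust(&prev, 90, 70, &ut, &st);	/* utime sample regressed */
	printf("ut=%llu st=%llu\n",		/* ut stays at 100 */
	       (unsigned long long)ut, (unsigned long long)st);
	return 0;
}
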
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index f349f7e98dec..2473736c7616 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1144,7 +1144,7 @@ static void update_curr_dl(struct rq *rq)
account_group_exec_runtime(curr, delta_exec);
curr->se.exec_start = rq_clock_task(rq);
- cpuacct_charge(curr, delta_exec);
+ cgroup_account_cputime(curr, delta_exec);
sched_rt_avg_update(rq, delta_exec);
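
The same one-line substitution recurs in fair.c, rt.c and stop_task.c below: the charge moves from the cpuacct-only helper to the cgroup core, which feeds both cpuacct and the new v2 cpu.stat. A condensed sketch of the shared shape of these call sites (the function name is made up; each scheduling class has its own update_curr variant):

/* Common shape of the converted charge sites. */
static void update_curr_sketch(struct rq *rq, struct task_struct *curr,
			       u64 delta_exec)
{
	account_group_exec_runtime(curr, delta_exec);	/* per-process stats */
	curr->se.exec_start = rq_clock_task(rq);	/* restart the clock */
	cgroup_account_cputime(curr, delta_exec);	/* was: cpuacct_charge() */
}
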
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 0989676c50e9..4037e19bbca2 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -844,7 +844,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
struct task_struct *curtask = task_of(curr);
trace_sched_stat_runtime(curtask, delta_exec, curr->vruntime);
- cpuacct_charge(curtask, delta_exec);
+ cgroup_account_cputime(curtask, delta_exec);
account_group_exec_runtime(curtask, delta_exec);
}
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index d8c43d73e078..4056c19ca3f0 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -969,7 +969,7 @@ static void update_curr_rt(struct rq *rq)
account_group_exec_runtime(curr, delta_exec);
curr->se.exec_start = rq_clock_task(rq);
- cpuacct_charge(curr, delta_exec);
+ cgroup_account_cputime(curr, delta_exec);
sched_rt_avg_update(rq, delta_exec);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 45ab0bf564e7..b19552a212de 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -30,6 +30,7 @@
#include <linux/irq_work.h>
#include <linux/tick.h>
#include <linux/slab.h>
+#include <linux/cgroup.h>
#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
@@ -37,7 +38,6 @@
#include "cpupri.h"
#include "cpudeadline.h"
-#include "cpuacct.h"
#ifdef CONFIG_SCHED_DEBUG
# define SCHED_WARN_ON(x) WARN_ONCE(x, #x)
diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c
index 45caf90b24cd..210b1f2146ff 100644
--- a/kernel/sched/stop_task.c
+++ b/kernel/sched/stop_task.c
@@ -72,7 +72,7 @@ static void put_prev_task_stop(struct rq *rq, struct task_struct *prev)
account_group_exec_runtime(curr, delta_exec);
curr->se.exec_start = rq_clock_task(rq);
- cpuacct_charge(curr, delta_exec);
+ cgroup_account_cputime(curr, delta_exec);
}
static void task_tick_stop(struct rq *rq, struct task_struct *curr, int queued)
diff --git a/kernel/sched/wait_bit.c b/kernel/sched/wait_bit.c
index f8159698aa4d..84cb3acd9260 100644
--- a/kernel/sched/wait_bit.c
+++ b/kernel/sched/wait_bit.c
@@ -183,7 +183,7 @@ static int wake_atomic_t_function(struct wait_queue_entry *wq_entry, unsigned mo
*/
static __sched
int __wait_on_atomic_t(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry,
- int (*action)(atomic_t *), unsigned mode)
+ wait_atomic_t_action_f action, unsigned int mode)
{
atomic_t *val;
int ret = 0;
@@ -193,7 +193,7 @@ int __wait_on_atomic_t(struct wait_queue_head *wq_head, struct wait_bit_queue_en
val = wbq_entry->key.flags;
if (atomic_read(val) == 0)
break;
- ret = (*action)(val);
+ ret = (*action)(val, mode);
} while (!ret && atomic_read(val) != 0);
finish_wait(wq_head, &wbq_entry->wq_entry);
return ret;
@@ -210,8 +210,9 @@ int __wait_on_atomic_t(struct wait_queue_head *wq_head, struct wait_bit_queue_en
}, \
}
-__sched int out_of_line_wait_on_atomic_t(atomic_t *p, int (*action)(atomic_t *),
- unsigned mode)
+__sched int out_of_line_wait_on_atomic_t(atomic_t *p,
+ wait_atomic_t_action_f action,
+ unsigned int mode)
{
struct wait_queue_head *wq_head = atomic_t_waitqueue(p);
DEFINE_WAIT_ATOMIC_T(wq_entry, p);
@@ -220,6 +221,15 @@ __sched int out_of_line_wait_on_atomic_t(atomic_t *p, int (*action)(atomic_t *),
}
EXPORT_SYMBOL(out_of_line_wait_on_atomic_t);
+__sched int atomic_t_wait(atomic_t *counter, unsigned int mode)
+{
+ schedule();
+ if (signal_pending_state(mode, current))
+ return -EINTR;
+ return 0;
+}
+EXPORT_SYMBOL(atomic_t_wait);
+
/**
* wake_up_atomic_t - Wake up a waiter on an atomic_t
* @p: The atomic_t being waited on, a kernel virtual address
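
With the task state now threaded through to the action callback, callers can use the stock atomic_t_wait() helper above rather than open-coding a schedule loop. A hedged in-kernel usage sketch (struct obj and its wake-up site are hypothetical; the wait_bit entry points are the existing API):

/* Hypothetical caller: block until obj->refs drops to zero, using the
 * new stock action with an uninterruptible sleep. */
#include <linux/atomic.h>
#include <linux/sched.h>
#include <linux/wait_bit.h>

struct obj {
	atomic_t refs;
};

static int obj_drain(struct obj *obj)
{
	/* the action now receives (atomic_t *, unsigned int mode) */
	return wait_on_atomic_t(&obj->refs, atomic_t_wait,
				TASK_UNINTERRUPTIBLE);
}

static void obj_put(struct obj *obj)
{
	if (atomic_dec_and_test(&obj->refs))
		wake_up_atomic_t(&obj->refs);
}
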