-rw-r--r--  include/linux/rseq.h        |   8
-rw-r--r--  include/linux/rseq_types.h  |  18
-rw-r--r--  kernel/sched/core.c         |   2
-rw-r--r--  kernel/sched/sched.h        | 150
4 files changed, 168 insertions, 10 deletions
diff --git a/include/linux/rseq.h b/include/linux/rseq.h
index bf8a6bf315f3..4c0e8bdd2dd9 100644
--- a/include/linux/rseq.h
+++ b/include/linux/rseq.h
@@ -73,13 +73,13 @@ static __always_inline void rseq_sched_switch_event(struct task_struct *t)
 }
 
 /*
- * Invoked from __set_task_cpu() when a task migrates to enforce an IDs
- * update.
+ * Invoked from __set_task_cpu() when a task migrates or from
+ * mm_cid_schedin() when the CID changes to enforce an IDs update.
  *
  * This does not raise TIF_NOTIFY_RESUME as that happens in
  * rseq_sched_switch_event().
  */
-static __always_inline void rseq_sched_set_task_cpu(struct task_struct *t, unsigned int cpu)
+static __always_inline void rseq_sched_set_ids_changed(struct task_struct *t)
 {
 	t->rseq.event.ids_changed = true;
 }
@@ -168,7 +168,7 @@ static inline void rseq_fork(struct task_struct *t, u64 clone_flags)
 static inline void rseq_handle_slowpath(struct pt_regs *regs) { }
 static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs) { }
 static inline void rseq_sched_switch_event(struct task_struct *t) { }
-static inline void rseq_sched_set_task_cpu(struct task_struct *t, unsigned int cpu) { }
+static inline void rseq_sched_set_ids_changed(struct task_struct *t) { }
 static inline void rseq_sched_set_task_mm_cid(struct task_struct *t, unsigned int cid) { }
 static inline void rseq_force_update(void) { }
 static inline void rseq_virt_userspace_exit(void) { }
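[Note: the rename reflects that the helper no longer records which CPU the task moved to; it only marks the task's ID tuple (CPU, node, MM CID) as stale, while raising the actual TIF_NOTIFY_RESUME wakeup stays in rseq_sched_switch_event(). A minimal user-space sketch of this record-now/notify-elsewhere split follows; all names and the two-flag layout are invented for illustration and are not the kernel's rseq API.]

#include <stdbool.h>

struct rseq_event_sketch {
	bool ids_changed;	/* hot path only records staleness */
	bool notify;		/* stand-in for TIF_NOTIFY_RESUME */
};

/* Cheap hot-path marker, analogous to rseq_sched_set_ids_changed() */
static inline void set_ids_changed(struct rseq_event_sketch *ev)
{
	ev->ids_changed = true;
}

/* The context-switch event is the single place that requests the
 * exit-to-user callback, analogous to rseq_sched_switch_event() */
static inline void sched_switch_event(struct rseq_event_sketch *ev)
{
	ev->notify = true;
}

/* Exit-to-user slow path: refresh IDs only if they were marked stale */
static inline void handle_slowpath(struct rseq_event_sketch *ev)
{
	if (ev->ids_changed) {
		ev->ids_changed = false;
		/* ... rewrite the user-visible cpu_id/node_id/mm_cid ... */
	}
	ev->notify = false;
}

Splitting the two keeps the frequently hit setter as a single store, while the comparatively expensive notification machinery runs only once per switch.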
diff --git a/include/linux/rseq_types.h b/include/linux/rseq_types.h
index 87854effe1ad..66b1482e1146 100644
--- a/include/linux/rseq_types.h
+++ b/include/linux/rseq_types.h
@@ -119,23 +119,31 @@ struct mm_cid_pcpu {
 /**
  * struct mm_mm_cid - Storage for per MM CID data
  * @pcpu:	Per CPU storage for CIDs associated to a CPU
+ * @percpu:	Set, when CIDs are in per CPU mode
+ * @transit:	Set to MM_CID_TRANSIT during a mode change transition phase
  * @max_cids:	The exclusive maximum CID value for allocation and convergence
+ * @lock:	Spinlock to protect all fields except @pcpu. It also protects
+ *		the MM cid cpumask and the MM cidmask bitmap.
+ * @mutex:	Mutex to serialize forks and exits related to this mm
  * @nr_cpus_allowed: The number of CPUs in the per MM allowed CPUs map. The map
  *		is growth only.
  * @users:	The number of tasks sharing this MM. Separate from mm::mm_users
  *		as that is modified by mmget()/mm_put() by other entities which
  *		do not actually share the MM.
- * @lock:	Spinlock to protect all fields except @pcpu. It also protects
- *		the MM cid cpumask and the MM cidmask bitmap.
- * @mutex:	Mutex to serialize forks and exits related to this mm
  */
 struct mm_mm_cid {
+	/* Hotpath read mostly members */
 	struct mm_cid_pcpu	__percpu *pcpu;
+	unsigned int		percpu;
+	unsigned int		transit;
 	unsigned int		max_cids;
-	unsigned int		nr_cpus_allowed;
-	unsigned int		users;
+
 	raw_spinlock_t		lock;
 	struct mutex		mutex;
+
+	/* Low frequency modified */
+	unsigned int		nr_cpus_allowed;
+	unsigned int		users;
 } ____cacheline_aligned_in_smp;
 #else /* CONFIG_SCHED_MM_CID */
 struct mm_mm_cid { };
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 55bb9c9ae32c..659ae56b459f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -10495,6 +10495,8 @@ void mm_init_cid(struct mm_struct *mm, struct task_struct *p)
 		per_cpu_ptr(pcpu, cpu)->cid = MM_CID_UNSET;
 
 	mm->mm_cid.max_cids = 0;
+	mm->mm_cid.percpu = 0;
+	mm->mm_cid.transit = 0;
 	mm->mm_cid.nr_cpus_allowed = p->nr_cpus_allowed;
 	mm->mm_cid.users = 0;
 	raw_spin_lock_init(&mm->mm_cid.lock);
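[Note: the struct reorder is a hot/cold split: the members the scheduler fast path reads on every context switch (@pcpu, @percpu, @transit, @max_cids) come first, the locks sit next to the slow-path bookkeeping they protect, and the whole structure is cacheline aligned to avoid false sharing. A generic C sketch of the same layout idea follows; the 64-byte line size and all names are assumptions for illustration only.]

#include <pthread.h>
#include <stdalign.h>

#define CACHE_LINE 64	/* assumed; the kernel uses the per-arch L1 line size */

struct hot_cold_sketch {
	/* Hot path, read mostly: touched on every "context switch" */
	void		*pcpu;
	unsigned int	mode;
	unsigned int	limit;

	/* Serialization for the slow-path fields below */
	pthread_mutex_t	lock;

	/* Low frequency modified: fork/exit style bookkeeping only */
	unsigned int	nr_cpus;
	unsigned int	users;
};

/* Analogue of ____cacheline_aligned_in_smp: keep unrelated data off
 * the cache line(s) this structure occupies. */
static alignas(CACHE_LINE) struct hot_cold_sketch sketch;

The payoff is that a writer of the cold fields dirties a different cache line than the one every context switch reads, so the hot loads stay shared across CPUs.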
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 4b49284504fb..82c7978d548e 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2209,7 +2209,7 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
 	smp_wmb();
 	WRITE_ONCE(task_thread_info(p)->cpu, cpu);
 	p->wake_cpu = cpu;
-	rseq_sched_set_task_cpu(p, cpu);
+	rseq_sched_set_ids_changed(p);
 #endif /* CONFIG_SMP */
 }
 
@@ -3598,6 +3598,153 @@ static __always_inline void mm_drop_cid_on_cpu(struct mm_struct *mm, struct mm_c
 	mm_drop_cid(mm, pcp->cid);
 }
 
+static inline unsigned int __mm_get_cid(struct mm_struct *mm, unsigned int max_cids)
+{
+	unsigned int cid = find_first_zero_bit(mm_cidmask(mm), max_cids);
+
+	if (cid >= max_cids)
+		return MM_CID_UNSET;
+	if (test_and_set_bit(cid, mm_cidmask(mm)))
+		return MM_CID_UNSET;
+	return cid;
+}
+
+static inline unsigned int mm_get_cid(struct mm_struct *mm)
+{
+	unsigned int cid = __mm_get_cid(mm, READ_ONCE(mm->mm_cid.max_cids));
+
+	while (cid == MM_CID_UNSET) {
+		cpu_relax();
+		cid = __mm_get_cid(mm, num_possible_cpus());
+	}
+	return cid;
+}
+
+static inline unsigned int mm_cid_converge(struct mm_struct *mm, unsigned int orig_cid,
+					   unsigned int max_cids)
+{
+	unsigned int new_cid, cid = cpu_cid_to_cid(orig_cid);
+
+	/* Is it in the optimal CID space? */
+	if (likely(cid < max_cids))
+		return orig_cid;
+
+	/* Try to find one in the optimal space. Otherwise keep the provided. */
+	new_cid = __mm_get_cid(mm, max_cids);
+	if (new_cid != MM_CID_UNSET) {
+		mm_drop_cid(mm, cid);
+		/* Preserve the ONCPU mode of the original CID */
+		return new_cid | (orig_cid & MM_CID_ONCPU);
+	}
+	return orig_cid;
+}
+
+static __always_inline void mm_cid_update_task_cid(struct task_struct *t, unsigned int cid)
+{
+	if (t->mm_cid.cid != cid) {
+		t->mm_cid.cid = cid;
+		rseq_sched_set_ids_changed(t);
+	}
+}
+
+static __always_inline void mm_cid_update_pcpu_cid(struct mm_struct *mm, unsigned int cid)
+{
+	__this_cpu_write(mm->mm_cid.pcpu->cid, cid);
+}
+
+static __always_inline void mm_cid_from_cpu(struct task_struct *t, unsigned int cpu_cid)
+{
+	unsigned int max_cids, tcid = t->mm_cid.cid;
+	struct mm_struct *mm = t->mm;
+
+	max_cids = READ_ONCE(mm->mm_cid.max_cids);
+	/* Optimize for the common case where both have the ONCPU bit set */
+	if (likely(cid_on_cpu(cpu_cid & tcid))) {
+		if (likely(cpu_cid_to_cid(cpu_cid) < max_cids)) {
+			mm_cid_update_task_cid(t, cpu_cid);
+			return;
+		}
+		/* Try to converge into the optimal CID space */
+		cpu_cid = mm_cid_converge(mm, cpu_cid, max_cids);
+	} else {
+		/* Hand over or drop the task owned CID */
+		if (cid_on_task(tcid)) {
+			if (cid_on_cpu(cpu_cid))
+				mm_unset_cid_on_task(t);
+			else
+				cpu_cid = cid_to_cpu_cid(tcid);
+		}
+		/* Still nothing, allocate a new one */
+		if (!cid_on_cpu(cpu_cid))
+			cpu_cid = cid_to_cpu_cid(mm_get_cid(mm));
+	}
+	mm_cid_update_pcpu_cid(mm, cpu_cid);
+	mm_cid_update_task_cid(t, cpu_cid);
+}
+
+static __always_inline void mm_cid_from_task(struct task_struct *t, unsigned int cpu_cid)
+{
+	unsigned int max_cids, tcid = t->mm_cid.cid;
+	struct mm_struct *mm = t->mm;
+
+	max_cids = READ_ONCE(mm->mm_cid.max_cids);
+	/* Optimize for the common case, where both have the ONCPU bit clear */
+	if (likely(cid_on_task(tcid | cpu_cid))) {
+		if (likely(tcid < max_cids)) {
+			mm_cid_update_pcpu_cid(mm, tcid);
+			return;
+		}
+		/* Try to converge into the optimal CID space */
+		tcid = mm_cid_converge(mm, tcid, max_cids);
+	} else {
+		/* Hand over or drop the CPU owned CID */
+		if (cid_on_cpu(cpu_cid)) {
+			if (cid_on_task(tcid))
+				mm_drop_cid_on_cpu(mm, this_cpu_ptr(mm->mm_cid.pcpu));
+			else
+				tcid = cpu_cid_to_cid(cpu_cid);
+		}
+		/* Still nothing, allocate a new one */
+		if (!cid_on_task(tcid))
+			tcid = mm_get_cid(mm);
+		/* Set the transition mode flag if required */
+		tcid |= READ_ONCE(mm->mm_cid.transit);
+	}
+	mm_cid_update_pcpu_cid(mm, tcid);
+	mm_cid_update_task_cid(t, tcid);
+}
+
+static __always_inline void mm_cid_schedin(struct task_struct *next)
+{
+	struct mm_struct *mm = next->mm;
+	unsigned int cpu_cid;
+
+	if (!next->mm_cid.active)
+		return;
+
+	cpu_cid = __this_cpu_read(mm->mm_cid.pcpu->cid);
+	if (likely(!READ_ONCE(mm->mm_cid.percpu)))
+		mm_cid_from_task(next, cpu_cid);
+	else
+		mm_cid_from_cpu(next, cpu_cid);
+}
+
+static __always_inline void mm_cid_schedout(struct task_struct *prev)
+{
+	/* During mode transitions CIDs are temporary and need to be dropped */
+	if (likely(!cid_in_transit(prev->mm_cid.cid)))
+		return;
+
+	mm_drop_cid(prev->mm, cid_from_transit_cid(prev->mm_cid.cid));
+	prev->mm_cid.cid = MM_CID_UNSET;
+}
+
+static inline void mm_cid_switch_to(struct task_struct *prev, struct task_struct *next)
+{
+	mm_cid_schedout(prev);
+	mm_cid_schedin(next);
+}
+
 /* Active implementation */
 static inline void init_sched_mm_cid(struct task_struct *t)
 {
@@ -3675,6 +3822,7 @@ static inline void switch_mm_cid(struct task_struct *prev, struct task_struct *n
 #else /* !CONFIG_SCHED_MM_CID: */
 static inline void mm_cid_select(struct task_struct *t) { }
 static inline void switch_mm_cid(struct task_struct *prev, struct task_struct *next) { }
+static inline void mm_cid_switch_to(struct task_struct *prev, struct task_struct *next) { }
 #endif /* !CONFIG_SCHED_MM_CID */
 
 extern u64 avg_vruntime(struct cfs_rq *cfs_rq);
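[Note: the new sched.h helpers implement the core allocation scheme: CIDs are claimed from a bitmap, preferably below max_cids (the compact "optimal" space), with the full possible-CPU space as a guaranteed fallback, and over-limit CIDs opportunistically converge back into the compact range when a slot frees up. The stand-alone user-space sketch below models that allocate/fallback/converge cycle with C11 atomics; it is illustrative only: a single-word bitmap, no ONCPU/transit bit encoding, and all names invented for the sketch.]

#include <stdatomic.h>
#include <stdio.h>

#define MAX_CIDS  8u		/* stand-in for num_possible_cpus() */
#define CID_UNSET (~0u)

static atomic_ulong cidmask;	/* one bit per CID, like mm_cidmask() */

/* __mm_get_cid() analogue: claim a clear bit below max_cids atomically */
static unsigned int get_cid_below(unsigned int max_cids)
{
	for (unsigned int cid = 0; cid < max_cids; cid++) {
		unsigned long bit = 1UL << cid;

		if (atomic_load(&cidmask) & bit)
			continue;
		/* test_and_set_bit() analogue: only the winner owns the CID */
		if (!(atomic_fetch_or(&cidmask, bit) & bit))
			return cid;
	}
	return CID_UNSET;
}

/* mm_get_cid() analogue: the fallback to the full space must eventually
 * succeed because there are never more concurrent users than CIDs. */
static unsigned int get_cid(unsigned int max_cids)
{
	unsigned int cid = get_cid_below(max_cids);

	while (cid == CID_UNSET)
		cid = get_cid_below(MAX_CIDS);
	return cid;
}

static void put_cid(unsigned int cid)
{
	atomic_fetch_and(&cidmask, ~(1UL << cid));
}

/* mm_cid_converge() analogue: trade an over-limit CID for a compact one */
static unsigned int converge(unsigned int cid, unsigned int max_cids)
{
	unsigned int new_cid;

	if (cid < max_cids)
		return cid;
	new_cid = get_cid_below(max_cids);
	if (new_cid == CID_UNSET)
		return cid;		/* no room yet, keep the old CID */
	put_cid(cid);
	return new_cid;
}

int main(void)
{
	unsigned int a = get_cid(2), b = get_cid(2), c = get_cid(2);

	printf("a=%u b=%u c=%u\n", a, b, c);	/* c lands above max_cids */
	put_cid(a);				/* concurrency drops */
	printf("c converges to %u\n", converge(c, 2));
	return 0;
}

Running it prints a=0 b=1 c=2 and then shows c converging to the freed CID 0, mirroring how a task carrying an over-limit CID migrates back into the compact space once mm_cid_converge() finds room; the kernel version additionally preserves the ONCPU ownership bit and tags temporary CIDs with the transit flag during mode changes.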
