Diffstat (limited to 'fs/proc/task_mmu.c')
-rw-r--r-- | fs/proc/task_mmu.c | 190
1 files changed, 131 insertions, 59 deletions
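The patch below routes the locking state used by /proc/<pid>/maps readers and the PROCMAP_QUERY ioctl through a new struct proc_maps_locking_ctx embedded in struct proc_maps_private. The structure definition itself is outside this diffstat (it would live in a header such as fs/proc/internal.h); the following is a minimal sketch reconstructed from the mm, locked_vma and mmap_locked accesses in the diff, not the authoritative definition:

/*
 * Sketch only: reconstructed from the fields this diff dereferences.
 * The real definition lives in a header outside this diffstat
 * (likely fs/proc/internal.h) and may differ in detail.
 */
struct proc_maps_locking_ctx {
	struct mm_struct *mm;
#ifdef CONFIG_PER_VMA_LOCK
	struct vm_area_struct *locked_vma;	/* VMA held via per-VMA read lock */
	bool mmap_locked;			/* fell back to mmap_read_lock()? */
#endif
};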
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index b26ae556b446..fc35a0543f01 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -132,18 +132,24 @@ static void release_task_mempolicy(struct proc_maps_private *priv)
 
 #ifdef CONFIG_PER_VMA_LOCK
 
-static void unlock_vma(struct proc_maps_private *priv)
+static void reset_lock_ctx(struct proc_maps_locking_ctx *lock_ctx)
 {
-	if (priv->locked_vma) {
-		vma_end_read(priv->locked_vma);
-		priv->locked_vma = NULL;
+	lock_ctx->locked_vma = NULL;
+	lock_ctx->mmap_locked = false;
+}
+
+static void unlock_ctx_vma(struct proc_maps_locking_ctx *lock_ctx)
+{
+	if (lock_ctx->locked_vma) {
+		vma_end_read(lock_ctx->locked_vma);
+		lock_ctx->locked_vma = NULL;
 	}
 }
 
 static const struct seq_operations proc_pid_maps_op;
 static inline bool lock_vma_range(struct seq_file *m,
-				  struct proc_maps_private *priv)
+				  struct proc_maps_locking_ctx *lock_ctx)
 {
 	/*
 	 * smaps and numa_maps perform page table walk, therefore require
@@ -151,25 +157,24 @@ static inline bool lock_vma_range(struct seq_file *m,
 	 * walking the vma tree under rcu read protection.
 	 */
 	if (m->op != &proc_pid_maps_op) {
-		if (mmap_read_lock_killable(priv->mm))
+		if (mmap_read_lock_killable(lock_ctx->mm))
 			return false;
 
-		priv->mmap_locked = true;
+		lock_ctx->mmap_locked = true;
 	} else {
 		rcu_read_lock();
-		priv->locked_vma = NULL;
-		priv->mmap_locked = false;
+		reset_lock_ctx(lock_ctx);
 	}
 
 	return true;
 }
 
-static inline void unlock_vma_range(struct proc_maps_private *priv)
+static inline void unlock_vma_range(struct proc_maps_locking_ctx *lock_ctx)
 {
-	if (priv->mmap_locked) {
-		mmap_read_unlock(priv->mm);
+	if (lock_ctx->mmap_locked) {
+		mmap_read_unlock(lock_ctx->mm);
 	} else {
-		unlock_vma(priv);
+		unlock_ctx_vma(lock_ctx);
 		rcu_read_unlock();
 	}
 }
@@ -177,15 +182,16 @@ static inline void unlock_vma_range(struct proc_maps_private *priv)
 static struct vm_area_struct *get_next_vma(struct proc_maps_private *priv,
 					   loff_t last_pos)
 {
+	struct proc_maps_locking_ctx *lock_ctx = &priv->lock_ctx;
 	struct vm_area_struct *vma;
 
-	if (priv->mmap_locked)
+	if (lock_ctx->mmap_locked)
 		return vma_next(&priv->iter);
 
-	unlock_vma(priv);
-	vma = lock_next_vma(priv->mm, &priv->iter, last_pos);
+	unlock_ctx_vma(lock_ctx);
+	vma = lock_next_vma(lock_ctx->mm, &priv->iter, last_pos);
 	if (!IS_ERR_OR_NULL(vma))
-		priv->locked_vma = vma;
+		lock_ctx->locked_vma = vma;
 
 	return vma;
 }
@@ -193,14 +199,16 @@ static struct vm_area_struct *get_next_vma(struct proc_maps_private *priv,
 static inline bool fallback_to_mmap_lock(struct proc_maps_private *priv,
 					 loff_t pos)
 {
-	if (priv->mmap_locked)
+	struct proc_maps_locking_ctx *lock_ctx = &priv->lock_ctx;
+
+	if (lock_ctx->mmap_locked)
 		return false;
 
 	rcu_read_unlock();
-	mmap_read_lock(priv->mm);
+	mmap_read_lock(lock_ctx->mm);
 	/* Reinitialize the iterator after taking mmap_lock */
 	vma_iter_set(&priv->iter, pos);
-	priv->mmap_locked = true;
+	lock_ctx->mmap_locked = true;
 
 	return true;
 }
@@ -208,14 +216,14 @@ static inline bool fallback_to_mmap_lock(struct proc_maps_private *priv,
 #else /* CONFIG_PER_VMA_LOCK */
 
 static inline bool lock_vma_range(struct seq_file *m,
-				  struct proc_maps_private *priv)
+				  struct proc_maps_locking_ctx *lock_ctx)
 {
-	return mmap_read_lock_killable(priv->mm) == 0;
+	return mmap_read_lock_killable(lock_ctx->mm) == 0;
}
 
-static inline void unlock_vma_range(struct proc_maps_private *priv)
+static inline void unlock_vma_range(struct proc_maps_locking_ctx *lock_ctx)
 {
-	mmap_read_unlock(priv->mm);
+	mmap_read_unlock(lock_ctx->mm);
 }
 
 static struct vm_area_struct *get_next_vma(struct proc_maps_private *priv,
@@ -258,7 +266,7 @@ retry:
 		*ppos = vma->vm_end;
 	} else {
 		*ppos = SENTINEL_VMA_GATE;
-		vma = get_gate_vma(priv->mm);
+		vma = get_gate_vma(priv->lock_ctx.mm);
 	}
 
 	return vma;
@@ -267,6 +275,7 @@ retry:
 static void *m_start(struct seq_file *m, loff_t *ppos)
 {
 	struct proc_maps_private *priv = m->private;
+	struct proc_maps_locking_ctx *lock_ctx;
 	loff_t last_addr = *ppos;
 	struct mm_struct *mm;
 
@@ -278,14 +287,15 @@ static void *m_start(struct seq_file *m, loff_t *ppos)
 	if (!priv->task)
 		return ERR_PTR(-ESRCH);
 
-	mm = priv->mm;
+	lock_ctx = &priv->lock_ctx;
+	mm = lock_ctx->mm;
 	if (!mm || !mmget_not_zero(mm)) {
 		put_task_struct(priv->task);
 		priv->task = NULL;
 		return NULL;
 	}
 
-	if (!lock_vma_range(m, priv)) {
+	if (!lock_vma_range(m, lock_ctx)) {
 		mmput(mm);
 		put_task_struct(priv->task);
 		priv->task = NULL;
@@ -318,13 +328,13 @@ static void *m_next(struct seq_file *m, void *v, loff_t *ppos)
 static void m_stop(struct seq_file *m, void *v)
 {
 	struct proc_maps_private *priv = m->private;
-	struct mm_struct *mm = priv->mm;
+	struct mm_struct *mm = priv->lock_ctx.mm;
 
 	if (!priv->task)
 		return;
 
 	release_task_mempolicy(priv);
-	unlock_vma_range(priv);
+	unlock_vma_range(&priv->lock_ctx);
 	mmput(mm);
 	put_task_struct(priv->task);
 	priv->task = NULL;
@@ -339,9 +349,9 @@ static int proc_maps_open(struct inode *inode, struct file *file,
 		return -ENOMEM;
 
 	priv->inode = inode;
-	priv->mm = proc_mem_open(inode, PTRACE_MODE_READ);
-	if (IS_ERR(priv->mm)) {
-		int err = PTR_ERR(priv->mm);
+	priv->lock_ctx.mm = proc_mem_open(inode, PTRACE_MODE_READ);
+	if (IS_ERR(priv->lock_ctx.mm)) {
+		int err = PTR_ERR(priv->lock_ctx.mm);
 
 		seq_release_private(inode, file);
 		return err;
@@ -355,8 +365,8 @@ static int proc_map_release(struct inode *inode, struct file *file)
 	struct seq_file *seq = file->private_data;
 	struct proc_maps_private *priv = seq->private;
 
-	if (priv->mm)
-		mmdrop(priv->mm);
+	if (priv->lock_ctx.mm)
+		mmdrop(priv->lock_ctx.mm);
 
 	return seq_release_private(inode, file);
 }
@@ -517,28 +527,90 @@ static int pid_maps_open(struct inode *inode, struct file *file)
 	PROCMAP_QUERY_VMA_FLAGS				\
 )
 
-static int query_vma_setup(struct mm_struct *mm)
+#ifdef CONFIG_PER_VMA_LOCK
+
+static int query_vma_setup(struct proc_maps_locking_ctx *lock_ctx)
 {
-	return mmap_read_lock_killable(mm);
+	reset_lock_ctx(lock_ctx);
+
+	return 0;
 }
 
-static void query_vma_teardown(struct mm_struct *mm, struct vm_area_struct *vma)
+static void query_vma_teardown(struct proc_maps_locking_ctx *lock_ctx)
 {
-	mmap_read_unlock(mm);
+	if (lock_ctx->mmap_locked) {
+		mmap_read_unlock(lock_ctx->mm);
+		lock_ctx->mmap_locked = false;
+	} else {
+		unlock_ctx_vma(lock_ctx);
+	}
+}
+
+static struct vm_area_struct *query_vma_find_by_addr(struct proc_maps_locking_ctx *lock_ctx,
+						     unsigned long addr)
+{
+	struct mm_struct *mm = lock_ctx->mm;
+	struct vm_area_struct *vma;
+	struct vma_iterator vmi;
+
+	if (lock_ctx->mmap_locked)
+		return find_vma(mm, addr);
+
+	/* Unlock previously locked VMA and find the next one under RCU */
+	unlock_ctx_vma(lock_ctx);
+	rcu_read_lock();
+	vma_iter_init(&vmi, mm, addr);
+	vma = lock_next_vma(mm, &vmi, addr);
+	rcu_read_unlock();
+
+	if (!vma)
+		return NULL;
+
+	if (!IS_ERR(vma)) {
+		lock_ctx->locked_vma = vma;
+		return vma;
+	}
+
+	if (PTR_ERR(vma) == -EAGAIN) {
+		/* Fallback to mmap_lock on vma->vm_refcnt overflow */
+		mmap_read_lock(mm);
+		vma = find_vma(mm, addr);
+		lock_ctx->mmap_locked = true;
+	}
+
+	return vma;
+}
+
+#else /* CONFIG_PER_VMA_LOCK */
+
+static int query_vma_setup(struct proc_maps_locking_ctx *lock_ctx)
+{
+	return mmap_read_lock_killable(lock_ctx->mm);
+}
+
+static void query_vma_teardown(struct proc_maps_locking_ctx *lock_ctx)
+{
+	mmap_read_unlock(lock_ctx->mm);
 }
 
-static struct vm_area_struct *query_vma_find_by_addr(struct mm_struct *mm, unsigned long addr)
+static struct vm_area_struct *query_vma_find_by_addr(struct proc_maps_locking_ctx *lock_ctx,
+						     unsigned long addr)
 {
-	return find_vma(mm, addr);
+	return find_vma(lock_ctx->mm, addr);
 }
 
-static struct vm_area_struct *query_matching_vma(struct mm_struct *mm,
+#endif /* CONFIG_PER_VMA_LOCK */
+
+static struct vm_area_struct *query_matching_vma(struct proc_maps_locking_ctx *lock_ctx,
 						 unsigned long addr, u32 flags)
 {
 	struct vm_area_struct *vma;
 
 next_vma:
-	vma = query_vma_find_by_addr(mm, addr);
+	vma = query_vma_find_by_addr(lock_ctx, addr);
+	if (IS_ERR(vma))
+		return vma;
+
 	if (!vma)
 		goto no_vma;
 
@@ -579,11 +651,11 @@ no_vma:
 	return ERR_PTR(-ENOENT);
 }
 
-static int do_procmap_query(struct proc_maps_private *priv, void __user *uarg)
+static int do_procmap_query(struct mm_struct *mm, void __user *uarg)
 {
+	struct proc_maps_locking_ctx lock_ctx = { .mm = mm };
 	struct procmap_query karg;
 	struct vm_area_struct *vma;
-	struct mm_struct *mm;
 	const char *name = NULL;
 	char build_id_buf[BUILD_ID_SIZE_MAX], *name_buf = NULL;
 	__u64 usize;
@@ -610,17 +682,16 @@ static int do_procmap_query(struct proc_maps_private *priv, void __user *uarg)
 	if (!!karg.build_id_size != !!karg.build_id_addr)
 		return -EINVAL;
 
-	mm = priv->mm;
 	if (!mm || !mmget_not_zero(mm))
 		return -ESRCH;
 
-	err = query_vma_setup(mm);
+	err = query_vma_setup(&lock_ctx);
 	if (err) {
 		mmput(mm);
 		return err;
 	}
 
-	vma = query_matching_vma(mm, karg.query_addr, karg.query_flags);
+	vma = query_matching_vma(&lock_ctx, karg.query_addr, karg.query_flags);
 	if (IS_ERR(vma)) {
 		err = PTR_ERR(vma);
 		vma = NULL;
@@ -705,7 +776,7 @@ static int do_procmap_query(struct proc_maps_private *priv, void __user *uarg)
 	}
 
 	/* unlock vma or mmap_lock, and put mm_struct before copying data to user */
-	query_vma_teardown(mm, vma);
+	query_vma_teardown(&lock_ctx);
 	mmput(mm);
 
 	if (karg.vma_name_size && copy_to_user(u64_to_user_ptr(karg.vma_name_addr),
@@ -725,7 +796,7 @@ static int do_procmap_query(struct proc_maps_private *priv, void __user *uarg)
 	return 0;
 
 out:
-	query_vma_teardown(mm, vma);
+	query_vma_teardown(&lock_ctx);
 	mmput(mm);
 	kfree(name_buf);
 	return err;
@@ -738,7 +809,8 @@ static long procfs_procmap_ioctl(struct file *file, unsigned int cmd, unsigned l
 
 	switch (cmd) {
 	case PROCMAP_QUERY:
-		return do_procmap_query(priv, (void __user *)arg);
+		/* priv->lock_ctx.mm is set during file open operation */
+		return do_procmap_query(priv->lock_ctx.mm, (void __user *)arg);
 	default:
 		return -ENOIOCTLCMD;
 	}
@@ -1297,8 +1369,8 @@ static int show_smap(struct seq_file *m, void *v)
 	__show_smap(m, &mss, false);
 
 	seq_printf(m, "THPeligible: %8u\n",
-		   !!thp_vma_allowable_orders(vma, vma->vm_flags,
-					      TVA_SMAPS | TVA_ENFORCE_SYSFS, THP_ORDERS_ALL));
+		   !!thp_vma_allowable_orders(vma, vma->vm_flags, TVA_SMAPS,
+					      THP_ORDERS_ALL));
 
 	if (arch_pkeys_enabled())
 		seq_printf(m, "ProtectionKey: %8u\n", vma_pkey(vma));
@@ -1311,7 +1383,7 @@ static int show_smaps_rollup(struct seq_file *m, void *v)
 {
 	struct proc_maps_private *priv = m->private;
 	struct mem_size_stats mss = {};
-	struct mm_struct *mm = priv->mm;
+	struct mm_struct *mm = priv->lock_ctx.mm;
 	struct vm_area_struct *vma;
 	unsigned long vma_start = 0, last_vma_end = 0;
 	int ret = 0;
@@ -1456,9 +1528,9 @@ static int smaps_rollup_open(struct inode *inode, struct file *file)
 		goto out_free;
 
 	priv->inode = inode;
-	priv->mm = proc_mem_open(inode, PTRACE_MODE_READ);
-	if (IS_ERR_OR_NULL(priv->mm)) {
-		ret = priv->mm ? PTR_ERR(priv->mm) : -ESRCH;
+	priv->lock_ctx.mm = proc_mem_open(inode, PTRACE_MODE_READ);
+	if (IS_ERR_OR_NULL(priv->lock_ctx.mm)) {
+		ret = priv->lock_ctx.mm ? PTR_ERR(priv->lock_ctx.mm) : -ESRCH;
 
 		single_release(inode, file);
 		goto out_free;
@@ -1476,8 +1548,8 @@ static int smaps_rollup_release(struct inode *inode, struct file *file)
 	struct seq_file *seq = file->private_data;
 	struct proc_maps_private *priv = seq->private;
 
-	if (priv->mm)
-		mmdrop(priv->mm);
+	if (priv->lock_ctx.mm)
+		mmdrop(priv->lock_ctx.mm);
 
 	kfree(priv);
 	return single_release(inode, file);
@@ -1520,7 +1592,7 @@ static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr,
 		return false;
 	if (!is_cow_mapping(vma->vm_flags))
 		return false;
-	if (likely(!test_bit(MMF_HAS_PINNED, &vma->vm_mm->flags)))
+	if (likely(!mm_flags_test(MMF_HAS_PINNED, vma->vm_mm)))
 		return false;
 	folio = vm_normal_folio(vma, addr, pte);
 	if (!folio)
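For reference, the reworked PROCMAP_QUERY path reduces to the locking pattern below. This is a condensed sketch built only from the helpers in the diff above; the hypothetical wrapper name procmap_query_example and the elision of argument parsing and the name/build-ID copy-out are mine, not the kernel's:

/*
 * Sketch of the new PROCMAP_QUERY locking flow, assuming the
 * query_vma_* helpers from the diff above. Not a complete
 * implementation of do_procmap_query().
 */
static int procmap_query_example(struct mm_struct *mm, unsigned long addr, u32 flags)
{
	struct proc_maps_locking_ctx lock_ctx = { .mm = mm };
	struct vm_area_struct *vma;
	int err;

	if (!mm || !mmget_not_zero(mm))
		return -ESRCH;

	/* Per-VMA-lock build: just resets ctx; otherwise takes mmap_lock */
	err = query_vma_setup(&lock_ctx);
	if (err) {
		mmput(mm);
		return err;
	}

	vma = query_matching_vma(&lock_ctx, addr, flags);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto out;
	}

	/* ... read fields from the locked vma here ... */
	err = 0;
out:
	/* Drops the per-VMA read lock or mmap_lock, whichever is held */
	query_vma_teardown(&lock_ctx);
	mmput(mm);
	return err;
}

The on-stack lock_ctx is what lets do_procmap_query() take a bare mm_struct: the locking state no longer needs to live in the long-lived proc_maps_private, and query_vma_find_by_addr() can transparently fall back from per-VMA RCU locking to mmap_lock on -EAGAIN (vm_refcnt overflow) without the caller noticing.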