Diffstat (limited to 'fs/proc/task_mmu.c')
-rw-r--r--  fs/proc/task_mmu.c  196
1 file changed, 168 insertions(+), 28 deletions(-)
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 4be91eb6ea5c..29cca0e6d0ff 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -29,6 +29,9 @@
#include <asm/tlbflush.h>
#include "internal.h"
+#define SENTINEL_VMA_END -1
+#define SENTINEL_VMA_GATE -2
+
#define SEQ_PUT_DEC(str, val) \
seq_put_decimal_ull_width(m, str, (val) << (PAGE_SHIFT-10), 8)
void task_mem(struct seq_file *m, struct mm_struct *mm)
@@ -36,9 +39,9 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
unsigned long text, lib, swap, anon, file, shmem;
unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss;
- anon = get_mm_counter(mm, MM_ANONPAGES);
- file = get_mm_counter(mm, MM_FILEPAGES);
- shmem = get_mm_counter(mm, MM_SHMEMPAGES);
+ anon = get_mm_counter_sum(mm, MM_ANONPAGES);
+ file = get_mm_counter_sum(mm, MM_FILEPAGES);
+ shmem = get_mm_counter_sum(mm, MM_SHMEMPAGES);
/*
* Note: to minimize their overhead, mm maintains hiwater_vm and
@@ -59,7 +62,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
text = min(text, mm->exec_vm << PAGE_SHIFT);
lib = (mm->exec_vm << PAGE_SHIFT) - text;
- swap = get_mm_counter(mm, MM_SWAPENTS);
+ swap = get_mm_counter_sum(mm, MM_SWAPENTS);
SEQ_PUT_DEC("VmPeak:\t", hiwater_vm);
SEQ_PUT_DEC(" kB\nVmSize:\t", total_vm);
SEQ_PUT_DEC(" kB\nVmLck:\t", mm->locked_vm);
@@ -92,12 +95,12 @@ unsigned long task_statm(struct mm_struct *mm,
unsigned long *shared, unsigned long *text,
unsigned long *data, unsigned long *resident)
{
- *shared = get_mm_counter(mm, MM_FILEPAGES) +
- get_mm_counter(mm, MM_SHMEMPAGES);
+ *shared = get_mm_counter_sum(mm, MM_FILEPAGES) +
+ get_mm_counter_sum(mm, MM_SHMEMPAGES);
*text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK))
>> PAGE_SHIFT;
*data = mm->data_vm + mm->stack_vm;
- *resident = *shared + get_mm_counter(mm, MM_ANONPAGES);
+ *resident = *shared + get_mm_counter_sum(mm, MM_ANONPAGES);
return mm->total_vm;
}
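
The counter changes above trade speed for accuracy: get_mm_counter() does a cheap approximate percpu_counter read, while the _sum variant folds in every CPU's pending delta. The helper itself is introduced outside this diff, so the following is only a sketch of what it presumably looks like, mirroring how get_mm_counter() wraps percpu_counter_read_positive() in include/linux/mm.h:

static inline unsigned long get_mm_counter_sum(struct mm_struct *mm, int member)
{
	/* Exact sum across all per-CPU deltas: slower than the
	 * approximate read, but /proc output no longer reports values
	 * that can drift by up to the per-CPU batching amount. */
	return percpu_counter_sum_positive(&mm->rss_stat[member]);
}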
@@ -127,15 +130,134 @@ static void release_task_mempolicy(struct proc_maps_private *priv)
}
#endif
-static struct vm_area_struct *proc_get_vma(struct proc_maps_private *priv,
- loff_t *ppos)
+#ifdef CONFIG_PER_VMA_LOCK
+
+static void unlock_vma(struct proc_maps_private *priv)
+{
+ if (priv->locked_vma) {
+ vma_end_read(priv->locked_vma);
+ priv->locked_vma = NULL;
+ }
+}
+
+static const struct seq_operations proc_pid_maps_op;
+
+static inline bool lock_vma_range(struct seq_file *m,
+ struct proc_maps_private *priv)
+{
+ /*
+ * smaps and numa_maps perform a page table walk and therefore
+ * require mmap_lock, but maps can be read by locking only the
+ * vma and walking the vma tree under rcu read protection.
+ */
+ if (m->op != &proc_pid_maps_op) {
+ if (mmap_read_lock_killable(priv->mm))
+ return false;
+
+ priv->mmap_locked = true;
+ } else {
+ rcu_read_lock();
+ priv->locked_vma = NULL;
+ priv->mmap_locked = false;
+ }
+
+ return true;
+}
+
+static inline void unlock_vma_range(struct proc_maps_private *priv)
+{
+ if (priv->mmap_locked) {
+ mmap_read_unlock(priv->mm);
+ } else {
+ unlock_vma(priv);
+ rcu_read_unlock();
+ }
+}
+
+static struct vm_area_struct *get_next_vma(struct proc_maps_private *priv,
+ loff_t last_pos)
{
- struct vm_area_struct *vma = vma_next(&priv->iter);
+ struct vm_area_struct *vma;
+
+ if (priv->mmap_locked)
+ return vma_next(&priv->iter);
+
+ unlock_vma(priv);
+ vma = lock_next_vma(priv->mm, &priv->iter, last_pos);
+ if (!IS_ERR_OR_NULL(vma))
+ priv->locked_vma = vma;
+
+ return vma;
+}
+
+static inline bool fallback_to_mmap_lock(struct proc_maps_private *priv,
+ loff_t pos)
+{
+ if (priv->mmap_locked)
+ return false;
+
+ rcu_read_unlock();
+ mmap_read_lock(priv->mm);
+ /* Reinitialize the iterator after taking mmap_lock */
+ vma_iter_set(&priv->iter, pos);
+ priv->mmap_locked = true;
+
+ return true;
+}
+
+#else /* CONFIG_PER_VMA_LOCK */
+
+static inline bool lock_vma_range(struct seq_file *m,
+ struct proc_maps_private *priv)
+{
+ return mmap_read_lock_killable(priv->mm) == 0;
+}
+
+static inline void unlock_vma_range(struct proc_maps_private *priv)
+{
+ mmap_read_unlock(priv->mm);
+}
+
+static struct vm_area_struct *get_next_vma(struct proc_maps_private *priv,
+ loff_t last_pos)
+{
+ return vma_next(&priv->iter);
+}
+
+static inline bool fallback_to_mmap_lock(struct proc_maps_private *priv,
+ loff_t pos)
+{
+ return false;
+}
+
+#endif /* CONFIG_PER_VMA_LOCK */
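
The helpers above touch per-reader state that this patch expects to exist in struct proc_maps_private (fs/proc/internal.h, not part of this diff). A sketch of the expected fields, with names taken from their uses here and the layout assumed:

struct proc_maps_private {
	/* ... existing fields: inode, task, mm, iter, ... */
	loff_t last_pos;			/* position of the last vma reported */
#ifdef CONFIG_PER_VMA_LOCK
	bool mmap_locked;			/* true once we fall back to mmap_lock */
	struct vm_area_struct *locked_vma;	/* read-locked vma, released via vma_end_read() */
#endif
};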
+
+static struct vm_area_struct *proc_get_vma(struct seq_file *m, loff_t *ppos)
+{
+ struct proc_maps_private *priv = m->private;
+ struct vm_area_struct *vma;
+
+retry:
+ vma = get_next_vma(priv, *ppos);
+ /* EINTR or EAGAIN is possible */
+ if (IS_ERR(vma)) {
+ if (PTR_ERR(vma) == -EAGAIN && fallback_to_mmap_lock(priv, *ppos))
+ goto retry;
+
+ return vma;
+ }
+
+ /* Store previous position to be able to restart if needed */
+ priv->last_pos = *ppos;
if (vma) {
- *ppos = vma->vm_start;
+ /*
+ * Track the end of the reported vma to ensure the position always
+ * changes, even if the previous vma was merged with the next one
+ * and we found the extended vma with the same vm_start.
+ */
+ *ppos = vma->vm_end;
} else {
- *ppos = -2UL;
+ *ppos = SENTINEL_VMA_GATE;
vma = get_gate_vma(priv->mm);
}
@@ -145,11 +267,11 @@ static struct vm_area_struct *proc_get_vma(struct proc_maps_private *priv,
static void *m_start(struct seq_file *m, loff_t *ppos)
{
struct proc_maps_private *priv = m->private;
- unsigned long last_addr = *ppos;
+ loff_t last_addr = *ppos;
struct mm_struct *mm;
/* See m_next(). Zero at the start or after lseek. */
- if (last_addr == -1UL)
+ if (last_addr == SENTINEL_VMA_END)
return NULL;
priv->task = get_proc_task(priv->inode);
@@ -163,28 +285,34 @@ static void *m_start(struct seq_file *m, loff_t *ppos)
return NULL;
}
- if (mmap_read_lock_killable(mm)) {
+ if (!lock_vma_range(m, priv)) {
mmput(mm);
put_task_struct(priv->task);
priv->task = NULL;
return ERR_PTR(-EINTR);
}
- vma_iter_init(&priv->iter, mm, last_addr);
+ /*
+ * Reset the current position to the last successfully reported
+ * one if last_addr was set before and is not a sentinel.
+ */
+ if (last_addr > 0)
+ *ppos = last_addr = priv->last_pos;
+ vma_iter_init(&priv->iter, mm, (unsigned long)last_addr);
hold_task_mempolicy(priv);
- if (last_addr == -2UL)
+ if (last_addr == SENTINEL_VMA_GATE)
return get_gate_vma(mm);
- return proc_get_vma(priv, ppos);
+ return proc_get_vma(m, ppos);
}
static void *m_next(struct seq_file *m, void *v, loff_t *ppos)
{
- if (*ppos == -2UL) {
- *ppos = -1UL;
+ if (*ppos == SENTINEL_VMA_GATE) {
+ *ppos = SENTINEL_VMA_END;
return NULL;
}
- return proc_get_vma(m->private, ppos);
+ return proc_get_vma(m, ppos);
}
static void m_stop(struct seq_file *m, void *v)
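
With the per-VMA path in place, a plain read of /proc/PID/maps no longer pins mmap_lock for the whole walk; each step locks only the vma being reported, or falls back to mmap_lock on contention. An illustration-only userspace reader that exercises exactly this path:

#include <stdio.h>

int main(int argc, char **argv)
{
	char path[64], line[1024];
	FILE *f;

	/* Default to our own map; pass a PID to inspect another task. */
	snprintf(path, sizeof(path), "/proc/%s/maps", argc > 1 ? argv[1] : "self");

	f = fopen(path, "r");
	if (!f) {
		perror("fopen");
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}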
@@ -196,7 +324,7 @@ static void m_stop(struct seq_file *m, void *v)
return;
release_task_mempolicy(priv);
- mmap_read_unlock(mm);
+ unlock_vma_range(priv);
mmput(mm);
put_task_struct(priv->task);
priv->task = NULL;
@@ -212,8 +340,8 @@ static int proc_maps_open(struct inode *inode, struct file *file,
priv->inode = inode;
priv->mm = proc_mem_open(inode, PTRACE_MODE_READ);
- if (IS_ERR_OR_NULL(priv->mm)) {
- int err = priv->mm ? PTR_ERR(priv->mm) : -ESRCH;
+ if (IS_ERR(priv->mm)) {
+ int err = PTR_ERR(priv->mm);
seq_release_private(inode, file);
return err;
@@ -1020,10 +1148,13 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
{
struct mem_size_stats *mss = walk->private;
struct vm_area_struct *vma = walk->vma;
- pte_t ptent = huge_ptep_get(walk->mm, addr, pte);
struct folio *folio = NULL;
bool present = false;
+ spinlock_t *ptl;
+ pte_t ptent;
+ ptl = huge_pte_lock(hstate_vma(vma), walk->mm, pte);
+ ptent = huge_ptep_get(walk->mm, addr, pte);
if (pte_present(ptent)) {
folio = page_folio(pte_page(ptent));
present = true;
@@ -1042,6 +1173,7 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
else
mss->private_hugetlb += huge_page_size(hstate_vma(vma));
}
+ spin_unlock(ptl);
return 0;
}
#else
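
The hugetlb fix above, and the matching ones in pagemap_hugetlb_range() and gather_hugetlb_stats() below, all follow the same pattern: take the huge-pte page-table lock before huge_ptep_get() and hold it across every use of the pte's page, leaving through a single unlock path. A condensed sketch of the pattern (kernel context; the walker name is hypothetical):

static int example_hugetlb_range(pte_t *pte, unsigned long hmask,
		unsigned long addr, unsigned long end, struct mm_walk *walk)
{
	spinlock_t *ptl;
	pte_t ptent;

	/* Serialize against concurrent hugetlb faults and migration so
	 * the pte cannot change while we inspect the page behind it. */
	ptl = huge_pte_lock(hstate_vma(walk->vma), walk->mm, pte);
	ptent = huge_ptep_get(walk->mm, addr, pte);
	if (!pte_present(ptent))
		goto out;

	/* ... pte_page(ptent) is stable here ... */
out:
	spin_unlock(ptl);	/* every exit path drops ptl */
	return 0;
}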
@@ -1889,12 +2021,14 @@ static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask,
struct pagemapread *pm = walk->private;
struct vm_area_struct *vma = walk->vma;
u64 flags = 0, frame = 0;
+ spinlock_t *ptl;
int err = 0;
pte_t pte;
if (vma->vm_flags & VM_SOFTDIRTY)
flags |= PM_SOFT_DIRTY;
+ ptl = huge_pte_lock(hstate_vma(vma), walk->mm, ptep);
pte = huge_ptep_get(walk->mm, addr, ptep);
if (pte_present(pte)) {
struct folio *folio = page_folio(pte_page(pte));
@@ -1922,11 +2056,12 @@ static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask,
err = add_to_pagemap(&pme, pm);
if (err)
- return err;
+ break;
if (pm->show_pfn && (flags & PM_PRESENT))
frame++;
}
+ spin_unlock(ptl);
cond_resched();
return err;
@@ -3000,17 +3135,22 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
unsigned long addr, unsigned long end, struct mm_walk *walk)
{
- pte_t huge_pte = huge_ptep_get(walk->mm, addr, pte);
+ pte_t huge_pte;
struct numa_maps *md;
struct page *page;
+ spinlock_t *ptl;
+ ptl = huge_pte_lock(hstate_vma(walk->vma), walk->mm, pte);
+ huge_pte = huge_ptep_get(walk->mm, addr, pte);
if (!pte_present(huge_pte))
- return 0;
+ goto out;
page = pte_page(huge_pte);
md = walk->private;
gather_stats(page, md, pte_dirty(huge_pte), 1);
+out:
+ spin_unlock(ptl);
return 0;
}
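
Note how the control flow bends to keep that invariant: pagemap_hugetlb_range() turns its early return err into a break so the walk falls through to spin_unlock(ptl), and gather_hugetlb_stats() replaces return 0 with goto out for the same reason. Any exit that skipped the unlock would leak the page-table lock.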