diff options
Diffstat (limited to 'fs/proc')
-rw-r--r-- | fs/proc/generic.c | 2 | ||||
-rw-r--r-- | fs/proc/inode.c | 2 | ||||
-rw-r--r-- | fs/proc/internal.h | 10 | ||||
-rw-r--r-- | fs/proc/meminfo.c | 3 | ||||
-rw-r--r-- | fs/proc/page.c | 54 | ||||
-rw-r--r-- | fs/proc/task_mmu.c | 158 |
6 files changed, 189 insertions, 40 deletions
diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 5635453cd476..76e800e38c8f 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -571,6 +571,8 @@ static void pde_set_flags(struct proc_dir_entry *pde) if (pde->proc_ops->proc_compat_ioctl) pde->flags |= PROC_ENTRY_proc_compat_ioctl; #endif + if (pde->proc_ops->proc_lseek) + pde->flags |= PROC_ENTRY_proc_lseek; } struct proc_dir_entry *proc_create_data(const char *name, umode_t mode, diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 3604b616311c..129490151be1 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -473,7 +473,7 @@ static int proc_reg_open(struct inode *inode, struct file *file) typeof_member(struct proc_ops, proc_open) open; struct pde_opener *pdeo; - if (!pde->proc_ops->proc_lseek) + if (!pde_has_proc_lseek(pde)) file->f_mode &= ~FMODE_LSEEK; if (pde_is_permanent(pde)) { diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 520c4742101d..e737401d7383 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -98,6 +98,11 @@ static inline bool pde_has_proc_compat_ioctl(const struct proc_dir_entry *pde) #endif } +static inline bool pde_has_proc_lseek(const struct proc_dir_entry *pde) +{ + return pde->flags & PROC_ENTRY_proc_lseek; +} + extern struct kmem_cache *proc_dir_entry_cache; void pde_free(struct proc_dir_entry *pde); @@ -378,6 +383,11 @@ struct proc_maps_private { struct task_struct *task; struct mm_struct *mm; struct vma_iterator iter; + loff_t last_pos; +#ifdef CONFIG_PER_VMA_LOCK + bool mmap_locked; + struct vm_area_struct *locked_vma; +#endif #ifdef CONFIG_NUMA struct mempolicy *task_mempolicy; #endif diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index bc2bc60c36cc..a458f1e112fd 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -121,8 +121,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v) show_val_kb(m, "NFS_Unstable: ", 0); show_val_kb(m, "Bounce: ", 0); - show_val_kb(m, "WritebackTmp: ", - global_node_page_state(NR_WRITEBACK_TEMP)); + show_val_kb(m, "WritebackTmp: ", 0); show_val_kb(m, "CommitLimit: ", vm_commit_limit()); show_val_kb(m, "Committed_AS: ", committed); seq_printf(m, "VmallocTotal: %8lu kB\n", diff --git a/fs/proc/page.c b/fs/proc/page.c index 999af26c7298..ba3568e97fd1 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -43,6 +43,22 @@ static inline unsigned long get_max_dump_pfn(void) #endif } +static u64 get_kpage_count(const struct page *page) +{ + struct page_snapshot ps; + u64 ret; + + snapshot_page(&ps, page); + + if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT)) + ret = folio_precise_page_mapcount(&ps.folio_snapshot, + &ps.page_snapshot); + else + ret = folio_average_page_mapcount(&ps.folio_snapshot); + + return ret; +} + static ssize_t kpage_read(struct file *file, char __user *buf, size_t count, loff_t *ppos, enum kpage_operation op) @@ -75,10 +91,7 @@ static ssize_t kpage_read(struct file *file, char __user *buf, info = stable_page_flags(page); break; case KPAGE_COUNT: - if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT)) - info = folio_precise_page_mapcount(page_folio(page), page); - else - info = folio_average_page_mapcount(page_folio(page)); + info = get_kpage_count(page); break; case KPAGE_CGROUP: info = page_cgroup_ino(page); @@ -134,6 +147,7 @@ static inline u64 kpf_copy_bit(u64 kflags, int ubit, int kbit) u64 stable_page_flags(const struct page *page) { const struct folio *folio; + struct page_snapshot ps; unsigned long k; unsigned long mapping; bool is_anon; @@ -145,20 +159,22 @@ u64 stable_page_flags(const struct page *page) */ if (!page) return 1 << KPF_NOPAGE; - folio = page_folio(page); + + snapshot_page(&ps, page); + folio = &ps.folio_snapshot; k = folio->flags; mapping = (unsigned long)folio->mapping; - is_anon = mapping & PAGE_MAPPING_ANON; + is_anon = mapping & FOLIO_MAPPING_ANON; /* * pseudo flags for the well known (anonymous) memory mapped pages */ - if (page_mapped(page)) + if (folio_mapped(folio)) u |= 1 << KPF_MMAP; if (is_anon) { u |= 1 << KPF_ANON; - if (mapping & PAGE_MAPPING_KSM) + if (mapping & FOLIO_MAPPING_KSM) u |= 1 << KPF_KSM; } @@ -166,7 +182,7 @@ u64 stable_page_flags(const struct page *page) * compound pages: export both head/tail info * they together define a compound page's start/end pos and order */ - if (page == &folio->page) + if (ps.idx == 0) u |= kpf_copy_bit(k, KPF_COMPOUND_HEAD, PG_head); else u |= 1 << KPF_COMPOUND_TAIL; @@ -176,25 +192,19 @@ u64 stable_page_flags(const struct page *page) folio_test_large_rmappable(folio)) { /* Note: we indicate any THPs here, not just PMD-sized ones */ u |= 1 << KPF_THP; - } else if (is_huge_zero_folio(folio)) { + } else if (is_huge_zero_pfn(ps.pfn)) { u |= 1 << KPF_ZERO_PAGE; u |= 1 << KPF_THP; - } else if (is_zero_folio(folio)) { + } else if (is_zero_pfn(ps.pfn)) { u |= 1 << KPF_ZERO_PAGE; } - /* - * Caveats on high order pages: PG_buddy and PG_slab will only be set - * on the head page. - */ - if (PageBuddy(page)) - u |= 1 << KPF_BUDDY; - else if (page_count(page) == 0 && is_free_buddy_page(page)) + if (ps.flags & PAGE_SNAPSHOT_PG_BUDDY) u |= 1 << KPF_BUDDY; - if (PageOffline(page)) + if (folio_test_offline(folio)) u |= 1 << KPF_OFFLINE; - if (PageTable(page)) + if (folio_test_pgtable(folio)) u |= 1 << KPF_PGTABLE; if (folio_test_slab(folio)) u |= 1 << KPF_SLAB; @@ -202,7 +212,7 @@ u64 stable_page_flags(const struct page *page) #if defined(CONFIG_PAGE_IDLE_FLAG) && defined(CONFIG_64BIT) u |= kpf_copy_bit(k, KPF_IDLE, PG_idle); #else - if (folio_test_idle(folio)) + if (ps.flags & PAGE_SNAPSHOT_PG_IDLE) u |= 1 << KPF_IDLE; #endif @@ -228,7 +238,7 @@ u64 stable_page_flags(const struct page *page) if (u & (1 << KPF_HUGE)) u |= kpf_copy_bit(k, KPF_HWPOISON, PG_hwpoison); else - u |= kpf_copy_bit(page->flags, KPF_HWPOISON, PG_hwpoison); + u |= kpf_copy_bit(ps.page_snapshot.flags, KPF_HWPOISON, PG_hwpoison); #endif u |= kpf_copy_bit(k, KPF_RESERVED, PG_reserved); diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 751479eb128f..3d6d8a9f13fc 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -29,6 +29,9 @@ #include <asm/tlbflush.h> #include "internal.h" +#define SENTINEL_VMA_END -1 +#define SENTINEL_VMA_GATE -2 + #define SEQ_PUT_DEC(str, val) \ seq_put_decimal_ull_width(m, str, (val) << (PAGE_SHIFT-10), 8) void task_mem(struct seq_file *m, struct mm_struct *mm) @@ -127,15 +130,134 @@ static void release_task_mempolicy(struct proc_maps_private *priv) } #endif -static struct vm_area_struct *proc_get_vma(struct proc_maps_private *priv, - loff_t *ppos) +#ifdef CONFIG_PER_VMA_LOCK + +static void unlock_vma(struct proc_maps_private *priv) +{ + if (priv->locked_vma) { + vma_end_read(priv->locked_vma); + priv->locked_vma = NULL; + } +} + +static const struct seq_operations proc_pid_maps_op; + +static inline bool lock_vma_range(struct seq_file *m, + struct proc_maps_private *priv) +{ + /* + * smaps and numa_maps perform page table walk, therefore require + * mmap_lock but maps can be read with locking just the vma and + * walking the vma tree under rcu read protection. + */ + if (m->op != &proc_pid_maps_op) { + if (mmap_read_lock_killable(priv->mm)) + return false; + + priv->mmap_locked = true; + } else { + rcu_read_lock(); + priv->locked_vma = NULL; + priv->mmap_locked = false; + } + + return true; +} + +static inline void unlock_vma_range(struct proc_maps_private *priv) +{ + if (priv->mmap_locked) { + mmap_read_unlock(priv->mm); + } else { + unlock_vma(priv); + rcu_read_unlock(); + } +} + +static struct vm_area_struct *get_next_vma(struct proc_maps_private *priv, + loff_t last_pos) +{ + struct vm_area_struct *vma; + + if (priv->mmap_locked) + return vma_next(&priv->iter); + + unlock_vma(priv); + vma = lock_next_vma(priv->mm, &priv->iter, last_pos); + if (!IS_ERR_OR_NULL(vma)) + priv->locked_vma = vma; + + return vma; +} + +static inline bool fallback_to_mmap_lock(struct proc_maps_private *priv, + loff_t pos) +{ + if (priv->mmap_locked) + return false; + + rcu_read_unlock(); + mmap_read_lock(priv->mm); + /* Reinitialize the iterator after taking mmap_lock */ + vma_iter_set(&priv->iter, pos); + priv->mmap_locked = true; + + return true; +} + +#else /* CONFIG_PER_VMA_LOCK */ + +static inline bool lock_vma_range(struct seq_file *m, + struct proc_maps_private *priv) +{ + return mmap_read_lock_killable(priv->mm) == 0; +} + +static inline void unlock_vma_range(struct proc_maps_private *priv) +{ + mmap_read_unlock(priv->mm); +} + +static struct vm_area_struct *get_next_vma(struct proc_maps_private *priv, + loff_t last_pos) +{ + return vma_next(&priv->iter); +} + +static inline bool fallback_to_mmap_lock(struct proc_maps_private *priv, + loff_t pos) { - struct vm_area_struct *vma = vma_next(&priv->iter); + return false; +} + +#endif /* CONFIG_PER_VMA_LOCK */ + +static struct vm_area_struct *proc_get_vma(struct seq_file *m, loff_t *ppos) +{ + struct proc_maps_private *priv = m->private; + struct vm_area_struct *vma; + +retry: + vma = get_next_vma(priv, *ppos); + /* EINTR of EAGAIN is possible */ + if (IS_ERR(vma)) { + if (PTR_ERR(vma) == -EAGAIN && fallback_to_mmap_lock(priv, *ppos)) + goto retry; + return vma; + } + + /* Store previous position to be able to restart if needed */ + priv->last_pos = *ppos; if (vma) { - *ppos = vma->vm_start; + /* + * Track the end of the reported vma to ensure position changes + * even if previous vma was merged with the next vma and we + * found the extended vma with the same vm_start. + */ + *ppos = vma->vm_end; } else { - *ppos = -2UL; + *ppos = SENTINEL_VMA_GATE; vma = get_gate_vma(priv->mm); } @@ -145,11 +267,11 @@ static struct vm_area_struct *proc_get_vma(struct proc_maps_private *priv, static void *m_start(struct seq_file *m, loff_t *ppos) { struct proc_maps_private *priv = m->private; - unsigned long last_addr = *ppos; + loff_t last_addr = *ppos; struct mm_struct *mm; /* See m_next(). Zero at the start or after lseek. */ - if (last_addr == -1UL) + if (last_addr == SENTINEL_VMA_END) return NULL; priv->task = get_proc_task(priv->inode); @@ -163,28 +285,34 @@ static void *m_start(struct seq_file *m, loff_t *ppos) return NULL; } - if (mmap_read_lock_killable(mm)) { + if (!lock_vma_range(m, priv)) { mmput(mm); put_task_struct(priv->task); priv->task = NULL; return ERR_PTR(-EINTR); } - vma_iter_init(&priv->iter, mm, last_addr); + /* + * Reset current position if last_addr was set before + * and it's not a sentinel. + */ + if (last_addr > 0) + *ppos = last_addr = priv->last_pos; + vma_iter_init(&priv->iter, mm, (unsigned long)last_addr); hold_task_mempolicy(priv); - if (last_addr == -2UL) + if (last_addr == SENTINEL_VMA_GATE) return get_gate_vma(mm); - return proc_get_vma(priv, ppos); + return proc_get_vma(m, ppos); } static void *m_next(struct seq_file *m, void *v, loff_t *ppos) { - if (*ppos == -2UL) { - *ppos = -1UL; + if (*ppos == SENTINEL_VMA_GATE) { + *ppos = SENTINEL_VMA_END; return NULL; } - return proc_get_vma(m->private, ppos); + return proc_get_vma(m, ppos); } static void m_stop(struct seq_file *m, void *v) @@ -196,7 +324,7 @@ static void m_stop(struct seq_file *m, void *v) return; release_task_mempolicy(priv); - mmap_read_unlock(mm); + unlock_vma_range(priv); mmput(mm); put_task_struct(priv->task); priv->task = NULL; |