summary refs log tree commit diff
path: root/fs/proc
diff options
context:
space:
mode:
Diffstat (limited to 'fs/proc')
-rw-r--r-- fs/proc/base.c 15
-rw-r--r-- fs/proc/fd.c 11
-rw-r--r-- fs/proc/generic.c 12
-rw-r--r-- fs/proc/inode.c 4
-rw-r--r-- fs/proc/internal.h 16
-rw-r--r-- fs/proc/meminfo.c 3
-rw-r--r-- fs/proc/namespaces.c 3
-rw-r--r-- fs/proc/page.c 54
-rw-r--r-- fs/proc/proc_sysctl.c 25
-rw-r--r-- fs/proc/root.c 10
-rw-r--r-- fs/proc/task_mmu.c 174
11 files changed, 233 insertions, 94 deletions
diff --git a/fs/proc/base.c b/fs/proc/base.c
index c667702dc69b..62d35631ba8c 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2704,8 +2704,7 @@ static struct dentry *proc_pident_instantiate(struct dentry *dentry,
inode->i_fop = p->fop;
ei->op = p->op;
pid_update_inode(task, inode);
- d_set_d_op(dentry, &pid_dentry_operations);
- return d_splice_alias(inode, dentry);
+ return d_splice_alias_ops(inode, dentry, &pid_dentry_operations);
}
static struct dentry *proc_pident_lookup(struct inode *dir,
@@ -3291,7 +3290,7 @@ static int proc_pid_ksm_stat(struct seq_file *m, struct pid_namespace *ns,
}
#endif /* CONFIG_KSM */
-#ifdef CONFIG_STACKLEAK_METRICS
+#ifdef CONFIG_KSTACK_ERASE_METRICS
static int proc_stack_depth(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *task)
{
@@ -3304,7 +3303,7 @@ static int proc_stack_depth(struct seq_file *m, struct pid_namespace *ns,
prev_depth, depth);
return 0;
}
-#endif /* CONFIG_STACKLEAK_METRICS */
+#endif /* CONFIG_KSTACK_ERASE_METRICS */
/*
* Thread groups
@@ -3411,7 +3410,7 @@ static const struct pid_entry tgid_base_stuff[] = {
#ifdef CONFIG_LIVEPATCH
ONE("patch_state", S_IRUSR, proc_pid_patch_state),
#endif
-#ifdef CONFIG_STACKLEAK_METRICS
+#ifdef CONFIG_KSTACK_ERASE_METRICS
ONE("stack_depth", S_IRUGO, proc_stack_depth),
#endif
#ifdef CONFIG_PROC_PID_ARCH_STATUS
@@ -3501,8 +3500,7 @@ static struct dentry *proc_pid_instantiate(struct dentry * dentry,
set_nlink(inode, nlink_tgid);
pid_update_inode(task, inode);
- d_set_d_op(dentry, &pid_dentry_operations);
- return d_splice_alias(inode, dentry);
+ return d_splice_alias_ops(inode, dentry, &pid_dentry_operations);
}
struct dentry *proc_pid_lookup(struct dentry *dentry, unsigned int flags)
@@ -3804,8 +3802,7 @@ static struct dentry *proc_task_instantiate(struct dentry *dentry,
set_nlink(inode, nlink_tid);
pid_update_inode(task, inode);
- d_set_d_op(dentry, &pid_dentry_operations);
- return d_splice_alias(inode, dentry);
+ return d_splice_alias_ops(inode, dentry, &pid_dentry_operations);
}
static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index 37aa778d1af7..9eeccff49b2a 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -352,18 +352,9 @@ static int proc_fd_getattr(struct mnt_idmap *idmap,
u32 request_mask, unsigned int query_flags)
{
struct inode *inode = d_inode(path->dentry);
- int rv = 0;
generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
-
- /* If it's a directory, put the number of open fds there */
- if (S_ISDIR(inode->i_mode)) {
- rv = proc_readfd_count(inode, &stat->size);
- if (rv < 0)
- return rv;
- }
-
- return rv;
+ return proc_readfd_count(inode, &stat->size);
}
const struct inode_operations proc_fd_inode_operations = {
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index a3e22803cddf..76e800e38c8f 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -254,8 +254,11 @@ struct dentry *proc_lookup_de(struct inode *dir, struct dentry *dentry,
inode = proc_get_inode(dir->i_sb, de);
if (!inode)
return ERR_PTR(-ENOMEM);
- d_set_d_op(dentry, de->proc_dops);
- return d_splice_alias(inode, dentry);
+ if (de->flags & PROC_ENTRY_FORCE_LOOKUP)
+ return d_splice_alias_ops(inode, dentry,
+ &proc_net_dentry_ops);
+ return d_splice_alias_ops(inode, dentry,
+ &proc_misc_dentry_ops);
}
read_unlock(&proc_subdir_lock);
return ERR_PTR(-ENOENT);
@@ -448,9 +451,8 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
INIT_LIST_HEAD(&ent->pde_openers);
proc_set_user(ent, (*parent)->uid, (*parent)->gid);
- ent->proc_dops = &proc_misc_dentry_ops;
/* Revalidate everything under /proc/${pid}/net */
- if ((*parent)->proc_dops == &proc_net_dentry_ops)
+ if ((*parent)->flags & PROC_ENTRY_FORCE_LOOKUP)
pde_force_lookup(ent);
out:
@@ -569,6 +571,8 @@ static void pde_set_flags(struct proc_dir_entry *pde)
if (pde->proc_ops->proc_compat_ioctl)
pde->flags |= PROC_ENTRY_proc_compat_ioctl;
#endif
+ if (pde->proc_ops->proc_lseek)
+ pde->flags |= PROC_ENTRY_proc_lseek;
}
struct proc_dir_entry *proc_create_data(const char *name, umode_t mode,
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index a3eb3b740f76..129490151be1 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -42,7 +42,7 @@ static void proc_evict_inode(struct inode *inode)
head = ei->sysctl;
if (head) {
- RCU_INIT_POINTER(ei->sysctl, NULL);
+ WRITE_ONCE(ei->sysctl, NULL);
proc_sys_evict_inode(inode, head);
}
}
@@ -473,7 +473,7 @@ static int proc_reg_open(struct inode *inode, struct file *file)
typeof_member(struct proc_ops, proc_open) open;
struct pde_opener *pdeo;
- if (!pde->proc_ops->proc_lseek)
+ if (!pde_has_proc_lseek(pde))
file->f_mode &= ~FMODE_LSEEK;
if (pde_is_permanent(pde)) {
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 96122e91c645..e737401d7383 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -44,7 +44,6 @@ struct proc_dir_entry {
const struct proc_ops *proc_ops;
const struct file_operations *proc_dir_ops;
};
- const struct dentry_operations *proc_dops;
union {
const struct seq_operations *seq_ops;
int (*single_show)(struct seq_file *, void *);
@@ -99,6 +98,11 @@ static inline bool pde_has_proc_compat_ioctl(const struct proc_dir_entry *pde)
#endif
}
+static inline bool pde_has_proc_lseek(const struct proc_dir_entry *pde)
+{
+ return pde->flags & PROC_ENTRY_proc_lseek;
+}
+
extern struct kmem_cache *proc_dir_entry_cache;
void pde_free(struct proc_dir_entry *pde);
@@ -379,6 +383,11 @@ struct proc_maps_private {
struct task_struct *task;
struct mm_struct *mm;
struct vma_iterator iter;
+ loff_t last_pos;
+#ifdef CONFIG_PER_VMA_LOCK
+ bool mmap_locked;
+ struct vm_area_struct *locked_vma;
+#endif
#ifdef CONFIG_NUMA
struct mempolicy *task_mempolicy;
#endif
@@ -403,7 +412,7 @@ extern const struct dentry_operations proc_net_dentry_ops;
static inline void pde_force_lookup(struct proc_dir_entry *pde)
{
/* /proc/net/ entries can be changed under us by setns(CLONE_NEWNET) */
- pde->proc_dops = &proc_net_dentry_ops;
+ pde->flags |= PROC_ENTRY_FORCE_LOOKUP;
}
/*
@@ -414,7 +423,6 @@ static inline void pde_force_lookup(struct proc_dir_entry *pde)
static inline struct dentry *proc_splice_unmountable(struct inode *inode,
struct dentry *dentry, const struct dentry_operations *d_ops)
{
- d_set_d_op(dentry, d_ops);
dont_mount(dentry);
- return d_splice_alias(inode, dentry);
+ return d_splice_alias_ops(inode, dentry, d_ops);
}
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index bc2bc60c36cc..a458f1e112fd 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -121,8 +121,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
show_val_kb(m, "NFS_Unstable: ", 0);
show_val_kb(m, "Bounce: ", 0);
- show_val_kb(m, "WritebackTmp: ",
- global_node_page_state(NR_WRITEBACK_TEMP));
+ show_val_kb(m, "WritebackTmp: ", 0);
show_val_kb(m, "CommitLimit: ", vm_commit_limit());
show_val_kb(m, "Committed_AS: ", committed);
seq_printf(m, "VmallocTotal: %8lu kB\n",
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index c610224faf10..4403a2e20c16 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -111,8 +111,7 @@ static struct dentry *proc_ns_instantiate(struct dentry *dentry,
ei->ns_ops = ns_ops;
pid_update_inode(task, inode);
- d_set_d_op(dentry, &pid_dentry_operations);
- return d_splice_alias(inode, dentry);
+ return d_splice_alias_ops(inode, dentry, &pid_dentry_operations);
}
static int proc_ns_dir_readdir(struct file *file, struct dir_context *ctx)
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 999af26c7298..ba3568e97fd1 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -43,6 +43,22 @@ static inline unsigned long get_max_dump_pfn(void)
#endif
}
+static u64 get_kpage_count(const struct page *page)
+{
+ struct page_snapshot ps;
+ u64 ret;
+
+ snapshot_page(&ps, page);
+
+ if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT))
+ ret = folio_precise_page_mapcount(&ps.folio_snapshot,
+ &ps.page_snapshot);
+ else
+ ret = folio_average_page_mapcount(&ps.folio_snapshot);
+
+ return ret;
+}
+
static ssize_t kpage_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos,
enum kpage_operation op)
@@ -75,10 +91,7 @@ static ssize_t kpage_read(struct file *file, char __user *buf,
info = stable_page_flags(page);
break;
case KPAGE_COUNT:
- if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT))
- info = folio_precise_page_mapcount(page_folio(page), page);
- else
- info = folio_average_page_mapcount(page_folio(page));
+ info = get_kpage_count(page);
break;
case KPAGE_CGROUP:
info = page_cgroup_ino(page);
@@ -134,6 +147,7 @@ static inline u64 kpf_copy_bit(u64 kflags, int ubit, int kbit)
u64 stable_page_flags(const struct page *page)
{
const struct folio *folio;
+ struct page_snapshot ps;
unsigned long k;
unsigned long mapping;
bool is_anon;
@@ -145,20 +159,22 @@ u64 stable_page_flags(const struct page *page)
*/
if (!page)
return 1 << KPF_NOPAGE;
- folio = page_folio(page);
+
+ snapshot_page(&ps, page);
+ folio = &ps.folio_snapshot;
k = folio->flags;
mapping = (unsigned long)folio->mapping;
- is_anon = mapping & PAGE_MAPPING_ANON;
+ is_anon = mapping & FOLIO_MAPPING_ANON;
/*
* pseudo flags for the well known (anonymous) memory mapped pages
*/
- if (page_mapped(page))
+ if (folio_mapped(folio))
u |= 1 << KPF_MMAP;
if (is_anon) {
u |= 1 << KPF_ANON;
- if (mapping & PAGE_MAPPING_KSM)
+ if (mapping & FOLIO_MAPPING_KSM)
u |= 1 << KPF_KSM;
}
@@ -166,7 +182,7 @@ u64 stable_page_flags(const struct page *page)
* compound pages: export both head/tail info
* they together define a compound page's start/end pos and order
*/
- if (page == &folio->page)
+ if (ps.idx == 0)
u |= kpf_copy_bit(k, KPF_COMPOUND_HEAD, PG_head);
else
u |= 1 << KPF_COMPOUND_TAIL;
@@ -176,25 +192,19 @@ u64 stable_page_flags(const struct page *page)
folio_test_large_rmappable(folio)) {
/* Note: we indicate any THPs here, not just PMD-sized ones */
u |= 1 << KPF_THP;
- } else if (is_huge_zero_folio(folio)) {
+ } else if (is_huge_zero_pfn(ps.pfn)) {
u |= 1 << KPF_ZERO_PAGE;
u |= 1 << KPF_THP;
- } else if (is_zero_folio(folio)) {
+ } else if (is_zero_pfn(ps.pfn)) {
u |= 1 << KPF_ZERO_PAGE;
}
- /*
- * Caveats on high order pages: PG_buddy and PG_slab will only be set
- * on the head page.
- */
- if (PageBuddy(page))
- u |= 1 << KPF_BUDDY;
- else if (page_count(page) == 0 && is_free_buddy_page(page))
+ if (ps.flags & PAGE_SNAPSHOT_PG_BUDDY)
u |= 1 << KPF_BUDDY;
- if (PageOffline(page))
+ if (folio_test_offline(folio))
u |= 1 << KPF_OFFLINE;
- if (PageTable(page))
+ if (folio_test_pgtable(folio))
u |= 1 << KPF_PGTABLE;
if (folio_test_slab(folio))
u |= 1 << KPF_SLAB;
@@ -202,7 +212,7 @@ u64 stable_page_flags(const struct page *page)
#if defined(CONFIG_PAGE_IDLE_FLAG) && defined(CONFIG_64BIT)
u |= kpf_copy_bit(k, KPF_IDLE, PG_idle);
#else
- if (folio_test_idle(folio))
+ if (ps.flags & PAGE_SNAPSHOT_PG_IDLE)
u |= 1 << KPF_IDLE;
#endif
@@ -228,7 +238,7 @@ u64 stable_page_flags(const struct page *page)
if (u & (1 << KPF_HUGE))
u |= kpf_copy_bit(k, KPF_HWPOISON, PG_hwpoison);
else
- u |= kpf_copy_bit(page->flags, KPF_HWPOISON, PG_hwpoison);
+ u |= kpf_copy_bit(ps.page_snapshot.flags, KPF_HWPOISON, PG_hwpoison);
#endif
u |= kpf_copy_bit(k, KPF_RESERVED, PG_reserved);
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index cc9d74a06ff0..49ab74e0bfde 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -540,9 +540,8 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
goto out;
}
- d_set_d_op(dentry, &proc_sys_dentry_operations);
inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p);
- err = d_splice_alias(inode, dentry);
+ err = d_splice_alias_ops(inode, dentry, &proc_sys_dentry_operations);
out:
if (h)
@@ -699,9 +698,9 @@ static bool proc_sys_fill_cache(struct file *file,
return false;
if (d_in_lookup(child)) {
struct dentry *res;
- d_set_d_op(child, &proc_sys_dentry_operations);
inode = proc_sys_make_inode(dir->d_sb, head, table);
- res = d_splice_alias(inode, child);
+ res = d_splice_alias_ops(inode, child,
+ &proc_sys_dentry_operations);
d_lookup_done(child);
if (unlikely(res)) {
dput(child);
@@ -918,17 +917,21 @@ static int proc_sys_compare(const struct dentry *dentry,
struct ctl_table_header *head;
struct inode *inode;
- /* Although proc doesn't have negative dentries, rcu-walk means
- * that inode here can be NULL */
- /* AV: can it, indeed? */
- inode = d_inode_rcu(dentry);
- if (!inode)
- return 1;
if (name->len != len)
return 1;
if (memcmp(name->name, str, len))
return 1;
- head = rcu_dereference(PROC_I(inode)->sysctl);
+
+ // false positive is fine here - we'll recheck anyway
+ if (d_in_lookup(dentry))
+ return 0;
+
+ inode = d_inode_rcu(dentry);
+ // we just might have run into dentry in the middle of __dentry_kill()
+ if (!inode)
+ return 1;
+
+ head = READ_ONCE(PROC_I(inode)->sysctl);
return !head || !sysctl_is_seen(head);
}
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 06a297a27ba3..ed86ac710384 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -363,12 +363,12 @@ static const struct inode_operations proc_root_inode_operations = {
* This is the root "inode" in the /proc tree..
*/
struct proc_dir_entry proc_root = {
- .low_ino = PROC_ROOT_INO,
- .namelen = 5,
- .mode = S_IFDIR | S_IRUGO | S_IXUGO,
- .nlink = 2,
+ .low_ino = PROCFS_ROOT_INO,
+ .namelen = 5,
+ .mode = S_IFDIR | S_IRUGO | S_IXUGO,
+ .nlink = 2,
.refcnt = REFCOUNT_INIT(1),
- .proc_iops = &proc_root_inode_operations,
+ .proc_iops = &proc_root_inode_operations,
.proc_dir_ops = &proc_root_operations,
.parent = &proc_root,
.subdir = RB_ROOT,
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 27972c0749e7..3d6d8a9f13fc 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -29,6 +29,9 @@
#include <asm/tlbflush.h>
#include "internal.h"
+#define SENTINEL_VMA_END -1
+#define SENTINEL_VMA_GATE -2
+
#define SEQ_PUT_DEC(str, val) \
seq_put_decimal_ull_width(m, str, (val) << (PAGE_SHIFT-10), 8)
void task_mem(struct seq_file *m, struct mm_struct *mm)
@@ -36,9 +39,9 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
unsigned long text, lib, swap, anon, file, shmem;
unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss;
- anon = get_mm_counter(mm, MM_ANONPAGES);
- file = get_mm_counter(mm, MM_FILEPAGES);
- shmem = get_mm_counter(mm, MM_SHMEMPAGES);
+ anon = get_mm_counter_sum(mm, MM_ANONPAGES);
+ file = get_mm_counter_sum(mm, MM_FILEPAGES);
+ shmem = get_mm_counter_sum(mm, MM_SHMEMPAGES);
/*
* Note: to minimize their overhead, mm maintains hiwater_vm and
@@ -59,7 +62,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
text = min(text, mm->exec_vm << PAGE_SHIFT);
lib = (mm->exec_vm << PAGE_SHIFT) - text;
- swap = get_mm_counter(mm, MM_SWAPENTS);
+ swap = get_mm_counter_sum(mm, MM_SWAPENTS);
SEQ_PUT_DEC("VmPeak:\t", hiwater_vm);
SEQ_PUT_DEC(" kB\nVmSize:\t", total_vm);
SEQ_PUT_DEC(" kB\nVmLck:\t", mm->locked_vm);
@@ -92,12 +95,12 @@ unsigned long task_statm(struct mm_struct *mm,
unsigned long *shared, unsigned long *text,
unsigned long *data, unsigned long *resident)
{
- *shared = get_mm_counter(mm, MM_FILEPAGES) +
- get_mm_counter(mm, MM_SHMEMPAGES);
+ *shared = get_mm_counter_sum(mm, MM_FILEPAGES) +
+ get_mm_counter_sum(mm, MM_SHMEMPAGES);
*text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK))
>> PAGE_SHIFT;
*data = mm->data_vm + mm->stack_vm;
- *resident = *shared + get_mm_counter(mm, MM_ANONPAGES);
+ *resident = *shared + get_mm_counter_sum(mm, MM_ANONPAGES);
return mm->total_vm;
}
@@ -127,15 +130,134 @@ static void release_task_mempolicy(struct proc_maps_private *priv)
}
#endif
-static struct vm_area_struct *proc_get_vma(struct proc_maps_private *priv,
- loff_t *ppos)
+#ifdef CONFIG_PER_VMA_LOCK
+
+static void unlock_vma(struct proc_maps_private *priv)
+{
+ if (priv->locked_vma) {
+ vma_end_read(priv->locked_vma);
+ priv->locked_vma = NULL;
+ }
+}
+
+static const struct seq_operations proc_pid_maps_op;
+
+static inline bool lock_vma_range(struct seq_file *m,
+ struct proc_maps_private *priv)
+{
+ /*
+ * smaps and numa_maps perform page table walk, therefore require
+ * mmap_lock but maps can be read with locking just the vma and
+ * walking the vma tree under rcu read protection.
+ */
+ if (m->op != &proc_pid_maps_op) {
+ if (mmap_read_lock_killable(priv->mm))
+ return false;
+
+ priv->mmap_locked = true;
+ } else {
+ rcu_read_lock();
+ priv->locked_vma = NULL;
+ priv->mmap_locked = false;
+ }
+
+ return true;
+}
+
+static inline void unlock_vma_range(struct proc_maps_private *priv)
+{
+ if (priv->mmap_locked) {
+ mmap_read_unlock(priv->mm);
+ } else {
+ unlock_vma(priv);
+ rcu_read_unlock();
+ }
+}
+
+static struct vm_area_struct *get_next_vma(struct proc_maps_private *priv,
+ loff_t last_pos)
+{
+ struct vm_area_struct *vma;
+
+ if (priv->mmap_locked)
+ return vma_next(&priv->iter);
+
+ unlock_vma(priv);
+ vma = lock_next_vma(priv->mm, &priv->iter, last_pos);
+ if (!IS_ERR_OR_NULL(vma))
+ priv->locked_vma = vma;
+
+ return vma;
+}
+
+static inline bool fallback_to_mmap_lock(struct proc_maps_private *priv,
+ loff_t pos)
+{
+ if (priv->mmap_locked)
+ return false;
+
+ rcu_read_unlock();
+ mmap_read_lock(priv->mm);
+ /* Reinitialize the iterator after taking mmap_lock */
+ vma_iter_set(&priv->iter, pos);
+ priv->mmap_locked = true;
+
+ return true;
+}
+
+#else /* CONFIG_PER_VMA_LOCK */
+
+static inline bool lock_vma_range(struct seq_file *m,
+ struct proc_maps_private *priv)
+{
+ return mmap_read_lock_killable(priv->mm) == 0;
+}
+
+static inline void unlock_vma_range(struct proc_maps_private *priv)
+{
+ mmap_read_unlock(priv->mm);
+}
+
+static struct vm_area_struct *get_next_vma(struct proc_maps_private *priv,
+ loff_t last_pos)
+{
+ return vma_next(&priv->iter);
+}
+
+static inline bool fallback_to_mmap_lock(struct proc_maps_private *priv,
+ loff_t pos)
{
- struct vm_area_struct *vma = vma_next(&priv->iter);
+ return false;
+}
+
+#endif /* CONFIG_PER_VMA_LOCK */
+
+static struct vm_area_struct *proc_get_vma(struct seq_file *m, loff_t *ppos)
+{
+ struct proc_maps_private *priv = m->private;
+ struct vm_area_struct *vma;
+
+retry:
+ vma = get_next_vma(priv, *ppos);
+ /* EINTR or EAGAIN is possible */
+ if (IS_ERR(vma)) {
+ if (PTR_ERR(vma) == -EAGAIN && fallback_to_mmap_lock(priv, *ppos))
+ goto retry;
+ return vma;
+ }
+
+ /* Store previous position to be able to restart if needed */
+ priv->last_pos = *ppos;
if (vma) {
- *ppos = vma->vm_start;
+ /*
+ * Track the end of the reported vma to ensure position changes
+ * even if previous vma was merged with the next vma and we
+ * found the extended vma with the same vm_start.
+ */
+ *ppos = vma->vm_end;
} else {
- *ppos = -2UL;
+ *ppos = SENTINEL_VMA_GATE;
vma = get_gate_vma(priv->mm);
}
@@ -145,11 +267,11 @@ static struct vm_area_struct *proc_get_vma(struct proc_maps_private *priv,
static void *m_start(struct seq_file *m, loff_t *ppos)
{
struct proc_maps_private *priv = m->private;
- unsigned long last_addr = *ppos;
+ loff_t last_addr = *ppos;
struct mm_struct *mm;
/* See m_next(). Zero at the start or after lseek. */
- if (last_addr == -1UL)
+ if (last_addr == SENTINEL_VMA_END)
return NULL;
priv->task = get_proc_task(priv->inode);
@@ -163,28 +285,34 @@ static void *m_start(struct seq_file *m, loff_t *ppos)
return NULL;
}
- if (mmap_read_lock_killable(mm)) {
+ if (!lock_vma_range(m, priv)) {
mmput(mm);
put_task_struct(priv->task);
priv->task = NULL;
return ERR_PTR(-EINTR);
}
- vma_iter_init(&priv->iter, mm, last_addr);
+ /*
+ * Reset current position if last_addr was set before
+ * and it's not a sentinel.
+ */
+ if (last_addr > 0)
+ *ppos = last_addr = priv->last_pos;
+ vma_iter_init(&priv->iter, mm, (unsigned long)last_addr);
hold_task_mempolicy(priv);
- if (last_addr == -2UL)
+ if (last_addr == SENTINEL_VMA_GATE)
return get_gate_vma(mm);
- return proc_get_vma(priv, ppos);
+ return proc_get_vma(m, ppos);
}
static void *m_next(struct seq_file *m, void *v, loff_t *ppos)
{
- if (*ppos == -2UL) {
- *ppos = -1UL;
+ if (*ppos == SENTINEL_VMA_GATE) {
+ *ppos = SENTINEL_VMA_END;
return NULL;
}
- return proc_get_vma(m->private, ppos);
+ return proc_get_vma(m, ppos);
}
static void m_stop(struct seq_file *m, void *v)
@@ -196,7 +324,7 @@ static void m_stop(struct seq_file *m, void *v)
return;
release_task_mempolicy(priv);
- mmap_read_unlock(mm);
+ unlock_vma_range(priv);
mmput(mm);
put_task_struct(priv->task);
priv->task = NULL;
@@ -2182,7 +2310,7 @@ static unsigned long pagemap_thp_category(struct pagemap_scan_private *p,
categories |= PAGE_IS_FILE;
}
- if (is_zero_pfn(pmd_pfn(pmd)))
+ if (is_huge_zero_pmd(pmd))
categories |= PAGE_IS_PFNZERO;
if (pmd_soft_dirty(pmd))
categories |= PAGE_IS_SOFT_DIRTY;