summaryrefslogtreecommitdiff
path: root/fs/proc
diff options
context:
space:
mode:
Diffstat (limited to 'fs/proc')
-rw-r--r--fs/proc/Kconfig9
-rw-r--r--fs/proc/array.c4
-rw-r--r--fs/proc/base.c169
-rw-r--r--fs/proc/inode.c27
-rw-r--r--fs/proc/meminfo.c10
-rw-r--r--fs/proc/proc_sysctl.c8
-rw-r--r--fs/proc/root.c12
-rw-r--r--fs/proc/task_mmu.c206
-rw-r--r--fs/proc/task_nommu.c6
-rw-r--r--fs/proc/vmcore.c19
10 files changed, 296 insertions, 174 deletions
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig
index 62ee41b4bbd0..cb5629bd5fff 100644
--- a/fs/proc/Kconfig
+++ b/fs/proc/Kconfig
@@ -58,7 +58,8 @@ config PROC_VMCORE_DEVICE_DUMP
snapshot.
If you say Y here, the collected device dumps will be added
- as ELF notes to /proc/vmcore.
+ as ELF notes to /proc/vmcore. You can still disable device
+ dump using the kernel command line option 'novmcoredd'.
config PROC_SYSCTL
bool "Sysctl support (/proc/sys)" if EXPERT
@@ -72,7 +73,7 @@ config PROC_SYSCTL
interface is through /proc/sys. If you say Y here a tree of
modifiable sysctl entries will be generated beneath the
/proc/sys directory. They are explained in the files
- in <file:Documentation/sysctl/>. Note that enabling this
+ in <file:Documentation/admin-guide/sysctl/>. Note that enabling this
option will enlarge the kernel by at least 8 KB.
As it is generally a good thing, you should say Y here unless
@@ -98,3 +99,7 @@ config PROC_CHILDREN
Say Y if you are running any user-space software which takes benefit from
this interface. For example, rkt is such a piece of software.
+
+config PROC_PID_ARCH_STATUS
+ def_bool n
+ depends on PROC_FS
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 55180501b915..46dcb6f0eccf 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -381,9 +381,9 @@ static inline void task_context_switch_counts(struct seq_file *m,
static void task_cpus_allowed(struct seq_file *m, struct task_struct *task)
{
seq_printf(m, "Cpus_allowed:\t%*pb\n",
- cpumask_pr_args(&task->cpus_allowed));
+ cpumask_pr_args(task->cpus_ptr));
seq_printf(m, "Cpus_allowed_list:\t%*pbl\n",
- cpumask_pr_args(&task->cpus_allowed));
+ cpumask_pr_args(task->cpus_ptr));
}
static inline void task_core_dumping(struct seq_file *m, struct mm_struct *mm)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 255f6754c70d..ebea9501afb8 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -209,12 +209,53 @@ static int proc_root_link(struct dentry *dentry, struct path *path)
return result;
}
+/*
+ * If the user used setproctitle(), we just get the string from
+ * user space at arg_start, and limit it to a maximum of one page.
+ */
+static ssize_t get_mm_proctitle(struct mm_struct *mm, char __user *buf,
+ size_t count, unsigned long pos,
+ unsigned long arg_start)
+{
+ char *page;
+ int ret, got;
+
+ if (pos >= PAGE_SIZE)
+ return 0;
+
+ page = (char *)__get_free_page(GFP_KERNEL);
+ if (!page)
+ return -ENOMEM;
+
+ ret = 0;
+ got = access_remote_vm(mm, arg_start, page, PAGE_SIZE, FOLL_ANON);
+ if (got > 0) {
+ int len = strnlen(page, got);
+
+ /* Include the NUL character if it was found */
+ if (len < got)
+ len++;
+
+ if (len > pos) {
+ len -= pos;
+ if (len > count)
+ len = count;
+ len -= copy_to_user(buf, page+pos, len);
+ if (!len)
+ len = -EFAULT;
+ ret = len;
+ }
+ }
+ free_page((unsigned long)page);
+ return ret;
+}
+
static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf,
size_t count, loff_t *ppos)
{
unsigned long arg_start, arg_end, env_start, env_end;
unsigned long pos, len;
- char *page;
+ char *page, c;
/* Check if process spawned far enough to have cmdline. */
if (!mm->env_end)
@@ -231,28 +272,42 @@ static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf,
return 0;
/*
- * We have traditionally allowed the user to re-write
- * the argument strings and overflow the end result
- * into the environment section. But only do that if
- * the environment area is contiguous to the arguments.
+ * We allow setproctitle() to overwrite the argument
+ * strings, and overflow past the original end. But
+ * only when it overflows into the environment area.
*/
- if (env_start != arg_end || env_start >= env_end)
+ if (env_start != arg_end || env_end < env_start)
env_start = env_end = arg_end;
-
- /* .. and limit it to a maximum of one page of slop */
- if (env_end >= arg_end + PAGE_SIZE)
- env_end = arg_end + PAGE_SIZE - 1;
+ len = env_end - arg_start;
/* We're not going to care if "*ppos" has high bits set */
- pos = arg_start + *ppos;
-
- /* .. but we do check the result is in the proper range */
- if (pos < arg_start || pos >= env_end)
+ pos = *ppos;
+ if (pos >= len)
+ return 0;
+ if (count > len - pos)
+ count = len - pos;
+ if (!count)
return 0;
- /* .. and we never go past env_end */
- if (env_end - pos < count)
- count = env_end - pos;
+ /*
+ * Magical special case: if the argv[] end byte is not
+ * zero, the user has overwritten it with setproctitle(3).
+ *
+ * Possible future enhancement: do this only once when
+ * pos is 0, and set a flag in the 'struct file'.
+ */
+ if (access_remote_vm(mm, arg_end-1, &c, 1, FOLL_ANON) == 1 && c)
+ return get_mm_proctitle(mm, buf, count, pos, arg_start);
+
+ /*
+ * For the non-setproctitle() case we limit things strictly
+ * to the [arg_start, arg_end[ range.
+ */
+ pos += arg_start;
+ if (pos < arg_start || pos >= arg_end)
+ return 0;
+ if (count > arg_end - pos)
+ count = arg_end - pos;
page = (char *)__get_free_page(GFP_KERNEL);
if (!page)
@@ -262,48 +317,11 @@ static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf,
while (count) {
int got;
size_t size = min_t(size_t, PAGE_SIZE, count);
- long offset;
-
- /*
- * Are we already starting past the official end?
- * We always include the last byte that is *supposed*
- * to be NUL
- */
- offset = (pos >= arg_end) ? pos - arg_end + 1 : 0;
- got = access_remote_vm(mm, pos - offset, page, size + offset, FOLL_ANON);
- if (got <= offset)
+ got = access_remote_vm(mm, pos, page, size, FOLL_ANON);
+ if (got <= 0)
break;
- got -= offset;
-
- /* Don't walk past a NUL character once you hit arg_end */
- if (pos + got >= arg_end) {
- int n = 0;
-
- /*
- * If we started before 'arg_end' but ended up
- * at or after it, we start the NUL character
- * check at arg_end-1 (where we expect the normal
- * EOF to be).
- *
- * NOTE! This is smaller than 'got', because
- * pos + got >= arg_end
- */
- if (pos < arg_end)
- n = arg_end - pos - 1;
-
- /* Cut off at first NUL after 'n' */
- got = n + strnlen(page+n, offset+got-n);
- if (got < offset)
- break;
- got -= offset;
-
- /* Include the NUL if it existed */
- if (got < size)
- got++;
- }
-
- got -= copy_to_user(buf, page+offset, got);
+ got -= copy_to_user(buf, page, got);
if (unlikely(!got)) {
if (!len)
len = -EFAULT;
@@ -532,8 +550,7 @@ static int proc_oom_score(struct seq_file *m, struct pid_namespace *ns,
unsigned long totalpages = totalram_pages() + total_swap_pages;
unsigned long points = 0;
- points = oom_badness(task, NULL, NULL, totalpages) *
- 1000 / totalpages;
+ points = oom_badness(task, totalpages) * 1000 / totalpages;
seq_printf(m, "%lu\n", points);
return 0;
@@ -1962,9 +1979,12 @@ static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags)
goto out;
if (!dname_to_vma_addr(dentry, &vm_start, &vm_end)) {
- down_read(&mm->mmap_sem);
- exact_vma_exists = !!find_exact_vma(mm, vm_start, vm_end);
- up_read(&mm->mmap_sem);
+ status = down_read_killable(&mm->mmap_sem);
+ if (!status) {
+ exact_vma_exists = !!find_exact_vma(mm, vm_start,
+ vm_end);
+ up_read(&mm->mmap_sem);
+ }
}
mmput(mm);
@@ -2010,8 +2030,11 @@ static int map_files_get_link(struct dentry *dentry, struct path *path)
if (rc)
goto out_mmput;
+ rc = down_read_killable(&mm->mmap_sem);
+ if (rc)
+ goto out_mmput;
+
rc = -ENOENT;
- down_read(&mm->mmap_sem);
vma = find_exact_vma(mm, vm_start, vm_end);
if (vma && vma->vm_file) {
*path = vma->vm_file->f_path;
@@ -2107,7 +2130,11 @@ static struct dentry *proc_map_files_lookup(struct inode *dir,
if (!mm)
goto out_put_task;
- down_read(&mm->mmap_sem);
+ result = ERR_PTR(-EINTR);
+ if (down_read_killable(&mm->mmap_sem))
+ goto out_put_mm;
+
+ result = ERR_PTR(-ENOENT);
vma = find_exact_vma(mm, vm_start, vm_end);
if (!vma)
goto out_no_vma;
@@ -2118,6 +2145,7 @@ static struct dentry *proc_map_files_lookup(struct inode *dir,
out_no_vma:
up_read(&mm->mmap_sem);
+out_put_mm:
mmput(mm);
out_put_task:
put_task_struct(task);
@@ -2160,7 +2188,12 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx)
mm = get_task_mm(task);
if (!mm)
goto out_put_task;
- down_read(&mm->mmap_sem);
+
+ ret = down_read_killable(&mm->mmap_sem);
+ if (ret) {
+ mmput(mm);
+ goto out_put_task;
+ }
nr_files = 0;
@@ -3061,6 +3094,9 @@ static const struct pid_entry tgid_base_stuff[] = {
#ifdef CONFIG_STACKLEAK_METRICS
ONE("stack_depth", S_IRUGO, proc_stack_depth),
#endif
+#ifdef CONFIG_PROC_PID_ARCH_STATUS
+ ONE("arch_status", S_IRUGO, proc_pid_arch_status),
+#endif
};
static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx)
@@ -3448,6 +3484,9 @@ static const struct pid_entry tid_base_stuff[] = {
#ifdef CONFIG_LIVEPATCH
ONE("patch_state", S_IRUSR, proc_pid_patch_state),
#endif
+#ifdef CONFIG_PROC_PID_ARCH_STATUS
+ ONE("arch_status", S_IRUGO, proc_pid_arch_status),
+#endif
};
static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx)
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 5f8d215b3fd0..dbe43a50caf2 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -200,7 +200,8 @@ static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence)
struct proc_dir_entry *pde = PDE(file_inode(file));
loff_t rv = -EINVAL;
if (use_pde(pde)) {
- loff_t (*llseek)(struct file *, loff_t, int);
+ typeof_member(struct file_operations, llseek) llseek;
+
llseek = pde->proc_fops->llseek;
if (!llseek)
llseek = default_llseek;
@@ -212,10 +213,11 @@ static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence)
static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
{
- ssize_t (*read)(struct file *, char __user *, size_t, loff_t *);
struct proc_dir_entry *pde = PDE(file_inode(file));
ssize_t rv = -EIO;
if (use_pde(pde)) {
+ typeof_member(struct file_operations, read) read;
+
read = pde->proc_fops->read;
if (read)
rv = read(file, buf, count, ppos);
@@ -226,10 +228,11 @@ static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count,
static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
{
- ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *);
struct proc_dir_entry *pde = PDE(file_inode(file));
ssize_t rv = -EIO;
if (use_pde(pde)) {
+ typeof_member(struct file_operations, write) write;
+
write = pde->proc_fops->write;
if (write)
rv = write(file, buf, count, ppos);
@@ -242,8 +245,9 @@ static __poll_t proc_reg_poll(struct file *file, struct poll_table_struct *pts)
{
struct proc_dir_entry *pde = PDE(file_inode(file));
__poll_t rv = DEFAULT_POLLMASK;
- __poll_t (*poll)(struct file *, struct poll_table_struct *);
if (use_pde(pde)) {
+ typeof_member(struct file_operations, poll) poll;
+
poll = pde->proc_fops->poll;
if (poll)
rv = poll(file, pts);
@@ -256,8 +260,9 @@ static long proc_reg_unlocked_ioctl(struct file *file, unsigned int cmd, unsigne
{
struct proc_dir_entry *pde = PDE(file_inode(file));
long rv = -ENOTTY;
- long (*ioctl)(struct file *, unsigned int, unsigned long);
if (use_pde(pde)) {
+ typeof_member(struct file_operations, unlocked_ioctl) ioctl;
+
ioctl = pde->proc_fops->unlocked_ioctl;
if (ioctl)
rv = ioctl(file, cmd, arg);
@@ -271,8 +276,9 @@ static long proc_reg_compat_ioctl(struct file *file, unsigned int cmd, unsigned
{
struct proc_dir_entry *pde = PDE(file_inode(file));
long rv = -ENOTTY;
- long (*compat_ioctl)(struct file *, unsigned int, unsigned long);
if (use_pde(pde)) {
+ typeof_member(struct file_operations, compat_ioctl) compat_ioctl;
+
compat_ioctl = pde->proc_fops->compat_ioctl;
if (compat_ioctl)
rv = compat_ioctl(file, cmd, arg);
@@ -286,8 +292,9 @@ static int proc_reg_mmap(struct file *file, struct vm_area_struct *vma)
{
struct proc_dir_entry *pde = PDE(file_inode(file));
int rv = -EIO;
- int (*mmap)(struct file *, struct vm_area_struct *);
if (use_pde(pde)) {
+ typeof_member(struct file_operations, mmap) mmap;
+
mmap = pde->proc_fops->mmap;
if (mmap)
rv = mmap(file, vma);
@@ -305,7 +312,7 @@ proc_reg_get_unmapped_area(struct file *file, unsigned long orig_addr,
unsigned long rv = -EIO;
if (use_pde(pde)) {
- typeof(proc_reg_get_unmapped_area) *get_area;
+ typeof_member(struct file_operations, get_unmapped_area) get_area;
get_area = pde->proc_fops->get_unmapped_area;
#ifdef CONFIG_MMU
@@ -326,8 +333,8 @@ static int proc_reg_open(struct inode *inode, struct file *file)
{
struct proc_dir_entry *pde = PDE(inode);
int rv = 0;
- int (*open)(struct inode *, struct file *);
- int (*release)(struct inode *, struct file *);
+ typeof_member(struct file_operations, open) open;
+ typeof_member(struct file_operations, release) release;
struct pde_opener *pdeo;
/*
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 568d90e17c17..ac9247371871 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -8,7 +8,6 @@
#include <linux/mmzone.h>
#include <linux/proc_fs.h>
#include <linux/percpu.h>
-#include <linux/quicklist.h>
#include <linux/seq_file.h>
#include <linux/swap.h>
#include <linux/vmstat.h>
@@ -106,9 +105,6 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
global_zone_page_state(NR_KERNEL_STACK_KB));
show_val_kb(m, "PageTables: ",
global_zone_page_state(NR_PAGETABLE));
-#ifdef CONFIG_QUICKLIST
- show_val_kb(m, "Quicklists: ", quicklist_total_size());
-#endif
show_val_kb(m, "NFS_Unstable: ",
global_node_page_state(NR_UNSTABLE_NFS));
@@ -120,7 +116,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
show_val_kb(m, "Committed_AS: ", committed);
seq_printf(m, "VmallocTotal: %8lu kB\n",
(unsigned long)VMALLOC_TOTAL >> 10);
- show_val_kb(m, "VmallocUsed: ", 0ul);
+ show_val_kb(m, "VmallocUsed: ", vmalloc_nr_pages());
show_val_kb(m, "VmallocChunk: ", 0ul);
show_val_kb(m, "Percpu: ", pcpu_nr_pages());
@@ -136,6 +132,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
global_node_page_state(NR_SHMEM_THPS) * HPAGE_PMD_NR);
show_val_kb(m, "ShmemPmdMapped: ",
global_node_page_state(NR_SHMEM_PMDMAPPED) * HPAGE_PMD_NR);
+ show_val_kb(m, "FileHugePages: ",
+ global_node_page_state(NR_FILE_THPS) * HPAGE_PMD_NR);
+ show_val_kb(m, "FilePmdMapped: ",
+ global_node_page_state(NR_FILE_PMDMAPPED) * HPAGE_PMD_NR);
#endif
#ifdef CONFIG_CMA
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index c74570736b24..d80989b6c344 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -22,6 +22,10 @@ static const struct inode_operations proc_sys_inode_operations;
static const struct file_operations proc_sys_dir_file_operations;
static const struct inode_operations proc_sys_dir_operations;
+/* shared constants to be used in various sysctls */
+const int sysctl_vals[] = { 0, 1, INT_MAX };
+EXPORT_SYMBOL(sysctl_vals);
+
/* Support for permanently empty directories */
struct ctl_table sysctl_mount_point[] = {
@@ -499,6 +503,10 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
if (root->set_ownership)
root->set_ownership(head, table, &inode->i_uid, &inode->i_gid);
+ else {
+ inode->i_uid = GLOBAL_ROOT_UID;
+ inode->i_gid = GLOBAL_ROOT_GID;
+ }
return inode;
}
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 8b145e7b9661..0b7c8dffc9ae 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -157,18 +157,14 @@ static int proc_get_tree(struct fs_context *fc)
{
struct proc_fs_context *ctx = fc->fs_private;
- put_user_ns(fc->user_ns);
- fc->user_ns = get_user_ns(ctx->pid_ns->user_ns);
- fc->s_fs_info = ctx->pid_ns;
- return vfs_get_super(fc, vfs_get_keyed_super, proc_fill_super);
+ return get_tree_keyed(fc, proc_fill_super, ctx->pid_ns);
}
static void proc_fs_context_free(struct fs_context *fc)
{
struct proc_fs_context *ctx = fc->fs_private;
- if (ctx->pid_ns)
- put_pid_ns(ctx->pid_ns);
+ put_pid_ns(ctx->pid_ns);
kfree(ctx);
}
@@ -188,6 +184,8 @@ static int proc_init_fs_context(struct fs_context *fc)
return -ENOMEM;
ctx->pid_ns = get_pid_ns(task_active_pid_ns(current));
+ put_user_ns(fc->user_ns);
+ fc->user_ns = get_user_ns(ctx->pid_ns->user_ns);
fc->fs_private = ctx;
fc->ops = &proc_fs_context_ops;
return 0;
@@ -211,7 +209,7 @@ static struct file_system_type proc_fs_type = {
.init_fs_context = proc_init_fs_context,
.parameters = &proc_fs_parameters,
.kill_sb = proc_kill_sb,
- .fs_flags = FS_USERNS_MOUNT,
+ .fs_flags = FS_USERNS_MOUNT | FS_DISALLOW_NOTIFY_PERM,
};
void __init proc_root_init(void)
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 01d4eb0e6bd1..9442631fd4af 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-#include <linux/mm.h>
+#include <linux/pagewalk.h>
#include <linux/vmacache.h>
#include <linux/hugetlb.h>
#include <linux/huge_mm.h>
@@ -166,7 +166,11 @@ static void *m_start(struct seq_file *m, loff_t *ppos)
if (!mm || !mmget_not_zero(mm))
return NULL;
- down_read(&mm->mmap_sem);
+ if (down_read_killable(&mm->mmap_sem)) {
+ mmput(mm);
+ return ERR_PTR(-EINTR);
+ }
+
hold_task_mempolicy(priv);
priv->tail_vma = get_gate_vma(mm);
@@ -413,21 +417,58 @@ struct mem_size_stats {
unsigned long lazyfree;
unsigned long anonymous_thp;
unsigned long shmem_thp;
+ unsigned long file_thp;
unsigned long swap;
unsigned long shared_hugetlb;
unsigned long private_hugetlb;
u64 pss;
+ u64 pss_anon;
+ u64 pss_file;
+ u64 pss_shmem;
u64 pss_locked;
u64 swap_pss;
bool check_shmem_swap;
};
+static void smaps_page_accumulate(struct mem_size_stats *mss,
+ struct page *page, unsigned long size, unsigned long pss,
+ bool dirty, bool locked, bool private)
+{
+ mss->pss += pss;
+
+ if (PageAnon(page))
+ mss->pss_anon += pss;
+ else if (PageSwapBacked(page))
+ mss->pss_shmem += pss;
+ else
+ mss->pss_file += pss;
+
+ if (locked)
+ mss->pss_locked += pss;
+
+ if (dirty || PageDirty(page)) {
+ if (private)
+ mss->private_dirty += size;
+ else
+ mss->shared_dirty += size;
+ } else {
+ if (private)
+ mss->private_clean += size;
+ else
+ mss->shared_clean += size;
+ }
+}
+
static void smaps_account(struct mem_size_stats *mss, struct page *page,
bool compound, bool young, bool dirty, bool locked)
{
- int i, nr = compound ? 1 << compound_order(page) : 1;
+ int i, nr = compound ? compound_nr(page) : 1;
unsigned long size = nr * PAGE_SIZE;
+ /*
+ * First accumulate quantities that depend only on |size| and the type
+ * of the compound page.
+ */
if (PageAnon(page)) {
mss->anonymous += size;
if (!PageSwapBacked(page) && !dirty && !PageDirty(page))
@@ -440,42 +481,25 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page,
mss->referenced += size;
/*
+ * Then accumulate quantities that may depend on sharing, or that may
+ * differ page-by-page.
+ *
* page_count(page) == 1 guarantees the page is mapped exactly once.
* If any subpage of the compound page mapped with PTE it would elevate
* page_count().
*/
if (page_count(page) == 1) {
- if (dirty || PageDirty(page))
- mss->private_dirty += size;
- else
- mss->private_clean += size;
- mss->pss += (u64)size << PSS_SHIFT;
- if (locked)
- mss->pss_locked += (u64)size << PSS_SHIFT;
+ smaps_page_accumulate(mss, page, size, size << PSS_SHIFT, dirty,
+ locked, true);
return;
}
-
for (i = 0; i < nr; i++, page++) {
int mapcount = page_mapcount(page);
- unsigned long pss = (PAGE_SIZE << PSS_SHIFT);
-
- if (mapcount >= 2) {
- if (dirty || PageDirty(page))
- mss->shared_dirty += PAGE_SIZE;
- else
- mss->shared_clean += PAGE_SIZE;
- mss->pss += pss / mapcount;
- if (locked)
- mss->pss_locked += pss / mapcount;
- } else {
- if (dirty || PageDirty(page))
- mss->private_dirty += PAGE_SIZE;
- else
- mss->private_clean += PAGE_SIZE;
- mss->pss += pss;
- if (locked)
- mss->pss_locked += pss;
- }
+ unsigned long pss = PAGE_SIZE << PSS_SHIFT;
+ if (mapcount >= 2)
+ pss /= mapcount;
+ smaps_page_accumulate(mss, page, PAGE_SIZE, pss, dirty, locked,
+ mapcount < 2);
}
}
@@ -490,7 +514,9 @@ static int smaps_pte_hole(unsigned long addr, unsigned long end,
return 0;
}
-#endif
+#else
+#define smaps_pte_hole NULL
+#endif /* CONFIG_SHMEM */
static void smaps_pte_entry(pte_t *pte, unsigned long addr,
struct mm_walk *walk)
@@ -563,7 +589,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
else if (is_zone_device_page(page))
/* pass */;
else
- VM_BUG_ON_PAGE(1, page);
+ mss->file_thp += HPAGE_PMD_SIZE;
smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd), locked);
}
#else
@@ -706,21 +732,24 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
}
return 0;
}
+#else
+#define smaps_hugetlb_range NULL
#endif /* HUGETLB_PAGE */
+static const struct mm_walk_ops smaps_walk_ops = {
+ .pmd_entry = smaps_pte_range,
+ .hugetlb_entry = smaps_hugetlb_range,
+};
+
+static const struct mm_walk_ops smaps_shmem_walk_ops = {
+ .pmd_entry = smaps_pte_range,
+ .hugetlb_entry = smaps_hugetlb_range,
+ .pte_hole = smaps_pte_hole,
+};
+
static void smap_gather_stats(struct vm_area_struct *vma,
struct mem_size_stats *mss)
{
- struct mm_walk smaps_walk = {
- .pmd_entry = smaps_pte_range,
-#ifdef CONFIG_HUGETLB_PAGE
- .hugetlb_entry = smaps_hugetlb_range,
-#endif
- .mm = vma->vm_mm,
- };
-
- smaps_walk.private = mss;
-
#ifdef CONFIG_SHMEM
/* In case of smaps_rollup, reset the value from previous vma */
mss->check_shmem_swap = false;
@@ -742,22 +771,36 @@ static void smap_gather_stats(struct vm_area_struct *vma,
mss->swap += shmem_swapped;
} else {
mss->check_shmem_swap = true;
- smaps_walk.pte_hole = smaps_pte_hole;
+ walk_page_vma(vma, &smaps_shmem_walk_ops, mss);
+ return;
}
}
#endif
/* mmap_sem is held in m_start */
- walk_page_vma(vma, &smaps_walk);
+ walk_page_vma(vma, &smaps_walk_ops, mss);
}
#define SEQ_PUT_DEC(str, val) \
seq_put_decimal_ull_width(m, str, (val) >> 10, 8)
/* Show the contents common for smaps and smaps_rollup */
-static void __show_smap(struct seq_file *m, const struct mem_size_stats *mss)
+static void __show_smap(struct seq_file *m, const struct mem_size_stats *mss,
+ bool rollup_mode)
{
SEQ_PUT_DEC("Rss: ", mss->resident);
SEQ_PUT_DEC(" kB\nPss: ", mss->pss >> PSS_SHIFT);
+ if (rollup_mode) {
+ /*
+ * These are meaningful only for smaps_rollup, otherwise two of
+ * them are zero, and the other one is the same as Pss.
+ */
+ SEQ_PUT_DEC(" kB\nPss_Anon: ",
+ mss->pss_anon >> PSS_SHIFT);
+ SEQ_PUT_DEC(" kB\nPss_File: ",
+ mss->pss_file >> PSS_SHIFT);
+ SEQ_PUT_DEC(" kB\nPss_Shmem: ",
+ mss->pss_shmem >> PSS_SHIFT);
+ }
SEQ_PUT_DEC(" kB\nShared_Clean: ", mss->shared_clean);
SEQ_PUT_DEC(" kB\nShared_Dirty: ", mss->shared_dirty);
SEQ_PUT_DEC(" kB\nPrivate_Clean: ", mss->private_clean);
@@ -767,6 +810,7 @@ static void __show_smap(struct seq_file *m, const struct mem_size_stats *mss)
SEQ_PUT_DEC(" kB\nLazyFree: ", mss->lazyfree);
SEQ_PUT_DEC(" kB\nAnonHugePages: ", mss->anonymous_thp);
SEQ_PUT_DEC(" kB\nShmemPmdMapped: ", mss->shmem_thp);
+ SEQ_PUT_DEC(" kB\nFilePmdMapped: ", mss->file_thp);
SEQ_PUT_DEC(" kB\nShared_Hugetlb: ", mss->shared_hugetlb);
seq_put_decimal_ull_width(m, " kB\nPrivate_Hugetlb: ",
mss->private_hugetlb >> 10, 7);
@@ -794,9 +838,10 @@ static int show_smap(struct seq_file *m, void *v)
SEQ_PUT_DEC(" kB\nMMUPageSize: ", vma_mmu_pagesize(vma));
seq_puts(m, " kB\n");
- __show_smap(m, &mss);
+ __show_smap(m, &mss, false);
- seq_printf(m, "THPeligible: %d\n", transparent_hugepage_enabled(vma));
+ seq_printf(m, "THPeligible: %d\n",
+ transparent_hugepage_enabled(vma));
if (arch_pkeys_enabled())
seq_printf(m, "ProtectionKey: %8u\n", vma_pkey(vma));
@@ -828,7 +873,10 @@ static int show_smaps_rollup(struct seq_file *m, void *v)
memset(&mss, 0, sizeof(mss));
- down_read(&mm->mmap_sem);
+ ret = down_read_killable(&mm->mmap_sem);
+ if (ret)
+ goto out_put_mm;
+
hold_task_mempolicy(priv);
for (vma = priv->mm->mmap; vma; vma = vma->vm_next) {
@@ -841,12 +889,13 @@ static int show_smaps_rollup(struct seq_file *m, void *v)
seq_pad(m, ' ');
seq_puts(m, "[rollup]\n");
- __show_smap(m, &mss);
+ __show_smap(m, &mss, true);
release_task_mempolicy(priv);
up_read(&mm->mmap_sem);
- mmput(mm);
+out_put_mm:
+ mmput(mm);
out_put_task:
put_task_struct(priv->task);
priv->task = NULL;
@@ -1077,6 +1126,11 @@ static int clear_refs_test_walk(unsigned long start, unsigned long end,
return 0;
}
+static const struct mm_walk_ops clear_refs_walk_ops = {
+ .pmd_entry = clear_refs_pte_range,
+ .test_walk = clear_refs_test_walk,
+};
+
static ssize_t clear_refs_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
@@ -1110,12 +1164,6 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
struct clear_refs_private cp = {
.type = type,
};
- struct mm_walk clear_refs_walk = {
- .pmd_entry = clear_refs_pte_range,
- .test_walk = clear_refs_test_walk,
- .mm = mm,
- .private = &cp,
- };
if (type == CLEAR_REFS_MM_HIWATER_RSS) {
if (down_write_killable(&mm->mmap_sem)) {
@@ -1132,7 +1180,10 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
goto out_mm;
}
- down_read(&mm->mmap_sem);
+ if (down_read_killable(&mm->mmap_sem)) {
+ count = -EINTR;
+ goto out_mm;
+ }
tlb_gather_mmu(&tlb, mm, 0, -1);
if (type == CLEAR_REFS_SOFT_DIRTY) {
for (vma = mm->mmap; vma; vma = vma->vm_next) {
@@ -1173,7 +1224,8 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
0, NULL, mm, 0, -1UL);
mmu_notifier_invalidate_range_start(&range);
}
- walk_page_range(0, mm->highest_vm_end, &clear_refs_walk);
+ walk_page_range(mm, 0, mm->highest_vm_end, &clear_refs_walk_ops,
+ &cp);
if (type == CLEAR_REFS_SOFT_DIRTY)
mmu_notifier_invalidate_range_end(&range);
tlb_finish_mmu(&tlb, 0, -1);
@@ -1279,7 +1331,7 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
if (pm->show_pfn)
frame = pte_pfn(pte);
flags |= PM_PRESENT;
- page = _vm_normal_page(vma, addr, pte, true);
+ page = vm_normal_page(vma, addr, pte);
if (pte_soft_dirty(pte))
flags |= PM_SOFT_DIRTY;
} else if (is_swap_pte(pte)) {
@@ -1445,8 +1497,16 @@ static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask,
return err;
}
+#else
+#define pagemap_hugetlb_range NULL
#endif /* HUGETLB_PAGE */
+static const struct mm_walk_ops pagemap_ops = {
+ .pmd_entry = pagemap_pmd_range,
+ .pte_hole = pagemap_pte_hole,
+ .hugetlb_entry = pagemap_hugetlb_range,
+};
+
/*
* /proc/pid/pagemap - an array mapping virtual pages to pfns
*
@@ -1478,7 +1538,6 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
{
struct mm_struct *mm = file->private_data;
struct pagemapread pm;
- struct mm_walk pagemap_walk = {};
unsigned long src;
unsigned long svpfn;
unsigned long start_vaddr;
@@ -1506,14 +1565,6 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
if (!pm.buffer)
goto out_mm;
- pagemap_walk.pmd_entry = pagemap_pmd_range;
- pagemap_walk.pte_hole = pagemap_pte_hole;
-#ifdef CONFIG_HUGETLB_PAGE
- pagemap_walk.hugetlb_entry = pagemap_hugetlb_range;
-#endif
- pagemap_walk.mm = mm;
- pagemap_walk.private = &pm;
-
src = *ppos;
svpfn = src / PM_ENTRY_BYTES;
start_vaddr = svpfn << PAGE_SHIFT;
@@ -1539,8 +1590,10 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
/* overflow ? */
if (end < start_vaddr || end > end_vaddr)
end = end_vaddr;
- down_read(&mm->mmap_sem);
- ret = walk_page_range(start_vaddr, end, &pagemap_walk);
+ ret = down_read_killable(&mm->mmap_sem);
+ if (ret)
+ goto out_free;
+ ret = walk_page_range(mm, start_vaddr, end, &pagemap_ops, &pm);
up_read(&mm->mmap_sem);
start_vaddr = end;
@@ -1752,6 +1805,11 @@ static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
}
#endif
+static const struct mm_walk_ops show_numa_ops = {
+ .hugetlb_entry = gather_hugetlb_stats,
+ .pmd_entry = gather_pte_stats,
+};
+
/*
* Display pages allocated per node and memory policy via /proc.
*/
@@ -1763,12 +1821,6 @@ static int show_numa_map(struct seq_file *m, void *v)
struct numa_maps *md = &numa_priv->md;
struct file *file = vma->vm_file;
struct mm_struct *mm = vma->vm_mm;
- struct mm_walk walk = {
- .hugetlb_entry = gather_hugetlb_stats,
- .pmd_entry = gather_pte_stats,
- .private = md,
- .mm = mm,
- };
struct mempolicy *pol;
char buffer[64];
int nid;
@@ -1802,7 +1854,7 @@ static int show_numa_map(struct seq_file *m, void *v)
seq_puts(m, " huge");
/* mmap_sem is held by m_start */
- walk_page_vma(vma, &walk);
+ walk_page_vma(vma, &show_numa_ops, md);
if (!md->pages)
goto out;
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 36bf0f2e102e..7907e6419e57 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -211,7 +211,11 @@ static void *m_start(struct seq_file *m, loff_t *pos)
if (!mm || !mmget_not_zero(mm))
return NULL;
- down_read(&mm->mmap_sem);
+ if (down_read_killable(&mm->mmap_sem)) {
+ mmput(mm);
+ return ERR_PTR(-EINTR);
+ }
+
/* start from the Nth VMA */
for (p = rb_first(&mm->mm_rb); p; p = rb_next(p))
if (n-- == 0)
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 7bb96fdd38ad..7b13988796e1 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -21,6 +21,7 @@
#include <linux/init.h>
#include <linux/crash_dump.h>
#include <linux/list.h>
+#include <linux/moduleparam.h>
#include <linux/mutex.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
@@ -54,6 +55,9 @@ static struct proc_dir_entry *proc_vmcore;
/* Device Dump list and mutex to synchronize access to list */
static LIST_HEAD(vmcoredd_list);
static DEFINE_MUTEX(vmcoredd_mutex);
+
+static bool vmcoredd_disabled;
+core_param(novmcoredd, vmcoredd_disabled, bool, 0);
#endif /* CONFIG_PROC_VMCORE_DEVICE_DUMP */
/* Device Dump Size */
@@ -100,9 +104,9 @@ static int pfn_is_ram(unsigned long pfn)
}
/* Reads a page from the oldmem device from given offset. */
-static ssize_t read_from_oldmem(char *buf, size_t count,
- u64 *ppos, int userbuf,
- bool encrypted)
+ssize_t read_from_oldmem(char *buf, size_t count,
+ u64 *ppos, int userbuf,
+ bool encrypted)
{
unsigned long pfn, offset;
size_t nr_bytes;
@@ -174,7 +178,7 @@ ssize_t __weak elfcorehdr_read(char *buf, size_t count, u64 *ppos)
*/
ssize_t __weak elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos)
{
- return read_from_oldmem(buf, count, ppos, 0, sme_active());
+ return read_from_oldmem(buf, count, ppos, 0, mem_encrypt_active());
}
/*
@@ -374,7 +378,7 @@ static ssize_t __read_vmcore(char *buffer, size_t buflen, loff_t *fpos,
buflen);
start = m->paddr + *fpos - m->offset;
tmp = read_from_oldmem(buffer, tsz, &start,
- userbuf, sme_active());
+ userbuf, mem_encrypt_active());
if (tmp < 0)
return tmp;
buflen -= tsz;
@@ -1452,6 +1456,11 @@ int vmcore_add_device_dump(struct vmcoredd_data *data)
size_t data_size;
int ret;
+ if (vmcoredd_disabled) {
+ pr_err_once("Device dump is disabled\n");
+ return -EINVAL;
+ }
+
if (!data || !strlen(data->dump_name) ||
!data->vmcoredd_callback || !data->size)
return -EINVAL;