diff options
Diffstat (limited to 'fs/proc/inode.c')
| -rw-r--r-- | fs/proc/inode.c | 488 |
1 files changed, 315 insertions, 173 deletions
diff --git a/fs/proc/inode.c b/fs/proc/inode.c index da649ccd6804..b7634f975d98 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -24,31 +24,25 @@ #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/mount.h> -#include <linux/magic.h> - -#include <linux/uaccess.h> +#include <linux/bug.h> #include "internal.h" static void proc_evict_inode(struct inode *inode) { - struct proc_dir_entry *de; struct ctl_table_header *head; + struct proc_inode *ei = PROC_I(inode); truncate_inode_pages_final(&inode->i_data); clear_inode(inode); /* Stop tracking associated processes */ - put_pid(PROC_I(inode)->pid); + if (ei->pid) + proc_pid_evict_inode(ei); - /* Let go of any associated proc directory entry */ - de = PDE(inode); - if (de) - pde_put(de); - - head = PROC_I(inode)->sysctl; + head = ei->sysctl; if (head) { - RCU_INIT_POINTER(PROC_I(inode)->sysctl, NULL); + WRITE_ONCE(ei->sysctl, NULL); proc_sys_evict_inode(inode, head); } } @@ -60,7 +54,7 @@ static struct inode *proc_alloc_inode(struct super_block *sb) { struct proc_inode *ei; - ei = kmem_cache_alloc(proc_inode_cachep, GFP_KERNEL); + ei = alloc_inode_sb(sb, proc_inode_cachep, GFP_KERNEL); if (!ei) return NULL; ei->pid = NULL; @@ -69,19 +63,21 @@ static struct inode *proc_alloc_inode(struct super_block *sb) ei->pde = NULL; ei->sysctl = NULL; ei->sysctl_entry = NULL; + INIT_HLIST_NODE(&ei->sibling_inodes); ei->ns_ops = NULL; return &ei->vfs_inode; } -static void proc_i_callback(struct rcu_head *head) +static void proc_free_inode(struct inode *inode) { - struct inode *inode = container_of(head, struct inode, i_rcu); - kmem_cache_free(proc_inode_cachep, PROC_I(inode)); -} + struct proc_inode *ei = PROC_I(inode); -static void proc_destroy_inode(struct inode *inode) -{ - call_rcu(&inode->i_rcu, proc_i_callback); + if (ei->pid) + put_pid(ei->pid); + /* Let go of any associated proc directory entry */ + if (ei->pde) + pde_put(ei->pde); + kmem_cache_free(proc_inode_cachep, PROC_I(inode)); } static void init_once(void *foo) @@ -96,7 +92,7 @@ void __init proc_init_kmemcache(void) proc_inode_cachep = kmem_cache_create("proc_inode_cache", sizeof(struct proc_inode), 0, (SLAB_RECLAIM_ACCOUNT| - SLAB_MEM_SPREAD|SLAB_ACCOUNT| + SLAB_ACCOUNT| SLAB_PANIC), init_once); pde_opener_cache = @@ -109,26 +105,91 @@ void __init proc_init_kmemcache(void) BUILD_BUG_ON(sizeof(struct proc_dir_entry) >= SIZEOF_PDE); } +void proc_invalidate_siblings_dcache(struct hlist_head *inodes, spinlock_t *lock) +{ + struct hlist_node *node; + struct super_block *old_sb = NULL; + + rcu_read_lock(); + while ((node = hlist_first_rcu(inodes))) { + struct proc_inode *ei = hlist_entry(node, struct proc_inode, sibling_inodes); + struct super_block *sb; + struct inode *inode; + + spin_lock(lock); + hlist_del_init_rcu(&ei->sibling_inodes); + spin_unlock(lock); + + inode = &ei->vfs_inode; + sb = inode->i_sb; + if ((sb != old_sb) && !atomic_inc_not_zero(&sb->s_active)) + continue; + inode = igrab(inode); + rcu_read_unlock(); + if (sb != old_sb) { + if (old_sb) + deactivate_super(old_sb); + old_sb = sb; + } + if (unlikely(!inode)) { + rcu_read_lock(); + continue; + } + + if (S_ISDIR(inode->i_mode)) { + struct dentry *dir = d_find_any_alias(inode); + if (dir) { + d_invalidate(dir); + dput(dir); + } + } else { + struct dentry *dentry; + while ((dentry = d_find_alias(inode))) { + d_invalidate(dentry); + dput(dentry); + } + } + iput(inode); + + rcu_read_lock(); + } + rcu_read_unlock(); + if (old_sb) + deactivate_super(old_sb); +} + +static inline const char *hidepid2str(enum proc_hidepid v) +{ + switch (v) { + case HIDEPID_OFF: return "off"; + case HIDEPID_NO_ACCESS: return "noaccess"; + case HIDEPID_INVISIBLE: return "invisible"; + case HIDEPID_NOT_PTRACEABLE: return "ptraceable"; + } + WARN_ONCE(1, "bad hide_pid value: %d\n", v); + return "unknown"; +} + static int proc_show_options(struct seq_file *seq, struct dentry *root) { - struct super_block *sb = root->d_sb; - struct pid_namespace *pid = sb->s_fs_info; + struct proc_fs_info *fs_info = proc_sb_info(root->d_sb); - if (!gid_eq(pid->pid_gid, GLOBAL_ROOT_GID)) - seq_printf(seq, ",gid=%u", from_kgid_munged(&init_user_ns, pid->pid_gid)); - if (pid->hide_pid != HIDEPID_OFF) - seq_printf(seq, ",hidepid=%u", pid->hide_pid); + if (!gid_eq(fs_info->pid_gid, GLOBAL_ROOT_GID)) + seq_printf(seq, ",gid=%u", from_kgid_munged(&init_user_ns, fs_info->pid_gid)); + if (fs_info->hide_pid != HIDEPID_OFF) + seq_printf(seq, ",hidepid=%s", hidepid2str(fs_info->hide_pid)); + if (fs_info->pidonly != PROC_PIDONLY_OFF) + seq_printf(seq, ",subset=pid"); return 0; } -static const struct super_operations proc_sops = { +const struct super_operations proc_sops = { .alloc_inode = proc_alloc_inode, - .destroy_inode = proc_destroy_inode, - .drop_inode = generic_delete_inode, + .free_inode = proc_free_inode, + .drop_inode = inode_just_drop, .evict_inode = proc_evict_inode, .statfs = simple_statfs, - .remount_fs = proc_remount, .show_options = proc_show_options, }; @@ -145,8 +206,17 @@ static void unuse_pde(struct proc_dir_entry *pde) complete(pde->pde_unload_completion); } -/* pde is locked on entry, unlocked on exit */ +/* + * At most 2 contexts can enter this function: the one doing the last + * close on the descriptor and whoever is deleting PDE itself. + * + * First to enter calls ->proc_release hook and signals its completion + * to the second one which waits and then does nothing. + * + * PDE is locked on entry, unlocked on exit. + */ static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo) + __releases(&pde->pde_unload_lock) { /* * close() (proc_reg_release()) can't delete an entry and proceed: @@ -154,9 +224,6 @@ static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo) * * rmmod (remove_proc_entry() et al) can't delete an entry and proceed: * "struct file" needs to be available at the right moment. - * - * Therefore, first process to enter this function does ->release() and - * signals its completion to the other process which does nothing. */ if (pdeo->closing) { /* somebody else is doing that, just wait */ @@ -170,10 +237,12 @@ static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo) pdeo->closing = true; spin_unlock(&pde->pde_unload_lock); + file = pdeo->file; - pde->proc_fops->release(file_inode(file), file); + pde->proc_ops->proc_release(file_inode(file), file); + spin_lock(&pde->pde_unload_lock); - /* After ->release. */ + /* Strictly after ->proc_release, see above. */ list_del(&pdeo->lh); c = pdeo->c; spin_unlock(&pde->pde_unload_lock); @@ -207,124 +276,191 @@ static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence) { struct proc_dir_entry *pde = PDE(file_inode(file)); loff_t rv = -EINVAL; - if (use_pde(pde)) { - loff_t (*llseek)(struct file *, loff_t, int); - llseek = pde->proc_fops->llseek; - if (!llseek) - llseek = default_llseek; - rv = llseek(file, offset, whence); + + if (pde_is_permanent(pde)) { + return pde->proc_ops->proc_lseek(file, offset, whence); + } else if (use_pde(pde)) { + rv = pde->proc_ops->proc_lseek(file, offset, whence); unuse_pde(pde); } return rv; } +static ssize_t proc_reg_read_iter(struct kiocb *iocb, struct iov_iter *iter) +{ + struct proc_dir_entry *pde = PDE(file_inode(iocb->ki_filp)); + ssize_t ret; + + if (pde_is_permanent(pde)) + return pde->proc_ops->proc_read_iter(iocb, iter); + + if (!use_pde(pde)) + return -EIO; + ret = pde->proc_ops->proc_read_iter(iocb, iter); + unuse_pde(pde); + return ret; +} + +static ssize_t pde_read(struct proc_dir_entry *pde, struct file *file, char __user *buf, size_t count, loff_t *ppos) +{ + const auto read = pde->proc_ops->proc_read; + if (read) + return read(file, buf, count, ppos); + return -EIO; +} + static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - ssize_t (*read)(struct file *, char __user *, size_t, loff_t *); struct proc_dir_entry *pde = PDE(file_inode(file)); ssize_t rv = -EIO; - if (use_pde(pde)) { - read = pde->proc_fops->read; - if (read) - rv = read(file, buf, count, ppos); + + if (pde_is_permanent(pde)) { + return pde_read(pde, file, buf, count, ppos); + } else if (use_pde(pde)) { + rv = pde_read(pde, file, buf, count, ppos); unuse_pde(pde); } return rv; } +static ssize_t pde_write(struct proc_dir_entry *pde, struct file *file, const char __user *buf, size_t count, loff_t *ppos) +{ + const auto write = pde->proc_ops->proc_write; + if (write) + return write(file, buf, count, ppos); + return -EIO; +} + static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *); struct proc_dir_entry *pde = PDE(file_inode(file)); ssize_t rv = -EIO; - if (use_pde(pde)) { - write = pde->proc_fops->write; - if (write) - rv = write(file, buf, count, ppos); + + if (pde_is_permanent(pde)) { + return pde_write(pde, file, buf, count, ppos); + } else if (use_pde(pde)) { + rv = pde_write(pde, file, buf, count, ppos); unuse_pde(pde); } return rv; } +static __poll_t pde_poll(struct proc_dir_entry *pde, struct file *file, struct poll_table_struct *pts) +{ + const auto poll = pde->proc_ops->proc_poll; + if (poll) + return poll(file, pts); + return DEFAULT_POLLMASK; +} + static __poll_t proc_reg_poll(struct file *file, struct poll_table_struct *pts) { struct proc_dir_entry *pde = PDE(file_inode(file)); __poll_t rv = DEFAULT_POLLMASK; - __poll_t (*poll)(struct file *, struct poll_table_struct *); - if (use_pde(pde)) { - poll = pde->proc_fops->poll; - if (poll) - rv = poll(file, pts); + + if (pde_is_permanent(pde)) { + return pde_poll(pde, file, pts); + } else if (use_pde(pde)) { + rv = pde_poll(pde, file, pts); unuse_pde(pde); } return rv; } +static long pde_ioctl(struct proc_dir_entry *pde, struct file *file, unsigned int cmd, unsigned long arg) +{ + const auto ioctl = pde->proc_ops->proc_ioctl; + if (ioctl) + return ioctl(file, cmd, arg); + return -ENOTTY; +} + static long proc_reg_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct proc_dir_entry *pde = PDE(file_inode(file)); long rv = -ENOTTY; - long (*ioctl)(struct file *, unsigned int, unsigned long); - if (use_pde(pde)) { - ioctl = pde->proc_fops->unlocked_ioctl; - if (ioctl) - rv = ioctl(file, cmd, arg); + + if (pde_is_permanent(pde)) { + return pde_ioctl(pde, file, cmd, arg); + } else if (use_pde(pde)) { + rv = pde_ioctl(pde, file, cmd, arg); unuse_pde(pde); } return rv; } #ifdef CONFIG_COMPAT +static long pde_compat_ioctl(struct proc_dir_entry *pde, struct file *file, unsigned int cmd, unsigned long arg) +{ + const auto compat_ioctl = pde->proc_ops->proc_compat_ioctl; + if (compat_ioctl) + return compat_ioctl(file, cmd, arg); + return -ENOTTY; +} + static long proc_reg_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct proc_dir_entry *pde = PDE(file_inode(file)); long rv = -ENOTTY; - long (*compat_ioctl)(struct file *, unsigned int, unsigned long); - if (use_pde(pde)) { - compat_ioctl = pde->proc_fops->compat_ioctl; - if (compat_ioctl) - rv = compat_ioctl(file, cmd, arg); + if (pde_is_permanent(pde)) { + return pde_compat_ioctl(pde, file, cmd, arg); + } else if (use_pde(pde)) { + rv = pde_compat_ioctl(pde, file, cmd, arg); unuse_pde(pde); } return rv; } #endif +static int pde_mmap(struct proc_dir_entry *pde, struct file *file, struct vm_area_struct *vma) +{ + const auto mmap = pde->proc_ops->proc_mmap; + if (mmap) + return mmap(file, vma); + return -EIO; +} + static int proc_reg_mmap(struct file *file, struct vm_area_struct *vma) { struct proc_dir_entry *pde = PDE(file_inode(file)); int rv = -EIO; - int (*mmap)(struct file *, struct vm_area_struct *); - if (use_pde(pde)) { - mmap = pde->proc_fops->mmap; - if (mmap) - rv = mmap(file, vma); + + if (pde_is_permanent(pde)) { + return pde_mmap(pde, file, vma); + } else if (use_pde(pde)) { + rv = pde_mmap(pde, file, vma); unuse_pde(pde); } return rv; } static unsigned long -proc_reg_get_unmapped_area(struct file *file, unsigned long orig_addr, +pde_get_unmapped_area(struct proc_dir_entry *pde, struct file *file, unsigned long orig_addr, unsigned long len, unsigned long pgoff, unsigned long flags) { - struct proc_dir_entry *pde = PDE(file_inode(file)); - unsigned long rv = -EIO; - - if (use_pde(pde)) { - typeof(proc_reg_get_unmapped_area) *get_area; + if (pde->proc_ops->proc_get_unmapped_area) + return pde->proc_ops->proc_get_unmapped_area(file, orig_addr, len, pgoff, flags); - get_area = pde->proc_fops->get_unmapped_area; #ifdef CONFIG_MMU - if (!get_area) - get_area = current->mm->get_unmapped_area; + return mm_get_unmapped_area(file, orig_addr, len, pgoff, flags); #endif - if (get_area) - rv = get_area(file, orig_addr, len, pgoff, flags); - else - rv = orig_addr; + return orig_addr; +} + +static unsigned long +proc_reg_get_unmapped_area(struct file *file, unsigned long orig_addr, + unsigned long len, unsigned long pgoff, + unsigned long flags) +{ + struct proc_dir_entry *pde = PDE(file_inode(file)); + unsigned long rv = -EIO; + + if (pde_is_permanent(pde)) { + return pde_get_unmapped_area(pde, file, orig_addr, len, pgoff, flags); + } else if (use_pde(pde)) { + rv = pde_get_unmapped_area(pde, file, orig_addr, len, pgoff, flags); unuse_pde(pde); } return rv; @@ -334,10 +470,19 @@ static int proc_reg_open(struct inode *inode, struct file *file) { struct proc_dir_entry *pde = PDE(inode); int rv = 0; - int (*open)(struct inode *, struct file *); - int (*release)(struct inode *, struct file *); + typeof_member(struct proc_ops, proc_open) open; struct pde_opener *pdeo; + if (!pde_has_proc_lseek(pde)) + file->f_mode &= ~FMODE_LSEEK; + + if (pde_is_permanent(pde)) { + open = pde->proc_ops->proc_open; + if (open) + rv = open(inode, file); + return rv; + } + /* * Ensure that * 1) PDE's ->release hook will be called no matter what @@ -352,7 +497,7 @@ static int proc_reg_open(struct inode *inode, struct file *file) if (!use_pde(pde)) return -ENOENT; - release = pde->proc_fops->release; + const auto release = pde->proc_ops->proc_release; if (release) { pdeo = kmem_cache_alloc(pde_opener_cache, GFP_KERNEL); if (!pdeo) { @@ -361,7 +506,7 @@ static int proc_reg_open(struct inode *inode, struct file *file) } } - open = pde->proc_fops->open; + open = pde->proc_ops->proc_open; if (open) rv = open(inode, file); @@ -387,6 +532,14 @@ static int proc_reg_release(struct inode *inode, struct file *file) { struct proc_dir_entry *pde = PDE(inode); struct pde_opener *pdeo; + + if (pde_is_permanent(pde)) { + const auto release = pde->proc_ops->proc_release; + if (release) + return release(inode, file); + return 0; + } + spin_lock(&pde->pde_unload_lock); list_for_each_entry(pdeo, &pde->pde_openers, lh) { if (pdeo->file == file) { @@ -404,9 +557,19 @@ static const struct file_operations proc_reg_file_ops = { .write = proc_reg_write, .poll = proc_reg_poll, .unlocked_ioctl = proc_reg_unlocked_ioctl, -#ifdef CONFIG_COMPAT - .compat_ioctl = proc_reg_compat_ioctl, -#endif + .mmap = proc_reg_mmap, + .get_unmapped_area = proc_reg_get_unmapped_area, + .open = proc_reg_open, + .release = proc_reg_release, +}; + +static const struct file_operations proc_iter_file_ops = { + .llseek = proc_reg_llseek, + .read_iter = proc_reg_read_iter, + .write = proc_reg_write, + .splice_read = copy_splice_read, + .poll = proc_reg_poll, + .unlocked_ioctl = proc_reg_unlocked_ioctl, .mmap = proc_reg_mmap, .get_unmapped_area = proc_reg_get_unmapped_area, .open = proc_reg_open, @@ -414,12 +577,27 @@ static const struct file_operations proc_reg_file_ops = { }; #ifdef CONFIG_COMPAT -static const struct file_operations proc_reg_file_ops_no_compat = { +static const struct file_operations proc_reg_file_ops_compat = { .llseek = proc_reg_llseek, .read = proc_reg_read, .write = proc_reg_write, .poll = proc_reg_poll, .unlocked_ioctl = proc_reg_unlocked_ioctl, + .compat_ioctl = proc_reg_compat_ioctl, + .mmap = proc_reg_mmap, + .get_unmapped_area = proc_reg_get_unmapped_area, + .open = proc_reg_open, + .release = proc_reg_release, +}; + +static const struct file_operations proc_iter_file_ops_compat = { + .llseek = proc_reg_llseek, + .read_iter = proc_reg_read_iter, + .splice_read = copy_splice_read, + .write = proc_reg_write, + .poll = proc_reg_poll, + .unlocked_ioctl = proc_reg_unlocked_ioctl, + .compat_ioctl = proc_reg_compat_ioctl, .mmap = proc_reg_mmap, .get_unmapped_area = proc_reg_get_unmapped_area, .open = proc_reg_open, @@ -449,90 +627,54 @@ const struct inode_operations proc_link_inode_operations = { struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) { - struct inode *inode = new_inode_pseudo(sb); - - if (inode) { - inode->i_ino = de->low_ino; - inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); - PROC_I(inode)->pde = de; - - if (is_empty_pde(de)) { - make_empty_dir_inode(inode); - return inode; - } - if (de->mode) { - inode->i_mode = de->mode; - inode->i_uid = de->uid; - inode->i_gid = de->gid; - } - if (de->size) - inode->i_size = de->size; - if (de->nlink) - set_nlink(inode, de->nlink); - WARN_ON(!de->proc_iops); - inode->i_op = de->proc_iops; - if (de->proc_fops) { - if (S_ISREG(inode->i_mode)) { -#ifdef CONFIG_COMPAT - if (!de->proc_fops->compat_ioctl) - inode->i_fop = - &proc_reg_file_ops_no_compat; - else -#endif - inode->i_fop = &proc_reg_file_ops; - } else { - inode->i_fop = de->proc_fops; - } - } - } else - pde_put(de); - return inode; -} - -int proc_fill_super(struct super_block *s, void *data, int silent) -{ - struct pid_namespace *ns = get_pid_ns(s->s_fs_info); - struct inode *root_inode; - int ret; - - if (!proc_parse_options(data, ns)) - return -EINVAL; + struct inode *inode = new_inode(sb); - /* User space would break if executables or devices appear on proc */ - s->s_iflags |= SB_I_USERNS_VISIBLE | SB_I_NOEXEC | SB_I_NODEV; - s->s_flags |= SB_NODIRATIME | SB_NOSUID | SB_NOEXEC; - s->s_blocksize = 1024; - s->s_blocksize_bits = 10; - s->s_magic = PROC_SUPER_MAGIC; - s->s_op = &proc_sops; - s->s_time_gran = 1; + if (!inode) { + pde_put(de); + return NULL; + } - /* - * procfs isn't actually a stacking filesystem; however, there is - * too much magic going on inside it to permit stacking things on - * top of it - */ - s->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH; - - /* procfs dentries and inodes don't require IO to create */ - s->s_shrink.seeks = 0; - - pde_get(&proc_root); - root_inode = proc_get_inode(s, &proc_root); - if (!root_inode) { - pr_err("proc_fill_super: get root inode failed\n"); - return -ENOMEM; + inode->i_private = de->data; + inode->i_ino = de->low_ino; + simple_inode_init_ts(inode); + PROC_I(inode)->pde = de; + if (is_empty_pde(de)) { + make_empty_dir_inode(inode); + return inode; } - s->s_root = d_make_root(root_inode); - if (!s->s_root) { - pr_err("proc_fill_super: allocate dentry failed\n"); - return -ENOMEM; + if (de->mode) { + inode->i_mode = de->mode; + inode->i_uid = de->uid; + inode->i_gid = de->gid; } + if (de->size) + inode->i_size = de->size; + if (de->nlink) + set_nlink(inode, de->nlink); - ret = proc_setup_self(s); - if (ret) { - return ret; + if (S_ISREG(inode->i_mode)) { + inode->i_op = de->proc_iops; + if (pde_has_proc_read_iter(de)) + inode->i_fop = &proc_iter_file_ops; + else + inode->i_fop = &proc_reg_file_ops; +#ifdef CONFIG_COMPAT + if (pde_has_proc_compat_ioctl(de)) { + if (pde_has_proc_read_iter(de)) + inode->i_fop = &proc_iter_file_ops_compat; + else + inode->i_fop = &proc_reg_file_ops_compat; + } +#endif + } else if (S_ISDIR(inode->i_mode)) { + inode->i_op = de->proc_iops; + inode->i_fop = de->proc_dir_ops; + } else if (S_ISLNK(inode->i_mode)) { + inode->i_op = de->proc_iops; + inode->i_fop = NULL; + } else { + BUG(); } - return proc_setup_thread_self(s); + return inode; } |
