From 84d17192d2afd52aeba88c71ae4959a015f56a38 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 15 Mar 2013 10:53:28 -0400
Subject: get rid of full-hash scan on detaching vfsmounts

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/mount.h     |   7 ++
 fs/namespace.c | 229 ++++++++++++++++++++++++++++++++++-----------------------
 fs/pnode.c     |   6 +-
 fs/pnode.h     |   4 +-
 4 files changed, 149 insertions(+), 97 deletions(-)

(limited to 'fs')

diff --git a/fs/mount.h b/fs/mount.h
index cd5007980400..64a858143ff9 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -18,6 +18,12 @@ struct mnt_pcp {
 	int mnt_writers;
 };
 
+struct mountpoint {
+	struct list_head m_hash;	/* chain in mountpoint_hashtable */
+	struct dentry *m_dentry;	/* the dentry being mounted on */
+	int m_count;			/* references; freed when it hits zero */
+};
+
 struct mount {
 	struct list_head mnt_hash;
 	struct mount *mnt_parent;
@@ -40,6 +46,7 @@ struct mount {
 	struct list_head mnt_slave;	/* slave list entry */
 	struct mount *mnt_master;	/* slave is on master->mnt_slave_list */
 	struct mnt_namespace *mnt_ns;	/* containing namespace */
+	struct mountpoint *mnt_mp;	/* where is it mounted */
 #ifdef CONFIG_FSNOTIFY
 	struct hlist_head mnt_fsnotify_marks;
 	__u32 mnt_fsnotify_mask;
diff --git a/fs/namespace.c b/fs/namespace.c
index 6c7d31eebba4..d7bb5a55cf36 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -36,6 +36,7 @@ static int mnt_id_start = 0;
 static int mnt_group_start = 1;
 
 static struct list_head *mount_hashtable __read_mostly;
+static struct list_head *mountpoint_hashtable __read_mostly;
 static struct kmem_cache *mnt_cache __read_mostly;
 static struct rw_semaphore namespace_sem;
 
@@ -605,6 +606,51 @@ struct vfsmount *lookup_mnt(struct path *path)
 	}
 }
 
+static struct mountpoint *new_mountpoint(struct dentry *dentry)
+{
+	struct list_head *chain = mountpoint_hashtable + hash(NULL, dentry);
+	struct mountpoint *mp;
+
+	list_for_each_entry(mp, chain, m_hash) {	/* already hashed? */
+		if (mp->m_dentry == dentry) {
+			/* might be worth a WARN_ON() */
+			if (d_unlinked(dentry))
+				return ERR_PTR(-ENOENT);
+			mp->m_count++;	/* share the existing entry */
+			return mp;
+		}
+	}
+
+	mp = kmalloc(sizeof(struct mountpoint), GFP_KERNEL);
+	if (!mp)
+		return ERR_PTR(-ENOMEM);
+
+	spin_lock(&dentry->d_lock);
+	if (d_unlinked(dentry)) {	/* checked under ->d_lock */
+		spin_unlock(&dentry->d_lock);
+		kfree(mp);
+		return ERR_PTR(-ENOENT);
+	}
+	dentry->d_flags |= DCACHE_MOUNTED;	/* set when the entry is created */
+	spin_unlock(&dentry->d_lock);
+	mp->m_dentry = dentry;
+	mp->m_count = 1;	/* the reference handed to the caller */
+	list_add(&mp->m_hash, chain);
+	return mp;
+}
+
+static void put_mountpoint(struct mountpoint *mp)
+{
+	if (!--mp->m_count) {	/* last reference is going away */
+		struct dentry *dentry = mp->m_dentry;
+		spin_lock(&dentry->d_lock);
+		dentry->d_flags &= ~DCACHE_MOUNTED;	/* no users of this mountpoint left */
+		spin_unlock(&dentry->d_lock);
+		list_del(&mp->m_hash);	/* unhash from mountpoint_hashtable */
+		kfree(mp);
+	}
+}
+
 static inline int check_mnt(struct mount *mnt)
 {
 	return mnt->mnt_ns == current->nsproxy->mnt_ns;
@@ -632,27 +678,6 @@ static void __touch_mnt_namespace(struct mnt_namespace *ns)
 	}
 }
 
-/*
- * Clear dentry's mounted state if it has no remaining mounts.
- * vfsmount_lock must be held for write.
- */
-static void dentry_reset_mounted(struct dentry *dentry)
-{
-	unsigned u;
-
-	for (u = 0; u < HASH_SIZE; u++) {
-		struct mount *p;
-
-		list_for_each_entry(p, &mount_hashtable[u], mnt_hash) {
-			if (p->mnt_mountpoint == dentry)
-				return;
-		}
-	}
-	spin_lock(&dentry->d_lock);
-	dentry->d_flags &= ~DCACHE_MOUNTED;
-	spin_unlock(&dentry->d_lock);
-}
-
 /*
  * vfsmount lock must be held for write
  */
@@ -664,32 +689,35 @@ static void detach_mnt(struct mount *mnt, struct path *old_path)
 	mnt->mnt_mountpoint = mnt->mnt.mnt_root;
 	list_del_init(&mnt->mnt_child);
 	list_del_init(&mnt->mnt_hash);
-	dentry_reset_mounted(old_path->dentry);
+	put_mountpoint(mnt->mnt_mp);
+	mnt->mnt_mp = NULL;
 }
 
 /*
  * vfsmount lock must be held for write
  */
-void mnt_set_mountpoint(struct mount *mnt, struct dentry *dentry,
+void mnt_set_mountpoint(struct mount *mnt,
+			struct mountpoint *mp,
 			struct mount *child_mnt)
 {
+	mp->m_count++;
 	mnt_add_count(mnt, 1);	/* essentially, that's mntget */
-	child_mnt->mnt_mountpoint = dget(dentry);
+	child_mnt->mnt_mountpoint = dget(mp->m_dentry);
 	child_mnt->mnt_parent = mnt;
-	spin_lock(&dentry->d_lock);
-	dentry->d_flags |= DCACHE_MOUNTED;
-	spin_unlock(&dentry->d_lock);
+	child_mnt->mnt_mp = mp;
 }
 
 /*
  * vfsmount lock must be held for write
  */
-static void attach_mnt(struct mount *mnt, struct path *path)
+static void attach_mnt(struct mount *mnt,
+			struct mount *parent,
+			struct mountpoint *mp)
 {
-	mnt_set_mountpoint(real_mount(path->mnt), path->dentry, mnt);
+	mnt_set_mountpoint(parent, mp, mnt);
 	list_add_tail(&mnt->mnt_hash, mount_hashtable +
-			hash(path->mnt, path->dentry));
-	list_add_tail(&mnt->mnt_child, &real_mount(path->mnt)->mnt_mounts);
+			hash(&parent->mnt, mp->m_dentry));
+	list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
 }
 
 /*
@@ -1138,7 +1166,8 @@ void umount_tree(struct mount *mnt, int propagate, struct list_head *kill)
 		list_del_init(&p->mnt_child);
 		if (mnt_has_parent(p)) {
 			p->mnt_parent->mnt_ghosts++;
-			dentry_reset_mounted(p->mnt_mountpoint);
+			put_mountpoint(p->mnt_mp);
+			p->mnt_mp = NULL;
 		}
 		change_mnt_propagation(p, MS_PRIVATE);
 	}
@@ -1323,8 +1352,7 @@ static bool mnt_ns_loop(struct path *path)
 struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
 					int flag)
 {
-	struct mount *res, *p, *q, *r;
-	struct path path;
+	struct mount *res, *p, *q, *r, *parent;
 
 	if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(mnt))
 		return ERR_PTR(-EINVAL);
@@ -1351,14 +1379,13 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
 				q = q->mnt_parent;
 			}
 			p = s;
-			path.mnt = &q->mnt;
-			path.dentry = p->mnt_mountpoint;
+			parent = q;
 			q = clone_mnt(p, p->mnt.mnt_root, flag);
 			if (IS_ERR(q))
 				goto out;
 			br_write_lock(&vfsmount_lock);
 			list_add_tail(&q->mnt_list, &res->mnt_list);
-			attach_mnt(q, &path);
+			attach_mnt(q, parent, p->mnt_mp);
 			br_write_unlock(&vfsmount_lock);
 		}
 	}
@@ -1505,11 +1532,11 @@ static int invent_group_ids(struct mount *mnt, bool recurse)
  * in allocations.
  */
 static int attach_recursive_mnt(struct mount *source_mnt,
-			struct path *path, struct path *parent_path)
+			struct mount *dest_mnt,
+			struct mountpoint *dest_mp,
+			struct path *parent_path)
 {
 	LIST_HEAD(tree_list);
-	struct mount *dest_mnt = real_mount(path->mnt);
-	struct dentry *dest_dentry = path->dentry;
 	struct mount *child, *p;
 	int err;
 
@@ -1518,7 +1545,7 @@ static int attach_recursive_mnt(struct mount *source_mnt,
 		if (err)
 			goto out;
 	}
-	err = propagate_mnt(dest_mnt, dest_dentry, source_mnt, &tree_list);
+	err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list);
 	if (err)
 		goto out_cleanup_ids;
 
@@ -1530,10 +1557,10 @@ static int attach_recursive_mnt(struct mount *source_mnt,
 	}
 	if (parent_path) {
 		detach_mnt(source_mnt, parent_path);
-		attach_mnt(source_mnt, path);
+		attach_mnt(source_mnt, dest_mnt, dest_mp);
 		touch_mnt_namespace(source_mnt->mnt_ns);
 	} else {
-		mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt);
+		mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt);
 		commit_tree(source_mnt);
 	}
 
@@ -1552,46 +1579,53 @@ static int attach_recursive_mnt(struct mount *source_mnt,
 	return err;
 }
 
-static int lock_mount(struct path *path)
+static struct mountpoint *lock_mount(struct path *path)
 {
 	struct vfsmount *mnt;
+	struct dentry *dentry = path->dentry;
 retry:
-	mutex_lock(&path->dentry->d_inode->i_mutex);
-	if (unlikely(cant_mount(path->dentry))) {
-		mutex_unlock(&path->dentry->d_inode->i_mutex);
-		return -ENOENT;
+	mutex_lock(&dentry->d_inode->i_mutex);
+	if (unlikely(cant_mount(dentry))) {
+		mutex_unlock(&dentry->d_inode->i_mutex);
+		return ERR_PTR(-ENOENT);
 	}
 	down_write(&namespace_sem);
 	mnt = lookup_mnt(path);
-	if (likely(!mnt))
-		return 0;
+	if (likely(!mnt)) {
+		struct mountpoint *mp = new_mountpoint(dentry);
+		if (IS_ERR(mp)) {
+			up_write(&namespace_sem);
+			mutex_unlock(&dentry->d_inode->i_mutex);
+			return mp;
+		}
+		return mp;
+	}
 	up_write(&namespace_sem);
 	mutex_unlock(&path->dentry->d_inode->i_mutex);
 	path_put(path);
 	path->mnt = mnt;
-	path->dentry = dget(mnt->mnt_root);
+	dentry = path->dentry = dget(mnt->mnt_root);
 	goto retry;
 }
 
-static void unlock_mount(struct path *path)
+static void unlock_mount(struct mountpoint *where)
 {
+	struct dentry *dentry = where->m_dentry;
+	put_mountpoint(where);
 	up_write(&namespace_sem);
-	mutex_unlock(&path->dentry->d_inode->i_mutex);
+	mutex_unlock(&dentry->d_inode->i_mutex);
 }
 
-static int graft_tree(struct mount *mnt, struct path *path)
+static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp)
 {
 	if (mnt->mnt.mnt_sb->s_flags & MS_NOUSER)
 		return -EINVAL;
 
-	if (S_ISDIR(path->dentry->d_inode->i_mode) !=
+	if (S_ISDIR(mp->m_dentry->d_inode->i_mode) !=
 	      S_ISDIR(mnt->mnt.mnt_root->d_inode->i_mode))
 		return -ENOTDIR;
 
-	if (d_unlinked(path->dentry))
-		return -ENOENT;
-
-	return attach_recursive_mnt(mnt, path, NULL);
+	return attach_recursive_mnt(mnt, p, mp, NULL);
 }
 
 /*
@@ -1654,7 +1688,8 @@ static int do_loopback(struct path *path, const char *old_name,
 {
 	LIST_HEAD(umount_list);
 	struct path old_path;
-	struct mount *mnt = NULL, *old;
+	struct mount *mnt = NULL, *old, *parent;
+	struct mountpoint *mp;
 	int err;
 	if (!old_name || !*old_name)
 		return -EINVAL;
@@ -1666,17 +1701,19 @@ static int do_loopback(struct path *path, const char *old_name,
 	if (mnt_ns_loop(&old_path))
 		goto out; 
 
-	err = lock_mount(path);
-	if (err)
+	mp = lock_mount(path);
+	err = PTR_ERR(mp);
+	if (IS_ERR(mp))
 		goto out;
 
 	old = real_mount(old_path.mnt);
+	parent = real_mount(path->mnt);
 
 	err = -EINVAL;
 	if (IS_MNT_UNBINDABLE(old))
 		goto out2;
 
-	if (!check_mnt(real_mount(path->mnt)) || !check_mnt(old))
+	if (!check_mnt(parent) || !check_mnt(old))
 		goto out2;
 
 	if (recurse)
@@ -1689,14 +1726,14 @@ static int do_loopback(struct path *path, const char *old_name,
 		goto out2;
 	}
 
-	err = graft_tree(mnt, path);
+	err = graft_tree(mnt, parent, mp);
 	if (err) {
 		br_write_lock(&vfsmount_lock);
 		umount_tree(mnt, 0, &umount_list);
 		br_write_unlock(&vfsmount_lock);
 	}
 out2:
-	unlock_mount(path);
+	unlock_mount(mp);
 	release_mounts(&umount_list);
 out:
 	path_put(&old_path);
@@ -1779,6 +1816,7 @@ static int do_move_mount(struct path *path, const char *old_name)
 	struct path old_path, parent_path;
 	struct mount *p;
 	struct mount *old;
+	struct mountpoint *mp;
 	int err;
 	if (!old_name || !*old_name)
 		return -EINVAL;
@@ -1786,8 +1824,9 @@ static int do_move_mount(struct path *path, const char *old_name)
 	if (err)
 		return err;
 
-	err = lock_mount(path);
-	if (err < 0)
+	mp = lock_mount(path);
+	err = PTR_ERR(mp);
+	if (IS_ERR(mp))
 		goto out;
 
 	old = real_mount(old_path.mnt);
@@ -1797,9 +1836,6 @@ static int do_move_mount(struct path *path, const char *old_name)
 	if (!check_mnt(p) || !check_mnt(old))
 		goto out1;
 
-	if (d_unlinked(path->dentry))
-		goto out1;
-
 	err = -EINVAL;
 	if (old_path.dentry != old_path.mnt->mnt_root)
 		goto out1;
@@ -1826,7 +1862,7 @@ static int do_move_mount(struct path *path, const char *old_name)
 		if (p == old)
 			goto out1;
 
-	err = attach_recursive_mnt(old, path, &parent_path);
+	err = attach_recursive_mnt(old, real_mount(path->mnt), mp, &parent_path);
 	if (err)
 		goto out1;
 
@@ -1834,7 +1870,7 @@ static int do_move_mount(struct path *path, const char *old_name)
 	 * automatically */
 	list_del_init(&old->mnt_expire);
 out1:
-	unlock_mount(path);
+	unlock_mount(mp);
 out:
 	if (!err)
 		path_put(&parent_path);
@@ -1870,21 +1906,24 @@ static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)
  */
 static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags)
 {
+	struct mountpoint *mp;
+	struct mount *parent;
 	int err;
 
 	mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL);
 
-	err = lock_mount(path);
-	if (err)
-		return err;
+	mp = lock_mount(path);
+	if (IS_ERR(mp))
+		return PTR_ERR(mp);
 
+	parent = real_mount(path->mnt);
 	err = -EINVAL;
-	if (unlikely(!check_mnt(real_mount(path->mnt)))) {
+	if (unlikely(!check_mnt(parent))) {
 		/* that's acceptable only for automounts done in private ns */
 		if (!(mnt_flags & MNT_SHRINKABLE))
 			goto unlock;
 		/* ... and for those we'd better have mountpoint still alive */
-		if (!real_mount(path->mnt)->mnt_ns)
+		if (!parent->mnt_ns)
 			goto unlock;
 	}
 
@@ -1899,10 +1938,10 @@ static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags)
 		goto unlock;
 
 	newmnt->mnt.mnt_flags = mnt_flags;
-	err = graft_tree(newmnt, path);
+	err = graft_tree(newmnt, parent, mp);
 
 unlock:
-	unlock_mount(path);
+	unlock_mount(mp);
 	return err;
 }
 
@@ -2543,7 +2582,8 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
 		const char __user *, put_old)
 {
 	struct path new, old, parent_path, root_parent, root;
-	struct mount *new_mnt, *root_mnt;
+	struct mount *new_mnt, *root_mnt, *old_mnt;
+	struct mountpoint *old_mp, *root_mp;
 	int error;
 
 	if (!may_mount())
@@ -2562,14 +2602,16 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
 		goto out2;
 
 	get_fs_root(current->fs, &root);
-	error = lock_mount(&old);
-	if (error)
+	old_mp = lock_mount(&old);
+	error = PTR_ERR(old_mp);
+	if (IS_ERR(old_mp))
 		goto out3;
 
 	error = -EINVAL;
 	new_mnt = real_mount(new.mnt);
 	root_mnt = real_mount(root.mnt);
-	if (IS_MNT_SHARED(real_mount(old.mnt)) ||
+	old_mnt = real_mount(old.mnt);
+	if (IS_MNT_SHARED(old_mnt) ||
 		IS_MNT_SHARED(new_mnt->mnt_parent) ||
 		IS_MNT_SHARED(root_mnt->mnt_parent))
 		goto out4;
@@ -2578,37 +2620,37 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
 	error = -ENOENT;
 	if (d_unlinked(new.dentry))
 		goto out4;
-	if (d_unlinked(old.dentry))
-		goto out4;
 	error = -EBUSY;
-	if (new.mnt == root.mnt ||
-	    old.mnt == root.mnt)
+	if (new_mnt == root_mnt || old_mnt == root_mnt)
 		goto out4; /* loop, on the same file system  */
 	error = -EINVAL;
 	if (root.mnt->mnt_root != root.dentry)
 		goto out4; /* not a mountpoint */
 	if (!mnt_has_parent(root_mnt))
 		goto out4; /* not attached */
+	root_mp = root_mnt->mnt_mp;
 	if (new.mnt->mnt_root != new.dentry)
 		goto out4; /* not a mountpoint */
 	if (!mnt_has_parent(new_mnt))
 		goto out4; /* not attached */
 	/* make sure we can reach put_old from new_root */
-	if (!is_path_reachable(real_mount(old.mnt), old.dentry, &new))
+	if (!is_path_reachable(old_mnt, old.dentry, &new))
 		goto out4;
+	root_mp->m_count++; /* pin it so it won't go away */
 	br_write_lock(&vfsmount_lock);
 	detach_mnt(new_mnt, &parent_path);
 	detach_mnt(root_mnt, &root_parent);
 	/* mount old root on put_old */
-	attach_mnt(root_mnt, &old);
+	attach_mnt(root_mnt, old_mnt, old_mp);
 	/* mount new_root on / */
-	attach_mnt(new_mnt, &root_parent);
+	attach_mnt(new_mnt, real_mount(root_parent.mnt), root_mp);
 	touch_mnt_namespace(current->nsproxy->mnt_ns);
 	br_write_unlock(&vfsmount_lock);
 	chroot_fs_refs(&root, &new);
+	put_mountpoint(root_mp);
 	error = 0;
 out4:
-	unlock_mount(&old);
+	unlock_mount(old_mp);
 	if (!error) {
 		path_put(&root_parent);
 		path_put(&parent_path);
@@ -2663,14 +2705,17 @@ void __init mnt_init(void)
 			0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
 
 	mount_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC);
+	mountpoint_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC);
 
-	if (!mount_hashtable)
+	if (!mount_hashtable || !mountpoint_hashtable)
 		panic("Failed to allocate mount hash table\n");
 
 	printk(KERN_INFO "Mount-cache hash table entries: %lu\n", HASH_SIZE);
 
 	for (u = 0; u < HASH_SIZE; u++)
 		INIT_LIST_HEAD(&mount_hashtable[u]);
+	for (u = 0; u < HASH_SIZE; u++)
+		INIT_LIST_HEAD(&mountpoint_hashtable[u]);
 
 	br_lock_init(&vfsmount_lock);
 
diff --git a/fs/pnode.c b/fs/pnode.c
index 3e000a51ac0d..98e0d3a23fac 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -217,7 +217,7 @@ static struct mount *get_source(struct mount *dest,
  * @source_mnt: source mount.
  * @tree_list : list of heads of trees to be attached.
  */
-int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry,
+int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp,
 		    struct mount *source_mnt, struct list_head *tree_list)
 {
 	struct mount *m, *child;
@@ -244,8 +244,8 @@ int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry,
 			goto out;
 		}
 
-		if (is_subdir(dest_dentry, m->mnt.mnt_root)) {
-			mnt_set_mountpoint(m, dest_dentry, child);
+		if (is_subdir(dest_mp->m_dentry, m->mnt.mnt_root)) {
+			mnt_set_mountpoint(m, dest_mp, child);
 			list_add_tail(&child->mnt_hash, tree_list);
 		} else {
 			/*
diff --git a/fs/pnode.h b/fs/pnode.h
index 19b853a3445c..f4357d3a0a44 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -31,14 +31,14 @@ static inline void set_mnt_shared(struct mount *mnt)
 }
 
 void change_mnt_propagation(struct mount *, int);
-int propagate_mnt(struct mount *, struct dentry *, struct mount *,
+int propagate_mnt(struct mount *, struct mountpoint *, struct mount *,
 		struct list_head *);
 int propagate_umount(struct list_head *);
 int propagate_mount_busy(struct mount *, int);
 void mnt_release_group_id(struct mount *);
 int get_dominating_id(struct mount *mnt, const struct path *root);
 unsigned int mnt_get_count(struct mount *mnt);
-void mnt_set_mountpoint(struct mount *, struct dentry *,
+void mnt_set_mountpoint(struct mount *, struct mountpoint *,
 			struct mount *);
 void release_mounts(struct list_head *);
 void umount_tree(struct mount *, int, struct list_head *);
-- 
cgit 


From e3197d83d6f5b9bd0e57a05592437ffa459ee106 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 16 Mar 2013 14:35:16 -0400
Subject: saner umount_tree()/release_mounts(), part 1

Add a global list of release_mounts() fodder, protected by namespace_sem;
eventually, all umount_tree() callers will use it as their kill list.
Also add a helper, namespace_unlock(), that picks up the contents of that
list, releases namespace_sem and does release_mounts() on what it got.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namespace.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/namespace.c b/fs/namespace.c
index d7bb5a55cf36..0d91711a3160 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1119,6 +1119,8 @@ int may_umount(struct vfsmount *mnt)
 
 EXPORT_SYMBOL(may_umount);
 
+static LIST_HEAD(unmounted);	/* protected by namespace_sem */
+
 void release_mounts(struct list_head *head)
 {
 	struct mount *mnt;
@@ -1143,6 +1145,14 @@ void release_mounts(struct list_head *head)
 	}
 }
 
+static void namespace_unlock(void)
+{
+	LIST_HEAD(head);
+	list_splice_init(&unmounted, &head);
+	up_write(&namespace_sem);
+	release_mounts(&head);
+}
+
 /*
  * vfsmount lock must be held for write
  * namespace_sem must be held for write
@@ -1252,17 +1262,16 @@ static int do_umount(struct mount *mnt, int flags)
 	event++;
 
 	if (!(flags & MNT_DETACH))
-		shrink_submounts(mnt, &umount_list);
+		shrink_submounts(mnt, &unmounted);
 
 	retval = -EBUSY;
 	if (flags & MNT_DETACH || !propagate_mount_busy(mnt, 2)) {
 		if (!list_empty(&mnt->mnt_list))
-			umount_tree(mnt, 1, &umount_list);
+			umount_tree(mnt, 1, &unmounted);
 		retval = 0;
 	}
 	br_write_unlock(&vfsmount_lock);
-	up_write(&namespace_sem);
-	release_mounts(&umount_list);
+	namespace_unlock();
 	return retval;
 }
 
-- 
cgit 


From b54b9be7824d84158cd90305820e2c3914f74ad9 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 16 Mar 2013 14:39:34 -0400
Subject: get rid of the second argument of shrink_submounts()

... it's always &unmounted.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namespace.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/namespace.c b/fs/namespace.c
index 0d91711a3160..c04afaf0e3dc 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1184,7 +1184,7 @@ void umount_tree(struct mount *mnt, int propagate, struct list_head *kill)
 	list_splice(&tmp_list, kill);
 }
 
-static void shrink_submounts(struct mount *mnt, struct list_head *umounts);
+static void shrink_submounts(struct mount *mnt);
 
 static int do_umount(struct mount *mnt, int flags)
 {
@@ -1262,7 +1262,7 @@ static int do_umount(struct mount *mnt, int flags)
 	event++;
 
 	if (!(flags & MNT_DETACH))
-		shrink_submounts(mnt, &unmounted);
+		shrink_submounts(mnt);
 
 	retval = -EBUSY;
 	if (flags & MNT_DETACH || !propagate_mount_busy(mnt, 2)) {
@@ -2145,7 +2145,7 @@ resume:
  *
  * vfsmount_lock must be held for write
  */
-static void shrink_submounts(struct mount *mnt, struct list_head *umounts)
+static void shrink_submounts(struct mount *mnt)
 {
 	LIST_HEAD(graveyard);
 	struct mount *m;
@@ -2156,7 +2156,7 @@ static void shrink_submounts(struct mount *mnt, struct list_head *umounts)
 			m = list_first_entry(&graveyard, struct mount,
 						mnt_expire);
 			touch_mnt_namespace(m->mnt_ns);
-			umount_tree(m, 1, umounts);
+			umount_tree(m, 1, &unmounted);
 		}
 	}
 }
-- 
cgit 


From 3ab6abee59ac9ca84cc4a1e31224f1dccd44394c Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 16 Mar 2013 14:42:19 -0400
Subject: more conversions to namespace_unlock()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namespace.c | 20 ++++++--------------
 1 file changed, 6 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/namespace.c b/fs/namespace.c
index c04afaf0e3dc..7563270a43ab 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1426,13 +1426,11 @@ struct vfsmount *collect_mounts(struct path *path)
 
 void drop_collected_mounts(struct vfsmount *mnt)
 {
-	LIST_HEAD(umount_list);
 	down_write(&namespace_sem);
 	br_write_lock(&vfsmount_lock);
-	umount_tree(real_mount(mnt), 0, &umount_list);
+	umount_tree(real_mount(mnt), 0, &unmounted);
 	br_write_unlock(&vfsmount_lock);
-	up_write(&namespace_sem);
-	release_mounts(&umount_list);
+	namespace_unlock();
 }
 
 int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
@@ -2060,7 +2058,6 @@ void mark_mounts_for_expiry(struct list_head *mounts)
 {
 	struct mount *mnt, *next;
 	LIST_HEAD(graveyard);
-	LIST_HEAD(umounts);
 
 	if (list_empty(mounts))
 		return;
@@ -2083,12 +2080,10 @@ void mark_mounts_for_expiry(struct list_head *mounts)
 	while (!list_empty(&graveyard)) {
 		mnt = list_first_entry(&graveyard, struct mount, mnt_expire);
 		touch_mnt_namespace(mnt->mnt_ns);
-		umount_tree(mnt, 1, &umounts);
+		umount_tree(mnt, 1, &unmounted);
 	}
 	br_write_unlock(&vfsmount_lock);
-	up_write(&namespace_sem);
-
-	release_mounts(&umounts);
+	namespace_unlock();
 }
 
 EXPORT_SYMBOL_GPL(mark_mounts_for_expiry);
@@ -2741,16 +2736,13 @@ void __init mnt_init(void)
 
 void put_mnt_ns(struct mnt_namespace *ns)
 {
-	LIST_HEAD(umount_list);
-
 	if (!atomic_dec_and_test(&ns->count))
 		return;
 	down_write(&namespace_sem);
 	br_write_lock(&vfsmount_lock);
-	umount_tree(ns->root, 0, &umount_list);
+	umount_tree(ns->root, 0, &unmounted);
 	br_write_unlock(&vfsmount_lock);
-	up_write(&namespace_sem);
-	release_mounts(&umount_list);
+	namespace_unlock();
 	free_mnt_ns(ns);
 }
 
-- 
cgit 


From 328e6d9014636afc2b3c979403b36faadb412657 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 16 Mar 2013 14:49:45 -0400
Subject: switch unlock_mount() to namespace_unlock(), convert all
 umount_tree() callers

which allows us to kill the last argument of umount_tree() and make
release_mounts() static.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namespace.c | 33 ++++++++++++++-------------------
 fs/pnode.c     |  4 +---
 fs/pnode.h     |  3 +--
 3 files changed, 16 insertions(+), 24 deletions(-)

(limited to 'fs')

diff --git a/fs/namespace.c b/fs/namespace.c
index 7563270a43ab..fa93d54d21e8 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1121,7 +1121,7 @@ EXPORT_SYMBOL(may_umount);
 
 static LIST_HEAD(unmounted);	/* protected by namespace_sem */
 
-void release_mounts(struct list_head *head)
+static void release_mounts(struct list_head *head)
 {
 	struct mount *mnt;
 	while (!list_empty(head)) {
@@ -1157,7 +1157,7 @@ static void namespace_unlock(void)
  * vfsmount lock must be held for write
  * namespace_sem must be held for write
  */
-void umount_tree(struct mount *mnt, int propagate, struct list_head *kill)
+void umount_tree(struct mount *mnt, int propagate)
 {
 	LIST_HEAD(tmp_list);
 	struct mount *p;
@@ -1181,7 +1181,7 @@ void umount_tree(struct mount *mnt, int propagate, struct list_head *kill)
 		}
 		change_mnt_propagation(p, MS_PRIVATE);
 	}
-	list_splice(&tmp_list, kill);
+	list_splice(&tmp_list, &unmounted);
 }
 
 static void shrink_submounts(struct mount *mnt);
@@ -1190,7 +1190,6 @@ static int do_umount(struct mount *mnt, int flags)
 {
 	struct super_block *sb = mnt->mnt.mnt_sb;
 	int retval;
-	LIST_HEAD(umount_list);
 
 	retval = security_sb_umount(&mnt->mnt, flags);
 	if (retval)
@@ -1267,7 +1266,7 @@ static int do_umount(struct mount *mnt, int flags)
 	retval = -EBUSY;
 	if (flags & MNT_DETACH || !propagate_mount_busy(mnt, 2)) {
 		if (!list_empty(&mnt->mnt_list))
-			umount_tree(mnt, 1, &unmounted);
+			umount_tree(mnt, 1);
 		retval = 0;
 	}
 	br_write_unlock(&vfsmount_lock);
@@ -1401,11 +1400,9 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
 	return res;
 out:
 	if (res) {
-		LIST_HEAD(umount_list);
 		br_write_lock(&vfsmount_lock);
-		umount_tree(res, 0, &umount_list);
+		umount_tree(res, 0);
 		br_write_unlock(&vfsmount_lock);
-		release_mounts(&umount_list);
 	}
 	return q;
 }
@@ -1418,7 +1415,7 @@ struct vfsmount *collect_mounts(struct path *path)
 	down_write(&namespace_sem);
 	tree = copy_tree(real_mount(path->mnt), path->dentry,
 			 CL_COPY_ALL | CL_PRIVATE);
-	up_write(&namespace_sem);
+	namespace_unlock();
 	if (IS_ERR(tree))
 		return NULL;
 	return &tree->mnt;
@@ -1428,7 +1425,7 @@ void drop_collected_mounts(struct vfsmount *mnt)
 {
 	down_write(&namespace_sem);
 	br_write_lock(&vfsmount_lock);
-	umount_tree(real_mount(mnt), 0, &unmounted);
+	umount_tree(real_mount(mnt), 0);
 	br_write_unlock(&vfsmount_lock);
 	namespace_unlock();
 }
@@ -1619,7 +1616,7 @@ static void unlock_mount(struct mountpoint *where)
 {
 	struct dentry *dentry = where->m_dentry;
 	put_mountpoint(where);
-	up_write(&namespace_sem);
+	namespace_unlock();
 	mutex_unlock(&dentry->d_inode->i_mutex);
 }
 
@@ -1693,7 +1690,6 @@ static int do_change_type(struct path *path, int flag)
 static int do_loopback(struct path *path, const char *old_name,
 				int recurse)
 {
-	LIST_HEAD(umount_list);
 	struct path old_path;
 	struct mount *mnt = NULL, *old, *parent;
 	struct mountpoint *mp;
@@ -1736,12 +1732,11 @@ static int do_loopback(struct path *path, const char *old_name,
 	err = graft_tree(mnt, parent, mp);
 	if (err) {
 		br_write_lock(&vfsmount_lock);
-		umount_tree(mnt, 0, &umount_list);
+		umount_tree(mnt, 0);
 		br_write_unlock(&vfsmount_lock);
 	}
 out2:
 	unlock_mount(mp);
-	release_mounts(&umount_list);
 out:
 	path_put(&old_path);
 	return err;
@@ -2080,7 +2075,7 @@ void mark_mounts_for_expiry(struct list_head *mounts)
 	while (!list_empty(&graveyard)) {
 		mnt = list_first_entry(&graveyard, struct mount, mnt_expire);
 		touch_mnt_namespace(mnt->mnt_ns);
-		umount_tree(mnt, 1, &unmounted);
+		umount_tree(mnt, 1);
 	}
 	br_write_unlock(&vfsmount_lock);
 	namespace_unlock();
@@ -2151,7 +2146,7 @@ static void shrink_submounts(struct mount *mnt)
 			m = list_first_entry(&graveyard, struct mount,
 						mnt_expire);
 			touch_mnt_namespace(m->mnt_ns);
-			umount_tree(m, 1, &unmounted);
+			umount_tree(m, 1);
 		}
 	}
 }
@@ -2385,7 +2380,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
 		copy_flags |= CL_SHARED_TO_SLAVE;
 	new = copy_tree(old, old->mnt.mnt_root, copy_flags);
 	if (IS_ERR(new)) {
-		up_write(&namespace_sem);
+		namespace_unlock();
 		free_mnt_ns(new_ns);
 		return ERR_CAST(new);
 	}
@@ -2416,7 +2411,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
 		p = next_mnt(p, old);
 		q = next_mnt(q, new);
 	}
-	up_write(&namespace_sem);
+	namespace_unlock();
 
 	if (rootmnt)
 		mntput(rootmnt);
@@ -2740,7 +2735,7 @@ void put_mnt_ns(struct mnt_namespace *ns)
 		return;
 	down_write(&namespace_sem);
 	br_write_lock(&vfsmount_lock);
-	umount_tree(ns->root, 0, &unmounted);
+	umount_tree(ns->root, 0);
 	br_write_unlock(&vfsmount_lock);
 	namespace_unlock();
 	free_mnt_ns(ns);
diff --git a/fs/pnode.c b/fs/pnode.c
index 98e0d3a23fac..43617258fa6a 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -225,7 +225,6 @@ int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp,
 	struct mount *prev_dest_mnt = dest_mnt;
 	struct mount *prev_src_mnt  = source_mnt;
 	LIST_HEAD(tmp_list);
-	LIST_HEAD(umount_list);
 
 	for (m = propagation_next(dest_mnt, dest_mnt); m;
 			m = propagation_next(m, dest_mnt)) {
@@ -261,10 +260,9 @@ out:
 	br_write_lock(&vfsmount_lock);
 	while (!list_empty(&tmp_list)) {
 		child = list_first_entry(&tmp_list, struct mount, mnt_hash);
-		umount_tree(child, 0, &umount_list);
+		umount_tree(child, 0);
 	}
 	br_write_unlock(&vfsmount_lock);
-	release_mounts(&umount_list);
 	return ret;
 }
 
diff --git a/fs/pnode.h b/fs/pnode.h
index f4357d3a0a44..9eb00ee65bbe 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -40,8 +40,7 @@ int get_dominating_id(struct mount *mnt, const struct path *root);
 unsigned int mnt_get_count(struct mount *mnt);
 void mnt_set_mountpoint(struct mount *, struct mountpoint *,
 			struct mount *);
-void release_mounts(struct list_head *);
-void umount_tree(struct mount *, int, struct list_head *);
+void umount_tree(struct mount *, int);
 struct mount *copy_tree(struct mount *, struct dentry *, int);
 bool is_path_reachable(struct mount *, struct dentry *,
 			 const struct path *root);
-- 
cgit 


From 97216be09efd41414725068212e3af0f05cde11a Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 16 Mar 2013 15:12:40 -0400
Subject: fold release_mounts() into namespace_unlock()

... and provide namespace_lock() as a trivial wrapper;
switch to those two consistently.

The result is patterned after the rtnl_lock()/rtnl_unlock() pair.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namespace.c | 53 ++++++++++++++++++++++++++++++-----------------------
 1 file changed, 30 insertions(+), 23 deletions(-)

(limited to 'fs')

diff --git a/fs/namespace.c b/fs/namespace.c
index fa93d54d21e8..ed0708f2415f 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1121,11 +1121,21 @@ EXPORT_SYMBOL(may_umount);
 
 static LIST_HEAD(unmounted);	/* protected by namespace_sem */
 
-static void release_mounts(struct list_head *head)
+static void namespace_unlock(void)
 {
 	struct mount *mnt;
-	while (!list_empty(head)) {
-		mnt = list_first_entry(head, struct mount, mnt_hash);
+	LIST_HEAD(head);
+
+	if (likely(list_empty(&unmounted))) {
+		up_write(&namespace_sem);
+		return;
+	}
+
+	list_splice_init(&unmounted, &head);
+	up_write(&namespace_sem);
+
+	while (!list_empty(&head)) {
+		mnt = list_first_entry(&head, struct mount, mnt_hash);
 		list_del_init(&mnt->mnt_hash);
 		if (mnt_has_parent(mnt)) {
 			struct dentry *dentry;
@@ -1145,12 +1155,9 @@ static void release_mounts(struct list_head *head)
 	}
 }
 
-static void namespace_unlock(void)
+static inline void namespace_lock(void)
 {
-	LIST_HEAD(head);
-	list_splice_init(&unmounted, &head);
-	up_write(&namespace_sem);
-	release_mounts(&head);
+	down_write(&namespace_sem);
 }
 
 /*
@@ -1256,7 +1263,7 @@ static int do_umount(struct mount *mnt, int flags)
 		return retval;
 	}
 
-	down_write(&namespace_sem);
+	namespace_lock();
 	br_write_lock(&vfsmount_lock);
 	event++;
 
@@ -1412,7 +1419,7 @@ out:
 struct vfsmount *collect_mounts(struct path *path)
 {
 	struct mount *tree;
-	down_write(&namespace_sem);
+	namespace_lock();
 	tree = copy_tree(real_mount(path->mnt), path->dentry,
 			 CL_COPY_ALL | CL_PRIVATE);
 	namespace_unlock();
@@ -1423,7 +1430,7 @@ struct vfsmount *collect_mounts(struct path *path)
 
 void drop_collected_mounts(struct vfsmount *mnt)
 {
-	down_write(&namespace_sem);
+	namespace_lock();
 	br_write_lock(&vfsmount_lock);
 	umount_tree(real_mount(mnt), 0);
 	br_write_unlock(&vfsmount_lock);
@@ -1593,18 +1600,18 @@ retry:
 		mutex_unlock(&dentry->d_inode->i_mutex);
 		return ERR_PTR(-ENOENT);
 	}
-	down_write(&namespace_sem);
+	namespace_lock();
 	mnt = lookup_mnt(path);
 	if (likely(!mnt)) {
 		struct mountpoint *mp = new_mountpoint(dentry);
 		if (IS_ERR(mp)) {
-			up_write(&namespace_sem);
+			namespace_unlock();
 			mutex_unlock(&dentry->d_inode->i_mutex);
 			return mp;
 		}
 		return mp;
 	}
-	up_write(&namespace_sem);
+	namespace_unlock();
 	mutex_unlock(&path->dentry->d_inode->i_mutex);
 	path_put(path);
 	path->mnt = mnt;
@@ -1667,7 +1674,7 @@ static int do_change_type(struct path *path, int flag)
 	if (!type)
 		return -EINVAL;
 
-	down_write(&namespace_sem);
+	namespace_lock();
 	if (type == MS_SHARED) {
 		err = invent_group_ids(mnt, recurse);
 		if (err)
@@ -1680,7 +1687,7 @@ static int do_change_type(struct path *path, int flag)
 	br_write_unlock(&vfsmount_lock);
 
  out_unlock:
-	up_write(&namespace_sem);
+	namespace_unlock();
 	return err;
 }
 
@@ -2016,11 +2023,11 @@ int finish_automount(struct vfsmount *m, struct path *path)
 fail:
 	/* remove m from any expiration list it may be on */
 	if (!list_empty(&mnt->mnt_expire)) {
-		down_write(&namespace_sem);
+		namespace_lock();
 		br_write_lock(&vfsmount_lock);
 		list_del_init(&mnt->mnt_expire);
 		br_write_unlock(&vfsmount_lock);
-		up_write(&namespace_sem);
+		namespace_unlock();
 	}
 	mntput(m);
 	mntput(m);
@@ -2034,13 +2041,13 @@ fail:
  */
 void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list)
 {
-	down_write(&namespace_sem);
+	namespace_lock();
 	br_write_lock(&vfsmount_lock);
 
 	list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list);
 
 	br_write_unlock(&vfsmount_lock);
-	up_write(&namespace_sem);
+	namespace_unlock();
 }
 EXPORT_SYMBOL(mnt_set_expiry);
 
@@ -2057,7 +2064,7 @@ void mark_mounts_for_expiry(struct list_head *mounts)
 	if (list_empty(mounts))
 		return;
 
-	down_write(&namespace_sem);
+	namespace_lock();
 	br_write_lock(&vfsmount_lock);
 
 	/* extract from the expiration list every vfsmount that matches the
@@ -2373,7 +2380,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
 	if (IS_ERR(new_ns))
 		return new_ns;
 
-	down_write(&namespace_sem);
+	namespace_lock();
 	/* First pass: copy the tree topology */
 	copy_flags = CL_COPY_ALL | CL_EXPIRE;
 	if (user_ns != mnt_ns->user_ns)
@@ -2733,7 +2740,7 @@ void put_mnt_ns(struct mnt_namespace *ns)
 {
 	if (!atomic_dec_and_test(&ns->count))
 		return;
-	down_write(&namespace_sem);
+	namespace_lock();
 	br_write_lock(&vfsmount_lock);
 	umount_tree(ns->root, 0);
 	br_write_unlock(&vfsmount_lock);
-- 
cgit 


From d5daaaff24026d59130e97a406f2999118bafdc3 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 19 Mar 2013 19:46:45 -0400
Subject: reiserfs: don't wank with EFBIG before calling do_sync_write()

look for file_capable() in there...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/reiserfs/file.c | 61 +-----------------------------------------------------
 1 file changed, 1 insertion(+), 60 deletions(-)

(limited to 'fs')

diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 6165bd4784f6..dcaafcfc23b0 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -234,68 +234,9 @@ int reiserfs_commit_page(struct inode *inode, struct page *page,
 	return ret;
 }
 
-/* Write @count bytes at position @ppos in a file indicated by @file
-   from the buffer @buf.
-
-   generic_file_write() is only appropriate for filesystems that are not seeking to optimize performance and want
-   something simple that works.  It is not for serious use by general purpose filesystems, excepting the one that it was
-   written for (ext2/3).  This is for several reasons:
-
-   * It has no understanding of any filesystem specific optimizations.
-
-   * It enters the filesystem repeatedly for each page that is written.
-
-   * It depends on reiserfs_get_block() function which if implemented by reiserfs performs costly search_by_key
-   * operation for each page it is supplied with. By contrast reiserfs_file_write() feeds as much as possible at a time
-   * to reiserfs which allows for fewer tree traversals.
-
-   * Each indirect pointer insertion takes a lot of cpu, because it involves memory moves inside of blocks.
-
-   * Asking the block allocation code for blocks one at a time is slightly less efficient.
-
-   All of these reasons for not using only generic file write were understood back when reiserfs was first miscoded to
-   use it, but we were in a hurry to make code freeze, and so it couldn't be revised then.  This new code should make
-   things right finally.
-
-   Future Features: providing search_by_key with hints.
-
-*/
-static ssize_t reiserfs_file_write(struct file *file,	/* the file we are going to write into */
-				   const char __user * buf,	/*  pointer to user supplied data
-								   (in userspace) */
-				   size_t count,	/* amount of bytes to write */
-				   loff_t * ppos	/* pointer to position in file that we start writing at. Should be updated to
-							 * new current position before returning. */
-				   )
-{
-	struct inode *inode = file_inode(file);	// Inode of the file that we are writing to.
-	/* To simplify coding at this time, we store
-	   locked pages in array for now */
-	struct reiserfs_transaction_handle th;
-	th.t_trans_id = 0;
-
-	/* If a filesystem is converted from 3.5 to 3.6, we'll have v3.5 items
-	* lying around (most of the disk, in fact). Despite the filesystem
-	* now being a v3.6 format, the old items still can't support large
-	* file sizes. Catch this case here, as the rest of the VFS layer is
-	* oblivious to the different limitations between old and new items.
-	* reiserfs_setattr catches this for truncates. This chunk is lifted
-	* from generic_write_checks. */
-	if (get_inode_item_key_version (inode) == KEY_FORMAT_3_5 &&
-	    *ppos + count > MAX_NON_LFS) {
-		if (*ppos >= MAX_NON_LFS) {
-			return -EFBIG;
-		}
-		if (count > MAX_NON_LFS - (unsigned long)*ppos)
-			count = MAX_NON_LFS - (unsigned long)*ppos;
-	}
-
-	return do_sync_write(file, buf, count, ppos);
-}
-
 const struct file_operations reiserfs_file_operations = {
 	.read = do_sync_read,
-	.write = reiserfs_file_write,
+	.write = do_sync_write,
 	.unlocked_ioctl = reiserfs_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl = reiserfs_compat_ioctl,
-- 
cgit 


From 5f2e354f5212a49fc639c25de2581cc8ae90d11b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 19 Mar 2013 20:35:00 -0400
Subject: hpfs: move setting hpfs-private i_dirty to ->write_end()

... so that writev(2) doesn't miss it.  Get rid of hpfs_file_write().

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/hpfs/file.c | 36 ++++++++++++++++++++----------------
 1 file changed, 20 insertions(+), 16 deletions(-)

(limited to 'fs')

diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index 9f9dbeceeee7..3027f4dbbab5 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -131,6 +131,24 @@ static int hpfs_write_begin(struct file *file, struct address_space *mapping,
 	return ret;
 }
 
+static int hpfs_write_end(struct file *file, struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned copied,
+			struct page *pagep, void *fsdata)
+{	/* ->write_end for hpfs_aops: generic_write_end() plus setting the hpfs-private i_dirty */
+	struct inode *inode = mapping->host;
+	int err;	/* result of generic_write_end(); returned to the caller unchanged */
+	err = generic_write_end(file, mapping, pos, len, copied, pagep, fsdata);
+	if (err < len)	/* NOTE(review): int vs unsigned, compared as unsigned - a negative err would not get here; confirm */
+		hpfs_write_failed(mapping, pos + len);
+	if (!(err < 0)) {	/* any non-negative result, 0 included */
+		/* make sure we write it on close, if not earlier */
+		hpfs_lock(inode->i_sb);
+		hpfs_i(inode)->i_dirty = 1;
+		hpfs_unlock(inode->i_sb);
+	}
+	return err;
+}
+
 static sector_t _hpfs_bmap(struct address_space *mapping, sector_t block)
 {
 	return generic_block_bmap(mapping,block,hpfs_get_block);
@@ -140,30 +158,16 @@ const struct address_space_operations hpfs_aops = {
 	.readpage = hpfs_readpage,
 	.writepage = hpfs_writepage,
 	.write_begin = hpfs_write_begin,
-	.write_end = generic_write_end,
+	.write_end = hpfs_write_end,
 	.bmap = _hpfs_bmap
 };
 
-static ssize_t hpfs_file_write(struct file *file, const char __user *buf,
-			size_t count, loff_t *ppos)
-{
-	ssize_t retval;
-
-	retval = do_sync_write(file, buf, count, ppos);
-	if (retval > 0) {
-		hpfs_lock(file->f_path.dentry->d_sb);
-		hpfs_i(file_inode(file))->i_dirty = 1;
-		hpfs_unlock(file->f_path.dentry->d_sb);
-	}
-	return retval;
-}
-
 const struct file_operations hpfs_file_ops =
 {
 	.llseek		= generic_file_llseek,
 	.read		= do_sync_read,
 	.aio_read	= generic_file_aio_read,
-	.write		= hpfs_file_write,
+	.write		= do_sync_write,
 	.aio_write	= generic_file_aio_write,
 	.mmap		= generic_file_mmap,
 	.release	= hpfs_file_release,
-- 
cgit 


From 8d71db4f0890605d44815a2b2da4ca003f1bb142 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 19 Mar 2013 21:01:03 -0400
Subject: lift sb_start_write/sb_end_write out of ->aio_write()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/aio.c          | 4 ++++
 fs/btrfs/file.c   | 3 ---
 fs/cifs/file.c    | 3 ---
 fs/compat.c       | 6 ++++--
 fs/fuse/file.c    | 2 --
 fs/ntfs/file.c    | 2 --
 fs/ocfs2/file.c   | 3 ---
 fs/read_write.c   | 8 ++++++--
 fs/xfs/xfs_file.c | 3 ---
 9 files changed, 14 insertions(+), 20 deletions(-)

(limited to 'fs')

diff --git a/fs/aio.c b/fs/aio.c
index 3f941f2a3059..4ec28f13a92e 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1324,6 +1324,8 @@ static ssize_t aio_rw_vect_retry(struct kiocb *iocb)
 	if (iocb->ki_pos < 0)
 		return -EINVAL;
 
+	if (opcode == IOCB_CMD_PWRITEV)
+		file_start_write(file);
 	do {
 		ret = rw_op(iocb, &iocb->ki_iovec[iocb->ki_cur_seg],
 			    iocb->ki_nr_segs - iocb->ki_cur_seg,
@@ -1336,6 +1338,8 @@ static ssize_t aio_rw_vect_retry(struct kiocb *iocb)
 	} while (ret > 0 && iocb->ki_left > 0 &&
 		 (opcode == IOCB_CMD_PWRITEV ||
 		  (!S_ISFIFO(inode->i_mode) && !S_ISSOCK(inode->i_mode))));
+	if (opcode == IOCB_CMD_PWRITEV)
+		file_end_write(file);
 
 	/* This means we must have transferred all that we could */
 	/* No need to retry anymore */
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 5b4ea5f55b8f..254aeb72915f 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1514,8 +1514,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 	size_t count, ocount;
 	bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host);
 
-	sb_start_write(inode->i_sb);
-
 	mutex_lock(&inode->i_mutex);
 
 	err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
@@ -1617,7 +1615,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 	if (sync)
 		atomic_dec(&BTRFS_I(inode)->sync_writers);
 out:
-	sb_end_write(inode->i_sb);
 	current->backing_dev_info = NULL;
 	return num_written ? num_written : err;
 }
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 7a0dd99e4507..2d4a231dd70b 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2520,8 +2520,6 @@ cifs_writev(struct kiocb *iocb, const struct iovec *iov,
 
 	BUG_ON(iocb->ki_pos != pos);
 
-	sb_start_write(inode->i_sb);
-
 	/*
 	 * We need to hold the sem to be sure nobody modifies lock list
 	 * with a brlock that prevents writing.
@@ -2545,7 +2543,6 @@ cifs_writev(struct kiocb *iocb, const struct iovec *iov,
 	}
 
 	up_read(&cinode->lock_sem);
-	sb_end_write(inode->i_sb);
 	return rc;
 }
 
diff --git a/fs/compat.c b/fs/compat.c
index d487985dd0ea..daa3b771d64d 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1103,10 +1103,12 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
 		fnv = file->f_op->aio_write;
 	}
 
-	if (fnv)
+	if (fnv) {
+		file_start_write(file);
 		ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
 						pos, fnv);
-	else
+		file_end_write(file);
+	} else
 		ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);
 
 out:
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 34b80ba95bad..d15c6f21c17f 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -971,7 +971,6 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 		return err;
 
 	count = ocount;
-	sb_start_write(inode->i_sb);
 	mutex_lock(&inode->i_mutex);
 
 	/* We can write back this queue in page reclaim */
@@ -1030,7 +1029,6 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 out:
 	current->backing_dev_info = NULL;
 	mutex_unlock(&inode->i_mutex);
-	sb_end_write(inode->i_sb);
 
 	return written ? written : err;
 }
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 5b2d4f0853ac..1da4b81e6f76 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -2129,7 +2129,6 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 
 	BUG_ON(iocb->ki_pos != pos);
 
-	sb_start_write(inode->i_sb);
 	mutex_lock(&inode->i_mutex);
 	ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos);
 	mutex_unlock(&inode->i_mutex);
@@ -2138,7 +2137,6 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 		if (err < 0)
 			ret = err;
 	}
-	sb_end_write(inode->i_sb);
 	return ret;
 }
 
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 6474cb44004d..1c93e771e950 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2248,8 +2248,6 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
 	if (iocb->ki_left == 0)
 		return 0;
 
-	sb_start_write(inode->i_sb);
-
 	appending = file->f_flags & O_APPEND ? 1 : 0;
 	direct_io = file->f_flags & O_DIRECT ? 1 : 0;
 
@@ -2423,7 +2421,6 @@ out_sems:
 		ocfs2_iocb_clear_sem_locked(iocb);
 
 	mutex_unlock(&inode->i_mutex);
-	sb_end_write(inode->i_sb);
 
 	if (written)
 		ret = written;
diff --git a/fs/read_write.c b/fs/read_write.c
index f7b5a23b804b..3e1791a2cfd6 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -398,6 +398,7 @@ ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, lof
 	struct kiocb kiocb;
 	ssize_t ret;
 
+	file_start_write(filp);
 	init_sync_kiocb(&kiocb, filp);
 	kiocb.ki_pos = *ppos;
 	kiocb.ki_left = len;
@@ -413,6 +414,7 @@ ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, lof
 	if (-EIOCBQUEUED == ret)
 		ret = wait_on_sync_kiocb(&kiocb);
 	*ppos = kiocb.ki_pos;
+	file_end_write(filp);
 	return ret;
 }
 
@@ -758,10 +760,12 @@ static ssize_t do_readv_writev(int type, struct file *file,
 		fnv = file->f_op->aio_write;
 	}
 
-	if (fnv)
+	if (fnv) {
+		file_start_write(file);
 		ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
 						pos, fnv);
-	else
+		file_end_write(file);
+	} else
 		ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);
 
 out:
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index f03bf1a456fb..3800128d2171 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -775,8 +775,6 @@ xfs_file_aio_write(
 	if (ocount == 0)
 		return 0;
 
-	sb_start_write(inode->i_sb);
-
 	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
 		ret = -EIO;
 		goto out;
@@ -800,7 +798,6 @@ xfs_file_aio_write(
 	}
 
 out:
-	sb_end_write(inode->i_sb);
 	return ret;
 }
 
-- 
cgit 


From bdaec334bbe7d234ca6ddd81aa74b2938d40e6b4 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 20 Mar 2013 09:33:23 -0400
Subject: f2fs: use mnt_want_write_file() in ioctl

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/f2fs/file.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 958a46da19ae..db626282d424 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -590,7 +590,7 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 	{
 		unsigned int oldflags;
 
-		ret = mnt_want_write(filp->f_path.mnt);
+		ret = mnt_want_write_file(filp);
 		if (ret)
 			return ret;
 
@@ -627,7 +627,7 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		inode->i_ctime = CURRENT_TIME;
 		mark_inode_dirty(inode);
 out:
-		mnt_drop_write(filp->f_path.mnt);
+		mnt_drop_write_file(filp);
 		return ret;
 	}
 	default:
-- 
cgit 


From 72ec35163f9f728ba1579fd80682e51e933dfa8a Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 20 Mar 2013 10:42:10 -0400
Subject: switch compat readv/writev variants to COMPAT_SYSCALL_DEFINE

... and take to fs/read_write.c

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/compat.c     | 186 ----------------------------------------------------
 fs/read_write.c | 197 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 fs/read_write.h |   4 --
 3 files changed, 195 insertions(+), 192 deletions(-)

(limited to 'fs')

diff --git a/fs/compat.c b/fs/compat.c
index daa3b771d64d..5058345dc279 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1069,192 +1069,6 @@ asmlinkage long compat_sys_getdents64(unsigned int fd,
 }
 #endif /* ! __ARCH_OMIT_COMPAT_SYS_GETDENTS64 */
 
-static ssize_t compat_do_readv_writev(int type, struct file *file,
-			       const struct compat_iovec __user *uvector,
-			       unsigned long nr_segs, loff_t *pos)
-{
-	compat_ssize_t tot_len;
-	struct iovec iovstack[UIO_FASTIOV];
-	struct iovec *iov = iovstack;
-	ssize_t ret;
-	io_fn_t fn;
-	iov_fn_t fnv;
-
-	ret = -EINVAL;
-	if (!file->f_op)
-		goto out;
-
-	ret = compat_rw_copy_check_uvector(type, uvector, nr_segs,
-					       UIO_FASTIOV, iovstack, &iov);
-	if (ret <= 0)
-		goto out;
-
-	tot_len = ret;
-	ret = rw_verify_area(type, file, pos, tot_len);
-	if (ret < 0)
-		goto out;
-
-	fnv = NULL;
-	if (type == READ) {
-		fn = file->f_op->read;
-		fnv = file->f_op->aio_read;
-	} else {
-		fn = (io_fn_t)file->f_op->write;
-		fnv = file->f_op->aio_write;
-	}
-
-	if (fnv) {
-		file_start_write(file);
-		ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
-						pos, fnv);
-		file_end_write(file);
-	} else
-		ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);
-
-out:
-	if (iov != iovstack)
-		kfree(iov);
-	if ((ret + (type == READ)) > 0) {
-		if (type == READ)
-			fsnotify_access(file);
-		else
-			fsnotify_modify(file);
-	}
-	return ret;
-}
-
-static size_t compat_readv(struct file *file,
-			   const struct compat_iovec __user *vec,
-			   unsigned long vlen, loff_t *pos)
-{
-	ssize_t ret = -EBADF;
-
-	if (!(file->f_mode & FMODE_READ))
-		goto out;
-
-	ret = -EINVAL;
-	if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read))
-		goto out;
-
-	ret = compat_do_readv_writev(READ, file, vec, vlen, pos);
-
-out:
-	if (ret > 0)
-		add_rchar(current, ret);
-	inc_syscr(current);
-	return ret;
-}
-
-asmlinkage ssize_t
-compat_sys_readv(unsigned long fd, const struct compat_iovec __user *vec,
-		 unsigned long vlen)
-{
-	struct fd f = fdget(fd);
-	ssize_t ret;
-	loff_t pos;
-
-	if (!f.file)
-		return -EBADF;
-	pos = f.file->f_pos;
-	ret = compat_readv(f.file, vec, vlen, &pos);
-	f.file->f_pos = pos;
-	fdput(f);
-	return ret;
-}
-
-asmlinkage ssize_t
-compat_sys_preadv64(unsigned long fd, const struct compat_iovec __user *vec,
-		    unsigned long vlen, loff_t pos)
-{
-	struct fd f;
-	ssize_t ret;
-
-	if (pos < 0)
-		return -EINVAL;
-	f = fdget(fd);
-	if (!f.file)
-		return -EBADF;
-	ret = -ESPIPE;
-	if (f.file->f_mode & FMODE_PREAD)
-		ret = compat_readv(f.file, vec, vlen, &pos);
-	fdput(f);
-	return ret;
-}
-
-asmlinkage ssize_t
-compat_sys_preadv(unsigned long fd, const struct compat_iovec __user *vec,
-		  unsigned long vlen, u32 pos_low, u32 pos_high)
-{
-	loff_t pos = ((loff_t)pos_high << 32) | pos_low;
-	return compat_sys_preadv64(fd, vec, vlen, pos);
-}
-
-static size_t compat_writev(struct file *file,
-			    const struct compat_iovec __user *vec,
-			    unsigned long vlen, loff_t *pos)
-{
-	ssize_t ret = -EBADF;
-
-	if (!(file->f_mode & FMODE_WRITE))
-		goto out;
-
-	ret = -EINVAL;
-	if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write))
-		goto out;
-
-	ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos);
-
-out:
-	if (ret > 0)
-		add_wchar(current, ret);
-	inc_syscw(current);
-	return ret;
-}
-
-asmlinkage ssize_t
-compat_sys_writev(unsigned long fd, const struct compat_iovec __user *vec,
-		  unsigned long vlen)
-{
-	struct fd f = fdget(fd);
-	ssize_t ret;
-	loff_t pos;
-
-	if (!f.file)
-		return -EBADF;
-	pos = f.file->f_pos;
-	ret = compat_writev(f.file, vec, vlen, &pos);
-	f.file->f_pos = pos;
-	fdput(f);
-	return ret;
-}
-
-asmlinkage ssize_t
-compat_sys_pwritev64(unsigned long fd, const struct compat_iovec __user *vec,
-		     unsigned long vlen, loff_t pos)
-{
-	struct fd f;
-	ssize_t ret;
-
-	if (pos < 0)
-		return -EINVAL;
-	f = fdget(fd);
-	if (!f.file)
-		return -EBADF;
-	ret = -ESPIPE;
-	if (f.file->f_mode & FMODE_PWRITE)
-		ret = compat_writev(f.file, vec, vlen, &pos);
-	fdput(f);
-	return ret;
-}
-
-asmlinkage ssize_t
-compat_sys_pwritev(unsigned long fd, const struct compat_iovec __user *vec,
-		   unsigned long vlen, u32 pos_low, u32 pos_high)
-{
-	loff_t pos = ((loff_t)pos_high << 32) | pos_low;
-	return compat_sys_pwritev64(fd, vec, vlen, pos);
-}
-
 asmlinkage long
 compat_sys_vmsplice(int fd, const struct compat_iovec __user *iov32,
 		    unsigned int nr_segs, unsigned int flags)
diff --git a/fs/read_write.c b/fs/read_write.c
index 3e1791a2cfd6..e6dd1c2d0592 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -591,7 +591,7 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to)
 }
 EXPORT_SYMBOL(iov_shorten);
 
-ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
+static ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
 		unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn)
 {
 	struct kiocb kiocb;
@@ -616,7 +616,7 @@ ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
 }
 
 /* Do it by hand, with file-ops */
-ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov,
+static ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov,
 		unsigned long nr_segs, loff_t *ppos, io_fn_t fn)
 {
 	struct iovec *vector = iov;
@@ -898,6 +898,199 @@ SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
 	return ret;
 }
 
+#ifdef CONFIG_COMPAT
+
+static ssize_t compat_do_readv_writev(int type, struct file *file,
+			       const struct compat_iovec __user *uvector,
+			       unsigned long nr_segs, loff_t *pos)
+{	/* shared body of compat_readv()/compat_writev(), which pass READ or WRITE as type */
+	compat_ssize_t tot_len;
+	struct iovec iovstack[UIO_FASTIOV];
+	struct iovec *iov = iovstack;	/* &iov goes to the copy helper; kfree()d at out: if it no longer points at iovstack */
+	ssize_t ret;
+	io_fn_t fn;
+	iov_fn_t fnv;
+
+	ret = -EINVAL;
+	if (!file->f_op)
+		goto out;
+
+	ret = -EFAULT;
+	if (!access_ok(VERIFY_READ, uvector, nr_segs*sizeof(*uvector)))
+		goto out;
+
+	ret = compat_rw_copy_check_uvector(type, uvector, nr_segs,
+					       UIO_FASTIOV, iovstack, &iov);
+	if (ret <= 0)	/* negative or zero result is returned as is */
+		goto out;
+
+	tot_len = ret;	/* a positive result is used as the total length from here on */
+	ret = rw_verify_area(type, file, pos, tot_len);
+	if (ret < 0)
+		goto out;
+
+	fnv = NULL;
+	if (type == READ) {
+		fn = file->f_op->read;
+		fnv = file->f_op->aio_read;
+	} else {
+		fn = (io_fn_t)file->f_op->write;
+		fnv = file->f_op->aio_write;
+	}
+
+	if (fnv) {
+		file_start_write(file);
+		ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
+						pos, fnv);
+		file_end_write(file);
+	} else
+		ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);
+
+out:
+	if (iov != iovstack)
+		kfree(iov);
+	if ((ret + (type == READ)) > 0) {	/* READ: ret >= 0, otherwise: ret > 0 */
+		if (type == READ)
+			fsnotify_access(file);
+		else
+			fsnotify_modify(file);
+	}
+	return ret;
+}
+
+static size_t compat_readv(struct file *file,
+			   const struct compat_iovec __user *vec,
+			   unsigned long vlen, loff_t *pos)
+{	/* checks the file can be read, then calls compat_do_readv_writev(READ, ...) */
+	ssize_t ret = -EBADF;	/* result if the file was not opened with FMODE_READ */
+
+	if (!(file->f_mode & FMODE_READ))
+		goto out;
+
+	ret = -EINVAL;	/* result if the file has neither ->aio_read nor ->read */
+	if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read))
+		goto out;
+
+	ret = compat_do_readv_writev(READ, file, vec, vlen, pos);
+
+out:
+	if (ret > 0)
+		add_rchar(current, ret);
+	inc_syscr(current);	/* reached on the error paths above as well */
+	return ret;	/* NOTE(review): ssize_t value returned through a size_t return type */
+}
+
+COMPAT_SYSCALL_DEFINE3(readv, unsigned long, fd,
+		const struct compat_iovec __user *,vec,
+		unsigned long, vlen)
+{	/* compat readv: vectored read starting at the file's own f_pos */
+	struct fd f = fdget(fd);
+	ssize_t ret;
+	loff_t pos;
+
+	if (!f.file)
+		return -EBADF;
+	pos = f.file->f_pos;	/* compat_readv() works on this local copy */
+	ret = compat_readv(f.file, vec, vlen, &pos);
+	f.file->f_pos = pos;	/* stored back whatever ret is */
+	fdput(f);
+	return ret;
+}
+
+COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd,
+		const struct compat_iovec __user *,vec,
+		unsigned long, vlen, loff_t, pos)
+{	/* compat vectored read at a caller-supplied offset; f_pos is not read or written here */
+	struct fd f;
+	ssize_t ret;
+
+	if (pos < 0)	/* rejected before the descriptor is even looked up */
+		return -EINVAL;
+	f = fdget(fd);
+	if (!f.file)
+		return -EBADF;
+	ret = -ESPIPE;	/* result if the file lacks FMODE_PREAD */
+	if (f.file->f_mode & FMODE_PREAD)
+		ret = compat_readv(f.file, vec, vlen, &pos);
+	fdput(f);
+	return ret;
+}
+
+COMPAT_SYSCALL_DEFINE5(preadv, unsigned long, fd,
+		const struct compat_iovec __user *,vec,
+		unsigned long, vlen, u32, pos_low, u32, pos_high)
+{	/* same as preadv64, with the offset passed as two 32-bit halves */
+	loff_t pos = ((loff_t)pos_high << 32) | pos_low;	/* pos_high supplies bits 32 and up */
+	return compat_sys_preadv64(fd, vec, vlen, pos);
+}
+
+static size_t compat_writev(struct file *file,
+			    const struct compat_iovec __user *vec,
+			    unsigned long vlen, loff_t *pos)
+{	/* checks the file can be written, then calls compat_do_readv_writev(WRITE, ...) */
+	ssize_t ret = -EBADF;	/* result if the file was not opened with FMODE_WRITE */
+
+	if (!(file->f_mode & FMODE_WRITE))
+		goto out;
+
+	ret = -EINVAL;	/* result if the file has neither ->aio_write nor ->write */
+	if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write))
+		goto out;
+
+	ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos);
+
+out:
+	if (ret > 0)
+		add_wchar(current, ret);
+	inc_syscw(current);	/* reached on the error paths above as well */
+	return ret;	/* NOTE(review): ssize_t value returned through a size_t return type */
+}
+
+COMPAT_SYSCALL_DEFINE3(writev, unsigned long, fd,
+		const struct compat_iovec __user *, vec,
+		unsigned long, vlen)
+{	/* compat writev: vectored write starting at the file's own f_pos */
+	struct fd f = fdget(fd);
+	ssize_t ret;
+	loff_t pos;
+
+	if (!f.file)
+		return -EBADF;
+	pos = f.file->f_pos;	/* compat_writev() works on this local copy */
+	ret = compat_writev(f.file, vec, vlen, &pos);
+	f.file->f_pos = pos;	/* stored back whatever ret is */
+	fdput(f);
+	return ret;
+}
+
+COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd,
+		const struct compat_iovec __user *,vec,
+		unsigned long, vlen, loff_t, pos)
+{	/* compat vectored write at a caller-supplied offset; f_pos is not read or written here */
+	struct fd f;
+	ssize_t ret;
+
+	if (pos < 0)	/* rejected before the descriptor is even looked up */
+		return -EINVAL;
+	f = fdget(fd);
+	if (!f.file)
+		return -EBADF;
+	ret = -ESPIPE;	/* result if the file lacks FMODE_PWRITE */
+	if (f.file->f_mode & FMODE_PWRITE)
+		ret = compat_writev(f.file, vec, vlen, &pos);
+	fdput(f);
+	return ret;
+}
+
+COMPAT_SYSCALL_DEFINE5(pwritev, unsigned long, fd,
+		const struct compat_iovec __user *,vec,
+		unsigned long, vlen, u32, pos_low, u32, pos_high)
+{	/* same as pwritev64, with the offset passed as two 32-bit halves */
+	loff_t pos = ((loff_t)pos_high << 32) | pos_low;	/* pos_high supplies bits 32 and up */
+	return compat_sys_pwritev64(fd, vec, vlen, pos);
+}
+#endif
+
 ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, size_t count,
 		    loff_t max)
 {
diff --git a/fs/read_write.h b/fs/read_write.h
index d3e00ef67420..b98780664ffa 100644
--- a/fs/read_write.h
+++ b/fs/read_write.h
@@ -8,9 +8,5 @@ typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *);
 typedef ssize_t (*iov_fn_t)(struct kiocb *, const struct iovec *,
 		unsigned long, loff_t);
 
-ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
-		unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn);
-ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov,
-		unsigned long nr_segs, loff_t *ppos, io_fn_t fn);
 ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, size_t count,
 		    loff_t max);
-- 
cgit 


From 03d95eb2f2578083a3f6286262e1cb5d88a00c02 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 20 Mar 2013 13:04:20 -0400
Subject: lift sb_start_write() out of ->write()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/cachefiles/rdwr.c |  2 ++
 fs/coda/file.c       |  2 ++
 fs/coredump.c        |  2 ++
 fs/read_write.c      | 24 ++++++++++++++----------
 fs/splice.c          |  2 ++
 5 files changed, 22 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index 480992259707..317f9ee9c991 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -962,12 +962,14 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page)
 			}
 
 			data = kmap(page);
+			file_start_write(file);
 			old_fs = get_fs();
 			set_fs(KERNEL_DS);
 			ret = file->f_op->write(
 				file, (const void __user *) data, len, &pos);
 			set_fs(old_fs);
 			kunmap(page);
+			file_end_write(file);
 			if (ret != len)
 				ret = -EIO;
 		}
diff --git a/fs/coda/file.c b/fs/coda/file.c
index fa4c100bdc7d..380b798f8443 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -79,6 +79,7 @@ coda_file_write(struct file *coda_file, const char __user *buf, size_t count, lo
 		return -EINVAL;
 
 	host_inode = file_inode(host_file);
+	file_start_write(host_file);
 	mutex_lock(&coda_inode->i_mutex);
 
 	ret = host_file->f_op->write(host_file, buf, count, ppos);
@@ -87,6 +88,7 @@ coda_file_write(struct file *coda_file, const char __user *buf, size_t count, lo
 	coda_inode->i_blocks = (coda_inode->i_size + 511) >> 9;
 	coda_inode->i_mtime = coda_inode->i_ctime = CURRENT_TIME_SEC;
 	mutex_unlock(&coda_inode->i_mutex);
+	file_end_write(host_file);
 
 	return ret;
 }
diff --git a/fs/coredump.c b/fs/coredump.c
index c6479658d487..288e5c9f9bbe 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -629,9 +629,11 @@ void do_coredump(siginfo_t *siginfo)
 		goto close_fail;
 	if (displaced)
 		put_files_struct(displaced);
+	file_start_write(cprm.file);
 	retval = binfmt->core_dump(&cprm);
 	if (retval)
 		current->signal->group_exit_code |= 0x80;
+	file_end_write(cprm.file);
 
 	if (ispipe && core_pipe_limit)
 		wait_for_dump_helpers(cprm.file);
diff --git a/fs/read_write.c b/fs/read_write.c
index e6dd1c2d0592..a1f4d44cbc03 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -398,7 +398,6 @@ ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, lof
 	struct kiocb kiocb;
 	ssize_t ret;
 
-	file_start_write(filp);
 	init_sync_kiocb(&kiocb, filp);
 	kiocb.ki_pos = *ppos;
 	kiocb.ki_left = len;
@@ -414,7 +413,6 @@ ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, lof
 	if (-EIOCBQUEUED == ret)
 		ret = wait_on_sync_kiocb(&kiocb);
 	*ppos = kiocb.ki_pos;
-	file_end_write(filp);
 	return ret;
 }
 
@@ -458,6 +456,7 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_
 	ret = rw_verify_area(WRITE, file, pos, count);
 	if (ret >= 0) {
 		count = ret;
+		file_start_write(file);
 		if (file->f_op->write)
 			ret = file->f_op->write(file, buf, count, pos);
 		else
@@ -467,6 +466,7 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_
 			add_wchar(current, ret);
 		}
 		inc_syscw(current);
+		file_end_write(file);
 	}
 
 	return ret;
@@ -758,16 +758,18 @@ static ssize_t do_readv_writev(int type, struct file *file,
 	} else {
 		fn = (io_fn_t)file->f_op->write;
 		fnv = file->f_op->aio_write;
+		file_start_write(file);
 	}
 
-	if (fnv) {
-		file_start_write(file);
+	if (fnv)
 		ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
 						pos, fnv);
-		file_end_write(file);
-	} else
+	else
 		ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);
 
+	if (type != READ)
+		file_end_write(file);
+
 out:
 	if (iov != iovstack)
 		kfree(iov);
@@ -936,16 +938,18 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
 	} else {
 		fn = (io_fn_t)file->f_op->write;
 		fnv = file->f_op->aio_write;
+		file_start_write(file);
 	}
 
-	if (fnv) {
-		file_start_write(file);
+	if (fnv)
 		ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
 						pos, fnv);
-		file_end_write(file);
-	} else
+	else
 		ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);
 
+	if (type != READ)
+		file_end_write(file);
+
 out:
 	if (iov != iovstack)
 		kfree(iov);
diff --git a/fs/splice.c b/fs/splice.c
index 29e394e49ddd..e78a749064db 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1052,7 +1052,9 @@ static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
 	loff_t tmp = sd->pos;
 
 	data = buf->ops->map(pipe, buf, 0);
+	file_start_write(sd->u.file);
 	ret = __kernel_write(sd->u.file, data + buf->offset, sd->len, &tmp);
+	file_end_write(sd->u.file);
 	buf->ops->unmap(pipe, buf, data);
 
 	return ret;
-- 
cgit 


From 17338fccb28ec38097041074dcdc2016df538290 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 20 Mar 2013 13:19:30 -0400
Subject: lift sb_start_write into default_file_splice_write()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/splice.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/splice.c b/fs/splice.c
index e78a749064db..17d7323bc2c5 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1052,9 +1052,7 @@ static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
 	loff_t tmp = sd->pos;
 
 	data = buf->ops->map(pipe, buf, 0);
-	file_start_write(sd->u.file);
 	ret = __kernel_write(sd->u.file, data + buf->offset, sd->len, &tmp);
-	file_end_write(sd->u.file);
 	buf->ops->unmap(pipe, buf, data);
 
 	return ret;
@@ -1066,7 +1064,9 @@ static ssize_t default_file_splice_write(struct pipe_inode_info *pipe,
 {
 	ssize_t ret;
 
+	file_start_write(out);
 	ret = splice_from_pipe(pipe, out, ppos, len, flags, write_pipe_buf);
+	file_end_write(out);
 	if (ret > 0)
 		*ppos += ret;
 
-- 
cgit 


From 2dd8c9ad376ccc5d2980b38e96372a8e252ae8d0 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 20 Mar 2013 13:21:32 -0400
Subject: lift sb_start_write out of ->splice_write()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/splice.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/splice.c b/fs/splice.c
index 17d7323bc2c5..7efc2f5057fb 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1000,8 +1000,6 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
 	};
 	ssize_t ret;
 
-	sb_start_write(inode->i_sb);
-
 	pipe_lock(pipe);
 
 	splice_from_pipe_begin(&sd);
@@ -1037,7 +1035,6 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
 			*ppos += ret;
 		balance_dirty_pages_ratelimited(mapping);
 	}
-	sb_end_write(inode->i_sb);
 
 	return ret;
 }
@@ -1064,9 +1061,7 @@ static ssize_t default_file_splice_write(struct pipe_inode_info *pipe,
 {
 	ssize_t ret;
 
-	file_start_write(out);
 	ret = splice_from_pipe(pipe, out, ppos, len, flags, write_pipe_buf);
-	file_end_write(out);
 	if (ret > 0)
 		*ppos += ret;
 
@@ -1119,7 +1114,10 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
 	else
 		splice_write = default_file_splice_write;
 
-	return splice_write(pipe, out, ppos, len, flags);
+	file_start_write(out);
+	ret = splice_write(pipe, out, ppos, len, flags);
+	file_end_write(out);
+	return ret;
 }
 
 /*
-- 
cgit 


From f776c738883bc949e654568a565aee5a7d3fe133 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 12 Mar 2013 09:46:27 -0400
Subject: fold fifo.c into pipe.c

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/Makefile |   2 +-
 fs/fifo.c   | 153 ------------------------------------------------------------
 fs/pipe.c   | 138 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 139 insertions(+), 154 deletions(-)
 delete mode 100644 fs/fifo.c

(limited to 'fs')

diff --git a/fs/Makefile b/fs/Makefile
index 9d53192236fc..b691a965dc1a 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -7,7 +7,7 @@
 
 obj-y :=	open.o read_write.o file_table.o super.o \
 		char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \
-		ioctl.o readdir.o select.o fifo.o dcache.o inode.o \
+		ioctl.o readdir.o select.o dcache.o inode.o \
 		attr.o bad_inode.o file.o filesystems.o namespace.o \
 		seq_file.o xattr.o libfs.o fs-writeback.o \
 		pnode.o drop_caches.o splice.o sync.o utimes.o \
diff --git a/fs/fifo.c b/fs/fifo.c
deleted file mode 100644
index cf6f4345ceb0..000000000000
--- a/fs/fifo.c
+++ /dev/null
@@ -1,153 +0,0 @@
-/*
- *  linux/fs/fifo.c
- *
- *  written by Paul H. Hargrove
- *
- *  Fixes:
- *	10-06-1999, AV: fixed OOM handling in fifo_open(), moved
- *			initialization there, switched to external
- *			allocation of pipe_inode_info.
- */
-
-#include <linux/mm.h>
-#include <linux/fs.h>
-#include <linux/sched.h>
-#include <linux/pipe_fs_i.h>
-
-static int wait_for_partner(struct inode* inode, unsigned int *cnt)
-{
-	int cur = *cnt;	
-
-	while (cur == *cnt) {
-		pipe_wait(inode->i_pipe);
-		if (signal_pending(current))
-			break;
-	}
-	return cur == *cnt ? -ERESTARTSYS : 0;
-}
-
-static void wake_up_partner(struct inode* inode)
-{
-	wake_up_interruptible(&inode->i_pipe->wait);
-}
-
-static int fifo_open(struct inode *inode, struct file *filp)
-{
-	struct pipe_inode_info *pipe;
-	int ret;
-
-	mutex_lock(&inode->i_mutex);
-	pipe = inode->i_pipe;
-	if (!pipe) {
-		ret = -ENOMEM;
-		pipe = alloc_pipe_info(inode);
-		if (!pipe)
-			goto err_nocleanup;
-		inode->i_pipe = pipe;
-	}
-	filp->f_version = 0;
-
-	/* We can only do regular read/write on fifos */
-	filp->f_mode &= (FMODE_READ | FMODE_WRITE);
-
-	switch (filp->f_mode) {
-	case FMODE_READ:
-	/*
-	 *  O_RDONLY
-	 *  POSIX.1 says that O_NONBLOCK means return with the FIFO
-	 *  opened, even when there is no process writing the FIFO.
-	 */
-		filp->f_op = &read_pipefifo_fops;
-		pipe->r_counter++;
-		if (pipe->readers++ == 0)
-			wake_up_partner(inode);
-
-		if (!pipe->writers) {
-			if ((filp->f_flags & O_NONBLOCK)) {
-				/* suppress POLLHUP until we have
-				 * seen a writer */
-				filp->f_version = pipe->w_counter;
-			} else {
-				if (wait_for_partner(inode, &pipe->w_counter))
-					goto err_rd;
-			}
-		}
-		break;
-	
-	case FMODE_WRITE:
-	/*
-	 *  O_WRONLY
-	 *  POSIX.1 says that O_NONBLOCK means return -1 with
-	 *  errno=ENXIO when there is no process reading the FIFO.
-	 */
-		ret = -ENXIO;
-		if ((filp->f_flags & O_NONBLOCK) && !pipe->readers)
-			goto err;
-
-		filp->f_op = &write_pipefifo_fops;
-		pipe->w_counter++;
-		if (!pipe->writers++)
-			wake_up_partner(inode);
-
-		if (!pipe->readers) {
-			if (wait_for_partner(inode, &pipe->r_counter))
-				goto err_wr;
-		}
-		break;
-	
-	case FMODE_READ | FMODE_WRITE:
-	/*
-	 *  O_RDWR
-	 *  POSIX.1 leaves this case "undefined" when O_NONBLOCK is set.
-	 *  This implementation will NEVER block on a O_RDWR open, since
-	 *  the process can at least talk to itself.
-	 */
-		filp->f_op = &rdwr_pipefifo_fops;
-
-		pipe->readers++;
-		pipe->writers++;
-		pipe->r_counter++;
-		pipe->w_counter++;
-		if (pipe->readers == 1 || pipe->writers == 1)
-			wake_up_partner(inode);
-		break;
-
-	default:
-		ret = -EINVAL;
-		goto err;
-	}
-
-	/* Ok! */
-	mutex_unlock(&inode->i_mutex);
-	return 0;
-
-err_rd:
-	if (!--pipe->readers)
-		wake_up_interruptible(&pipe->wait);
-	ret = -ERESTARTSYS;
-	goto err;
-
-err_wr:
-	if (!--pipe->writers)
-		wake_up_interruptible(&pipe->wait);
-	ret = -ERESTARTSYS;
-	goto err;
-
-err:
-	if (!pipe->readers && !pipe->writers)
-		free_pipe_info(inode);
-
-err_nocleanup:
-	mutex_unlock(&inode->i_mutex);
-	return ret;
-}
-
-/*
- * Dummy default file-operations: the only thing this does
- * is contain the open that then fills in the correct operations
- * depending on the access mode of the file...
- */
-const struct file_operations def_fifo_fops = {
-	.open		= fifo_open,	/* will set read_ or write_pipefifo_fops */
-	.llseek		= noop_llseek,
-};
diff --git a/fs/pipe.c b/fs/pipe.c
index 2234f3f61f8d..aed80c25cd40 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1144,6 +1144,144 @@ SYSCALL_DEFINE1(pipe, int __user *, fildes)
 	return sys_pipe2(fildes, 0);
 }
 
+static int wait_for_partner(struct inode* inode, unsigned int *cnt)
+{
+	int cur = *cnt;	
+
+	while (cur == *cnt) {
+		pipe_wait(inode->i_pipe);
+		if (signal_pending(current))
+			break;
+	}
+	return cur == *cnt ? -ERESTARTSYS : 0;
+}
+
+static void wake_up_partner(struct inode* inode)
+{
+	wake_up_interruptible(&inode->i_pipe->wait);
+}
+
+static int fifo_open(struct inode *inode, struct file *filp)
+{
+	struct pipe_inode_info *pipe;
+	int ret;
+
+	mutex_lock(&inode->i_mutex);
+	pipe = inode->i_pipe;
+	if (!pipe) {
+		ret = -ENOMEM;
+		pipe = alloc_pipe_info(inode);
+		if (!pipe)
+			goto err_nocleanup;
+		inode->i_pipe = pipe;
+	}
+	filp->f_version = 0;
+
+	/* We can only do regular read/write on fifos */
+	filp->f_mode &= (FMODE_READ | FMODE_WRITE);
+
+	switch (filp->f_mode) {
+	case FMODE_READ:
+	/*
+	 *  O_RDONLY
+	 *  POSIX.1 says that O_NONBLOCK means return with the FIFO
+	 *  opened, even when there is no process writing the FIFO.
+	 */
+		filp->f_op = &read_pipefifo_fops;
+		pipe->r_counter++;
+		if (pipe->readers++ == 0)
+			wake_up_partner(inode);
+
+		if (!pipe->writers) {
+			if ((filp->f_flags & O_NONBLOCK)) {
+				/* suppress POLLHUP until we have
+				 * seen a writer */
+				filp->f_version = pipe->w_counter;
+			} else {
+				if (wait_for_partner(inode, &pipe->w_counter))
+					goto err_rd;
+			}
+		}
+		break;
+	
+	case FMODE_WRITE:
+	/*
+	 *  O_WRONLY
+	 *  POSIX.1 says that O_NONBLOCK means return -1 with
+	 *  errno=ENXIO when there is no process reading the FIFO.
+	 */
+		ret = -ENXIO;
+		if ((filp->f_flags & O_NONBLOCK) && !pipe->readers)
+			goto err;
+
+		filp->f_op = &write_pipefifo_fops;
+		pipe->w_counter++;
+		if (!pipe->writers++)
+			wake_up_partner(inode);
+
+		if (!pipe->readers) {
+			if (wait_for_partner(inode, &pipe->r_counter))
+				goto err_wr;
+		}
+		break;
+	
+	case FMODE_READ | FMODE_WRITE:
+	/*
+	 *  O_RDWR
+	 *  POSIX.1 leaves this case "undefined" when O_NONBLOCK is set.
+	 *  This implementation will NEVER block on a O_RDWR open, since
+	 *  the process can at least talk to itself.
+	 */
+		filp->f_op = &rdwr_pipefifo_fops;
+
+		pipe->readers++;
+		pipe->writers++;
+		pipe->r_counter++;
+		pipe->w_counter++;
+		if (pipe->readers == 1 || pipe->writers == 1)
+			wake_up_partner(inode);
+		break;
+
+	default:
+		ret = -EINVAL;
+		goto err;
+	}
+
+	/* Ok! */
+	mutex_unlock(&inode->i_mutex);
+	return 0;
+
+err_rd:
+	if (!--pipe->readers)
+		wake_up_interruptible(&pipe->wait);
+	ret = -ERESTARTSYS;
+	goto err;
+
+err_wr:
+	if (!--pipe->writers)
+		wake_up_interruptible(&pipe->wait);
+	ret = -ERESTARTSYS;
+	goto err;
+
+err:
+	if (!pipe->readers && !pipe->writers)
+		free_pipe_info(inode);
+
+err_nocleanup:
+	mutex_unlock(&inode->i_mutex);
+	return ret;
+}
+
+/*
+ * Dummy default file-operations: the only thing this does
+ * is contain the open that then fills in the correct operations
+ * depending on the access mode of the file...
+ */
+const struct file_operations def_fifo_fops = {
+	.open		= fifo_open,	/* will set read_ or write_pipefifo_fops */
+	.llseek		= noop_llseek,
+};
+
 /*
  * Allocate a new array of pipe buffers and copy the info over. Returns the
  * pipe size if successful, or return -ERROR on error.
-- 
cgit 


From 599a0ac14e065b7c08471ef2e75a504b7dec9267 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 12 Mar 2013 09:58:10 -0400
Subject: pipe: fold file_operations instances in one

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/inode.c    |   2 +-
 fs/internal.h |   5 ++
 fs/pipe.c     | 221 +++++++++-------------------------------------------------
 3 files changed, 38 insertions(+), 190 deletions(-)

(limited to 'fs')

diff --git a/fs/inode.c b/fs/inode.c
index f5f7c06c36fb..5b76d9b1a884 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1803,7 +1803,7 @@ void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
 		inode->i_fop = &def_blk_fops;
 		inode->i_rdev = rdev;
 	} else if (S_ISFIFO(mode))
-		inode->i_fop = &def_fifo_fops;
+		inode->i_fop = &pipefifo_fops;
 	else if (S_ISSOCK(mode))
 		inode->i_fop = &bad_sock_fops;
 	else
diff --git a/fs/internal.h b/fs/internal.h
index 4be78237d896..eaa75f75b625 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -130,3 +130,8 @@ extern struct dentry *__d_alloc(struct super_block *, const struct qstr *);
  * read_write.c
  */
 extern ssize_t __kernel_write(struct file *, const char *, size_t, loff_t *);
+
+/*
+ * pipe.c
+ */
+extern const struct file_operations pipefifo_fops;
diff --git a/fs/pipe.c b/fs/pipe.c
index aed80c25cd40..099ac3bf89f9 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -25,6 +25,8 @@
 #include <asm/uaccess.h>
 #include <asm/ioctls.h>
 
+#include "internal.h"
+
 /*
  * The max size that a non-root user is allowed to grow the pipe. Can
  * be set by root in /proc/sys/fs/pipe-max-size
@@ -662,19 +664,6 @@ out:
 	return ret;
 }
 
-static ssize_t
-bad_pipe_r(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
-{
-	return -EBADF;
-}
-
-static ssize_t
-bad_pipe_w(struct file *filp, const char __user *buf, size_t count,
-	   loff_t *ppos)
-{
-	return -EBADF;
-}
-
 static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
 	struct inode *inode = file_inode(filp);
@@ -734,14 +723,16 @@ pipe_poll(struct file *filp, poll_table *wait)
 }
 
 static int
-pipe_release(struct inode *inode, int decr, int decw)
+pipe_release(struct inode *inode, struct file *file)
 {
 	struct pipe_inode_info *pipe;
 
 	mutex_lock(&inode->i_mutex);
 	pipe = inode->i_pipe;
-	pipe->readers -= decr;
-	pipe->writers -= decw;
+	if (file->f_mode & FMODE_READ)
+		pipe->readers--;
+	if (file->f_mode & FMODE_WRITE)
+		pipe->writers--;
 
 	if (!pipe->readers && !pipe->writers) {
 		free_pipe_info(inode);
@@ -756,174 +747,25 @@ pipe_release(struct inode *inode, int decr, int decw)
 }
 
 static int
-pipe_read_fasync(int fd, struct file *filp, int on)
-{
-	struct inode *inode = file_inode(filp);
-	int retval;
-
-	mutex_lock(&inode->i_mutex);
-	retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_readers);
-	mutex_unlock(&inode->i_mutex);
-
-	return retval;
-}
-
-
-static int
-pipe_write_fasync(int fd, struct file *filp, int on)
-{
-	struct inode *inode = file_inode(filp);
-	int retval;
-
-	mutex_lock(&inode->i_mutex);
-	retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_writers);
-	mutex_unlock(&inode->i_mutex);
-
-	return retval;
-}
-
-
-static int
-pipe_rdwr_fasync(int fd, struct file *filp, int on)
+pipe_fasync(int fd, struct file *filp, int on)
 {
 	struct inode *inode = file_inode(filp);
 	struct pipe_inode_info *pipe = inode->i_pipe;
-	int retval;
+	int retval = 0;
 
 	mutex_lock(&inode->i_mutex);
-	retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
-	if (retval >= 0) {
+	if (filp->f_mode & FMODE_READ)
+		retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
+	if ((filp->f_mode & FMODE_WRITE) && retval >= 0) {
 		retval = fasync_helper(fd, filp, on, &pipe->fasync_writers);
-		if (retval < 0) /* this can happen only if on == T */
+		if (retval < 0 && (filp->f_mode & FMODE_READ))
+			/* this can happen only if on == T */
 			fasync_helper(-1, filp, 0, &pipe->fasync_readers);
 	}
 	mutex_unlock(&inode->i_mutex);
 	return retval;
 }
 
-
-static int
-pipe_read_release(struct inode *inode, struct file *filp)
-{
-	return pipe_release(inode, 1, 0);
-}
-
-static int
-pipe_write_release(struct inode *inode, struct file *filp)
-{
-	return pipe_release(inode, 0, 1);
-}
-
-static int
-pipe_rdwr_release(struct inode *inode, struct file *filp)
-{
-	int decr, decw;
-
-	decr = (filp->f_mode & FMODE_READ) != 0;
-	decw = (filp->f_mode & FMODE_WRITE) != 0;
-	return pipe_release(inode, decr, decw);
-}
-
-static int
-pipe_read_open(struct inode *inode, struct file *filp)
-{
-	int ret = -ENOENT;
-
-	mutex_lock(&inode->i_mutex);
-
-	if (inode->i_pipe) {
-		ret = 0;
-		inode->i_pipe->readers++;
-	}
-
-	mutex_unlock(&inode->i_mutex);
-
-	return ret;
-}
-
-static int
-pipe_write_open(struct inode *inode, struct file *filp)
-{
-	int ret = -ENOENT;
-
-	mutex_lock(&inode->i_mutex);
-
-	if (inode->i_pipe) {
-		ret = 0;
-		inode->i_pipe->writers++;
-	}
-
-	mutex_unlock(&inode->i_mutex);
-
-	return ret;
-}
-
-static int
-pipe_rdwr_open(struct inode *inode, struct file *filp)
-{
-	int ret = -ENOENT;
-
-	if (!(filp->f_mode & (FMODE_READ|FMODE_WRITE)))
-		return -EINVAL;
-
-	mutex_lock(&inode->i_mutex);
-
-	if (inode->i_pipe) {
-		ret = 0;
-		if (filp->f_mode & FMODE_READ)
-			inode->i_pipe->readers++;
-		if (filp->f_mode & FMODE_WRITE)
-			inode->i_pipe->writers++;
-	}
-
-	mutex_unlock(&inode->i_mutex);
-
-	return ret;
-}
-
-/*
- * The file_operations structs are not static because they
- * are also used in linux/fs/fifo.c to do operations on FIFOs.
- *
- * Pipes reuse fifos' file_operations structs.
- */
-const struct file_operations read_pipefifo_fops = {
-	.llseek		= no_llseek,
-	.read		= do_sync_read,
-	.aio_read	= pipe_read,
-	.write		= bad_pipe_w,
-	.poll		= pipe_poll,
-	.unlocked_ioctl	= pipe_ioctl,
-	.open		= pipe_read_open,
-	.release	= pipe_read_release,
-	.fasync		= pipe_read_fasync,
-};
-
-const struct file_operations write_pipefifo_fops = {
-	.llseek		= no_llseek,
-	.read		= bad_pipe_r,
-	.write		= do_sync_write,
-	.aio_write	= pipe_write,
-	.poll		= pipe_poll,
-	.unlocked_ioctl	= pipe_ioctl,
-	.open		= pipe_write_open,
-	.release	= pipe_write_release,
-	.fasync		= pipe_write_fasync,
-};
-
-const struct file_operations rdwr_pipefifo_fops = {
-	.llseek		= no_llseek,
-	.read		= do_sync_read,
-	.aio_read	= pipe_read,
-	.write		= do_sync_write,
-	.aio_write	= pipe_write,
-	.poll		= pipe_poll,
-	.unlocked_ioctl	= pipe_ioctl,
-	.open		= pipe_rdwr_open,
-	.release	= pipe_rdwr_release,
-	.fasync		= pipe_rdwr_fasync,
-};
-
 struct pipe_inode_info * alloc_pipe_info(struct inode *inode)
 {
 	struct pipe_inode_info *pipe;
@@ -996,7 +838,7 @@ static struct inode * get_pipe_inode(void)
 	inode->i_pipe = pipe;
 
 	pipe->readers = pipe->writers = 1;
-	inode->i_fop = &rdwr_pipefifo_fops;
+	inode->i_fop = &pipefifo_fops;
 
 	/*
 	 * Mark the inode dirty from the very beginning,
@@ -1039,13 +881,13 @@ int create_pipe_files(struct file **res, int flags)
 	d_instantiate(path.dentry, inode);
 
 	err = -ENFILE;
-	f = alloc_file(&path, FMODE_WRITE, &write_pipefifo_fops);
+	f = alloc_file(&path, FMODE_WRITE, &pipefifo_fops);
 	if (IS_ERR(f))
 		goto err_dentry;
 
 	f->f_flags = O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT));
 
-	res[0] = alloc_file(&path, FMODE_READ, &read_pipefifo_fops);
+	res[0] = alloc_file(&path, FMODE_READ, &pipefifo_fops);
 	if (IS_ERR(res[0]))
 		goto err_file;
 
@@ -1164,6 +1006,7 @@ static void wake_up_partner(struct inode* inode)
 static int fifo_open(struct inode *inode, struct file *filp)
 {
 	struct pipe_inode_info *pipe;
+	bool is_pipe = inode->i_sb->s_magic == PIPEFS_MAGIC;
 	int ret;
 
 	mutex_lock(&inode->i_mutex);
@@ -1187,12 +1030,11 @@ static int fifo_open(struct inode *inode, struct file *filp)
 	 *  POSIX.1 says that O_NONBLOCK means return with the FIFO
 	 *  opened, even when there is no process writing the FIFO.
 	 */
-		filp->f_op = &read_pipefifo_fops;
 		pipe->r_counter++;
 		if (pipe->readers++ == 0)
 			wake_up_partner(inode);
 
-		if (!pipe->writers) {
+		if (!is_pipe && !pipe->writers) {
 			if ((filp->f_flags & O_NONBLOCK)) {
 				/* suppress POLLHUP until we have
 				 * seen a writer */
@@ -1211,15 +1053,14 @@ static int fifo_open(struct inode *inode, struct file *filp)
 	 *  errno=ENXIO when there is no process reading the FIFO.
 	 */
 		ret = -ENXIO;
-		if ((filp->f_flags & O_NONBLOCK) && !pipe->readers)
+		if (!is_pipe && (filp->f_flags & O_NONBLOCK) && !pipe->readers)
 			goto err;
 
-		filp->f_op = &write_pipefifo_fops;
 		pipe->w_counter++;
 		if (!pipe->writers++)
 			wake_up_partner(inode);
 
-		if (!pipe->readers) {
+		if (!is_pipe && !pipe->readers) {
 			if (wait_for_partner(inode, &pipe->r_counter))
 				goto err_wr;
 		}
@@ -1232,7 +1073,6 @@ static int fifo_open(struct inode *inode, struct file *filp)
 	 *  This implementation will NEVER block on a O_RDWR open, since
 	 *  the process can at least talk to itself.
 	 */
-		filp->f_op = &rdwr_pipefifo_fops;
 
 		pipe->readers++;
 		pipe->writers++;
@@ -1272,14 +1112,17 @@ err_nocleanup:
 	return ret;
 }
 
-/*
- * Dummy default file-operations: the only thing this does
- * is contain the open that then fills in the correct operations
- * depending on the access mode of the file...
- */
-const struct file_operations def_fifo_fops = {
-	.open		= fifo_open,	/* will set read_ or write_pipefifo_fops */
-	.llseek		= noop_llseek,
+const struct file_operations pipefifo_fops = {
+	.open		= fifo_open,
+	.llseek		= no_llseek,
+	.read		= do_sync_read,
+	.aio_read	= pipe_read,
+	.write		= do_sync_write,
+	.aio_write	= pipe_write,
+	.poll		= pipe_poll,
+	.unlocked_ioctl	= pipe_ioctl,
+	.release	= pipe_release,
+	.fasync		= pipe_fasync,
 };
 
 /*
-- 
cgit 


From fc7478a2bfa9abd19657d2bbc9ae24185a41e21b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 21 Mar 2013 02:07:59 -0400
Subject: pipe: switch wait_for_partner() and wake_up_partner() to
 pipe_inode_info

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/pipe.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/pipe.c b/fs/pipe.c
index 099ac3bf89f9..105b0021b075 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -986,21 +986,21 @@ SYSCALL_DEFINE1(pipe, int __user *, fildes)
 	return sys_pipe2(fildes, 0);
 }
 
-static int wait_for_partner(struct inode* inode, unsigned int *cnt)
+static int wait_for_partner(struct pipe_inode_info *pipe, unsigned int *cnt)
 {
 	int cur = *cnt;	
 
 	while (cur == *cnt) {
-		pipe_wait(inode->i_pipe);
+		pipe_wait(pipe);
 		if (signal_pending(current))
 			break;
 	}
 	return cur == *cnt ? -ERESTARTSYS : 0;
 }
 
-static void wake_up_partner(struct inode* inode)
+static void wake_up_partner(struct pipe_inode_info *pipe)
 {
-	wake_up_interruptible(&inode->i_pipe->wait);
+	wake_up_interruptible(&pipe->wait);
 }
 
 static int fifo_open(struct inode *inode, struct file *filp)
@@ -1032,7 +1032,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
 	 */
 		pipe->r_counter++;
 		if (pipe->readers++ == 0)
-			wake_up_partner(inode);
+			wake_up_partner(pipe);
 
 		if (!is_pipe && !pipe->writers) {
 			if ((filp->f_flags & O_NONBLOCK)) {
@@ -1040,7 +1040,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
 				 * seen a writer */
 				filp->f_version = pipe->w_counter;
 			} else {
-				if (wait_for_partner(inode, &pipe->w_counter))
+				if (wait_for_partner(pipe, &pipe->w_counter))
 					goto err_rd;
 			}
 		}
@@ -1058,10 +1058,10 @@ static int fifo_open(struct inode *inode, struct file *filp)
 
 		pipe->w_counter++;
 		if (!pipe->writers++)
-			wake_up_partner(inode);
+			wake_up_partner(pipe);
 
 		if (!is_pipe && !pipe->readers) {
-			if (wait_for_partner(inode, &pipe->r_counter))
+			if (wait_for_partner(pipe, &pipe->r_counter))
 				goto err_wr;
 		}
 		break;
@@ -1079,7 +1079,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
 		pipe->r_counter++;
 		pipe->w_counter++;
 		if (pipe->readers == 1 || pipe->writers == 1)
-			wake_up_partner(inode);
+			wake_up_partner(pipe);
 		break;
 
 	default:
-- 
cgit 


From 18c03cfd403b88852f75f200206983ee6df28423 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 21 Mar 2013 02:16:30 -0400
Subject: pipe: preparation to new locking rules

* use the fact that file_inode(file)->i_pipe doesn't change
  while the file is opened - no locks needed to access that.
* switch to pipe_lock/pipe_unlock where it's easy to do

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/pipe.c | 38 +++++++++++++++-----------------------
 1 file changed, 15 insertions(+), 23 deletions(-)

(limited to 'fs')

diff --git a/fs/pipe.c b/fs/pipe.c
index 105b0021b075..357471db890d 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -363,8 +363,7 @@ pipe_read(struct kiocb *iocb, const struct iovec *_iov,
 	   unsigned long nr_segs, loff_t pos)
 {
 	struct file *filp = iocb->ki_filp;
-	struct inode *inode = file_inode(filp);
-	struct pipe_inode_info *pipe;
+	struct pipe_inode_info *pipe = file_inode(filp)->i_pipe;
 	int do_wakeup;
 	ssize_t ret;
 	struct iovec *iov = (struct iovec *)_iov;
@@ -377,8 +376,7 @@ pipe_read(struct kiocb *iocb, const struct iovec *_iov,
 
 	do_wakeup = 0;
 	ret = 0;
-	mutex_lock(&inode->i_mutex);
-	pipe = inode->i_pipe;
+	pipe_lock(pipe);
 	for (;;) {
 		int bufs = pipe->nrbufs;
 		if (bufs) {
@@ -466,7 +464,7 @@ redo:
 		}
 		pipe_wait(pipe);
 	}
-	mutex_unlock(&inode->i_mutex);
+	pipe_unlock(pipe);
 
 	/* Signal writers asynchronously that there is more room. */
 	if (do_wakeup) {
@@ -488,8 +486,7 @@ pipe_write(struct kiocb *iocb, const struct iovec *_iov,
 	    unsigned long nr_segs, loff_t ppos)
 {
 	struct file *filp = iocb->ki_filp;
-	struct inode *inode = file_inode(filp);
-	struct pipe_inode_info *pipe;
+	struct pipe_inode_info *pipe = file_inode(filp)->i_pipe;
 	ssize_t ret;
 	int do_wakeup;
 	struct iovec *iov = (struct iovec *)_iov;
@@ -503,8 +500,7 @@ pipe_write(struct kiocb *iocb, const struct iovec *_iov,
 
 	do_wakeup = 0;
 	ret = 0;
-	mutex_lock(&inode->i_mutex);
-	pipe = inode->i_pipe;
+	pipe_lock(pipe);
 
 	if (!pipe->readers) {
 		send_sig(SIGPIPE, current, 0);
@@ -651,7 +647,7 @@ redo2:
 		pipe->waiting_writers--;
 	}
 out:
-	mutex_unlock(&inode->i_mutex);
+	pipe_unlock(pipe);
 	if (do_wakeup) {
 		wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLRDNORM);
 		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
@@ -666,14 +662,12 @@ out:
 
 static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
-	struct inode *inode = file_inode(filp);
-	struct pipe_inode_info *pipe;
+	struct pipe_inode_info *pipe = file_inode(filp)->i_pipe;
 	int count, buf, nrbufs;
 
 	switch (cmd) {
 		case FIONREAD:
-			mutex_lock(&inode->i_mutex);
-			pipe = inode->i_pipe;
+			pipe_lock(pipe);
 			count = 0;
 			buf = pipe->curbuf;
 			nrbufs = pipe->nrbufs;
@@ -681,7 +675,7 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 				count += pipe->bufs[buf].len;
 				buf = (buf+1) & (pipe->buffers - 1);
 			}
-			mutex_unlock(&inode->i_mutex);
+			pipe_unlock(pipe);
 
 			return put_user(count, (int __user *)arg);
 		default:
@@ -694,8 +688,7 @@ static unsigned int
 pipe_poll(struct file *filp, poll_table *wait)
 {
 	unsigned int mask;
-	struct inode *inode = file_inode(filp);
-	struct pipe_inode_info *pipe = inode->i_pipe;
+	struct pipe_inode_info *pipe = file_inode(filp)->i_pipe;
 	int nrbufs;
 
 	poll_wait(filp, &pipe->wait, wait);
@@ -749,11 +742,10 @@ pipe_release(struct inode *inode, struct file *file)
 static int
 pipe_fasync(int fd, struct file *filp, int on)
 {
-	struct inode *inode = file_inode(filp);
-	struct pipe_inode_info *pipe = inode->i_pipe;
+	struct pipe_inode_info *pipe = file_inode(filp)->i_pipe;
 	int retval = 0;
 
-	mutex_lock(&inode->i_mutex);
+	pipe_lock(pipe);
 	if (filp->f_mode & FMODE_READ)
 		retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
 	if ((filp->f_mode & FMODE_WRITE) && retval >= 0) {
@@ -762,7 +754,7 @@ pipe_fasync(int fd, struct file *filp, int on)
 			/* this can happen only if on == T */
 			fasync_helper(-1, filp, 0, &pipe->fasync_readers);
 	}
-	mutex_unlock(&inode->i_mutex);
+	pipe_unlock(pipe);
 	return retval;
 }
 
@@ -1224,7 +1216,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
 	if (!pipe)
 		return -EBADF;
 
-	mutex_lock(&pipe->inode->i_mutex);
+	pipe_lock(pipe);
 
 	switch (cmd) {
 	case F_SETPIPE_SZ: {
@@ -1253,7 +1245,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
 	}
 
 out:
-	mutex_unlock(&pipe->inode->i_mutex);
+	pipe_unlock(pipe);
 	return ret;
 }
 
-- 
cgit 


From ba5bb147330a8737b6b5a812cc774c79c070704b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 21 Mar 2013 02:21:19 -0400
Subject: pipe: take allocation and freeing of pipe_inode_info out of ->i_mutex

* new field - pipe->files; number of struct file over that pipe (all
  sharing the same inode, of course); protected by inode->i_lock.
* pipe_release() decrements pipe->files, clears inode->i_pipe
  if the counter has reached 0 (all under ->i_lock) and, in that case,
  frees pipe after having done pipe_unlock()
* fifo_open() starts with grabbing ->i_lock, and either bumps pipe->files
  if ->i_pipe was non-NULL or allocates a new pipe (dropping and regaining
  ->i_lock) and rechecks ->i_pipe; if it's still NULL, inserts new pipe
  there, otherwise bumps ->i_pipe->files and frees the one we'd allocated.
  At that point we know that ->i_pipe is non-NULL and won't go away, so
  we can do pipe_lock() on it and proceed as we used to.  If we end up
  failing, decrement pipe->files and if it reaches 0 clear ->i_pipe and
  free the sucker after pipe_unlock().

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/pipe.c | 72 ++++++++++++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 51 insertions(+), 21 deletions(-)

(limited to 'fs')

diff --git a/fs/pipe.c b/fs/pipe.c
index 357471db890d..abaa9234d27b 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -718,23 +718,30 @@ pipe_poll(struct file *filp, poll_table *wait)
 static int
 pipe_release(struct inode *inode, struct file *file)
 {
-	struct pipe_inode_info *pipe;
+	struct pipe_inode_info *pipe = inode->i_pipe;
+	int kill = 0;
 
-	mutex_lock(&inode->i_mutex);
-	pipe = inode->i_pipe;
+	pipe_lock(pipe);
 	if (file->f_mode & FMODE_READ)
 		pipe->readers--;
 	if (file->f_mode & FMODE_WRITE)
 		pipe->writers--;
 
-	if (!pipe->readers && !pipe->writers) {
-		free_pipe_info(inode);
-	} else {
+	if (pipe->readers || pipe->writers) {
 		wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM | POLLERR | POLLHUP);
 		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
 		kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
 	}
-	mutex_unlock(&inode->i_mutex);
+	spin_lock(&inode->i_lock);
+	if (!--pipe->files) {
+		inode->i_pipe = NULL;
+		kill = 1;
+	}
+	spin_unlock(&inode->i_lock);
+	pipe_unlock(pipe);
+
+	if (kill)
+		__free_pipe_info(pipe);
 
 	return 0;
 }
@@ -827,8 +834,9 @@ static struct inode * get_pipe_inode(void)
 	pipe = alloc_pipe_info(inode);
 	if (!pipe)
 		goto fail_iput;
-	inode->i_pipe = pipe;
 
+	inode->i_pipe = pipe;
+	pipe->files = 2;
 	pipe->readers = pipe->writers = 1;
 	inode->i_fop = &pipefifo_fops;
 
@@ -999,18 +1007,36 @@ static int fifo_open(struct inode *inode, struct file *filp)
 {
 	struct pipe_inode_info *pipe;
 	bool is_pipe = inode->i_sb->s_magic == PIPEFS_MAGIC;
+	int kill = 0;
 	int ret;
 
-	mutex_lock(&inode->i_mutex);
-	pipe = inode->i_pipe;
-	if (!pipe) {
-		ret = -ENOMEM;
+	filp->f_version = 0;
+
+	spin_lock(&inode->i_lock);
+	if (inode->i_pipe) {
+		pipe = inode->i_pipe;
+		pipe->files++;
+		spin_unlock(&inode->i_lock);
+	} else {
+		spin_unlock(&inode->i_lock);
 		pipe = alloc_pipe_info(inode);
 		if (!pipe)
-			goto err_nocleanup;
-		inode->i_pipe = pipe;
+			return -ENOMEM;
+		pipe->files = 1;
+		spin_lock(&inode->i_lock);
+		if (unlikely(inode->i_pipe)) {
+			inode->i_pipe->files++;
+			spin_unlock(&inode->i_lock);
+			__free_pipe_info(pipe);
+			pipe = inode->i_pipe;
+		} else {
+			inode->i_pipe = pipe;
+			spin_unlock(&inode->i_lock);
+		}
 	}
-	filp->f_version = 0;
+	/* OK, we have a pipe and it's pinned down */
+
+	pipe_lock(pipe);
 
 	/* We can only do regular read/write on fifos */
 	filp->f_mode &= (FMODE_READ | FMODE_WRITE);
@@ -1080,7 +1106,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
 	}
 
 	/* Ok! */
-	mutex_unlock(&inode->i_mutex);
+	pipe_unlock(pipe);
 	return 0;
 
 err_rd:
@@ -1096,11 +1122,15 @@ err_wr:
 	goto err;
 
 err:
-	if (!pipe->readers && !pipe->writers)
-		free_pipe_info(inode);
-
-err_nocleanup:
-	mutex_unlock(&inode->i_mutex);
+	spin_lock(&inode->i_lock);
+	if (!--pipe->files) {
+		inode->i_pipe = NULL;
+		kill = 1;
+	}
+	spin_unlock(&inode->i_lock);
+	pipe_unlock(pipe);
+	if (kill)
+		__free_pipe_info(pipe);
 	return ret;
 }
 
-- 
cgit 


From 72b0d9aacb89f3759931ec440e1b535671145bb4 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 21 Mar 2013 02:32:24 -0400
Subject: pipe: don't use ->i_mutex

now it can be done - put mutex into pipe_inode_info, use it instead
of ->i_mutex

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ocfs2/file.c | 6 ++----
 fs/pipe.c       | 5 +++--
 2 files changed, 5 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 1c93e771e950..8a7509f9e6f5 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2465,8 +2465,7 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
 			out->f_path.dentry->d_name.len,
 			out->f_path.dentry->d_name.name, len);
 
-	if (pipe->inode)
-		mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_PARENT);
+	pipe_lock(pipe);
 
 	splice_from_pipe_begin(&sd);
 	do {
@@ -2486,8 +2485,7 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
 	} while (ret > 0);
 	splice_from_pipe_end(pipe, &sd);
 
-	if (pipe->inode)
-		mutex_unlock(&pipe->inode->i_mutex);
+	pipe_unlock(pipe);
 
 	if (sd.num_spliced)
 		ret = sd.num_spliced;
diff --git a/fs/pipe.c b/fs/pipe.c
index abaa9234d27b..d4b97e4e37c5 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -56,7 +56,7 @@ unsigned int pipe_min_size = PAGE_SIZE;
 static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass)
 {
 	if (pipe->inode)
-		mutex_lock_nested(&pipe->inode->i_mutex, subclass);
+		mutex_lock_nested(&pipe->mutex, subclass);
 }
 
 void pipe_lock(struct pipe_inode_info *pipe)
@@ -71,7 +71,7 @@ EXPORT_SYMBOL(pipe_lock);
 void pipe_unlock(struct pipe_inode_info *pipe)
 {
 	if (pipe->inode)
-		mutex_unlock(&pipe->inode->i_mutex);
+		mutex_unlock(&pipe->mutex);
 }
 EXPORT_SYMBOL(pipe_unlock);
 
@@ -777,6 +777,7 @@ struct pipe_inode_info * alloc_pipe_info(struct inode *inode)
 			pipe->r_counter = pipe->w_counter = 1;
 			pipe->inode = inode;
 			pipe->buffers = PIPE_DEF_BUFFERS;
+			mutex_init(&pipe->mutex);
 			return pipe;
 		}
 		kfree(pipe);
-- 
cgit 


From de32ec4cfeb3b3afd2abf5116068deace10e420f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 21 Mar 2013 11:16:56 -0400
Subject: pipe: set file->private_data to ->i_pipe

simplify get_pipe_info(), while we are at it

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/coredump.c |  4 +---
 fs/pipe.c     | 17 +++++++++--------
 2 files changed, 10 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/coredump.c b/fs/coredump.c
index 288e5c9f9bbe..a987f3d39d93 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -409,9 +409,7 @@ static void coredump_finish(struct mm_struct *mm)
 
 static void wait_for_dump_helpers(struct file *file)
 {
-	struct pipe_inode_info *pipe;
-
-	pipe = file_inode(file)->i_pipe;
+	struct pipe_inode_info *pipe = file->private_data;
 
 	pipe_lock(pipe);
 	pipe->readers++;
diff --git a/fs/pipe.c b/fs/pipe.c
index d4b97e4e37c5..161b2099a7af 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -363,7 +363,7 @@ pipe_read(struct kiocb *iocb, const struct iovec *_iov,
 	   unsigned long nr_segs, loff_t pos)
 {
 	struct file *filp = iocb->ki_filp;
-	struct pipe_inode_info *pipe = file_inode(filp)->i_pipe;
+	struct pipe_inode_info *pipe = filp->private_data;
 	int do_wakeup;
 	ssize_t ret;
 	struct iovec *iov = (struct iovec *)_iov;
@@ -486,7 +486,7 @@ pipe_write(struct kiocb *iocb, const struct iovec *_iov,
 	    unsigned long nr_segs, loff_t ppos)
 {
 	struct file *filp = iocb->ki_filp;
-	struct pipe_inode_info *pipe = file_inode(filp)->i_pipe;
+	struct pipe_inode_info *pipe = filp->private_data;
 	ssize_t ret;
 	int do_wakeup;
 	struct iovec *iov = (struct iovec *)_iov;
@@ -662,7 +662,7 @@ out:
 
 static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
-	struct pipe_inode_info *pipe = file_inode(filp)->i_pipe;
+	struct pipe_inode_info *pipe = filp->private_data;
 	int count, buf, nrbufs;
 
 	switch (cmd) {
@@ -688,7 +688,7 @@ static unsigned int
 pipe_poll(struct file *filp, poll_table *wait)
 {
 	unsigned int mask;
-	struct pipe_inode_info *pipe = file_inode(filp)->i_pipe;
+	struct pipe_inode_info *pipe = filp->private_data;
 	int nrbufs;
 
 	poll_wait(filp, &pipe->wait, wait);
@@ -749,7 +749,7 @@ pipe_release(struct inode *inode, struct file *file)
 static int
 pipe_fasync(int fd, struct file *filp, int on)
 {
-	struct pipe_inode_info *pipe = file_inode(filp)->i_pipe;
+	struct pipe_inode_info *pipe = filp->private_data;
 	int retval = 0;
 
 	pipe_lock(pipe);
@@ -887,12 +887,14 @@ int create_pipe_files(struct file **res, int flags)
 		goto err_dentry;
 
 	f->f_flags = O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT));
+	f->private_data = inode->i_pipe;
 
 	res[0] = alloc_file(&path, FMODE_READ, &pipefifo_fops);
 	if (IS_ERR(res[0]))
 		goto err_file;
 
 	path_get(&path);
+	res[0]->private_data = inode->i_pipe;
 	res[0]->f_flags = O_RDONLY | (flags & O_NONBLOCK);
 	res[1] = f;
 	return 0;
@@ -1035,6 +1037,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
 			spin_unlock(&inode->i_lock);
 		}
 	}
+	filp->private_data = pipe;
 	/* OK, we have a pipe and it's pinned down */
 
 	pipe_lock(pipe);
@@ -1233,9 +1236,7 @@ int pipe_proc_fn(struct ctl_table *table, int write, void __user *buf,
  */
 struct pipe_inode_info *get_pipe_info(struct file *file)
 {
-	struct inode *i = file_inode(file);
-
-	return S_ISFIFO(i->i_mode) ? i->i_pipe : NULL;
+	return file->f_op == &pipefifo_fops ? file->private_data : NULL;
 }
 
 long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
-- 
cgit 


From ebec73f4752b777b79b384bd52e5240203cb9b00 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 21 Mar 2013 12:24:01 -0400
Subject: introduce variants of pipe_lock/pipe_unlock for real pipes/FIFOs

The file_operations methods in fs/pipe.c *know* that the pipe is not an
internal one, so there is no need to check pipe->inode for those callers.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/pipe.c | 40 +++++++++++++++++++++++++---------------
 1 file changed, 25 insertions(+), 15 deletions(-)

(limited to 'fs')

diff --git a/fs/pipe.c b/fs/pipe.c
index 161b2099a7af..e2fc5ccb0d49 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -75,6 +75,16 @@ void pipe_unlock(struct pipe_inode_info *pipe)
 }
 EXPORT_SYMBOL(pipe_unlock);
 
+static inline void __pipe_lock(struct pipe_inode_info *pipe)
+{
+	mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
+}
+
+static inline void __pipe_unlock(struct pipe_inode_info *pipe)
+{
+	mutex_unlock(&pipe->mutex);
+}
+
 void pipe_double_lock(struct pipe_inode_info *pipe1,
 		      struct pipe_inode_info *pipe2)
 {
@@ -376,7 +386,7 @@ pipe_read(struct kiocb *iocb, const struct iovec *_iov,
 
 	do_wakeup = 0;
 	ret = 0;
-	pipe_lock(pipe);
+	__pipe_lock(pipe);
 	for (;;) {
 		int bufs = pipe->nrbufs;
 		if (bufs) {
@@ -464,7 +474,7 @@ redo:
 		}
 		pipe_wait(pipe);
 	}
-	pipe_unlock(pipe);
+	__pipe_unlock(pipe);
 
 	/* Signal writers asynchronously that there is more room. */
 	if (do_wakeup) {
@@ -500,7 +510,7 @@ pipe_write(struct kiocb *iocb, const struct iovec *_iov,
 
 	do_wakeup = 0;
 	ret = 0;
-	pipe_lock(pipe);
+	__pipe_lock(pipe);
 
 	if (!pipe->readers) {
 		send_sig(SIGPIPE, current, 0);
@@ -647,7 +657,7 @@ redo2:
 		pipe->waiting_writers--;
 	}
 out:
-	pipe_unlock(pipe);
+	__pipe_unlock(pipe);
 	if (do_wakeup) {
 		wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLRDNORM);
 		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
@@ -667,7 +677,7 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 
 	switch (cmd) {
 		case FIONREAD:
-			pipe_lock(pipe);
+			__pipe_lock(pipe);
 			count = 0;
 			buf = pipe->curbuf;
 			nrbufs = pipe->nrbufs;
@@ -675,7 +685,7 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 				count += pipe->bufs[buf].len;
 				buf = (buf+1) & (pipe->buffers - 1);
 			}
-			pipe_unlock(pipe);
+			__pipe_unlock(pipe);
 
 			return put_user(count, (int __user *)arg);
 		default:
@@ -721,7 +731,7 @@ pipe_release(struct inode *inode, struct file *file)
 	struct pipe_inode_info *pipe = inode->i_pipe;
 	int kill = 0;
 
-	pipe_lock(pipe);
+	__pipe_lock(pipe);
 	if (file->f_mode & FMODE_READ)
 		pipe->readers--;
 	if (file->f_mode & FMODE_WRITE)
@@ -738,7 +748,7 @@ pipe_release(struct inode *inode, struct file *file)
 		kill = 1;
 	}
 	spin_unlock(&inode->i_lock);
-	pipe_unlock(pipe);
+	__pipe_unlock(pipe);
 
 	if (kill)
 		__free_pipe_info(pipe);
@@ -752,7 +762,7 @@ pipe_fasync(int fd, struct file *filp, int on)
 	struct pipe_inode_info *pipe = filp->private_data;
 	int retval = 0;
 
-	pipe_lock(pipe);
+	__pipe_lock(pipe);
 	if (filp->f_mode & FMODE_READ)
 		retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
 	if ((filp->f_mode & FMODE_WRITE) && retval >= 0) {
@@ -761,7 +771,7 @@ pipe_fasync(int fd, struct file *filp, int on)
 			/* this can happen only if on == T */
 			fasync_helper(-1, filp, 0, &pipe->fasync_readers);
 	}
-	pipe_unlock(pipe);
+	__pipe_unlock(pipe);
 	return retval;
 }
 
@@ -1040,7 +1050,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
 	filp->private_data = pipe;
 	/* OK, we have a pipe and it's pinned down */
 
-	pipe_lock(pipe);
+	__pipe_lock(pipe);
 
 	/* We can only do regular read/write on fifos */
 	filp->f_mode &= (FMODE_READ | FMODE_WRITE);
@@ -1110,7 +1120,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
 	}
 
 	/* Ok! */
-	pipe_unlock(pipe);
+	__pipe_unlock(pipe);
 	return 0;
 
 err_rd:
@@ -1132,7 +1142,7 @@ err:
 		kill = 1;
 	}
 	spin_unlock(&inode->i_lock);
-	pipe_unlock(pipe);
+	__pipe_unlock(pipe);
 	if (kill)
 		__free_pipe_info(pipe);
 	return ret;
@@ -1248,7 +1258,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
 	if (!pipe)
 		return -EBADF;
 
-	pipe_lock(pipe);
+	__pipe_lock(pipe);
 
 	switch (cmd) {
 	case F_SETPIPE_SZ: {
@@ -1277,7 +1287,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
 	}
 
 out:
-	pipe_unlock(pipe);
+	__pipe_unlock(pipe);
 	return ret;
 }
 
-- 
cgit 


From 6447a3cf19da8c4653283d1c491e2e775633f348 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 21 Mar 2013 11:01:38 -0400
Subject: get rid of pipe->inode

pipe->inode is used only as a flag to distinguish normal pipes/FIFOs from
the internal per-task one used by file-to-file splice.  And pipe->files
would work just as well for that purpose...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/fuse/dev.c | 2 +-
 fs/pipe.c     | 5 ++---
 fs/splice.c   | 4 ++--
 3 files changed, 5 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 11dfa0c3fb46..9bfd1a3214e6 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1319,7 +1319,7 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
 		page_nr++;
 		ret += buf->len;
 
-		if (pipe->inode)
+		if (pipe->files)
 			do_wakeup = 1;
 	}
 
diff --git a/fs/pipe.c b/fs/pipe.c
index e2fc5ccb0d49..39bdec06fe2b 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -55,7 +55,7 @@ unsigned int pipe_min_size = PAGE_SIZE;
 
 static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass)
 {
-	if (pipe->inode)
+	if (pipe->files)
 		mutex_lock_nested(&pipe->mutex, subclass);
 }
 
@@ -70,7 +70,7 @@ EXPORT_SYMBOL(pipe_lock);
 
 void pipe_unlock(struct pipe_inode_info *pipe)
 {
-	if (pipe->inode)
+	if (pipe->files)
 		mutex_unlock(&pipe->mutex);
 }
 EXPORT_SYMBOL(pipe_unlock);
@@ -785,7 +785,6 @@ struct pipe_inode_info * alloc_pipe_info(struct inode *inode)
 		if (pipe->bufs) {
 			init_waitqueue_head(&pipe->wait);
 			pipe->r_counter = pipe->w_counter = 1;
-			pipe->inode = inode;
 			pipe->buffers = PIPE_DEF_BUFFERS;
 			mutex_init(&pipe->mutex);
 			return pipe;
diff --git a/fs/splice.c b/fs/splice.c
index 7efc2f5057fb..9f2a4447da50 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -218,7 +218,7 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
 			page_nr++;
 			ret += buf->len;
 
-			if (pipe->inode)
+			if (pipe->files)
 				do_wakeup = 1;
 
 			if (!--spd->nr_pages)
@@ -828,7 +828,7 @@ int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd,
 			ops->release(pipe, buf);
 			pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
 			pipe->nrbufs--;
-			if (pipe->inode)
+			if (pipe->files)
 				sd->need_wakeup = true;
 		}
 
-- 
cgit 


From 7bee130e222dfb3a7a70c0404dc09f104cddd7d6 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 21 Mar 2013 11:04:15 -0400
Subject: get rid of alloc_pipe_info() argument

not used anymore

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/pipe.c   | 6 +++---
 fs/splice.c | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/pipe.c b/fs/pipe.c
index 39bdec06fe2b..6cac5ceeded0 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -775,7 +775,7 @@ pipe_fasync(int fd, struct file *filp, int on)
 	return retval;
 }
 
-struct pipe_inode_info * alloc_pipe_info(struct inode *inode)
+struct pipe_inode_info *alloc_pipe_info(void)
 {
 	struct pipe_inode_info *pipe;
 
@@ -841,7 +841,7 @@ static struct inode * get_pipe_inode(void)
 
 	inode->i_ino = get_next_ino();
 
-	pipe = alloc_pipe_info(inode);
+	pipe = alloc_pipe_info();
 	if (!pipe)
 		goto fail_iput;
 
@@ -1031,7 +1031,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
 		spin_unlock(&inode->i_lock);
 	} else {
 		spin_unlock(&inode->i_lock);
-		pipe = alloc_pipe_info(inode);
+		pipe = alloc_pipe_info();
 		if (!pipe)
 			return -ENOMEM;
 		pipe->files = 1;
diff --git a/fs/splice.c b/fs/splice.c
index 9f2a4447da50..45e645b15d92 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1183,7 +1183,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
 	 */
 	pipe = current->splice_pipe;
 	if (unlikely(!pipe)) {
-		pipe = alloc_pipe_info(NULL);
+		pipe = alloc_pipe_info();
 		if (!pipe)
 			return -ENOMEM;
 
-- 
cgit 


From 4b8a8f1e4f94fd87747e6e3acef74cf0b4dc0dae Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 21 Mar 2013 11:06:46 -0400
Subject: get rid of the last free_pipe_info() callers

and rename __free_pipe_info() to free_pipe_info()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/pipe.c | 18 ++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/pipe.c b/fs/pipe.c
index 6cac5ceeded0..a029a14bacf1 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -751,7 +751,7 @@ pipe_release(struct inode *inode, struct file *file)
 	__pipe_unlock(pipe);
 
 	if (kill)
-		__free_pipe_info(pipe);
+		free_pipe_info(pipe);
 
 	return 0;
 }
@@ -795,7 +795,7 @@ struct pipe_inode_info *alloc_pipe_info(void)
 	return NULL;
 }
 
-void __free_pipe_info(struct pipe_inode_info *pipe)
+void free_pipe_info(struct pipe_inode_info *pipe)
 {
 	int i;
 
@@ -810,12 +810,6 @@ void __free_pipe_info(struct pipe_inode_info *pipe)
 	kfree(pipe);
 }
 
-void free_pipe_info(struct inode *inode)
-{
-	__free_pipe_info(inode->i_pipe);
-	inode->i_pipe = NULL;
-}
-
 static struct vfsmount *pipe_mnt __read_mostly;
 
 /*
@@ -911,12 +905,12 @@ int create_pipe_files(struct file **res, int flags)
 err_file:
 	put_filp(f);
 err_dentry:
-	free_pipe_info(inode);
+	free_pipe_info(inode->i_pipe);
 	path_put(&path);
 	return err;
 
 err_inode:
-	free_pipe_info(inode);
+	free_pipe_info(inode->i_pipe);
 	iput(inode);
 	return err;
 }
@@ -1039,7 +1033,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
 		if (unlikely(inode->i_pipe)) {
 			inode->i_pipe->files++;
 			spin_unlock(&inode->i_lock);
-			__free_pipe_info(pipe);
+			free_pipe_info(pipe);
 			pipe = inode->i_pipe;
 		} else {
 			inode->i_pipe = pipe;
@@ -1143,7 +1137,7 @@ err:
 	spin_unlock(&inode->i_lock);
 	__pipe_unlock(pipe);
 	if (kill)
-		__free_pipe_info(pipe);
+		free_pipe_info(pipe);
 	return ret;
 }
 
-- 
cgit 


From 0ecc833bac594099505a090cbca6ccd5b83d5975 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 29 Mar 2013 12:23:28 -0400
Subject: mode_t, whack-a-mole at 11...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/f2fs/acl.c  | 2 +-
 fs/f2fs/dir.c  | 2 +-
 fs/proc/self.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index 137af4255da6..44abc2f286e0 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -299,7 +299,7 @@ int f2fs_acl_chmod(struct inode *inode)
 	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
 	struct posix_acl *acl;
 	int error;
-	mode_t mode = get_inode_mode(inode);
+	umode_t mode = get_inode_mode(inode);
 
 	if (!test_opt(sbi, POSIX_ACL))
 		return 0;
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index a1f38443ecee..1be948768e2f 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -60,7 +60,7 @@ static unsigned char f2fs_type_by_mode[S_IFMT >> S_SHIFT] = {
 
 static void set_de_type(struct f2fs_dir_entry *de, struct inode *inode)
 {
-	mode_t mode = inode->i_mode;
+	umode_t mode = inode->i_mode;
 	de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT];
 }
 
diff --git a/fs/proc/self.c b/fs/proc/self.c
index aa5cc3bff140..d8a025296613 100644
--- a/fs/proc/self.c
+++ b/fs/proc/self.c
@@ -51,7 +51,7 @@ static const struct inode_operations proc_self_inode_operations = {
 void __init proc_self_init(void)
 {
 	struct proc_dir_entry *proc_self_symlink;
-	mode_t mode;
+	umode_t mode;
 
 	mode = S_IFLNK | S_IRWXUGO;
 	proc_self_symlink = proc_create("self", mode, NULL, NULL );
-- 
cgit 


From 021ada7dff22d0d9540ff596cb0f8bb866755ee1 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 29 Mar 2013 19:27:05 -0400
Subject: procfs: switch /proc/self away from proc_dir_entry

Just have it pinned in dcache all along and let procfs ->kill_sb()
drop it before kill_anon_super().

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/proc/base.c     | 16 ++++++++++++----
 fs/proc/inode.c    |  2 +-
 fs/proc/internal.h |  1 +
 fs/proc/root.c     |  2 ++
 fs/proc/self.c     | 46 +++++++++++++++++++++++++++++++++++++++-------
 5 files changed, 55 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 69078c7cef1f..593e7c5ddb49 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2794,7 +2794,7 @@ retry:
 	return iter;
 }
 
-#define TGID_OFFSET (FIRST_PROCESS_ENTRY)
+#define TGID_OFFSET (FIRST_PROCESS_ENTRY + 1)
 
 static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
 	struct tgid_iter iter)
@@ -2817,13 +2817,21 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
 	struct tgid_iter iter;
 	struct pid_namespace *ns;
 	filldir_t __filldir;
+	loff_t pos = filp->f_pos;
 
-	if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET)
+	if (pos >= PID_MAX_LIMIT + TGID_OFFSET)
 		goto out;
 
-	ns = filp->f_dentry->d_sb->s_fs_info;
+	if (pos == TGID_OFFSET - 1) {
+		if (proc_fill_cache(filp, dirent, filldir, "self", 4,
+					NULL, NULL, NULL) < 0)
+			goto out;
+		iter.tgid = 0;
+	} else {
+		iter.tgid = pos - TGID_OFFSET;
+	}
 	iter.task = NULL;
-	iter.tgid = filp->f_pos - TGID_OFFSET;
+	ns = filp->f_dentry->d_sb->s_fs_info;
 	for (iter = next_tgid(ns, iter);
 	     iter.task;
 	     iter.tgid += 1, iter = next_tgid(ns, iter)) {
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 869116c2afbe..908e97457319 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -506,5 +506,5 @@ int proc_fill_super(struct super_block *s)
 		return -ENOMEM;
 	}
 
-	return 0;
+	return proc_setup_self(s);
 }
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 85ff3a4598b3..9c93a53f371d 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -205,3 +205,4 @@ int proc_setattr(struct dentry *dentry, struct iattr *attr);
 extern const struct inode_operations proc_ns_dir_inode_operations;
 extern const struct file_operations proc_ns_dir_operations;
 
+extern int proc_setup_self(struct super_block *);
diff --git a/fs/proc/root.c b/fs/proc/root.c
index c6e9fac26bac..20834b3c8ea3 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -137,6 +137,8 @@ static void proc_kill_sb(struct super_block *sb)
 	struct pid_namespace *ns;
 
 	ns = (struct pid_namespace *)sb->s_fs_info;
+	if (ns->proc_self)
+		dput(ns->proc_self);
 	kill_anon_super(sb);
 	put_pid_ns(ns);
 }
diff --git a/fs/proc/self.c b/fs/proc/self.c
index d8a025296613..21940d89977e 100644
--- a/fs/proc/self.c
+++ b/fs/proc/self.c
@@ -1,6 +1,7 @@
-#include <linux/proc_fs.h>
 #include <linux/sched.h>
 #include <linux/namei.h>
+#include <linux/pid_namespace.h>
+#include "internal.h"
 
 /*
  * /proc/self:
@@ -48,12 +49,43 @@ static const struct inode_operations proc_self_inode_operations = {
 	.put_link	= proc_self_put_link,
 };
 
-void __init proc_self_init(void)
+static unsigned self_inum;
+
+int proc_setup_self(struct super_block *s)
 {
-	struct proc_dir_entry *proc_self_symlink;
-	umode_t mode;
+	struct inode *root_inode = s->s_root->d_inode;
+	struct pid_namespace *ns = s->s_fs_info;
+	struct dentry *self;
+	
+	mutex_lock(&root_inode->i_mutex);
+	self = d_alloc_name(s->s_root, "self");
+	if (self) {
+		struct inode *inode = new_inode_pseudo(s);
+		if (inode) {
+			inode->i_ino = self_inum;
+			inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+			inode->i_mode = S_IFLNK | S_IRWXUGO;
+			inode->i_uid = GLOBAL_ROOT_UID;
+			inode->i_gid = GLOBAL_ROOT_GID;
+			inode->i_op = &proc_self_inode_operations;
+			d_add(self, inode);
+		} else {
+			dput(self);
+			self = ERR_PTR(-ENOMEM);
+		}
+	} else {
+		self = ERR_PTR(-ENOMEM);
+	}
+	mutex_unlock(&root_inode->i_mutex);
+	if (IS_ERR(self)) {
+		pr_err("proc_fill_super: can't allocate /proc/self\n");
+		return PTR_ERR(self);
+	}
+	ns->proc_self = self;
+	return 0;
+}
 
-	mode = S_IFLNK | S_IRWXUGO;
-	proc_self_symlink = proc_create("self", mode, NULL, NULL );
-	proc_self_symlink->proc_iops = &proc_self_inode_operations;
+void __init proc_self_init(void)
+{
+	proc_alloc_inum(&self_inum);
 }
-- 
cgit 


From 121daf5f8b4a60158e26f357eb286acf83eb33b4 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 30 Mar 2013 20:16:20 -0400
Subject: reiserfs: use proc_remove_subtree()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/reiserfs/procfs.c | 30 +++++++++---------------------
 1 file changed, 9 insertions(+), 21 deletions(-)

(limited to 'fs')

diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index 9cc0740adffa..274adea363ff 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -499,29 +499,17 @@ int reiserfs_proc_info_init(struct super_block *sb)
 int reiserfs_proc_info_done(struct super_block *sb)
 {
 	struct proc_dir_entry *de = REISERFS_SB(sb)->procdir;
-	char b[BDEVNAME_SIZE];
-	char *s;
+	if (de) {
+		char b[BDEVNAME_SIZE];
+		char *s;
 
-	/* Some block devices use /'s */
-	strlcpy(b, reiserfs_bdevname(sb), BDEVNAME_SIZE);
-	s = strchr(b, '/');
-	if (s)
-		*s = '!';
+		/* Some block devices use /'s */
+		strlcpy(b, reiserfs_bdevname(sb), BDEVNAME_SIZE);
+		s = strchr(b, '/');
+		if (s)
+			*s = '!';
 
-	if (de) {
-		remove_proc_entry("journal", de);
-		remove_proc_entry("oidmap", de);
-		remove_proc_entry("on-disk-super", de);
-		remove_proc_entry("bitmap", de);
-		remove_proc_entry("per-level", de);
-		remove_proc_entry("super", de);
-		remove_proc_entry("version", de);
-	}
-	spin_lock(&__PINFO(sb).lock);
-	__PINFO(sb).exiting = 1;
-	spin_unlock(&__PINFO(sb).lock);
-	if (proc_info_root) {
-		remove_proc_entry(b, proc_info_root);
+		remove_proc_subtree(b, proc_info_root);
 		REISERFS_SB(sb)->procdir = NULL;
 	}
 	return 0;
-- 
cgit 


From b6cdc7310338e204224f865918f774eb6db0b75d Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 30 Mar 2013 21:20:14 -0400
Subject: procfs: don't allow to use proc_create, create_proc_entry, etc. for
 directories

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/proc/generic.c | 57 ++++++++++++++++++++++++-------------------------------
 fs/proc/inode.c   |  4 ++--
 2 files changed, 27 insertions(+), 34 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 21e1a8f1659d..6bce60703c76 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -541,19 +541,18 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp
 		return ret;
 
 	if (S_ISDIR(dp->mode)) {
-		if (dp->proc_iops == NULL) {
-			dp->proc_fops = &proc_dir_operations;
-			dp->proc_iops = &proc_dir_inode_operations;
-		}
+		dp->proc_fops = &proc_dir_operations;
+		dp->proc_iops = &proc_dir_inode_operations;
 		dir->nlink++;
 	} else if (S_ISLNK(dp->mode)) {
-		if (dp->proc_iops == NULL)
-			dp->proc_iops = &proc_link_inode_operations;
+		dp->proc_iops = &proc_link_inode_operations;
 	} else if (S_ISREG(dp->mode)) {
 		if (dp->proc_fops == NULL)
 			dp->proc_fops = &proc_file_operations;
-		if (dp->proc_iops == NULL)
-			dp->proc_iops = &proc_file_inode_operations;
+		dp->proc_iops = &proc_file_inode_operations;
+	} else {
+		WARN_ON(1);
+		return -EINVAL;
 	}
 
 	spin_lock(&proc_subdir_lock);
@@ -680,21 +679,19 @@ struct proc_dir_entry *create_proc_entry(const char *name, umode_t mode,
 					 struct proc_dir_entry *parent)
 {
 	struct proc_dir_entry *ent;
-	nlink_t nlink;
 
-	if (S_ISDIR(mode)) {
-		if ((mode & S_IALLUGO) == 0)
-			mode |= S_IRUGO | S_IXUGO;
-		nlink = 2;
-	} else {
-		if ((mode & S_IFMT) == 0)
-			mode |= S_IFREG;
-		if ((mode & S_IALLUGO) == 0)
-			mode |= S_IRUGO;
-		nlink = 1;
+	if ((mode & S_IFMT) == 0)
+		mode |= S_IFREG;
+
+	if (!S_ISREG(mode)) {
+		WARN_ON(1);	/* use proc_mkdir(), damnit */
+		return NULL;
 	}
 
-	ent = __proc_create(&parent, name, mode, nlink);
+	if ((mode & S_IALLUGO) == 0)
+		mode |= S_IRUGO;
+
+	ent = __proc_create(&parent, name, mode, 1);
 	if (ent) {
 		if (proc_register(parent, ent) < 0) {
 			kfree(ent);
@@ -711,21 +708,17 @@ struct proc_dir_entry *proc_create_data(const char *name, umode_t mode,
 					void *data)
 {
 	struct proc_dir_entry *pde;
-	nlink_t nlink;
+	if ((mode & S_IFMT) == 0)
+		mode |= S_IFREG;
 
-	if (S_ISDIR(mode)) {
-		if ((mode & S_IALLUGO) == 0)
-			mode |= S_IRUGO | S_IXUGO;
-		nlink = 2;
-	} else {
-		if ((mode & S_IFMT) == 0)
-			mode |= S_IFREG;
-		if ((mode & S_IALLUGO) == 0)
-			mode |= S_IRUGO;
-		nlink = 1;
+	if (!S_ISREG(mode)) {
+		WARN_ON(1);	/* use proc_mkdir() */
+		return NULL;
 	}
 
-	pde = __proc_create(&parent, name, mode, nlink);
+	if ((mode & S_IALLUGO) == 0)
+		mode |= S_IRUGO;
+	pde = __proc_create(&parent, name, mode, 1);
 	if (!pde)
 		goto out;
 	pde->proc_fops = proc_fops;
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 908e97457319..a4aaaeee3342 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -462,8 +462,8 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
 			inode->i_size = de->size;
 		if (de->nlink)
 			set_nlink(inode, de->nlink);
-		if (de->proc_iops)
-			inode->i_op = de->proc_iops;
+		WARN_ON(!de->proc_iops);
+		inode->i_op = de->proc_iops;
 		if (de->proc_fops) {
 			if (S_ISREG(inode->i_mode)) {
 #ifdef CONFIG_COMPAT
-- 
cgit 


From 2043f495c7c1a06f7748b5bcd17656d93c95e1a6 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 31 Mar 2013 13:43:23 -0400
Subject: new helper: single_open_size()

Same as single_open(), but preallocates a buffer of the given size.
It doesn't make any sense for sizes up to PAGE_SIZE, and it doesn't
make sense if the output of show() exceeds PAGE_SIZE only rarely -
seq_read() will take care of growing the buffer and redoing show().
If you _know_ that the output will be large, it might make more sense
to look into a saner iterator rather than go with a single-shot one.
If that's impossible, single_open_size() might be for you.

Again, don't use that without a good reason; occasionally that's really
the best way to go, but very often there are better solutions.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/seq_file.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'fs')

diff --git a/fs/seq_file.c b/fs/seq_file.c
index 38bb59f3f2ad..774c1eb7f1c9 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -599,6 +599,24 @@ int single_open(struct file *file, int (*show)(struct seq_file *, void *),
 }
 EXPORT_SYMBOL(single_open);
 
+int single_open_size(struct file *file, int (*show)(struct seq_file *, void *),
+		void *data, size_t size)
+{
+	char *buf = kmalloc(size, GFP_KERNEL);
+	int ret;
+	if (!buf)
+		return -ENOMEM;
+	ret = single_open(file, show, data);
+	if (ret) {
+		kfree(buf);
+		return ret;
+	}
+	((struct seq_file *)file->private_data)->buf = buf;
+	((struct seq_file *)file->private_data)->size = size;
+	return 0;
+}
+EXPORT_SYMBOL(single_open_size);
+
 int single_release(struct inode *inode, struct file *file)
 {
 	const struct seq_operations *op = ((struct seq_file *)file->private_data)->op;
-- 
cgit 


From ee21ed0afc2f47007fbd8b22928ecb17316e13e2 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 31 Mar 2013 15:30:40 -0400
Subject: procfs: kill ->write_proc()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/proc/generic.c | 25 -------------------------
 1 file changed, 25 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 6bce60703c76..51fcb201e289 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -196,30 +196,6 @@ proc_file_read(struct file *file, char __user *buf, size_t nbytes,
 	return rv;
 }
 
-static ssize_t
-proc_file_write(struct file *file, const char __user *buffer,
-		size_t count, loff_t *ppos)
-{
-	struct proc_dir_entry *pde = PDE(file_inode(file));
-	ssize_t rv = -EIO;
-
-	if (pde->write_proc) {
-		spin_lock(&pde->pde_unload_lock);
-		if (!pde->proc_fops) {
-			spin_unlock(&pde->pde_unload_lock);
-			return rv;
-		}
-		pde->pde_users++;
-		spin_unlock(&pde->pde_unload_lock);
-
-		/* FIXME: does this routine need ppos?  probably... */
-		rv = pde->write_proc(file, buffer, count, pde->data);
-		pde_users_dec(pde);
-	}
-	return rv;
-}
-
-
 static loff_t
 proc_file_lseek(struct file *file, loff_t offset, int orig)
 {
@@ -239,7 +215,6 @@ proc_file_lseek(struct file *file, loff_t offset, int orig)
 static const struct file_operations proc_file_operations = {
 	.llseek		= proc_file_lseek,
 	.read		= proc_file_read,
-	.write		= proc_file_write,
 };
 
 static int proc_notify_change(struct dentry *dentry, struct iattr *iattr)
-- 
cgit 


From d9dda78bad879595d8c4220a067fc029d6484a16 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 31 Mar 2013 18:16:14 -0400
Subject: procfs: new helper - PDE_DATA(inode)

The only part of proc_dir_entry the code outside of fs/proc
really cares about is PDE(inode)->data.  Provide a helper
for that; static inline for now, eventually will be moved
to fs/proc, along with the knowledge of struct proc_dir_entry
layout.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/afs/proc.c          | 8 ++++----
 fs/ext4/mballoc.c      | 2 +-
 fs/ext4/super.c        | 2 +-
 fs/jbd2/journal.c      | 2 +-
 fs/proc/generic.c      | 2 +-
 fs/proc/proc_devtree.c | 2 +-
 6 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 096b23f821a1..526e4bbbde59 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -190,7 +190,7 @@ static int afs_proc_cells_open(struct inode *inode, struct file *file)
 		return ret;
 
 	m = file->private_data;
-	m->private = PDE(inode)->data;
+	m->private = PDE_DATA(inode);
 
 	return 0;
 }
@@ -448,7 +448,7 @@ static int afs_proc_cell_volumes_open(struct inode *inode, struct file *file)
 	struct seq_file *m;
 	int ret;
 
-	cell = PDE(inode)->data;
+	cell = PDE_DATA(inode);
 	if (!cell)
 		return -ENOENT;
 
@@ -554,7 +554,7 @@ static int afs_proc_cell_vlservers_open(struct inode *inode, struct file *file)
 	struct seq_file *m;
 	int ret;
 
-	cell = PDE(inode)->data;
+	cell = PDE_DATA(inode);
 	if (!cell)
 		return -ENOENT;
 
@@ -659,7 +659,7 @@ static int afs_proc_cell_servers_open(struct inode *inode, struct file *file)
 	struct seq_file *m;
 	int ret;
 
-	cell = PDE(inode)->data;
+	cell = PDE_DATA(inode);
 	if (!cell)
 		return -ENOENT;
 
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index ee6614bdb639..28e421c208a5 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2149,7 +2149,7 @@ static const struct seq_operations ext4_mb_seq_groups_ops = {
 
 static int ext4_mb_seq_groups_open(struct inode *inode, struct file *file)
 {
-	struct super_block *sb = PDE(inode)->data;
+	struct super_block *sb = PDE_DATA(inode);
 	int rc;
 
 	rc = seq_open(file, &ext4_mb_seq_groups_ops);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 5d6d53578124..c65510548355 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1802,7 +1802,7 @@ static int options_seq_show(struct seq_file *seq, void *offset)
 
 static int options_open_fs(struct inode *inode, struct file *file)
 {
-	return single_open(file, options_seq_show, PDE(inode)->data);
+	return single_open(file, options_seq_show, PDE_DATA(inode));
 }
 
 static const struct file_operations ext4_seq_options_fops = {
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index ed10991ab006..154592ea5632 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -950,7 +950,7 @@ static const struct seq_operations jbd2_seq_info_ops = {
 
 static int jbd2_seq_info_open(struct inode *inode, struct file *file)
 {
-	journal_t *journal = PDE(inode)->data;
+	journal_t *journal = PDE_DATA(inode);
 	struct jbd2_stats_proc_session *s;
 	int rc, size;
 
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 51fcb201e289..c0ad720c37b9 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -346,7 +346,7 @@ void proc_free_inum(unsigned int inum)
 
 static void *proc_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
-	nd_set_link(nd, PDE(dentry->d_inode)->data);
+	nd_set_link(nd, PDE_DATA(dentry->d_inode));
 	return NULL;
 }
 
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
index 30b590f5bd35..e0043c7e7ab7 100644
--- a/fs/proc/proc_devtree.c
+++ b/fs/proc/proc_devtree.c
@@ -41,7 +41,7 @@ static int property_proc_show(struct seq_file *m, void *v)
 
 static int property_proc_open(struct inode *inode, struct file *file)
 {
-	return single_open(file, property_proc_show, PDE(inode)->data);
+	return single_open(file, property_proc_show, PDE_DATA(inode));
 }
 
 static const struct file_operations property_proc_fops = {
-- 
cgit 


From 75ef9de1267ba171ecefafca35758e2be0db10dc Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 4 Apr 2013 19:09:41 -0400
Subject: constify a bunch of struct file_operations instances

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/nfsd/nfsctl.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index f33455b4d957..5bee0313dffd 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -177,7 +177,7 @@ static int export_features_open(struct inode *inode, struct file *file)
 	return single_open(file, export_features_show, NULL);
 }
 
-static struct file_operations export_features_operations = {
+static const struct file_operations export_features_operations = {
 	.open		= export_features_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
@@ -196,7 +196,7 @@ static int supported_enctypes_open(struct inode *inode, struct file *file)
 	return single_open(file, supported_enctypes_show, NULL);
 }
 
-static struct file_operations supported_enctypes_ops = {
+static const struct file_operations supported_enctypes_ops = {
 	.open		= supported_enctypes_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
-- 
cgit 


From 80e928f7ebb958f4d79d4099d1c5c0a015a23b93 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 4 Apr 2013 17:02:03 +0100
Subject: proc: Kill create_proc_entry()

Kill create_proc_entry() in favour of create_proc_read_entry(), proc_create()
and proc_create_data().

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/proc/generic.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index c0ad720c37b9..5453f1c0b70c 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -650,8 +650,9 @@ struct proc_dir_entry *proc_mkdir(const char *name,
 }
 EXPORT_SYMBOL(proc_mkdir);
 
-struct proc_dir_entry *create_proc_entry(const char *name, umode_t mode,
-					 struct proc_dir_entry *parent)
+struct proc_dir_entry *create_proc_read_entry(
+	const char *name, umode_t mode, struct proc_dir_entry *parent, 
+	read_proc_t *read_proc, void *data)
 {
 	struct proc_dir_entry *ent;
 
@@ -668,6 +669,8 @@ struct proc_dir_entry *create_proc_entry(const char *name, umode_t mode,
 
 	ent = __proc_create(&parent, name, mode, 1);
 	if (ent) {
+		ent->read_proc = read_proc;
+		ent->data = data;
 		if (proc_register(parent, ent) < 0) {
 			kfree(ent);
 			ent = NULL;
@@ -675,7 +678,7 @@ struct proc_dir_entry *create_proc_entry(const char *name, umode_t mode,
 	}
 	return ent;
 }
-EXPORT_SYMBOL(create_proc_entry);
+EXPORT_SYMBOL(create_proc_read_entry);
 
 struct proc_dir_entry *proc_create_data(const char *name, umode_t mode,
 					struct proc_dir_entry *parent,
-- 
cgit 


From ad147d011f4e9d4e4309f7974fd19c7f875ccb14 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 4 Apr 2013 16:32:28 +0100
Subject: procfs: Clean up huge if-statement in __proc_file_read()

Switch huge if-statement in __proc_file_read() around.  This then puts the
single line loop break immediately after the if-statement and allows us to
de-indent the huge comment and make it take fewer lines.  The code following
the if-statement then follows naturally from the call to dp->read_proc().

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/proc/generic.c | 98 ++++++++++++++++++++++++++-----------------------------
 1 file changed, 47 insertions(+), 51 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 5453f1c0b70c..a6a1cb5d589d 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -71,59 +71,55 @@ __proc_file_read(struct file *file, char __user *buf, size_t nbytes,
 		count = min_t(size_t, PROC_BLOCK_SIZE, nbytes);
 
 		start = NULL;
-		if (dp->read_proc) {
-			/*
-			 * How to be a proc read function
-			 * ------------------------------
-			 * Prototype:
-			 *    int f(char *buffer, char **start, off_t offset,
-			 *          int count, int *peof, void *dat)
-			 *
-			 * Assume that the buffer is "count" bytes in size.
-			 *
-			 * If you know you have supplied all the data you
-			 * have, set *peof.
-			 *
-			 * You have three ways to return data:
-			 * 0) Leave *start = NULL.  (This is the default.)
-			 *    Put the data of the requested offset at that
-			 *    offset within the buffer.  Return the number (n)
-			 *    of bytes there are from the beginning of the
-			 *    buffer up to the last byte of data.  If the
-			 *    number of supplied bytes (= n - offset) is 
-			 *    greater than zero and you didn't signal eof
-			 *    and the reader is prepared to take more data
-			 *    you will be called again with the requested
-			 *    offset advanced by the number of bytes 
-			 *    absorbed.  This interface is useful for files
-			 *    no larger than the buffer.
-			 * 1) Set *start = an unsigned long value less than
-			 *    the buffer address but greater than zero.
-			 *    Put the data of the requested offset at the
-			 *    beginning of the buffer.  Return the number of
-			 *    bytes of data placed there.  If this number is
-			 *    greater than zero and you didn't signal eof
-			 *    and the reader is prepared to take more data
-			 *    you will be called again with the requested
-			 *    offset advanced by *start.  This interface is
-			 *    useful when you have a large file consisting
-			 *    of a series of blocks which you want to count
-			 *    and return as wholes.
-			 *    (Hack by Paul.Russell@rustcorp.com.au)
-			 * 2) Set *start = an address within the buffer.
-			 *    Put the data of the requested offset at *start.
-			 *    Return the number of bytes of data placed there.
-			 *    If this number is greater than zero and you
-			 *    didn't signal eof and the reader is prepared to
-			 *    take more data you will be called again with the
-			 *    requested offset advanced by the number of bytes
-			 *    absorbed.
-			 */
-			n = dp->read_proc(page, &start, *ppos,
-					  count, &eof, dp->data);
-		} else
+		if (!dp->read_proc)
 			break;
 
+		/* How to be a proc read function
+		 * ------------------------------
+		 * Prototype:
+		 *    int f(char *buffer, char **start, off_t offset,
+		 *          int count, int *peof, void *dat)
+		 *
+		 * Assume that the buffer is "count" bytes in size.
+		 *
+		 * If you know you have supplied all the data you have, set
+		 * *peof.
+		 *
+		 * You have three ways to return data:
+		 *
+		 * 0) Leave *start = NULL.  (This is the default.)  Put the
+		 *    data of the requested offset at that offset within the
+		 *    buffer.  Return the number (n) of bytes there are from
+		 *    the beginning of the buffer up to the last byte of data.
+		 *    If the number of supplied bytes (= n - offset) is greater
+		 *    than zero and you didn't signal eof and the reader is
+		 *    prepared to take more data you will be called again with
+		 *    the requested offset advanced by the number of bytes
+		 *    absorbed.  This interface is useful for files no larger
+		 *    than the buffer.
+		 *
+		 * 1) Set *start = an unsigned long value less than the buffer
+		 *    address but greater than zero.  Put the data of the
+		 *    requested offset at the beginning of the buffer.  Return
+		 *    the number of bytes of data placed there.  If this number
+		 *    is greater than zero and you didn't signal eof and the
+		 *    reader is prepared to take more data you will be called
+		 *    again with the requested offset advanced by *start.  This
+		 *    interface is useful when you have a large file consisting
+		 *    of a series of blocks which you want to count and return
+		 *    as wholes.
+		 *    (Hack by Paul.Russell@rustcorp.com.au)
+		 *
+		 * 2) Set *start = an address within the buffer.  Put the data
+		 *    of the requested offset at *start.  Return the number of
+		 *    bytes of data placed there.  If this number is greater
+		 *    than zero and you didn't signal eof and the reader is
+		 *    prepared to take more data you will be called again with
+		 *    the requested offset advanced by the number of bytes
+		 *    absorbed.
+		 */
+		n = dp->read_proc(page, &start, *ppos, count, &eof, dp->data);
+
 		if (n == 0)   /* end of file */
 			break;
 		if (n < 0) {  /* error */
-- 
cgit 


From 866ad9a747bbf5461739fcae6d0a41c8971bbe1d Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 3 Apr 2013 19:07:30 -0400
Subject: procfs: preparations for remove_proc_entry() race fixes

* leave ->proc_fops alone; make ->pde_users negative instead
* trim pde_opener
* move relevant code to fs/proc/inode.c

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/proc/generic.c  |  83 +-----------------
 fs/proc/inode.c    | 248 +++++++++++++++++++++++++++--------------------------
 fs/proc/internal.h |   7 +-
 3 files changed, 133 insertions(+), 205 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index a6a1cb5d589d..bec58323629c 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -39,7 +39,7 @@ static int proc_match(unsigned int len, const char *name, struct proc_dir_entry
 /* buffer size is one page but our output routines use some slack for overruns */
 #define PROC_BLOCK_SIZE	(PAGE_SIZE - 1024)
 
-static ssize_t
+ssize_t
 __proc_file_read(struct file *file, char __user *buf, size_t nbytes,
 	       loff_t *ppos)
 {
@@ -171,48 +171,6 @@ __proc_file_read(struct file *file, char __user *buf, size_t nbytes,
 	return retval;
 }
 
-static ssize_t
-proc_file_read(struct file *file, char __user *buf, size_t nbytes,
-	       loff_t *ppos)
-{
-	struct proc_dir_entry *pde = PDE(file_inode(file));
-	ssize_t rv = -EIO;
-
-	spin_lock(&pde->pde_unload_lock);
-	if (!pde->proc_fops) {
-		spin_unlock(&pde->pde_unload_lock);
-		return rv;
-	}
-	pde->pde_users++;
-	spin_unlock(&pde->pde_unload_lock);
-
-	rv = __proc_file_read(file, buf, nbytes, ppos);
-
-	pde_users_dec(pde);
-	return rv;
-}
-
-static loff_t
-proc_file_lseek(struct file *file, loff_t offset, int orig)
-{
-	loff_t retval = -EINVAL;
-	switch (orig) {
-	case 1:
-		offset += file->f_pos;
-	/* fallthrough */
-	case 0:
-		if (offset < 0 || offset > MAX_NON_LFS)
-			break;
-		file->f_pos = retval = offset;
-	}
-	return retval;
-}
-
-static const struct file_operations proc_file_operations = {
-	.llseek		= proc_file_lseek,
-	.read		= proc_file_read,
-};
-
 static int proc_notify_change(struct dentry *dentry, struct iattr *iattr)
 {
 	struct inode *inode = dentry->d_inode;
@@ -722,41 +680,6 @@ void pde_put(struct proc_dir_entry *pde)
 		free_proc_entry(pde);
 }
 
-static void entry_rundown(struct proc_dir_entry *de)
-{
-	spin_lock(&de->pde_unload_lock);
-	/*
-	 * Stop accepting new callers into module. If you're
-	 * dynamically allocating ->proc_fops, save a pointer somewhere.
-	 */
-	de->proc_fops = NULL;
-	/* Wait until all existing callers into module are done. */
-	if (de->pde_users > 0) {
-		DECLARE_COMPLETION_ONSTACK(c);
-
-		if (!de->pde_unload_completion)
-			de->pde_unload_completion = &c;
-
-		spin_unlock(&de->pde_unload_lock);
-
-		wait_for_completion(de->pde_unload_completion);
-
-		spin_lock(&de->pde_unload_lock);
-	}
-
-	while (!list_empty(&de->pde_openers)) {
-		struct pde_opener *pdeo;
-
-		pdeo = list_first_entry(&de->pde_openers, struct pde_opener, lh);
-		list_del(&pdeo->lh);
-		spin_unlock(&de->pde_unload_lock);
-		pdeo->release(pdeo->inode, pdeo->file);
-		kfree(pdeo);
-		spin_lock(&de->pde_unload_lock);
-	}
-	spin_unlock(&de->pde_unload_lock);
-}
-
 /*
  * Remove a /proc entry and free it if it's not currently in use.
  */
@@ -788,7 +711,7 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
 		return;
 	}
 
-	entry_rundown(de);
+	proc_entry_rundown(de);
 
 	if (S_ISDIR(de->mode))
 		parent->nlink--;
@@ -837,7 +760,7 @@ int remove_proc_subtree(const char *name, struct proc_dir_entry *parent)
 		}
 		spin_unlock(&proc_subdir_lock);
 
-		entry_rundown(de);
+		proc_entry_rundown(de);
 		next = de->parent;
 		if (S_ISDIR(de->mode))
 			next->nlink--;
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index a4aaaeee3342..0cd9d80f28e8 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -129,96 +129,138 @@ static const struct super_operations proc_sops = {
 	.show_options	= proc_show_options,
 };
 
+enum {BIAS = -1U<<31};
+
+static inline int use_pde(struct proc_dir_entry *pde)
+{
+	int res = 1;
+	spin_lock(&pde->pde_unload_lock);
+	if (unlikely(pde->pde_users < 0))
+		res = 0;
+	else
+		pde->pde_users++;
+	spin_unlock(&pde->pde_unload_lock);
+	return res;
+}
+
 static void __pde_users_dec(struct proc_dir_entry *pde)
 {
-	pde->pde_users--;
-	if (pde->pde_unload_completion && pde->pde_users == 0)
+	if (--pde->pde_users == BIAS)
 		complete(pde->pde_unload_completion);
 }
 
-void pde_users_dec(struct proc_dir_entry *pde)
+static void unuse_pde(struct proc_dir_entry *pde)
 {
 	spin_lock(&pde->pde_unload_lock);
 	__pde_users_dec(pde);
 	spin_unlock(&pde->pde_unload_lock);
 }
 
-static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence)
+void proc_entry_rundown(struct proc_dir_entry *de)
 {
-	struct proc_dir_entry *pde = PDE(file_inode(file));
-	loff_t rv = -EINVAL;
-	loff_t (*llseek)(struct file *, loff_t, int);
+	spin_lock(&de->pde_unload_lock);
+	de->pde_users += BIAS;
+	/* Wait until all existing callers into module are done. */
+	if (de->pde_users != BIAS) {
+		DECLARE_COMPLETION_ONSTACK(c);
+		de->pde_unload_completion = &c;
+		spin_unlock(&de->pde_unload_lock);
 
-	spin_lock(&pde->pde_unload_lock);
-	/*
-	 * remove_proc_entry() is going to delete PDE (as part of module
-	 * cleanup sequence). No new callers into module allowed.
-	 */
-	if (!pde->proc_fops) {
-		spin_unlock(&pde->pde_unload_lock);
-		return rv;
+		wait_for_completion(de->pde_unload_completion);
+
+		spin_lock(&de->pde_unload_lock);
 	}
-	/*
-	 * Bump refcount so that remove_proc_entry will wail for ->llseek to
-	 * complete.
-	 */
-	pde->pde_users++;
-	/*
-	 * Save function pointer under lock, to protect against ->proc_fops
-	 * NULL'ifying right after ->pde_unload_lock is dropped.
-	 */
-	llseek = pde->proc_fops->llseek;
-	spin_unlock(&pde->pde_unload_lock);
 
-	if (!llseek)
-		llseek = default_llseek;
-	rv = llseek(file, offset, whence);
+	while (!list_empty(&de->pde_openers)) {
+		struct pde_opener *pdeo;
+		struct file *file;
 
-	pde_users_dec(pde);
-	return rv;
+		pdeo = list_first_entry(&de->pde_openers, struct pde_opener, lh);
+		list_del(&pdeo->lh);
+		spin_unlock(&de->pde_unload_lock);
+		file = pdeo->file;
+		de->proc_fops->release(file_inode(file), file);
+		kfree(pdeo);
+		spin_lock(&de->pde_unload_lock);
+	}
+	spin_unlock(&de->pde_unload_lock);
 }
 
-static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
+/* ->read_proc() users - legacy crap */
+static ssize_t
+proc_file_read(struct file *file, char __user *buf, size_t nbytes,
+	       loff_t *ppos)
 {
 	struct proc_dir_entry *pde = PDE(file_inode(file));
 	ssize_t rv = -EIO;
-	ssize_t (*read)(struct file *, char __user *, size_t, loff_t *);
+	if (use_pde(pde)) {
+		rv = __proc_file_read(file, buf, nbytes, ppos);
+		unuse_pde(pde);
+	}
+	return rv;
+}
 
-	spin_lock(&pde->pde_unload_lock);
-	if (!pde->proc_fops) {
-		spin_unlock(&pde->pde_unload_lock);
-		return rv;
+static loff_t
+proc_file_lseek(struct file *file, loff_t offset, int orig)
+{
+	loff_t retval = -EINVAL;
+	switch (orig) {
+	case 1:
+		offset += file->f_pos;
+	/* fallthrough */
+	case 0:
+		if (offset < 0 || offset > MAX_NON_LFS)
+			break;
+		file->f_pos = retval = offset;
 	}
-	pde->pde_users++;
-	read = pde->proc_fops->read;
-	spin_unlock(&pde->pde_unload_lock);
+	return retval;
+}
 
-	if (read)
-		rv = read(file, buf, count, ppos);
+const struct file_operations proc_file_operations = {
+	.llseek		= proc_file_lseek,
+	.read		= proc_file_read,
+};
 
-	pde_users_dec(pde);
+static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence)
+{
+	struct proc_dir_entry *pde = PDE(file_inode(file));
+	loff_t rv = -EINVAL;
+	if (use_pde(pde)) {
+		loff_t (*llseek)(struct file *, loff_t, int);
+		llseek = pde->proc_fops->llseek;
+		if (!llseek)
+			llseek = default_llseek;
+		rv = llseek(file, offset, whence);
+		unuse_pde(pde);
+	}
 	return rv;
 }
 
-static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
+static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 {
+	ssize_t (*read)(struct file *, char __user *, size_t, loff_t *);
 	struct proc_dir_entry *pde = PDE(file_inode(file));
 	ssize_t rv = -EIO;
-	ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *);
-
-	spin_lock(&pde->pde_unload_lock);
-	if (!pde->proc_fops) {
-		spin_unlock(&pde->pde_unload_lock);
-		return rv;
+	if (use_pde(pde)) {
+		read = pde->proc_fops->read;
+		if (read)
+			rv = read(file, buf, count, ppos);
+		unuse_pde(pde);
 	}
-	pde->pde_users++;
-	write = pde->proc_fops->write;
-	spin_unlock(&pde->pde_unload_lock);
-
-	if (write)
-		rv = write(file, buf, count, ppos);
+	return rv;
+}
 
-	pde_users_dec(pde);
+static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
+{
+	ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *);
+	struct proc_dir_entry *pde = PDE(file_inode(file));
+	ssize_t rv = -EIO;
+	if (use_pde(pde)) {
+		write = pde->proc_fops->write;
+		if (write)
+			rv = write(file, buf, count, ppos);
+		unuse_pde(pde);
+	}
 	return rv;
 }
 
@@ -227,20 +269,12 @@ static unsigned int proc_reg_poll(struct file *file, struct poll_table_struct *p
 	struct proc_dir_entry *pde = PDE(file_inode(file));
 	unsigned int rv = DEFAULT_POLLMASK;
 	unsigned int (*poll)(struct file *, struct poll_table_struct *);
-
-	spin_lock(&pde->pde_unload_lock);
-	if (!pde->proc_fops) {
-		spin_unlock(&pde->pde_unload_lock);
-		return rv;
+	if (use_pde(pde)) {
+		poll = pde->proc_fops->poll;
+		if (poll)
+			rv = poll(file, pts);
+		unuse_pde(pde);
 	}
-	pde->pde_users++;
-	poll = pde->proc_fops->poll;
-	spin_unlock(&pde->pde_unload_lock);
-
-	if (poll)
-		rv = poll(file, pts);
-
-	pde_users_dec(pde);
 	return rv;
 }
 
@@ -249,20 +283,12 @@ static long proc_reg_unlocked_ioctl(struct file *file, unsigned int cmd, unsigne
 	struct proc_dir_entry *pde = PDE(file_inode(file));
 	long rv = -ENOTTY;
 	long (*ioctl)(struct file *, unsigned int, unsigned long);
-
-	spin_lock(&pde->pde_unload_lock);
-	if (!pde->proc_fops) {
-		spin_unlock(&pde->pde_unload_lock);
-		return rv;
+	if (use_pde(pde)) {
+		ioctl = pde->proc_fops->unlocked_ioctl;
+		if (ioctl)
+			rv = ioctl(file, cmd, arg);
+		unuse_pde(pde);
 	}
-	pde->pde_users++;
-	ioctl = pde->proc_fops->unlocked_ioctl;
-	spin_unlock(&pde->pde_unload_lock);
-
-	if (ioctl)
-		rv = ioctl(file, cmd, arg);
-
-	pde_users_dec(pde);
 	return rv;
 }
 
@@ -272,20 +298,12 @@ static long proc_reg_compat_ioctl(struct file *file, unsigned int cmd, unsigned
 	struct proc_dir_entry *pde = PDE(file_inode(file));
 	long rv = -ENOTTY;
 	long (*compat_ioctl)(struct file *, unsigned int, unsigned long);
-
-	spin_lock(&pde->pde_unload_lock);
-	if (!pde->proc_fops) {
-		spin_unlock(&pde->pde_unload_lock);
-		return rv;
+	if (use_pde(pde)) {
+		compat_ioctl = pde->proc_fops->compat_ioctl;
+		if (compat_ioctl)
+			rv = compat_ioctl(file, cmd, arg);
+		unuse_pde(pde);
 	}
-	pde->pde_users++;
-	compat_ioctl = pde->proc_fops->compat_ioctl;
-	spin_unlock(&pde->pde_unload_lock);
-
-	if (compat_ioctl)
-		rv = compat_ioctl(file, cmd, arg);
-
-	pde_users_dec(pde);
 	return rv;
 }
 #endif
@@ -295,20 +313,12 @@ static int proc_reg_mmap(struct file *file, struct vm_area_struct *vma)
 	struct proc_dir_entry *pde = PDE(file_inode(file));
 	int rv = -EIO;
 	int (*mmap)(struct file *, struct vm_area_struct *);
-
-	spin_lock(&pde->pde_unload_lock);
-	if (!pde->proc_fops) {
-		spin_unlock(&pde->pde_unload_lock);
-		return rv;
+	if (use_pde(pde)) {
+		mmap = pde->proc_fops->mmap;
+		if (mmap)
+			rv = mmap(file, vma);
+		unuse_pde(pde);
 	}
-	pde->pde_users++;
-	mmap = pde->proc_fops->mmap;
-	spin_unlock(&pde->pde_unload_lock);
-
-	if (mmap)
-		rv = mmap(file, vma);
-
-	pde_users_dec(pde);
 	return rv;
 }
 
@@ -334,16 +344,12 @@ static int proc_reg_open(struct inode *inode, struct file *file)
 	if (!pdeo)
 		return -ENOMEM;
 
-	spin_lock(&pde->pde_unload_lock);
-	if (!pde->proc_fops) {
-		spin_unlock(&pde->pde_unload_lock);
+	if (!use_pde(pde)) {
 		kfree(pdeo);
 		return -ENOENT;
 	}
-	pde->pde_users++;
 	open = pde->proc_fops->open;
 	release = pde->proc_fops->release;
-	spin_unlock(&pde->pde_unload_lock);
 
 	if (open)
 		rv = open(inode, file);
@@ -351,10 +357,8 @@ static int proc_reg_open(struct inode *inode, struct file *file)
 	spin_lock(&pde->pde_unload_lock);
 	if (rv == 0 && release) {
 		/* To know what to release. */
-		pdeo->inode = inode;
 		pdeo->file = file;
 		/* Strictly for "too late" ->release in proc_reg_release(). */
-		pdeo->release = release;
 		list_add(&pdeo->lh, &pde->pde_openers);
 	} else
 		kfree(pdeo);
@@ -364,12 +368,12 @@ static int proc_reg_open(struct inode *inode, struct file *file)
 }
 
 static struct pde_opener *find_pde_opener(struct proc_dir_entry *pde,
-					struct inode *inode, struct file *file)
+					struct file *file)
 {
 	struct pde_opener *pdeo;
 
 	list_for_each_entry(pdeo, &pde->pde_openers, lh) {
-		if (pdeo->inode == inode && pdeo->file == file)
+		if (pdeo->file == file)
 			return pdeo;
 	}
 	return NULL;
@@ -383,8 +387,8 @@ static int proc_reg_release(struct inode *inode, struct file *file)
 	struct pde_opener *pdeo;
 
 	spin_lock(&pde->pde_unload_lock);
-	pdeo = find_pde_opener(pde, inode, file);
-	if (!pde->proc_fops) {
+	pdeo = find_pde_opener(pde, file);
+	if (pde->pde_users < 0) {
 		/*
 		 * Can't simply exit, __fput() will think that everything is OK,
 		 * and move on to freeing struct file. remove_proc_entry() will
@@ -396,7 +400,7 @@ static int proc_reg_release(struct inode *inode, struct file *file)
 		if (pdeo) {
 			list_del(&pdeo->lh);
 			spin_unlock(&pde->pde_unload_lock);
-			rv = pdeo->release(inode, file);
+			rv = pde->proc_fops->release(inode, file);
 			kfree(pdeo);
 		} else
 			spin_unlock(&pde->pde_unload_lock);
@@ -413,7 +417,7 @@ static int proc_reg_release(struct inode *inode, struct file *file)
 	if (release)
 		rv = release(inode, file);
 
-	pde_users_dec(pde);
+	unuse_pde(pde);
 	return rv;
 }
 
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 9c93a53f371d..c43d536f93b9 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -151,12 +151,13 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
 		filldir_t filldir);
 
 struct pde_opener {
-	struct inode *inode;
 	struct file *file;
-	int (*release)(struct inode *, struct file *);
 	struct list_head lh;
 };
-void pde_users_dec(struct proc_dir_entry *pde);
+
+ssize_t __proc_file_read(struct file *, char __user *, size_t, loff_t *);
+extern const struct file_operations proc_file_operations;
+void proc_entry_rundown(struct proc_dir_entry *);
 
 extern spinlock_t proc_subdir_lock;
 
-- 
cgit 


From ca469f35a8e9ef12571a4b80ac6d7fdc0260fb44 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 3 Apr 2013 19:57:00 -0400
Subject: deal with races between remove_proc_entry() and proc_reg_release()

* serialize the call of ->release() on per-pdeo mutex
* don't remove pdeo from per-pde list until we are through with it

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/proc/inode.c    | 85 ++++++++++++++++++++----------------------------------
 fs/proc/internal.h |  2 ++
 2 files changed, 34 insertions(+), 53 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 0cd9d80f28e8..b5b204d6b07f 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -156,6 +156,29 @@ static void unuse_pde(struct proc_dir_entry *pde)
 	spin_unlock(&pde->pde_unload_lock);
 }
 
+/* pde is locked */
+static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo)
+{
+	pdeo->count++;
+	if (!mutex_trylock(&pdeo->mutex)) {
+		/* somebody else is doing that, just wait */
+		spin_unlock(&pde->pde_unload_lock);
+		mutex_lock(&pdeo->mutex);
+		spin_lock(&pde->pde_unload_lock);
+		WARN_ON(!list_empty(&pdeo->lh));
+	} else {
+		struct file *file;
+		spin_unlock(&pde->pde_unload_lock);
+		file = pdeo->file;
+		pde->proc_fops->release(file_inode(file), file);
+		spin_lock(&pde->pde_unload_lock);
+		list_del_init(&pdeo->lh);
+	}
+	mutex_unlock(&pdeo->mutex);
+	if (!--pdeo->count)
+		kfree(pdeo);
+}
+
 void proc_entry_rundown(struct proc_dir_entry *de)
 {
 	spin_lock(&de->pde_unload_lock);
@@ -173,15 +196,8 @@ void proc_entry_rundown(struct proc_dir_entry *de)
 
 	while (!list_empty(&de->pde_openers)) {
 		struct pde_opener *pdeo;
-		struct file *file;
-
 		pdeo = list_first_entry(&de->pde_openers, struct pde_opener, lh);
-		list_del(&pdeo->lh);
-		spin_unlock(&de->pde_unload_lock);
-		file = pdeo->file;
-		de->proc_fops->release(file_inode(file), file);
-		kfree(pdeo);
-		spin_lock(&de->pde_unload_lock);
+		close_pdeo(de, pdeo);
 	}
 	spin_unlock(&de->pde_unload_lock);
 }
@@ -357,6 +373,8 @@ static int proc_reg_open(struct inode *inode, struct file *file)
 	spin_lock(&pde->pde_unload_lock);
 	if (rv == 0 && release) {
 		/* To know what to release. */
+		mutex_init(&pdeo->mutex);
+		pdeo->count = 0;
 		pdeo->file = file;
 		/* Strictly for "too late" ->release in proc_reg_release(). */
 		list_add(&pdeo->lh, &pde->pde_openers);
@@ -367,58 +385,19 @@ static int proc_reg_open(struct inode *inode, struct file *file)
 	return rv;
 }
 
-static struct pde_opener *find_pde_opener(struct proc_dir_entry *pde,
-					struct file *file)
-{
-	struct pde_opener *pdeo;
-
-	list_for_each_entry(pdeo, &pde->pde_openers, lh) {
-		if (pdeo->file == file)
-			return pdeo;
-	}
-	return NULL;
-}
-
 static int proc_reg_release(struct inode *inode, struct file *file)
 {
 	struct proc_dir_entry *pde = PDE(inode);
-	int rv = 0;
-	int (*release)(struct inode *, struct file *);
 	struct pde_opener *pdeo;
-
 	spin_lock(&pde->pde_unload_lock);
-	pdeo = find_pde_opener(pde, file);
-	if (pde->pde_users < 0) {
-		/*
-		 * Can't simply exit, __fput() will think that everything is OK,
-		 * and move on to freeing struct file. remove_proc_entry() will
-		 * find slacker in opener's list and will try to do non-trivial
-		 * things with struct file. Therefore, remove opener from list.
-		 *
-		 * But if opener is removed from list, who will ->release it?
-		 */
-		if (pdeo) {
-			list_del(&pdeo->lh);
-			spin_unlock(&pde->pde_unload_lock);
-			rv = pde->proc_fops->release(inode, file);
-			kfree(pdeo);
-		} else
-			spin_unlock(&pde->pde_unload_lock);
-		return rv;
-	}
-	pde->pde_users++;
-	release = pde->proc_fops->release;
-	if (pdeo) {
-		list_del(&pdeo->lh);
-		kfree(pdeo);
+	list_for_each_entry(pdeo, &pde->pde_openers, lh) {
+		if (pdeo->file == file) {
+			close_pdeo(pde, pdeo);
+			break;
+		}
 	}
 	spin_unlock(&pde->pde_unload_lock);
-
-	if (release)
-		rv = release(inode, file);
-
-	unuse_pde(pde);
-	return rv;
+	return 0;
 }
 
 static const struct file_operations proc_reg_file_ops = {
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index c43d536f93b9..e2fa9345a9a8 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -153,6 +153,8 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
 struct pde_opener {
 	struct file *file;
 	struct list_head lh;
+	int count;	/* number of threads in close_pdeo() */
+	struct mutex mutex;
 };
 
 ssize_t __proc_file_read(struct file *, char __user *, size_t, loff_t *);
-- 
cgit 


From 05c0ae21c034a6f7c6f4c0c63a31167ebb4b061f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 4 Apr 2013 16:28:47 -0400
Subject: try a saner locking for pde_opener...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/proc/inode.c    | 62 ++++++++++++++++++------------------------------------
 fs/proc/internal.h |  4 ++--
 2 files changed, 23 insertions(+), 43 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index b5b204d6b07f..3b14a45870a9 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -133,67 +133,48 @@ enum {BIAS = -1U<<31};
 
 static inline int use_pde(struct proc_dir_entry *pde)
 {
-	int res = 1;
-	spin_lock(&pde->pde_unload_lock);
-	if (unlikely(pde->pde_users < 0))
-		res = 0;
-	else
-		pde->pde_users++;
-	spin_unlock(&pde->pde_unload_lock);
-	return res;
-}
-
-static void __pde_users_dec(struct proc_dir_entry *pde)
-{
-	if (--pde->pde_users == BIAS)
-		complete(pde->pde_unload_completion);
+	return atomic_inc_unless_negative(&pde->in_use);
 }
 
 static void unuse_pde(struct proc_dir_entry *pde)
 {
-	spin_lock(&pde->pde_unload_lock);
-	__pde_users_dec(pde);
-	spin_unlock(&pde->pde_unload_lock);
+	if (atomic_dec_return(&pde->in_use) == BIAS)
+		complete(pde->pde_unload_completion);
 }
 
 /* pde is locked */
 static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo)
 {
-	pdeo->count++;
-	if (!mutex_trylock(&pdeo->mutex)) {
+	if (pdeo->closing) {
 		/* somebody else is doing that, just wait */
+		DECLARE_COMPLETION_ONSTACK(c);
+		pdeo->c = &c;
 		spin_unlock(&pde->pde_unload_lock);
-		mutex_lock(&pdeo->mutex);
+		wait_for_completion(&c);
 		spin_lock(&pde->pde_unload_lock);
-		WARN_ON(!list_empty(&pdeo->lh));
 	} else {
 		struct file *file;
+		pdeo->closing = 1;
 		spin_unlock(&pde->pde_unload_lock);
 		file = pdeo->file;
 		pde->proc_fops->release(file_inode(file), file);
 		spin_lock(&pde->pde_unload_lock);
 		list_del_init(&pdeo->lh);
-	}
-	mutex_unlock(&pdeo->mutex);
-	if (!--pdeo->count)
+		if (pdeo->c)
+			complete(pdeo->c);
 		kfree(pdeo);
+	}
 }
 
 void proc_entry_rundown(struct proc_dir_entry *de)
 {
-	spin_lock(&de->pde_unload_lock);
-	de->pde_users += BIAS;
+	DECLARE_COMPLETION_ONSTACK(c);
 	/* Wait until all existing callers into module are done. */
-	if (de->pde_users != BIAS) {
-		DECLARE_COMPLETION_ONSTACK(c);
-		de->pde_unload_completion = &c;
-		spin_unlock(&de->pde_unload_lock);
-
-		wait_for_completion(de->pde_unload_completion);
-
-		spin_lock(&de->pde_unload_lock);
-	}
+	de->pde_unload_completion = &c;
+	if (atomic_add_return(BIAS, &de->in_use) != BIAS)
+		wait_for_completion(&c);
 
+	spin_lock(&de->pde_unload_lock);
 	while (!list_empty(&de->pde_openers)) {
 		struct pde_opener *pdeo;
 		pdeo = list_first_entry(&de->pde_openers, struct pde_opener, lh);
@@ -356,7 +337,7 @@ static int proc_reg_open(struct inode *inode, struct file *file)
 	 * by hand in remove_proc_entry(). For this, save opener's credentials
 	 * for later.
 	 */
-	pdeo = kmalloc(sizeof(struct pde_opener), GFP_KERNEL);
+	pdeo = kzalloc(sizeof(struct pde_opener), GFP_KERNEL);
 	if (!pdeo)
 		return -ENOMEM;
 
@@ -370,18 +351,17 @@ static int proc_reg_open(struct inode *inode, struct file *file)
 	if (open)
 		rv = open(inode, file);
 
-	spin_lock(&pde->pde_unload_lock);
 	if (rv == 0 && release) {
 		/* To know what to release. */
-		mutex_init(&pdeo->mutex);
-		pdeo->count = 0;
 		pdeo->file = file;
 		/* Strictly for "too late" ->release in proc_reg_release(). */
+		spin_lock(&pde->pde_unload_lock);
 		list_add(&pdeo->lh, &pde->pde_openers);
+		spin_unlock(&pde->pde_unload_lock);
 	} else
 		kfree(pdeo);
-	__pde_users_dec(pde);
-	spin_unlock(&pde->pde_unload_lock);
+
+	unuse_pde(pde);
 	return rv;
 }
 
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index e2fa9345a9a8..46a7e2a7b904 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -153,8 +153,8 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
 struct pde_opener {
 	struct file *file;
 	struct list_head lh;
-	int count;	/* number of threads in close_pdeo() */
-	struct mutex mutex;
+	int closing;
+	struct completion *c;
 };
 
 ssize_t __proc_file_read(struct file *, char __user *, size_t, loff_t *);
-- 
cgit 


From 3dc20cb282ec03cc4c997130d680c800011ed479 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 13 Apr 2013 20:31:37 -0400
Subject: new helper: read_code()

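Switch the binfmts that call ->read() directly to the new read_code()
helper, which does the read and then flushes the icache for the target
range if anything was read.  Several reads in binfmt_flat are switched
to kernel_read() instead - sure, it's nommu, but still, doing ->read()
into a kmalloc'ed buffer...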

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/binfmt_aout.c      | 25 ++++++-------------------
 fs/binfmt_elf_fdpic.c |  7 ++-----
 fs/binfmt_flat.c      | 37 ++++++++++++++++++-------------------
 fs/exec.c             |  9 +++++++++
 4 files changed, 35 insertions(+), 43 deletions(-)

(limited to 'fs')

diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index bbc8f8827eac..b23253df8756 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -287,15 +287,12 @@ static int load_aout_binary(struct linux_binprm * bprm)
 			return error;
 		}
 
-		error = bprm->file->f_op->read(bprm->file,
-			  (char __user *)text_addr,
-			  ex.a_text+ex.a_data, &pos);
+		error = read_code(bprm->file, text_addr, pos,
+				  ex.a_text+ex.a_data);
 		if ((signed long)error < 0) {
 			send_sig(SIGKILL, current, 0);
 			return error;
 		}
-			 
-		flush_icache_range(text_addr, text_addr+ex.a_text+ex.a_data);
 	} else {
 		if ((ex.a_text & 0xfff || ex.a_data & 0xfff) &&
 		    (N_MAGIC(ex) != NMAGIC) && printk_ratelimit())
@@ -311,14 +308,9 @@ static int load_aout_binary(struct linux_binprm * bprm)
 		}
 
 		if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) {
-			loff_t pos = fd_offset;
 			vm_brk(N_TXTADDR(ex), ex.a_text+ex.a_data);
-			bprm->file->f_op->read(bprm->file,
-					(char __user *)N_TXTADDR(ex),
-					ex.a_text+ex.a_data, &pos);
-			flush_icache_range((unsigned long) N_TXTADDR(ex),
-					   (unsigned long) N_TXTADDR(ex) +
-					   ex.a_text+ex.a_data);
+			read_code(bprm->file, N_TXTADDR(ex), fd_offset,
+				  ex.a_text + ex.a_data);
 			goto beyond_if;
 		}
 
@@ -397,8 +389,6 @@ static int load_aout_library(struct file *file)
 	start_addr =  ex.a_entry & 0xfffff000;
 
 	if ((N_TXTOFF(ex) & ~PAGE_MASK) != 0) {
-		loff_t pos = N_TXTOFF(ex);
-
 		if (printk_ratelimit())
 		{
 			printk(KERN_WARNING 
@@ -407,11 +397,8 @@ static int load_aout_library(struct file *file)
 		}
 		vm_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss);
 		
-		file->f_op->read(file, (char __user *)start_addr,
-			ex.a_text + ex.a_data, &pos);
-		flush_icache_range((unsigned long) start_addr,
-				   (unsigned long) start_addr + ex.a_text + ex.a_data);
-
+		read_code(file, start_addr, N_TXTOFF(ex),
+			  ex.a_text + ex.a_data);
 		retval = 0;
 		goto out;
 	}
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 9c13e023e2b7..2711d9901632 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -926,7 +926,6 @@ static int elf_fdpic_map_file_constdisp_on_uclinux(
 	struct elf32_fdpic_loadseg *seg;
 	struct elf32_phdr *phdr;
 	unsigned long load_addr, base = ULONG_MAX, top = 0, maddr = 0, mflags;
-	loff_t fpos;
 	int loop, ret;
 
 	load_addr = params->load_addr;
@@ -964,14 +963,12 @@ static int elf_fdpic_map_file_constdisp_on_uclinux(
 		if (params->phdrs[loop].p_type != PT_LOAD)
 			continue;
 
-		fpos = phdr->p_offset;
-
 		seg->addr = maddr + (phdr->p_vaddr - base);
 		seg->p_vaddr = phdr->p_vaddr;
 		seg->p_memsz = phdr->p_memsz;
 
-		ret = file->f_op->read(file, (void *) seg->addr,
-				       phdr->p_filesz, &fpos);
+		ret = read_code(file, seg->addr, phdr->p_offset,
+				       phdr->p_filesz);
 		if (ret < 0)
 			return ret;
 
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 2036d21baaef..d50bbe59da1e 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -207,11 +207,12 @@ static int decompress_exec(
 
 	/* Read in first chunk of data and parse gzip header. */
 	fpos = offset;
-	ret = bprm->file->f_op->read(bprm->file, buf, LBUFSIZE, &fpos);
+	ret = kernel_read(bprm->file, offset, buf, LBUFSIZE);
 
 	strm.next_in = buf;
 	strm.avail_in = ret;
 	strm.total_in = 0;
+	fpos += ret;
 
 	retval = -ENOEXEC;
 
@@ -277,7 +278,7 @@ static int decompress_exec(
 	}
 
 	while ((ret = zlib_inflate(&strm, Z_NO_FLUSH)) == Z_OK) {
-		ret = bprm->file->f_op->read(bprm->file, buf, LBUFSIZE, &fpos);
+		ret = kernel_read(bprm->file, fpos, buf, LBUFSIZE);
 		if (ret <= 0)
 			break;
 		len -= ret;
@@ -285,6 +286,7 @@ static int decompress_exec(
 		strm.next_in = buf;
 		strm.avail_in = ret;
 		strm.total_in = 0;
+		fpos += ret;
 	}
 
 	if (ret < 0) {
@@ -428,6 +430,7 @@ static int load_flat_file(struct linux_binprm * bprm,
 	unsigned long textpos = 0, datapos = 0, result;
 	unsigned long realdatastart = 0;
 	unsigned long text_len, data_len, bss_len, stack_len, flags;
+	unsigned long full_data;
 	unsigned long len, memp = 0;
 	unsigned long memp_size, extra, rlim;
 	unsigned long *reloc = 0, *rp;
@@ -451,6 +454,7 @@ static int load_flat_file(struct linux_binprm * bprm,
 	relocs    = ntohl(hdr->reloc_count);
 	flags     = ntohl(hdr->flags);
 	rev       = ntohl(hdr->rev);
+	full_data = data_len + relocs * sizeof(unsigned long);
 
 	if (strncmp(hdr->magic, "bFLT", 4)) {
 		/*
@@ -577,12 +581,12 @@ static int load_flat_file(struct linux_binprm * bprm,
 #ifdef CONFIG_BINFMT_ZFLAT
 		if (flags & FLAT_FLAG_GZDATA) {
 			result = decompress_exec(bprm, fpos, (char *) datapos, 
-						 data_len + (relocs * sizeof(unsigned long)), 0);
+						 full_data, 0);
 		} else
 #endif
 		{
-			result = bprm->file->f_op->read(bprm->file, (char *) datapos,
-					data_len + (relocs * sizeof(unsigned long)), &fpos);
+			result = read_code(bprm->file, datapos, fpos,
+					full_data);
 		}
 		if (IS_ERR_VALUE(result)) {
 			printk("Unable to read data+bss, errno %d\n", (int)-result);
@@ -627,30 +631,25 @@ static int load_flat_file(struct linux_binprm * bprm,
 		if (flags & FLAT_FLAG_GZIP) {
 			result = decompress_exec(bprm, sizeof (struct flat_hdr),
 					 (((char *) textpos) + sizeof (struct flat_hdr)),
-					 (text_len + data_len + (relocs * sizeof(unsigned long))
+					 (text_len + full_data
 						  - sizeof (struct flat_hdr)),
 					 0);
 			memmove((void *) datapos, (void *) realdatastart,
-					data_len + (relocs * sizeof(unsigned long)));
+					full_data);
 		} else if (flags & FLAT_FLAG_GZDATA) {
-			fpos = 0;
-			result = bprm->file->f_op->read(bprm->file,
-					(char *) textpos, text_len, &fpos);
+			result = read_code(bprm->file, textpos, 0, text_len);
 			if (!IS_ERR_VALUE(result))
 				result = decompress_exec(bprm, text_len, (char *) datapos,
-						 data_len + (relocs * sizeof(unsigned long)), 0);
+						 full_data, 0);
 		}
 		else
 #endif
 		{
-			fpos = 0;
-			result = bprm->file->f_op->read(bprm->file,
-					(char *) textpos, text_len, &fpos);
-			if (!IS_ERR_VALUE(result)) {
-				fpos = ntohl(hdr->data_start);
-				result = bprm->file->f_op->read(bprm->file, (char *) datapos,
-					data_len + (relocs * sizeof(unsigned long)), &fpos);
-			}
+			result = read_code(bprm->file, textpos, 0, text_len);
+			if (!IS_ERR_VALUE(result))
+				result = read_code(bprm->file, datapos,
+						   ntohl(hdr->data_start),
+						   full_data);
 		}
 		if (IS_ERR_VALUE(result)) {
 			printk("Unable to read code+data+bss, errno %d\n",(int)-result);
diff --git a/fs/exec.c b/fs/exec.c
index a96a4885bbbf..77dc9096440f 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -802,6 +802,15 @@ int kernel_read(struct file *file, loff_t offset,
 
 EXPORT_SYMBOL(kernel_read);
 
+ssize_t read_code(struct file *file, unsigned long addr, loff_t pos, size_t len)
+{
+	ssize_t res = file->f_op->read(file, (void __user *)addr, len, &pos);
+	if (res > 0)
+		flush_icache_range(addr, addr + len);
+	return res;
+}
+EXPORT_SYMBOL(read_code);
+
 static int exec_mmap(struct mm_struct *mm)
 {
 	struct task_struct *tsk;
-- 
cgit 


From b5edfd27699de420f3af2c34fc7ad9686f169933 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 14 Apr 2013 15:34:01 -0400
Subject: hppfs: fix the leaks on close()

we need to close the underlying procfs file and free ->private_data

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/hppfs/hppfs.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index 126d3c2e2dee..8ef57793c923 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -436,7 +436,6 @@ static int hppfs_open(struct inode *inode, struct file *file)
 	path.mnt = inode->i_sb->s_fs_info;
 	path.dentry = HPPFS_I(inode)->proc_dentry;
 
-	/* XXX This isn't closed anywhere */
 	data->proc_file = dentry_open(&path, file_mode(file->f_mode), cred);
 	err = PTR_ERR(data->proc_file);
 	if (IS_ERR(data->proc_file))
@@ -523,12 +522,23 @@ static loff_t hppfs_llseek(struct file *file, loff_t off, int where)
 	return default_llseek(file, off, where);
 }
 
+static int hppfs_release(struct inode *inode, struct file *file)
+{
+	struct hppfs_private *data = file->private_data;
+	struct file *proc_file = data->proc_file;
+	if (proc_file)
+		fput(proc_file);
+	kfree(data);
+	return 0;
+}
+
 static const struct file_operations hppfs_file_fops = {
 	.owner		= NULL,
 	.llseek		= hppfs_llseek,
 	.read		= hppfs_read,
 	.write		= hppfs_write,
 	.open		= hppfs_open,
+	.release	= hppfs_release,
 };
 
 struct hppfs_dirent {
@@ -582,6 +592,7 @@ static const struct file_operations hppfs_dir_fops = {
 	.open		= hppfs_dir_open,
 	.fsync		= hppfs_fsync,
 	.llseek		= default_llseek,
+	.release	= hppfs_release,
 };
 
 static int hppfs_statfs(struct dentry *dentry, struct kstatfs *sf)
-- 
cgit 


From 79d0a3e399576c380787be5dd36be51de763af62 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 14 Apr 2013 15:35:44 -0400
Subject: hppfs: get rid of ->fsync()

it has grown by accident - directories there do *not* use page cache, so
there's nothing to write.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/hppfs/hppfs.c | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'fs')

diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index 8ef57793c923..cd3e38972c86 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -580,17 +580,10 @@ static int hppfs_readdir(struct file *file, void *ent, filldir_t filldir)
 	return err;
 }
 
-static int hppfs_fsync(struct file *file, loff_t start, loff_t end,
-		       int datasync)
-{
-	return filemap_write_and_wait_range(file->f_mapping, start, end);
-}
-
 static const struct file_operations hppfs_dir_fops = {
 	.owner		= NULL,
 	.readdir	= hppfs_readdir,
 	.open		= hppfs_dir_open,
-	.fsync		= hppfs_fsync,
 	.llseek		= default_llseek,
 	.release	= hppfs_release,
 };
-- 
cgit 


From f269cad7f4bb19d4146fc1decc51f3da88257ffc Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 14 Apr 2013 20:01:50 -0400
Subject: inotify: don't wank with FASYNC on ->release()

... it's done already by __fput()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/notify/inotify/inotify_user.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index e0f7c1241a6a..1db6d886cbf2 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -287,9 +287,6 @@ static int inotify_release(struct inode *ignored, struct file *file)
 
 	pr_debug("%s: group=%p\n", __func__, group);
 
-	if (file->f_flags & FASYNC)
-		fsnotify_fasync(-1, file, 0);
-
 	/* free this group, matching get was inotify_init->fsnotify_obtain_group */
 	fsnotify_destroy_group(group);
 
-- 
cgit 


From 3cb5bf1bf947d325fcf6e9458952b51cfd7e6677 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 11 Apr 2013 03:20:50 +0100
Subject: proc: Delete create_proc_read_entry()

Delete create_proc_read_entry() as it no longer has any users.

Also delete read_proc_t, write_proc_t, the read_proc member of the
proc_dir_entry struct and the support functions that use them.  This saves a
pointer for every PDE allocated.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/proc/generic.c  | 168 +----------------------------------------------------
 fs/proc/inode.c    |  35 -----------
 fs/proc/internal.h |   2 -
 3 files changed, 1 insertion(+), 204 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index bec58323629c..1c07cadeb8db 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -36,141 +36,6 @@ static int proc_match(unsigned int len, const char *name, struct proc_dir_entry
 	return !memcmp(name, de->name, len);
 }
 
-/* buffer size is one page but our output routines use some slack for overruns */
-#define PROC_BLOCK_SIZE	(PAGE_SIZE - 1024)
-
-ssize_t
-__proc_file_read(struct file *file, char __user *buf, size_t nbytes,
-	       loff_t *ppos)
-{
-	struct inode * inode = file_inode(file);
-	char 	*page;
-	ssize_t	retval=0;
-	int	eof=0;
-	ssize_t	n, count;
-	char	*start;
-	struct proc_dir_entry * dp;
-	unsigned long long pos;
-
-	/*
-	 * Gaah, please just use "seq_file" instead. The legacy /proc
-	 * interfaces cut loff_t down to off_t for reads, and ignore
-	 * the offset entirely for writes..
-	 */
-	pos = *ppos;
-	if (pos > MAX_NON_LFS)
-		return 0;
-	if (nbytes > MAX_NON_LFS - pos)
-		nbytes = MAX_NON_LFS - pos;
-
-	dp = PDE(inode);
-	if (!(page = (char*) __get_free_page(GFP_TEMPORARY)))
-		return -ENOMEM;
-
-	while ((nbytes > 0) && !eof) {
-		count = min_t(size_t, PROC_BLOCK_SIZE, nbytes);
-
-		start = NULL;
-		if (!dp->read_proc)
-			break;
-
-		/* How to be a proc read function
-		 * ------------------------------
-		 * Prototype:
-		 *    int f(char *buffer, char **start, off_t offset,
-		 *          int count, int *peof, void *dat)
-		 *
-		 * Assume that the buffer is "count" bytes in size.
-		 *
-		 * If you know you have supplied all the data you have, set
-		 * *peof.
-		 *
-		 * You have three ways to return data:
-		 *
-		 * 0) Leave *start = NULL.  (This is the default.)  Put the
-		 *    data of the requested offset at that offset within the
-		 *    buffer.  Return the number (n) of bytes there are from
-		 *    the beginning of the buffer up to the last byte of data.
-		 *    If the number of supplied bytes (= n - offset) is greater
-		 *    than zero and you didn't signal eof and the reader is
-		 *    prepared to take more data you will be called again with
-		 *    the requested offset advanced by the number of bytes
-		 *    absorbed.  This interface is useful for files no larger
-		 *    than the buffer.
-		 *
-		 * 1) Set *start = an unsigned long value less than the buffer
-		 *    address but greater than zero.  Put the data of the
-		 *    requested offset at the beginning of the buffer.  Return
-		 *    the number of bytes of data placed there.  If this number
-		 *    is greater than zero and you didn't signal eof and the
-		 *    reader is prepared to take more data you will be called
-		 *    again with the requested offset advanced by *start.  This
-		 *    interface is useful when you have a large file consisting
-		 *    of a series of blocks which you want to count and return
-		 *    as wholes.
-		 *    (Hack by Paul.Russell@rustcorp.com.au)
-		 *
-		 * 2) Set *start = an address within the buffer.  Put the data
-		 *    of the requested offset at *start.  Return the number of
-		 *    bytes of data placed there.  If this number is greater
-		 *    than zero and you didn't signal eof and the reader is
-		 *    prepared to take more data you will be called again with
-		 *    the requested offset advanced by the number of bytes
-		 *    absorbed.
-		 */
-		n = dp->read_proc(page, &start, *ppos, count, &eof, dp->data);
-
-		if (n == 0)   /* end of file */
-			break;
-		if (n < 0) {  /* error */
-			if (retval == 0)
-				retval = n;
-			break;
-		}
-
-		if (start == NULL) {
-			if (n > PAGE_SIZE)	/* Apparent buffer overflow */
-				n = PAGE_SIZE;
-			n -= *ppos;
-			if (n <= 0)
-				break;
-			if (n > count)
-				n = count;
-			start = page + *ppos;
-		} else if (start < page) {
-			if (n > PAGE_SIZE)	/* Apparent buffer overflow */
-				n = PAGE_SIZE;
-			if (n > count) {
-				/*
-				 * Don't reduce n because doing so might
-				 * cut off part of a data block.
-				 */
-				pr_warn("proc_file_read: count exceeded\n");
-			}
-		} else /* start >= page */ {
-			unsigned long startoff = (unsigned long)(start - page);
-			if (n > (PAGE_SIZE - startoff))	/* buffer overflow? */
-				n = PAGE_SIZE - startoff;
-			if (n > count)
-				n = count;
-		}
-		
- 		n -= copy_to_user(buf, start < page ? page : start, n);
-		if (n == 0) {
-			if (retval == 0)
-				retval = -EFAULT;
-			break;
-		}
-
-		*ppos += start < page ? (unsigned long)start : n;
-		nbytes -= n;
-		buf += n;
-		retval += n;
-	}
-	free_page((unsigned long) page);
-	return retval;
-}
-
 static int proc_notify_change(struct dentry *dentry, struct iattr *iattr)
 {
 	struct inode *inode = dentry->d_inode;
@@ -476,8 +341,7 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp
 	} else if (S_ISLNK(dp->mode)) {
 		dp->proc_iops = &proc_link_inode_operations;
 	} else if (S_ISREG(dp->mode)) {
-		if (dp->proc_fops == NULL)
-			dp->proc_fops = &proc_file_operations;
+		BUG_ON(dp->proc_fops == NULL);
 		dp->proc_iops = &proc_file_inode_operations;
 	} else {
 		WARN_ON(1);
@@ -604,36 +468,6 @@ struct proc_dir_entry *proc_mkdir(const char *name,
 }
 EXPORT_SYMBOL(proc_mkdir);
 
-struct proc_dir_entry *create_proc_read_entry(
-	const char *name, umode_t mode, struct proc_dir_entry *parent, 
-	read_proc_t *read_proc, void *data)
-{
-	struct proc_dir_entry *ent;
-
-	if ((mode & S_IFMT) == 0)
-		mode |= S_IFREG;
-
-	if (!S_ISREG(mode)) {
-		WARN_ON(1);	/* use proc_mkdir(), damnit */
-		return NULL;
-	}
-
-	if ((mode & S_IALLUGO) == 0)
-		mode |= S_IRUGO;
-
-	ent = __proc_create(&parent, name, mode, 1);
-	if (ent) {
-		ent->read_proc = read_proc;
-		ent->data = data;
-		if (proc_register(parent, ent) < 0) {
-			kfree(ent);
-			ent = NULL;
-		}
-	}
-	return ent;
-}
-EXPORT_SYMBOL(create_proc_read_entry);
-
 struct proc_dir_entry *proc_create_data(const char *name, umode_t mode,
 					struct proc_dir_entry *parent,
 					const struct file_operations *proc_fops,
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 3b14a45870a9..d50224c70215 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -183,41 +183,6 @@ void proc_entry_rundown(struct proc_dir_entry *de)
 	spin_unlock(&de->pde_unload_lock);
 }
 
-/* ->read_proc() users - legacy crap */
-static ssize_t
-proc_file_read(struct file *file, char __user *buf, size_t nbytes,
-	       loff_t *ppos)
-{
-	struct proc_dir_entry *pde = PDE(file_inode(file));
-	ssize_t rv = -EIO;
-	if (use_pde(pde)) {
-		rv = __proc_file_read(file, buf, nbytes, ppos);
-		unuse_pde(pde);
-	}
-	return rv;
-}
-
-static loff_t
-proc_file_lseek(struct file *file, loff_t offset, int orig)
-{
-	loff_t retval = -EINVAL;
-	switch (orig) {
-	case 1:
-		offset += file->f_pos;
-	/* fallthrough */
-	case 0:
-		if (offset < 0 || offset > MAX_NON_LFS)
-			break;
-		file->f_pos = retval = offset;
-	}
-	return retval;
-}
-
-const struct file_operations proc_file_operations = {
-	.llseek		= proc_file_lseek,
-	.read		= proc_file_read,
-};
-
 static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence)
 {
 	struct proc_dir_entry *pde = PDE(file_inode(file));
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 46a7e2a7b904..4b13417acfc4 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -157,8 +157,6 @@ struct pde_opener {
 	struct completion *c;
 };
 
-ssize_t __proc_file_read(struct file *, char __user *, size_t, loff_t *);
-extern const struct file_operations proc_file_operations;
 void proc_entry_rundown(struct proc_dir_entry *);
 
 extern spinlock_t proc_subdir_lock;
-- 
cgit 


From 0d01ff2583086fd532181d2ee16112f5342eb78d Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 11 Apr 2013 23:51:01 +0100
Subject: Include missing linux/slab.h inclusions

Add the missing linux/slab.h include to source files that currently
expect to get kmalloc() and co. through linux/proc_fs.h.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
cc: linux-s390@vger.kernel.org
cc: sparclinux@vger.kernel.org
cc: linux-efi@vger.kernel.org
cc: linux-mtd@lists.infradead.org
cc: devel@driverdev.osuosl.org
cc: x86@kernel.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/proc/self.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/proc/self.c b/fs/proc/self.c
index 21940d89977e..6b6a993b5c25 100644
--- a/fs/proc/self.c
+++ b/fs/proc/self.c
@@ -1,5 +1,6 @@
 #include <linux/sched.h>
 #include <linux/namei.h>
+#include <linux/slab.h>
 #include <linux/pid_namespace.h>
 #include "internal.h"
 
-- 
cgit 


From 303eb7e2c982fda734455f068633241db89d3175 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 11 Apr 2013 23:55:54 +0100
Subject: Include missing linux/magic.h inclusions

Add the missing linux/magic.h include to source files that currently
expect to get magic numbers through linux/proc_fs.h.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: linux-efi@vger.kernel.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/proc/inode.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index d50224c70215..bd2f76427fec 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -22,6 +22,7 @@
 #include <linux/seq_file.h>
 #include <linux/slab.h>
 #include <linux/mount.h>
+#include <linux/magic.h>
 
 #include <asm/uaccess.h>
 
-- 
cgit 


From 2f96b8c1d5d492c1d0457b253015330f844136f6 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 12 Apr 2013 00:10:25 +0100
Subject: proc: Split kcore bits from linux/proc_fs.h into linux/kcore.h

Split kcore bits from linux/proc_fs.h into linux/kcore.h.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Acked-by: Ralf Baechle <ralf@linux-mips.org>
cc: linux-mips@linux-mips.org
cc: sparclinux@vger.kernel.org
cc: x86@kernel.org
cc: linux-mm@kvack.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/proc/kcore.c  | 1 +
 fs/proc/vmcore.c | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index eda6f017f272..8e6ce830de44 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -11,6 +11,7 @@
 
 #include <linux/mm.h>
 #include <linux/proc_fs.h>
+#include <linux/kcore.h>
 #include <linux/user.h>
 #include <linux/capability.h>
 #include <linux/elf.h>
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index b870f740ab5a..38edddc25816 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -8,7 +8,7 @@
  */
 
 #include <linux/mm.h>
-#include <linux/proc_fs.h>
+#include <linux/kcore.h>
 #include <linux/user.h>
 #include <linux/elf.h>
 #include <linux/elfcore.h>
@@ -22,6 +22,7 @@
 #include <linux/list.h>
 #include <asm/uaccess.h>
 #include <asm/io.h>
+#include "internal.h"
 
 /* List representing chunks of contiguous memory areas and their offsets in
  * vmcore file.
-- 
cgit 


From 271a15eabe094538d958dc68ccfc9c36b699247a Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 12 Apr 2013 00:38:51 +0100
Subject: proc: Supply PDE attribute setting accessor functions

Supply accessor functions to set attributes in proc_dir_entry structs.

The following are supplied: proc_set_size() and proc_set_user().

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Mauro Carvalho Chehab <mchehab@redhat.com>
cc: linuxppc-dev@lists.ozlabs.org
cc: linux-media@vger.kernel.org
cc: netdev@vger.kernel.org
cc: linux-wireless@vger.kernel.org
cc: linux-pci@vger.kernel.org
cc: netfilter-devel@vger.kernel.org
cc: alsa-devel@alsa-project.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/proc/generic.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'fs')

diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 1c07cadeb8db..5f6f6c38701f 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -498,6 +498,19 @@ out:
 	return NULL;
 }
 EXPORT_SYMBOL(proc_create_data);
+ 
+void proc_set_size(struct proc_dir_entry *de, loff_t size)
+{
+	de->size = size;
+}
+EXPORT_SYMBOL(proc_set_size);
+
+void proc_set_user(struct proc_dir_entry *de, kuid_t uid, kgid_t gid)
+{
+	de->uid = uid;
+	de->gid = gid;
+}
+EXPORT_SYMBOL(proc_set_user);
 
 static void free_proc_entry(struct proc_dir_entry *de)
 {
-- 
cgit 


From 1dd704b6175f067781807ad4da1b878357dc9755 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 12 Apr 2013 01:08:50 +0100
Subject: proc: Uninline pid_delete_dentry()

Uninline pid_delete_dentry() as it's only used by three function pointers.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/proc/base.c     |  9 +++++++++
 fs/proc/internal.h | 14 +++++---------
 2 files changed, 14 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 593e7c5ddb49..f2637c972160 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1621,6 +1621,15 @@ int pid_revalidate(struct dentry *dentry, unsigned int flags)
 	return 0;
 }
 
+int pid_delete_dentry(const struct dentry *dentry)
+{
+	/* Is the task we represent dead?
+	 * If so, then don't put the dentry on the lru list,
+	 * kill it immediately.
+	 */
+	return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first;
+}
+
 const struct dentry_operations pid_dentry_operations =
 {
 	.d_revalidate	= pid_revalidate,
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 4b13417acfc4..aaf2dd8c2b10 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -114,15 +114,6 @@ static inline int task_dumpable(struct task_struct *task)
 	return 0;
 }
 
-static inline int pid_delete_dentry(const struct dentry * dentry)
-{
-	/* Is the task we represent dead?
-	 * If so, then don't put the dentry on the lru list,
-	 * kill it immediately.
-	 */
-	return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first;
-}
-
 static inline unsigned name_to_int(struct dentry *dentry)
 {
 	const char *name = dentry->d_name.name;
@@ -145,6 +136,11 @@ out:
 	return ~0U;
 }
 
+/*
+ * base.c
+ */
+extern int pid_delete_dentry(const struct dentry *);
+
 struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *ino,
 		struct dentry *dentry);
 int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
-- 
cgit 


From c3bef7bcaaa7d9f6704fcd81a171c9f0c91a2259 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 12 Apr 2013 01:42:56 +0100
Subject: proc: Move proc_fd() to fs/proc/fd.h

Move proc_fd() to fs/proc/fd.h.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/proc/fd.h       | 5 +++++
 fs/proc/internal.h | 5 -----
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/fd.h b/fs/proc/fd.h
index cbb1d47deda8..7c047f256ae2 100644
--- a/fs/proc/fd.h
+++ b/fs/proc/fd.h
@@ -11,4 +11,9 @@ extern const struct inode_operations proc_fdinfo_inode_operations;
 
 extern int proc_fd_permission(struct inode *inode, int mask);
 
+static inline int proc_fd(struct inode *inode)
+{
+	return PROC_I(inode)->fd;
+}
+
 #endif /* __PROCFS_FD_H__ */
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index aaf2dd8c2b10..32d8f510d65c 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -94,11 +94,6 @@ static inline struct task_struct *get_proc_task(struct inode *inode)
 	return get_pid_task(proc_pid(inode), PIDTYPE_PID);
 }
 
-static inline int proc_fd(struct inode *inode)
-{
-	return PROC_I(inode)->fd;
-}
-
 static inline int task_dumpable(struct task_struct *task)
 {
 	int dumpable = 0;
-- 
cgit 


From 0bb80f240520c4148b623161e7856858c021696d Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 12 Apr 2013 01:50:06 +0100
Subject: proc: Split the namespace stuff out into linux/proc_ns.h

Split the proc namespace stuff out into linux/proc_ns.h.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: netdev@vger.kernel.org
cc: Serge E. Hallyn <serge.hallyn@ubuntu.com>
cc: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namespace.c       |  6 +++---
 fs/proc/inode.c      |  8 ++++----
 fs/proc/namespaces.c | 17 +++++++++++------
 3 files changed, 18 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/namespace.c b/fs/namespace.c
index ed0708f2415f..0f0cf9379c9e 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -21,7 +21,7 @@
 #include <linux/fs_struct.h>	/* get_fs_root et.al. */
 #include <linux/fsnotify.h>	/* fsnotify_vfsmount_delete */
 #include <linux/uaccess.h>
-#include <linux/proc_fs.h>
+#include <linux/proc_ns.h>
 #include "pnode.h"
 #include "internal.h"
 
@@ -1350,13 +1350,13 @@ static bool mnt_ns_loop(struct path *path)
 	 * mount namespace loop?
 	 */
 	struct inode *inode = path->dentry->d_inode;
-	struct proc_inode *ei;
+	struct proc_ns *ei;
 	struct mnt_namespace *mnt_ns;
 
 	if (!proc_ns_inode(inode))
 		return false;
 
-	ei = PROC_I(inode);
+	ei = get_proc_ns(inode);
 	if (ei->ns_ops != &mntns_operations)
 		return false;
 
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index bd2f76427fec..073aea60cf8f 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -51,8 +51,8 @@ static void proc_evict_inode(struct inode *inode)
 		sysctl_head_put(head);
 	}
 	/* Release any associated namespace */
-	ns_ops = PROC_I(inode)->ns_ops;
-	ns = PROC_I(inode)->ns;
+	ns_ops = PROC_I(inode)->ns.ns_ops;
+	ns = PROC_I(inode)->ns.ns;
 	if (ns_ops && ns)
 		ns_ops->put(ns);
 }
@@ -73,8 +73,8 @@ static struct inode *proc_alloc_inode(struct super_block *sb)
 	ei->pde = NULL;
 	ei->sysctl = NULL;
 	ei->sysctl_entry = NULL;
-	ei->ns = NULL;
-	ei->ns_ops = NULL;
+	ei->ns.ns = NULL;
+	ei->ns.ns_ops = NULL;
 	inode = &ei->vfs_inode;
 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
 	return inode;
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index 66b51c0383da..54bdc6701e9f 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -51,7 +51,7 @@ static int ns_delete_dentry(const struct dentry *dentry)
 static char *ns_dname(struct dentry *dentry, char *buffer, int buflen)
 {
 	struct inode *inode = dentry->d_inode;
-	const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns_ops;
+	const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns.ns_ops;
 
 	return dynamic_dname(dentry, buffer, buflen, "%s:[%lu]",
 		ns_ops->name, inode->i_ino);
@@ -95,8 +95,8 @@ static struct dentry *proc_ns_get_dentry(struct super_block *sb,
 		inode->i_op = &ns_inode_operations;
 		inode->i_mode = S_IFREG | S_IRUGO;
 		inode->i_fop = &ns_file_operations;
-		ei->ns_ops = ns_ops;
-		ei->ns = ns;
+		ei->ns.ns_ops = ns_ops;
+		ei->ns.ns = ns;
 		unlock_new_inode(inode);
 	} else {
 		ns_ops->put(ns);
@@ -128,7 +128,7 @@ static void *proc_ns_follow_link(struct dentry *dentry, struct nameidata *nd)
 	if (!ptrace_may_access(task, PTRACE_MODE_READ))
 		goto out_put_task;
 
-	ns_path.dentry = proc_ns_get_dentry(sb, task, ei->ns_ops);
+	ns_path.dentry = proc_ns_get_dentry(sb, task, ei->ns.ns_ops);
 	if (IS_ERR(ns_path.dentry)) {
 		error = ERR_CAST(ns_path.dentry);
 		goto out_put_task;
@@ -148,7 +148,7 @@ static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int bufl
 {
 	struct inode *inode = dentry->d_inode;
 	struct proc_inode *ei = PROC_I(inode);
-	const struct proc_ns_operations *ns_ops = ei->ns_ops;
+	const struct proc_ns_operations *ns_ops = ei->ns.ns_ops;
 	struct task_struct *task;
 	void *ns;
 	char name[50];
@@ -202,7 +202,7 @@ static struct dentry *proc_ns_instantiate(struct inode *dir,
 	ei = PROC_I(inode);
 	inode->i_mode = S_IFLNK|S_IRWXUGO;
 	inode->i_op = &proc_ns_link_inode_operations;
-	ei->ns_ops = ns_ops;
+	ei->ns.ns_ops = ns_ops;
 
 	d_set_d_op(dentry, &pid_dentry_operations);
 	d_add(dentry, inode);
@@ -337,6 +337,11 @@ out_invalid:
 	return ERR_PTR(-EINVAL);
 }
 
+struct proc_ns *get_proc_ns(struct inode *inode)
+{
+	return &PROC_I(inode)->ns;
+}
+
 bool proc_ns_inode(struct inode *inode)
 {
 	return inode->i_fop == &ns_file_operations;
-- 
cgit 


From 4abfd0298900851930310e5d736a7f3a105089ec Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 12 Apr 2013 02:09:03 +0100
Subject: proc: Move PDE_NET() to fs/proc/proc_net.c

Move PDE_NET() to fs/proc/proc_net.c as that's where the only user is.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/proc/proc_net.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'fs')

diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index b4ac6572474f..986e83220d56 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -26,6 +26,10 @@
 
 #include "internal.h"
 
+static inline struct net *PDE_NET(struct proc_dir_entry *pde)
+{
+	return pde->parent->data;
+}
 
 static struct net *get_proc_net(const struct inode *inode)
 {
-- 
cgit 


From 34db8aaf0f95ffac407d39da22972b38da631db4 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 12 Apr 2013 02:29:19 +0100
Subject: proc: Move some bits from linux/proc_fs.h to
 linux/{of.h,signal.h,tty.h}

Move some bits from linux/proc_fs.h to linux/of.h, signal.h and tty.h.

Also move proc_tty_init() and proc_device_tree_init() to fs/proc/internal.h as
they're internal to procfs.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Grant Likely <grant.likely@secretlab.ca>
cc: devicetree-discuss@lists.ozlabs.org
cc: linux-arch@vger.kernel.org
cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
cc: Jiri Slaby <jslaby@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/proc/internal.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'fs')

diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 32d8f510d65c..c529b5f16ee4 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -198,3 +198,19 @@ extern const struct inode_operations proc_ns_dir_inode_operations;
 extern const struct file_operations proc_ns_dir_operations;
 
 extern int proc_setup_self(struct super_block *);
+
+/*
+ * proc_devtree.c
+ */
+#ifdef CONFIG_PROC_DEVICETREE
+extern void proc_device_tree_init(void);
+#endif /* CONFIG_PROC_DEVICETREE */
+
+/*
+ * proc_tty.c
+ */
+#ifdef CONFIG_TTY
+extern void proc_tty_init(void);
+#else
+static inline void proc_tty_init(void) {}
+#endif
-- 
cgit 


From 270b5ac2151707c25d3327722c5badfbd95945bc Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 12 Apr 2013 02:48:30 +0100
Subject: proc: Add proc_mkdir_data()

Add proc_mkdir_data() to allow procfs directories to be created that are
annotated at the time of creation with private data rather than doing this
post-creation.  This means no access is then required to the proc_dir_entry
struct to set this.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
cc: Neela Syam Kolli <megaraidlinux@lsi.com>
cc: Jerry Chuang <jerry-chuang@realtek.com>
cc: linux-scsi@vger.kernel.org
cc: devel@driverdev.osuosl.org
cc: linux-wireless@vger.kernel.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/proc/generic.c    | 30 ++++++++++++------------------
 fs/reiserfs/procfs.c |  3 +--
 2 files changed, 13 insertions(+), 20 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 5f6f6c38701f..4074da57c99e 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -428,13 +428,17 @@ struct proc_dir_entry *proc_symlink(const char *name,
 }
 EXPORT_SYMBOL(proc_symlink);
 
-struct proc_dir_entry *proc_mkdir_mode(const char *name, umode_t mode,
-		struct proc_dir_entry *parent)
+struct proc_dir_entry *proc_mkdir_data(const char *name, umode_t mode,
+		struct proc_dir_entry *parent, void *data)
 {
 	struct proc_dir_entry *ent;
 
+	if (mode == 0)
+		mode = S_IRUGO | S_IXUGO;
+
 	ent = __proc_create(&parent, name, S_IFDIR | mode, 2);
 	if (ent) {
+		ent->data = data;
 		if (proc_register(parent, ent) < 0) {
 			kfree(ent);
 			ent = NULL;
@@ -442,29 +446,19 @@ struct proc_dir_entry *proc_mkdir_mode(const char *name, umode_t mode,
 	}
 	return ent;
 }
-EXPORT_SYMBOL(proc_mkdir_mode);
+EXPORT_SYMBOL_GPL(proc_mkdir_data);
 
-struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name,
-		struct proc_dir_entry *parent)
+struct proc_dir_entry *proc_mkdir_mode(const char *name, umode_t mode,
+				       struct proc_dir_entry *parent)
 {
-	struct proc_dir_entry *ent;
-
-	ent = __proc_create(&parent, name, S_IFDIR | S_IRUGO | S_IXUGO, 2);
-	if (ent) {
-		ent->data = net;
-		if (proc_register(parent, ent) < 0) {
-			kfree(ent);
-			ent = NULL;
-		}
-	}
-	return ent;
+	return proc_mkdir_data(name, mode, parent, NULL);
 }
-EXPORT_SYMBOL_GPL(proc_net_mkdir);
+EXPORT_SYMBOL(proc_mkdir_mode);
 
 struct proc_dir_entry *proc_mkdir(const char *name,
 		struct proc_dir_entry *parent)
 {
-	return proc_mkdir_mode(name, S_IRUGO | S_IXUGO, parent);
+	return proc_mkdir_data(name, 0, parent, NULL);
 }
 EXPORT_SYMBOL(proc_mkdir);
 
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index 274adea363ff..07c2162ef556 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -479,9 +479,8 @@ int reiserfs_proc_info_init(struct super_block *sb)
 		*s = '!';
 
 	spin_lock_init(&__PINFO(sb).lock);
-	REISERFS_SB(sb)->procdir = proc_mkdir(b, proc_info_root);
+	REISERFS_SB(sb)->procdir = proc_mkdir_data(b, 0, proc_info_root, sb);
 	if (REISERFS_SB(sb)->procdir) {
-		REISERFS_SB(sb)->procdir->data = sb;
 		add_file(sb, "version", show_version);
 		add_file(sb, "super", show_super);
 		add_file(sb, "per-level", show_per_level);
-- 
cgit 


From 4a520d2769beb736ba2bd084b8293ce148a1a7ae Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 12 Apr 2013 14:06:01 +0100
Subject: proc: Supply an accessor for getting the data from a PDE's parent

Supply an accessor function for getting the private data from the parent
proc_dir_entry struct of the proc_dir_entry struct associated with an inode.

ReiserFS, for instance, stores the super_block pointer in the proc directory
it makes for that super_block, and a pointer to the respective seq_file show
function in each of the proc files in that directory.

This allows a reduction in the number of file_operations structs, open
functions and seq_operations structs required.  The problem otherwise is that
each show function requires two pieces of data but only has storage for one
per PDE (and this has no release function).

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
cc: Jerry Chuang <jerry-chuang@realtek.com>
cc: Maxim Mikityanskiy <maxtram95@gmail.com>
cc: YAMANE Toshiaki <yamanetoshi@gmail.com>
cc: linux-wireless@vger.kernel.org
cc: linux-scsi@vger.kernel.org
cc: devel@driverdev.osuosl.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/proc/generic.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'fs')

diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 4074da57c99e..75e08d36b2f1 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -617,3 +617,10 @@ int remove_proc_subtree(const char *name, struct proc_dir_entry *parent)
 	return 0;
 }
 EXPORT_SYMBOL(remove_proc_subtree);
+
+void *proc_get_parent_data(const struct inode *inode)
+{
+	struct proc_dir_entry *de = PDE(inode);
+	return de->parent->data;
+}
+EXPORT_SYMBOL_GPL(proc_get_parent_data);
-- 
cgit 


From e42270a19e357d7808890bdbeb0cae97f2a2d234 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 12 Apr 2013 11:17:06 +0100
Subject: reiserfs: Don't access the proc_dir_entry in r_open(), r_start()
 and r_show()

Don't access the proc_dir_entry in ReiserFS's r_open(), r_start() and r_show()
procfs interface functions.

ReiserFS stores the ->show() method pointer in PDE->data and the super_block
pointer in PDE->parent->data.  This isn't changing.

Currently, ReiserFS passes the PDE pointer into seq_file::private from
r_open() so that r_start() and r_show() can then access it.  Instead, use
seq_open_private() to allocate a two-pointer struct that's passed through
seq_file::private and put the ->show() method and the sb pointers in there.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: reiserfs-devel@vger.kernel.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/reiserfs/procfs.c | 29 ++++++++++++++++++-----------
 1 file changed, 18 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index 07c2162ef556..33532f79b4f7 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -394,20 +394,24 @@ static int set_sb(struct super_block *sb, void *data)
 	return -ENOENT;
 }
 
+struct reiserfs_seq_private {
+	struct super_block *sb;
+	int (*show) (struct seq_file *, struct super_block *);
+};
+
 static void *r_start(struct seq_file *m, loff_t * pos)
 {
-	struct proc_dir_entry *de = m->private;
-	struct super_block *s = de->parent->data;
+	struct reiserfs_seq_private *priv = m->private;
 	loff_t l = *pos;
 
 	if (l)
 		return NULL;
 
-	if (IS_ERR(sget(&reiserfs_fs_type, test_sb, set_sb, 0, s)))
+	if (IS_ERR(sget(&reiserfs_fs_type, test_sb, set_sb, 0, priv->sb)))
 		return NULL;
 
-	up_write(&s->s_umount);
-	return s;
+	up_write(&priv->sb->s_umount);
+	return priv->sb;
 }
 
 static void *r_next(struct seq_file *m, void *v, loff_t * pos)
@@ -426,9 +430,8 @@ static void r_stop(struct seq_file *m, void *v)
 
 static int r_show(struct seq_file *m, void *v)
 {
-	struct proc_dir_entry *de = m->private;
-	int (*show) (struct seq_file *, struct super_block *) = de->data;
-	return show(m, v);
+	struct reiserfs_seq_private *priv = m->private;
+	return priv->show(m, v);
 }
 
 static const struct seq_operations r_ops = {
@@ -440,11 +443,15 @@ static const struct seq_operations r_ops = {
 
 static int r_open(struct inode *inode, struct file *file)
 {
-	int ret = seq_open(file, &r_ops);
+	struct reiserfs_seq_private *priv;
+	int ret = seq_open_private(file, &r_ops,
+				   sizeof(struct reiserfs_seq_private));
 
 	if (!ret) {
 		struct seq_file *m = file->private_data;
-		m->private = PDE(inode);
+		priv = m->private;
+		priv->sb = proc_get_parent_data(inode);
+		priv->show = PDE_DATA(inode);
 	}
 	return ret;
 }
@@ -453,7 +460,7 @@ static const struct file_operations r_file_operations = {
 	.open = r_open,
 	.read = seq_read,
 	.llseek = seq_lseek,
-	.release = seq_release,
+	.release = seq_release_private,
 	.owner = THIS_MODULE,
 };
 
-- 
cgit 


From 8d8b97ba499cb69fccb5fd9f2b439e3265fc3f27 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 19 Apr 2013 23:11:24 -0400
Subject: take cgroup_open() and cpuset_open() to fs/proc/base.c

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/proc/base.c | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

(limited to 'fs')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index f2637c972160..8281986693be 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -404,6 +404,37 @@ static const struct file_operations proc_lstats_operations = {
 
 #endif
 
+#ifdef CONFIG_CGROUPS
+static int cgroup_open(struct inode *inode, struct file *file)
+{
+	struct pid *pid = PROC_I(inode)->pid;
+	return single_open(file, proc_cgroup_show, pid);
+}
+
+static const struct file_operations proc_cgroup_operations = {
+	.open		= cgroup_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+#endif
+
+#ifdef CONFIG_PROC_PID_CPUSET
+
+static int cpuset_open(struct inode *inode, struct file *file)
+{
+	struct pid *pid = PROC_I(inode)->pid;
+	return single_open(file, proc_cpuset_show, pid);
+}
+
+static const struct file_operations proc_cpuset_operations = {
+	.open		= cpuset_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+#endif
+
 static int proc_oom_score(struct task_struct *task, char *buffer)
 {
 	unsigned long totalpages = totalram_pages + total_swap_pages;
-- 
cgit 


From a8ca16ea7b0abb0a7e49492d1123b715f0ec62e8 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 12 Apr 2013 17:27:28 +0100
Subject: proc: Supply a function to remove a proc entry by PDE

Supply a function (proc_remove()) to remove a proc entry (and any subtree
rooted there) by proc_dir_entry pointer rather than by name and (optionally)
root dir entry pointer.  This allows us to eliminate all remaining pde->name
accesses outside of procfs.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Grant Likely <grant.likely@linaro.org>
cc: linux-acpi@vger.kernel.org
cc: openipmi-developer@lists.sourceforge.net
cc: devicetree-discuss@lists.ozlabs.org
cc: linux-pci@vger.kernel.org
cc: netdev@vger.kernel.org
cc: netfilter-devel@vger.kernel.org
cc: alsa-devel@alsa-project.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/proc/generic.c | 7 +++++++
 fs/proc/vmcore.c  | 2 +-
 2 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 75e08d36b2f1..d9631d9b7aff 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -624,3 +624,10 @@ void *proc_get_parent_data(const struct inode *inode)
 	return de->parent->data;
 }
 EXPORT_SYMBOL_GPL(proc_get_parent_data);
+
+void proc_remove(struct proc_dir_entry *de)
+{
+	if (de)
+		remove_proc_subtree(de->name, de->parent);
+}
+EXPORT_SYMBOL(proc_remove);
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 38edddc25816..17f7e080d7ff 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -699,7 +699,7 @@ void vmcore_cleanup(void)
 	struct list_head *pos, *next;
 
 	if (proc_vmcore) {
-		remove_proc_entry(proc_vmcore->name, proc_vmcore->parent);
+		proc_remove(proc_vmcore);
 		proc_vmcore = NULL;
 	}
 
-- 
cgit 


From c30480b92cf497aa3b463367a82f1c2fdc5c46e9 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 12 Apr 2013 18:03:36 +0100
Subject: proc: Make the PROC_I() and PDE() macros internal to procfs

Make the PROC_I() and PDE() macros internal to procfs.  This means making
PDE_DATA() out of line.  This could be made more optimal by storing
PDE()->data into inode->i_private.

Also provide a __PDE_DATA() that is inline and internal to procfs.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/proc/generic.c      |  8 +++++++-
 fs/proc/internal.h     | 18 ++++++++++++++++++
 fs/proc/proc_devtree.c |  2 +-
 3 files changed, 26 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index d9631d9b7aff..a2596afffae6 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -165,7 +165,7 @@ void proc_free_inum(unsigned int inum)
 
 static void *proc_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
-	nd_set_link(nd, PDE_DATA(dentry->d_inode));
+	nd_set_link(nd, __PDE_DATA(dentry->d_inode));
 	return NULL;
 }
 
@@ -631,3 +631,9 @@ void proc_remove(struct proc_dir_entry *de)
 		remove_proc_subtree(de->name, de->parent);
 }
 EXPORT_SYMBOL(proc_remove);
+
+void *PDE_DATA(const struct inode *inode)
+{
+	return __PDE_DATA(inode);
+}
+EXPORT_SYMBOL(PDE_DATA);
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index c529b5f16ee4..86a24060e1b9 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -84,6 +84,24 @@ struct proc_maps_private {
 
 void proc_init_inodecache(void);
 
+/*
+ * General functions
+ */
+static inline struct proc_inode *PROC_I(const struct inode *inode)
+{
+	return container_of(inode, struct proc_inode, vfs_inode);
+}
+
+static inline struct proc_dir_entry *PDE(const struct inode *inode)
+{
+	return PROC_I(inode)->pde;
+}
+
+static inline void *__PDE_DATA(const struct inode *inode)
+{
+	return PDE(inode)->data;
+}
+
 static inline struct pid *proc_pid(struct inode *inode)
 {
 	return PROC_I(inode)->pid;
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
index e0043c7e7ab7..505afc950e0a 100644
--- a/fs/proc/proc_devtree.c
+++ b/fs/proc/proc_devtree.c
@@ -41,7 +41,7 @@ static int property_proc_show(struct seq_file *m, void *v)
 
 static int property_proc_open(struct inode *inode, struct file *file)
 {
-	return single_open(file, property_proc_show, PDE_DATA(inode));
+	return single_open(file, property_proc_show, __PDE_DATA(inode));
 }
 
 static const struct file_operations property_proc_fops = {
-- 
cgit 


From 59d8053f1e16904d54ed7469d4b36801ea6b8f2c Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 11 Apr 2013 13:34:43 +0100
Subject: proc: Move non-public stuff from linux/proc_fs.h to
 fs/proc/internal.h

Move non-public declarations and definitions from linux/proc_fs.h to
fs/proc/internal.h.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/proc/internal.h | 307 ++++++++++++++++++++++++++++++++++-------------------
 fs/proc/kcore.c    |   1 +
 2 files changed, 197 insertions(+), 111 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 86a24060e1b9..04255b6e96b7 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -1,4 +1,4 @@
-/* internal.h: internal procfs definitions
+/* Internal procfs definitions
  *
  * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
@@ -9,81 +9,66 @@
  * 2 of the License, or (at your option) any later version.
  */
 
-#include <linux/sched.h>
 #include <linux/proc_fs.h>
+#include <linux/proc_ns.h>
+#include <linux/spinlock.h>
+#include <linux/atomic.h>
 #include <linux/binfmts.h>
-struct  ctl_table_header;
-struct  mempolicy;
 
-extern struct proc_dir_entry proc_root;
-extern void proc_self_init(void);
-#ifdef CONFIG_PROC_SYSCTL
-extern int proc_sys_init(void);
-extern void sysctl_head_put(struct ctl_table_header *head);
-#else
-static inline void proc_sys_init(void) { }
-static inline void sysctl_head_put(struct ctl_table_header *head) { }
-#endif
-#ifdef CONFIG_NET
-extern int proc_net_init(void);
-#else
-static inline int proc_net_init(void) { return 0; }
-#endif
+struct ctl_table_header;
+struct mempolicy;
 
-struct vmalloc_info {
-	unsigned long	used;
-	unsigned long	largest_chunk;
+/*
+ * This is not completely implemented yet. The idea is to
+ * create an in-memory tree (like the actual /proc filesystem
+ * tree) of these proc_dir_entries, so that we can dynamically
+ * add new files to /proc.
+ *
+ * The "next" pointer creates a linked list of one /proc directory,
+ * while parent/subdir create the directory structure (every
+ * /proc file has a parent, but "subdir" is NULL for all
+ * non-directory entries).
+ */
+struct proc_dir_entry {
+	unsigned int low_ino;
+	umode_t mode;
+	nlink_t nlink;
+	kuid_t uid;
+	kgid_t gid;
+	loff_t size;
+	const struct inode_operations *proc_iops;
+	const struct file_operations *proc_fops;
+	struct proc_dir_entry *next, *parent, *subdir;
+	void *data;
+	atomic_t count;		/* use count */
+	atomic_t in_use;	/* number of callers into module in progress; */
+			/* negative -> it's going away RSN */
+	struct completion *pde_unload_completion;
+	struct list_head pde_openers;	/* who did ->open, but not ->release */
+	spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */
+	u8 namelen;
+	char name[];
 };
 
-#ifdef CONFIG_MMU
-#define VMALLOC_TOTAL (VMALLOC_END - VMALLOC_START)
-extern void get_vmalloc_info(struct vmalloc_info *vmi);
-#else
-
-#define VMALLOC_TOTAL 0UL
-#define get_vmalloc_info(vmi)			\
-do {						\
-	(vmi)->used = 0;			\
-	(vmi)->largest_chunk = 0;		\
-} while(0)
-#endif
-
-extern int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns,
-				struct pid *pid, struct task_struct *task);
-extern int proc_tgid_stat(struct seq_file *m, struct pid_namespace *ns,
-				struct pid *pid, struct task_struct *task);
-extern int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
-				struct pid *pid, struct task_struct *task);
-extern int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns,
-				struct pid *pid, struct task_struct *task);
-extern loff_t mem_lseek(struct file *file, loff_t offset, int orig);
-
-extern const struct file_operations proc_tid_children_operations;
-extern const struct file_operations proc_pid_maps_operations;
-extern const struct file_operations proc_tid_maps_operations;
-extern const struct file_operations proc_pid_numa_maps_operations;
-extern const struct file_operations proc_tid_numa_maps_operations;
-extern const struct file_operations proc_pid_smaps_operations;
-extern const struct file_operations proc_tid_smaps_operations;
-extern const struct file_operations proc_clear_refs_operations;
-extern const struct file_operations proc_pagemap_operations;
-extern const struct file_operations proc_net_operations;
-extern const struct inode_operations proc_net_inode_operations;
-extern const struct inode_operations proc_pid_link_inode_operations;
+union proc_op {
+	int (*proc_get_link)(struct dentry *, struct path *);
+	int (*proc_read)(struct task_struct *task, char *page);
+	int (*proc_show)(struct seq_file *m,
+		struct pid_namespace *ns, struct pid *pid,
+		struct task_struct *task);
+};
 
-struct proc_maps_private {
+struct proc_inode {
 	struct pid *pid;
-	struct task_struct *task;
-#ifdef CONFIG_MMU
-	struct vm_area_struct *tail_vma;
-#endif
-#ifdef CONFIG_NUMA
-	struct mempolicy *task_mempolicy;
-#endif
+	int fd;
+	union proc_op op;
+	struct proc_dir_entry *pde;
+	struct ctl_table_header *sysctl;
+	struct ctl_table *sysctl_entry;
+	struct proc_ns ns;
+	struct inode vfs_inode;
 };
 
-void proc_init_inodecache(void);
-
 /*
  * General functions
  */
@@ -150,79 +135,142 @@ out:
 }
 
 /*
- * base.c
+ * Offset of the first process in the /proc root directory.
  */
-extern int pid_delete_dentry(const struct dentry *);
+#define FIRST_PROCESS_ENTRY 256
 
-struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *ino,
-		struct dentry *dentry);
-int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
-		filldir_t filldir);
+/* Worst case buffer size needed for holding an integer. */
+#define PROC_NUMBUF 13
 
-struct pde_opener {
-	struct file *file;
-	struct list_head lh;
-	int closing;
-	struct completion *c;
-};
+/*
+ * array.c
+ */
+extern const struct file_operations proc_tid_children_operations;
+
+extern int proc_tid_stat(struct seq_file *, struct pid_namespace *,
+			 struct pid *, struct task_struct *);
+extern int proc_tgid_stat(struct seq_file *, struct pid_namespace *,
+			  struct pid *, struct task_struct *);
+extern int proc_pid_status(struct seq_file *, struct pid_namespace *,
+			   struct pid *, struct task_struct *);
+extern int proc_pid_statm(struct seq_file *, struct pid_namespace *,
+			  struct pid *, struct task_struct *);
+
+/*
+ * base.c
+ */
+extern const struct dentry_operations pid_dentry_operations;
+extern int pid_getattr(struct vfsmount *, struct dentry *, struct kstat *);
+extern int proc_setattr(struct dentry *, struct iattr *);
+extern struct inode *proc_pid_make_inode(struct super_block *, struct task_struct *);
+extern int pid_revalidate(struct dentry *, unsigned int);
+extern int pid_delete_dentry(const struct dentry *);
+extern int proc_pid_readdir(struct file *, void *, filldir_t);
+extern struct dentry *proc_pid_lookup(struct inode *, struct dentry *, unsigned int);
+extern loff_t mem_lseek(struct file *, loff_t, int);
 
-void proc_entry_rundown(struct proc_dir_entry *);
+/* Lookups */
+typedef struct dentry *instantiate_t(struct inode *, struct dentry *,
+				     struct task_struct *, const void *);
+extern int proc_fill_cache(struct file *, void *, filldir_t, const char *, int,
+			   instantiate_t, struct task_struct *, const void *);
 
+/*
+ * generic.c
+ */
 extern spinlock_t proc_subdir_lock;
 
-struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int);
-int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir);
-unsigned long task_vsize(struct mm_struct *);
-unsigned long task_statm(struct mm_struct *,
-	unsigned long *, unsigned long *, unsigned long *, unsigned long *);
-void task_mem(struct seq_file *, struct mm_struct *);
+extern struct dentry *proc_lookup(struct inode *, struct dentry *, unsigned int);
+extern struct dentry *proc_lookup_de(struct proc_dir_entry *, struct inode *,
+				     struct dentry *);
+extern int proc_readdir(struct file *, void *, filldir_t);
+extern int proc_readdir_de(struct proc_dir_entry *, struct file *, void *, filldir_t);
 
 static inline struct proc_dir_entry *pde_get(struct proc_dir_entry *pde)
 {
 	atomic_inc(&pde->count);
 	return pde;
 }
-void pde_put(struct proc_dir_entry *pde);
+extern void pde_put(struct proc_dir_entry *);
+
+/*
+ * inode.c
+ */
+struct pde_opener {
+	struct file *file;
+	struct list_head lh;
+	int closing;
+	struct completion *c;
+};
 
-int proc_fill_super(struct super_block *);
-struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *);
-int proc_remount(struct super_block *sb, int *flags, char *data);
+extern const struct inode_operations proc_pid_link_inode_operations;
+
+extern void proc_init_inodecache(void);
+extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *);
+extern int proc_fill_super(struct super_block *);
+extern void proc_entry_rundown(struct proc_dir_entry *);
 
 /*
- * These are generic /proc routines that use the internal
- * "struct proc_dir_entry" tree to traverse the filesystem.
- *
- * The /proc root directory has extended versions to take care
- * of the /proc/<pid> subdirectories.
+ * mmu.c
  */
-int proc_readdir(struct file *, void *, filldir_t);
-struct dentry *proc_lookup(struct inode *, struct dentry *, unsigned int);
+struct vmalloc_info {
+	unsigned long	used;
+	unsigned long	largest_chunk;
+};
 
+#ifdef CONFIG_MMU
+#define VMALLOC_TOTAL (VMALLOC_END - VMALLOC_START)
+extern void get_vmalloc_info(struct vmalloc_info *);
 
+#else
+#define VMALLOC_TOTAL 0UL
+static inline void get_vmalloc_info(struct vmalloc_info *vmi)
+{
+	vmi->used = 0;
+	vmi->largest_chunk = 0;
+}
+#endif
 
-/* Lookups */
-typedef struct dentry *instantiate_t(struct inode *, struct dentry *,
-				struct task_struct *, const void *);
-int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
-	const char *name, int len,
-	instantiate_t instantiate, struct task_struct *task, const void *ptr);
-int pid_revalidate(struct dentry *dentry, unsigned int flags);
-struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task);
-extern const struct dentry_operations pid_dentry_operations;
-int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat);
-int proc_setattr(struct dentry *dentry, struct iattr *attr);
+/*
+ * proc_devtree.c
+ */
+#ifdef CONFIG_PROC_DEVICETREE
+extern void proc_device_tree_init(void);
+#endif
 
+/*
+ * proc_namespaces.c
+ */
 extern const struct inode_operations proc_ns_dir_inode_operations;
 extern const struct file_operations proc_ns_dir_operations;
 
+/*
+ * proc_net.c
+ */
+extern const struct file_operations proc_net_operations;
+extern const struct inode_operations proc_net_inode_operations;
+
+#ifdef CONFIG_NET
+extern int proc_net_init(void);
+#else
+static inline int proc_net_init(void) { return 0; }
+#endif
+
+/*
+ * proc_self.c
+ */
 extern int proc_setup_self(struct super_block *);
 
 /*
- * proc_devtree.c
+ * proc_sysctl.c
  */
-#ifdef CONFIG_PROC_DEVICETREE
-extern void proc_device_tree_init(void);
-#endif /* CONFIG_PROC_DEVICETREE */
+#ifdef CONFIG_PROC_SYSCTL
+extern int proc_sys_init(void);
+extern void sysctl_head_put(struct ctl_table_header *);
+#else
+static inline void proc_sys_init(void) { }
+static inline void sysctl_head_put(struct ctl_table_header *head) { }
+#endif
 
 /*
  * proc_tty.c
@@ -232,3 +280,40 @@ extern void proc_tty_init(void);
 #else
 static inline void proc_tty_init(void) {}
 #endif
+
+/*
+ * root.c
+ */
+extern struct proc_dir_entry proc_root;
+
+extern void proc_self_init(void);
+extern int proc_remount(struct super_block *, int *, char *);
+
+/*
+ * task_[no]mmu.c
+ */
+struct proc_maps_private {
+	struct pid *pid;
+	struct task_struct *task;
+#ifdef CONFIG_MMU
+	struct vm_area_struct *tail_vma;
+#endif
+#ifdef CONFIG_NUMA
+	struct mempolicy *task_mempolicy;
+#endif
+};
+
+extern const struct file_operations proc_pid_maps_operations;
+extern const struct file_operations proc_tid_maps_operations;
+extern const struct file_operations proc_pid_numa_maps_operations;
+extern const struct file_operations proc_tid_numa_maps_operations;
+extern const struct file_operations proc_pid_smaps_operations;
+extern const struct file_operations proc_tid_smaps_operations;
+extern const struct file_operations proc_clear_refs_operations;
+extern const struct file_operations proc_pagemap_operations;
+
+extern unsigned long task_vsize(struct mm_struct *);
+extern unsigned long task_statm(struct mm_struct *,
+				unsigned long *, unsigned long *,
+				unsigned long *, unsigned long *);
+extern void task_mem(struct seq_file *, struct mm_struct *);
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 8e6ce830de44..13cf87c4686f 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -28,6 +28,7 @@
 #include <linux/ioport.h>
 #include <linux/memory.h>
 #include <asm/sections.h>
+#include "internal.h"
 
 #define CORE_STR "CORE"
 
-- 
cgit 


From ac3e3c5b1164397656df81b9e9ab4991184d3236 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 28 Apr 2013 21:42:33 -0400
Subject: don't bother with deferred freeing of fdtables

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/file.c | 68 ++-------------------------------------------------------------
 1 file changed, 2 insertions(+), 66 deletions(-)

(limited to 'fs')

diff --git a/fs/file.c b/fs/file.c
index 3906d9577a18..4a78f981557a 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -23,24 +23,10 @@
 #include <linux/rcupdate.h>
 #include <linux/workqueue.h>
 
-struct fdtable_defer {
-	spinlock_t lock;
-	struct work_struct wq;
-	struct fdtable *next;
-};
-
 int sysctl_nr_open __read_mostly = 1024*1024;
 int sysctl_nr_open_min = BITS_PER_LONG;
 int sysctl_nr_open_max = 1024 * 1024; /* raised later */
 
-/*
- * We use this list to defer free fdtables that have vmalloced
- * sets/arrays. By keeping a per-cpu list, we avoid having to embed
- * the work_struct in fdtable itself which avoids a 64 byte (i386) increase in
- * this per-task structure.
- */
-static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list);
-
 static void *alloc_fdmem(size_t size)
 {
 	/*
@@ -67,46 +53,9 @@ static void __free_fdtable(struct fdtable *fdt)
 	kfree(fdt);
 }
 
-static void free_fdtable_work(struct work_struct *work)
-{
-	struct fdtable_defer *f =
-		container_of(work, struct fdtable_defer, wq);
-	struct fdtable *fdt;
-
-	spin_lock_bh(&f->lock);
-	fdt = f->next;
-	f->next = NULL;
-	spin_unlock_bh(&f->lock);
-	while(fdt) {
-		struct fdtable *next = fdt->next;
-
-		__free_fdtable(fdt);
-		fdt = next;
-	}
-}
-
 static void free_fdtable_rcu(struct rcu_head *rcu)
 {
-	struct fdtable *fdt = container_of(rcu, struct fdtable, rcu);
-	struct fdtable_defer *fddef;
-
-	BUG_ON(!fdt);
-	BUG_ON(fdt->max_fds <= NR_OPEN_DEFAULT);
-
-	if (!is_vmalloc_addr(fdt->fd) && !is_vmalloc_addr(fdt->open_fds)) {
-		kfree(fdt->fd);
-		kfree(fdt->open_fds);
-		kfree(fdt);
-	} else {
-		fddef = &get_cpu_var(fdtable_defer_list);
-		spin_lock(&fddef->lock);
-		fdt->next = fddef->next;
-		fddef->next = fdt;
-		/* vmallocs are handled from the workqueue context */
-		schedule_work(&fddef->wq);
-		spin_unlock(&fddef->lock);
-		put_cpu_var(fdtable_defer_list);
-	}
+	__free_fdtable(container_of(rcu, struct fdtable, rcu));
 }
 
 /*
@@ -174,7 +123,6 @@ static struct fdtable * alloc_fdtable(unsigned int nr)
 	fdt->open_fds = data;
 	data += nr / BITS_PER_BYTE;
 	fdt->close_on_exec = data;
-	fdt->next = NULL;
 
 	return fdt;
 
@@ -221,7 +169,7 @@ static int expand_fdtable(struct files_struct *files, int nr)
 		/* Continue as planned */
 		copy_fdtable(new_fdt, cur_fdt);
 		rcu_assign_pointer(files->fdt, new_fdt);
-		if (cur_fdt->max_fds > NR_OPEN_DEFAULT)
+		if (cur_fdt != &files->fdtab)
 			call_rcu(&cur_fdt->rcu, free_fdtable_rcu);
 	} else {
 		/* Somebody else expanded, so undo our attempt */
@@ -316,7 +264,6 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
 	new_fdt->close_on_exec = newf->close_on_exec_init;
 	new_fdt->open_fds = newf->open_fds_init;
 	new_fdt->fd = &newf->fd_array[0];
-	new_fdt->next = NULL;
 
 	spin_lock(&oldf->file_lock);
 	old_fdt = files_fdtable(oldf);
@@ -490,19 +437,8 @@ void exit_files(struct task_struct *tsk)
 	}
 }
 
-static void fdtable_defer_list_init(int cpu)
-{
-	struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu);
-	spin_lock_init(&fddef->lock);
-	INIT_WORK(&fddef->wq, free_fdtable_work);
-	fddef->next = NULL;
-}
-
 void __init files_defer_init(void)
 {
-	int i;
-	for_each_possible_cpu(i)
-		fdtable_defer_list_init(i);
 	sysctl_nr_open_max = min((size_t)INT_MAX, ~(size_t)0/sizeof(void *)) &
 			     -BITS_PER_LONG;
 }
-- 
cgit