From 81abe27b10af98f861c955be63da700938dd59c1 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 3 Aug 2012 09:38:08 -0700
Subject: userns:  Fix link restrictions to use uid_eq

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/namei.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index 1b464390dde8..05480a64d7b7 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -678,7 +678,7 @@ static inline int may_follow_link(struct path *link, struct nameidata *nd)
 
 	/* Allowed if owner and follower match. */
 	inode = link->dentry->d_inode;
-	if (current_cred()->fsuid == inode->i_uid)
+	if (uid_eq(current_cred()->fsuid, inode->i_uid))
 		return 0;
 
 	/* Allowed if parent directory not sticky and world-writable. */
@@ -687,7 +687,7 @@ static inline int may_follow_link(struct path *link, struct nameidata *nd)
 		return 0;
 
 	/* Allowed if parent directory and link owner match. */
-	if (parent->i_uid == inode->i_uid)
+	if (uid_eq(parent->i_uid, inode->i_uid))
 		return 0;
 
 	path_put_conditional(link, nd);
@@ -757,7 +757,7 @@ static int may_linkat(struct path *link)
 	/* Source inode owner (or CAP_FOWNER) can hardlink all they like,
 	 * otherwise, it must be a safe source.
 	 */
-	if (cred->fsuid == inode->i_uid || safe_hardlink_source(inode) ||
+	if (uid_eq(cred->fsuid, inode->i_uid) || safe_hardlink_source(inode) ||
 	    capable(CAP_FOWNER))
 		return 0;
 
-- 
cgit 


From 36b71a8bfbc92e1ba164e9aec840c0180ee933b5 Mon Sep 17 00:00:00 2001
From: David Teigland <teigland@redhat.com>
Date: Thu, 26 Jul 2012 12:44:30 -0500
Subject: dlm: fix deadlock between dlm_send and dlm_controld

A deadlock sometimes occurs between dlm_controld closing
a lowcomms connection through configfs and dlm_send looking
up the address for a new connection in configfs.

dlm_controld does a configfs rmdir which calls
dlm_lowcomms_close which waits for dlm_send to
cancel work on the workqueues.

The dlm_send workqueue thread has called
tcp_connect_to_sock which calls dlm_nodeid_to_addr
which does a configfs lookup and blocks on a lock
held by dlm_controld in the rmdir path.

The solution here is to save the node addresses within
the lowcomms code so that the lowcomms workqueue does
not need to step through configfs to get a node address.

dlm_controld:
wait_for_completion+0x1d/0x20
__cancel_work_timer+0x1b3/0x1e0
cancel_work_sync+0x10/0x20
dlm_lowcomms_close+0x4c/0xb0 [dlm]
drop_comm+0x22/0x60 [dlm]
client_drop_item+0x26/0x50 [configfs]
configfs_rmdir+0x180/0x230 [configfs]
vfs_rmdir+0xbd/0xf0
do_rmdir+0x103/0x120
sys_rmdir+0x16/0x20

dlm_send:
mutex_lock+0x2b/0x50
get_comm+0x34/0x140 [dlm]
dlm_nodeid_to_addr+0x18/0xd0 [dlm]
tcp_connect_to_sock+0xf4/0x2d0 [dlm]
process_send_sockets+0x1d2/0x260 [dlm]
worker_thread+0x170/0x2a0

Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/config.c   |  79 ++++-----------------
 fs/dlm/config.h   |   2 -
 fs/dlm/lowcomms.c | 205 +++++++++++++++++++++++++++++++++++++++++++++++-------
 fs/dlm/lowcomms.h |   2 +
 fs/dlm/main.c     |   2 +
 5 files changed, 200 insertions(+), 90 deletions(-)

(limited to 'fs')

diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index 9ccf7346834a..a0387dd8b1f0 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -750,6 +750,7 @@ static ssize_t comm_local_write(struct dlm_comm *cm, const char *buf,
 static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf, size_t len)
 {
 	struct sockaddr_storage *addr;
+	int rv;
 
 	if (len != sizeof(struct sockaddr_storage))
 		return -EINVAL;
@@ -762,6 +763,13 @@ static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf, size_t len)
 		return -ENOMEM;
 
 	memcpy(addr, buf, len);
+
+	rv = dlm_lowcomms_addr(cm->nodeid, addr, len);
+	if (rv) {
+		kfree(addr);
+		return rv;
+	}
+
 	cm->addr[cm->addr_count++] = addr;
 	return len;
 }
@@ -878,34 +886,7 @@ static void put_space(struct dlm_space *sp)
 	config_item_put(&sp->group.cg_item);
 }
 
-static int addr_compare(struct sockaddr_storage *x, struct sockaddr_storage *y)
-{
-	switch (x->ss_family) {
-	case AF_INET: {
-		struct sockaddr_in *sinx = (struct sockaddr_in *)x;
-		struct sockaddr_in *siny = (struct sockaddr_in *)y;
-		if (sinx->sin_addr.s_addr != siny->sin_addr.s_addr)
-			return 0;
-		if (sinx->sin_port != siny->sin_port)
-			return 0;
-		break;
-	}
-	case AF_INET6: {
-		struct sockaddr_in6 *sinx = (struct sockaddr_in6 *)x;
-		struct sockaddr_in6 *siny = (struct sockaddr_in6 *)y;
-		if (!ipv6_addr_equal(&sinx->sin6_addr, &siny->sin6_addr))
-			return 0;
-		if (sinx->sin6_port != siny->sin6_port)
-			return 0;
-		break;
-	}
-	default:
-		return 0;
-	}
-	return 1;
-}
-
-static struct dlm_comm *get_comm(int nodeid, struct sockaddr_storage *addr)
+static struct dlm_comm *get_comm(int nodeid)
 {
 	struct config_item *i;
 	struct dlm_comm *cm = NULL;
@@ -919,19 +900,11 @@ static struct dlm_comm *get_comm(int nodeid, struct sockaddr_storage *addr)
 	list_for_each_entry(i, &comm_list->cg_children, ci_entry) {
 		cm = config_item_to_comm(i);
 
-		if (nodeid) {
-			if (cm->nodeid != nodeid)
-				continue;
-			found = 1;
-			config_item_get(i);
-			break;
-		} else {
-			if (!cm->addr_count || !addr_compare(cm->addr[0], addr))
-				continue;
-			found = 1;
-			config_item_get(i);
-			break;
-		}
+		if (cm->nodeid != nodeid)
+			continue;
+		found = 1;
+		config_item_get(i);
+		break;
 	}
 	mutex_unlock(&clusters_root.subsys.su_mutex);
 
@@ -995,7 +968,7 @@ int dlm_config_nodes(char *lsname, struct dlm_config_node **nodes_out,
 
 int dlm_comm_seq(int nodeid, uint32_t *seq)
 {
-	struct dlm_comm *cm = get_comm(nodeid, NULL);
+	struct dlm_comm *cm = get_comm(nodeid);
 	if (!cm)
 		return -EEXIST;
 	*seq = cm->seq;
@@ -1003,28 +976,6 @@ int dlm_comm_seq(int nodeid, uint32_t *seq)
 	return 0;
 }
 
-int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr)
-{
-	struct dlm_comm *cm = get_comm(nodeid, NULL);
-	if (!cm)
-		return -EEXIST;
-	if (!cm->addr_count)
-		return -ENOENT;
-	memcpy(addr, cm->addr[0], sizeof(*addr));
-	put_comm(cm);
-	return 0;
-}
-
-int dlm_addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid)
-{
-	struct dlm_comm *cm = get_comm(0, addr);
-	if (!cm)
-		return -EEXIST;
-	*nodeid = cm->nodeid;
-	put_comm(cm);
-	return 0;
-}
-
 int dlm_our_nodeid(void)
 {
 	return local_comm ? local_comm->nodeid : 0;
diff --git a/fs/dlm/config.h b/fs/dlm/config.h
index dbd35a08f3a5..f30697bc2780 100644
--- a/fs/dlm/config.h
+++ b/fs/dlm/config.h
@@ -46,8 +46,6 @@ void dlm_config_exit(void);
 int dlm_config_nodes(char *lsname, struct dlm_config_node **nodes_out,
 		     int *count_out);
 int dlm_comm_seq(int nodeid, uint32_t *seq);
-int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr);
-int dlm_addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid);
 int dlm_our_nodeid(void);
 int dlm_our_addr(struct sockaddr_storage *addr, int num);
 
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 5c1b0e38c7a4..522a69fccd84 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -140,6 +140,16 @@ struct writequeue_entry {
 	struct connection *con;
 };
 
+struct dlm_node_addr {
+	struct list_head list;
+	int nodeid;
+	int addr_count;
+	struct sockaddr_storage *addr[DLM_MAX_ADDR_COUNT];
+};
+
+static LIST_HEAD(dlm_node_addrs);
+static DEFINE_SPINLOCK(dlm_node_addrs_spin);
+
 static struct sockaddr_storage *dlm_local_addr[DLM_MAX_ADDR_COUNT];
 static int dlm_local_count;
 static int dlm_allow_conn;
@@ -264,31 +274,146 @@ static struct connection *assoc2con(int assoc_id)
 	return NULL;
 }
 
-static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr)
+static struct dlm_node_addr *find_node_addr(int nodeid)
+{
+	struct dlm_node_addr *na;
+
+	list_for_each_entry(na, &dlm_node_addrs, list) {
+		if (na->nodeid == nodeid)
+			return na;
+	}
+	return NULL;
+}
+
+static int addr_compare(struct sockaddr_storage *x, struct sockaddr_storage *y)
 {
-	struct sockaddr_storage addr;
-	int error;
+	switch (x->ss_family) {
+	case AF_INET: {
+		struct sockaddr_in *sinx = (struct sockaddr_in *)x;
+		struct sockaddr_in *siny = (struct sockaddr_in *)y;
+		if (sinx->sin_addr.s_addr != siny->sin_addr.s_addr)
+			return 0;
+		if (sinx->sin_port != siny->sin_port)
+			return 0;
+		break;
+	}
+	case AF_INET6: {
+		struct sockaddr_in6 *sinx = (struct sockaddr_in6 *)x;
+		struct sockaddr_in6 *siny = (struct sockaddr_in6 *)y;
+		if (!ipv6_addr_equal(&sinx->sin6_addr, &siny->sin6_addr))
+			return 0;
+		if (sinx->sin6_port != siny->sin6_port)
+			return 0;
+		break;
+	}
+	default:
+		return 0;
+	}
+	return 1;
+}
+
+static int nodeid_to_addr(int nodeid, struct sockaddr_storage *sas_out,
+			  struct sockaddr *sa_out)
+{
+	struct sockaddr_storage sas;
+	struct dlm_node_addr *na;
 
 	if (!dlm_local_count)
 		return -1;
 
-	error = dlm_nodeid_to_addr(nodeid, &addr);
-	if (error)
-		return error;
+	spin_lock(&dlm_node_addrs_spin);
+	na = find_node_addr(nodeid);
+	if (na && na->addr_count)
+		memcpy(&sas, na->addr[0], sizeof(struct sockaddr_storage));
+	spin_unlock(&dlm_node_addrs_spin);
+
+	if (!na)
+		return -EEXIST;
+
+	if (!na->addr_count)
+		return -ENOENT;
+
+	if (sas_out)
+		memcpy(sas_out, &sas, sizeof(struct sockaddr_storage));
+
+	if (!sa_out)
+		return 0;
 
 	if (dlm_local_addr[0]->ss_family == AF_INET) {
-		struct sockaddr_in *in4  = (struct sockaddr_in *) &addr;
-		struct sockaddr_in *ret4 = (struct sockaddr_in *) retaddr;
+		struct sockaddr_in *in4  = (struct sockaddr_in *) &sas;
+		struct sockaddr_in *ret4 = (struct sockaddr_in *) sa_out;
 		ret4->sin_addr.s_addr = in4->sin_addr.s_addr;
 	} else {
-		struct sockaddr_in6 *in6  = (struct sockaddr_in6 *) &addr;
-		struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *) retaddr;
+		struct sockaddr_in6 *in6  = (struct sockaddr_in6 *) &sas;
+		struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *) sa_out;
 		ret6->sin6_addr = in6->sin6_addr;
 	}
 
 	return 0;
 }
 
+static int addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid)
+{
+	struct dlm_node_addr *na;
+	int rv = -EEXIST;
+
+	spin_lock(&dlm_node_addrs_spin);
+	list_for_each_entry(na, &dlm_node_addrs, list) {
+		if (!na->addr_count)
+			continue;
+
+		if (!addr_compare(na->addr[0], addr))
+			continue;
+
+		*nodeid = na->nodeid;
+		rv = 0;
+		break;
+	}
+	spin_unlock(&dlm_node_addrs_spin);
+	return rv;
+}
+
+int dlm_lowcomms_addr(int nodeid, struct sockaddr_storage *addr, int len)
+{
+	struct sockaddr_storage *new_addr;
+	struct dlm_node_addr *new_node, *na;
+
+	new_node = kzalloc(sizeof(struct dlm_node_addr), GFP_NOFS);
+	if (!new_node)
+		return -ENOMEM;
+
+	new_addr = kzalloc(sizeof(struct sockaddr_storage), GFP_NOFS);
+	if (!new_addr) {
+		kfree(new_node);
+		return -ENOMEM;
+	}
+
+	memcpy(new_addr, addr, len);
+
+	spin_lock(&dlm_node_addrs_spin);
+	na = find_node_addr(nodeid);
+	if (!na) {
+		new_node->nodeid = nodeid;
+		new_node->addr[0] = new_addr;
+		new_node->addr_count = 1;
+		list_add(&new_node->list, &dlm_node_addrs);
+		spin_unlock(&dlm_node_addrs_spin);
+		return 0;
+	}
+
+	if (na->addr_count >= DLM_MAX_ADDR_COUNT) {
+		spin_unlock(&dlm_node_addrs_spin);
+		kfree(new_addr);
+		kfree(new_node);
+		return -ENOSPC;
+	}
+
+	na->addr[na->addr_count++] = new_addr;
+	spin_unlock(&dlm_node_addrs_spin);
+	kfree(new_node);
+	return 0;
+}
+
 /* Data available on socket or listen socket received a connect */
 static void lowcomms_data_ready(struct sock *sk, int count_unused)
 {
@@ -510,7 +635,7 @@ static void process_sctp_notification(struct connection *con,
 				return;
 			}
 			make_sockaddr(&prim.ssp_addr, 0, &addr_len);
-			if (dlm_addr_to_nodeid(&prim.ssp_addr, &nodeid)) {
+			if (addr_to_nodeid(&prim.ssp_addr, &nodeid)) {
 				unsigned char *b=(unsigned char *)&prim.ssp_addr;
 				log_print("reject connect from unknown addr");
 				print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE, 
@@ -747,7 +872,7 @@ static int tcp_accept_from_sock(struct connection *con)
 
 	/* Get the new node's NODEID */
 	make_sockaddr(&peeraddr, 0, &len);
-	if (dlm_addr_to_nodeid(&peeraddr, &nodeid)) {
+	if (addr_to_nodeid(&peeraddr, &nodeid)) {
 		unsigned char *b=(unsigned char *)&peeraddr;
 		log_print("connect from non cluster node");
 		print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE, 
@@ -862,7 +987,7 @@ static void sctp_init_assoc(struct connection *con)
 	if (con->retries++ > MAX_CONNECT_RETRIES)
 		return;
 
-	if (nodeid_to_addr(con->nodeid, (struct sockaddr *)&rem_addr)) {
+	if (nodeid_to_addr(con->nodeid, NULL, (struct sockaddr *)&rem_addr)) {
 		log_print("no address for nodeid %d", con->nodeid);
 		return;
 	}
@@ -928,11 +1053,11 @@ static void sctp_init_assoc(struct connection *con)
 /* Connect a new socket to its peer */
 static void tcp_connect_to_sock(struct connection *con)
 {
-	int result = -EHOSTUNREACH;
 	struct sockaddr_storage saddr, src_addr;
 	int addr_len;
 	struct socket *sock = NULL;
 	int one = 1;
+	int result;
 
 	if (con->nodeid == 0) {
 		log_print("attempt to connect sock 0 foiled");
@@ -944,10 +1069,8 @@ static void tcp_connect_to_sock(struct connection *con)
 		goto out;
 
 	/* Some odd races can cause double-connects, ignore them */
-	if (con->sock) {
-		result = 0;
+	if (con->sock)
 		goto out;
-	}
 
 	/* Create a socket to communicate with */
 	result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_STREAM,
@@ -956,8 +1079,11 @@ static void tcp_connect_to_sock(struct connection *con)
 		goto out_err;
 
 	memset(&saddr, 0, sizeof(saddr));
-	if (dlm_nodeid_to_addr(con->nodeid, &saddr))
+	result = nodeid_to_addr(con->nodeid, &saddr, NULL);
+	if (result < 0) {
+		log_print("no address for nodeid %d", con->nodeid);
 		goto out_err;
+	}
 
 	sock->sk->sk_user_data = con;
 	con->rx_action = receive_from_sock;
@@ -983,8 +1109,7 @@ static void tcp_connect_to_sock(struct connection *con)
 	kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (char *)&one,
 			  sizeof(one));
 
-	result =
-		sock->ops->connect(sock, (struct sockaddr *)&saddr, addr_len,
+	result = sock->ops->connect(sock, (struct sockaddr *)&saddr, addr_len,
 				   O_NONBLOCK);
 	if (result == -EINPROGRESS)
 		result = 0;
@@ -1002,11 +1127,17 @@ out_err:
 	 * Some errors are fatal and this list might need adjusting. For other
 	 * errors we try again until the max number of retries is reached.
 	 */
-	if (result != -EHOSTUNREACH && result != -ENETUNREACH &&
-	    result != -ENETDOWN && result != -EINVAL
-	    && result != -EPROTONOSUPPORT) {
+	if (result != -EHOSTUNREACH &&
+	    result != -ENETUNREACH &&
+	    result != -ENETDOWN && 
+	    result != -EINVAL &&
+	    result != -EPROTONOSUPPORT) {
+		log_print("connect %d try %d error %d", con->nodeid,
+			  con->retries, result);
+		mutex_unlock(&con->sock_mutex);
+		msleep(1000);
 		lowcomms_connect_sock(con);
-		result = 0;
+		return;
 	}
 out:
 	mutex_unlock(&con->sock_mutex);
@@ -1414,6 +1545,7 @@ static void clean_one_writequeue(struct connection *con)
 int dlm_lowcomms_close(int nodeid)
 {
 	struct connection *con;
+	struct dlm_node_addr *na;
 
 	log_print("closing connection to node %d", nodeid);
 	con = nodeid2con(nodeid, 0);
@@ -1428,6 +1560,17 @@ int dlm_lowcomms_close(int nodeid)
 		clean_one_writequeue(con);
 		close_connection(con, true);
 	}
+
+	spin_lock(&dlm_node_addrs_spin);
+	na = find_node_addr(nodeid);
+	if (na) {
+		list_del(&na->list);
+		while (na->addr_count--)
+			kfree(na->addr[na->addr_count]);
+		kfree(na);
+	}
+	spin_unlock(&dlm_node_addrs_spin);
+
 	return 0;
 }
 
@@ -1577,3 +1720,17 @@ fail_destroy:
 fail:
 	return error;
 }
+
+void dlm_lowcomms_exit(void)
+{
+	struct dlm_node_addr *na, *safe;
+
+	spin_lock(&dlm_node_addrs_spin);
+	list_for_each_entry_safe(na, safe, &dlm_node_addrs, list) {
+		list_del(&na->list);
+		while (na->addr_count--)
+			kfree(na->addr[na->addr_count]);
+		kfree(na);
+	}
+	spin_unlock(&dlm_node_addrs_spin);
+}
diff --git a/fs/dlm/lowcomms.h b/fs/dlm/lowcomms.h
index 1311e6426287..67462e54fc2f 100644
--- a/fs/dlm/lowcomms.h
+++ b/fs/dlm/lowcomms.h
@@ -16,10 +16,12 @@
 
 int dlm_lowcomms_start(void);
 void dlm_lowcomms_stop(void);
+void dlm_lowcomms_exit(void);
 int dlm_lowcomms_close(int nodeid);
 void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc);
 void dlm_lowcomms_commit_buffer(void *mh);
 int dlm_lowcomms_connect_node(int nodeid);
+int dlm_lowcomms_addr(int nodeid, struct sockaddr_storage *addr, int len);
 
 #endif				/* __LOWCOMMS_DOT_H__ */
 
diff --git a/fs/dlm/main.c b/fs/dlm/main.c
index 5a59efa0bb46..079c0bd71ab7 100644
--- a/fs/dlm/main.c
+++ b/fs/dlm/main.c
@@ -17,6 +17,7 @@
 #include "user.h"
 #include "memory.h"
 #include "config.h"
+#include "lowcomms.h"
 
 static int __init init_dlm(void)
 {
@@ -78,6 +79,7 @@ static void __exit exit_dlm(void)
 	dlm_config_exit();
 	dlm_memory_exit();
 	dlm_lockspace_exit();
+	dlm_lowcomms_exit();
 	dlm_unregister_debugfs();
 }
 
-- 
cgit 


From 6ad2291624824c1de19dbbbbb6d4f9f601b60781 Mon Sep 17 00:00:00 2001
From: David Teigland <teigland@redhat.com>
Date: Thu, 2 Aug 2012 11:00:05 -0500
Subject: dlm: fix uninitialized spinlock

Use DEFINE_SPINLOCK for global dlm_cb_seq_spin.

Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/ast.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/dlm/ast.c b/fs/dlm/ast.c
index 63dc19c54d5a..27a6ba9aaeec 100644
--- a/fs/dlm/ast.c
+++ b/fs/dlm/ast.c
@@ -15,8 +15,8 @@
 #include "lock.h"
 #include "user.h"
 
-static uint64_t			dlm_cb_seq;
-static spinlock_t		dlm_cb_seq_spin;
+static uint64_t dlm_cb_seq;
+static DEFINE_SPINLOCK(dlm_cb_seq_spin);
 
 static void dlm_dump_lkb_callbacks(struct dlm_lkb *lkb)
 {
-- 
cgit 


From 475f230c6072fb2186f48b23943afcd0ee3a8343 Mon Sep 17 00:00:00 2001
From: David Teigland <teigland@redhat.com>
Date: Thu, 2 Aug 2012 11:08:21 -0500
Subject: dlm: fix unlock balance warnings

The in_recovery rw_semaphore has always been acquired and
released by different threads by design.  To work around
the "BUG: bad unlock balance detected!" messages, adjust
things so the dlm_recoverd thread always does both down_write
and up_write.

Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/dlm_internal.h | 46 ++++++++++++++++++++++++++++++++++++----------
 fs/dlm/lockspace.c    | 15 ++++++++++++---
 fs/dlm/member.c       | 17 +++++++++++------
 fs/dlm/rcom.c         |  2 +-
 fs/dlm/recoverd.c     | 27 ++++++++++++++++++---------
 fs/dlm/recoverd.h     |  1 -
 6 files changed, 78 insertions(+), 30 deletions(-)

(limited to 'fs')

diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 9d3e485f88c8..871c1abf6029 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -604,6 +604,7 @@ struct dlm_ls {
 	struct idr		ls_recover_idr;
 	spinlock_t		ls_recover_idr_lock;
 	wait_queue_head_t	ls_wait_general;
+	wait_queue_head_t	ls_recover_lock_wait;
 	struct mutex		ls_clear_proc_locks;
 
 	struct list_head	ls_root_list;	/* root resources */
@@ -616,15 +617,40 @@ struct dlm_ls {
 	char			ls_name[1];
 };
 
-#define LSFL_WORK		0
-#define LSFL_RUNNING		1
-#define LSFL_RECOVERY_STOP	2
-#define LSFL_RCOM_READY		3
-#define LSFL_RCOM_WAIT		4
-#define LSFL_UEVENT_WAIT	5
-#define LSFL_TIMEWARN		6
-#define LSFL_CB_DELAY		7
-#define LSFL_NODIR		8
+/*
+ * LSFL_RECOVER_STOP - dlm_ls_stop() sets this to tell dlm recovery routines
+ * that they should abort what they're doing so new recovery can be started.
+ *
+ * LSFL_RECOVER_DOWN - dlm_ls_stop() sets this to tell dlm_recoverd that it
+ * should do down_write() on the in_recovery rw_semaphore. (doing down_write
+ * within dlm_ls_stop causes complaints about the lock acquired/released
+ * in different contexts.)
+ *
+ * LSFL_RECOVER_LOCK - dlm_recoverd holds the in_recovery rw_semaphore.
+ * It sets this after it is done with down_write() on the in_recovery
+ * rw_semaphore and clears it after it has released the rw_semaphore.
+ *
+ * LSFL_RECOVER_WORK - dlm_ls_start() sets this to tell dlm_recoverd that it
+ * should begin recovery of the lockspace.
+ *
+ * LSFL_RUNNING - set when normal locking activity is enabled.
+ * dlm_ls_stop() clears this to tell dlm locking routines that they should
+ * quit what they are doing so recovery can run.  dlm_recoverd sets
+ * this after recovery is finished.
+ */
+
+#define LSFL_RECOVER_STOP	0
+#define LSFL_RECOVER_DOWN	1
+#define LSFL_RECOVER_LOCK	2
+#define LSFL_RECOVER_WORK	3
+#define LSFL_RUNNING		4
+
+#define LSFL_RCOM_READY		5
+#define LSFL_RCOM_WAIT		6
+#define LSFL_UEVENT_WAIT	7
+#define LSFL_TIMEWARN		8
+#define LSFL_CB_DELAY		9
+#define LSFL_NODIR		10
 
 /* much of this is just saving user space pointers associated with the
    lock that we pass back to the user lib with an ast */
@@ -667,7 +693,7 @@ static inline int dlm_locking_stopped(struct dlm_ls *ls)
 
 static inline int dlm_recovery_stopped(struct dlm_ls *ls)
 {
-	return test_bit(LSFL_RECOVERY_STOP, &ls->ls_flags);
+	return test_bit(LSFL_RECOVER_STOP, &ls->ls_flags);
 }
 
 static inline int dlm_no_directory(struct dlm_ls *ls)
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index 952557d00ccd..2e99fb0c9737 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -582,8 +582,6 @@ static int new_lockspace(const char *name, const char *cluster,
 	INIT_LIST_HEAD(&ls->ls_root_list);
 	init_rwsem(&ls->ls_root_sem);
 
-	down_write(&ls->ls_in_recovery);
-
 	spin_lock(&lslist_lock);
 	ls->ls_create_count = 1;
 	list_add(&ls->ls_list, &lslist);
@@ -597,13 +595,24 @@ static int new_lockspace(const char *name, const char *cluster,
 		}
 	}
 
-	/* needs to find ls in lslist */
+	init_waitqueue_head(&ls->ls_recover_lock_wait);
+
+	/*
+	 * Once started, dlm_recoverd first looks for ls in lslist, then
+	 * initializes ls_in_recovery as locked in "down" mode.  We need
+	 * to wait for the wakeup from dlm_recoverd because in_recovery
+	 * has to start out in down mode.
+	 */
+
 	error = dlm_recoverd_start(ls);
 	if (error) {
 		log_error(ls, "can't start dlm_recoverd %d", error);
 		goto out_callback;
 	}
 
+	wait_event(ls->ls_recover_lock_wait,
+		   test_bit(LSFL_RECOVER_LOCK, &ls->ls_flags));
+
 	ls->ls_kobj.kset = dlm_kset;
 	error = kobject_init_and_add(&ls->ls_kobj, &dlm_ktype, NULL,
 				     "%s", ls->ls_name);
diff --git a/fs/dlm/member.c b/fs/dlm/member.c
index 862640a36d5c..476557b54921 100644
--- a/fs/dlm/member.c
+++ b/fs/dlm/member.c
@@ -616,13 +616,13 @@ int dlm_ls_stop(struct dlm_ls *ls)
 	down_write(&ls->ls_recv_active);
 
 	/*
-	 * Abort any recovery that's in progress (see RECOVERY_STOP,
+	 * Abort any recovery that's in progress (see RECOVER_STOP,
 	 * dlm_recovery_stopped()) and tell any other threads running in the
 	 * dlm to quit any processing (see RUNNING, dlm_locking_stopped()).
 	 */
 
 	spin_lock(&ls->ls_recover_lock);
-	set_bit(LSFL_RECOVERY_STOP, &ls->ls_flags);
+	set_bit(LSFL_RECOVER_STOP, &ls->ls_flags);
 	new = test_and_clear_bit(LSFL_RUNNING, &ls->ls_flags);
 	ls->ls_recover_seq++;
 	spin_unlock(&ls->ls_recover_lock);
@@ -642,12 +642,16 @@ int dlm_ls_stop(struct dlm_ls *ls)
 	 *    when recovery is complete.
 	 */
 
-	if (new)
-		down_write(&ls->ls_in_recovery);
+	if (new) {
+		set_bit(LSFL_RECOVER_DOWN, &ls->ls_flags);
+		wake_up_process(ls->ls_recoverd_task);
+		wait_event(ls->ls_recover_lock_wait,
+			   test_bit(LSFL_RECOVER_LOCK, &ls->ls_flags));
+	}
 
 	/*
 	 * The recoverd suspend/resume makes sure that dlm_recoverd (if
-	 * running) has noticed RECOVERY_STOP above and quit processing the
+	 * running) has noticed RECOVER_STOP above and quit processing the
 	 * previous recovery.
 	 */
 
@@ -709,7 +713,8 @@ int dlm_ls_start(struct dlm_ls *ls)
 		kfree(rv_old);
 	}
 
-	dlm_recoverd_kick(ls);
+	set_bit(LSFL_RECOVER_WORK, &ls->ls_flags);
+	wake_up_process(ls->ls_recoverd_task);
 	return 0;
 
  fail:
diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c
index 87f1a56eab32..9d61947d473a 100644
--- a/fs/dlm/rcom.c
+++ b/fs/dlm/rcom.c
@@ -581,7 +581,7 @@ void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid)
 
 	spin_lock(&ls->ls_recover_lock);
 	status = ls->ls_recover_status;
-	stop = test_bit(LSFL_RECOVERY_STOP, &ls->ls_flags);
+	stop = test_bit(LSFL_RECOVER_STOP, &ls->ls_flags);
 	seq = ls->ls_recover_seq;
 	spin_unlock(&ls->ls_recover_lock);
 
diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c
index 88ce65ff021e..32f9f8926ec3 100644
--- a/fs/dlm/recoverd.c
+++ b/fs/dlm/recoverd.c
@@ -41,6 +41,7 @@ static int enable_locking(struct dlm_ls *ls, uint64_t seq)
 		set_bit(LSFL_RUNNING, &ls->ls_flags);
 		/* unblocks processes waiting to enter the dlm */
 		up_write(&ls->ls_in_recovery);
+		clear_bit(LSFL_RECOVER_LOCK, &ls->ls_flags);
 		error = 0;
 	}
 	spin_unlock(&ls->ls_recover_lock);
@@ -262,7 +263,7 @@ static void do_ls_recovery(struct dlm_ls *ls)
 	rv = ls->ls_recover_args;
 	ls->ls_recover_args = NULL;
 	if (rv && ls->ls_recover_seq == rv->seq)
-		clear_bit(LSFL_RECOVERY_STOP, &ls->ls_flags);
+		clear_bit(LSFL_RECOVER_STOP, &ls->ls_flags);
 	spin_unlock(&ls->ls_recover_lock);
 
 	if (rv) {
@@ -282,26 +283,34 @@ static int dlm_recoverd(void *arg)
 		return -1;
 	}
 
+	down_write(&ls->ls_in_recovery);
+	set_bit(LSFL_RECOVER_LOCK, &ls->ls_flags);
+	wake_up(&ls->ls_recover_lock_wait);
+
 	while (!kthread_should_stop()) {
 		set_current_state(TASK_INTERRUPTIBLE);
-		if (!test_bit(LSFL_WORK, &ls->ls_flags))
+		if (!test_bit(LSFL_RECOVER_WORK, &ls->ls_flags) &&
+		    !test_bit(LSFL_RECOVER_DOWN, &ls->ls_flags))
 			schedule();
 		set_current_state(TASK_RUNNING);
 
-		if (test_and_clear_bit(LSFL_WORK, &ls->ls_flags))
+		if (test_and_clear_bit(LSFL_RECOVER_DOWN, &ls->ls_flags)) {
+			down_write(&ls->ls_in_recovery);
+			set_bit(LSFL_RECOVER_LOCK, &ls->ls_flags);
+			wake_up(&ls->ls_recover_lock_wait);
+		}
+
+		if (test_and_clear_bit(LSFL_RECOVER_WORK, &ls->ls_flags))
 			do_ls_recovery(ls);
 	}
 
+	if (test_bit(LSFL_RECOVER_LOCK, &ls->ls_flags))
+		up_write(&ls->ls_in_recovery);
+
 	dlm_put_lockspace(ls);
 	return 0;
 }
 
-void dlm_recoverd_kick(struct dlm_ls *ls)
-{
-	set_bit(LSFL_WORK, &ls->ls_flags);
-	wake_up_process(ls->ls_recoverd_task);
-}
-
 int dlm_recoverd_start(struct dlm_ls *ls)
 {
 	struct task_struct *p;
diff --git a/fs/dlm/recoverd.h b/fs/dlm/recoverd.h
index 866657c5d69d..8856079733fa 100644
--- a/fs/dlm/recoverd.h
+++ b/fs/dlm/recoverd.h
@@ -14,7 +14,6 @@
 #ifndef __RECOVERD_DOT_H__
 #define __RECOVERD_DOT_H__
 
-void dlm_recoverd_kick(struct dlm_ls *ls);
 void dlm_recoverd_stop(struct dlm_ls *ls);
 int dlm_recoverd_start(struct dlm_ls *ls);
 void dlm_recoverd_suspend(struct dlm_ls *ls);
-- 
cgit 


From b4c798cf695dc7cee9798a686128461ad0070115 Mon Sep 17 00:00:00 2001
From: Xue Ying <ying.xue@windriver.com>
Date: Fri, 10 Aug 2012 10:58:37 +0800
Subject: dlm: remove redundant variable assignments

Once the tcp_create_listen_sock() is returned successfully, we
will invoke add_sock() immediately. In add_sock(), the 'con'
variable is assigned to 'sk_user_data', meanwhile, the 'sock' is
also set to 'con->sock'. So it's unnecessary to do the same thing
in tcp_create_listen_sock().

Signed-off-by: Xue Ying <ying.xue@windriver.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/lowcomms.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'fs')

diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 522a69fccd84..3e6aaccce951 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1175,10 +1175,8 @@ static struct socket *tcp_create_listen_sock(struct connection *con,
 	if (result < 0) {
 		log_print("Failed to set SO_REUSEADDR on socket: %d", result);
 	}
-	sock->sk->sk_user_data = con;
 	con->rx_action = tcp_accept_from_sock;
 	con->connect_action = tcp_connect_to_sock;
-	con->sock = sock;
 
 	/* Bind to our port */
 	make_sockaddr(saddr, dlm_config.ci_tcp_port, &addr_len);
-- 
cgit 


From 4dd40f0cd99a3500c6df80eb8f537678559c761e Mon Sep 17 00:00:00 2001
From: Ying Xue <ying.xue@windriver.com>
Date: Fri, 10 Aug 2012 14:58:42 +0800
Subject: dlm: convert add_sock routine return value type to void

Since add_sock() always returns a success code - 0, its return
value type should be changed from integer to void.

Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/lowcomms.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 3e6aaccce951..3637f3f18824 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -473,7 +473,7 @@ int dlm_lowcomms_connect_node(int nodeid)
 }
 
 /* Make a socket active */
-static int add_sock(struct socket *sock, struct connection *con)
+static void add_sock(struct socket *sock, struct connection *con)
 {
 	con->sock = sock;
 
@@ -483,7 +483,6 @@ static int add_sock(struct socket *sock, struct connection *con)
 	con->sock->sk->sk_state_change = lowcomms_state_change;
 	con->sock->sk->sk_user_data = con;
 	con->sock->sk->sk_allocation = GFP_NOFS;
-	return 0;
 }
 
 /* Add the port number to an IPv6 or 4 sockaddr and return the address
-- 
cgit 


From 9c5bef5849c9fde1a37ac005299f759440cbaf4c Mon Sep 17 00:00:00 2001
From: Ying Xue <ying.xue@windriver.com>
Date: Mon, 13 Aug 2012 14:29:55 +0800
Subject: dlm: cleanup send_to_sock routine

Remove unnecessary code form send_to_sock routine.

Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/lowcomms.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 3637f3f18824..331ea4f94efd 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1486,8 +1486,7 @@ static void send_to_sock(struct connection *con)
 				}
 				cond_resched();
 				goto out;
-			}
-			if (ret <= 0)
+			} else if (ret < 0)
 				goto send_error;
 		}
 
@@ -1504,7 +1503,6 @@ static void send_to_sock(struct connection *con)
 		if (e->len == 0 && e->users == 0) {
 			list_del(&e->list);
 			free_entry(e);
-			continue;
 		}
 	}
 	spin_unlock(&con->writequeue_lock);
@@ -1522,7 +1520,6 @@ out_connect:
 	mutex_unlock(&con->sock_mutex);
 	if (!test_bit(CF_INIT_PENDING, &con->flags))
 		lowcomms_connect_sock(con);
-	return;
 }
 
 static void clean_one_writequeue(struct connection *con)
-- 
cgit 


From 41f63c5359d14ca995172b8f6eaffd93f60fec54 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 3 Aug 2012 10:30:47 -0700
Subject: workqueue: use mod_delayed_work() instead of cancel + queue

Convert delayed_work users doing cancel_delayed_work() followed by
queue_delayed_work() to mod_delayed_work().

Most conversions are straight-forward.  Ones worth mentioning are,

* drivers/edac/edac_mc.c: edac_mc_workq_setup() converted to always
  use mod_delayed_work() and cancel loop in
  edac_mc_reset_delay_period() is dropped.

* drivers/platform/x86/thinkpad_acpi.c: No need to remember whether
  watchdog is active or not.  @fan_watchdog_active and related code
  dropped.

* drivers/power/charger-manager.c: Seemingly a lot of
  delayed_work_pending() abuse going on here.
  [delayed_]work_pending() are unsynchronized and racy when used like
  this.  I converted one instance in fullbatt_handler().  Please
  conver the rest so that it invokes workqueue APIs for the intended
  target state rather than trying to game work item pending state
  transitions.  e.g. if timer should be modified - call
  mod_delayed_work(), canceled - call cancel_delayed_work[_sync]().

* drivers/thermal/thermal_sys.c: thermal_zone_device_set_polling()
  simplified.  Note that round_jiffies() calls in this function are
  meaningless.  round_jiffies() work on absolute jiffies not delta
  delay used by delayed_work.

v2: Tomi pointed out that __cancel_delayed_work() users can't be
    safely converted to mod_delayed_work().  They could be calling it
    from irq context and if that happens while delayed_work_timer_fn()
    is running, it could deadlock.  __cancel_delayed_work() users are
    dropped.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Acked-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Acked-by: Anton Vorontsov <cbouatmailru@gmail.com>
Acked-by: David Howells <dhowells@redhat.com>
Cc: Tomi Valkeinen <tomi.valkeinen@ti.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Doug Thompson <dougthompson@xmission.com>
Cc: David Airlie <airlied@linux.ie>
Cc: Roland Dreier <roland@kernel.org>
Cc: "John W. Linville" <linville@tuxdriver.com>
Cc: Zhang Rui <rui.zhang@intel.com>
Cc: Len Brown <len.brown@intel.com>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
Cc: Johannes Berg <johannes@sipsolutions.net>
---
 fs/afs/callback.c   |  4 +---
 fs/afs/server.c     | 10 ++--------
 fs/afs/vlocation.c  | 14 +++-----------
 fs/nfs/nfs4renewd.c |  3 +--
 4 files changed, 7 insertions(+), 24 deletions(-)

(limited to 'fs')

diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index 587ef5123cd8..7ef637d7f3a5 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -351,9 +351,7 @@ void afs_dispatch_give_up_callbacks(struct work_struct *work)
  */
 void afs_flush_callback_breaks(struct afs_server *server)
 {
-	cancel_delayed_work(&server->cb_break_work);
-	queue_delayed_work(afs_callback_update_worker,
-			   &server->cb_break_work, 0);
+	mod_delayed_work(afs_callback_update_worker, &server->cb_break_work, 0);
 }
 
 #if 0
diff --git a/fs/afs/server.c b/fs/afs/server.c
index d59b7516e943..f342acf3547d 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -285,12 +285,7 @@ static void afs_reap_server(struct work_struct *work)
 		expiry = server->time_of_death + afs_server_timeout;
 		if (expiry > now) {
 			delay = (expiry - now) * HZ;
-			if (!queue_delayed_work(afs_wq, &afs_server_reaper,
-						delay)) {
-				cancel_delayed_work(&afs_server_reaper);
-				queue_delayed_work(afs_wq, &afs_server_reaper,
-						   delay);
-			}
+			mod_delayed_work(afs_wq, &afs_server_reaper, delay);
 			break;
 		}
 
@@ -323,6 +318,5 @@ static void afs_reap_server(struct work_struct *work)
 void __exit afs_purge_servers(void)
 {
 	afs_server_timeout = 0;
-	cancel_delayed_work(&afs_server_reaper);
-	queue_delayed_work(afs_wq, &afs_server_reaper, 0);
+	mod_delayed_work(afs_wq, &afs_server_reaper, 0);
 }
diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c
index 431984d2e372..57bcb1596530 100644
--- a/fs/afs/vlocation.c
+++ b/fs/afs/vlocation.c
@@ -561,12 +561,7 @@ static void afs_vlocation_reaper(struct work_struct *work)
 		if (expiry > now) {
 			delay = (expiry - now) * HZ;
 			_debug("delay %lu", delay);
-			if (!queue_delayed_work(afs_wq, &afs_vlocation_reap,
-						delay)) {
-				cancel_delayed_work(&afs_vlocation_reap);
-				queue_delayed_work(afs_wq, &afs_vlocation_reap,
-						   delay);
-			}
+			mod_delayed_work(afs_wq, &afs_vlocation_reap, delay);
 			break;
 		}
 
@@ -614,13 +609,10 @@ void afs_vlocation_purge(void)
 	spin_lock(&afs_vlocation_updates_lock);
 	list_del_init(&afs_vlocation_updates);
 	spin_unlock(&afs_vlocation_updates_lock);
-	cancel_delayed_work(&afs_vlocation_update);
-	queue_delayed_work(afs_vlocation_update_worker,
-			   &afs_vlocation_update, 0);
+	mod_delayed_work(afs_vlocation_update_worker, &afs_vlocation_update, 0);
 	destroy_workqueue(afs_vlocation_update_worker);
 
-	cancel_delayed_work(&afs_vlocation_reap);
-	queue_delayed_work(afs_wq, &afs_vlocation_reap, 0);
+	mod_delayed_work(afs_wq, &afs_vlocation_reap, 0);
 }
 
 /*
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index 6930bec91bca..1720d32ffa54 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -117,8 +117,7 @@ nfs4_schedule_state_renewal(struct nfs_client *clp)
 		timeout = 5 * HZ;
 	dprintk("%s: requeueing work. Lease period = %ld\n",
 			__func__, (timeout + HZ - 1) / HZ);
-	cancel_delayed_work(&clp->cl_renewd);
-	schedule_delayed_work(&clp->cl_renewd, timeout);
+	mod_delayed_work(system_wq, &clp->cl_renewd, timeout);
 	set_bit(NFS_CS_RENEWD, &clp->cl_res_state);
 	spin_unlock(&clp->cl_lock);
 }
-- 
cgit 


From adb37c4c67f807f16beb222028fb3ce9a354dc2b Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 23 May 2012 18:01:20 -0600
Subject: userns: Make seq_file's user namespace accessible

struct file already has a user namespace associated with it
in file->f_cred->user_ns, unfortunately because struct
seq_file has no struct file backpointer associated with
it, it is difficult to get at the user namespace in seq_file
context.  Therefore add a helper function seq_user_ns to return
the associated user namespace and a user_ns field to struct
seq_file to be used in implementing seq_user_ns.

Cc: Al Viro <viro@ZenIV.linux.org.uk>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Acked-by: David S. Miller <davem@davemloft.net>
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/seq_file.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'fs')

diff --git a/fs/seq_file.c b/fs/seq_file.c
index 14cf9de1dbe1..99dffab4c4e4 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -9,6 +9,7 @@
 #include <linux/export.h>
 #include <linux/seq_file.h>
 #include <linux/slab.h>
+#include <linux/cred.h>
 
 #include <asm/uaccess.h>
 #include <asm/page.h>
@@ -56,6 +57,9 @@ int seq_open(struct file *file, const struct seq_operations *op)
 	memset(p, 0, sizeof(*p));
 	mutex_init(&p->lock);
 	p->op = op;
+#ifdef CONFIG_USER_NS
+	p->user_ns = file->f_cred->user_ns;
+#endif
 
 	/*
 	 * Wrappers around seq_open(e.g. swaps_open) need to be
-- 
cgit 


From a76cccbeef9e91b5f799e8853acac1ed1fc833cb Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Tue, 31 Jul 2012 14:55:51 +1000
Subject: xfs: fix uninitialised variable in xfs_rtbuf_get()

Results in this assert failure in generic/090:

XFS: Assertion failed: *nmap >= 1, file: fs/xfs/xfs_bmap.c, line: 4363
.....
Call Trace:
 [<ffffffff814680db>] xfs_bmapi_read+0x6b/0x370
 [<ffffffff814b64b2>] xfs_rtbuf_get+0x42/0x130
 [<ffffffff814b6f09>] xfs_rtget_summary+0x89/0x120
 [<ffffffff814b7bfe>] xfs_rtallocate_extent_size+0xce/0x340
 [<ffffffff814b89f0>] xfs_rtallocate_extent+0x240/0x290
 [<ffffffff81462c1a>] xfs_bmap_rtalloc+0x1ba/0x340
 [<ffffffff81463a65>] xfs_bmap_alloc+0x35/0x40
 [<ffffffff8146f111>] xfs_bmapi_allocate+0xf1/0x350
 [<ffffffff8146f9de>] xfs_bmapi_write+0x66e/0xa60
 [<ffffffff8144538a>] xfs_iomap_write_direct+0x22a/0x3f0
 [<ffffffff8143707b>] __xfs_get_blocks+0x38b/0x5d0
 [<ffffffff814372d4>] xfs_get_blocks_direct+0x14/0x20
 [<ffffffff811b0081>] do_blockdev_direct_IO+0xf71/0x1eb0
 [<ffffffff811b1015>] __blockdev_direct_IO+0x55/0x60
 [<ffffffff814355ca>] xfs_vm_direct_IO+0x11a/0x1e0
 [<ffffffff8112d617>] generic_file_direct_write+0xd7/0x1b0
 [<ffffffff8143e16c>] xfs_file_dio_aio_write+0x13c/0x320
 [<ffffffff8143e6f2>] xfs_file_aio_write+0x1c2/0x1d0
 [<ffffffff81174a07>] do_sync_write+0xa7/0xe0
 [<ffffffff81175288>] vfs_write+0xa8/0x160
 [<ffffffff81175702>] sys_pwrite64+0x92/0xb0
 [<ffffffff81b68f69>] system_call_fastpath+0x16/0x1b

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
---
 fs/xfs/xfs_rtalloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 92d4331cd4f1..ca28a4ba4b54 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -857,7 +857,7 @@ xfs_rtbuf_get(
 	xfs_buf_t	*bp;		/* block buffer, result */
 	xfs_inode_t	*ip;		/* bitmap or summary inode */
 	xfs_bmbt_irec_t	map;
-	int		nmap;
+	int		nmap = 1;
 	int		error;		/* error value */
 
 	ip = issum ? mp->m_rsumip : mp->m_rbmip;
-- 
cgit 


From 1ed845df60f3f02d4b7cd9fcad79ccb69c289f5c Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@inktank.com>
Date: Wed, 1 Aug 2012 09:56:49 -0500
Subject: xfs: kill struct declarations in xfs_mount.h

I noticed that "struct xfs_mount_args" was still declared in
"fs/xfs/xfs_mount.h".  That struct doesn't even exist any more (and
is obviously not referenced elsewhere in that header file).  While
in there, delete four other unneeded struct declarations in that
file.

Doing so highlights that "fs/xfs/xfs_trace.h" was relying indirectly
on "xfs_mount.h" to be #included in order to declare "struct
xfs_bmbt_irec", so add that declaration to resolve that issue.

Signed-off-by: Alex Elder <elder@inktank.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
---
 fs/xfs/xfs_mount.h | 5 -----
 fs/xfs/xfs_trace.h | 1 +
 2 files changed, 1 insertion(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 05a05a7b6119..deee09e534dc 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -54,12 +54,7 @@ typedef struct xfs_trans_reservations {
 #include "xfs_sync.h"
 
 struct xlog;
-struct xfs_mount_args;
 struct xfs_inode;
-struct xfs_bmbt_irec;
-struct xfs_bmap_free;
-struct xfs_extdelta;
-struct xfs_swapext;
 struct xfs_mru_cache;
 struct xfs_nameops;
 struct xfs_ail;
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index e5795dd6013a..7d36ccf57f93 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -37,6 +37,7 @@ struct xlog_recover;
 struct xlog_recover_item;
 struct xfs_buf_log_format;
 struct xfs_inode_log_format;
+struct xfs_bmbt_irec;
 
 DECLARE_EVENT_CLASS(xfs_attr_list_class,
 	TP_PROTO(struct xfs_attr_list_context *ctx),
-- 
cgit 


From 3cd52ab68b7f17eddbff46c1f8e5a105cd901f8e Mon Sep 17 00:00:00 2001
From: Chris Wright <chrisw@sous-sol.org>
Date: Thu, 9 Aug 2012 15:40:39 -0700
Subject: debugfs: make __create_file static

It's only used locally, no need to pollute global namespace.

Signed-off-by: Chris Wright <chrisw@sous-sol.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/debugfs/inode.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 4733eab34a23..2c9fafbe8425 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -291,9 +291,9 @@ static struct file_system_type debug_fs_type = {
 	.kill_sb =	kill_litter_super,
 };
 
-struct dentry *__create_file(const char *name, umode_t mode,
-				   struct dentry *parent, void *data,
-				   const struct file_operations *fops)
+static struct dentry *__create_file(const char *name, umode_t mode,
+				    struct dentry *parent, void *data,
+				    const struct file_operations *fops)
 {
 	struct dentry *dentry = NULL;
 	int error;
-- 
cgit 


From c4982110ae93d7575503feb81d15e93c0c5f393c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Tue, 7 Aug 2012 02:02:02 -0400
Subject: xfs: unlock the AGI buffer when looping in xfs_dialloc

Also update some commens in the area to make the code easier to read.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
---
 fs/xfs/xfs_ialloc.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 21e37b55f7e5..5aceb3f8ecd6 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -962,23 +962,22 @@ xfs_dialloc(
 		if (!pag->pagi_freecount && !okalloc)
 			goto nextag;
 
+		/*
+		 * Then read in the AGI buffer and recheck with the AGI buffer
+		 * lock held.
+		 */
 		error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
 		if (error)
 			goto out_error;
 
-		/*
-		 * Once the AGI has been read in we have to recheck
-		 * pagi_freecount with the AGI buffer lock held.
-		 */
 		if (pag->pagi_freecount) {
 			xfs_perag_put(pag);
 			goto out_alloc;
 		}
 
-		if (!okalloc) {
-			xfs_trans_brelse(tp, agbp);
-			goto nextag;
-		}
+		if (!okalloc)
+			goto nextag_relse_buffer;
+
 
 		error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced);
 		if (error) {
@@ -1007,6 +1006,8 @@ xfs_dialloc(
 			return 0;
 		}
 
+nextag_relse_buffer:
+		xfs_trans_brelse(tp, agbp);
 nextag:
 		xfs_perag_put(pag);
 		if (++agno == mp->m_sb.sb_agcount)
-- 
cgit 


From 643bfc061c47e9c7661324a09fb0a0bc6601e5d6 Mon Sep 17 00:00:00 2001
From: Tomas Racek <tracek@redhat.com>
Date: Tue, 14 Aug 2012 10:35:04 +0200
Subject: xfs: check for possible overflow in xfs_ioc_trim

If range.start or range.minlen is bigger than filesystem size, return
invalid value error. This fixes possible overflow in BTOBB macro when
passed value was nearly ULLONG_MAX.

Signed-off-by: Tomas Racek <tracek@redhat.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
---
 fs/xfs/xfs_discard.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index f9c3fe304a17..69cf4fcde03e 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -179,12 +179,14 @@ xfs_ioc_trim(
 	 * used by the fstrim application.  In the end it really doesn't
 	 * matter as trimming blocks is an advisory interface.
 	 */
+	if (range.start >= XFS_FSB_TO_B(mp, mp->m_sb.sb_dblocks) ||
+	    range.minlen > XFS_FSB_TO_B(mp, XFS_ALLOC_AG_MAX_USABLE(mp)))
+		return -XFS_ERROR(EINVAL);
+
 	start = BTOBB(range.start);
 	end = start + BTOBBT(range.len) - 1;
 	minlen = BTOBB(max_t(u64, granularity, range.minlen));
 
-	if (XFS_BB_TO_FSB(mp, start) >= mp->m_sb.sb_dblocks)
-		return -XFS_ERROR(EINVAL);
 	if (end > XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) - 1)
 		end = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)- 1;
 
-- 
cgit 


From 43829731dd372d04d6706c51052b9dabab9ca356 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 20 Aug 2012 14:51:24 -0700
Subject: workqueue: deprecate flush[_delayed]_work_sync()

flush[_delayed]_work_sync() are now spurious.  Mark them deprecated
and convert all users to flush[_delayed]_work().

If you're cc'd and wondering what's going on: Now all workqueues are
non-reentrant and the regular flushes guarantee that the work item is
not pending or running on any CPU on return, so there's no reason to
use the sync flushes at all and they're going away.

This patch doesn't make any functional difference.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Ian Campbell <ian.campbell@citrix.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Mattia Dongili <malattia@linux.it>
Cc: Kent Yoder <key@linux.vnet.ibm.com>
Cc: David Airlie <airlied@linux.ie>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Karsten Keil <isdn@linux-pingi.de>
Cc: Bryan Wu <bryan.wu@canonical.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Alasdair Kergon <agk@redhat.com>
Cc: Mauro Carvalho Chehab <mchehab@infradead.org>
Cc: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: linux-wireless@vger.kernel.org
Cc: Anton Vorontsov <cbou@mail.ru>
Cc: Sangbeom Kim <sbkim73@samsung.com>
Cc: "James E.J. Bottomley" <James.Bottomley@HansenPartnership.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Eric Van Hensbergen <ericvh@gmail.com>
Cc: Takashi Iwai <tiwai@suse.de>
Cc: Steven Whitehouse <swhiteho@redhat.com>
Cc: Petr Vandrovec <petr@vandrovec.name>
Cc: Mark Fasheh <mfasheh@suse.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Avi Kivity <avi@redhat.com>
---
 fs/affs/super.c           | 2 +-
 fs/gfs2/lock_dlm.c        | 2 +-
 fs/gfs2/super.c           | 2 +-
 fs/hfs/inode.c            | 2 +-
 fs/ncpfs/inode.c          | 6 +++---
 fs/ocfs2/cluster/quorum.c | 2 +-
 fs/xfs/xfs_super.c        | 2 +-
 fs/xfs/xfs_sync.c         | 2 +-
 8 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/affs/super.c b/fs/affs/super.c
index c70f1e5fc024..022cecb0757d 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -551,7 +551,7 @@ affs_remount(struct super_block *sb, int *flags, char *data)
 		return -EINVAL;
 	}
 
-	flush_delayed_work_sync(&sbi->sb_work);
+	flush_delayed_work(&sbi->sb_work);
 	replace_mount_options(sb, new_opts);
 
 	sbi->s_flags = mount_flags;
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 4a38db739ca0..0fb6539b0c8c 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -1289,7 +1289,7 @@ static void gdlm_unmount(struct gfs2_sbd *sdp)
 	spin_lock(&ls->ls_recover_spin);
 	set_bit(DFL_UNMOUNT, &ls->ls_recover_flags);
 	spin_unlock(&ls->ls_recover_spin);
-	flush_delayed_work_sync(&sdp->sd_control_work);
+	flush_delayed_work(&sdp->sd_control_work);
 
 	/* mounted_lock and control_lock will be purged in dlm recovery */
 release:
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index fc3168f47a14..867700aba536 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1572,7 +1572,7 @@ out:
 	clear_inode(inode);
 	gfs2_dir_hash_inval(ip);
 	ip->i_gl->gl_object = NULL;
-	flush_delayed_work_sync(&ip->i_gl->gl_work);
+	flush_delayed_work(&ip->i_gl->gl_work);
 	gfs2_glock_add_to_lru(ip->i_gl);
 	gfs2_glock_put(ip->i_gl);
 	ip->i_gl = NULL;
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index ee1bc55677f1..553909395270 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -644,7 +644,7 @@ static int hfs_file_fsync(struct file *filp, loff_t start, loff_t end,
 
 	/* sync the superblock to buffers */
 	sb = inode->i_sb;
-	flush_delayed_work_sync(&HFS_SB(sb)->mdb_work);
+	flush_delayed_work(&HFS_SB(sb)->mdb_work);
 	/* .. finally sync the buffers to disk */
 	err = sync_blockdev(sb->s_bdev);
 	if (!ret)
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 333df07ae3bd..eaa74323663a 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -314,11 +314,11 @@ static void ncp_stop_tasks(struct ncp_server *server) {
 	release_sock(sk);
 	del_timer_sync(&server->timeout_tm);
 
-	flush_work_sync(&server->rcv.tq);
+	flush_work(&server->rcv.tq);
 	if (sk->sk_socket->type == SOCK_STREAM)
-		flush_work_sync(&server->tx.tq);
+		flush_work(&server->tx.tq);
 	else
-		flush_work_sync(&server->timeout_tq);
+		flush_work(&server->timeout_tq);
 }
 
 static int  ncp_show_options(struct seq_file *seq, struct dentry *root)
diff --git a/fs/ocfs2/cluster/quorum.c b/fs/ocfs2/cluster/quorum.c
index 8f9cea1597af..c19897d0fe14 100644
--- a/fs/ocfs2/cluster/quorum.c
+++ b/fs/ocfs2/cluster/quorum.c
@@ -327,5 +327,5 @@ void o2quo_exit(void)
 {
 	struct o2quo_state *qs = &o2quo_state;
 
-	flush_work_sync(&qs->qs_work);
+	flush_work(&qs->qs_work);
 }
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index bdaf4cb9f4a2..e8e6be439bcd 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -953,7 +953,7 @@ xfs_fs_sync_fs(
 		 * We schedule xfssyncd now (now that the disk is
 		 * active) instead of later (when it might not be).
 		 */
-		flush_delayed_work_sync(&mp->m_sync_work);
+		flush_delayed_work(&mp->m_sync_work);
 	}
 
 	return 0;
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c
index 96548176db80..9500caf15acf 100644
--- a/fs/xfs/xfs_sync.c
+++ b/fs/xfs/xfs_sync.c
@@ -475,7 +475,7 @@ xfs_flush_inodes(
 	struct xfs_mount	*mp = ip->i_mount;
 
 	queue_work(xfs_syncd_wq, &mp->m_flush_work);
-	flush_work_sync(&mp->m_flush_work);
+	flush_work(&mp->m_flush_work);
 }
 
 STATIC void
-- 
cgit 


From c45640d87b8272e85d19147dad914a6b82e51370 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Wed, 22 Aug 2012 16:37:13 +0300
Subject: UBIFS: print PID in debug messages

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 fs/ubifs/debug.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
index 760de723dadb..46b8dd123585 100644
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h
@@ -165,12 +165,14 @@ struct ubifs_global_debug_info {
 } while (0)
 
 #define ubifs_dbg_msg(type, fmt, ...) \
-	pr_debug("UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__)
+	pr_debug("UBIFS DBG " type " (pid %d): " fmt "\n", current->pid,       \
+		 ##__VA_ARGS__)
 
 #define DBG_KEY_BUF_LEN 48
 #define ubifs_dbg_msg_key(type, key, fmt, ...) do {                            \
 	char __tmp_key_buf[DBG_KEY_BUF_LEN];                                   \
-	pr_debug("UBIFS DBG " type ": " fmt "%s\n", ##__VA_ARGS__,             \
+	pr_debug("UBIFS DBG " type " (pid %d): " fmt "%s\n", current->pid,     \
+		 ##__VA_ARGS__,                                                \
 		 dbg_snprintf_key(c, key, __tmp_key_buf, DBG_KEY_BUF_LEN));    \
 } while (0)
 
-- 
cgit 


From db0f89690639ac1d5f89c0dd9713c2e83dd37281 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Wed, 22 Aug 2012 16:55:08 +0300
Subject: UBIFS: always print full error reports

Even when we are emulating power cuts, otherwise it is difficult to investigate
failures during emulated power cuts testing.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 fs/ubifs/debug.c | 6 ------
 fs/ubifs/scan.c  | 2 --
 2 files changed, 8 deletions(-)

(limited to 'fs')

diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index bb3167257aab..1aaa9f519485 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -305,9 +305,6 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node)
 	const struct ubifs_ch *ch = node;
 	char key_buf[DBG_KEY_BUF_LEN];
 
-	if (dbg_is_tst_rcvry(c))
-		return;
-
 	/* If the magic is incorrect, just hexdump the first bytes */
 	if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC) {
 		printk(KERN_ERR "Not a node, first %zu bytes:", UBIFS_CH_SZ);
@@ -882,9 +879,6 @@ void ubifs_dump_leb(const struct ubifs_info *c, int lnum)
 	struct ubifs_scan_node *snod;
 	void *buf;
 
-	if (dbg_is_tst_rcvry(c))
-		return;
-
 	printk(KERN_ERR "(pid %d) start dumping LEB %d\n",
 	       current->pid, lnum);
 
diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c
index 7c40e6025fd6..07a49258ccfe 100644
--- a/fs/ubifs/scan.c
+++ b/fs/ubifs/scan.c
@@ -240,8 +240,6 @@ void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs,
 	int len;
 
 	ubifs_err("corruption at LEB %d:%d", lnum, offs);
-	if (dbg_is_tst_rcvry(c))
-		return;
 	len = c->leb_size - offs;
 	if (len > 8192)
 		len = 8192;
-- 
cgit 


From 4b3e58a6d6c62b5457ff876fdfc79f66fff04cd2 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Thu, 23 Aug 2012 17:28:58 +0300
Subject: UBIFS: improve scanning debug output

Include LEB number and offset in scanning debugging output.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 fs/ubifs/scan.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c
index 07a49258ccfe..019ca47a1f8e 100644
--- a/fs/ubifs/scan.c
+++ b/fs/ubifs/scan.c
@@ -75,7 +75,7 @@ int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum,
 	magic = le32_to_cpu(ch->magic);
 
 	if (magic == 0xFFFFFFFF) {
-		dbg_scan("hit empty space");
+		dbg_scan("hit empty space at LEB %d:%d", lnum, offs);
 		return SCANNED_EMPTY_SPACE;
 	}
 
@@ -85,7 +85,7 @@ int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum,
 	if (len < UBIFS_CH_SZ)
 		return SCANNED_GARBAGE;
 
-	dbg_scan("scanning %s", dbg_ntype(ch->node_type));
+	dbg_scan("scanning %s at LEB %d:%d", dbg_ntype(ch->node_type), lnum, offs);
 
 	if (ubifs_check_node(c, buf, lnum, offs, quiet, 1))
 		return SCANNED_A_CORRUPT_NODE;
@@ -114,8 +114,8 @@ int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum,
 			return SCANNED_A_BAD_PAD_NODE;
 		}
 
-		dbg_scan("%d bytes padded, offset now %d",
-			 pad_len, ALIGN(offs + node_len + pad_len, 8));
+		dbg_scan("%d bytes padded at LEB %d:%d, offset now %d", pad_len,
+			 lnum, offs, ALIGN(offs + node_len + pad_len, 8));
 
 		return node_len + pad_len;
 	}
-- 
cgit 


From e599b3253c5e49f7a2a579eef2fc2aa066989ef5 Mon Sep 17 00:00:00 2001
From: Carlos Maiolino <cmaiolino@redhat.com>
Date: Fri, 10 Aug 2012 15:01:51 -0300
Subject: xfs: fix race while discarding buffers [V4]

While xfs_buftarg_shrink() is freeing buffers from the dispose list (filled with
buffers from lru list), there is a possibility to have xfs_buf_stale() racing
with it, and removing buffers from dispose list before xfs_buftarg_shrink() does
it.

This happens because xfs_buftarg_shrink() handle the dispose list without
locking and the test condition in xfs_buf_stale() checks for the buffer being in
*any* list:

if (!list_empty(&bp->b_lru))

If the buffer happens to be on dispose list, this causes the buffer counter of
lru list (btp->bt_lru_nr) to be decremented twice (once in xfs_buftarg_shrink()
and another in xfs_buf_stale()) causing a wrong account usage of the lru list.

This may cause xfs_buftarg_shrink() to return a wrong value to the memory
shrinker shrink_slab(), and such account error may also cause an underflowed
value to be returned; since the counter is lower than the current number of
items in the lru list, a decrement may happen when the counter is 0, causing
an underflow on the counter.

The fix uses a new flag field (and a new buffer flag) to serialize buffer
handling during the shrink process. The new flag field has been designed to use
btp->bt_lru_lock/unlock instead of xfs_buf_lock/unlock mechanism.

dchinner, sandeen, aquini and aris also deserve credits for this.

Signed-off-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Ben Myers <bpm@sgi.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
---
 fs/xfs/xfs_buf.c |  5 ++++-
 fs/xfs/xfs_buf.h | 41 ++++++++++++++++++++++++-----------------
 2 files changed, 28 insertions(+), 18 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index d7a9dd735e1e..933b7930b863 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -96,6 +96,7 @@ xfs_buf_lru_add(
 		atomic_inc(&bp->b_hold);
 		list_add_tail(&bp->b_lru, &btp->bt_lru);
 		btp->bt_lru_nr++;
+		bp->b_lru_flags &= ~_XBF_LRU_DISPOSE;
 	}
 	spin_unlock(&btp->bt_lru_lock);
 }
@@ -154,7 +155,8 @@ xfs_buf_stale(
 		struct xfs_buftarg *btp = bp->b_target;
 
 		spin_lock(&btp->bt_lru_lock);
-		if (!list_empty(&bp->b_lru)) {
+		if (!list_empty(&bp->b_lru) &&
+		    !(bp->b_lru_flags & _XBF_LRU_DISPOSE)) {
 			list_del_init(&bp->b_lru);
 			btp->bt_lru_nr--;
 			atomic_dec(&bp->b_hold);
@@ -1501,6 +1503,7 @@ xfs_buftarg_shrink(
 		 */
 		list_move(&bp->b_lru, &dispose);
 		btp->bt_lru_nr--;
+		bp->b_lru_flags |= _XBF_LRU_DISPOSE;
 	}
 	spin_unlock(&btp->bt_lru_lock);
 
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index d03b73b9604e..7c0b6a0a1557 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -38,27 +38,28 @@ typedef enum {
 	XBRW_ZERO = 3,			/* Zero target memory */
 } xfs_buf_rw_t;
 
-#define XBF_READ	(1 << 0) /* buffer intended for reading from device */
-#define XBF_WRITE	(1 << 1) /* buffer intended for writing to device */
-#define XBF_READ_AHEAD	(1 << 2) /* asynchronous read-ahead */
-#define XBF_ASYNC	(1 << 4) /* initiator will not wait for completion */
-#define XBF_DONE	(1 << 5) /* all pages in the buffer uptodate */
-#define XBF_STALE	(1 << 6) /* buffer has been staled, do not find it */
+#define XBF_READ	 (1 << 0) /* buffer intended for reading from device */
+#define XBF_WRITE	 (1 << 1) /* buffer intended for writing to device */
+#define XBF_READ_AHEAD	 (1 << 2) /* asynchronous read-ahead */
+#define XBF_ASYNC	 (1 << 4) /* initiator will not wait for completion */
+#define XBF_DONE	 (1 << 5) /* all pages in the buffer uptodate */
+#define XBF_STALE	 (1 << 6) /* buffer has been staled, do not find it */
 
 /* I/O hints for the BIO layer */
-#define XBF_SYNCIO	(1 << 10)/* treat this buffer as synchronous I/O */
-#define XBF_FUA		(1 << 11)/* force cache write through mode */
-#define XBF_FLUSH	(1 << 12)/* flush the disk cache before a write */
+#define XBF_SYNCIO	 (1 << 10)/* treat this buffer as synchronous I/O */
+#define XBF_FUA		 (1 << 11)/* force cache write through mode */
+#define XBF_FLUSH	 (1 << 12)/* flush the disk cache before a write */
 
 /* flags used only as arguments to access routines */
-#define XBF_TRYLOCK	(1 << 16)/* lock requested, but do not wait */
-#define XBF_UNMAPPED	(1 << 17)/* do not map the buffer */
+#define XBF_TRYLOCK	 (1 << 16)/* lock requested, but do not wait */
+#define XBF_UNMAPPED	 (1 << 17)/* do not map the buffer */
 
 /* flags used only internally */
-#define _XBF_PAGES	(1 << 20)/* backed by refcounted pages */
-#define _XBF_KMEM	(1 << 21)/* backed by heap memory */
-#define _XBF_DELWRI_Q	(1 << 22)/* buffer on a delwri queue */
-#define _XBF_COMPOUND	(1 << 23)/* compound buffer */
+#define _XBF_PAGES	 (1 << 20)/* backed by refcounted pages */
+#define _XBF_KMEM	 (1 << 21)/* backed by heap memory */
+#define _XBF_DELWRI_Q	 (1 << 22)/* buffer on a delwri queue */
+#define _XBF_COMPOUND	 (1 << 23)/* compound buffer */
+#define _XBF_LRU_DISPOSE (1 << 24)/* buffer being discarded */
 
 typedef unsigned int xfs_buf_flags_t;
 
@@ -72,12 +73,13 @@ typedef unsigned int xfs_buf_flags_t;
 	{ XBF_SYNCIO,		"SYNCIO" }, \
 	{ XBF_FUA,		"FUA" }, \
 	{ XBF_FLUSH,		"FLUSH" }, \
-	{ XBF_TRYLOCK,		"TRYLOCK" }, 	/* should never be set */\
+	{ XBF_TRYLOCK,		"TRYLOCK" },	/* should never be set */\
 	{ XBF_UNMAPPED,		"UNMAPPED" },	/* ditto */\
 	{ _XBF_PAGES,		"PAGES" }, \
 	{ _XBF_KMEM,		"KMEM" }, \
 	{ _XBF_DELWRI_Q,	"DELWRI_Q" }, \
-	{ _XBF_COMPOUND,	"COMPOUND" }
+	{ _XBF_COMPOUND,	"COMPOUND" }, \
+	{ _XBF_LRU_DISPOSE,	"LRU_DISPOSE" }
 
 typedef struct xfs_buftarg {
 	dev_t			bt_dev;
@@ -124,7 +126,12 @@ typedef struct xfs_buf {
 	xfs_buf_flags_t		b_flags;	/* status flags */
 	struct semaphore	b_sema;		/* semaphore for lockables */
 
+	/*
+	 * concurrent access to b_lru and b_lru_flags are protected by
+	 * bt_lru_lock and not by b_sema
+	 */
 	struct list_head	b_lru;		/* lru list */
+	xfs_buf_flags_t		b_lru_flags;	/* internal lru status flags */
 	wait_queue_head_t	b_waiters;	/* unpin waiters */
 	struct list_head	b_list;
 	struct xfs_perag	*b_pag;		/* contains rbtree root */
-- 
cgit 


From 834ab12228fad777a11007a24cb6286b02c9a41c Mon Sep 17 00:00:00 2001
From: Jeff Liu <jeff.liu@oracle.com>
Date: Tue, 21 Aug 2012 17:11:45 +0800
Subject: xfs: Remove type argument from xfs_seek_data()/xfs_seek_hole()

The type is already indicated by the function naming explicitly, so this argument
can be omitted from those calls.

Signed-off-by: Jie Liu <jeff.liu@oracle.com>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
---
 fs/xfs/xfs_file.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 56afcdb2377d..92ba18f841f1 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -962,8 +962,7 @@ xfs_vm_page_mkwrite(
 STATIC loff_t
 xfs_seek_data(
 	struct file		*file,
-	loff_t			start,
-	u32			type)
+	loff_t			start)
 {
 	struct inode		*inode = file->f_mapping->host;
 	struct xfs_inode	*ip = XFS_I(inode);
@@ -1029,8 +1028,7 @@ out_unlock:
 STATIC loff_t
 xfs_seek_hole(
 	struct file		*file,
-	loff_t			start,
-	u32			type)
+	loff_t			start)
 {
 	struct inode		*inode = file->f_mapping->host;
 	struct xfs_inode	*ip = XFS_I(inode);
@@ -1092,9 +1090,9 @@ xfs_file_llseek(
 	case SEEK_SET:
 		return generic_file_llseek(file, offset, origin);
 	case SEEK_DATA:
-		return xfs_seek_data(file, offset, origin);
+		return xfs_seek_data(file, offset);
 	case SEEK_HOLE:
-		return xfs_seek_hole(file, offset, origin);
+		return xfs_seek_hole(file, offset);
 	default:
 		return -EINVAL;
 	}
-- 
cgit 


From d126d43f631f996daeee5006714fed914be32368 Mon Sep 17 00:00:00 2001
From: Jeff Liu <jeff.liu@oracle.com>
Date: Tue, 21 Aug 2012 17:11:57 +0800
Subject: xfs: Introduce a helper routine to probe data or hole offset from
 page cache

Introduce helpers to probe data or hole offset from page cache.

Signed-off-by: Jie Liu <jeff.liu@oracle.com>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
---
 fs/xfs/xfs_file.c | 219 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 219 insertions(+)

(limited to 'fs')

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 92ba18f841f1..d78a746b6c7c 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -36,6 +36,7 @@
 
 #include <linux/dcache.h>
 #include <linux/falloc.h>
+#include <linux/pagevec.h>
 
 static const struct vm_operations_struct xfs_file_vm_ops;
 
@@ -959,6 +960,224 @@ xfs_vm_page_mkwrite(
 	return block_page_mkwrite(vma, vmf, xfs_get_blocks);
 }
 
+/*
+ * This type is designed to indicate the type of offset we would like
+ * to search from page cache for either xfs_seek_data() or xfs_seek_hole().
+ */
+enum {
+	HOLE_OFF = 0,
+	DATA_OFF,
+};
+
+/*
+ * Lookup the desired type of offset from the given page.
+ *
+ * On success, return true and the offset argument will point to the
+ * start of the region that was found.  Otherwise this function will
+ * return false and keep the offset argument unchanged.
+ */
+STATIC bool
+xfs_lookup_buffer_offset(
+	struct page		*page,
+	loff_t			*offset,
+	unsigned int		type)
+{
+	loff_t			lastoff = page_offset(page);
+	bool			found = false;
+	struct buffer_head	*bh, *head;
+
+	bh = head = page_buffers(page);
+	do {
+		/*
+		 * Unwritten extents that have data in the page
+		 * cache covering them can be identified by the
+		 * BH_Unwritten state flag.  Pages with multiple
+		 * buffers might have a mix of holes, data and
+		 * unwritten extents - any buffer with valid
+		 * data in it should have BH_Uptodate flag set
+		 * on it.
+		 */
+		if (buffer_unwritten(bh) ||
+		    buffer_uptodate(bh)) {
+			if (type == DATA_OFF)
+				found = true;
+		} else {
+			if (type == HOLE_OFF)
+				found = true;
+		}
+
+		if (found) {
+			*offset = lastoff;
+			break;
+		}
+		lastoff += bh->b_size;
+	} while ((bh = bh->b_this_page) != head);
+
+	return found;
+}
+
+/*
+ * This routine is called to find out and return a data or hole offset
+ * from the page cache for unwritten extents according to the desired
+ * type for xfs_seek_data() or xfs_seek_hole().
+ *
+ * The argument offset is used to tell where we start to search from the
+ * page cache.  Map is used to figure out the end points of the range to
+ * lookup pages.
+ *
+ * Return true if the desired type of offset was found, and the argument
+ * offset is filled with that address.  Otherwise, return false and keep
+ * offset unchanged.
+ */
+STATIC bool
+xfs_find_get_desired_pgoff(
+	struct inode		*inode,
+	struct xfs_bmbt_irec	*map,
+	unsigned int		type,
+	loff_t			*offset)
+{
+	struct xfs_inode	*ip = XFS_I(inode);
+	struct xfs_mount	*mp = ip->i_mount;
+	struct pagevec		pvec;
+	pgoff_t			index;
+	pgoff_t			end;
+	loff_t			endoff;
+	loff_t			startoff = *offset;
+	loff_t			lastoff = startoff;
+	bool			found = false;
+
+	pagevec_init(&pvec, 0);
+
+	index = startoff >> PAGE_CACHE_SHIFT;
+	endoff = XFS_FSB_TO_B(mp, map->br_startoff + map->br_blockcount);
+	end = endoff >> PAGE_CACHE_SHIFT;
+	do {
+		int		want;
+		unsigned	nr_pages;
+		unsigned int	i;
+
+		want = min_t(pgoff_t, end - index, PAGEVEC_SIZE);
+		nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index,
+					  want);
+		/*
+		 * No page mapped into given range.  If we are searching holes
+		 * and if this is the first time we got into the loop, it means
+		 * that the given offset is landed in a hole, return it.
+		 *
+		 * If we have already stepped through some block buffers to find
+		 * holes but they all contains data.  In this case, the last
+		 * offset is already updated and pointed to the end of the last
+		 * mapped page, if it does not reach the endpoint to search,
+		 * that means there should be a hole between them.
+		 */
+		if (nr_pages == 0) {
+			/* Data search found nothing */
+			if (type == DATA_OFF)
+				break;
+
+			ASSERT(type == HOLE_OFF);
+			if (lastoff == startoff || lastoff < endoff) {
+				found = true;
+				*offset = lastoff;
+			}
+			break;
+		}
+
+		/*
+		 * At lease we found one page.  If this is the first time we
+		 * step into the loop, and if the first page index offset is
+		 * greater than the given search offset, a hole was found.
+		 */
+		if (type == HOLE_OFF && lastoff == startoff &&
+		    lastoff < page_offset(pvec.pages[0])) {
+			found = true;
+			break;
+		}
+
+		for (i = 0; i < nr_pages; i++) {
+			struct page	*page = pvec.pages[i];
+			loff_t		b_offset;
+
+			/*
+			 * At this point, the page may be truncated or
+			 * invalidated (changing page->mapping to NULL),
+			 * or even swizzled back from swapper_space to tmpfs
+			 * file mapping. However, page->index will not change
+			 * because we have a reference on the page.
+			 *
+			 * Searching done if the page index is out of range.
+			 * If the current offset is not reaches the end of
+			 * the specified search range, there should be a hole
+			 * between them.
+			 */
+			if (page->index > end) {
+				if (type == HOLE_OFF && lastoff < endoff) {
+					*offset = lastoff;
+					found = true;
+				}
+				goto out;
+			}
+
+			lock_page(page);
+			/*
+			 * Page truncated or invalidated(page->mapping == NULL).
+			 * We can freely skip it and proceed to check the next
+			 * page.
+			 */
+			if (unlikely(page->mapping != inode->i_mapping)) {
+				unlock_page(page);
+				continue;
+			}
+
+			if (!page_has_buffers(page)) {
+				unlock_page(page);
+				continue;
+			}
+
+			found = xfs_lookup_buffer_offset(page, &b_offset, type);
+			if (found) {
+				/*
+				 * The found offset may be less than the start
+				 * point to search if this is the first time to
+				 * come here.
+				 */
+				*offset = max_t(loff_t, startoff, b_offset);
+				unlock_page(page);
+				goto out;
+			}
+
+			/*
+			 * We either searching data but nothing was found, or
+			 * searching hole but found a data buffer.  In either
+			 * case, probably the next page contains the desired
+			 * things, update the last offset to it so.
+			 */
+			lastoff = page_offset(page) + PAGE_SIZE;
+			unlock_page(page);
+		}
+
+		/*
+		 * The number of returned pages less than our desired, search
+		 * done.  In this case, nothing was found for searching data,
+		 * but we found a hole behind the last offset.
+		 */
+		if (nr_pages < want) {
+			if (type == HOLE_OFF) {
+				*offset = lastoff;
+				found = true;
+			}
+			break;
+		}
+
+		index = pvec.pages[i - 1]->index + 1;
+		pagevec_release(&pvec);
+	} while (index <= end);
+
+out:
+	pagevec_release(&pvec);
+	return found;
+}
+
 STATIC loff_t
 xfs_seek_data(
 	struct file		*file,
-- 
cgit 


From 52f1acc8b56a333fbc7218711c3fa2fb3bf78b92 Mon Sep 17 00:00:00 2001
From: Jeff Liu <jeff.liu@oracle.com>
Date: Tue, 21 Aug 2012 17:12:07 +0800
Subject: xfs: xfs_seek_data() refinement with unwritten extents check up from
 page cache

xfs_seek_data() refinement with unwritten extents check up from page cache.

Signed-off-by: Jie Liu <jeff.liu@oracle.com>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
---
 fs/xfs/xfs_file.c | 72 +++++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 54 insertions(+), 18 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index d78a746b6c7c..3f9107431dfb 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1186,8 +1186,6 @@ xfs_seek_data(
 	struct inode		*inode = file->f_mapping->host;
 	struct xfs_inode	*ip = XFS_I(inode);
 	struct xfs_mount	*mp = ip->i_mount;
-	struct xfs_bmbt_irec	map[2];
-	int			nmap = 2;
 	loff_t			uninitialized_var(offset);
 	xfs_fsize_t		isize;
 	xfs_fileoff_t		fsbno;
@@ -1203,36 +1201,74 @@ xfs_seek_data(
 		goto out_unlock;
 	}
 
-	fsbno = XFS_B_TO_FSBT(mp, start);
-
 	/*
 	 * Try to read extents from the first block indicated
 	 * by fsbno to the end block of the file.
 	 */
+	fsbno = XFS_B_TO_FSBT(mp, start);
 	end = XFS_B_TO_FSB(mp, isize);
+	for (;;) {
+		struct xfs_bmbt_irec	map[2];
+		int			nmap = 2;
+		unsigned int		i;
 
-	error = xfs_bmapi_read(ip, fsbno, end - fsbno, map, &nmap,
-			       XFS_BMAPI_ENTIRE);
-	if (error)
-		goto out_unlock;
+		error = xfs_bmapi_read(ip, fsbno, end - fsbno, map, &nmap,
+				       XFS_BMAPI_ENTIRE);
+		if (error)
+			goto out_unlock;
 
-	/*
-	 * Treat unwritten extent as data extent since it might
-	 * contains dirty data in page cache.
-	 */
-	if (map[0].br_startblock != HOLESTARTBLOCK) {
-		offset = max_t(loff_t, start,
-			       XFS_FSB_TO_B(mp, map[0].br_startoff));
-	} else {
+		/* No extents at given offset, must be beyond EOF */
+		if (nmap == 0) {
+			error = ENXIO;
+			goto out_unlock;
+		}
+
+		for (i = 0; i < nmap; i++) {
+			offset = max_t(loff_t, start,
+				       XFS_FSB_TO_B(mp, map[i].br_startoff));
+
+			/* Landed in a data extent */
+			if (map[i].br_startblock == DELAYSTARTBLOCK ||
+			    (map[i].br_state == XFS_EXT_NORM &&
+			     !isnullstartblock(map[i].br_startblock)))
+				goto out;
+
+			/*
+			 * Landed in an unwritten extent, try to search data
+			 * from page cache.
+			 */
+			if (map[i].br_state == XFS_EXT_UNWRITTEN) {
+				if (xfs_find_get_desired_pgoff(inode, &map[i],
+							DATA_OFF, &offset))
+					goto out;
+			}
+		}
+
+		/*
+		 * map[0] is hole or its an unwritten extent but
+		 * without data in page cache.  Probably means that
+		 * we are reading after EOF if nothing in map[1].
+		 */
 		if (nmap == 1) {
 			error = ENXIO;
 			goto out_unlock;
 		}
 
-		offset = max_t(loff_t, start,
-			       XFS_FSB_TO_B(mp, map[1].br_startoff));
+		ASSERT(i > 1);
+
+		/*
+		 * Nothing was found, proceed to the next round of search
+		 * if reading offset not beyond or hit EOF.
+		 */
+		fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount;
+		start = XFS_FSB_TO_B(mp, fsbno);
+		if (start >= isize) {
+			error = ENXIO;
+			goto out_unlock;
+		}
 	}
 
+out:
 	if (offset != file->f_pos)
 		file->f_pos = offset;
 
-- 
cgit 


From b686d1f79acb65c6a34473c15fcfa2ee54aed8e2 Mon Sep 17 00:00:00 2001
From: Jeff Liu <jeff.liu@oracle.com>
Date: Tue, 21 Aug 2012 17:12:18 +0800
Subject: xfs: xfs_seek_hole() refinement with hole searching from page cache
 for unwritten extents

xfs_seek_hole() refinement with hole searching from page cache for unwritten extent.

Signed-off-by: Jie Liu <jeff.liu@oracle.com>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
---
 fs/xfs/xfs_file.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 67 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 3f9107431dfb..1eaeb8be3aae 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1289,9 +1289,9 @@ xfs_seek_hole(
 	struct xfs_inode	*ip = XFS_I(inode);
 	struct xfs_mount	*mp = ip->i_mount;
 	loff_t			uninitialized_var(offset);
-	loff_t			holeoff;
 	xfs_fsize_t		isize;
 	xfs_fileoff_t		fsbno;
+	xfs_filblks_t		end;
 	uint			lock;
 	int			error;
 
@@ -1307,21 +1307,77 @@ xfs_seek_hole(
 	}
 
 	fsbno = XFS_B_TO_FSBT(mp, start);
-	error = xfs_bmap_first_unused(NULL, ip, 1, &fsbno, XFS_DATA_FORK);
-	if (error)
-		goto out_unlock;
+	end = XFS_B_TO_FSB(mp, isize);
+
+	for (;;) {
+		struct xfs_bmbt_irec	map[2];
+		int			nmap = 2;
+		unsigned int		i;
+
+		error = xfs_bmapi_read(ip, fsbno, end - fsbno, map, &nmap,
+				       XFS_BMAPI_ENTIRE);
+		if (error)
+			goto out_unlock;
+
+		/* No extents at given offset, must be beyond EOF */
+		if (nmap == 0) {
+			error = ENXIO;
+			goto out_unlock;
+		}
+
+		for (i = 0; i < nmap; i++) {
+			offset = max_t(loff_t, start,
+				       XFS_FSB_TO_B(mp, map[i].br_startoff));
+
+			/* Landed in a hole */
+			if (map[i].br_startblock == HOLESTARTBLOCK)
+				goto out;
+
+			/*
+			 * Landed in an unwritten extent, try to search hole
+			 * from page cache.
+			 */
+			if (map[i].br_state == XFS_EXT_UNWRITTEN) {
+				if (xfs_find_get_desired_pgoff(inode, &map[i],
+							HOLE_OFF, &offset))
+					goto out;
+			}
+		}
 
-	holeoff = XFS_FSB_TO_B(mp, fsbno);
-	if (holeoff <= start)
-		offset = start;
-	else {
 		/*
-		 * xfs_bmap_first_unused() could return a value bigger than
-		 * isize if there are no more holes past the supplied offset.
+		 * map[0] contains data or its unwritten but contains
+		 * data in page cache, probably means that we are
+		 * reading after EOF.  We should fix offset to point
+		 * to the end of the file(i.e., there is an implicit
+		 * hole at the end of any file).
 		 */
-		offset = min_t(loff_t, holeoff, isize);
+		if (nmap == 1) {
+			offset = isize;
+			break;
+		}
+
+		ASSERT(i > 1);
+
+		/*
+		 * Both mappings contains data, proceed to the next round of
+		 * search if the current reading offset not beyond or hit EOF.
+		 */
+		fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount;
+		start = XFS_FSB_TO_B(mp, fsbno);
+		if (start >= isize) {
+			offset = isize;
+			break;
+		}
 	}
 
+out:
+	/*
+	 * At this point, we must have found a hole.  However, the returned
+	 * offset may be bigger than the file size as it may be aligned to
+	 * page boundary for unwritten extents, we need to deal with this
+	 * situation in particular.
+	 */
+	offset = min_t(loff_t, offset, isize);
 	if (offset != file->f_pos)
 		file->f_pos = offset;
 
-- 
cgit 


From 38f38657444d15e1a8574eae80ed3de9f501737a Mon Sep 17 00:00:00 2001
From: Aristeu Rozanski <aris@redhat.com>
Date: Thu, 23 Aug 2012 16:53:28 -0400
Subject: xattr: extract simple_xattr code from tmpfs

Extract in-memory xattr APIs from tmpfs. Will be used by cgroup.

$ size vmlinux.o
   text    data     bss     dec     hex filename
4658782  880729 5195032 10734543         a3cbcf vmlinux.o
$ size vmlinux.o
   text    data     bss     dec     hex filename
4658957  880729 5195032 10734718         a3cc7e vmlinux.o

v7:
- checkpatch warnings fixed
- Implement the changes requested by Hugh Dickins:
	- make simple_xattrs_init and simple_xattrs_free inline
	- get rid of locking and list reinitialization in simple_xattrs_free,
	  they're not needed
v6:
- no changes
v5:
- no changes
v4:
- move simple_xattrs_free() to fs/xattr.c
v3:
- in kmem_xattrs_free(), reinitialize the list
- use simple_xattr_* prefix
- introduce simple_xattr_add() to prevent direct list usage

Original-patch-by: Li Zefan <lizefan@huawei.com>
Cc: Li Zefan <lizefan@huawei.com>
Cc: Hillf Danton <dhillf@gmail.com>
Cc: Lennart Poettering <lpoetter@redhat.com>
Acked-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Li Zefan <lizefan@huawei.com>
Signed-off-by: Aristeu Rozanski <aris@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 fs/xattr.c | 166 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 166 insertions(+)

(limited to 'fs')

diff --git a/fs/xattr.c b/fs/xattr.c
index 4d45b7189e7e..e17e773517ef 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -791,3 +791,169 @@ EXPORT_SYMBOL(generic_getxattr);
 EXPORT_SYMBOL(generic_listxattr);
 EXPORT_SYMBOL(generic_setxattr);
 EXPORT_SYMBOL(generic_removexattr);
+
+/*
+ * Allocate new xattr and copy in the value; but leave the name to callers.
+ */
+struct simple_xattr *simple_xattr_alloc(const void *value, size_t size)
+{
+	struct simple_xattr *new_xattr;
+	size_t len;
+
+	/* wrap around? */
+	len = sizeof(*new_xattr) + size;
+	if (len <= sizeof(*new_xattr))
+		return NULL;
+
+	new_xattr = kmalloc(len, GFP_KERNEL);
+	if (!new_xattr)
+		return NULL;
+
+	new_xattr->size = size;
+	memcpy(new_xattr->value, value, size);
+	return new_xattr;
+}
+
+/*
+ * xattr GET operation for in-memory/pseudo filesystems
+ */
+int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
+		     void *buffer, size_t size)
+{
+	struct simple_xattr *xattr;
+	int ret = -ENODATA;
+
+	spin_lock(&xattrs->lock);
+	list_for_each_entry(xattr, &xattrs->head, list) {
+		if (strcmp(name, xattr->name))
+			continue;
+
+		ret = xattr->size;
+		if (buffer) {
+			if (size < xattr->size)
+				ret = -ERANGE;
+			else
+				memcpy(buffer, xattr->value, xattr->size);
+		}
+		break;
+	}
+	spin_unlock(&xattrs->lock);
+	return ret;
+}
+
+static int __simple_xattr_set(struct simple_xattrs *xattrs, const char *name,
+			      const void *value, size_t size, int flags)
+{
+	struct simple_xattr *xattr;
+	struct simple_xattr *new_xattr = NULL;
+	int err = 0;
+
+	/* value == NULL means remove */
+	if (value) {
+		new_xattr = simple_xattr_alloc(value, size);
+		if (!new_xattr)
+			return -ENOMEM;
+
+		new_xattr->name = kstrdup(name, GFP_KERNEL);
+		if (!new_xattr->name) {
+			kfree(new_xattr);
+			return -ENOMEM;
+		}
+	}
+
+	spin_lock(&xattrs->lock);
+	list_for_each_entry(xattr, &xattrs->head, list) {
+		if (!strcmp(name, xattr->name)) {
+			if (flags & XATTR_CREATE) {
+				xattr = new_xattr;
+				err = -EEXIST;
+			} else if (new_xattr) {
+				list_replace(&xattr->list, &new_xattr->list);
+			} else {
+				list_del(&xattr->list);
+			}
+			goto out;
+		}
+	}
+	if (flags & XATTR_REPLACE) {
+		xattr = new_xattr;
+		err = -ENODATA;
+	} else {
+		list_add(&new_xattr->list, &xattrs->head);
+		xattr = NULL;
+	}
+out:
+	spin_unlock(&xattrs->lock);
+	if (xattr) {
+		kfree(xattr->name);
+		kfree(xattr);
+	}
+	return err;
+
+}
+
+/*
+ * xattr SET operation for in-memory/pseudo filesystems
+ */
+int simple_xattr_set(struct simple_xattrs *xattrs, const char *name,
+		     const void *value, size_t size, int flags)
+{
+	if (size == 0)
+		value = ""; /* empty EA, do not remove */
+	return __simple_xattr_set(xattrs, name, value, size, flags);
+}
+
+/*
+ * xattr REMOVE operation for in-memory/pseudo filesystems
+ */
+int simple_xattr_remove(struct simple_xattrs *xattrs, const char *name)
+{
+	return __simple_xattr_set(xattrs, name, NULL, 0, XATTR_REPLACE);
+}
+
+static bool xattr_is_trusted(const char *name)
+{
+	return !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN);
+}
+
+/*
+ * xattr LIST operation for in-memory/pseudo filesystems
+ */
+ssize_t simple_xattr_list(struct simple_xattrs *xattrs, char *buffer,
+			  size_t size)
+{
+	bool trusted = capable(CAP_SYS_ADMIN);
+	struct simple_xattr *xattr;
+	size_t used = 0;
+
+	spin_lock(&xattrs->lock);
+	list_for_each_entry(xattr, &xattrs->head, list) {
+		size_t len;
+
+		/* skip "trusted." attributes for unprivileged callers */
+		if (!trusted && xattr_is_trusted(xattr->name))
+			continue;
+
+		len = strlen(xattr->name) + 1;
+		used += len;
+		if (buffer) {
+			if (size < used) {
+				used = -ERANGE;
+				break;
+			}
+			memcpy(buffer, xattr->name, len);
+			buffer += len;
+		}
+	}
+	spin_unlock(&xattrs->lock);
+
+	return used;
+}
+
+void simple_xattr_list_add(struct simple_xattrs *xattrs,
+			   struct simple_xattr *new_xattr)
+{
+	spin_lock(&xattrs->lock);
+	list_add(&new_xattr->list, &xattrs->head);
+	spin_unlock(&xattrs->lock);
+}
-- 
cgit 


From 82aceae4f0d42f03d9ad7d1e90389e731153898f Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Mon, 27 Aug 2012 13:32:15 -0700
Subject: debugfs: more tightly restrict default mount mode

Since the debugfs is mostly only used by root, make the default mount
mode 0700. Most system owners do not need a more permissive value,
but they can choose to weaken the restrictions via their fstab.

Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/debugfs/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 2c9fafbe8425..6393fd61d5c4 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -28,7 +28,7 @@
 #include <linux/magic.h>
 #include <linux/slab.h>
 
-#define DEBUGFS_DEFAULT_MODE	0755
+#define DEBUGFS_DEFAULT_MODE	0700
 
 static struct vfsmount *debugfs_mount;
 static int debugfs_mount_count;
-- 
cgit 


From 8089ed792832e5d3dd219da435c5a08d969662b4 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Wed, 22 Aug 2012 17:35:11 +0300
Subject: UBIFS: fix power cut emulation for mtdram

The power cut emulation did not work correctly because we corrupted more than
one max. I/O unit in the buffer and then wrote the entire buffer. This lead to
recovery errors because UBIFS complained about corrupted free space. And this
was easily reproducible on mtdram because max. write size is very small there
(64 bytes), and we could easily have a 1KiB buffer, corrupt 128 bytes there,
and then write the entire buffer.

The fix is to corrupt max. write size bytes at most, and write only up to the
last corrupted max. write size chunk, not the entire buffer.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 fs/ubifs/debug.c | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 1aaa9f519485..5dc993fdca87 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -2646,20 +2646,18 @@ static int power_cut_emulated(struct ubifs_info *c, int lnum, int write)
 	return 1;
 }
 
-static void cut_data(const void *buf, unsigned int len)
+static int corrupt_data(const struct ubifs_info *c, const void *buf,
+			unsigned int len)
 {
 	unsigned int from, to, i, ffs = chance(1, 2);
 	unsigned char *p = (void *)buf;
 
 	from = random32() % (len + 1);
-	if (chance(1, 2))
-		to = random32() % (len - from + 1);
-	else
-		to = len;
+	/* Corruption may only span one max. write unit */
+	to = min(len, ALIGN(from, c->max_write_size));
 
-	if (from < to)
-		ubifs_warn("filled bytes %u-%u with %s", from, to - 1,
-			   ffs ? "0xFFs" : "random data");
+	ubifs_warn("filled bytes %u-%u with %s", from, to - 1,
+		   ffs ? "0xFFs" : "random data");
 
 	if (ffs)
 		for (i = from; i < to; i++)
@@ -2667,6 +2665,8 @@ static void cut_data(const void *buf, unsigned int len)
 	else
 		for (i = from; i < to; i++)
 			p[i] = random32() % 0x100;
+
+	return to;
 }
 
 int dbg_leb_write(struct ubifs_info *c, int lnum, const void *buf,
@@ -2679,7 +2679,9 @@ int dbg_leb_write(struct ubifs_info *c, int lnum, const void *buf,
 
 	failing = power_cut_emulated(c, lnum, 1);
 	if (failing)
-		cut_data(buf, len);
+		len = corrupt_data(c, buf, len);
+	ubifs_warn("actually write %d bytes to LEB %d:%d (the buffer was corrupted)",
+		   len, lnum, offs);
 	err = ubi_leb_write(c->ubi, lnum, buf, offs, len);
 	if (err)
 		return err;
-- 
cgit 


From e2441b43190efe0c1e156455b35397ac5efd2610 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Thu, 9 Aug 2012 19:43:14 +0200
Subject: UBIFS: remove __DATE__ and __TIME__

This tag is useless and it breaks automatic builds. It causes rebuilds
for packages that depend on kernel for no real reason.

Further, quoting Michal, who removed most of the users already:
The kernel already prints its build timestamp during boot, no need to
repeat it in random drivers and produce different object files each
time.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 fs/ubifs/super.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 71a197f0f93d..d31928975cc8 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1428,7 +1428,6 @@ static int mount_ubifs(struct ubifs_info *c)
 	ubifs_msg("reserved for root:  %llu bytes (%llu KiB)",
 		c->report_rp_size, c->report_rp_size >> 10);
 
-	dbg_msg("compiled on:         " __DATE__ " at " __TIME__);
 	dbg_msg("min. I/O unit size:  %d bytes", c->min_io_size);
 	dbg_msg("max. write size:     %d bytes", c->max_write_size);
 	dbg_msg("LEB size:            %d bytes (%d KiB)",
-- 
cgit 


From 43457c60c8314835412848a9df25d4ba2f49f0ed Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Mon, 27 Aug 2012 13:48:48 +0300
Subject: UBIFS: use __aligned() attribute

.. instead of __attribute__((aligned())).

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 fs/ubifs/commit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c
index 8eda717cb99b..a054cafd614b 100644
--- a/fs/ubifs/commit.c
+++ b/fs/ubifs/commit.c
@@ -514,7 +514,7 @@ struct idx_node {
 	struct list_head list;
 	int iip;
 	union ubifs_key upper_key;
-	struct ubifs_idx_node idx __attribute__((aligned(8)));
+	struct ubifs_idx_node idx __aligned(8);
 };
 
 /**
-- 
cgit 


From 79fda5179a5227c930e5b0242b5d5ebf3df29422 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Mon, 27 Aug 2012 13:34:09 +0300
Subject: UBIFS: comply with coding style

Join all the split printk lines in order to stop checkpatch complaining.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 fs/ubifs/budget.c     |   5 +-
 fs/ubifs/compress.c   |   7 +--
 fs/ubifs/debug.c      | 157 ++++++++++++++++++++------------------------------
 fs/ubifs/dir.c        |   4 +-
 fs/ubifs/file.c       |   4 +-
 fs/ubifs/gc.c         |   6 +-
 fs/ubifs/log.c        |  14 ++---
 fs/ubifs/lprops.c     |  58 +++++++++----------
 fs/ubifs/lpt.c        |   3 +-
 fs/ubifs/lpt_commit.c |  14 ++---
 fs/ubifs/orphan.c     |   7 +--
 fs/ubifs/recovery.c   |  11 ++--
 fs/ubifs/replay.c     |  16 ++---
 fs/ubifs/sb.c         |  19 +++---
 fs/ubifs/scan.c       |   7 ++-
 fs/ubifs/super.c      |  35 +++++------
 16 files changed, 161 insertions(+), 206 deletions(-)

(limited to 'fs')

diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index bc4f94b28706..ea9c814c0a33 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -342,9 +342,8 @@ static int do_budget_space(struct ubifs_info *c)
 	lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt -
 	       c->lst.taken_empty_lebs;
 	if (unlikely(rsvd_idx_lebs > lebs)) {
-		dbg_budg("out of indexing space: min_idx_lebs %d (old %d), "
-			 "rsvd_idx_lebs %d", min_idx_lebs, c->bi.min_idx_lebs,
-			 rsvd_idx_lebs);
+		dbg_budg("out of indexing space: min_idx_lebs %d (old %d), rsvd_idx_lebs %d",
+			 min_idx_lebs, c->bi.min_idx_lebs, rsvd_idx_lebs);
 		return -ENOSPC;
 	}
 
diff --git a/fs/ubifs/compress.c b/fs/ubifs/compress.c
index 11e4132f314a..2bfa0953335d 100644
--- a/fs/ubifs/compress.c
+++ b/fs/ubifs/compress.c
@@ -112,8 +112,7 @@ void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len,
 	if (compr->comp_mutex)
 		mutex_unlock(compr->comp_mutex);
 	if (unlikely(err)) {
-		ubifs_warn("cannot compress %d bytes, compressor %s, "
-			   "error %d, leave data uncompressed",
+		ubifs_warn("cannot compress %d bytes, compressor %s, error %d, leave data uncompressed",
 			   in_len, compr->name, err);
 		 goto no_compr;
 	}
@@ -176,8 +175,8 @@ int ubifs_decompress(const void *in_buf, int in_len, void *out_buf,
 	if (compr->decomp_mutex)
 		mutex_unlock(compr->decomp_mutex);
 	if (err)
-		ubifs_err("cannot decompress %d bytes, compressor %s, "
-			  "error %d", in_len, compr->name, err);
+		ubifs_err("cannot decompress %d bytes, compressor %s, error %d",
+			  in_len, compr->name, err);
 
 	return err;
 }
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 5dc993fdca87..9f28375fc5b3 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -509,8 +509,7 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node)
 		printk(KERN_ERR "\tname           ");
 
 		if (nlen > UBIFS_MAX_NLEN)
-			printk(KERN_ERR "(bad name length, not printing, "
-					  "bad or corrupted node)");
+			printk(KERN_ERR "(bad name length, not printing, bad or corrupted node)");
 		else {
 			for (i = 0; i < nlen && dent->name[i]; i++)
 				printk(KERN_CONT "%c", dent->name[i]);
@@ -618,14 +617,12 @@ void ubifs_dump_budget_req(const struct ubifs_budget_req *req)
 void ubifs_dump_lstats(const struct ubifs_lp_stats *lst)
 {
 	spin_lock(&dbg_lock);
-	printk(KERN_ERR "(pid %d) Lprops statistics: empty_lebs %d, "
-	       "idx_lebs  %d\n", current->pid, lst->empty_lebs, lst->idx_lebs);
-	printk(KERN_ERR "\ttaken_empty_lebs %d, total_free %lld, "
-	       "total_dirty %lld\n", lst->taken_empty_lebs, lst->total_free,
-	       lst->total_dirty);
-	printk(KERN_ERR "\ttotal_used %lld, total_dark %lld, "
-	       "total_dead %lld\n", lst->total_used, lst->total_dark,
-	       lst->total_dead);
+	printk(KERN_ERR "(pid %d) Lprops statistics: empty_lebs %d, idx_lebs  %d\n",
+	       current->pid, lst->empty_lebs, lst->idx_lebs);
+	printk(KERN_ERR "\ttaken_empty_lebs %d, total_free %lld, total_dirty %lld\n",
+	       lst->taken_empty_lebs, lst->total_free, lst->total_dirty);
+	printk(KERN_ERR "\ttotal_used %lld, total_dark %lld, total_dead %lld\n",
+	       lst->total_used, lst->total_dark, lst->total_dead);
 	spin_unlock(&dbg_lock);
 }
 
@@ -639,16 +636,13 @@ void ubifs_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi)
 
 	spin_lock(&c->space_lock);
 	spin_lock(&dbg_lock);
-	printk(KERN_ERR "(pid %d) Budgeting info: data budget sum %lld, "
-	       "total budget sum %lld\n", current->pid,
-	       bi->data_growth + bi->dd_growth,
+	printk(KERN_ERR "(pid %d) Budgeting info: data budget sum %lld, total budget sum %lld\n",
+	       current->pid, bi->data_growth + bi->dd_growth,
 	       bi->data_growth + bi->dd_growth + bi->idx_growth);
-	printk(KERN_ERR "\tbudg_data_growth %lld, budg_dd_growth %lld, "
-	       "budg_idx_growth %lld\n", bi->data_growth, bi->dd_growth,
-	       bi->idx_growth);
-	printk(KERN_ERR "\tmin_idx_lebs %d, old_idx_sz %llu, "
-	       "uncommitted_idx %lld\n", bi->min_idx_lebs, bi->old_idx_sz,
-	       bi->uncommitted_idx);
+	printk(KERN_ERR "\tbudg_data_growth %lld, budg_dd_growth %lld, budg_idx_growth %lld\n",
+	       bi->data_growth, bi->dd_growth, bi->idx_growth);
+	printk(KERN_ERR "\tmin_idx_lebs %d, old_idx_sz %llu, uncommitted_idx %lld\n",
+	       bi->min_idx_lebs, bi->old_idx_sz, bi->uncommitted_idx);
 	printk(KERN_ERR "\tpage_budget %d, inode_budget %d, dent_budget %d\n",
 	       bi->page_budget, bi->inode_budget, bi->dent_budget);
 	printk(KERN_ERR "\tnospace %u, nospace_rp %u\n",
@@ -666,8 +660,8 @@ void ubifs_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi)
 
 	printk(KERN_ERR "\tfreeable_cnt %d, calc_idx_sz %lld, idx_gc_cnt %d\n",
 	       c->freeable_cnt, c->calc_idx_sz, c->idx_gc_cnt);
-	printk(KERN_ERR "\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, "
-	       "clean_zn_cnt %ld\n", atomic_long_read(&c->dirty_pg_cnt),
+	printk(KERN_ERR "\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, clean_zn_cnt %ld\n",
+	       atomic_long_read(&c->dirty_pg_cnt),
 	       atomic_long_read(&c->dirty_zn_cnt),
 	       atomic_long_read(&c->clean_zn_cnt));
 	printk(KERN_ERR "\tgc_lnum %d, ihead_lnum %d\n",
@@ -715,15 +709,13 @@ void ubifs_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp)
 		dark = ubifs_calc_dark(c, spc);
 
 	if (lp->flags & LPROPS_INDEX)
-		printk(KERN_ERR "LEB %-7d free %-8d dirty %-8d used %-8d "
-		       "free + dirty %-8d flags %#x (", lp->lnum, lp->free,
-		       lp->dirty, c->leb_size - spc, spc, lp->flags);
+		printk(KERN_ERR "LEB %-7d free %-8d dirty %-8d used %-8d free + dirty %-8d flags %#x (",
+		       lp->lnum, lp->free, lp->dirty, c->leb_size - spc, spc,
+		       lp->flags);
 	else
-		printk(KERN_ERR "LEB %-7d free %-8d dirty %-8d used %-8d "
-		       "free + dirty %-8d dark %-4d dead %-4d nodes fit %-3d "
-		       "flags %#-4x (", lp->lnum, lp->free, lp->dirty,
-		       c->leb_size - spc, spc, dark, dead,
-		       (int)(spc / UBIFS_MAX_NODE_SZ), lp->flags);
+		printk(KERN_ERR "LEB %-7d free %-8d dirty %-8d used %-8d free + dirty %-8d dark %-4d dead %-4d nodes fit %-3d flags %#-4x (",
+		       lp->lnum, lp->free, lp->dirty, c->leb_size - spc, spc,
+		       dark, dead, (int)(spc / UBIFS_MAX_NODE_SZ), lp->flags);
 
 	if (lp->flags & LPROPS_TAKEN) {
 		if (lp->flags & LPROPS_INDEX)
@@ -851,9 +843,9 @@ void ubifs_dump_lpt_info(struct ubifs_info *c)
 		printk(KERN_ERR "\tLPT lsave is at %d:%d\n",
 		       c->lsave_lnum, c->lsave_offs);
 	for (i = 0; i < c->lpt_lebs; i++)
-		printk(KERN_ERR "\tLPT LEB %d free %d dirty %d tgc %d "
-		       "cmt %d\n", i + c->lpt_first, c->ltab[i].free,
-		       c->ltab[i].dirty, c->ltab[i].tgc, c->ltab[i].cmt);
+		printk(KERN_ERR "\tLPT LEB %d free %d dirty %d tgc %d cmt %d\n",
+		       i + c->lpt_first, c->ltab[i].free, c->ltab[i].dirty,
+		       c->ltab[i].tgc, c->ltab[i].cmt);
 	spin_unlock(&dbg_lock);
 }
 
@@ -867,8 +859,8 @@ void ubifs_dump_sleb(const struct ubifs_info *c,
 
 	list_for_each_entry(snod, &sleb->nodes, list) {
 		cond_resched();
-		printk(KERN_ERR "Dumping node at LEB %d:%d len %d\n", sleb->lnum,
-		       snod->offs, snod->len);
+		printk(KERN_ERR "Dumping node at LEB %d:%d len %d\n",
+		       sleb->lnum, snod->offs, snod->len);
 		ubifs_dump_node(c, snod->node);
 	}
 }
@@ -926,10 +918,9 @@ void ubifs_dump_znode(const struct ubifs_info *c,
 	else
 		zbr = &c->zroot;
 
-	printk(KERN_ERR "znode %p, LEB %d:%d len %d parent %p iip %d level %d"
-	       " child_cnt %d flags %lx\n", znode, zbr->lnum, zbr->offs,
-	       zbr->len, znode->parent, znode->iip, znode->level,
-	       znode->child_cnt, znode->flags);
+	printk(KERN_ERR "znode %p, LEB %d:%d len %d parent %p iip %d level %d child_cnt %d flags %lx\n",
+	       znode, zbr->lnum, zbr->offs, zbr->len, znode->parent, znode->iip,
+	       znode->level, znode->child_cnt, znode->flags);
 
 	if (znode->child_cnt <= 0 || znode->child_cnt > c->fanout) {
 		spin_unlock(&dbg_lock);
@@ -940,19 +931,15 @@ void ubifs_dump_znode(const struct ubifs_info *c,
 	for (n = 0; n < znode->child_cnt; n++) {
 		zbr = &znode->zbranch[n];
 		if (znode->level > 0)
-			printk(KERN_ERR "\t%d: znode %p LEB %d:%d len %d key "
-					  "%s\n", n, zbr->znode, zbr->lnum,
-					  zbr->offs, zbr->len,
-					  dbg_snprintf_key(c, &zbr->key,
-							   key_buf,
-							   DBG_KEY_BUF_LEN));
+			printk(KERN_ERR "\t%d: znode %p LEB %d:%d len %d key %s\n",
+			       n, zbr->znode, zbr->lnum, zbr->offs, zbr->len,
+			       dbg_snprintf_key(c, &zbr->key, key_buf,
+						DBG_KEY_BUF_LEN));
 		else
-			printk(KERN_ERR "\t%d: LNC %p LEB %d:%d len %d key "
-					  "%s\n", n, zbr->znode, zbr->lnum,
-					  zbr->offs, zbr->len,
-					  dbg_snprintf_key(c, &zbr->key,
-							   key_buf,
-							   DBG_KEY_BUF_LEN));
+			printk(KERN_ERR "\t%d: LNC %p LEB %d:%d len %d key %s\n",
+			       n, zbr->znode, zbr->lnum, zbr->offs, zbr->len,
+			       dbg_snprintf_key(c, &zbr->key, key_buf,
+						DBG_KEY_BUF_LEN));
 	}
 	spin_unlock(&dbg_lock);
 }
@@ -966,9 +953,9 @@ void ubifs_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat)
 	for (i = 0; i < heap->cnt; i++) {
 		struct ubifs_lprops *lprops = heap->arr[i];
 
-		printk(KERN_ERR "\t%d. LEB %d hpos %d free %d dirty %d "
-		       "flags %d\n", i, lprops->lnum, lprops->hpos,
-		       lprops->free, lprops->dirty, lprops->flags);
+		printk(KERN_ERR "\t%d. LEB %d hpos %d free %d dirty %d flags %d\n",
+		       i, lprops->lnum, lprops->hpos, lprops->free,
+		       lprops->dirty, lprops->flags);
 	}
 	printk(KERN_ERR "(pid %d) finish dumping heap\n", current->pid);
 }
@@ -1148,8 +1135,8 @@ int dbg_check_synced_i_size(const struct ubifs_info *c, struct inode *inode)
 	mutex_lock(&ui->ui_mutex);
 	spin_lock(&ui->ui_lock);
 	if (ui->ui_size != ui->synced_i_size && !ui->dirty) {
-		ubifs_err("ui_size is %lld, synced_i_size is %lld, but inode "
-			  "is clean", ui->ui_size, ui->synced_i_size);
+		ubifs_err("ui_size is %lld, synced_i_size is %lld, but inode is clean",
+			  ui->ui_size, ui->synced_i_size);
 		ubifs_err("i_ino %lu, i_mode %#x, i_size %lld", inode->i_ino,
 			  inode->i_mode, i_size_read(inode));
 		dump_stack();
@@ -1211,17 +1198,16 @@ int dbg_check_dir(struct ubifs_info *c, const struct inode *dir)
 	kfree(pdent);
 
 	if (i_size_read(dir) != size) {
-		ubifs_err("directory inode %lu has size %llu, "
-			  "but calculated size is %llu", dir->i_ino,
-			  (unsigned long long)i_size_read(dir),
+		ubifs_err("directory inode %lu has size %llu, but calculated size is %llu",
+			  dir->i_ino, (unsigned long long)i_size_read(dir),
 			  (unsigned long long)size);
 		ubifs_dump_inode(c, dir);
 		dump_stack();
 		return -EINVAL;
 	}
 	if (dir->i_nlink != nlink) {
-		ubifs_err("directory inode %lu has nlink %u, but calculated "
-			  "nlink is %u", dir->i_ino, dir->i_nlink, nlink);
+		ubifs_err("directory inode %lu has nlink %u, but calculated nlink is %u",
+			  dir->i_ino, dir->i_nlink, nlink);
 		ubifs_dump_inode(c, dir);
 		dump_stack();
 		return -EINVAL;
@@ -1680,8 +1666,8 @@ int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb,
 		if (znode_cb) {
 			err = znode_cb(c, znode, priv);
 			if (err) {
-				ubifs_err("znode checking function returned "
-					  "error %d", err);
+				ubifs_err("znode checking function returned error %d",
+					  err);
 				ubifs_dump_znode(c, znode);
 				goto out_dump;
 			}
@@ -1691,9 +1677,7 @@ int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb,
 				zbr = &znode->zbranch[idx];
 				err = leaf_cb(c, zbr, priv);
 				if (err) {
-					ubifs_err("leaf checking function "
-						  "returned error %d, for leaf "
-						  "at LEB %d:%d",
+					ubifs_err("leaf checking function returned error %d, for leaf at LEB %d:%d",
 						  err, zbr->lnum, zbr->offs);
 					goto out_dump;
 				}
@@ -1801,8 +1785,8 @@ int dbg_check_idx_size(struct ubifs_info *c, long long idx_size)
 	}
 
 	if (calc != idx_size) {
-		ubifs_err("index size check failed: calculated size is %lld, "
-			  "should be %lld", calc, idx_size);
+		ubifs_err("index size check failed: calculated size is %lld, should be %lld",
+			  calc, idx_size);
 		dump_stack();
 		return -EINVAL;
 	}
@@ -2114,8 +2098,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr,
 		fscki = read_add_inode(c, priv, inum);
 		if (IS_ERR(fscki)) {
 			err = PTR_ERR(fscki);
-			ubifs_err("error %d while processing data node and "
-				  "trying to find inode node %lu",
+			ubifs_err("error %d while processing data node and trying to find inode node %lu",
 				  err, (unsigned long)inum);
 			goto out_dump;
 		}
@@ -2125,9 +2108,8 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr,
 		blk_offs <<= UBIFS_BLOCK_SHIFT;
 		blk_offs += le32_to_cpu(dn->size);
 		if (blk_offs > fscki->size) {
-			ubifs_err("data node at LEB %d:%d is not within inode "
-				  "size %lld", zbr->lnum, zbr->offs,
-				  fscki->size);
+			ubifs_err("data node at LEB %d:%d is not within inode size %lld",
+				  zbr->lnum, zbr->offs, fscki->size);
 			err = -EINVAL;
 			goto out_dump;
 		}
@@ -2148,8 +2130,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr,
 		fscki = read_add_inode(c, priv, inum);
 		if (IS_ERR(fscki)) {
 			err = PTR_ERR(fscki);
-			ubifs_err("error %d while processing entry node and "
-				  "trying to find inode node %lu",
+			ubifs_err("error %d while processing entry node and trying to find inode node %lu",
 				  err, (unsigned long)inum);
 			goto out_dump;
 		}
@@ -2161,8 +2142,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr,
 		fscki1 = read_add_inode(c, priv, inum);
 		if (IS_ERR(fscki1)) {
 			err = PTR_ERR(fscki1);
-			ubifs_err("error %d while processing entry node and "
-				  "trying to find parent inode node %lu",
+			ubifs_err("error %d while processing entry node and trying to find parent inode node %lu",
 				  err, (unsigned long)inum);
 			goto out_dump;
 		}
@@ -2252,61 +2232,52 @@ static int check_inodes(struct ubifs_info *c, struct fsck_data *fsckd)
 			 */
 			if (fscki->inum != UBIFS_ROOT_INO &&
 			    fscki->references != 1) {
-				ubifs_err("directory inode %lu has %d "
-					  "direntries which refer it, but "
-					  "should be 1",
+				ubifs_err("directory inode %lu has %d direntries which refer it, but should be 1",
 					  (unsigned long)fscki->inum,
 					  fscki->references);
 				goto out_dump;
 			}
 			if (fscki->inum == UBIFS_ROOT_INO &&
 			    fscki->references != 0) {
-				ubifs_err("root inode %lu has non-zero (%d) "
-					  "direntries which refer it",
+				ubifs_err("root inode %lu has non-zero (%d) direntries which refer it",
 					  (unsigned long)fscki->inum,
 					  fscki->references);
 				goto out_dump;
 			}
 			if (fscki->calc_sz != fscki->size) {
-				ubifs_err("directory inode %lu size is %lld, "
-					  "but calculated size is %lld",
+				ubifs_err("directory inode %lu size is %lld, but calculated size is %lld",
 					  (unsigned long)fscki->inum,
 					  fscki->size, fscki->calc_sz);
 				goto out_dump;
 			}
 			if (fscki->calc_cnt != fscki->nlink) {
-				ubifs_err("directory inode %lu nlink is %d, "
-					  "but calculated nlink is %d",
+				ubifs_err("directory inode %lu nlink is %d, but calculated nlink is %d",
 					  (unsigned long)fscki->inum,
 					  fscki->nlink, fscki->calc_cnt);
 				goto out_dump;
 			}
 		} else {
 			if (fscki->references != fscki->nlink) {
-				ubifs_err("inode %lu nlink is %d, but "
-					  "calculated nlink is %d",
+				ubifs_err("inode %lu nlink is %d, but calculated nlink is %d",
 					  (unsigned long)fscki->inum,
 					  fscki->nlink, fscki->references);
 				goto out_dump;
 			}
 		}
 		if (fscki->xattr_sz != fscki->calc_xsz) {
-			ubifs_err("inode %lu has xattr size %u, but "
-				  "calculated size is %lld",
+			ubifs_err("inode %lu has xattr size %u, but calculated size is %lld",
 				  (unsigned long)fscki->inum, fscki->xattr_sz,
 				  fscki->calc_xsz);
 			goto out_dump;
 		}
 		if (fscki->xattr_cnt != fscki->calc_xcnt) {
-			ubifs_err("inode %lu has %u xattrs, but "
-				  "calculated count is %lld",
+			ubifs_err("inode %lu has %u xattrs, but calculated count is %lld",
 				  (unsigned long)fscki->inum,
 				  fscki->xattr_cnt, fscki->calc_xcnt);
 			goto out_dump;
 		}
 		if (fscki->xattr_nms != fscki->calc_xnms) {
-			ubifs_err("inode %lu has xattr names' size %u, but "
-				  "calculated names' size is %lld",
+			ubifs_err("inode %lu has xattr names' size %u, but calculated names' size is %lld",
 				  (unsigned long)fscki->inum, fscki->xattr_nms,
 				  fscki->calc_xnms);
 			goto out_dump;
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index c95681cf1b71..e271fba1651b 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -980,8 +980,8 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	 * separately.
 	 */
 
-	dbg_gen("dent '%.*s' ino %lu in dir ino %lu to dent '%.*s' in "
-		"dir ino %lu", old_dentry->d_name.len, old_dentry->d_name.name,
+	dbg_gen("dent '%.*s' ino %lu in dir ino %lu to dent '%.*s' in dir ino %lu",
+		old_dentry->d_name.len, old_dentry->d_name.name,
 		old_inode->i_ino, old_dir->i_ino, new_dentry->d_name.len,
 		new_dentry->d_name.name, new_dir->i_ino);
 	ubifs_assert(mutex_is_locked(&old_dir->i_mutex));
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 7bd6e72afd11..ff48c5a85309 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1486,8 +1486,8 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma,
 	err = ubifs_budget_space(c, &req);
 	if (unlikely(err)) {
 		if (err == -ENOSPC)
-			ubifs_warn("out of space for mmapped file "
-				   "(inode number %lu)", inode->i_ino);
+			ubifs_warn("out of space for mmapped file (inode number %lu)",
+				   inode->i_ino);
 		return VM_FAULT_SIGBUS;
 	}
 
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c
index 04dd6f47635e..76ca53cd3eee 100644
--- a/fs/ubifs/gc.c
+++ b/fs/ubifs/gc.c
@@ -714,9 +714,9 @@ int ubifs_garbage_collect(struct ubifs_info *c, int anyway)
 			break;
 		}
 
-		dbg_gc("found LEB %d: free %d, dirty %d, sum %d "
-		       "(min. space %d)", lp.lnum, lp.free, lp.dirty,
-		       lp.free + lp.dirty, min_space);
+		dbg_gc("found LEB %d: free %d, dirty %d, sum %d (min. space %d)",
+		       lp.lnum, lp.free, lp.dirty, lp.free + lp.dirty,
+		       min_space);
 
 		space_before = c->leb_size - wbuf->offs - wbuf->used;
 		if (wbuf->lnum == -1)
diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c
index c80b15d6c8de..36bd4efd0819 100644
--- a/fs/ubifs/log.c
+++ b/fs/ubifs/log.c
@@ -315,17 +315,15 @@ static void remove_buds(struct ubifs_info *c)
 			 * heads (non-closed buds).
 			 */
 			c->cmt_bud_bytes += wbuf->offs - bud->start;
-			dbg_log("preserve %d:%d, jhead %s, bud bytes %d, "
-				"cmt_bud_bytes %lld", bud->lnum, bud->start,
-				dbg_jhead(bud->jhead), wbuf->offs - bud->start,
-				c->cmt_bud_bytes);
+			dbg_log("preserve %d:%d, jhead %s, bud bytes %d, cmt_bud_bytes %lld",
+				bud->lnum, bud->start, dbg_jhead(bud->jhead),
+				wbuf->offs - bud->start, c->cmt_bud_bytes);
 			bud->start = wbuf->offs;
 		} else {
 			c->cmt_bud_bytes += c->leb_size - bud->start;
-			dbg_log("remove %d:%d, jhead %s, bud bytes %d, "
-				"cmt_bud_bytes %lld", bud->lnum, bud->start,
-				dbg_jhead(bud->jhead), c->leb_size - bud->start,
-				c->cmt_bud_bytes);
+			dbg_log("remove %d:%d, jhead %s, bud bytes %d, cmt_bud_bytes %lld",
+				bud->lnum, bud->start, dbg_jhead(bud->jhead),
+				c->leb_size - bud->start, c->cmt_bud_bytes);
 			rb_erase(p1, &c->buds);
 			/*
 			 * If the commit does not finish, the recovery will need
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c
index 86eb8e533249..132b8e3e7b0b 100644
--- a/fs/ubifs/lprops.c
+++ b/fs/ubifs/lprops.c
@@ -867,15 +867,15 @@ int dbg_check_cats(struct ubifs_info *c)
 
 	list_for_each_entry(lprops, &c->empty_list, list) {
 		if (lprops->free != c->leb_size) {
-			ubifs_err("non-empty LEB %d on empty list "
-				  "(free %d dirty %d flags %d)", lprops->lnum,
-				  lprops->free, lprops->dirty, lprops->flags);
+			ubifs_err("non-empty LEB %d on empty list (free %d dirty %d flags %d)",
+				  lprops->lnum, lprops->free, lprops->dirty,
+				  lprops->flags);
 			return -EINVAL;
 		}
 		if (lprops->flags & LPROPS_TAKEN) {
-			ubifs_err("taken LEB %d on empty list "
-				  "(free %d dirty %d flags %d)", lprops->lnum,
-				  lprops->free, lprops->dirty, lprops->flags);
+			ubifs_err("taken LEB %d on empty list (free %d dirty %d flags %d)",
+				  lprops->lnum, lprops->free, lprops->dirty,
+				  lprops->flags);
 			return -EINVAL;
 		}
 	}
@@ -883,15 +883,15 @@ int dbg_check_cats(struct ubifs_info *c)
 	i = 0;
 	list_for_each_entry(lprops, &c->freeable_list, list) {
 		if (lprops->free + lprops->dirty != c->leb_size) {
-			ubifs_err("non-freeable LEB %d on freeable list "
-				  "(free %d dirty %d flags %d)", lprops->lnum,
-				  lprops->free, lprops->dirty, lprops->flags);
+			ubifs_err("non-freeable LEB %d on freeable list (free %d dirty %d flags %d)",
+				  lprops->lnum, lprops->free, lprops->dirty,
+				  lprops->flags);
 			return -EINVAL;
 		}
 		if (lprops->flags & LPROPS_TAKEN) {
-			ubifs_err("taken LEB %d on freeable list "
-				  "(free %d dirty %d flags %d)", lprops->lnum,
-				  lprops->free, lprops->dirty, lprops->flags);
+			ubifs_err("taken LEB %d on freeable list (free %d dirty %d flags %d)",
+				  lprops->lnum, lprops->free, lprops->dirty,
+				  lprops->flags);
 			return -EINVAL;
 		}
 		i += 1;
@@ -913,21 +913,21 @@ int dbg_check_cats(struct ubifs_info *c)
 
 	list_for_each_entry(lprops, &c->frdi_idx_list, list) {
 		if (lprops->free + lprops->dirty != c->leb_size) {
-			ubifs_err("non-freeable LEB %d on frdi_idx list "
-				  "(free %d dirty %d flags %d)", lprops->lnum,
-				  lprops->free, lprops->dirty, lprops->flags);
+			ubifs_err("non-freeable LEB %d on frdi_idx list (free %d dirty %d flags %d)",
+				  lprops->lnum, lprops->free, lprops->dirty,
+				  lprops->flags);
 			return -EINVAL;
 		}
 		if (lprops->flags & LPROPS_TAKEN) {
-			ubifs_err("taken LEB %d on frdi_idx list "
-				  "(free %d dirty %d flags %d)", lprops->lnum,
-				  lprops->free, lprops->dirty, lprops->flags);
+			ubifs_err("taken LEB %d on frdi_idx list (free %d dirty %d flags %d)",
+				  lprops->lnum, lprops->free, lprops->dirty,
+				  lprops->flags);
 			return -EINVAL;
 		}
 		if (!(lprops->flags & LPROPS_INDEX)) {
-			ubifs_err("non-index LEB %d on frdi_idx list "
-				  "(free %d dirty %d flags %d)", lprops->lnum,
-				  lprops->free, lprops->dirty, lprops->flags);
+			ubifs_err("non-index LEB %d on frdi_idx list (free %d dirty %d flags %d)",
+				  lprops->lnum, lprops->free, lprops->dirty,
+				  lprops->flags);
 			return -EINVAL;
 		}
 	}
@@ -1153,8 +1153,8 @@ static int scan_check_cb(struct ubifs_info *c,
 
 	if (free > c->leb_size || free < 0 || dirty > c->leb_size ||
 	    dirty < 0) {
-		ubifs_err("bad calculated accounting for LEB %d: "
-			  "free %d, dirty %d", lnum, free, dirty);
+		ubifs_err("bad calculated accounting for LEB %d: free %d, dirty %d",
+			  lnum, free, dirty);
 		goto out_destroy;
 	}
 
@@ -1200,8 +1200,7 @@ static int scan_check_cb(struct ubifs_info *c,
 			/* Free but not unmapped LEB, it's fine */
 			is_idx = 0;
 		else {
-			ubifs_err("indexing node without indexing "
-				  "flag");
+			ubifs_err("indexing node without indexing flag");
 			goto out_print;
 		}
 	}
@@ -1236,8 +1235,7 @@ static int scan_check_cb(struct ubifs_info *c,
 	return LPT_SCAN_CONTINUE;
 
 out_print:
-	ubifs_err("bad accounting of LEB %d: free %d, dirty %d flags %#x, "
-		  "should be free %d, dirty %d",
+	ubifs_err("bad accounting of LEB %d: free %d, dirty %d flags %#x, should be free %d, dirty %d",
 		  lnum, lp->free, lp->dirty, lp->flags, free, dirty);
 	ubifs_dump_leb(c, lnum);
 out_destroy:
@@ -1290,12 +1288,10 @@ int dbg_check_lprops(struct ubifs_info *c)
 	    lst.total_dirty != c->lst.total_dirty ||
 	    lst.total_used != c->lst.total_used) {
 		ubifs_err("bad overall accounting");
-		ubifs_err("calculated: empty_lebs %d, idx_lebs %d, "
-			  "total_free %lld, total_dirty %lld, total_used %lld",
+		ubifs_err("calculated: empty_lebs %d, idx_lebs %d, total_free %lld, total_dirty %lld, total_used %lld",
 			  lst.empty_lebs, lst.idx_lebs, lst.total_free,
 			  lst.total_dirty, lst.total_used);
-		ubifs_err("read from lprops: empty_lebs %d, idx_lebs %d, "
-			  "total_free %lld, total_dirty %lld, total_used %lld",
+		ubifs_err("read from lprops: empty_lebs %d, idx_lebs %d, total_free %lld, total_dirty %lld, total_used %lld",
 			  c->lst.empty_lebs, c->lst.idx_lebs, c->lst.total_free,
 			  c->lst.total_dirty, c->lst.total_used);
 		err = -EINVAL;
diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c
index 8640920766ed..c3e6bbf13ba5 100644
--- a/fs/ubifs/lpt.c
+++ b/fs/ubifs/lpt.c
@@ -2237,8 +2237,7 @@ int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode,
 			/* cnode is a nnode */
 			num = calc_nnode_num(row, col);
 			if (cnode->num != num) {
-				ubifs_err("nnode num %d expected %d "
-					  "parent num %d iip %d",
+				ubifs_err("nnode num %d expected %d parent num %d iip %d",
 					  cnode->num, num,
 					  (nnode ? nnode->num : 0), cnode->iip);
 				return -EINVAL;
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index 4fa70734e6e7..74082553e1fc 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -320,8 +320,8 @@ static int layout_cnodes(struct ubifs_info *c)
 	return 0;
 
 no_space:
-	ubifs_err("LPT out of space at LEB %d:%d needing %d, done_ltab %d, "
-		  "done_lsave %d", lnum, offs, len, done_ltab, done_lsave);
+	ubifs_err("LPT out of space at LEB %d:%d needing %d, done_ltab %d, done_lsave %d",
+		  lnum, offs, len, done_ltab, done_lsave);
 	ubifs_dump_lpt_info(c);
 	ubifs_dump_lpt_lebs(c);
 	dump_stack();
@@ -545,8 +545,8 @@ static int write_cnodes(struct ubifs_info *c)
 	return 0;
 
 no_space:
-	ubifs_err("LPT out of space mismatch at LEB %d:%d needing %d, done_ltab "
-		  "%d, done_lsave %d", lnum, offs, len, done_ltab, done_lsave);
+	ubifs_err("LPT out of space mismatch at LEB %d:%d needing %d, done_ltab %d, done_lsave %d",
+		  lnum, offs, len, done_ltab, done_lsave);
 	ubifs_dump_lpt_info(c);
 	ubifs_dump_lpt_lebs(c);
 	dump_stack();
@@ -1668,14 +1668,12 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum)
 			}
 			i = lnum - c->lpt_first;
 			if (len != c->ltab[i].free) {
-				dbg_msg("invalid free space in LEB %d "
-					"(free %d, expected %d)",
+				dbg_msg("invalid free space in LEB %d (free %d, expected %d)",
 					lnum, len, c->ltab[i].free);
 				err = -EINVAL;
 			}
 			if (dirty != c->ltab[i].dirty) {
-				dbg_msg("invalid dirty space in LEB %d "
-					"(dirty %d, expected %d)",
+				dbg_msg("invalid dirty space in LEB %d (dirty %d, expected %d)",
 					lnum, dirty, c->ltab[i].dirty);
 				err = -EINVAL;
 			}
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c
index cebf17ea0458..769701ccb5c9 100644
--- a/fs/ubifs/orphan.c
+++ b/fs/ubifs/orphan.c
@@ -562,8 +562,8 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
 
 	list_for_each_entry(snod, &sleb->nodes, list) {
 		if (snod->type != UBIFS_ORPH_NODE) {
-			ubifs_err("invalid node type %d in orphan area at "
-				  "%d:%d", snod->type, sleb->lnum, snod->offs);
+			ubifs_err("invalid node type %d in orphan area at %d:%d",
+				  snod->type, sleb->lnum, snod->offs);
 			ubifs_dump_node(c, snod->node);
 			return -EINVAL;
 		}
@@ -589,8 +589,7 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
 			 * number. That makes this orphan node, out of date.
 			 */
 			if (!first) {
-				ubifs_err("out of order commit number %llu in "
-					  "orphan node at %d:%d",
+				ubifs_err("out of order commit number %llu in orphan node at %d:%d",
 					  cmt_no, sleb->lnum, snod->offs);
 				ubifs_dump_node(c, snod->node);
 				return -EINVAL;
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index edeec499c048..065096e36ed9 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -609,7 +609,8 @@ static void drop_last_node(struct ubifs_scan_leb *sleb, int *offs)
 		snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node,
 				  list);
 
-		dbg_rcvry("dropping last node at %d:%d", sleb->lnum, snod->offs);
+		dbg_rcvry("dropping last node at %d:%d",
+			  sleb->lnum, snod->offs);
 		*offs = snod->offs;
 		list_del(&snod->list);
 		kfree(snod);
@@ -702,8 +703,8 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
 			 * See header comment for this file for more
 			 * explanations about the reasons we have this check.
 			 */
-			ubifs_err("corrupt empty space LEB %d:%d, corruption "
-				  "starts at %d", lnum, offs, corruption);
+			ubifs_err("corrupt empty space LEB %d:%d, corruption starts at %d",
+				  lnum, offs, corruption);
 			/* Make sure we dump interesting non-0xFF data */
 			offs += corruption;
 			buf += corruption;
@@ -899,8 +900,8 @@ struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum,
 				}
 			}
 			if (snod->sqnum > cs_sqnum) {
-				ubifs_err("unrecoverable log corruption "
-					  "in LEB %d", lnum);
+				ubifs_err("unrecoverable log corruption in LEB %d",
+					  lnum);
 				ubifs_scan_destroy(sleb);
 				return ERR_PTR(-EUCLEAN);
 			}
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
index 94d78fc5d4e0..ede6e5835ced 100644
--- a/fs/ubifs/replay.c
+++ b/fs/ubifs/replay.c
@@ -141,9 +141,9 @@ static int set_bud_lprops(struct ubifs_info *c, struct bud_entry *b)
 		 * during the replay.
 		 */
 		if (dirty != 0)
-			dbg_msg("LEB %d lp: %d free %d dirty "
-				"replay: %d free %d dirty", b->bud->lnum,
-				lp->free, lp->dirty, b->free, b->dirty);
+			dbg_msg("LEB %d lp: %d free %d dirty replay: %d free %d dirty",
+				b->bud->lnum, lp->free, lp->dirty, b->free,
+				b->dirty);
 	}
 	lp = ubifs_change_lp(c, lp, b->free, dirty + b->dirty,
 			     lp->flags | LPROPS_TAKEN, 0);
@@ -677,7 +677,8 @@ static int replay_bud(struct ubifs_info *c, struct bud_entry *b)
 
 	b->dirty = sleb->endpt - offs - used;
 	b->free = c->leb_size - sleb->endpt;
-	dbg_mnt("bud LEB %d replied: dirty %d, free %d", lnum, b->dirty, b->free);
+	dbg_mnt("bud LEB %d replied: dirty %d, free %d",
+		lnum, b->dirty, b->free);
 
 out:
 	ubifs_scan_destroy(sleb);
@@ -865,8 +866,7 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf)
 			goto out_dump;
 		}
 		if (le64_to_cpu(node->cmt_no) != c->cmt_no) {
-			ubifs_err("first CS node at LEB %d:%d has wrong "
-				  "commit number %llu expected %llu",
+			ubifs_err("first CS node at LEB %d:%d has wrong commit number %llu expected %llu",
 				  lnum, offs,
 				  (unsigned long long)le64_to_cpu(node->cmt_no),
 				  c->cmt_no);
@@ -1058,8 +1058,8 @@ int ubifs_replay_journal(struct ubifs_info *c)
 	c->bi.uncommitted_idx *= c->max_idx_node_sz;
 
 	ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery);
-	dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, "
-		"highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum,
+	dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, highest_inum %lu",
+		c->lhead_lnum, c->lhead_offs, c->max_sqnum,
 		(unsigned long)c->highest_inum);
 out:
 	destroy_replay_list(c);
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c
index 15e2fc5aa60b..5b7bfa29ec37 100644
--- a/fs/ubifs/sb.c
+++ b/fs/ubifs/sb.c
@@ -391,9 +391,8 @@ static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup)
 	min_leb_cnt += c->lpt_lebs + c->orph_lebs + c->jhead_cnt + 6;
 
 	if (c->leb_cnt < min_leb_cnt || c->leb_cnt > c->vi.size) {
-		ubifs_err("bad LEB count: %d in superblock, %d on UBI volume, "
-			  "%d minimum required", c->leb_cnt, c->vi.size,
-			  min_leb_cnt);
+		ubifs_err("bad LEB count: %d in superblock, %d on UBI volume, %d minimum required",
+			  c->leb_cnt, c->vi.size, min_leb_cnt);
 		goto failed;
 	}
 
@@ -411,15 +410,14 @@ static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup)
 
 	max_bytes = (long long)c->leb_size * UBIFS_MIN_BUD_LEBS;
 	if (c->max_bud_bytes < max_bytes) {
-		ubifs_err("too small journal (%lld bytes), must be at least "
-			  "%lld bytes",  c->max_bud_bytes, max_bytes);
+		ubifs_err("too small journal (%lld bytes), must be at least %lld bytes",
+			  c->max_bud_bytes, max_bytes);
 		goto failed;
 	}
 
 	max_bytes = (long long)c->leb_size * c->main_lebs;
 	if (c->max_bud_bytes > max_bytes) {
-		ubifs_err("too large journal size (%lld bytes), only %lld bytes"
-			  "available in the main area",
+		ubifs_err("too large journal size (%lld bytes), only %lld bytes available in the main area",
 			  c->max_bud_bytes, max_bytes);
 		goto failed;
 	}
@@ -549,10 +547,9 @@ int ubifs_read_superblock(struct ubifs_info *c)
 		ubifs_assert(!c->ro_media || c->ro_mount);
 		if (!c->ro_mount ||
 		    c->ro_compat_version > UBIFS_RO_COMPAT_VERSION) {
-			ubifs_err("on-flash format version is w%d/r%d, but "
-				  "software only supports up to version "
-				  "w%d/r%d", c->fmt_version,
-				  c->ro_compat_version, UBIFS_FORMAT_VERSION,
+			ubifs_err("on-flash format version is w%d/r%d, but software only supports up to version w%d/r%d",
+				  c->fmt_version, c->ro_compat_version,
+				  UBIFS_FORMAT_VERSION,
 				  UBIFS_RO_COMPAT_VERSION);
 			if (c->ro_compat_version <= UBIFS_RO_COMPAT_VERSION) {
 				ubifs_msg("only R/O mounting is possible");
diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c
index 019ca47a1f8e..58aa05df2bb6 100644
--- a/fs/ubifs/scan.c
+++ b/fs/ubifs/scan.c
@@ -85,7 +85,8 @@ int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum,
 	if (len < UBIFS_CH_SZ)
 		return SCANNED_GARBAGE;
 
-	dbg_scan("scanning %s at LEB %d:%d", dbg_ntype(ch->node_type), lnum, offs);
+	dbg_scan("scanning %s at LEB %d:%d",
+		 dbg_ntype(ch->node_type), lnum, offs);
 
 	if (ubifs_check_node(c, buf, lnum, offs, quiet, 1))
 		return SCANNED_A_CORRUPT_NODE;
@@ -150,8 +151,8 @@ struct ubifs_scan_leb *ubifs_start_scan(const struct ubifs_info *c, int lnum,
 
 	err = ubifs_leb_read(c, lnum, sbuf + offs, offs, c->leb_size - offs, 0);
 	if (err && err != -EBADMSG) {
-		ubifs_err("cannot read %d bytes from LEB %d:%d,"
-			  " error %d", c->leb_size - offs, lnum, offs, err);
+		ubifs_err("cannot read %d bytes from LEB %d:%d, error %d",
+			  c->leb_size - offs, lnum, offs, err);
 		kfree(sleb);
 		return ERR_PTR(err);
 	}
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index d31928975cc8..448bf24ef8d0 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -89,9 +89,8 @@ static int validate_inode(struct ubifs_info *c, const struct inode *inode)
 		return 5;
 
 	if (!ubifs_compr_present(ui->compr_type)) {
-		ubifs_warn("inode %lu uses '%s' compression, but it was not "
-			   "compiled in", inode->i_ino,
-			   ubifs_compr_name(ui->compr_type));
+		ubifs_warn("inode %lu uses '%s' compression, but it was not compiled in",
+			   inode->i_ino, ubifs_compr_name(ui->compr_type));
 	}
 
 	err = dbg_check_dir(c, inode);
@@ -1061,8 +1060,8 @@ static int ubifs_parse_options(struct ubifs_info *c, char *options,
 
 			flag = parse_standard_option(p);
 			if (!flag) {
-				ubifs_err("unrecognized mount option \"%s\" "
-					  "or missing value", p);
+				ubifs_err("unrecognized mount option \"%s\" or missing value",
+					  p);
 				return -EINVAL;
 			}
 			sb->s_flags |= flag;
@@ -1124,8 +1123,8 @@ again:
 		}
 
 		/* Just disable bulk-read */
-		ubifs_warn("Cannot allocate %d bytes of memory for bulk-read, "
-			   "disabling it", c->max_bu_buf_len);
+		ubifs_warn("cannot allocate %d bytes of memory for bulk-read, disabling it",
+			   c->max_bu_buf_len);
 		c->mount_opts.bulk_read = 1;
 		c->bulk_read = 0;
 		return;
@@ -1416,11 +1415,11 @@ static int mount_ubifs(struct ubifs_info *c)
 	if (c->ro_mount)
 		ubifs_msg("mounted read-only");
 	x = (long long)c->main_lebs * c->leb_size;
-	ubifs_msg("file system size:   %lld bytes (%lld KiB, %lld MiB, %d "
-		  "LEBs)", x, x >> 10, x >> 20, c->main_lebs);
+	ubifs_msg("file system size:   %lld bytes (%lld KiB, %lld MiB, %d LEBs)",
+		  x, x >> 10, x >> 20, c->main_lebs);
 	x = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes;
-	ubifs_msg("journal size:       %lld bytes (%lld KiB, %lld MiB, %d "
-		  "LEBs)", x, x >> 10, x >> 20, c->log_lebs + c->max_bud_cnt);
+	ubifs_msg("journal size:       %lld bytes (%lld KiB, %lld MiB, %d LEBs)",
+		  x, x >> 10, x >> 20, c->log_lebs + c->max_bud_cnt);
 	ubifs_msg("media format:       w%d/r%d (latest is w%d/r%d)",
 		  c->fmt_version, c->ro_compat_version,
 		  UBIFS_FORMAT_VERSION, UBIFS_RO_COMPAT_VERSION);
@@ -1563,10 +1562,9 @@ static int ubifs_remount_rw(struct ubifs_info *c)
 
 	if (c->rw_incompat) {
 		ubifs_err("the file-system is not R/W-compatible");
-		ubifs_msg("on-flash format version is w%d/r%d, but software "
-			  "only supports up to version w%d/r%d", c->fmt_version,
-			  c->ro_compat_version, UBIFS_FORMAT_VERSION,
-			  UBIFS_RO_COMPAT_VERSION);
+		ubifs_msg("on-flash format version is w%d/r%d, but software only supports up to version w%d/r%d",
+			  c->fmt_version, c->ro_compat_version,
+			  UBIFS_FORMAT_VERSION, UBIFS_RO_COMPAT_VERSION);
 		return -EROFS;
 	}
 
@@ -1827,8 +1825,8 @@ static void ubifs_put_super(struct super_block *sb)
 				 * next mount, so we just print a message and
 				 * continue to unmount normally.
 				 */
-				ubifs_err("failed to write master node, "
-					  "error %d", err);
+				ubifs_err("failed to write master node, error %d",
+					  err);
 		} else {
 			for (i = 0; i < c->jhead_cnt; i++)
 				/* Make sure write-buffer timers are canceled */
@@ -2247,8 +2245,7 @@ static int __init ubifs_init(void)
 	 * UBIFS_BLOCK_SIZE. It is assumed that both are powers of 2.
 	 */
 	if (PAGE_CACHE_SIZE < UBIFS_BLOCK_SIZE) {
-		ubifs_err("VFS page cache size is %u bytes, but UBIFS requires"
-			  " at least 4096 bytes",
+		ubifs_err("VFS page cache size is %u bytes, but UBIFS requires at least 4096 bytes",
 			  (unsigned int)PAGE_CACHE_SIZE);
 		return -EINVAL;
 	}
-- 
cgit 


From 6b38d03f48da3006085e2d3e45168ed60475f7bb Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Mon, 27 Aug 2012 13:56:19 +0300
Subject: UBIFS: use pr_ helper instead of printk

Use 'pr_err()' instead of 'printk(KERN_ERR', etc.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 fs/ubifs/debug.c      | 482 ++++++++++++++++++++++----------------------------
 fs/ubifs/debug.h      |   6 +-
 fs/ubifs/lpt_commit.c |  36 ++--
 fs/ubifs/ubifs.h      |  13 +-
 4 files changed, 235 insertions(+), 302 deletions(-)

(limited to 'fs')

diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 9f28375fc5b3..2714e02093a4 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -219,15 +219,15 @@ const char *dbg_jhead(int jhead)
 
 static void dump_ch(const struct ubifs_ch *ch)
 {
-	printk(KERN_ERR "\tmagic          %#x\n", le32_to_cpu(ch->magic));
-	printk(KERN_ERR "\tcrc            %#x\n", le32_to_cpu(ch->crc));
-	printk(KERN_ERR "\tnode_type      %d (%s)\n", ch->node_type,
+	pr_err("\tmagic          %#x\n", le32_to_cpu(ch->magic));
+	pr_err("\tcrc            %#x\n", le32_to_cpu(ch->crc));
+	pr_err("\tnode_type      %d (%s)\n", ch->node_type,
 	       dbg_ntype(ch->node_type));
-	printk(KERN_ERR "\tgroup_type     %d (%s)\n", ch->group_type,
+	pr_err("\tgroup_type     %d (%s)\n", ch->group_type,
 	       dbg_gtype(ch->group_type));
-	printk(KERN_ERR "\tsqnum          %llu\n",
+	pr_err("\tsqnum          %llu\n",
 	       (unsigned long long)le64_to_cpu(ch->sqnum));
-	printk(KERN_ERR "\tlen            %u\n", le32_to_cpu(ch->len));
+	pr_err("\tlen            %u\n", le32_to_cpu(ch->len));
 }
 
 void ubifs_dump_inode(struct ubifs_info *c, const struct inode *inode)
@@ -238,43 +238,43 @@ void ubifs_dump_inode(struct ubifs_info *c, const struct inode *inode)
 	struct ubifs_dent_node *dent, *pdent = NULL;
 	int count = 2;
 
-	printk(KERN_ERR "Dump in-memory inode:");
-	printk(KERN_ERR "\tinode          %lu\n", inode->i_ino);
-	printk(KERN_ERR "\tsize           %llu\n",
+	pr_err("Dump in-memory inode:");
+	pr_err("\tinode          %lu\n", inode->i_ino);
+	pr_err("\tsize           %llu\n",
 	       (unsigned long long)i_size_read(inode));
-	printk(KERN_ERR "\tnlink          %u\n", inode->i_nlink);
-	printk(KERN_ERR "\tuid            %u\n", (unsigned int)inode->i_uid);
-	printk(KERN_ERR "\tgid            %u\n", (unsigned int)inode->i_gid);
-	printk(KERN_ERR "\tatime          %u.%u\n",
+	pr_err("\tnlink          %u\n", inode->i_nlink);
+	pr_err("\tuid            %u\n", (unsigned int)inode->i_uid);
+	pr_err("\tgid            %u\n", (unsigned int)inode->i_gid);
+	pr_err("\tatime          %u.%u\n",
 	       (unsigned int)inode->i_atime.tv_sec,
 	       (unsigned int)inode->i_atime.tv_nsec);
-	printk(KERN_ERR "\tmtime          %u.%u\n",
+	pr_err("\tmtime          %u.%u\n",
 	       (unsigned int)inode->i_mtime.tv_sec,
 	       (unsigned int)inode->i_mtime.tv_nsec);
-	printk(KERN_ERR "\tctime          %u.%u\n",
+	pr_err("\tctime          %u.%u\n",
 	       (unsigned int)inode->i_ctime.tv_sec,
 	       (unsigned int)inode->i_ctime.tv_nsec);
-	printk(KERN_ERR "\tcreat_sqnum    %llu\n", ui->creat_sqnum);
-	printk(KERN_ERR "\txattr_size     %u\n", ui->xattr_size);
-	printk(KERN_ERR "\txattr_cnt      %u\n", ui->xattr_cnt);
-	printk(KERN_ERR "\txattr_names    %u\n", ui->xattr_names);
-	printk(KERN_ERR "\tdirty          %u\n", ui->dirty);
-	printk(KERN_ERR "\txattr          %u\n", ui->xattr);
-	printk(KERN_ERR "\tbulk_read      %u\n", ui->xattr);
-	printk(KERN_ERR "\tsynced_i_size  %llu\n",
+	pr_err("\tcreat_sqnum    %llu\n", ui->creat_sqnum);
+	pr_err("\txattr_size     %u\n", ui->xattr_size);
+	pr_err("\txattr_cnt      %u\n", ui->xattr_cnt);
+	pr_err("\txattr_names    %u\n", ui->xattr_names);
+	pr_err("\tdirty          %u\n", ui->dirty);
+	pr_err("\txattr          %u\n", ui->xattr);
+	pr_err("\tbulk_read      %u\n", ui->xattr);
+	pr_err("\tsynced_i_size  %llu\n",
 	       (unsigned long long)ui->synced_i_size);
-	printk(KERN_ERR "\tui_size        %llu\n",
+	pr_err("\tui_size        %llu\n",
 	       (unsigned long long)ui->ui_size);
-	printk(KERN_ERR "\tflags          %d\n", ui->flags);
-	printk(KERN_ERR "\tcompr_type     %d\n", ui->compr_type);
-	printk(KERN_ERR "\tlast_page_read %lu\n", ui->last_page_read);
-	printk(KERN_ERR "\tread_in_a_row  %lu\n", ui->read_in_a_row);
-	printk(KERN_ERR "\tdata_len       %d\n", ui->data_len);
+	pr_err("\tflags          %d\n", ui->flags);
+	pr_err("\tcompr_type     %d\n", ui->compr_type);
+	pr_err("\tlast_page_read %lu\n", ui->last_page_read);
+	pr_err("\tread_in_a_row  %lu\n", ui->read_in_a_row);
+	pr_err("\tdata_len       %d\n", ui->data_len);
 
 	if (!S_ISDIR(inode->i_mode))
 		return;
 
-	printk(KERN_ERR "List of directory entries:\n");
+	pr_err("List of directory entries:\n");
 	ubifs_assert(!mutex_is_locked(&c->tnc_mutex));
 
 	lowest_dent_key(c, &key, inode->i_ino);
@@ -282,11 +282,11 @@ void ubifs_dump_inode(struct ubifs_info *c, const struct inode *inode)
 		dent = ubifs_tnc_next_ent(c, &key, &nm);
 		if (IS_ERR(dent)) {
 			if (PTR_ERR(dent) != -ENOENT)
-				printk(KERN_ERR "error %ld\n", PTR_ERR(dent));
+				pr_err("error %ld\n", PTR_ERR(dent));
 			break;
 		}
 
-		printk(KERN_ERR "\t%d: %s (%s)\n",
+		pr_err("\t%d: %s (%s)\n",
 		       count++, dent->name, get_dent_type(dent->type));
 
 		nm.name = dent->name;
@@ -307,7 +307,7 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node)
 
 	/* If the magic is incorrect, just hexdump the first bytes */
 	if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC) {
-		printk(KERN_ERR "Not a node, first %zu bytes:", UBIFS_CH_SZ);
+		pr_err("Not a node, first %zu bytes:", UBIFS_CH_SZ);
 		print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 32, 1,
 			       (void *)node, UBIFS_CH_SZ, 1);
 		return;
@@ -321,8 +321,7 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node)
 	{
 		const struct ubifs_pad_node *pad = node;
 
-		printk(KERN_ERR "\tpad_len        %u\n",
-		       le32_to_cpu(pad->pad_len));
+		pr_err("\tpad_len        %u\n", le32_to_cpu(pad->pad_len));
 		break;
 	}
 	case UBIFS_SB_NODE:
@@ -330,112 +329,77 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node)
 		const struct ubifs_sb_node *sup = node;
 		unsigned int sup_flags = le32_to_cpu(sup->flags);
 
-		printk(KERN_ERR "\tkey_hash       %d (%s)\n",
+		pr_err("\tkey_hash       %d (%s)\n",
 		       (int)sup->key_hash, get_key_hash(sup->key_hash));
-		printk(KERN_ERR "\tkey_fmt        %d (%s)\n",
+		pr_err("\tkey_fmt        %d (%s)\n",
 		       (int)sup->key_fmt, get_key_fmt(sup->key_fmt));
-		printk(KERN_ERR "\tflags          %#x\n", sup_flags);
-		printk(KERN_ERR "\t  big_lpt      %u\n",
+		pr_err("\tflags          %#x\n", sup_flags);
+		pr_err("\t  big_lpt      %u\n",
 		       !!(sup_flags & UBIFS_FLG_BIGLPT));
-		printk(KERN_ERR "\t  space_fixup  %u\n",
+		pr_err("\t  space_fixup  %u\n",
 		       !!(sup_flags & UBIFS_FLG_SPACE_FIXUP));
-		printk(KERN_ERR "\tmin_io_size    %u\n",
-		       le32_to_cpu(sup->min_io_size));
-		printk(KERN_ERR "\tleb_size       %u\n",
-		       le32_to_cpu(sup->leb_size));
-		printk(KERN_ERR "\tleb_cnt        %u\n",
-		       le32_to_cpu(sup->leb_cnt));
-		printk(KERN_ERR "\tmax_leb_cnt    %u\n",
-		       le32_to_cpu(sup->max_leb_cnt));
-		printk(KERN_ERR "\tmax_bud_bytes  %llu\n",
+		pr_err("\tmin_io_size    %u\n", le32_to_cpu(sup->min_io_size));
+		pr_err("\tleb_size       %u\n", le32_to_cpu(sup->leb_size));
+		pr_err("\tleb_cnt        %u\n", le32_to_cpu(sup->leb_cnt));
+		pr_err("\tmax_leb_cnt    %u\n", le32_to_cpu(sup->max_leb_cnt));
+		pr_err("\tmax_bud_bytes  %llu\n",
 		       (unsigned long long)le64_to_cpu(sup->max_bud_bytes));
-		printk(KERN_ERR "\tlog_lebs       %u\n",
-		       le32_to_cpu(sup->log_lebs));
-		printk(KERN_ERR "\tlpt_lebs       %u\n",
-		       le32_to_cpu(sup->lpt_lebs));
-		printk(KERN_ERR "\torph_lebs      %u\n",
-		       le32_to_cpu(sup->orph_lebs));
-		printk(KERN_ERR "\tjhead_cnt      %u\n",
-		       le32_to_cpu(sup->jhead_cnt));
-		printk(KERN_ERR "\tfanout         %u\n",
-		       le32_to_cpu(sup->fanout));
-		printk(KERN_ERR "\tlsave_cnt      %u\n",
-		       le32_to_cpu(sup->lsave_cnt));
-		printk(KERN_ERR "\tdefault_compr  %u\n",
+		pr_err("\tlog_lebs       %u\n", le32_to_cpu(sup->log_lebs));
+		pr_err("\tlpt_lebs       %u\n", le32_to_cpu(sup->lpt_lebs));
+		pr_err("\torph_lebs      %u\n", le32_to_cpu(sup->orph_lebs));
+		pr_err("\tjhead_cnt      %u\n", le32_to_cpu(sup->jhead_cnt));
+		pr_err("\tfanout         %u\n", le32_to_cpu(sup->fanout));
+		pr_err("\tlsave_cnt      %u\n", le32_to_cpu(sup->lsave_cnt));
+		pr_err("\tdefault_compr  %u\n",
 		       (int)le16_to_cpu(sup->default_compr));
-		printk(KERN_ERR "\trp_size        %llu\n",
+		pr_err("\trp_size        %llu\n",
 		       (unsigned long long)le64_to_cpu(sup->rp_size));
-		printk(KERN_ERR "\trp_uid         %u\n",
-		       le32_to_cpu(sup->rp_uid));
-		printk(KERN_ERR "\trp_gid         %u\n",
-		       le32_to_cpu(sup->rp_gid));
-		printk(KERN_ERR "\tfmt_version    %u\n",
-		       le32_to_cpu(sup->fmt_version));
-		printk(KERN_ERR "\ttime_gran      %u\n",
-		       le32_to_cpu(sup->time_gran));
-		printk(KERN_ERR "\tUUID           %pUB\n",
-		       sup->uuid);
+		pr_err("\trp_uid         %u\n", le32_to_cpu(sup->rp_uid));
+		pr_err("\trp_gid         %u\n", le32_to_cpu(sup->rp_gid));
+		pr_err("\tfmt_version    %u\n", le32_to_cpu(sup->fmt_version));
+		pr_err("\ttime_gran      %u\n", le32_to_cpu(sup->time_gran));
+		pr_err("\tUUID           %pUB\n", sup->uuid);
 		break;
 	}
 	case UBIFS_MST_NODE:
 	{
 		const struct ubifs_mst_node *mst = node;
 
-		printk(KERN_ERR "\thighest_inum   %llu\n",
+		pr_err("\thighest_inum   %llu\n",
 		       (unsigned long long)le64_to_cpu(mst->highest_inum));
-		printk(KERN_ERR "\tcommit number  %llu\n",
+		pr_err("\tcommit number  %llu\n",
 		       (unsigned long long)le64_to_cpu(mst->cmt_no));
-		printk(KERN_ERR "\tflags          %#x\n",
-		       le32_to_cpu(mst->flags));
-		printk(KERN_ERR "\tlog_lnum       %u\n",
-		       le32_to_cpu(mst->log_lnum));
-		printk(KERN_ERR "\troot_lnum      %u\n",
-		       le32_to_cpu(mst->root_lnum));
-		printk(KERN_ERR "\troot_offs      %u\n",
-		       le32_to_cpu(mst->root_offs));
-		printk(KERN_ERR "\troot_len       %u\n",
-		       le32_to_cpu(mst->root_len));
-		printk(KERN_ERR "\tgc_lnum        %u\n",
-		       le32_to_cpu(mst->gc_lnum));
-		printk(KERN_ERR "\tihead_lnum     %u\n",
-		       le32_to_cpu(mst->ihead_lnum));
-		printk(KERN_ERR "\tihead_offs     %u\n",
-		       le32_to_cpu(mst->ihead_offs));
-		printk(KERN_ERR "\tindex_size     %llu\n",
+		pr_err("\tflags          %#x\n", le32_to_cpu(mst->flags));
+		pr_err("\tlog_lnum       %u\n", le32_to_cpu(mst->log_lnum));
+		pr_err("\troot_lnum      %u\n", le32_to_cpu(mst->root_lnum));
+		pr_err("\troot_offs      %u\n", le32_to_cpu(mst->root_offs));
+		pr_err("\troot_len       %u\n", le32_to_cpu(mst->root_len));
+		pr_err("\tgc_lnum        %u\n", le32_to_cpu(mst->gc_lnum));
+		pr_err("\tihead_lnum     %u\n", le32_to_cpu(mst->ihead_lnum));
+		pr_err("\tihead_offs     %u\n", le32_to_cpu(mst->ihead_offs));
+		pr_err("\tindex_size     %llu\n",
 		       (unsigned long long)le64_to_cpu(mst->index_size));
-		printk(KERN_ERR "\tlpt_lnum       %u\n",
-		       le32_to_cpu(mst->lpt_lnum));
-		printk(KERN_ERR "\tlpt_offs       %u\n",
-		       le32_to_cpu(mst->lpt_offs));
-		printk(KERN_ERR "\tnhead_lnum     %u\n",
-		       le32_to_cpu(mst->nhead_lnum));
-		printk(KERN_ERR "\tnhead_offs     %u\n",
-		       le32_to_cpu(mst->nhead_offs));
-		printk(KERN_ERR "\tltab_lnum      %u\n",
-		       le32_to_cpu(mst->ltab_lnum));
-		printk(KERN_ERR "\tltab_offs      %u\n",
-		       le32_to_cpu(mst->ltab_offs));
-		printk(KERN_ERR "\tlsave_lnum     %u\n",
-		       le32_to_cpu(mst->lsave_lnum));
-		printk(KERN_ERR "\tlsave_offs     %u\n",
-		       le32_to_cpu(mst->lsave_offs));
-		printk(KERN_ERR "\tlscan_lnum     %u\n",
-		       le32_to_cpu(mst->lscan_lnum));
-		printk(KERN_ERR "\tleb_cnt        %u\n",
-		       le32_to_cpu(mst->leb_cnt));
-		printk(KERN_ERR "\tempty_lebs     %u\n",
-		       le32_to_cpu(mst->empty_lebs));
-		printk(KERN_ERR "\tidx_lebs       %u\n",
-		       le32_to_cpu(mst->idx_lebs));
-		printk(KERN_ERR "\ttotal_free     %llu\n",
+		pr_err("\tlpt_lnum       %u\n", le32_to_cpu(mst->lpt_lnum));
+		pr_err("\tlpt_offs       %u\n", le32_to_cpu(mst->lpt_offs));
+		pr_err("\tnhead_lnum     %u\n", le32_to_cpu(mst->nhead_lnum));
+		pr_err("\tnhead_offs     %u\n", le32_to_cpu(mst->nhead_offs));
+		pr_err("\tltab_lnum      %u\n", le32_to_cpu(mst->ltab_lnum));
+		pr_err("\tltab_offs      %u\n", le32_to_cpu(mst->ltab_offs));
+		pr_err("\tlsave_lnum     %u\n", le32_to_cpu(mst->lsave_lnum));
+		pr_err("\tlsave_offs     %u\n", le32_to_cpu(mst->lsave_offs));
+		pr_err("\tlscan_lnum     %u\n", le32_to_cpu(mst->lscan_lnum));
+		pr_err("\tleb_cnt        %u\n", le32_to_cpu(mst->leb_cnt));
+		pr_err("\tempty_lebs     %u\n", le32_to_cpu(mst->empty_lebs));
+		pr_err("\tidx_lebs       %u\n", le32_to_cpu(mst->idx_lebs));
+		pr_err("\ttotal_free     %llu\n",
 		       (unsigned long long)le64_to_cpu(mst->total_free));
-		printk(KERN_ERR "\ttotal_dirty    %llu\n",
+		pr_err("\ttotal_dirty    %llu\n",
 		       (unsigned long long)le64_to_cpu(mst->total_dirty));
-		printk(KERN_ERR "\ttotal_used     %llu\n",
+		pr_err("\ttotal_used     %llu\n",
 		       (unsigned long long)le64_to_cpu(mst->total_used));
-		printk(KERN_ERR "\ttotal_dead     %llu\n",
+		pr_err("\ttotal_dead     %llu\n",
 		       (unsigned long long)le64_to_cpu(mst->total_dead));
-		printk(KERN_ERR "\ttotal_dark     %llu\n",
+		pr_err("\ttotal_dark     %llu\n",
 		       (unsigned long long)le64_to_cpu(mst->total_dark));
 		break;
 	}
@@ -443,12 +407,9 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node)
 	{
 		const struct ubifs_ref_node *ref = node;
 
-		printk(KERN_ERR "\tlnum           %u\n",
-		       le32_to_cpu(ref->lnum));
-		printk(KERN_ERR "\toffs           %u\n",
-		       le32_to_cpu(ref->offs));
-		printk(KERN_ERR "\tjhead          %u\n",
-		       le32_to_cpu(ref->jhead));
+		pr_err("\tlnum           %u\n", le32_to_cpu(ref->lnum));
+		pr_err("\toffs           %u\n", le32_to_cpu(ref->offs));
+		pr_err("\tjhead          %u\n", le32_to_cpu(ref->jhead));
 		break;
 	}
 	case UBIFS_INO_NODE:
@@ -456,41 +417,32 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node)
 		const struct ubifs_ino_node *ino = node;
 
 		key_read(c, &ino->key, &key);
-		printk(KERN_ERR "\tkey            %s\n",
+		pr_err("\tkey            %s\n",
 		       dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN));
-		printk(KERN_ERR "\tcreat_sqnum    %llu\n",
+		pr_err("\tcreat_sqnum    %llu\n",
 		       (unsigned long long)le64_to_cpu(ino->creat_sqnum));
-		printk(KERN_ERR "\tsize           %llu\n",
+		pr_err("\tsize           %llu\n",
 		       (unsigned long long)le64_to_cpu(ino->size));
-		printk(KERN_ERR "\tnlink          %u\n",
-		       le32_to_cpu(ino->nlink));
-		printk(KERN_ERR "\tatime          %lld.%u\n",
+		pr_err("\tnlink          %u\n", le32_to_cpu(ino->nlink));
+		pr_err("\tatime          %lld.%u\n",
 		       (long long)le64_to_cpu(ino->atime_sec),
 		       le32_to_cpu(ino->atime_nsec));
-		printk(KERN_ERR "\tmtime          %lld.%u\n",
+		pr_err("\tmtime          %lld.%u\n",
 		       (long long)le64_to_cpu(ino->mtime_sec),
 		       le32_to_cpu(ino->mtime_nsec));
-		printk(KERN_ERR "\tctime          %lld.%u\n",
+		pr_err("\tctime          %lld.%u\n",
 		       (long long)le64_to_cpu(ino->ctime_sec),
 		       le32_to_cpu(ino->ctime_nsec));
-		printk(KERN_ERR "\tuid            %u\n",
-		       le32_to_cpu(ino->uid));
-		printk(KERN_ERR "\tgid            %u\n",
-		       le32_to_cpu(ino->gid));
-		printk(KERN_ERR "\tmode           %u\n",
-		       le32_to_cpu(ino->mode));
-		printk(KERN_ERR "\tflags          %#x\n",
-		       le32_to_cpu(ino->flags));
-		printk(KERN_ERR "\txattr_cnt      %u\n",
-		       le32_to_cpu(ino->xattr_cnt));
-		printk(KERN_ERR "\txattr_size     %u\n",
-		       le32_to_cpu(ino->xattr_size));
-		printk(KERN_ERR "\txattr_names    %u\n",
-		       le32_to_cpu(ino->xattr_names));
-		printk(KERN_ERR "\tcompr_type     %#x\n",
+		pr_err("\tuid            %u\n", le32_to_cpu(ino->uid));
+		pr_err("\tgid            %u\n", le32_to_cpu(ino->gid));
+		pr_err("\tmode           %u\n", le32_to_cpu(ino->mode));
+		pr_err("\tflags          %#x\n", le32_to_cpu(ino->flags));
+		pr_err("\txattr_cnt      %u\n", le32_to_cpu(ino->xattr_cnt));
+		pr_err("\txattr_size     %u\n", le32_to_cpu(ino->xattr_size));
+		pr_err("\txattr_names    %u\n", le32_to_cpu(ino->xattr_names));
+		pr_err("\tcompr_type     %#x\n",
 		       (int)le16_to_cpu(ino->compr_type));
-		printk(KERN_ERR "\tdata len       %u\n",
-		       le32_to_cpu(ino->data_len));
+		pr_err("\tdata len       %u\n", le32_to_cpu(ino->data_len));
 		break;
 	}
 	case UBIFS_DENT_NODE:
@@ -500,21 +452,21 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node)
 		int nlen = le16_to_cpu(dent->nlen);
 
 		key_read(c, &dent->key, &key);
-		printk(KERN_ERR "\tkey            %s\n",
+		pr_err("\tkey            %s\n",
 		       dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN));
-		printk(KERN_ERR "\tinum           %llu\n",
+		pr_err("\tinum           %llu\n",
 		       (unsigned long long)le64_to_cpu(dent->inum));
-		printk(KERN_ERR "\ttype           %d\n", (int)dent->type);
-		printk(KERN_ERR "\tnlen           %d\n", nlen);
-		printk(KERN_ERR "\tname           ");
+		pr_err("\ttype           %d\n", (int)dent->type);
+		pr_err("\tnlen           %d\n", nlen);
+		pr_err("\tname           ");
 
 		if (nlen > UBIFS_MAX_NLEN)
-			printk(KERN_ERR "(bad name length, not printing, bad or corrupted node)");
+			pr_err("(bad name length, not printing, bad or corrupted node)");
 		else {
 			for (i = 0; i < nlen && dent->name[i]; i++)
-				printk(KERN_CONT "%c", dent->name[i]);
+				pr_cont("%c", dent->name[i]);
 		}
-		printk(KERN_CONT "\n");
+		pr_cont("\n");
 
 		break;
 	}
@@ -524,15 +476,13 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node)
 		int dlen = le32_to_cpu(ch->len) - UBIFS_DATA_NODE_SZ;
 
 		key_read(c, &dn->key, &key);
-		printk(KERN_ERR "\tkey            %s\n",
+		pr_err("\tkey            %s\n",
 		       dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN));
-		printk(KERN_ERR "\tsize           %u\n",
-		       le32_to_cpu(dn->size));
-		printk(KERN_ERR "\tcompr_typ      %d\n",
+		pr_err("\tsize           %u\n", le32_to_cpu(dn->size));
+		pr_err("\tcompr_typ      %d\n",
 		       (int)le16_to_cpu(dn->compr_type));
-		printk(KERN_ERR "\tdata size      %d\n",
-		       dlen);
-		printk(KERN_ERR "\tdata:\n");
+		pr_err("\tdata size      %d\n", dlen);
+		pr_err("\tdata:\n");
 		print_hex_dump(KERN_ERR, "\t", DUMP_PREFIX_OFFSET, 32, 1,
 			       (void *)&dn->data, dlen, 0);
 		break;
@@ -541,11 +491,10 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node)
 	{
 		const struct ubifs_trun_node *trun = node;
 
-		printk(KERN_ERR "\tinum           %u\n",
-		       le32_to_cpu(trun->inum));
-		printk(KERN_ERR "\told_size       %llu\n",
+		pr_err("\tinum           %u\n", le32_to_cpu(trun->inum));
+		pr_err("\told_size       %llu\n",
 		       (unsigned long long)le64_to_cpu(trun->old_size));
-		printk(KERN_ERR "\tnew_size       %llu\n",
+		pr_err("\tnew_size       %llu\n",
 		       (unsigned long long)le64_to_cpu(trun->new_size));
 		break;
 	}
@@ -554,17 +503,16 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node)
 		const struct ubifs_idx_node *idx = node;
 
 		n = le16_to_cpu(idx->child_cnt);
-		printk(KERN_ERR "\tchild_cnt      %d\n", n);
-		printk(KERN_ERR "\tlevel          %d\n",
-		       (int)le16_to_cpu(idx->level));
-		printk(KERN_ERR "\tBranches:\n");
+		pr_err("\tchild_cnt      %d\n", n);
+		pr_err("\tlevel          %d\n", (int)le16_to_cpu(idx->level));
+		pr_err("\tBranches:\n");
 
 		for (i = 0; i < n && i < c->fanout - 1; i++) {
 			const struct ubifs_branch *br;
 
 			br = ubifs_idx_branch(c, idx, i);
 			key_read(c, &br->key, &key);
-			printk(KERN_ERR "\t%d: LEB %d:%d len %d key %s\n",
+			pr_err("\t%d: LEB %d:%d len %d key %s\n",
 			       i, le32_to_cpu(br->lnum), le32_to_cpu(br->offs),
 			       le32_to_cpu(br->len),
 			       dbg_snprintf_key(c, &key, key_buf,
@@ -578,20 +526,20 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node)
 	{
 		const struct ubifs_orph_node *orph = node;
 
-		printk(KERN_ERR "\tcommit number  %llu\n",
+		pr_err("\tcommit number  %llu\n",
 		       (unsigned long long)
 				le64_to_cpu(orph->cmt_no) & LLONG_MAX);
-		printk(KERN_ERR "\tlast node flag %llu\n",
+		pr_err("\tlast node flag %llu\n",
 		       (unsigned long long)(le64_to_cpu(orph->cmt_no)) >> 63);
 		n = (le32_to_cpu(ch->len) - UBIFS_ORPH_NODE_SZ) >> 3;
-		printk(KERN_ERR "\t%d orphan inode numbers:\n", n);
+		pr_err("\t%d orphan inode numbers:\n", n);
 		for (i = 0; i < n; i++)
-			printk(KERN_ERR "\t  ino %llu\n",
+			pr_err("\t  ino %llu\n",
 			       (unsigned long long)le64_to_cpu(orph->inos[i]));
 		break;
 	}
 	default:
-		printk(KERN_ERR "node type %d was not recognized\n",
+		pr_err("node type %d was not recognized\n",
 		       (int)ch->node_type);
 	}
 	spin_unlock(&dbg_lock);
@@ -600,16 +548,16 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node)
 void ubifs_dump_budget_req(const struct ubifs_budget_req *req)
 {
 	spin_lock(&dbg_lock);
-	printk(KERN_ERR "Budgeting request: new_ino %d, dirtied_ino %d\n",
+	pr_err("Budgeting request: new_ino %d, dirtied_ino %d\n",
 	       req->new_ino, req->dirtied_ino);
-	printk(KERN_ERR "\tnew_ino_d   %d, dirtied_ino_d %d\n",
+	pr_err("\tnew_ino_d   %d, dirtied_ino_d %d\n",
 	       req->new_ino_d, req->dirtied_ino_d);
-	printk(KERN_ERR "\tnew_page    %d, dirtied_page %d\n",
+	pr_err("\tnew_page    %d, dirtied_page %d\n",
 	       req->new_page, req->dirtied_page);
-	printk(KERN_ERR "\tnew_dent    %d, mod_dent     %d\n",
+	pr_err("\tnew_dent    %d, mod_dent     %d\n",
 	       req->new_dent, req->mod_dent);
-	printk(KERN_ERR "\tidx_growth  %d\n", req->idx_growth);
-	printk(KERN_ERR "\tdata_growth %d dd_growth     %d\n",
+	pr_err("\tidx_growth  %d\n", req->idx_growth);
+	pr_err("\tdata_growth %d dd_growth     %d\n",
 	       req->data_growth, req->dd_growth);
 	spin_unlock(&dbg_lock);
 }
@@ -617,11 +565,11 @@ void ubifs_dump_budget_req(const struct ubifs_budget_req *req)
 void ubifs_dump_lstats(const struct ubifs_lp_stats *lst)
 {
 	spin_lock(&dbg_lock);
-	printk(KERN_ERR "(pid %d) Lprops statistics: empty_lebs %d, idx_lebs  %d\n",
+	pr_err("(pid %d) Lprops statistics: empty_lebs %d, idx_lebs  %d\n",
 	       current->pid, lst->empty_lebs, lst->idx_lebs);
-	printk(KERN_ERR "\ttaken_empty_lebs %d, total_free %lld, total_dirty %lld\n",
+	pr_err("\ttaken_empty_lebs %d, total_free %lld, total_dirty %lld\n",
 	       lst->taken_empty_lebs, lst->total_free, lst->total_dirty);
-	printk(KERN_ERR "\ttotal_used %lld, total_dark %lld, total_dead %lld\n",
+	pr_err("\ttotal_used %lld, total_dark %lld, total_dead %lld\n",
 	       lst->total_used, lst->total_dark, lst->total_dead);
 	spin_unlock(&dbg_lock);
 }
@@ -636,18 +584,17 @@ void ubifs_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi)
 
 	spin_lock(&c->space_lock);
 	spin_lock(&dbg_lock);
-	printk(KERN_ERR "(pid %d) Budgeting info: data budget sum %lld, total budget sum %lld\n",
+	pr_err("(pid %d) Budgeting info: data budget sum %lld, total budget sum %lld\n",
 	       current->pid, bi->data_growth + bi->dd_growth,
 	       bi->data_growth + bi->dd_growth + bi->idx_growth);
-	printk(KERN_ERR "\tbudg_data_growth %lld, budg_dd_growth %lld, budg_idx_growth %lld\n",
+	pr_err("\tbudg_data_growth %lld, budg_dd_growth %lld, budg_idx_growth %lld\n",
 	       bi->data_growth, bi->dd_growth, bi->idx_growth);
-	printk(KERN_ERR "\tmin_idx_lebs %d, old_idx_sz %llu, uncommitted_idx %lld\n",
+	pr_err("\tmin_idx_lebs %d, old_idx_sz %llu, uncommitted_idx %lld\n",
 	       bi->min_idx_lebs, bi->old_idx_sz, bi->uncommitted_idx);
-	printk(KERN_ERR "\tpage_budget %d, inode_budget %d, dent_budget %d\n",
+	pr_err("\tpage_budget %d, inode_budget %d, dent_budget %d\n",
 	       bi->page_budget, bi->inode_budget, bi->dent_budget);
-	printk(KERN_ERR "\tnospace %u, nospace_rp %u\n",
-	       bi->nospace, bi->nospace_rp);
-	printk(KERN_ERR "\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n",
+	pr_err("\tnospace %u, nospace_rp %u\n", bi->nospace, bi->nospace_rp);
+	pr_err("\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n",
 	       c->dark_wm, c->dead_wm, c->max_idx_node_sz);
 
 	if (bi != &c->bi)
@@ -658,38 +605,37 @@ void ubifs_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi)
 		 */
 		goto out_unlock;
 
-	printk(KERN_ERR "\tfreeable_cnt %d, calc_idx_sz %lld, idx_gc_cnt %d\n",
+	pr_err("\tfreeable_cnt %d, calc_idx_sz %lld, idx_gc_cnt %d\n",
 	       c->freeable_cnt, c->calc_idx_sz, c->idx_gc_cnt);
-	printk(KERN_ERR "\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, clean_zn_cnt %ld\n",
+	pr_err("\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, clean_zn_cnt %ld\n",
 	       atomic_long_read(&c->dirty_pg_cnt),
 	       atomic_long_read(&c->dirty_zn_cnt),
 	       atomic_long_read(&c->clean_zn_cnt));
-	printk(KERN_ERR "\tgc_lnum %d, ihead_lnum %d\n",
-	       c->gc_lnum, c->ihead_lnum);
+	pr_err("\tgc_lnum %d, ihead_lnum %d\n", c->gc_lnum, c->ihead_lnum);
 
 	/* If we are in R/O mode, journal heads do not exist */
 	if (c->jheads)
 		for (i = 0; i < c->jhead_cnt; i++)
-			printk(KERN_ERR "\tjhead %s\t LEB %d\n",
+			pr_err("\tjhead %s\t LEB %d\n",
 			       dbg_jhead(c->jheads[i].wbuf.jhead),
 			       c->jheads[i].wbuf.lnum);
 	for (rb = rb_first(&c->buds); rb; rb = rb_next(rb)) {
 		bud = rb_entry(rb, struct ubifs_bud, rb);
-		printk(KERN_ERR "\tbud LEB %d\n", bud->lnum);
+		pr_err("\tbud LEB %d\n", bud->lnum);
 	}
 	list_for_each_entry(bud, &c->old_buds, list)
-		printk(KERN_ERR "\told bud LEB %d\n", bud->lnum);
+		pr_err("\told bud LEB %d\n", bud->lnum);
 	list_for_each_entry(idx_gc, &c->idx_gc, list)
-		printk(KERN_ERR "\tGC'ed idx LEB %d unmap %d\n",
+		pr_err("\tGC'ed idx LEB %d unmap %d\n",
 		       idx_gc->lnum, idx_gc->unmap);
-	printk(KERN_ERR "\tcommit state %d\n", c->cmt_state);
+	pr_err("\tcommit state %d\n", c->cmt_state);
 
 	/* Print budgeting predictions */
 	available = ubifs_calc_available(c, c->bi.min_idx_lebs);
 	outstanding = c->bi.data_growth + c->bi.dd_growth;
 	free = ubifs_get_free_space_nolock(c);
-	printk(KERN_ERR "Budgeting predictions:\n");
-	printk(KERN_ERR "\tavailable: %lld, outstanding %lld, free %lld\n",
+	pr_err("Budgeting predictions:\n");
+	pr_err("\tavailable: %lld, outstanding %lld, free %lld\n",
 	       available, outstanding, free);
 out_unlock:
 	spin_unlock(&dbg_lock);
@@ -709,19 +655,19 @@ void ubifs_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp)
 		dark = ubifs_calc_dark(c, spc);
 
 	if (lp->flags & LPROPS_INDEX)
-		printk(KERN_ERR "LEB %-7d free %-8d dirty %-8d used %-8d free + dirty %-8d flags %#x (",
+		pr_err("LEB %-7d free %-8d dirty %-8d used %-8d free + dirty %-8d flags %#x (",
 		       lp->lnum, lp->free, lp->dirty, c->leb_size - spc, spc,
 		       lp->flags);
 	else
-		printk(KERN_ERR "LEB %-7d free %-8d dirty %-8d used %-8d free + dirty %-8d dark %-4d dead %-4d nodes fit %-3d flags %#-4x (",
+		pr_err("LEB %-7d free %-8d dirty %-8d used %-8d free + dirty %-8d dark %-4d dead %-4d nodes fit %-3d flags %#-4x (",
 		       lp->lnum, lp->free, lp->dirty, c->leb_size - spc, spc,
 		       dark, dead, (int)(spc / UBIFS_MAX_NODE_SZ), lp->flags);
 
 	if (lp->flags & LPROPS_TAKEN) {
 		if (lp->flags & LPROPS_INDEX)
-			printk(KERN_CONT "index, taken");
+			pr_cont("index, taken");
 		else
-			printk(KERN_CONT "taken");
+			pr_cont("taken");
 	} else {
 		const char *s;
 
@@ -758,7 +704,7 @@ void ubifs_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp)
 				break;
 			}
 		}
-		printk(KERN_CONT "%s", s);
+		pr_cont("%s", s);
 	}
 
 	for (rb = rb_first((struct rb_root *)&c->buds); rb; rb = rb_next(rb)) {
@@ -773,19 +719,18 @@ void ubifs_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp)
 				 */
 				if (c->jheads &&
 				    lp->lnum == c->jheads[i].wbuf.lnum) {
-					printk(KERN_CONT ", jhead %s",
-					       dbg_jhead(i));
+					pr_cont(", jhead %s", dbg_jhead(i));
 					head = 1;
 				}
 			}
 			if (!head)
-				printk(KERN_CONT ", bud of jhead %s",
+				pr_cont(", bud of jhead %s",
 				       dbg_jhead(bud->jhead));
 		}
 	}
 	if (lp->lnum == c->gc_lnum)
-		printk(KERN_CONT ", GC LEB");
-	printk(KERN_CONT ")\n");
+		pr_cont(", GC LEB");
+	pr_cont(")\n");
 }
 
 void ubifs_dump_lprops(struct ubifs_info *c)
@@ -794,8 +739,7 @@ void ubifs_dump_lprops(struct ubifs_info *c)
 	struct ubifs_lprops lp;
 	struct ubifs_lp_stats lst;
 
-	printk(KERN_ERR "(pid %d) start dumping LEB properties\n",
-	       current->pid);
+	pr_err("(pid %d) start dumping LEB properties\n", current->pid);
 	ubifs_get_lp_stats(c, &lst);
 	ubifs_dump_lstats(&lst);
 
@@ -806,8 +750,7 @@ void ubifs_dump_lprops(struct ubifs_info *c)
 
 		ubifs_dump_lprop(c, &lp);
 	}
-	printk(KERN_ERR "(pid %d) finish dumping LEB properties\n",
-	       current->pid);
+	pr_err("(pid %d) finish dumping LEB properties\n", current->pid);
 }
 
 void ubifs_dump_lpt_info(struct ubifs_info *c)
@@ -815,35 +758,34 @@ void ubifs_dump_lpt_info(struct ubifs_info *c)
 	int i;
 
 	spin_lock(&dbg_lock);
-	printk(KERN_ERR "(pid %d) dumping LPT information\n", current->pid);
-	printk(KERN_ERR "\tlpt_sz:        %lld\n", c->lpt_sz);
-	printk(KERN_ERR "\tpnode_sz:      %d\n", c->pnode_sz);
-	printk(KERN_ERR "\tnnode_sz:      %d\n", c->nnode_sz);
-	printk(KERN_ERR "\tltab_sz:       %d\n", c->ltab_sz);
-	printk(KERN_ERR "\tlsave_sz:      %d\n", c->lsave_sz);
-	printk(KERN_ERR "\tbig_lpt:       %d\n", c->big_lpt);
-	printk(KERN_ERR "\tlpt_hght:      %d\n", c->lpt_hght);
-	printk(KERN_ERR "\tpnode_cnt:     %d\n", c->pnode_cnt);
-	printk(KERN_ERR "\tnnode_cnt:     %d\n", c->nnode_cnt);
-	printk(KERN_ERR "\tdirty_pn_cnt:  %d\n", c->dirty_pn_cnt);
-	printk(KERN_ERR "\tdirty_nn_cnt:  %d\n", c->dirty_nn_cnt);
-	printk(KERN_ERR "\tlsave_cnt:     %d\n", c->lsave_cnt);
-	printk(KERN_ERR "\tspace_bits:    %d\n", c->space_bits);
-	printk(KERN_ERR "\tlpt_lnum_bits: %d\n", c->lpt_lnum_bits);
-	printk(KERN_ERR "\tlpt_offs_bits: %d\n", c->lpt_offs_bits);
-	printk(KERN_ERR "\tlpt_spc_bits:  %d\n", c->lpt_spc_bits);
-	printk(KERN_ERR "\tpcnt_bits:     %d\n", c->pcnt_bits);
-	printk(KERN_ERR "\tlnum_bits:     %d\n", c->lnum_bits);
-	printk(KERN_ERR "\tLPT root is at %d:%d\n", c->lpt_lnum, c->lpt_offs);
-	printk(KERN_ERR "\tLPT head is at %d:%d\n",
+	pr_err("(pid %d) dumping LPT information\n", current->pid);
+	pr_err("\tlpt_sz:        %lld\n", c->lpt_sz);
+	pr_err("\tpnode_sz:      %d\n", c->pnode_sz);
+	pr_err("\tnnode_sz:      %d\n", c->nnode_sz);
+	pr_err("\tltab_sz:       %d\n", c->ltab_sz);
+	pr_err("\tlsave_sz:      %d\n", c->lsave_sz);
+	pr_err("\tbig_lpt:       %d\n", c->big_lpt);
+	pr_err("\tlpt_hght:      %d\n", c->lpt_hght);
+	pr_err("\tpnode_cnt:     %d\n", c->pnode_cnt);
+	pr_err("\tnnode_cnt:     %d\n", c->nnode_cnt);
+	pr_err("\tdirty_pn_cnt:  %d\n", c->dirty_pn_cnt);
+	pr_err("\tdirty_nn_cnt:  %d\n", c->dirty_nn_cnt);
+	pr_err("\tlsave_cnt:     %d\n", c->lsave_cnt);
+	pr_err("\tspace_bits:    %d\n", c->space_bits);
+	pr_err("\tlpt_lnum_bits: %d\n", c->lpt_lnum_bits);
+	pr_err("\tlpt_offs_bits: %d\n", c->lpt_offs_bits);
+	pr_err("\tlpt_spc_bits:  %d\n", c->lpt_spc_bits);
+	pr_err("\tpcnt_bits:     %d\n", c->pcnt_bits);
+	pr_err("\tlnum_bits:     %d\n", c->lnum_bits);
+	pr_err("\tLPT root is at %d:%d\n", c->lpt_lnum, c->lpt_offs);
+	pr_err("\tLPT head is at %d:%d\n",
 	       c->nhead_lnum, c->nhead_offs);
-	printk(KERN_ERR "\tLPT ltab is at %d:%d\n",
-	       c->ltab_lnum, c->ltab_offs);
+	pr_err("\tLPT ltab is at %d:%d\n", c->ltab_lnum, c->ltab_offs);
 	if (c->big_lpt)
-		printk(KERN_ERR "\tLPT lsave is at %d:%d\n",
+		pr_err("\tLPT lsave is at %d:%d\n",
 		       c->lsave_lnum, c->lsave_offs);
 	for (i = 0; i < c->lpt_lebs; i++)
-		printk(KERN_ERR "\tLPT LEB %d free %d dirty %d tgc %d cmt %d\n",
+		pr_err("\tLPT LEB %d free %d dirty %d tgc %d cmt %d\n",
 		       i + c->lpt_first, c->ltab[i].free, c->ltab[i].dirty,
 		       c->ltab[i].tgc, c->ltab[i].cmt);
 	spin_unlock(&dbg_lock);
@@ -854,12 +796,12 @@ void ubifs_dump_sleb(const struct ubifs_info *c,
 {
 	struct ubifs_scan_node *snod;
 
-	printk(KERN_ERR "(pid %d) start dumping scanned data from LEB %d:%d\n",
+	pr_err("(pid %d) start dumping scanned data from LEB %d:%d\n",
 	       current->pid, sleb->lnum, offs);
 
 	list_for_each_entry(snod, &sleb->nodes, list) {
 		cond_resched();
-		printk(KERN_ERR "Dumping node at LEB %d:%d len %d\n",
+		pr_err("Dumping node at LEB %d:%d len %d\n",
 		       sleb->lnum, snod->offs, snod->len);
 		ubifs_dump_node(c, snod->node);
 	}
@@ -871,8 +813,7 @@ void ubifs_dump_leb(const struct ubifs_info *c, int lnum)
 	struct ubifs_scan_node *snod;
 	void *buf;
 
-	printk(KERN_ERR "(pid %d) start dumping LEB %d\n",
-	       current->pid, lnum);
+	pr_err("(pid %d) start dumping LEB %d\n", current->pid, lnum);
 
 	buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
 	if (!buf) {
@@ -886,18 +827,17 @@ void ubifs_dump_leb(const struct ubifs_info *c, int lnum)
 		goto out;
 	}
 
-	printk(KERN_ERR "LEB %d has %d nodes ending at %d\n", lnum,
+	pr_err("LEB %d has %d nodes ending at %d\n", lnum,
 	       sleb->nodes_cnt, sleb->endpt);
 
 	list_for_each_entry(snod, &sleb->nodes, list) {
 		cond_resched();
-		printk(KERN_ERR "Dumping node at LEB %d:%d len %d\n", lnum,
+		pr_err("Dumping node at LEB %d:%d len %d\n", lnum,
 		       snod->offs, snod->len);
 		ubifs_dump_node(c, snod->node);
 	}
 
-	printk(KERN_ERR "(pid %d) finish dumping LEB %d\n",
-	       current->pid, lnum);
+	pr_err("(pid %d) finish dumping LEB %d\n", current->pid, lnum);
 	ubifs_scan_destroy(sleb);
 
 out:
@@ -918,7 +858,7 @@ void ubifs_dump_znode(const struct ubifs_info *c,
 	else
 		zbr = &c->zroot;
 
-	printk(KERN_ERR "znode %p, LEB %d:%d len %d parent %p iip %d level %d child_cnt %d flags %lx\n",
+	pr_err("znode %p, LEB %d:%d len %d parent %p iip %d level %d child_cnt %d flags %lx\n",
 	       znode, zbr->lnum, zbr->offs, zbr->len, znode->parent, znode->iip,
 	       znode->level, znode->child_cnt, znode->flags);
 
@@ -927,16 +867,16 @@ void ubifs_dump_znode(const struct ubifs_info *c,
 		return;
 	}
 
-	printk(KERN_ERR "zbranches:\n");
+	pr_err("zbranches:\n");
 	for (n = 0; n < znode->child_cnt; n++) {
 		zbr = &znode->zbranch[n];
 		if (znode->level > 0)
-			printk(KERN_ERR "\t%d: znode %p LEB %d:%d len %d key %s\n",
+			pr_err("\t%d: znode %p LEB %d:%d len %d key %s\n",
 			       n, zbr->znode, zbr->lnum, zbr->offs, zbr->len,
 			       dbg_snprintf_key(c, &zbr->key, key_buf,
 						DBG_KEY_BUF_LEN));
 		else
-			printk(KERN_ERR "\t%d: LNC %p LEB %d:%d len %d key %s\n",
+			pr_err("\t%d: LNC %p LEB %d:%d len %d key %s\n",
 			       n, zbr->znode, zbr->lnum, zbr->offs, zbr->len,
 			       dbg_snprintf_key(c, &zbr->key, key_buf,
 						DBG_KEY_BUF_LEN));
@@ -948,16 +888,16 @@ void ubifs_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat)
 {
 	int i;
 
-	printk(KERN_ERR "(pid %d) start dumping heap cat %d (%d elements)\n",
+	pr_err("(pid %d) start dumping heap cat %d (%d elements)\n",
 	       current->pid, cat, heap->cnt);
 	for (i = 0; i < heap->cnt; i++) {
 		struct ubifs_lprops *lprops = heap->arr[i];
 
-		printk(KERN_ERR "\t%d. LEB %d hpos %d free %d dirty %d flags %d\n",
+		pr_err("\t%d. LEB %d hpos %d free %d dirty %d flags %d\n",
 		       i, lprops->lnum, lprops->hpos, lprops->free,
 		       lprops->dirty, lprops->flags);
 	}
-	printk(KERN_ERR "(pid %d) finish dumping heap\n", current->pid);
+	pr_err("(pid %d) finish dumping heap\n", current->pid);
 }
 
 void ubifs_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
@@ -965,15 +905,15 @@ void ubifs_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
 {
 	int i;
 
-	printk(KERN_ERR "(pid %d) dumping pnode:\n", current->pid);
-	printk(KERN_ERR "\taddress %zx parent %zx cnext %zx\n",
+	pr_err("(pid %d) dumping pnode:\n", current->pid);
+	pr_err("\taddress %zx parent %zx cnext %zx\n",
 	       (size_t)pnode, (size_t)parent, (size_t)pnode->cnext);
-	printk(KERN_ERR "\tflags %lu iip %d level %d num %d\n",
+	pr_err("\tflags %lu iip %d level %d num %d\n",
 	       pnode->flags, iip, pnode->level, pnode->num);
 	for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
 		struct ubifs_lprops *lp = &pnode->lprops[i];
 
-		printk(KERN_ERR "\t%d: free %d dirty %d flags %d lnum %d\n",
+		pr_err("\t%d: free %d dirty %d flags %d lnum %d\n",
 		       i, lp->free, lp->dirty, lp->flags, lp->lnum);
 	}
 }
@@ -983,20 +923,20 @@ void ubifs_dump_tnc(struct ubifs_info *c)
 	struct ubifs_znode *znode;
 	int level;
 
-	printk(KERN_ERR "\n");
-	printk(KERN_ERR "(pid %d) start dumping TNC tree\n", current->pid);
+	pr_err("\n");
+	pr_err("(pid %d) start dumping TNC tree\n", current->pid);
 	znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL);
 	level = znode->level;
-	printk(KERN_ERR "== Level %d ==\n", level);
+	pr_err("== Level %d ==\n", level);
 	while (znode) {
 		if (level != znode->level) {
 			level = znode->level;
-			printk(KERN_ERR "== Level %d ==\n", level);
+			pr_err("== Level %d ==\n", level);
 		}
 		ubifs_dump_znode(c, znode);
 		znode = ubifs_tnc_levelorder_next(c->zroot.znode, znode);
 	}
-	printk(KERN_ERR "(pid %d) finish dumping TNC tree\n", current->pid);
+	pr_err("(pid %d) finish dumping TNC tree\n", current->pid);
 }
 
 static int dump_znode(struct ubifs_info *c, struct ubifs_znode *znode,
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
index 46b8dd123585..38230b1e544e 100644
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h
@@ -150,7 +150,7 @@ struct ubifs_global_debug_info {
 
 #define ubifs_assert(expr) do {                                                \
 	if (unlikely(!(expr))) {                                               \
-		printk(KERN_CRIT "UBIFS assert failed in %s at %u (pid %d)\n", \
+		pr_crit("UBIFS assert failed in %s at %u (pid %d)\n",          \
 		       __func__, __LINE__, current->pid);                      \
 		dump_stack();                                                  \
 	}                                                                      \
@@ -159,7 +159,7 @@ struct ubifs_global_debug_info {
 #define ubifs_assert_cmt_locked(c) do {                                        \
 	if (unlikely(down_write_trylock(&(c)->commit_sem))) {                  \
 		up_write(&(c)->commit_sem);                                    \
-		printk(KERN_CRIT "commit lock is not locked!\n");              \
+		pr_crit("commit lock is not locked!\n");                       \
 		ubifs_assert(0);                                               \
 	}                                                                      \
 } while (0)
@@ -178,7 +178,7 @@ struct ubifs_global_debug_info {
 
 /* Just a debugging messages not related to any specific UBIFS subsystem */
 #define dbg_msg(fmt, ...)                                                      \
-	printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", current->pid,   \
+	pr_err("UBIFS DBG (pid %d): %s: " fmt "\n", current->pid,              \
 	       __func__, ##__VA_ARGS__)
 
 /* General messages */
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index 74082553e1fc..a1de3cf9dba2 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -1886,8 +1886,7 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum)
 	int err, len = c->leb_size, node_type, node_num, node_len, offs;
 	void *buf, *p;
 
-	printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n",
-	       current->pid, lnum);
+	pr_err("(pid %d) start dumping LEB %d\n", current->pid, lnum);
 	buf = p = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
 	if (!buf) {
 		ubifs_err("cannot allocate memory to dump LPT");
@@ -1905,14 +1904,14 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum)
 
 			pad_len = get_pad_len(c, p, len);
 			if (pad_len) {
-				printk(KERN_DEBUG "LEB %d:%d, pad %d bytes\n",
+				pr_err("LEB %d:%d, pad %d bytes\n",
 				       lnum, offs, pad_len);
 				p += pad_len;
 				len -= pad_len;
 				continue;
 			}
 			if (len)
-				printk(KERN_DEBUG "LEB %d:%d, free %d bytes\n",
+				pr_err("LEB %d:%d, free %d bytes\n",
 				       lnum, offs, len);
 			break;
 		}
@@ -1923,11 +1922,10 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum)
 		{
 			node_len = c->pnode_sz;
 			if (c->big_lpt)
-				printk(KERN_DEBUG "LEB %d:%d, pnode num %d\n",
+				pr_err("LEB %d:%d, pnode num %d\n",
 				       lnum, offs, node_num);
 			else
-				printk(KERN_DEBUG "LEB %d:%d, pnode\n",
-				       lnum, offs);
+				pr_err("LEB %d:%d, pnode\n", lnum, offs);
 			break;
 		}
 		case UBIFS_LPT_NNODE:
@@ -1937,29 +1935,28 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum)
 
 			node_len = c->nnode_sz;
 			if (c->big_lpt)
-				printk(KERN_DEBUG "LEB %d:%d, nnode num %d, ",
+				pr_err("LEB %d:%d, nnode num %d, ",
 				       lnum, offs, node_num);
 			else
-				printk(KERN_DEBUG "LEB %d:%d, nnode, ",
+				pr_err("LEB %d:%d, nnode, ",
 				       lnum, offs);
 			err = ubifs_unpack_nnode(c, p, &nnode);
 			for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
-				printk(KERN_CONT "%d:%d", nnode.nbranch[i].lnum,
+				pr_cont("%d:%d", nnode.nbranch[i].lnum,
 				       nnode.nbranch[i].offs);
 				if (i != UBIFS_LPT_FANOUT - 1)
-					printk(KERN_CONT ", ");
+					pr_cont(", ");
 			}
-			printk(KERN_CONT "\n");
+			pr_cont("\n");
 			break;
 		}
 		case UBIFS_LPT_LTAB:
 			node_len = c->ltab_sz;
-			printk(KERN_DEBUG "LEB %d:%d, ltab\n",
-			       lnum, offs);
+			pr_err("LEB %d:%d, ltab\n", lnum, offs);
 			break;
 		case UBIFS_LPT_LSAVE:
 			node_len = c->lsave_sz;
-			printk(KERN_DEBUG "LEB %d:%d, lsave len\n", lnum, offs);
+			pr_err("LEB %d:%d, lsave len\n", lnum, offs);
 			break;
 		default:
 			ubifs_err("LPT node type %d not recognized", node_type);
@@ -1970,8 +1967,7 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum)
 		len -= node_len;
 	}
 
-	printk(KERN_DEBUG "(pid %d) finish dumping LEB %d\n",
-	       current->pid, lnum);
+	pr_err("(pid %d) finish dumping LEB %d\n", current->pid, lnum);
 out:
 	vfree(buf);
 	return;
@@ -1988,12 +1984,10 @@ void ubifs_dump_lpt_lebs(const struct ubifs_info *c)
 {
 	int i;
 
-	printk(KERN_DEBUG "(pid %d) start dumping all LPT LEBs\n",
-	       current->pid);
+	pr_err("(pid %d) start dumping all LPT LEBs\n", current->pid);
 	for (i = 0; i < c->lpt_lebs; i++)
 		dump_lpt_leb(c, i + c->lpt_first);
-	printk(KERN_DEBUG "(pid %d) finish dumping all LPT LEBs\n",
-	       current->pid);
+	pr_err("(pid %d) finish dumping all LPT LEBs\n", current->pid);
 }
 
 /**
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 1e5a08623d11..e6a9275fadb0 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -42,16 +42,15 @@
 #define UBIFS_VERSION 1
 
 /* Normal UBIFS messages */
-#define ubifs_msg(fmt, ...) \
-		printk(KERN_NOTICE "UBIFS: " fmt "\n", ##__VA_ARGS__)
+#define ubifs_msg(fmt, ...) pr_notice("UBIFS: " fmt "\n", ##__VA_ARGS__)
 /* UBIFS error messages */
-#define ubifs_err(fmt, ...)                                                  \
-	printk(KERN_ERR "UBIFS error (pid %d): %s: " fmt "\n", current->pid, \
+#define ubifs_err(fmt, ...)                                         \
+	pr_err("UBIFS error (pid %d): %s: " fmt "\n", current->pid, \
 	       __func__, ##__VA_ARGS__)
 /* UBIFS warning messages */
-#define ubifs_warn(fmt, ...)                                         \
-	printk(KERN_WARNING "UBIFS warning (pid %d): %s: " fmt "\n", \
-	       current->pid, __func__, ##__VA_ARGS__)
+#define ubifs_warn(fmt, ...)                                        \
+	pr_warn("UBIFS warning (pid %d): %s: " fmt "\n",            \
+		current->pid, __func__, ##__VA_ARGS__)
 
 /* UBIFS file system VFS magic number */
 #define UBIFS_SUPER_MAGIC 0x24051905
-- 
cgit 


From 3668b70fcf1fdc6799abf15f70fe3f50f407ec82 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Mon, 27 Aug 2012 16:56:58 +0300
Subject: UBIFS: print less
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

UBIFS currently prints a lot of information when it mounts a volume, which
bothers some people. Make it less chatty - print only important information
by default.

Get rid of 'dbg_msg()' macro completely.

Reported-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 fs/ubifs/commit.c     |  6 ++--
 fs/ubifs/debug.h      |  5 ---
 fs/ubifs/lprops.c     |  8 ++---
 fs/ubifs/lpt.c        |  2 +-
 fs/ubifs/lpt_commit.c | 12 +++----
 fs/ubifs/replay.c     |  2 +-
 fs/ubifs/super.c      | 91 ++++++++++++++++++++++++---------------------------
 fs/ubifs/tnc_misc.c   |  4 +--
 8 files changed, 60 insertions(+), 70 deletions(-)

(limited to 'fs')

diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c
index a054cafd614b..ff8229340cd5 100644
--- a/fs/ubifs/commit.c
+++ b/fs/ubifs/commit.c
@@ -293,8 +293,8 @@ int ubifs_bg_thread(void *info)
 	int err;
 	struct ubifs_info *c = info;
 
-	dbg_msg("background thread \"%s\" started, PID %d",
-		c->bgt_name, current->pid);
+	ubifs_msg("background thread \"%s\" started, PID %d",
+		  c->bgt_name, current->pid);
 	set_freezable();
 
 	while (1) {
@@ -328,7 +328,7 @@ int ubifs_bg_thread(void *info)
 		cond_resched();
 	}
 
-	dbg_msg("background thread \"%s\" stops", c->bgt_name);
+	ubifs_msg("background thread \"%s\" stops", c->bgt_name);
 	return 0;
 }
 
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
index 38230b1e544e..e03d5179769a 100644
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h
@@ -176,11 +176,6 @@ struct ubifs_global_debug_info {
 		 dbg_snprintf_key(c, key, __tmp_key_buf, DBG_KEY_BUF_LEN));    \
 } while (0)
 
-/* Just a debugging messages not related to any specific UBIFS subsystem */
-#define dbg_msg(fmt, ...)                                                      \
-	pr_err("UBIFS DBG (pid %d): %s: " fmt "\n", current->pid,              \
-	       __func__, ##__VA_ARGS__)
-
 /* General messages */
 #define dbg_gen(fmt, ...)   ubifs_dbg_msg("gen", fmt, ##__VA_ARGS__)
 /* Additional journal messages */
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c
index 132b8e3e7b0b..e5a2a35a46dc 100644
--- a/fs/ubifs/lprops.c
+++ b/fs/ubifs/lprops.c
@@ -982,9 +982,9 @@ void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat,
 			goto out;
 		}
 		if (lprops != lp) {
-			dbg_msg("lprops %zx lp %zx lprops->lnum %d lp->lnum %d",
-				(size_t)lprops, (size_t)lp, lprops->lnum,
-				lp->lnum);
+			ubifs_err("lprops %zx lp %zx lprops->lnum %d lp->lnum %d",
+				  (size_t)lprops, (size_t)lp, lprops->lnum,
+				  lp->lnum);
 			err = 4;
 			goto out;
 		}
@@ -1002,7 +1002,7 @@ void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat,
 	}
 out:
 	if (err) {
-		dbg_msg("failed cat %d hpos %d err %d", cat, i, err);
+		ubifs_err("failed cat %d hpos %d err %d", cat, i, err);
 		dump_stack();
 		ubifs_dump_heap(c, heap, cat);
 	}
diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c
index c3e6bbf13ba5..d46b19ec1815 100644
--- a/fs/ubifs/lpt.c
+++ b/fs/ubifs/lpt.c
@@ -1311,7 +1311,7 @@ out:
 	ubifs_err("error %d reading pnode at %d:%d", err, lnum, offs);
 	ubifs_dump_pnode(c, pnode, parent, iip);
 	dump_stack();
-	dbg_msg("calc num: %d", calc_pnode_num_from_parent(c, parent, iip));
+	ubifs_err("calc num: %d", calc_pnode_num_from_parent(c, parent, iip));
 	kfree(pnode);
 	return err;
 }
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index a1de3cf9dba2..9daaeef675dd 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -1662,19 +1662,19 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum)
 				continue;
 			}
 			if (!dbg_is_all_ff(p, len)) {
-				dbg_msg("invalid empty space in LEB %d at %d",
-					lnum, c->leb_size - len);
+				ubifs_err("invalid empty space in LEB %d at %d",
+					  lnum, c->leb_size - len);
 				err = -EINVAL;
 			}
 			i = lnum - c->lpt_first;
 			if (len != c->ltab[i].free) {
-				dbg_msg("invalid free space in LEB %d (free %d, expected %d)",
-					lnum, len, c->ltab[i].free);
+				ubifs_err("invalid free space in LEB %d (free %d, expected %d)",
+					  lnum, len, c->ltab[i].free);
 				err = -EINVAL;
 			}
 			if (dirty != c->ltab[i].dirty) {
-				dbg_msg("invalid dirty space in LEB %d (dirty %d, expected %d)",
-					lnum, dirty, c->ltab[i].dirty);
+				ubifs_err("invalid dirty space in LEB %d (dirty %d, expected %d)",
+					  lnum, dirty, c->ltab[i].dirty);
 				err = -EINVAL;
 			}
 			goto out;
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
index ede6e5835ced..3187925e9879 100644
--- a/fs/ubifs/replay.c
+++ b/fs/ubifs/replay.c
@@ -141,7 +141,7 @@ static int set_bud_lprops(struct ubifs_info *c, struct bud_entry *b)
 		 * during the replay.
 		 */
 		if (dirty != 0)
-			dbg_msg("LEB %d lp: %d free %d dirty replay: %d free %d dirty",
+			dbg_mnt("LEB %d lp: %d free %d dirty replay: %d free %d dirty",
 				b->bud->lnum, lp->free, lp->dirty, b->free,
 				b->dirty);
 	}
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 448bf24ef8d0..7d51f2802bda 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1160,7 +1160,7 @@ static int check_free_space(struct ubifs_info *c)
 static int mount_ubifs(struct ubifs_info *c)
 {
 	int err;
-	long long x;
+	long long x, y;
 	size_t sz;
 
 	c->ro_mount = !!(c->vfs_sb->s_flags & MS_RDONLY);
@@ -1410,74 +1410,69 @@ static int mount_ubifs(struct ubifs_info *c)
 
 	c->mounting = 0;
 
-	ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"",
-		  c->vi.ubi_num, c->vi.vol_id, c->vi.name);
-	if (c->ro_mount)
-		ubifs_msg("mounted read-only");
+	ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"%s",
+		  c->vi.ubi_num, c->vi.vol_id, c->vi.name,
+		  c->ro_mount ? ", R/O mode" : NULL);
 	x = (long long)c->main_lebs * c->leb_size;
-	ubifs_msg("file system size:   %lld bytes (%lld KiB, %lld MiB, %d LEBs)",
-		  x, x >> 10, x >> 20, c->main_lebs);
-	x = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes;
-	ubifs_msg("journal size:       %lld bytes (%lld KiB, %lld MiB, %d LEBs)",
-		  x, x >> 10, x >> 20, c->log_lebs + c->max_bud_cnt);
-	ubifs_msg("media format:       w%d/r%d (latest is w%d/r%d)",
+	y = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes;
+	ubifs_msg("LEB size: %d bytes (%d KiB), min./max. I/O unit sizes: %d bytes/%d bytes",
+		  c->leb_size, c->leb_size >> 10, c->min_io_size,
+		  c->max_write_size);
+	ubifs_msg("FS size: %lld bytes (%lld MiB, %d LEBs), journal size %lld bytes (%lld MiB, %d LEBs)",
+		  x, x >> 20, c->main_lebs,
+		  y, y >> 20, c->log_lebs + c->max_bud_cnt);
+	ubifs_msg("reserved for root: %llu bytes (%llu KiB)",
+		  c->report_rp_size, c->report_rp_size >> 10);
+	ubifs_msg("media format: w%d/r%d (latest is w%d/r%d), UUID %pUB%s",
 		  c->fmt_version, c->ro_compat_version,
-		  UBIFS_FORMAT_VERSION, UBIFS_RO_COMPAT_VERSION);
-	ubifs_msg("default compressor: %s", ubifs_compr_name(c->default_compr));
-	ubifs_msg("reserved for root:  %llu bytes (%llu KiB)",
-		c->report_rp_size, c->report_rp_size >> 10);
-
-	dbg_msg("min. I/O unit size:  %d bytes", c->min_io_size);
-	dbg_msg("max. write size:     %d bytes", c->max_write_size);
-	dbg_msg("LEB size:            %d bytes (%d KiB)",
-		c->leb_size, c->leb_size >> 10);
-	dbg_msg("data journal heads:  %d",
+		  UBIFS_FORMAT_VERSION, UBIFS_RO_COMPAT_VERSION, c->uuid,
+		  c->big_lpt ? ", big LPT model" : ", small LPT model");
+
+	dbg_gen("default compressor:  %s", ubifs_compr_name(c->default_compr));
+	dbg_gen("data journal heads:  %d",
 		c->jhead_cnt - NONDATA_JHEADS_CNT);
-	dbg_msg("UUID:                %pUB", c->uuid);
-	dbg_msg("big_lpt              %d", c->big_lpt);
-	dbg_msg("log LEBs:            %d (%d - %d)",
+	dbg_gen("log LEBs:            %d (%d - %d)",
 		c->log_lebs, UBIFS_LOG_LNUM, c->log_last);
-	dbg_msg("LPT area LEBs:       %d (%d - %d)",
+	dbg_gen("LPT area LEBs:       %d (%d - %d)",
 		c->lpt_lebs, c->lpt_first, c->lpt_last);
-	dbg_msg("orphan area LEBs:    %d (%d - %d)",
+	dbg_gen("orphan area LEBs:    %d (%d - %d)",
 		c->orph_lebs, c->orph_first, c->orph_last);
-	dbg_msg("main area LEBs:      %d (%d - %d)",
+	dbg_gen("main area LEBs:      %d (%d - %d)",
 		c->main_lebs, c->main_first, c->leb_cnt - 1);
-	dbg_msg("index LEBs:          %d", c->lst.idx_lebs);
-	dbg_msg("total index bytes:   %lld (%lld KiB, %lld MiB)",
+	dbg_gen("index LEBs:          %d", c->lst.idx_lebs);
+	dbg_gen("total index bytes:   %lld (%lld KiB, %lld MiB)",
 		c->bi.old_idx_sz, c->bi.old_idx_sz >> 10,
 		c->bi.old_idx_sz >> 20);
-	dbg_msg("key hash type:       %d", c->key_hash_type);
-	dbg_msg("tree fanout:         %d", c->fanout);
-	dbg_msg("reserved GC LEB:     %d", c->gc_lnum);
-	dbg_msg("first main LEB:      %d", c->main_first);
-	dbg_msg("max. znode size      %d", c->max_znode_sz);
-	dbg_msg("max. index node size %d", c->max_idx_node_sz);
-	dbg_msg("node sizes:          data %zu, inode %zu, dentry %zu",
+	dbg_gen("key hash type:       %d", c->key_hash_type);
+	dbg_gen("tree fanout:         %d", c->fanout);
+	dbg_gen("reserved GC LEB:     %d", c->gc_lnum);
+	dbg_gen("max. znode size      %d", c->max_znode_sz);
+	dbg_gen("max. index node size %d", c->max_idx_node_sz);
+	dbg_gen("node sizes:          data %zu, inode %zu, dentry %zu",
 		UBIFS_DATA_NODE_SZ, UBIFS_INO_NODE_SZ, UBIFS_DENT_NODE_SZ);
-	dbg_msg("node sizes:          trun %zu, sb %zu, master %zu",
+	dbg_gen("node sizes:          trun %zu, sb %zu, master %zu",
 		UBIFS_TRUN_NODE_SZ, UBIFS_SB_NODE_SZ, UBIFS_MST_NODE_SZ);
-	dbg_msg("node sizes:          ref %zu, cmt. start %zu, orph %zu",
+	dbg_gen("node sizes:          ref %zu, cmt. start %zu, orph %zu",
 		UBIFS_REF_NODE_SZ, UBIFS_CS_NODE_SZ, UBIFS_ORPH_NODE_SZ);
-	dbg_msg("max. node sizes:     data %zu, inode %zu dentry %zu, idx %d",
+	dbg_gen("max. node sizes:     data %zu, inode %zu dentry %zu, idx %d",
 		UBIFS_MAX_DATA_NODE_SZ, UBIFS_MAX_INO_NODE_SZ,
 		UBIFS_MAX_DENT_NODE_SZ, ubifs_idx_node_sz(c, c->fanout));
-	dbg_msg("dead watermark:      %d", c->dead_wm);
-	dbg_msg("dark watermark:      %d", c->dark_wm);
-	dbg_msg("LEB overhead:        %d", c->leb_overhead);
+	dbg_gen("dead watermark:      %d", c->dead_wm);
+	dbg_gen("dark watermark:      %d", c->dark_wm);
+	dbg_gen("LEB overhead:        %d", c->leb_overhead);
 	x = (long long)c->main_lebs * c->dark_wm;
-	dbg_msg("max. dark space:     %lld (%lld KiB, %lld MiB)",
+	dbg_gen("max. dark space:     %lld (%lld KiB, %lld MiB)",
 		x, x >> 10, x >> 20);
-	dbg_msg("maximum bud bytes:   %lld (%lld KiB, %lld MiB)",
+	dbg_gen("maximum bud bytes:   %lld (%lld KiB, %lld MiB)",
 		c->max_bud_bytes, c->max_bud_bytes >> 10,
 		c->max_bud_bytes >> 20);
-	dbg_msg("BG commit bud bytes: %lld (%lld KiB, %lld MiB)",
+	dbg_gen("BG commit bud bytes: %lld (%lld KiB, %lld MiB)",
 		c->bg_bud_bytes, c->bg_bud_bytes >> 10,
 		c->bg_bud_bytes >> 20);
-	dbg_msg("current bud bytes    %lld (%lld KiB, %lld MiB)",
+	dbg_gen("current bud bytes    %lld (%lld KiB, %lld MiB)",
 		c->bud_bytes, c->bud_bytes >> 10, c->bud_bytes >> 20);
-	dbg_msg("max. seq. number:    %llu", c->max_sqnum);
-	dbg_msg("commit number:       %llu", c->cmt_no);
+	dbg_gen("max. seq. number:    %llu", c->max_sqnum);
+	dbg_gen("commit number:       %llu", c->cmt_no);
 
 	return 0;
 
diff --git a/fs/ubifs/tnc_misc.c b/fs/ubifs/tnc_misc.c
index d38ac7f9654b..f6bf8995c7b1 100644
--- a/fs/ubifs/tnc_misc.c
+++ b/fs/ubifs/tnc_misc.c
@@ -328,8 +328,8 @@ static int read_znode(struct ubifs_info *c, int lnum, int offs, int len,
 		case UBIFS_XENT_KEY:
 			break;
 		default:
-			dbg_msg("bad key type at slot %d: %d",
-				i, key_type(c, &zbr->key));
+			ubifs_err("bad key type at slot %d: %d",
+				  i, key_type(c, &zbr->key));
 			err = 3;
 			goto out_dump;
 		}
-- 
cgit 


From 8bad3c024496e30948f576485c10ecc9ebdfe41d Mon Sep 17 00:00:00 2001
From: Liu Bo <liubo2009@cn.fujitsu.com>
Date: Mon, 18 Jun 2012 12:14:23 +0800
Subject: btrfs: fix comment typo in btrfs_finish_ordered_io

Fix typo errors in comments of btrfs_finish_ordered_io.

Signed-off-by: Liu Bo <liubo2009@cn.fujitsu.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 fs/btrfs/inode.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index fb8d671d00e6..d8ed4fda0015 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1969,8 +1969,8 @@ out:
 				      ordered_extent->len - 1, NULL, GFP_NOFS);
 
 	/*
-	 * This needs to be dont to make sure anybody waiting knows we are done
-	 * upating everything for this ordered extent.
+	 * This needs to be done to make sure anybody waiting knows we are done
+	 * updating everything for this ordered extent.
 	 */
 	btrfs_remove_ordered_extent(inode, ordered_extent);
 
-- 
cgit 


From 1856b225ca1f80446938c9ec4a0b330c1772ec45 Mon Sep 17 00:00:00 2001
From: Peter Meerwald <pmeerw@pmeerw.net>
Date: Sat, 18 Aug 2012 17:38:54 +0200
Subject: nfs: comment fix

Signed-off-by: Peter Meerwald <pmeerw@pmeerw.net>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 fs/nfs/super.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 8b2a2977b720..120d8e98ee5f 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1590,7 +1590,7 @@ static int nfs_parse_mount_options(char *raw,
 
 	/*
 	 * verify that any proto=/mountproto= options match the address
-	 * familiies in the addr=/mountaddr= options.
+	 * families in the addr=/mountaddr= options.
 	 */
 	if (protofamily != AF_UNSPEC &&
 	    protofamily != mnt->nfs_server.address.ss_family)
-- 
cgit 


From 4907cb7b193a4f91c1fd30cf679c035e3644c64d Mon Sep 17 00:00:00 2001
From: Anatol Pomozov <anatol.pomozov@gmail.com>
Date: Sat, 1 Sep 2012 10:31:09 -0700
Subject: treewide: fix comment/printk/variable typos

Signed-off-by: Anatol Pomozov <anatol.pomozov@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 fs/ext2/balloc.c  |  2 +-
 fs/ext3/balloc.c  |  2 +-
 fs/ext3/inode.c   |  2 +-
 fs/ext4/inode.c   | 14 +++++++-------
 fs/ext4/mballoc.c |  2 +-
 5 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index 1c3613998862..905166a3ffb2 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -479,7 +479,7 @@ void ext2_discard_reservation(struct inode *inode)
 /**
  * ext2_free_blocks() -- Free given blocks and update quota and i_blocks
  * @inode:		inode
- * @block:		start physcial block to free
+ * @block:		start physical block to free
  * @count:		number of blocks to free
  */
 void ext2_free_blocks (struct inode * inode, unsigned long block,
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index 25cd60892116..7c6a31d44daf 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -483,7 +483,7 @@ void ext3_discard_reservation(struct inode *inode)
  * ext3_free_blocks_sb() -- Free given blocks and update quota
  * @handle:			handle to this transaction
  * @sb:				super block
- * @block:			start physcial block to free
+ * @block:			start physical block to free
  * @count:			number of blocks to free
  * @pdquot_freed_blocks:	pointer to quota
  */
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 9a4a5c48b1c9..3aff616d4d03 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -3196,7 +3196,7 @@ out_brelse:
  *
  * - Within generic_file_write() for O_SYNC files.
  *   Here, there will be no transaction running. We wait for any running
- *   trasnaction to commit.
+ *   transaction to commit.
  *
  * - Within sys_sync(), kupdate and such.
  *   We wait on commit, if tol to.
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 02bc8cbe7281..189dae6b0964 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3227,7 +3227,7 @@ int ext4_discard_partial_page_buffers(handle_t *handle,
  * handle: The journal handle
  * inode:  The files inode
  * page:   A locked page that contains the offset "from"
- * from:   The starting byte offset (from the begining of the file)
+ * from:   The starting byte offset (from the beginning of the file)
  *         to begin discarding
  * len:    The length of bytes to discard
  * flags:  Optional flags that may be used:
@@ -3235,11 +3235,11 @@ int ext4_discard_partial_page_buffers(handle_t *handle,
  *         EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED
  *         Only zero the regions of the page whose buffer heads
  *         have already been unmapped.  This flag is appropriate
- *         for updateing the contents of a page whose blocks may
+ *         for updating the contents of a page whose blocks may
  *         have already been released, and we only want to zero
  *         out the regions that correspond to those released blocks.
  *
- * Returns zero on sucess or negative on failure.
+ * Returns zero on success or negative on failure.
  */
 static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
 		struct inode *inode, struct page *page, loff_t from,
@@ -3400,7 +3400,7 @@ int ext4_can_truncate(struct inode *inode)
  * @offset: The offset where the hole will begin
  * @len:    The length of the hole
  *
- * Returns: 0 on sucess or negative on failure
+ * Returns: 0 on success or negative on failure
  */
 
 int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
@@ -3922,7 +3922,7 @@ static int ext4_inode_blocks_set(handle_t *handle,
 
 	if (i_blocks <= ~0U) {
 		/*
-		 * i_blocks can be represnted in a 32 bit variable
+		 * i_blocks can be represented in a 32 bit variable
 		 * as multiple of 512 bytes
 		 */
 		raw_inode->i_blocks_lo   = cpu_to_le32(i_blocks);
@@ -4083,7 +4083,7 @@ out_brelse:
  *
  * - Within generic_file_write() for O_SYNC files.
  *   Here, there will be no transaction running. We wait for any running
- *   trasnaction to commit.
+ *   transaction to commit.
  *
  * - Within sys_sync(), kupdate and such.
  *   We wait on commit, if tol to.
@@ -4327,7 +4327,7 @@ static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
  * worse case, the indexs blocks spread over different block groups
  *
  * If datablocks are discontiguous, they are possible to spread over
- * different block groups too. If they are contiuguous, with flexbg,
+ * different block groups too. If they are contiguous, with flexbg,
  * they could still across block group boundary.
  *
  * Also account for superblock, inode, quota and xattr blocks
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 1cd6994fc446..86831411e98b 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -4705,7 +4705,7 @@ error_return:
  * ext4_group_add_blocks() -- Add given blocks to an existing group
  * @handle:			handle to this transaction
  * @sb:				super block
- * @block:			start physcial block to add to the block group
+ * @block:			start physical block to add to the block group
  * @count:			number of blocks to free
  *
  * This marks the blocks as free in the bitmap and buddy.
-- 
cgit 


From 6f1cbd4a25c58323b57f1374e827c363b44683cb Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@crashcourse.ca>
Date: Tue, 4 Sep 2012 07:23:35 -0400
Subject: sysfs: Fix comment typo "sysf_create_link".

More pedantry.

Signed-off-by: Robert P. J. Day <rpjday@crashcourse.ca>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/sysfs/symlink.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index a7ac78f8e67a..3c9eb5624f5e 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -113,7 +113,7 @@ int sysfs_create_link(struct kobject *kobj, struct kobject *target,
  *	@target:	object we're pointing to.
  *	@name:		name of the symlink.
  *
- *	This function does the same as sysf_create_link(), but it
+ *	This function does the same as sysfs_create_link(), but it
  *	doesn't warn if the link already exists.
  */
 int sysfs_create_link_nowarn(struct kobject *kobj, struct kobject *target,
-- 
cgit 


From caaa357dc0582f0d4504c22f1ef2347ad940de1b Mon Sep 17 00:00:00 2001
From: Michael Schutte <michi@uiae.at>
Date: Tue, 4 Sep 2012 22:54:16 -0700
Subject: Input: Add KD[GS]KBDIACRUC ioctls to the compatible list

Allow handling of Unicode compose sequences by 32-bit apps on a 64-bit
system.  The issue has been reported in <http://bugs.debian.org/540534>
and <http://lists.altlinux.org/pipermail/kbd/2009-December/000235.html>.

A formal check of the two affected ioctls in drivers/char/vt_ioctl.c
(introduced in 04c71976) and a test using x86 kbd 1.15.1 on a so patched
x86_64 kernel both confirm that KD[GS]KBDIACRUC are ioctl32()
compatible.

Signed-off-by: Michael Schutte <michi@uiae.at>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 fs/compat_ioctl.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index debdfe0fc809..edd4ab67cd1b 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -897,6 +897,8 @@ COMPATIBLE_IOCTL(KDGKBSENT)
 COMPATIBLE_IOCTL(KDSKBSENT)
 COMPATIBLE_IOCTL(KDGKBDIACR)
 COMPATIBLE_IOCTL(KDSKBDIACR)
+COMPATIBLE_IOCTL(KDGKBDIACRUC)
+COMPATIBLE_IOCTL(KDSKBDIACRUC)
 COMPATIBLE_IOCTL(KDKBDREP)
 COMPATIBLE_IOCTL(KDGKBLED)
 COMPATIBLE_IOCTL(KDGETLED)
-- 
cgit 


From ca1868309be96eb905a71174187a190543a1b90e Mon Sep 17 00:00:00 2001
From: Yanchuan Nian <ycnian@gmail.com>
Date: Wed, 5 Sep 2012 16:31:29 +0800
Subject: vfs: fix kerneldoc for generic_fh_to_parent()

Wrong function name in the kerneldoc description of generic_fh_to_parent().

Signed-off-by: Yanchuan Nian <ycnian@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 fs/libfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/libfs.c b/fs/libfs.c
index a74cb1725ac6..7cc37ca19cd8 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -874,7 +874,7 @@ struct dentry *generic_fh_to_dentry(struct super_block *sb, struct fid *fid,
 EXPORT_SYMBOL_GPL(generic_fh_to_dentry);
 
 /**
- * generic_fh_to_dentry - generic helper for the fh_to_parent export operation
+ * generic_fh_to_parent - generic helper for the fh_to_parent export operation
  * @sb:		filesystem to do the file handle conversion on
  * @fid:	file handle to convert
  * @fh_len:	length of the file handle in bytes
-- 
cgit 


From 527a1361387cd132a577dc8a6ae676cfc1e8414b Mon Sep 17 00:00:00 2001
From: Wang Sheng-Hui <shhuiw@gmail.com>
Date: Thu, 6 Sep 2012 13:33:37 +0800
Subject: btrfs: fix trivial typo for the comment of BTRFS_FREE_INO_OBJECTID

It should be storing, not sotring.

Signed-off-by: Wang Sheng-Hui <shhuiw@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 fs/btrfs/ctree.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index fa5c45b39075..11d0e09e1616 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -113,7 +113,7 @@ struct btrfs_ordered_sum;
 #define BTRFS_FREE_SPACE_OBJECTID -11ULL
 
 /*
- * The inode number assigned to the special inode for sotring
+ * The inode number assigned to the special inode for storing
  * free ino cache
  */
 #define BTRFS_FREE_INO_OBJECTID -12ULL
-- 
cgit 


From 7dc05881b64792e0ea41293e9595cc962a716225 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 3 Apr 2012 14:01:31 -0700
Subject: userns: Convert debugfs to use kuid/kgid where appropriate.

Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/debugfs/inode.c | 26 ++++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 4733eab34a23..36e2b667e822 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -128,8 +128,8 @@ static inline int debugfs_positive(struct dentry *dentry)
 }
 
 struct debugfs_mount_opts {
-	uid_t uid;
-	gid_t gid;
+	kuid_t uid;
+	kgid_t gid;
 	umode_t mode;
 };
 
@@ -156,6 +156,8 @@ static int debugfs_parse_options(char *data, struct debugfs_mount_opts *opts)
 	substring_t args[MAX_OPT_ARGS];
 	int option;
 	int token;
+	kuid_t uid;
+	kgid_t gid;
 	char *p;
 
 	opts->mode = DEBUGFS_DEFAULT_MODE;
@@ -169,12 +171,18 @@ static int debugfs_parse_options(char *data, struct debugfs_mount_opts *opts)
 		case Opt_uid:
 			if (match_int(&args[0], &option))
 				return -EINVAL;
-			opts->uid = option;
+			uid = make_kuid(current_user_ns(), option);
+			if (!uid_valid(uid))
+				return -EINVAL;
+			opts->uid = uid;
 			break;
 		case Opt_gid:
 			if (match_octal(&args[0], &option))
 				return -EINVAL;
-			opts->gid = option;
+			gid = make_kgid(current_user_ns(), option);
+			if (!gid_valid(gid))
+				return -EINVAL;
+			opts->gid = gid;
 			break;
 		case Opt_mode:
 			if (match_octal(&args[0], &option))
@@ -226,10 +234,12 @@ static int debugfs_show_options(struct seq_file *m, struct dentry *root)
 	struct debugfs_fs_info *fsi = root->d_sb->s_fs_info;
 	struct debugfs_mount_opts *opts = &fsi->mount_opts;
 
-	if (opts->uid != 0)
-		seq_printf(m, ",uid=%u", opts->uid);
-	if (opts->gid != 0)
-		seq_printf(m, ",gid=%u", opts->gid);
+	if (!uid_eq(opts->uid, GLOBAL_ROOT_UID))
+		seq_printf(m, ",uid=%u",
+			   from_kuid_munged(&init_user_ns, opts->uid));
+	if (!gid_eq(opts->gid, GLOBAL_ROOT_GID))
+		seq_printf(m, ",gid=%u",
+			   from_kgid_munged(&init_user_ns, opts->gid));
 	if (opts->mode != DEBUGFS_DEFAULT_MODE)
 		seq_printf(m, ",mode=%o", opts->mode);
 
-- 
cgit 


From 2ab51f3721f7abdf92d89cb79d3d6c0062ddc14b Mon Sep 17 00:00:00 2001
From: Mimi Zohar <zohar@linux.vnet.ibm.com>
Date: Wed, 23 Mar 2011 17:23:06 -0400
Subject: vfs: extend vfs_removexattr locking

This patch takes the i_mutex lock before security_inode_removexattr(),
instead of after, in preparation of calling ima_inode_removexattr().

Signed-off-by: Mimi Zohar <zohar@us.ibm.com>
Signed-off-by: Dmitry Kasatkin <dmitry.kasatkin@nokia.com>
---
 fs/xattr.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/xattr.c b/fs/xattr.c
index 4d45b7189e7e..107f457143c3 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -295,11 +295,13 @@ vfs_removexattr(struct dentry *dentry, const char *name)
 	if (error)
 		return error;
 
+	mutex_lock(&inode->i_mutex);
 	error = security_inode_removexattr(dentry, name);
-	if (error)
+	if (error) {
+		mutex_unlock(&inode->i_mutex);
 		return error;
+	}
 
-	mutex_lock(&inode->i_mutex);
 	error = inode->i_op->removexattr(dentry, name);
 	mutex_unlock(&inode->i_mutex);
 
-- 
cgit 


From 4199d35cbc90c15db447d115bd96ffa5f1d60d3a Mon Sep 17 00:00:00 2001
From: Mimi Zohar <zohar@linux.vnet.ibm.com>
Date: Wed, 16 Mar 2011 22:48:43 -0400
Subject: vfs: move ima_file_free before releasing the file

ima_file_free(), called on __fput(), currently flags files that have
changed, so that the file is re-measured.  For appraising a files's
integrity, the file's hash must be re-calculated and stored in the
'security.ima' xattr to reflect any changes.

This patch moves the ima_file_free() call to before releasing the file
in preparation of ima-appraisal measuring the file and updating the
'security.ima' xattr.

Signed-off-by: Mimi Zohar <zohar@us.ibm.com>
Acked-by: Serge Hallyn <serge.hallyn@ubuntu.com>
Acked-by: Dmitry Kasatkin <dmitry.kasatkin@intel.com>
---
 fs/file_table.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/file_table.c b/fs/file_table.c
index 701985e4ccda..a41f23f90b17 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -243,10 +243,10 @@ static void __fput(struct file *file)
 		if (file->f_op && file->f_op->fasync)
 			file->f_op->fasync(-1, file, 0);
 	}
+	ima_file_free(file);
 	if (file->f_op && file->f_op->release)
 		file->f_op->release(inode, file);
 	security_file_free(file);
-	ima_file_free(file);
 	if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL &&
 		     !(file->f_mode & FMODE_PATH))) {
 		cdev_put(inode->i_cdev);
-- 
cgit 


From 9957a5043e7b0b7361cdf48eea22b2900293e63a Mon Sep 17 00:00:00 2001
From: Mimi Zohar <zohar@linux.vnet.ibm.com>
Date: Wed, 9 Mar 2011 22:57:53 -0500
Subject: ima: add inode_post_setattr call

Changing an inode's metadata may result in our not needing to appraise
the file.  In such cases, we must remove 'security.ima'.

Changelog v1:
- use ima_inode_post_setattr() stub function, if IMA_APPRAISE not configured

Signed-off-by: Mimi Zohar <zohar@us.ibm.com>
Acked-by: Serge Hallyn <serge.hallyn@ubuntu.com>
Acked-by: Dmitry Kasatkin <dmitry.kasatkin@intel.com>
---
 fs/attr.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/attr.c b/fs/attr.c
index 29e38a1f7f77..cce7df53b694 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -14,6 +14,7 @@
 #include <linux/fcntl.h>
 #include <linux/security.h>
 #include <linux/evm.h>
+#include <linux/ima.h>
 
 /**
  * inode_change_ok - check if attribute changes to an inode are allowed
@@ -247,6 +248,7 @@ int notify_change(struct dentry * dentry, struct iattr * attr)
 
 	if (!error) {
 		fsnotify_change(dentry, ia_valid);
+		ima_inode_post_setattr(dentry);
 		evm_inode_post_setattr(dentry, ia_valid);
 	}
 
-- 
cgit 


From 2b75bc9121e54e22537207b47b71373bcb0be41c Mon Sep 17 00:00:00 2001
From: Sasha Levin <levinsasha928@gmail.com>
Date: Sun, 9 Sep 2012 16:16:58 +0200
Subject: dlm: check the maximum size of a request from user

device_write only checks whether the request size is big enough, but it doesn't
check if the size is too big.

At that point, it also tries to allocate as much memory as the user has requested
even if it's too much. This can lead to OOM killer kicking in, or memory corruption
if (count + 1) overflows.

Signed-off-by: Sasha Levin <levinsasha928@gmail.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/user.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'fs')

diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index eb4ed9ba3098..7ff49852b0cb 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -503,6 +503,13 @@ static ssize_t device_write(struct file *file, const char __user *buf,
 #endif
 		return -EINVAL;
 
+#ifdef CONFIG_COMPAT
+	if (count > sizeof(struct dlm_write_request32) + DLM_RESNAME_MAXLEN)
+#else
+	if (count > sizeof(struct dlm_write_request) + DLM_RESNAME_MAXLEN)
+#endif
+		return -EINVAL;
+
 	kbuf = kzalloc(count + 1, GFP_NOFS);
 	if (!kbuf)
 		return -ENOMEM;
-- 
cgit 


From 15e473046cb6e5d18a4d0057e61d76315230382b Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 7 Sep 2012 20:12:54 +0000
Subject: netlink: Rename pid to portid to avoid confusion

It is a frequent mistake to confuse the netlink port identifier with a
process identifier.  Try to reduce this confusion by renaming fields
that hold port identifiers portid instead of pid.

I have carefully avoided changing the structures exported to
userspace to avoid changing the userspace API.

I have successfully built an allyesconfig kernel with this change.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Acked-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 fs/dlm/netlink.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c
index ef17e0169da1..60a327863b11 100644
--- a/fs/dlm/netlink.c
+++ b/fs/dlm/netlink.c
@@ -14,7 +14,7 @@
 #include "dlm_internal.h"
 
 static uint32_t dlm_nl_seqnum;
-static uint32_t listener_nlpid;
+static uint32_t listener_nlportid;
 
 static struct genl_family family = {
 	.id		= GENL_ID_GENERATE,
@@ -64,13 +64,13 @@ static int send_data(struct sk_buff *skb)
 		return rv;
 	}
 
-	return genlmsg_unicast(&init_net, skb, listener_nlpid);
+	return genlmsg_unicast(&init_net, skb, listener_nlportid);
 }
 
 static int user_cmd(struct sk_buff *skb, struct genl_info *info)
 {
-	listener_nlpid = info->snd_pid;
-	printk("user_cmd nlpid %u\n", listener_nlpid);
+	listener_nlportid = info->snd_portid;
+	printk("user_cmd nlpid %u\n", listener_nlportid);
 	return 0;
 }
 
-- 
cgit 


From 4895768b6aab55bbdbebcf2da090cb1a5ccf5463 Mon Sep 17 00:00:00 2001
From: Aristeu Rozanski <aris@redhat.com>
Date: Tue, 11 Sep 2012 16:28:11 -0400
Subject: fs: add missing documentation to simple_xattr functions

v2: add function documentation instead of adding a separate file under
    Documentation/

tj: Updated comment a bit and rolled in Randy's suggestions.

Cc: Li Zefan <lizefan@huawei.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Hillf Danton <dhillf@gmail.com>
Cc: Lennart Poettering <lpoetter@redhat.com>
Cc: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: Aristeu Rozanski <aris@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 fs/xattr.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/xattr.c b/fs/xattr.c
index e17e773517ef..f053c1135d0f 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -892,8 +892,19 @@ out:
 
 }
 
-/*
- * xattr SET operation for in-memory/pseudo filesystems
+/**
+ * simple_xattr_set - xattr SET operation for in-memory/pseudo filesystems
+ * @xattrs: target simple_xattr list
+ * @name: name of the new extended attribute
+ * @value: value of the new xattr. If %NULL, will remove the attribute
+ * @size: size of the new xattr
+ * @flags: %XATTR_{CREATE|REPLACE}
+ *
+ * %XATTR_CREATE is set, the xattr shouldn't exist already; otherwise fails
+ * with -EEXIST.  If %XATTR_REPLACE is set, the xattr should exist;
+ * otherwise, fails with -ENODATA.
+ *
+ * Returns 0 on success, -errno on failure.
  */
 int simple_xattr_set(struct simple_xattrs *xattrs, const char *name,
 		     const void *value, size_t size, int flags)
@@ -950,6 +961,9 @@ ssize_t simple_xattr_list(struct simple_xattrs *xattrs, char *buffer,
 	return used;
 }
 
+/*
+ * Adds an extended attribute to the list
+ */
 void simple_xattr_list_add(struct simple_xattrs *xattrs,
 			   struct simple_xattr *new_xattr)
 {
-- 
cgit 


From b9d6cfdeaf67cc34cdfd53ab234358dd2910a0f4 Mon Sep 17 00:00:00 2001
From: Aristeu Rozanski <aris@redhat.com>
Date: Wed, 12 Sep 2012 10:31:13 -0400
Subject: xattr: mark variable as uninitialized to make both gcc and smatch
 happy

new_xattr in __simple_xattr_set() is only initialized with a valid
pointer if value is not NULL, which only happens if this function is
called directly with the intention to remove an existing extended
attribute. Even being safe to be this way, smatch warns about possible
NULL dereference. Dan Carpenter suggested using uninitialized_var()
which will make both gcc and smatch happy.

Cc: Fengguang Wu <fengguang.wu@intel.com>
Cc: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Aristeu Rozanski <aris@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 fs/xattr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/xattr.c b/fs/xattr.c
index f053c1135d0f..014f11321fd9 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -845,7 +845,7 @@ static int __simple_xattr_set(struct simple_xattrs *xattrs, const char *name,
 			      const void *value, size_t size, int flags)
 {
 	struct simple_xattr *xattr;
-	struct simple_xattr *new_xattr = NULL;
+	struct simple_xattr *uninitialized_var(new_xattr);
 	int err = 0;
 
 	/* value == NULL means remove */
-- 
cgit 


From 0753f70f07fbbd23a48d61ffea37028bd0bd6c7d Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Mon, 19 Mar 2012 15:13:51 +0000
Subject: fs: Build sys_stat64() and friends if __ARCH_WANT_COMPAT_STAT64

On AArch64 Linux, we want the sys_stat64() and related functions for
compat support but do not need the generic struct stat64, enabled
automatically if __ARCH_WANT_STAT64.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
---
 fs/stat.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/stat.c b/fs/stat.c
index b6ff11825fc8..6126c5da22e7 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -326,7 +326,7 @@ SYSCALL_DEFINE3(readlink, const char __user *, path, char __user *, buf,
 
 
 /* ---------- LFS-64 ----------- */
-#ifdef __ARCH_WANT_STAT64
+#if defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_COMPAT_STAT64)
 
 #ifndef INIT_STRUCT_STAT64_PADDING
 #  define INIT_STRUCT_STAT64_PADDING(st) memset(&st, 0, sizeof(st))
@@ -415,7 +415,7 @@ SYSCALL_DEFINE4(fstatat64, int, dfd, const char __user *, filename,
 		return error;
 	return cp_new_stat64(&stat, statbuf);
 }
-#endif /* __ARCH_WANT_STAT64 */
+#endif /* __ARCH_WANT_STAT64 || __ARCH_WANT_COMPAT_STAT64 */
 
 /* Caller is here responsible for sufficient locking (ie. inode->i_lock) */
 void __inode_add_bytes(struct inode *inode, loff_t bytes)
-- 
cgit 


From b40c2e665cd552eae5fbdbb878bc29a34357668e Mon Sep 17 00:00:00 2001
From: Tino Reichardt <milky-kernel@mcmilk.de>
Date: Mon, 17 Sep 2012 11:58:19 -0500
Subject: fs/jfs: TRIM support for JFS Filesystem

This patch adds support for the two linux interfaces of the discard/TRIM
command for SSD devices and sparse/thinly-provisioned LUNs.

JFS will support batched discard via FITRIM ioctl and online discard
with the discard mount option.

Signed-off-by: Tino Reichardt <list-jfs@mcmilk.de>
Signed-off-by: Dave Kleikamp <dave.kleikamp@oracle.com>
---
 fs/jfs/Makefile      |   2 +-
 fs/jfs/ioctl.c       |  43 +++++++++++++++++-
 fs/jfs/jfs_discard.c | 117 +++++++++++++++++++++++++++++++++++++++++++++++
 fs/jfs/jfs_discard.h |  26 +++++++++++
 fs/jfs/jfs_dmap.c    | 125 +++++++++++++++++++++++++++++++++++++++++++++++++--
 fs/jfs/jfs_dmap.h    |   2 +
 fs/jfs/jfs_filsys.h  |   3 ++
 fs/jfs/jfs_incore.h  |   1 +
 fs/jfs/super.c       |  71 ++++++++++++++++++++++-------
 9 files changed, 369 insertions(+), 21 deletions(-)
 create mode 100644 fs/jfs/jfs_discard.c
 create mode 100644 fs/jfs/jfs_discard.h

(limited to 'fs')

diff --git a/fs/jfs/Makefile b/fs/jfs/Makefile
index a58fa72d7e59..d20d4737b3ef 100644
--- a/fs/jfs/Makefile
+++ b/fs/jfs/Makefile
@@ -6,7 +6,7 @@ obj-$(CONFIG_JFS_FS) += jfs.o
 
 jfs-y    := super.o file.o inode.o namei.o jfs_mount.o jfs_umount.o \
 	    jfs_xtree.o jfs_imap.o jfs_debug.o jfs_dmap.o \
-	    jfs_unicode.o jfs_dtree.o jfs_inode.o \
+	    jfs_unicode.o jfs_dtree.o jfs_inode.o jfs_discard.o \
 	    jfs_extent.o symlink.o jfs_metapage.o \
 	    jfs_logmgr.o jfs_txnmgr.o jfs_uniupr.o \
 	    resize.o xattr.o ioctl.o
diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c
index f19d1e04a374..bc555ff417e9 100644
--- a/fs/jfs/ioctl.c
+++ b/fs/jfs/ioctl.c
@@ -11,13 +11,17 @@
 #include <linux/mount.h>
 #include <linux/time.h>
 #include <linux/sched.h>
+#include <linux/blkdev.h>
 #include <asm/current.h>
 #include <asm/uaccess.h>
 
+#include "jfs_filsys.h"
+#include "jfs_debug.h"
 #include "jfs_incore.h"
 #include "jfs_dinode.h"
 #include "jfs_inode.h"
-
+#include "jfs_dmap.h"
+#include "jfs_discard.h"
 
 static struct {
 	long jfs_flag;
@@ -123,6 +127,40 @@ setflags_out:
 		mnt_drop_write_file(filp);
 		return err;
 	}
+
+	case FITRIM:
+	{
+		struct super_block *sb = inode->i_sb;
+		struct request_queue *q = bdev_get_queue(sb->s_bdev);
+		struct fstrim_range range;
+		s64 ret = 0;
+
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		if (!blk_queue_discard(q)) {
+			jfs_warn("FITRIM not supported on device");
+			return -EOPNOTSUPP;
+		}
+
+		if (copy_from_user(&range, (struct fstrim_range __user *)arg,
+		    sizeof(range)))
+			return -EFAULT;
+
+		range.minlen = max_t(unsigned int, range.minlen,
+			q->limits.discard_granularity);
+
+		ret = jfs_ioc_trim(inode, &range);
+		if (ret < 0)
+			return ret;
+
+		if (copy_to_user((struct fstrim_range __user *)arg, &range,
+		    sizeof(range)))
+			return -EFAULT;
+
+		return 0;
+	}
+
 	default:
 		return -ENOTTY;
 	}
@@ -142,6 +180,9 @@ long jfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 	case JFS_IOC_SETFLAGS32:
 		cmd = JFS_IOC_SETFLAGS;
 		break;
+	case FITRIM:
+		cmd = FITRIM;
+		break;
 	}
 	return jfs_ioctl(filp, cmd, arg);
 }
diff --git a/fs/jfs/jfs_discard.c b/fs/jfs/jfs_discard.c
new file mode 100644
index 000000000000..9947563e4175
--- /dev/null
+++ b/fs/jfs/jfs_discard.c
@@ -0,0 +1,117 @@
+/*
+ *   Copyright (C) Tino Reichardt, 2012
+ *
+ *   This program is free software;  you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ *   the GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program;  if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/blkdev.h>
+
+#include "jfs_incore.h"
+#include "jfs_superblock.h"
+#include "jfs_discard.h"
+#include "jfs_dmap.h"
+#include "jfs_debug.h"
+
+
+/*
+ * NAME:	jfs_issue_discard()
+ *
+ * FUNCTION:	TRIM the specified block range on device, if supported
+ *
+ * PARAMETERS:
+ *	ip	- pointer to in-core inode
+ *	blkno	- starting block number to be trimmed (0..N)
+ *	nblocks	- number of blocks to be trimmed
+ *
+ * RETURN VALUES:
+ *	none
+ *
+ * serialization: IREAD_LOCK(ipbmap) held on entry/exit;
+ */
+void jfs_issue_discard(struct inode *ip, u64 blkno, u64 nblocks)
+{
+	struct super_block *sb = ip->i_sb;
+	int r = 0;
+
+	r = sb_issue_discard(sb, blkno, nblocks, GFP_NOFS, 0);
+	if (unlikely(r != 0)) {
+		jfs_err("JFS: sb_issue_discard" \
+			"(%p, %llu, %llu, GFP_NOFS, 0) = %d => failed!\n",
+			sb, (unsigned long long)blkno,
+			(unsigned long long)nblocks, r);
+	}
+
+	jfs_info("JFS: sb_issue_discard" \
+		"(%p, %llu, %llu, GFP_NOFS, 0) = %d\n",
+		sb, (unsigned long long)blkno,
+		(unsigned long long)nblocks, r);
+
+	return;
+}
+
+/*
+ * NAME:	jfs_ioc_trim()
+ *
+ * FUNCTION:	attempt to discard (TRIM) all free blocks from the
+ *              filesystem.
+ *
+ * PARAMETERS:
+ *	ip	- pointer to in-core inode;
+ *	range	- the range, given by user space
+ *
+ * RETURN VALUES:
+ *	0	- success
+ *	-EIO	- i/o error
+ */
+int jfs_ioc_trim(struct inode *ip, struct fstrim_range *range)
+{
+	struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
+	struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap;
+	struct super_block *sb = ipbmap->i_sb;
+	int agno, agno_end;
+	s64 start, end, minlen;
+	u64 trimmed = 0;
+
+	/**
+	 * convert byte values to block size of filesystem:
+	 * start:	First Byte to trim
+	 * len:		number of Bytes to trim from start
+	 * minlen:	minimum extent length in Bytes
+	 */
+	start = range->start >> sb->s_blocksize_bits;
+	if (start < 0)
+		start = 0;
+	end = start + (range->len >> sb->s_blocksize_bits) - 1;
+	if (end >= bmp->db_mapsize)
+		end = bmp->db_mapsize - 1;
+	minlen = range->minlen >> sb->s_blocksize_bits;
+	if (minlen <= 0)
+		minlen = 1;
+
+	/**
+	 * we trim all ag's within the range
+	 */
+	agno = BLKTOAG(start, JFS_SBI(ip->i_sb));
+	agno_end = BLKTOAG(end, JFS_SBI(ip->i_sb));
+	while (agno <= agno_end) {
+		trimmed += dbDiscardAG(ip, agno, minlen);
+		agno++;
+	}
+	range->len = trimmed << sb->s_blocksize_bits;
+
+	return 0;
+}
diff --git a/fs/jfs/jfs_discard.h b/fs/jfs/jfs_discard.h
new file mode 100644
index 000000000000..40d1ee6081a0
--- /dev/null
+++ b/fs/jfs/jfs_discard.h
@@ -0,0 +1,26 @@
+/*
+ *   Copyright (C) Tino Reichardt, 2012
+ *
+ *   This program is free software;  you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ *   the GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program;  if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#ifndef _H_JFS_DISCARD
+#define _H_JFS_DISCARD
+
+struct fstrim_range;
+
+extern void jfs_issue_discard(struct inode *ip, u64 blkno, u64 nblocks);
+extern int jfs_ioc_trim(struct inode *ip, struct fstrim_range *range);
+
+#endif /* _H_JFS_DISCARD */
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index 9cbd11a3f804..174feb6a73c1 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -1,5 +1,6 @@
 /*
  *   Copyright (C) International Business Machines Corp., 2000-2004
+ *   Portions Copyright (C) Tino Reichardt, 2012
  *
  *   This program is free software;  you can redistribute it and/or modify
  *   it under the terms of the GNU General Public License as published by
@@ -25,6 +26,7 @@
 #include "jfs_lock.h"
 #include "jfs_metapage.h"
 #include "jfs_debug.h"
+#include "jfs_discard.h"
 
 /*
  *	SERIALIZATION of the Block Allocation Map.
@@ -104,7 +106,6 @@ static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
 static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
 		      int nblocks);
 static int dbMaxBud(u8 * cp);
-s64 dbMapFileSizeToMapSize(struct inode *ipbmap);
 static int blkstol2(s64 nb);
 
 static int cntlz(u32 value);
@@ -145,7 +146,6 @@ static const s8 budtab[256] = {
 	2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, -1
 };
 
-
 /*
  * NAME:	dbMount()
  *
@@ -310,7 +310,6 @@ int dbSync(struct inode *ipbmap)
 	return (0);
 }
 
-
 /*
  * NAME:	dbFree()
  *
@@ -337,6 +336,7 @@ int dbFree(struct inode *ip, s64 blkno, s64 nblocks)
 	s64 lblkno, rem;
 	struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
 	struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap;
+	struct super_block *sb = ipbmap->i_sb;
 
 	IREAD_LOCK(ipbmap, RDWRLOCK_DMAP);
 
@@ -351,6 +351,13 @@ int dbFree(struct inode *ip, s64 blkno, s64 nblocks)
 		return -EIO;
 	}
 
+	/**
+	 * TRIM the blocks, when mounted with discard option
+	 */
+	if (JFS_SBI(sb)->flag & JFS_DISCARD)
+		if (JFS_SBI(sb)->minblks_trim <= nblocks)
+			jfs_issue_discard(ipbmap, blkno, nblocks);
+
 	/*
 	 * free the blocks a dmap at a time.
 	 */
@@ -1095,7 +1102,6 @@ static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks)
 		/* we were not successful */
 		release_metapage(mp);
 
-
 	return (rc);
 }
 
@@ -1589,6 +1595,117 @@ static int dbAllocAny(struct bmap * bmp, s64 nblocks, int l2nb, s64 * results)
 }
 
 
+/*
+ * NAME:	dbDiscardAG()
+ *
+ * FUNCTION:	attempt to discard (TRIM) all free blocks of specific AG
+ *
+ *		algorithm:
+ *		1) allocate blocks, as large as possible and save them
+ *		   while holding IWRITE_LOCK on ipbmap
+ *		2) trim all these saved block/length values
+ *		3) mark the blocks free again
+ *
+ *		benefit:
+ *		- we work only on one ag at some time, minimizing how long we
+ *		  need to lock ipbmap
+ *		- reading / writing the fs is possible most time, even on
+ *		  trimming
+ *
+ *		downside:
+ *		- we write two times to the dmapctl and dmap pages
+ *		- but for me, this seems the best way, better ideas?
+ *		/TR 2012
+ *
+ * PARAMETERS:
+ *	ip	- pointer to in-core inode
+ *	agno	- ag to trim
+ *	minlen	- minimum value of contiguous blocks
+ *
+ * RETURN VALUES:
+ *	s64	- actual number of blocks trimmed
+ */
+s64 dbDiscardAG(struct inode *ip, int agno, s64 minlen)
+{
+	struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
+	struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap;
+	s64 nblocks, blkno;
+	u64 trimmed = 0;
+	int rc, l2nb;
+	struct super_block *sb = ipbmap->i_sb;
+
+	struct range2trim {
+		u64 blkno;
+		u64 nblocks;
+	} *totrim, *tt;
+
+	/* max blkno / nblocks pairs to trim */
+	int count = 0, range_cnt;
+
+	/* prevent others from writing new stuff here, while trimming */
+	IWRITE_LOCK(ipbmap, RDWRLOCK_DMAP);
+
+	nblocks = bmp->db_agfree[agno];
+	range_cnt = nblocks;
+	do_div(range_cnt, (int)minlen);
+	range_cnt = min(range_cnt + 1, 32 * 1024);
+	totrim = kmalloc(sizeof(struct range2trim) * range_cnt, GFP_NOFS);
+	if (totrim == NULL) {
+		jfs_error(bmp->db_ipbmap->i_sb,
+			  "dbDiscardAG: no memory for trim array");
+		IWRITE_UNLOCK(ipbmap);
+		return 0;
+	}
+
+	tt = totrim;
+	while (nblocks >= minlen) {
+		l2nb = BLKSTOL2(nblocks);
+
+		/* 0 = okay, -EIO = fatal, -ENOSPC -> try smaller block */
+		rc = dbAllocAG(bmp, agno, nblocks, l2nb, &blkno);
+		if (rc == 0) {
+			tt->blkno = blkno;
+			tt->nblocks = nblocks;
+			tt++; count++;
+
+			/* the whole ag is free, trim now */
+			if (bmp->db_agfree[agno] == 0)
+				break;
+
+			/* give a hint for the next while */
+			nblocks = bmp->db_agfree[agno];
+			continue;
+		} else if (rc == -ENOSPC) {
+			/* search for next smaller log2 block */
+			l2nb = BLKSTOL2(nblocks) - 1;
+			nblocks = 1 << l2nb;
+		} else {
+			/* Trim any already allocated blocks */
+			jfs_error(bmp->db_ipbmap->i_sb,
+				"dbDiscardAG: -EIO");
+			break;
+		}
+
+		/* check, if our trim array is full */
+		if (unlikely(count >= range_cnt - 1))
+			break;
+	}
+	IWRITE_UNLOCK(ipbmap);
+
+	tt->nblocks = 0; /* mark the current end */
+	for (tt = totrim; tt->nblocks != 0; tt++) {
+		/* when mounted with online discard, dbFree() will
+		 * call jfs_issue_discard() itself */
+		if (!(JFS_SBI(sb)->flag & JFS_DISCARD))
+			jfs_issue_discard(ip, tt->blkno, tt->nblocks);
+		dbFree(ip, tt->blkno, tt->nblocks);
+		trimmed += tt->nblocks;
+	}
+	kfree(totrim);
+
+	return trimmed;
+}
+
 /*
  * NAME:	dbFindCtl()
  *
diff --git a/fs/jfs/jfs_dmap.h b/fs/jfs/jfs_dmap.h
index 6dcb906c55d8..562b9a7e4311 100644
--- a/fs/jfs/jfs_dmap.h
+++ b/fs/jfs/jfs_dmap.h
@@ -311,4 +311,6 @@ extern int dbAllocBottomUp(struct inode *ip, s64 blkno, s64 nblocks);
 extern int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks);
 extern void dbFinalizeBmap(struct inode *ipbmap);
 extern s64 dbMapFileSizeToMapSize(struct inode *ipbmap);
+extern s64 dbDiscardAG(struct inode *ip, int agno, s64 minlen);
+
 #endif				/* _H_JFS_DMAP */
diff --git a/fs/jfs/jfs_filsys.h b/fs/jfs/jfs_filsys.h
index b3f5463fbe52..b67d64671bb4 100644
--- a/fs/jfs/jfs_filsys.h
+++ b/fs/jfs/jfs_filsys.h
@@ -45,6 +45,9 @@
 /* mount time flag to disable journaling to disk */
 #define JFS_NOINTEGRITY 0x00000040
 
+/* mount time flag to enable TRIM to ssd disks */
+#define JFS_DISCARD     0x00000080
+
 /* commit option */
 #define	JFS_COMMIT	0x00000f00	/* commit option mask */
 #define	JFS_GROUPCOMMIT	0x00000100	/* group (of 1) commit */
diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h
index 584a4a1a6e81..4fa958ae1986 100644
--- a/fs/jfs/jfs_incore.h
+++ b/fs/jfs/jfs_incore.h
@@ -195,6 +195,7 @@ struct jfs_sb_info {
 	uint		uid;		/* uid to override on-disk uid */
 	uint		gid;		/* gid to override on-disk gid */
 	uint		umask;		/* umask to override on-disk umask */
+	uint		minblks_trim;	/* minimum blocks, for online trim */
 };
 
 /* jfs_sb_info commit_state */
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index c55c7452d285..6f4ac1c070f0 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -33,6 +33,7 @@
 #include <linux/slab.h>
 #include <asm/uaccess.h>
 #include <linux/seq_file.h>
+#include <linux/blkdev.h>
 
 #include "jfs_incore.h"
 #include "jfs_filsys.h"
@@ -100,7 +101,7 @@ void jfs_error(struct super_block *sb, const char * function, ...)
 	vsnprintf(error_buf, sizeof(error_buf), function, args);
 	va_end(args);
 
-	printk(KERN_ERR "ERROR: (device %s): %s\n", sb->s_id, error_buf);
+	pr_err("ERROR: (device %s): %s\n", sb->s_id, error_buf);
 
 	jfs_handle_error(sb);
 }
@@ -197,7 +198,8 @@ static void jfs_put_super(struct super_block *sb)
 enum {
 	Opt_integrity, Opt_nointegrity, Opt_iocharset, Opt_resize,
 	Opt_resize_nosize, Opt_errors, Opt_ignore, Opt_err, Opt_quota,
-	Opt_usrquota, Opt_grpquota, Opt_uid, Opt_gid, Opt_umask
+	Opt_usrquota, Opt_grpquota, Opt_uid, Opt_gid, Opt_umask,
+	Opt_discard, Opt_nodiscard, Opt_discard_minblk
 };
 
 static const match_table_t tokens = {
@@ -214,6 +216,9 @@ static const match_table_t tokens = {
 	{Opt_uid, "uid=%u"},
 	{Opt_gid, "gid=%u"},
 	{Opt_umask, "umask=%u"},
+	{Opt_discard, "discard"},
+	{Opt_nodiscard, "nodiscard"},
+	{Opt_discard_minblk, "discard=%u"},
 	{Opt_err, NULL}
 };
 
@@ -255,8 +260,7 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize,
 			else {
 				nls_map = load_nls(args[0].from);
 				if (!nls_map) {
-					printk(KERN_ERR
-					       "JFS: charset not found\n");
+					pr_err("JFS: charset not found\n");
 					goto cleanup;
 				}
 			}
@@ -272,8 +276,7 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize,
 			*newLVSize = sb->s_bdev->bd_inode->i_size >>
 				sb->s_blocksize_bits;
 			if (*newLVSize == 0)
-				printk(KERN_ERR
-				       "JFS: Cannot determine volume size\n");
+				pr_err("JFS: Cannot determine volume size\n");
 			break;
 		}
 		case Opt_errors:
@@ -294,8 +297,7 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize,
 				*flag &= ~JFS_ERR_REMOUNT_RO;
 				*flag |= JFS_ERR_PANIC;
 			} else {
-				printk(KERN_ERR
-				       "JFS: %s is an invalid error handler\n",
+				pr_err("JFS: %s is an invalid error handler\n",
 				       errors);
 				goto cleanup;
 			}
@@ -314,8 +316,7 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize,
 		case Opt_usrquota:
 		case Opt_grpquota:
 		case Opt_quota:
-			printk(KERN_ERR
-			       "JFS: quota operations not supported\n");
+			pr_err("JFS: quota operations not supported\n");
 			break;
 #endif
 		case Opt_uid:
@@ -324,23 +325,61 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize,
 			sbi->uid = simple_strtoul(uid, &uid, 0);
 			break;
 		}
+
 		case Opt_gid:
 		{
 			char *gid = args[0].from;
 			sbi->gid = simple_strtoul(gid, &gid, 0);
 			break;
 		}
+
 		case Opt_umask:
 		{
 			char *umask = args[0].from;
 			sbi->umask = simple_strtoul(umask, &umask, 8);
 			if (sbi->umask & ~0777) {
-				printk(KERN_ERR
-				       "JFS: Invalid value of umask\n");
+				pr_err("JFS: Invalid value of umask\n");
 				goto cleanup;
 			}
 			break;
 		}
+
+		case Opt_discard:
+		{
+			struct request_queue *q = bdev_get_queue(sb->s_bdev);
+			/* if set to 1, even copying files will cause
+			 * trimming :O
+			 * -> user has more control over the online trimming
+			 */
+			sbi->minblks_trim = 64;
+			if (blk_queue_discard(q)) {
+				*flag |= JFS_DISCARD;
+			} else {
+				pr_err("JFS: discard option " \
+					"not supported on device\n");
+			}
+			break;
+		}
+
+		case Opt_nodiscard:
+			*flag &= ~JFS_DISCARD;
+			break;
+
+		case Opt_discard_minblk:
+		{
+			struct request_queue *q = bdev_get_queue(sb->s_bdev);
+			char *minblks_trim = args[0].from;
+			if (blk_queue_discard(q)) {
+				*flag |= JFS_DISCARD;
+				sbi->minblks_trim = simple_strtoull(
+					minblks_trim, &minblks_trim, 0);
+			} else {
+				pr_err("JFS: discard option " \
+					"not supported on device\n");
+			}
+			break;
+		}
+
 		default:
 			printk("jfs: Unrecognized mount option \"%s\" "
 					" or missing value\n", p);
@@ -374,8 +413,8 @@ static int jfs_remount(struct super_block *sb, int *flags, char *data)
 
 	if (newLVSize) {
 		if (sb->s_flags & MS_RDONLY) {
-			printk(KERN_ERR
-		  "JFS: resize requires volume to be mounted read-write\n");
+			pr_err("JFS: resize requires volume" \
+				" to be mounted read-write\n");
 			return -EROFS;
 		}
 		rc = jfs_extendfs(sb, newLVSize, 0);
@@ -457,7 +496,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
 #endif
 
 	if (newLVSize) {
-		printk(KERN_ERR "resize option for remount only\n");
+		pr_err("resize option for remount only\n");
 		goto out_kfree;
 	}
 
@@ -625,6 +664,8 @@ static int jfs_show_options(struct seq_file *seq, struct dentry *root)
 		seq_printf(seq, ",umask=%03o", sbi->umask);
 	if (sbi->flag & JFS_NOINTEGRITY)
 		seq_puts(seq, ",nointegrity");
+	if (sbi->flag & JFS_DISCARD)
+		seq_printf(seq, ",discard=%u", sbi->minblks_trim);
 	if (sbi->nls_tab)
 		seq_printf(seq, ",iocharset=%s", sbi->nls_tab->charset);
 	if (sbi->flag & JFS_ERR_CONTINUE)
-- 
cgit 


From 550d6da288df57f154ca27c4acb1c398ced42ea9 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
Date: Thu, 6 Sep 2012 15:33:09 +0800
Subject: JFS: use list_move instead of list_del/list_add

Using list_move() instead of list_del() + list_add().

spatch with a semantic match is used to found this problem.
(http://coccinelle.lip6.fr/)

Signed-off-by: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
Signed-off-by: Dave Kleikamp <dave.kleikamp@oracle.com>
---
 fs/jfs/jfs_txnmgr.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index bb8b661bcc50..5fcc02eaa64c 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -2977,12 +2977,9 @@ int jfs_sync(void *arg)
 				 * put back on the anon_list.
 				 */
 
-				/* Take off anon_list */
-				list_del(&jfs_ip->anon_inode_list);
-
-				/* Put on anon_list2 */
-				list_add(&jfs_ip->anon_inode_list,
-					 &TxAnchor.anon_list2);
+				/* Move from anon_list to anon_list2 */
+				list_move(&jfs_ip->anon_inode_list,
+					  &TxAnchor.anon_list2);
 
 				TXN_UNLOCK();
 				iput(ip);
-- 
cgit 


From 4026c9fde9c67266932afd209e25bfef4474a1be Mon Sep 17 00:00:00 2001
From: Ben Myers <bpm@sgi.com>
Date: Thu, 13 Sep 2012 16:18:47 -0500
Subject: xfs: stop the sync worker before xfs_unmountfs

Cancel work of the xfs_sync_worker before teardown of the log in
xfs_unmountfs.  This prevents occasional crashes on unmount like so:

PID: 21602  TASK: ee9df060  CPU: 0   COMMAND: "kworker/0:3"
 #0 [c5377d28] crash_kexec at c0292c94
 #1 [c5377d80] oops_end at c07090c2
 #2 [c5377d98] no_context at c06f614e
 #3 [c5377dbc] __bad_area_nosemaphore at c06f6281
 #4 [c5377df4] bad_area_nosemaphore at c06f629b
 #5 [c5377e00] do_page_fault at c070b0cb
 #6 [c5377e7c] error_code (via page_fault) at c070892c
    EAX: f300c6a8  EBX: f300c6a8  ECX: 000000c0  EDX: 000000c0  EBP: c5377ed0
    DS:  007b      ESI: 00000000  ES:  007b      EDI: 00000001  GS:  ffffad20
    CS:  0060      EIP: c0481ad0  ERR: ffffffff  EFLAGS: 00010246
 #7 [c5377eb0] atomic64_read_cx8 at c0481ad0
 #8 [c5377ebc] xlog_assign_tail_lsn_locked at f7cc7c6e [xfs]
 #9 [c5377ed4] xfs_trans_ail_delete_bulk at f7ccd520 [xfs]
#10 [c5377f0c] xfs_buf_iodone at f7ccb602 [xfs]
#11 [c5377f24] xfs_buf_do_callbacks at f7cca524 [xfs]
#12 [c5377f30] xfs_buf_iodone_callbacks at f7cca5da [xfs]
#13 [c5377f4c] xfs_buf_iodone_work at f7c718d0 [xfs]
#14 [c5377f58] process_one_work at c024ee4c
#15 [c5377f98] worker_thread at c024f43d
#16 [c5377fbc] kthread at c025326b
#17 [c5377fe8] kernel_thread_helper at c070e834

PID: 26653  TASK: e79143b0  CPU: 3   COMMAND: "umount"
 #0 [cde0fda0] __schedule at c0706595
 #1 [cde0fe28] schedule at c0706b89
 #2 [cde0fe30] schedule_timeout at c0705600
 #3 [cde0fe94] __down_common at c0706098
 #4 [cde0fec8] __down at c0706122
 #5 [cde0fed0] down at c025936f
 #6 [cde0fee0] xfs_buf_lock at f7c7131d [xfs]
 #7 [cde0ff00] xfs_freesb at f7cc2236 [xfs]
 #8 [cde0ff10] xfs_fs_put_super at f7c80f21 [xfs]
 #9 [cde0ff1c] generic_shutdown_super at c0333d7a
#10 [cde0ff38] kill_block_super at c0333e0f
#11 [cde0ff48] deactivate_locked_super at c0334218
#12 [cde0ff58] deactivate_super at c033495d
#13 [cde0ff68] mntput_no_expire at c034bc13
#14 [cde0ff7c] sys_umount at c034cc69
#15 [cde0ffa0] sys_oldumount at c034ccd4
#16 [cde0ffb0] system_call at c0707e66

commit 11159a05 added this to xfs_log_unmount and needs to be cleaned up
at a later date.

Signed-off-by: Ben Myers <bpm@sgi.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
---
 fs/xfs/xfs_super.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index bdaf4cb9f4a2..19e2380fb867 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -919,6 +919,7 @@ xfs_fs_put_super(
 	struct xfs_mount	*mp = XFS_M(sb);
 
 	xfs_filestream_unmount(mp);
+	cancel_delayed_work_sync(&mp->m_sync_work);
 	xfs_unmountfs(mp);
 	xfs_syncd_stop(mp);
 	xfs_freesb(mp);
-- 
cgit 


From e1760bd5ffae8cb98cffb030ee8e631eba28f3d8 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 10 Sep 2012 22:39:43 -0700
Subject: userns: Convert the audit loginuid  to be a kuid

Always store audit loginuids in type kuid_t.

Print loginuids by converting them into uids in the appropriate user
namespace, and then printing the resulting uid.

Modify audit_get_loginuid to return a kuid_t.

Modify audit_set_loginuid to take a kuid_t.

Modify /proc/<pid>/loginuid on read to convert the loginuid into the
user namespace of the opener of the file.

Modify /proc/<pid>/loginud on write to convert the loginuid
rom the user namespace of the opener of the file.

Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Eric Paris <eparis@redhat.com>
Cc: Paul Moore <paul@paul-moore.com> ?
Cc: David Miller <davem@davemloft.net>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/proc/base.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 1b6c84cbdb73..138cff4b05dd 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1089,7 +1089,8 @@ static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
 	if (!task)
 		return -ESRCH;
 	length = scnprintf(tmpbuf, TMPBUFLEN, "%u",
-				audit_get_loginuid(task));
+			   from_kuid(file->f_cred->user_ns,
+				     audit_get_loginuid(task)));
 	put_task_struct(task);
 	return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
 }
@@ -1101,6 +1102,7 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
 	char *page, *tmp;
 	ssize_t length;
 	uid_t loginuid;
+	kuid_t kloginuid;
 
 	rcu_read_lock();
 	if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) {
@@ -1130,7 +1132,13 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
 		goto out_free_page;
 
 	}
-	length = audit_set_loginuid(loginuid);
+	kloginuid = make_kuid(file->f_cred->user_ns, loginuid);
+	if (!uid_valid(kloginuid)) {
+		length = -EINVAL;
+		goto out_free_page;
+	}
+
+	length = audit_set_loginuid(kloginuid);
 	if (likely(length == 0))
 		length = count;
 
-- 
cgit 


From 2f6f0654ab61961fd0f7701fe3be89ea111f0cda Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 7 Feb 2012 18:52:57 -0800
Subject: userns: Convert vfs posix_acl support to use kuids and kgids

- In setxattr if we are setting a posix acl convert uids and gids from
  the current user namespace into the initial user namespace, before
  the xattrs are passed to the underlying filesystem.

  Untranslatable uids and gids are represented as -1 which
  posix_acl_from_xattr will represent as INVALID_UID or INVALID_GID.
  posix_acl_valid will fail if an acl from userspace has any
  INVALID_UID or INVALID_GID values.  In net this guarantees that
  untranslatable posix acls will not be stored by filesystems.

- In getxattr if we are reading a posix acl convert uids and gids from
  the initial user namespace into the current user namespace.

  Uids and gids that can not be tranlsated into the current user namespace
  will be represented as -1.

- Replace e_id in struct posix_acl_entry with an anymouns union of
  e_uid and e_gid.  For the short term retain the e_id field
  until all of the users are converted.

- Don't set struct posix_acl.e_id in the cases where the acl type
  does not use e_id.  Greatly reducing the use of ACL_UNDEFINED_ID.

- Rework the ordering checks in posix_acl_valid so that I use kuid_t
  and kgid_t types throughout the code, and so that I don't need
  arithmetic on uid and gid types.

Cc: Theodore Tso <tytso@mit.edu>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andreas Dilger <adilger.kernel@dilger.ca>
Cc: Jan Kara <jack@suse.cz>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/posix_acl.c | 30 ++++++++++----------
 fs/xattr.c     |  7 +++++
 fs/xattr_acl.c | 90 ++++++++++++++++++++++++++++++++++++++++++++++++++++++----
 3 files changed, 107 insertions(+), 20 deletions(-)

(limited to 'fs')

diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index 5e325a42e33d..8bd2135b7f82 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -78,7 +78,8 @@ posix_acl_valid(const struct posix_acl *acl)
 {
 	const struct posix_acl_entry *pa, *pe;
 	int state = ACL_USER_OBJ;
-	unsigned int id = 0;  /* keep gcc happy */
+	kuid_t prev_uid = INVALID_UID;
+	kgid_t prev_gid = INVALID_GID;
 	int needs_mask = 0;
 
 	FOREACH_ACL_ENTRY(pa, acl, pe) {
@@ -87,7 +88,6 @@ posix_acl_valid(const struct posix_acl *acl)
 		switch (pa->e_tag) {
 			case ACL_USER_OBJ:
 				if (state == ACL_USER_OBJ) {
-					id = 0;
 					state = ACL_USER;
 					break;
 				}
@@ -96,16 +96,17 @@ posix_acl_valid(const struct posix_acl *acl)
 			case ACL_USER:
 				if (state != ACL_USER)
 					return -EINVAL;
-				if (pa->e_id == ACL_UNDEFINED_ID ||
-				    pa->e_id < id)
+				if (!uid_valid(pa->e_uid))
 					return -EINVAL;
-				id = pa->e_id + 1;
+				if (uid_valid(prev_uid) &&
+				    uid_lte(pa->e_uid, prev_uid))
+					return -EINVAL;
+				prev_uid = pa->e_uid;
 				needs_mask = 1;
 				break;
 
 			case ACL_GROUP_OBJ:
 				if (state == ACL_USER) {
-					id = 0;
 					state = ACL_GROUP;
 					break;
 				}
@@ -114,10 +115,12 @@ posix_acl_valid(const struct posix_acl *acl)
 			case ACL_GROUP:
 				if (state != ACL_GROUP)
 					return -EINVAL;
-				if (pa->e_id == ACL_UNDEFINED_ID ||
-				    pa->e_id < id)
+				if (!gid_valid(pa->e_gid))
+					return -EINVAL;
+				if (gid_valid(prev_gid) &&
+				    gid_lte(pa->e_gid, prev_gid))
 					return -EINVAL;
-				id = pa->e_id + 1;
+				prev_gid = pa->e_gid;
 				needs_mask = 1;
 				break;
 
@@ -195,15 +198,12 @@ posix_acl_from_mode(umode_t mode, gfp_t flags)
 		return ERR_PTR(-ENOMEM);
 
 	acl->a_entries[0].e_tag  = ACL_USER_OBJ;
-	acl->a_entries[0].e_id   = ACL_UNDEFINED_ID;
 	acl->a_entries[0].e_perm = (mode & S_IRWXU) >> 6;
 
 	acl->a_entries[1].e_tag  = ACL_GROUP_OBJ;
-	acl->a_entries[1].e_id   = ACL_UNDEFINED_ID;
 	acl->a_entries[1].e_perm = (mode & S_IRWXG) >> 3;
 
 	acl->a_entries[2].e_tag  = ACL_OTHER;
-	acl->a_entries[2].e_id   = ACL_UNDEFINED_ID;
 	acl->a_entries[2].e_perm = (mode & S_IRWXO);
 	return acl;
 }
@@ -224,11 +224,11 @@ posix_acl_permission(struct inode *inode, const struct posix_acl *acl, int want)
                 switch(pa->e_tag) {
                         case ACL_USER_OBJ:
 				/* (May have been checked already) */
-				if (inode->i_uid == current_fsuid())
+				if (uid_eq(inode->i_uid, current_fsuid()))
                                         goto check_perm;
                                 break;
                         case ACL_USER:
-				if (pa->e_id == current_fsuid())
+				if (uid_eq(pa->e_uid, current_fsuid()))
                                         goto mask;
 				break;
                         case ACL_GROUP_OBJ:
@@ -239,7 +239,7 @@ posix_acl_permission(struct inode *inode, const struct posix_acl *acl, int want)
                                 }
 				break;
                         case ACL_GROUP:
-                                if (in_group_p(pa->e_id)) {
+				if (in_group_p(pa->e_gid)) {
 					found = 1;
 					if ((pa->e_perm & want) == want)
 						goto mask;
diff --git a/fs/xattr.c b/fs/xattr.c
index 4d45b7189e7e..c111745c2da9 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -20,6 +20,7 @@
 #include <linux/fsnotify.h>
 #include <linux/audit.h>
 #include <linux/vmalloc.h>
+#include <linux/posix_acl_xattr.h>
 
 #include <asm/uaccess.h>
 
@@ -347,6 +348,9 @@ setxattr(struct dentry *d, const char __user *name, const void __user *value,
 			error = -EFAULT;
 			goto out;
 		}
+		if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) ||
+		    (strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
+			posix_acl_fix_xattr_from_user(kvalue, size);
 	}
 
 	error = vfs_setxattr(d, kname, kvalue, size, flags);
@@ -450,6 +454,9 @@ getxattr(struct dentry *d, const char __user *name, void __user *value,
 
 	error = vfs_getxattr(d, kname, kvalue, size);
 	if (error > 0) {
+		if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) ||
+		    (strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
+			posix_acl_fix_xattr_to_user(kvalue, size);
 		if (size && copy_to_user(value, kvalue, error))
 			error = -EFAULT;
 	} else if (error == -ERANGE && size >= XATTR_SIZE_MAX) {
diff --git a/fs/xattr_acl.c b/fs/xattr_acl.c
index 69d06b07b169..bf472ca1b348 100644
--- a/fs/xattr_acl.c
+++ b/fs/xattr_acl.c
@@ -9,7 +9,65 @@
 #include <linux/fs.h>
 #include <linux/posix_acl_xattr.h>
 #include <linux/gfp.h>
+#include <linux/user_namespace.h>
 
+/*
+ * Fix up the uids and gids in posix acl extended attributes in place.
+ */
+static void posix_acl_fix_xattr_userns(
+	struct user_namespace *to, struct user_namespace *from,
+	void *value, size_t size)
+{
+	posix_acl_xattr_header *header = (posix_acl_xattr_header *)value;
+	posix_acl_xattr_entry *entry = (posix_acl_xattr_entry *)(header+1), *end;
+	int count;
+	kuid_t uid;
+	kgid_t gid;
+
+	if (!value)
+		return;
+	if (size < sizeof(posix_acl_xattr_header))
+		return;
+	if (header->a_version != cpu_to_le32(POSIX_ACL_XATTR_VERSION))
+		return;
+
+	count = posix_acl_xattr_count(size);
+	if (count < 0)
+		return;
+	if (count == 0)
+		return;
+
+	for (end = entry + count; entry != end; entry++) {
+		switch(le16_to_cpu(entry->e_tag)) {
+		case ACL_USER:
+			uid = make_kuid(from, le32_to_cpu(entry->e_id));
+			entry->e_id = cpu_to_le32(from_kuid(to, uid));
+			break;
+		case ACL_GROUP:
+			gid = make_kgid(from, le32_to_cpu(entry->e_id));
+			entry->e_id = cpu_to_le32(from_kuid(to, uid));
+			break;
+		default:
+			break;
+		}
+	}
+}
+
+void posix_acl_fix_xattr_from_user(void *value, size_t size)
+{
+	struct user_namespace *user_ns = current_user_ns();
+	if (user_ns == &init_user_ns)
+		return;
+	posix_acl_fix_xattr_userns(&init_user_ns, user_ns, value, size);
+}
+
+void posix_acl_fix_xattr_to_user(void *value, size_t size)
+{
+	struct user_namespace *user_ns = current_user_ns();
+	if (user_ns == &init_user_ns)
+		return;
+	posix_acl_fix_xattr_userns(user_ns, &init_user_ns, value, size);
+}
 
 /*
  * Convert from extended attribute to in-memory representation.
@@ -50,12 +108,21 @@ posix_acl_from_xattr(const void *value, size_t size)
 			case ACL_GROUP_OBJ:
 			case ACL_MASK:
 			case ACL_OTHER:
-				acl_e->e_id = ACL_UNDEFINED_ID;
 				break;
 
 			case ACL_USER:
+				acl_e->e_uid =
+					make_kuid(&init_user_ns,
+						  le32_to_cpu(entry->e_id));
+				if (!uid_valid(acl_e->e_uid))
+					goto fail;
+				break;
 			case ACL_GROUP:
-				acl_e->e_id = le32_to_cpu(entry->e_id);
+				acl_e->e_gid =
+					make_kgid(&init_user_ns,
+						  le32_to_cpu(entry->e_id));
+				if (!gid_valid(acl_e->e_gid))
+					goto fail;
 				break;
 
 			default:
@@ -89,9 +156,22 @@ posix_acl_to_xattr(const struct posix_acl *acl, void *buffer, size_t size)
 	ext_acl->a_version = cpu_to_le32(POSIX_ACL_XATTR_VERSION);
 
 	for (n=0; n < acl->a_count; n++, ext_entry++) {
-		ext_entry->e_tag  = cpu_to_le16(acl->a_entries[n].e_tag);
-		ext_entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm);
-		ext_entry->e_id   = cpu_to_le32(acl->a_entries[n].e_id);
+		const struct posix_acl_entry *acl_e = &acl->a_entries[n];
+		ext_entry->e_tag  = cpu_to_le16(acl_e->e_tag);
+		ext_entry->e_perm = cpu_to_le16(acl_e->e_perm);
+		switch(acl_e->e_tag) {
+		case ACL_USER:
+			ext_entry->e_id =
+				cpu_to_le32(from_kuid(&init_user_ns, acl_e->e_uid));
+			break;
+		case ACL_GROUP:
+			ext_entry->e_id =
+				cpu_to_le32(from_kgid(&init_user_ns, acl_e->e_gid));
+			break;
+		default:
+			ext_entry->e_id = cpu_to_le32(ACL_UNDEFINED_ID);
+			break;
+		}
 	}
 	return real_size;
 }
-- 
cgit 


From 5f3a4a28ec140a90e6058d1d09f6b1f235d485e5 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 10 Sep 2012 20:17:44 -0700
Subject: userns: Pass a userns parameter into posix_acl_to_xattr and
 posix_acl_from_xattr

 - Pass the user namespace the uid and gid values in the xattr are stored
   in into posix_acl_from_xattr.

 - Pass the user namespace kuid and kgid values should be converted into
   when storing uid and gid values in an xattr in posix_acl_to_xattr.

- Modify all callers of posix_acl_from_xattr and posix_acl_to_xattr to
  pass in &init_user_ns.

In the short term this change is not strictly needed but it makes the
code clearer.  In the longer term this change is necessary to be able to
mount filesystems outside of the initial user namespace that natively
store posix acls in the linux xattr format.

Cc: Theodore Tso <tytso@mit.edu>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andreas Dilger <adilger.kernel@dilger.ca>
Cc: Jan Kara <jack@suse.cz>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/9p/acl.c             |  8 ++++----
 fs/btrfs/acl.c          |  8 ++++----
 fs/ext2/acl.c           |  4 ++--
 fs/ext3/acl.c           |  4 ++--
 fs/ext4/acl.c           |  4 ++--
 fs/generic_acl.c        |  4 ++--
 fs/gfs2/acl.c           | 14 +++++++-------
 fs/jffs2/acl.c          |  4 ++--
 fs/jfs/acl.c            |  4 ++--
 fs/jfs/xattr.c          |  4 ++--
 fs/nfs/nfs3acl.c        |  4 ++--
 fs/nfsd/vfs.c           |  8 ++++----
 fs/ocfs2/acl.c          |  4 ++--
 fs/reiserfs/xattr_acl.c |  4 ++--
 fs/xattr_acl.c          | 14 ++++++++------
 fs/xfs/xfs_acl.c        |  4 ++--
 16 files changed, 49 insertions(+), 47 deletions(-)

(limited to 'fs')

diff --git a/fs/9p/acl.c b/fs/9p/acl.c
index 9a1d42630751..15b679166201 100644
--- a/fs/9p/acl.c
+++ b/fs/9p/acl.c
@@ -37,7 +37,7 @@ static struct posix_acl *__v9fs_get_acl(struct p9_fid *fid, char *name)
 			return ERR_PTR(-ENOMEM);
 		size = v9fs_fid_xattr_get(fid, name, value, size);
 		if (size > 0) {
-			acl = posix_acl_from_xattr(value, size);
+			acl = posix_acl_from_xattr(&init_user_ns, value, size);
 			if (IS_ERR(acl))
 				goto err_out;
 		}
@@ -131,7 +131,7 @@ static int v9fs_set_acl(struct dentry *dentry, int type, struct posix_acl *acl)
 	buffer = kmalloc(size, GFP_KERNEL);
 	if (!buffer)
 		return -ENOMEM;
-	retval = posix_acl_to_xattr(acl, buffer, size);
+	retval = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
 	if (retval < 0)
 		goto err_free_out;
 	switch (type) {
@@ -251,7 +251,7 @@ static int v9fs_xattr_get_acl(struct dentry *dentry, const char *name,
 		return PTR_ERR(acl);
 	if (acl == NULL)
 		return -ENODATA;
-	error = posix_acl_to_xattr(acl, buffer, size);
+	error = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
 	posix_acl_release(acl);
 
 	return error;
@@ -304,7 +304,7 @@ static int v9fs_xattr_set_acl(struct dentry *dentry, const char *name,
 		return -EPERM;
 	if (value) {
 		/* update the cached acl value */
-		acl = posix_acl_from_xattr(value, size);
+		acl = posix_acl_from_xattr(&init_user_ns, value, size);
 		if (IS_ERR(acl))
 			return PTR_ERR(acl);
 		else if (acl) {
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 761e2cd8fed1..0c16e3dbfd56 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -61,7 +61,7 @@ struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
 		size = __btrfs_getxattr(inode, name, value, size);
 	}
 	if (size > 0) {
-		acl = posix_acl_from_xattr(value, size);
+		acl = posix_acl_from_xattr(&init_user_ns, value, size);
 	} else if (size == -ENOENT || size == -ENODATA || size == 0) {
 		/* FIXME, who returns -ENOENT?  I think nobody */
 		acl = NULL;
@@ -91,7 +91,7 @@ static int btrfs_xattr_acl_get(struct dentry *dentry, const char *name,
 		return PTR_ERR(acl);
 	if (acl == NULL)
 		return -ENODATA;
-	ret = posix_acl_to_xattr(acl, value, size);
+	ret = posix_acl_to_xattr(&init_user_ns, acl, value, size);
 	posix_acl_release(acl);
 
 	return ret;
@@ -141,7 +141,7 @@ static int btrfs_set_acl(struct btrfs_trans_handle *trans,
 			goto out;
 		}
 
-		ret = posix_acl_to_xattr(acl, value, size);
+		ret = posix_acl_to_xattr(&init_user_ns, acl, value, size);
 		if (ret < 0)
 			goto out;
 	}
@@ -169,7 +169,7 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
 		return -EOPNOTSUPP;
 
 	if (value) {
-		acl = posix_acl_from_xattr(value, size);
+		acl = posix_acl_from_xattr(&init_user_ns, value, size);
 		if (IS_ERR(acl))
 			return PTR_ERR(acl);
 
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index 35d6a3cfd9ff..70bb1bccc957 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -350,7 +350,7 @@ ext2_xattr_get_acl(struct dentry *dentry, const char *name, void *buffer,
 		return PTR_ERR(acl);
 	if (acl == NULL)
 		return -ENODATA;
-	error = posix_acl_to_xattr(acl, buffer, size);
+	error = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
 	posix_acl_release(acl);
 
 	return error;
@@ -371,7 +371,7 @@ ext2_xattr_set_acl(struct dentry *dentry, const char *name, const void *value,
 		return -EPERM;
 
 	if (value) {
-		acl = posix_acl_from_xattr(value, size);
+		acl = posix_acl_from_xattr(&init_user_ns, value, size);
 		if (IS_ERR(acl))
 			return PTR_ERR(acl);
 		else if (acl) {
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index c76832c8d192..2cf6a8044c80 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -369,7 +369,7 @@ ext3_xattr_get_acl(struct dentry *dentry, const char *name, void *buffer,
 		return PTR_ERR(acl);
 	if (acl == NULL)
 		return -ENODATA;
-	error = posix_acl_to_xattr(acl, buffer, size);
+	error = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
 	posix_acl_release(acl);
 
 	return error;
@@ -392,7 +392,7 @@ ext3_xattr_set_acl(struct dentry *dentry, const char *name, const void *value,
 		return -EPERM;
 
 	if (value) {
-		acl = posix_acl_from_xattr(value, size);
+		acl = posix_acl_from_xattr(&init_user_ns, value, size);
 		if (IS_ERR(acl))
 			return PTR_ERR(acl);
 		else if (acl) {
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index a5c29bb3b835..42b95fccfb2f 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -374,7 +374,7 @@ ext4_xattr_get_acl(struct dentry *dentry, const char *name, void *buffer,
 		return PTR_ERR(acl);
 	if (acl == NULL)
 		return -ENODATA;
-	error = posix_acl_to_xattr(acl, buffer, size);
+	error = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
 	posix_acl_release(acl);
 
 	return error;
@@ -397,7 +397,7 @@ ext4_xattr_set_acl(struct dentry *dentry, const char *name, const void *value,
 		return -EPERM;
 
 	if (value) {
-		acl = posix_acl_from_xattr(value, size);
+		acl = posix_acl_from_xattr(&init_user_ns, value, size);
 		if (IS_ERR(acl))
 			return PTR_ERR(acl);
 		else if (acl) {
diff --git a/fs/generic_acl.c b/fs/generic_acl.c
index d0dddaceac59..b3f3676796d3 100644
--- a/fs/generic_acl.c
+++ b/fs/generic_acl.c
@@ -56,7 +56,7 @@ generic_acl_get(struct dentry *dentry, const char *name, void *buffer,
 	acl = get_cached_acl(dentry->d_inode, type);
 	if (!acl)
 		return -ENODATA;
-	error = posix_acl_to_xattr(acl, buffer, size);
+	error = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
 	posix_acl_release(acl);
 
 	return error;
@@ -77,7 +77,7 @@ generic_acl_set(struct dentry *dentry, const char *name, const void *value,
 	if (!inode_owner_or_capable(inode))
 		return -EPERM;
 	if (value) {
-		acl = posix_acl_from_xattr(value, size);
+		acl = posix_acl_from_xattr(&init_user_ns, value, size);
 		if (IS_ERR(acl))
 			return PTR_ERR(acl);
 	}
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index bd4a5892c93c..f850020ad906 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -63,7 +63,7 @@ struct posix_acl *gfs2_get_acl(struct inode *inode, int type)
 	if (len == 0)
 		return NULL;
 
-	acl = posix_acl_from_xattr(data, len);
+	acl = posix_acl_from_xattr(&init_user_ns, data, len);
 	kfree(data);
 	return acl;
 }
@@ -88,13 +88,13 @@ static int gfs2_acl_set(struct inode *inode, int type, struct posix_acl *acl)
 	const char *name = gfs2_acl_name(type);
 
 	BUG_ON(name == NULL);
-	len = posix_acl_to_xattr(acl, NULL, 0);
+	len = posix_acl_to_xattr(&init_user_ns, acl, NULL, 0);
 	if (len == 0)
 		return 0;
 	data = kmalloc(len, GFP_NOFS);
 	if (data == NULL)
 		return -ENOMEM;
-	error = posix_acl_to_xattr(acl, data, len);
+	error = posix_acl_to_xattr(&init_user_ns, acl, data, len);
 	if (error < 0)
 		goto out;
 	error = __gfs2_xattr_set(inode, name, data, len, 0, GFS2_EATYPE_SYS);
@@ -166,12 +166,12 @@ int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr)
 	if (error)
 		return error;
 
-	len = posix_acl_to_xattr(acl, NULL, 0);
+	len = posix_acl_to_xattr(&init_user_ns, acl, NULL, 0);
 	data = kmalloc(len, GFP_NOFS);
 	error = -ENOMEM;
 	if (data == NULL)
 		goto out;
-	posix_acl_to_xattr(acl, data, len);
+	posix_acl_to_xattr(&init_user_ns, acl, data, len);
 	error = gfs2_xattr_acl_chmod(ip, attr, data);
 	kfree(data);
 	set_cached_acl(&ip->i_inode, ACL_TYPE_ACCESS, acl);
@@ -212,7 +212,7 @@ static int gfs2_xattr_system_get(struct dentry *dentry, const char *name,
 	if (acl == NULL)
 		return -ENODATA;
 
-	error = posix_acl_to_xattr(acl, buffer, size);
+	error = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
 	posix_acl_release(acl);
 
 	return error;
@@ -245,7 +245,7 @@ static int gfs2_xattr_system_set(struct dentry *dentry, const char *name,
 	if (!value)
 		goto set_acl;
 
-	acl = posix_acl_from_xattr(value, size);
+	acl = posix_acl_from_xattr(&init_user_ns, value, size);
 	if (!acl) {
 		/*
 		 * acl_set_file(3) may request that we set default ACLs with
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index 922f146e4235..42e4edc17a90 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -363,7 +363,7 @@ static int jffs2_acl_getxattr(struct dentry *dentry, const char *name,
 		return PTR_ERR(acl);
 	if (!acl)
 		return -ENODATA;
-	rc = posix_acl_to_xattr(acl, buffer, size);
+	rc = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
 	posix_acl_release(acl);
 
 	return rc;
@@ -381,7 +381,7 @@ static int jffs2_acl_setxattr(struct dentry *dentry, const char *name,
 		return -EPERM;
 
 	if (value) {
-		acl = posix_acl_from_xattr(value, size);
+		acl = posix_acl_from_xattr(&init_user_ns, value, size);
 		if (IS_ERR(acl))
 			return PTR_ERR(acl);
 		if (acl) {
diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index 45559dc3ea2f..d254d6d35995 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -64,7 +64,7 @@ struct posix_acl *jfs_get_acl(struct inode *inode, int type)
 		else
 			acl = ERR_PTR(size);
 	} else {
-		acl = posix_acl_from_xattr(value, size);
+		acl = posix_acl_from_xattr(&init_user_ns, value, size);
 	}
 	kfree(value);
 	if (!IS_ERR(acl))
@@ -100,7 +100,7 @@ static int jfs_set_acl(tid_t tid, struct inode *inode, int type,
 		value = kmalloc(size, GFP_KERNEL);
 		if (!value)
 			return -ENOMEM;
-		rc = posix_acl_to_xattr(acl, value, size);
+		rc = posix_acl_to_xattr(&init_user_ns, acl, value, size);
 		if (rc < 0)
 			goto out;
 	}
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index 26683e15b3ac..42d67f9757bf 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -685,7 +685,7 @@ static int can_set_system_xattr(struct inode *inode, const char *name,
 	 * POSIX_ACL_XATTR_ACCESS is tied to i_mode
 	 */
 	if (strcmp(name, POSIX_ACL_XATTR_ACCESS) == 0) {
-		acl = posix_acl_from_xattr(value, value_len);
+		acl = posix_acl_from_xattr(&init_user_ns, value, value_len);
 		if (IS_ERR(acl)) {
 			rc = PTR_ERR(acl);
 			printk(KERN_ERR "posix_acl_from_xattr returned %d\n",
@@ -710,7 +710,7 @@ static int can_set_system_xattr(struct inode *inode, const char *name,
 
 		return 0;
 	} else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0) {
-		acl = posix_acl_from_xattr(value, value_len);
+		acl = posix_acl_from_xattr(&init_user_ns, value, value_len);
 		if (IS_ERR(acl)) {
 			rc = PTR_ERR(acl);
 			printk(KERN_ERR "posix_acl_from_xattr returned %d\n",
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index e4498dc351a8..4a1aafba6a20 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -70,7 +70,7 @@ ssize_t nfs3_getxattr(struct dentry *dentry, const char *name,
 		if (type == ACL_TYPE_ACCESS && acl->a_count == 0)
 			error = -ENODATA;
 		else
-			error = posix_acl_to_xattr(acl, buffer, size);
+			error = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
 		posix_acl_release(acl);
 	} else
 		error = -ENODATA;
@@ -92,7 +92,7 @@ int nfs3_setxattr(struct dentry *dentry, const char *name,
 	else
 		return -EOPNOTSUPP;
 
-	acl = posix_acl_from_xattr(value, size);
+	acl = posix_acl_from_xattr(&init_user_ns, value, size);
 	if (IS_ERR(acl))
 		return PTR_ERR(acl);
 	error = nfs3_proc_setacl(inode, type, acl);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index a9269f142cc4..3f67b8e12251 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -480,7 +480,7 @@ set_nfsv4_acl_one(struct dentry *dentry, struct posix_acl *pacl, char *key)
 	if (buf == NULL)
 		goto out;
 
-	len = posix_acl_to_xattr(pacl, buf, buflen);
+	len = posix_acl_to_xattr(&init_user_ns, pacl, buf, buflen);
 	if (len < 0) {
 		error = len;
 		goto out;
@@ -549,7 +549,7 @@ _get_posix_acl(struct dentry *dentry, char *key)
 	if (buflen <= 0)
 		return ERR_PTR(buflen);
 
-	pacl = posix_acl_from_xattr(buf, buflen);
+	pacl = posix_acl_from_xattr(&init_user_ns, buf, buflen);
 	kfree(buf);
 	return pacl;
 }
@@ -2264,7 +2264,7 @@ nfsd_get_posix_acl(struct svc_fh *fhp, int type)
 	if (size < 0)
 		return ERR_PTR(size);
 
-	acl = posix_acl_from_xattr(value, size);
+	acl = posix_acl_from_xattr(&init_user_ns, value, size);
 	kfree(value);
 	return acl;
 }
@@ -2297,7 +2297,7 @@ nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl)
 		value = kmalloc(size, GFP_KERNEL);
 		if (!value)
 			return -ENOMEM;
-		error = posix_acl_to_xattr(acl, value, size);
+		error = posix_acl_to_xattr(&init_user_ns, acl, value, size);
 		if (error < 0)
 			goto getout;
 		size = error;
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index a7219075b4de..260b16281fc3 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -452,7 +452,7 @@ static int ocfs2_xattr_get_acl(struct dentry *dentry, const char *name,
 		return PTR_ERR(acl);
 	if (acl == NULL)
 		return -ENODATA;
-	ret = posix_acl_to_xattr(acl, buffer, size);
+	ret = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
 	posix_acl_release(acl);
 
 	return ret;
@@ -475,7 +475,7 @@ static int ocfs2_xattr_set_acl(struct dentry *dentry, const char *name,
 		return -EPERM;
 
 	if (value) {
-		acl = posix_acl_from_xattr(value, size);
+		acl = posix_acl_from_xattr(&init_user_ns, value, size);
 		if (IS_ERR(acl))
 			return PTR_ERR(acl);
 		else if (acl) {
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index 44474f9b990d..87d6911c659d 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -30,7 +30,7 @@ posix_acl_set(struct dentry *dentry, const char *name, const void *value,
 		return -EPERM;
 
 	if (value) {
-		acl = posix_acl_from_xattr(value, size);
+		acl = posix_acl_from_xattr(&init_user_ns, value, size);
 		if (IS_ERR(acl)) {
 			return PTR_ERR(acl);
 		} else if (acl) {
@@ -77,7 +77,7 @@ posix_acl_get(struct dentry *dentry, const char *name, void *buffer,
 		return PTR_ERR(acl);
 	if (acl == NULL)
 		return -ENODATA;
-	error = posix_acl_to_xattr(acl, buffer, size);
+	error = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
 	posix_acl_release(acl);
 
 	return error;
diff --git a/fs/xattr_acl.c b/fs/xattr_acl.c
index bf472ca1b348..11efd830b5f5 100644
--- a/fs/xattr_acl.c
+++ b/fs/xattr_acl.c
@@ -73,7 +73,8 @@ void posix_acl_fix_xattr_to_user(void *value, size_t size)
  * Convert from extended attribute to in-memory representation.
  */
 struct posix_acl *
-posix_acl_from_xattr(const void *value, size_t size)
+posix_acl_from_xattr(struct user_namespace *user_ns,
+		     const void *value, size_t size)
 {
 	posix_acl_xattr_header *header = (posix_acl_xattr_header *)value;
 	posix_acl_xattr_entry *entry = (posix_acl_xattr_entry *)(header+1), *end;
@@ -112,14 +113,14 @@ posix_acl_from_xattr(const void *value, size_t size)
 
 			case ACL_USER:
 				acl_e->e_uid =
-					make_kuid(&init_user_ns,
+					make_kuid(user_ns,
 						  le32_to_cpu(entry->e_id));
 				if (!uid_valid(acl_e->e_uid))
 					goto fail;
 				break;
 			case ACL_GROUP:
 				acl_e->e_gid =
-					make_kgid(&init_user_ns,
+					make_kgid(user_ns,
 						  le32_to_cpu(entry->e_id));
 				if (!gid_valid(acl_e->e_gid))
 					goto fail;
@@ -141,7 +142,8 @@ EXPORT_SYMBOL (posix_acl_from_xattr);
  * Convert from in-memory to extended attribute representation.
  */
 int
-posix_acl_to_xattr(const struct posix_acl *acl, void *buffer, size_t size)
+posix_acl_to_xattr(struct user_namespace *user_ns, const struct posix_acl *acl,
+		   void *buffer, size_t size)
 {
 	posix_acl_xattr_header *ext_acl = (posix_acl_xattr_header *)buffer;
 	posix_acl_xattr_entry *ext_entry = ext_acl->a_entries;
@@ -162,11 +164,11 @@ posix_acl_to_xattr(const struct posix_acl *acl, void *buffer, size_t size)
 		switch(acl_e->e_tag) {
 		case ACL_USER:
 			ext_entry->e_id =
-				cpu_to_le32(from_kuid(&init_user_ns, acl_e->e_uid));
+				cpu_to_le32(from_kuid(user_ns, acl_e->e_uid));
 			break;
 		case ACL_GROUP:
 			ext_entry->e_id =
-				cpu_to_le32(from_kgid(&init_user_ns, acl_e->e_gid));
+				cpu_to_le32(from_kgid(user_ns, acl_e->e_gid));
 			break;
 		default:
 			ext_entry->e_id = cpu_to_le32(ACL_UNDEFINED_ID);
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index ac702a6eab9b..1d32f1d52763 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -337,7 +337,7 @@ xfs_xattr_acl_get(struct dentry *dentry, const char *name,
 	if (acl == NULL)
 		return -ENODATA;
 
-	error = posix_acl_to_xattr(acl, value, size);
+	error = posix_acl_to_xattr(&init_user_ns, acl, value, size);
 	posix_acl_release(acl);
 
 	return error;
@@ -361,7 +361,7 @@ xfs_xattr_acl_set(struct dentry *dentry, const char *name,
 	if (!value)
 		goto set_acl;
 
-	acl = posix_acl_from_xattr(value, size);
+	acl = posix_acl_from_xattr(&init_user_ns, value, size);
 	if (!acl) {
 		/*
 		 * acl_set_file(3) may request that we set default ACLs with
-- 
cgit 


From af84df93ffe3603fc6fc40a4338f9e740aad3b4e Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 10 Sep 2012 20:44:54 -0700
Subject: userns: Convert extN to support kuids and kgids in posix acls

Convert ext2, ext3, and ext4 to fully support the posix acl changes,
using e_uid e_gid instead e_id.

Enabled building with posix acls enabled, all filesystems supporting
user namespaces, now also support posix acls when user namespaces are enabled.

Cc: Theodore Tso <tytso@mit.edu>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andreas Dilger <adilger.kernel@dilger.ca>
Cc: Jan Kara <jack@suse.cz>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/ext2/acl.c | 28 ++++++++++++++++++++--------
 fs/ext3/acl.c | 28 ++++++++++++++++++++--------
 fs/ext4/acl.c | 27 ++++++++++++++++++++-------
 3 files changed, 60 insertions(+), 23 deletions(-)

(limited to 'fs')

diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index 70bb1bccc957..110b6b371a4e 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -53,16 +53,23 @@ ext2_acl_from_disk(const void *value, size_t size)
 			case ACL_OTHER:
 				value = (char *)value +
 					sizeof(ext2_acl_entry_short);
-				acl->a_entries[n].e_id = ACL_UNDEFINED_ID;
 				break;
 
 			case ACL_USER:
+				value = (char *)value + sizeof(ext2_acl_entry);
+				if ((char *)value > end)
+					goto fail;
+				acl->a_entries[n].e_uid =
+					make_kuid(&init_user_ns,
+						  le32_to_cpu(entry->e_id));
+				break;
 			case ACL_GROUP:
 				value = (char *)value + sizeof(ext2_acl_entry);
 				if ((char *)value > end)
 					goto fail;
-				acl->a_entries[n].e_id =
-					le32_to_cpu(entry->e_id);
+				acl->a_entries[n].e_gid =
+					make_kgid(&init_user_ns,
+						  le32_to_cpu(entry->e_id));
 				break;
 
 			default:
@@ -96,14 +103,19 @@ ext2_acl_to_disk(const struct posix_acl *acl, size_t *size)
 	ext_acl->a_version = cpu_to_le32(EXT2_ACL_VERSION);
 	e = (char *)ext_acl + sizeof(ext2_acl_header);
 	for (n=0; n < acl->a_count; n++) {
+		const struct posix_acl_entry *acl_e = &acl->a_entries[n];
 		ext2_acl_entry *entry = (ext2_acl_entry *)e;
-		entry->e_tag  = cpu_to_le16(acl->a_entries[n].e_tag);
-		entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm);
-		switch(acl->a_entries[n].e_tag) {
+		entry->e_tag  = cpu_to_le16(acl_e->e_tag);
+		entry->e_perm = cpu_to_le16(acl_e->e_perm);
+		switch(acl_e->e_tag) {
 			case ACL_USER:
+				entry->e_id = cpu_to_le32(
+					from_kuid(&init_user_ns, acl_e->e_uid));
+				e += sizeof(ext2_acl_entry);
+				break;
 			case ACL_GROUP:
-				entry->e_id =
-					cpu_to_le32(acl->a_entries[n].e_id);
+				entry->e_id = cpu_to_le32(
+					from_kgid(&init_user_ns, acl_e->e_gid));
 				e += sizeof(ext2_acl_entry);
 				break;
 
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index 2cf6a8044c80..dbb5ad59a7fc 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -48,16 +48,23 @@ ext3_acl_from_disk(const void *value, size_t size)
 			case ACL_OTHER:
 				value = (char *)value +
 					sizeof(ext3_acl_entry_short);
-				acl->a_entries[n].e_id = ACL_UNDEFINED_ID;
 				break;
 
 			case ACL_USER:
+				value = (char *)value + sizeof(ext3_acl_entry);
+				if ((char *)value > end)
+					goto fail;
+				acl->a_entries[n].e_uid =
+					make_kuid(&init_user_ns,
+						  le32_to_cpu(entry->e_id));
+				break;
 			case ACL_GROUP:
 				value = (char *)value + sizeof(ext3_acl_entry);
 				if ((char *)value > end)
 					goto fail;
-				acl->a_entries[n].e_id =
-					le32_to_cpu(entry->e_id);
+				acl->a_entries[n].e_gid =
+					make_kgid(&init_user_ns,
+						  le32_to_cpu(entry->e_id));
 				break;
 
 			default:
@@ -91,14 +98,19 @@ ext3_acl_to_disk(const struct posix_acl *acl, size_t *size)
 	ext_acl->a_version = cpu_to_le32(EXT3_ACL_VERSION);
 	e = (char *)ext_acl + sizeof(ext3_acl_header);
 	for (n=0; n < acl->a_count; n++) {
+		const struct posix_acl_entry *acl_e = &acl->a_entries[n];
 		ext3_acl_entry *entry = (ext3_acl_entry *)e;
-		entry->e_tag  = cpu_to_le16(acl->a_entries[n].e_tag);
-		entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm);
-		switch(acl->a_entries[n].e_tag) {
+		entry->e_tag  = cpu_to_le16(acl_e->e_tag);
+		entry->e_perm = cpu_to_le16(acl_e->e_perm);
+		switch(acl_e->e_tag) {
 			case ACL_USER:
+				entry->e_id = cpu_to_le32(
+					from_kuid(&init_user_ns, acl_e->e_uid));
+				e += sizeof(ext3_acl_entry);
+				break;
 			case ACL_GROUP:
-				entry->e_id =
-					cpu_to_le32(acl->a_entries[n].e_id);
+				entry->e_id = cpu_to_le32(
+					from_kgid(&init_user_ns, acl_e->e_gid));
 				e += sizeof(ext3_acl_entry);
 				break;
 
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index 42b95fccfb2f..d3c5b88fd89f 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -55,16 +55,23 @@ ext4_acl_from_disk(const void *value, size_t size)
 		case ACL_OTHER:
 			value = (char *)value +
 				sizeof(ext4_acl_entry_short);
-			acl->a_entries[n].e_id = ACL_UNDEFINED_ID;
 			break;
 
 		case ACL_USER:
+			value = (char *)value + sizeof(ext4_acl_entry);
+			if ((char *)value > end)
+				goto fail;
+			acl->a_entries[n].e_uid =
+				make_kuid(&init_user_ns,
+					  le32_to_cpu(entry->e_id));
+			break;
 		case ACL_GROUP:
 			value = (char *)value + sizeof(ext4_acl_entry);
 			if ((char *)value > end)
 				goto fail;
-			acl->a_entries[n].e_id =
-				le32_to_cpu(entry->e_id);
+			acl->a_entries[n].e_gid =
+				make_kgid(&init_user_ns,
+					  le32_to_cpu(entry->e_id));
 			break;
 
 		default:
@@ -98,13 +105,19 @@ ext4_acl_to_disk(const struct posix_acl *acl, size_t *size)
 	ext_acl->a_version = cpu_to_le32(EXT4_ACL_VERSION);
 	e = (char *)ext_acl + sizeof(ext4_acl_header);
 	for (n = 0; n < acl->a_count; n++) {
+		const struct posix_acl_entry *acl_e = &acl->a_entries[n];
 		ext4_acl_entry *entry = (ext4_acl_entry *)e;
-		entry->e_tag  = cpu_to_le16(acl->a_entries[n].e_tag);
-		entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm);
-		switch (acl->a_entries[n].e_tag) {
+		entry->e_tag  = cpu_to_le16(acl_e->e_tag);
+		entry->e_perm = cpu_to_le16(acl_e->e_perm);
+		switch (acl_e->e_tag) {
 		case ACL_USER:
+			entry->e_id = cpu_to_le32(
+				from_kuid(&init_user_ns, acl_e->e_uid));
+			e += sizeof(ext4_acl_entry);
+			break;
 		case ACL_GROUP:
-			entry->e_id = cpu_to_le32(acl->a_entries[n].e_id);
+			entry->e_id = cpu_to_le32(
+				from_kgid(&init_user_ns, acl_e->e_gid));
 			e += sizeof(ext4_acl_entry);
 			break;
 
-- 
cgit 


From 69552c0c50f3f950f304fb07a4320e46f7f60c21 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 7 Feb 2012 16:28:09 -0800
Subject: userns: Convert configfs to use kuid and kgid where appropriate

Cc: Joel Becker <jlbec@evilplan.org>
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/configfs/inode.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index 0074362d9f7f..a9d35b0e06cf 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -79,8 +79,8 @@ int configfs_setattr(struct dentry * dentry, struct iattr * iattr)
 			return -ENOMEM;
 		/* assign default attributes */
 		sd_iattr->ia_mode = sd->s_mode;
-		sd_iattr->ia_uid = 0;
-		sd_iattr->ia_gid = 0;
+		sd_iattr->ia_uid = GLOBAL_ROOT_UID;
+		sd_iattr->ia_gid = GLOBAL_ROOT_GID;
 		sd_iattr->ia_atime = sd_iattr->ia_mtime = sd_iattr->ia_ctime = CURRENT_TIME;
 		sd->s_iattr = sd_iattr;
 	}
-- 
cgit 


From f76d207a66c3a53defea67e7d36c3eb1b7d6d61d Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 30 Aug 2012 01:24:05 -0700
Subject: userns: Add kprojid_t and associated infrastructure in projid.h

Implement kprojid_t a cousin of the kuid_t and kgid_t.

The per user namespace mapping of project id values can be set with
/proc/<pid>/projid_map.

A full compliment of helpers is provided: make_kprojid, from_kprojid,
from_kprojid_munged, kporjid_has_mapping, projid_valid, projid_eq,
projid_eq, projid_lt.

Project identifiers are part of the generic disk quota interface,
although it appears only xfs implements project identifiers currently.

The xfs code allows anyone who has permission to set the project
identifier on a file to use any project identifier so when
setting up the user namespace project identifier mappings I do
not require a capability.

Cc: Dave Chinner <david@fromorbit.com>
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/proc/base.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'fs')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 138cff4b05dd..acd1960c28a2 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2991,6 +2991,11 @@ static int proc_gid_map_open(struct inode *inode, struct file *file)
 	return proc_id_map_open(inode, file, &proc_gid_seq_operations);
 }
 
+static int proc_projid_map_open(struct inode *inode, struct file *file)
+{
+	return proc_id_map_open(inode, file, &proc_projid_seq_operations);
+}
+
 static const struct file_operations proc_uid_map_operations = {
 	.open		= proc_uid_map_open,
 	.write		= proc_uid_map_write,
@@ -3006,6 +3011,14 @@ static const struct file_operations proc_gid_map_operations = {
 	.llseek		= seq_lseek,
 	.release	= proc_id_map_release,
 };
+
+static const struct file_operations proc_projid_map_operations = {
+	.open		= proc_projid_map_open,
+	.write		= proc_projid_map_write,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= proc_id_map_release,
+};
 #endif /* CONFIG_USER_NS */
 
 static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns,
@@ -3113,6 +3126,7 @@ static const struct pid_entry tgid_base_stuff[] = {
 #ifdef CONFIG_USER_NS
 	REG("uid_map",    S_IRUGO|S_IWUSR, proc_uid_map_operations),
 	REG("gid_map",    S_IRUGO|S_IWUSR, proc_gid_map_operations),
+	REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
 #endif
 };
 
@@ -3476,6 +3490,7 @@ static const struct pid_entry tid_base_stuff[] = {
 #ifdef CONFIG_USER_NS
 	REG("uid_map",    S_IRUGO|S_IWUSR, proc_uid_map_operations),
 	REG("gid_map",    S_IRUGO|S_IWUSR, proc_gid_map_operations),
+	REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
 #endif
 };
 
-- 
cgit 


From e8a3e4719b7ec19288c56f22623f537cb78885c1 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sun, 16 Sep 2012 01:11:45 -0700
Subject: userns: Implement struct kqid

Add the data type struct kqid which holds the kernel internal form of
the owning identifier of a quota.  struct kqid is a replacement for
the implicit union of uid, gid and project id stored in an unsigned
int and the quota type field that is was used in the quota data
structures.  Making the data type explicit allows the kuid_t and
kgid_t type safety to propogate more thoroughly through the code,
revealing more places where uid/gid conversions need be made.

Along with the data type struct kqid comes the helper functions
qid_eq, qid_lt, from_kqid, from_kqid_munged, qid_valid, make_kqid,
make_kqid_invalid, make_kqid_uid, make_kqid_gid.

Cc: Jan Kara <jack@suse.cz>
Cc: Dave Chinner <david@fromorbit.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/quota/Makefile |   2 +-
 fs/quota/kqid.c   | 132 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 133 insertions(+), 1 deletion(-)
 create mode 100644 fs/quota/kqid.c

(limited to 'fs')

diff --git a/fs/quota/Makefile b/fs/quota/Makefile
index 5f9e9e276af0..c66c37cdaa39 100644
--- a/fs/quota/Makefile
+++ b/fs/quota/Makefile
@@ -2,6 +2,6 @@ obj-$(CONFIG_QUOTA)		+= dquot.o
 obj-$(CONFIG_QFMT_V1)		+= quota_v1.o
 obj-$(CONFIG_QFMT_V2)		+= quota_v2.o
 obj-$(CONFIG_QUOTA_TREE)	+= quota_tree.o
-obj-$(CONFIG_QUOTACTL)		+= quota.o
+obj-$(CONFIG_QUOTACTL)		+= quota.o kqid.o
 obj-$(CONFIG_QUOTACTL_COMPAT)	+= compat.o
 obj-$(CONFIG_QUOTA_NETLINK_INTERFACE)	+= netlink.o
diff --git a/fs/quota/kqid.c b/fs/quota/kqid.c
new file mode 100644
index 000000000000..2f97b0e2c501
--- /dev/null
+++ b/fs/quota/kqid.c
@@ -0,0 +1,132 @@
+#include <linux/fs.h>
+#include <linux/quota.h>
+#include <linux/export.h>
+
+/**
+ *	qid_eq - Test to see if to kquid values are the same
+ *	@left: A qid value
+ *	@right: Another quid value
+ *
+ *	Return true if the two qid values are equal and false otherwise.
+ */
+bool qid_eq(struct kqid left, struct kqid right)
+{
+	if (left.type != right.type)
+		return false;
+	switch(left.type) {
+	case USRQUOTA:
+		return uid_eq(left.uid, right.uid);
+	case GRPQUOTA:
+		return gid_eq(left.gid, right.gid);
+	case PRJQUOTA:
+		return projid_eq(left.projid, right.projid);
+	default:
+		BUG();
+	}
+}
+EXPORT_SYMBOL(qid_eq);
+
+/**
+ *	qid_lt - Test to see if one qid value is less than another
+ *	@left: The possibly lesser qid value
+ *	@right: The possibly greater qid value
+ *
+ *	Return true if left is less than right and false otherwise.
+ */
+bool qid_lt(struct kqid left, struct kqid right)
+{
+	if (left.type < right.type)
+		return true;
+	if (left.type > right.type)
+		return false;
+	switch (left.type) {
+	case USRQUOTA:
+		return uid_lt(left.uid, right.uid);
+	case GRPQUOTA:
+		return gid_lt(left.gid, right.gid);
+	case PRJQUOTA:
+		return projid_lt(left.projid, right.projid);
+	default:
+		BUG();
+	}
+}
+EXPORT_SYMBOL(qid_lt);
+
+/**
+ *	from_kqid - Create a qid from a kqid user-namespace pair.
+ *	@targ: The user namespace we want a qid in.
+ *	@kuid: The kernel internal quota identifier to start with.
+ *
+ *	Map @kqid into the user-namespace specified by @targ and
+ *	return the resulting qid.
+ *
+ *	There is always a mapping into the initial user_namespace.
+ *
+ *	If @kqid has no mapping in @targ (qid_t)-1 is returned.
+ */
+qid_t from_kqid(struct user_namespace *targ, struct kqid kqid)
+{
+	switch (kqid.type) {
+	case USRQUOTA:
+		return from_kuid(targ, kqid.uid);
+	case GRPQUOTA:
+		return from_kgid(targ, kqid.gid);
+	case PRJQUOTA:
+		return from_kprojid(targ, kqid.projid);
+	default:
+		BUG();
+	}
+}
+EXPORT_SYMBOL(from_kqid);
+
+/**
+ *	from_kqid_munged - Create a qid from a kqid user-namespace pair.
+ *	@targ: The user namespace we want a qid in.
+ *	@kqid: The kernel internal quota identifier to start with.
+ *
+ *	Map @kqid into the user-namespace specified by @targ and
+ *	return the resulting qid.
+ *
+ *	There is always a mapping into the initial user_namespace.
+ *
+ *	Unlike from_kqid from_kqid_munged never fails and always
+ *	returns a valid projid.  This makes from_kqid_munged
+ *	appropriate for use in places where failing to provide
+ *	a qid_t is not a good option.
+ *
+ *	If @kqid has no mapping in @targ the kqid.type specific
+ *	overflow identifier is returned.
+ */
+qid_t from_kqid_munged(struct user_namespace *targ, struct kqid kqid)
+{
+	switch (kqid.type) {
+	case USRQUOTA:
+		return from_kuid_munged(targ, kqid.uid);
+	case GRPQUOTA:
+		return from_kgid_munged(targ, kqid.gid);
+	case PRJQUOTA:
+		return from_kprojid_munged(targ, kqid.projid);
+	default:
+		BUG();
+	}
+}
+EXPORT_SYMBOL(from_kqid_munged);
+
+/**
+ *	qid_valid - Report if a valid value is stored in a kqid.
+ *	@qid: The kernel internal quota identifier to test.
+ */
+bool qid_valid(struct kqid qid)
+{
+	switch (qid.type) {
+	case USRQUOTA:
+		return uid_valid(qid.uid);
+	case GRPQUOTA:
+		return gid_valid(qid.gid);
+	case PRJQUOTA:
+		return projid_valid(qid.projid);
+	default:
+		BUG();
+	}
+}
+EXPORT_SYMBOL(qid_valid);
-- 
cgit 


From 74a8a103789465c4e67f38d1abb5cea770002601 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sun, 16 Sep 2012 02:07:49 -0700
Subject: userns: Convert qutoactl

Update the quotactl user space interface to successfull compile with
user namespaces support enabled and to hand off quota identifiers to
lower layers of the kernel in struct kqid instead of type and qid
pairs.

The quota on function is not converted because while it takes a quota
type and an id.  The id is the on disk quota format to use, which
is something completely different.

The signature of two struct quotactl_ops methods were changed to take
struct kqid argumetns get_dqblk and set_dqblk.

The dquot, xfs, and ocfs2 implementations of get_dqblk and set_dqblk
are minimally changed so that the code continues to work with
the change in parameter type.

This is the first in a series of changes to always store quota
identifiers in the kernel in struct kqid and only use raw type and qid
values when interacting with on disk structures or userspace.  Always
using struct kqid internally makes it hard to miss places that need
conversion to or from the kernel internal values.

Cc: Jan Kara <jack@suse.cz>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Mark Fasheh <mfasheh@suse.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Ben Myers <bpm@sgi.com>
Cc: Alex Elder <elder@kernel.org>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/gfs2/quota.c       | 20 +++++++++++---------
 fs/quota/dquot.c      |  8 ++++----
 fs/quota/quota.c      | 28 ++++++++++++++++++++++------
 fs/xfs/xfs_quotaops.c | 12 ++++++------
 4 files changed, 43 insertions(+), 25 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index a3bde91645c2..b3115392d68f 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -1469,7 +1469,7 @@ static int gfs2_quota_get_xstate(struct super_block *sb,
 	return 0;
 }
 
-static int gfs2_get_dqblk(struct super_block *sb, int type, qid_t id,
+static int gfs2_get_dqblk(struct super_block *sb, struct kqid qid,
 			  struct fs_disk_quota *fdq)
 {
 	struct gfs2_sbd *sdp = sb->s_fs_info;
@@ -1477,20 +1477,21 @@ static int gfs2_get_dqblk(struct super_block *sb, int type, qid_t id,
 	struct gfs2_quota_data *qd;
 	struct gfs2_holder q_gh;
 	int error;
+	int type;
 
 	memset(fdq, 0, sizeof(struct fs_disk_quota));
 
 	if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
 		return -ESRCH; /* Crazy XFS error code */
 
-	if (type == USRQUOTA)
+	if (qid.type == USRQUOTA)
 		type = QUOTA_USER;
-	else if (type == GRPQUOTA)
+	else if (qid.type == GRPQUOTA)
 		type = QUOTA_GROUP;
 	else
 		return -EINVAL;
 
-	error = qd_get(sdp, type, id, &qd);
+	error = qd_get(sdp, type, from_kqid(&init_user_ns, qid), &qd);
 	if (error)
 		return error;
 	error = do_glock(qd, FORCE, &q_gh);
@@ -1500,7 +1501,7 @@ static int gfs2_get_dqblk(struct super_block *sb, int type, qid_t id,
 	qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb;
 	fdq->d_version = FS_DQUOT_VERSION;
 	fdq->d_flags = (type == QUOTA_USER) ? FS_USER_QUOTA : FS_GROUP_QUOTA;
-	fdq->d_id = id;
+	fdq->d_id = from_kqid(&init_user_ns, qid);
 	fdq->d_blk_hardlimit = be64_to_cpu(qlvb->qb_limit) << sdp->sd_fsb2bb_shift;
 	fdq->d_blk_softlimit = be64_to_cpu(qlvb->qb_warn) << sdp->sd_fsb2bb_shift;
 	fdq->d_bcount = be64_to_cpu(qlvb->qb_value) << sdp->sd_fsb2bb_shift;
@@ -1514,7 +1515,7 @@ out:
 /* GFS2 only supports a subset of the XFS fields */
 #define GFS2_FIELDMASK (FS_DQ_BSOFT|FS_DQ_BHARD|FS_DQ_BCOUNT)
 
-static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
+static int gfs2_set_dqblk(struct super_block *sb, struct kqid qid,
 			  struct fs_disk_quota *fdq)
 {
 	struct gfs2_sbd *sdp = sb->s_fs_info;
@@ -1526,11 +1527,12 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
 	int alloc_required;
 	loff_t offset;
 	int error;
+	int type;
 
 	if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
 		return -ESRCH; /* Crazy XFS error code */
 
-	switch(type) {
+	switch(qid.type) {
 	case USRQUOTA:
 		type = QUOTA_USER;
 		if (fdq->d_flags != FS_USER_QUOTA)
@@ -1547,10 +1549,10 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
 
 	if (fdq->d_fieldmask & ~GFS2_FIELDMASK)
 		return -EINVAL;
-	if (fdq->d_id != id)
+	if (fdq->d_id != from_kqid(&init_user_ns, qid))
 		return -EINVAL;
 
-	error = qd_get(sdp, type, id, &qd);
+	error = qd_get(sdp, type, from_kqid(&init_user_ns, qid), &qd);
 	if (error)
 		return error;
 
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 36a29b753c79..7714b169d646 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -2376,12 +2376,12 @@ static void do_get_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
 	spin_unlock(&dq_data_lock);
 }
 
-int dquot_get_dqblk(struct super_block *sb, int type, qid_t id,
+int dquot_get_dqblk(struct super_block *sb, struct kqid qid,
 		    struct fs_disk_quota *di)
 {
 	struct dquot *dquot;
 
-	dquot = dqget(sb, id, type);
+	dquot = dqget(sb, qid.type, from_kqid(&init_user_ns, qid));
 	if (!dquot)
 		return -ESRCH;
 	do_get_dqblk(dquot, di);
@@ -2488,13 +2488,13 @@ static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
 	return 0;
 }
 
-int dquot_set_dqblk(struct super_block *sb, int type, qid_t id,
+int dquot_set_dqblk(struct super_block *sb, struct kqid qid,
 		  struct fs_disk_quota *di)
 {
 	struct dquot *dquot;
 	int rc;
 
-	dquot = dqget(sb, id, type);
+	dquot = dqget(sb, qid.type, from_kqid(&init_user_ns, qid));
 	if (!dquot) {
 		rc = -ESRCH;
 		goto out;
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 6f155788cbc6..ff0135d6bc51 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -32,8 +32,8 @@ static int check_quotactl_permission(struct super_block *sb, int type, int cmd,
 	/* allow to query information for dquots we "own" */
 	case Q_GETQUOTA:
 	case Q_XGETQUOTA:
-		if ((type == USRQUOTA && current_euid() == id) ||
-		    (type == GRPQUOTA && in_egroup_p(id)))
+		if ((type == USRQUOTA && uid_eq(current_euid(), make_kuid(current_user_ns(), id))) ||
+		    (type == GRPQUOTA && in_egroup_p(make_kgid(current_user_ns(), id))))
 			break;
 		/*FALLTHROUGH*/
 	default:
@@ -130,13 +130,17 @@ static void copy_to_if_dqblk(struct if_dqblk *dst, struct fs_disk_quota *src)
 static int quota_getquota(struct super_block *sb, int type, qid_t id,
 			  void __user *addr)
 {
+	struct kqid qid;
 	struct fs_disk_quota fdq;
 	struct if_dqblk idq;
 	int ret;
 
 	if (!sb->s_qcop->get_dqblk)
 		return -ENOSYS;
-	ret = sb->s_qcop->get_dqblk(sb, type, id, &fdq);
+	qid = make_kqid(current_user_ns(), type, id);
+	if (!qid_valid(qid))
+		return -EINVAL;
+	ret = sb->s_qcop->get_dqblk(sb, qid, &fdq);
 	if (ret)
 		return ret;
 	copy_to_if_dqblk(&idq, &fdq);
@@ -176,13 +180,17 @@ static int quota_setquota(struct super_block *sb, int type, qid_t id,
 {
 	struct fs_disk_quota fdq;
 	struct if_dqblk idq;
+	struct kqid qid;
 
 	if (copy_from_user(&idq, addr, sizeof(idq)))
 		return -EFAULT;
 	if (!sb->s_qcop->set_dqblk)
 		return -ENOSYS;
+	qid = make_kqid(current_user_ns(), type, id);
+	if (!qid_valid(qid))
+		return -EINVAL;
 	copy_from_if_dqblk(&fdq, &idq);
-	return sb->s_qcop->set_dqblk(sb, type, id, &fdq);
+	return sb->s_qcop->set_dqblk(sb, qid, &fdq);
 }
 
 static int quota_setxstate(struct super_block *sb, int cmd, void __user *addr)
@@ -213,23 +221,31 @@ static int quota_setxquota(struct super_block *sb, int type, qid_t id,
 			   void __user *addr)
 {
 	struct fs_disk_quota fdq;
+	struct kqid qid;
 
 	if (copy_from_user(&fdq, addr, sizeof(fdq)))
 		return -EFAULT;
 	if (!sb->s_qcop->set_dqblk)
 		return -ENOSYS;
-	return sb->s_qcop->set_dqblk(sb, type, id, &fdq);
+	qid = make_kqid(current_user_ns(), type, id);
+	if (!qid_valid(qid))
+		return -EINVAL;
+	return sb->s_qcop->set_dqblk(sb, qid, &fdq);
 }
 
 static int quota_getxquota(struct super_block *sb, int type, qid_t id,
 			   void __user *addr)
 {
 	struct fs_disk_quota fdq;
+	struct kqid qid;
 	int ret;
 
 	if (!sb->s_qcop->get_dqblk)
 		return -ENOSYS;
-	ret = sb->s_qcop->get_dqblk(sb, type, id, &fdq);
+	qid = make_kqid(current_user_ns(), type, id);
+	if (!qid_valid(qid))
+		return -EINVAL;
+	ret = sb->s_qcop->get_dqblk(sb, qid, &fdq);
 	if (!ret && copy_to_user(addr, &fdq, sizeof(fdq)))
 		return -EFAULT;
 	return ret;
diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c
index fed504fc2999..71926d630527 100644
--- a/fs/xfs/xfs_quotaops.c
+++ b/fs/xfs/xfs_quotaops.c
@@ -97,8 +97,7 @@ xfs_fs_set_xstate(
 STATIC int
 xfs_fs_get_dqblk(
 	struct super_block	*sb,
-	int			type,
-	qid_t			id,
+	struct kqid		qid,
 	struct fs_disk_quota	*fdq)
 {
 	struct xfs_mount	*mp = XFS_M(sb);
@@ -108,14 +107,14 @@ xfs_fs_get_dqblk(
 	if (!XFS_IS_QUOTA_ON(mp))
 		return -ESRCH;
 
-	return -xfs_qm_scall_getquota(mp, id, xfs_quota_type(type), fdq);
+	return -xfs_qm_scall_getquota(mp, from_kqid(&init_user_ns, qid),
+				      xfs_quota_type(qid.type), fdq);
 }
 
 STATIC int
 xfs_fs_set_dqblk(
 	struct super_block	*sb,
-	int			type,
-	qid_t			id,
+	struct kqid		qid,
 	struct fs_disk_quota	*fdq)
 {
 	struct xfs_mount	*mp = XFS_M(sb);
@@ -127,7 +126,8 @@ xfs_fs_set_dqblk(
 	if (!XFS_IS_QUOTA_ON(mp))
 		return -ESRCH;
 
-	return -xfs_qm_scall_setqlim(mp, id, xfs_quota_type(type), fdq);
+	return -xfs_qm_scall_setqlim(mp, from_kqid(&init_user_ns, qid),
+				     xfs_quota_type(qid.type), fdq);
 }
 
 const struct quotactl_ops xfs_quotactl_operations = {
-- 
cgit 


From 431f19744d15531825cdbc8e771b43854b0d005b Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sun, 16 Sep 2012 02:32:43 -0700
Subject: userns: Convert quota netlink aka quota_send_warning

Modify quota_send_warning to take struct kqid instead a type and
identifier pair.

When sending netlink broadcasts always convert uids and quota
identifiers into the intial user namespace.  There is as yet no way to
send a netlink broadcast message with different contents to receivers
in different namespaces, so for the time being just map all of the
identifiers into the initial user namespace which preserves the
current behavior.

Change the callers of quota_send_warning in gfs2, xfs and dquot
to generate a struct kqid to pass to quota send warning.  When
all of the user namespaces convesions are complete a struct kqid
values will be availbe without need for conversion, but a conversion
is needed now to avoid needing to convert everything at once.

Cc: Ben Myers <bpm@sgi.com>
Cc: Alex Elder <elder@kernel.org>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Steven Whitehouse <swhiteho@redhat.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/gfs2/quota.c          | 12 ++++++++----
 fs/quota/dquot.c         |  2 +-
 fs/quota/netlink.c       | 10 ++++++----
 fs/xfs/xfs_trans_dquot.c |  8 +++++---
 4 files changed, 20 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index b3115392d68f..d554dfff58e3 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -1070,8 +1070,10 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid)
 
 		if (be64_to_cpu(qd->qd_qb.qb_limit) && (s64)be64_to_cpu(qd->qd_qb.qb_limit) < value) {
 			print_message(qd, "exceeded");
-			quota_send_warning(test_bit(QDF_USER, &qd->qd_flags) ?
-					   USRQUOTA : GRPQUOTA, qd->qd_id,
+			quota_send_warning(make_kqid(&init_user_ns,
+						     test_bit(QDF_USER, &qd->qd_flags) ?
+						     USRQUOTA : GRPQUOTA,
+						     qd->qd_id),
 					   sdp->sd_vfs->s_dev, QUOTA_NL_BHARDWARN);
 
 			error = -EDQUOT;
@@ -1081,8 +1083,10 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid)
 			   time_after_eq(jiffies, qd->qd_last_warn +
 					 gfs2_tune_get(sdp,
 						gt_quota_warn_period) * HZ)) {
-			quota_send_warning(test_bit(QDF_USER, &qd->qd_flags) ?
-					   USRQUOTA : GRPQUOTA, qd->qd_id,
+			quota_send_warning(make_kqid(&init_user_ns,
+						     test_bit(QDF_USER, &qd->qd_flags) ?
+						     USRQUOTA : GRPQUOTA,
+						     qd->qd_id),
 					   sdp->sd_vfs->s_dev, QUOTA_NL_BSOFTWARN);
 			error = print_message(qd, "warning");
 			qd->qd_last_warn = jiffies;
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 7714b169d646..80d337822462 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1236,7 +1236,7 @@ static void flush_warnings(struct dquot_warn *warn)
 #ifdef CONFIG_PRINT_QUOTA_WARNING
 		print_warning(&warn[i]);
 #endif
-		quota_send_warning(warn[i].w_dq_type, warn[i].w_dq_id,
+		quota_send_warning(make_kqid(&init_user_ns, warn[i].w_dq_type, warn[i].w_dq_id),
 				   warn[i].w_sb->s_dev, warn[i].w_type);
 	}
 }
diff --git a/fs/quota/netlink.c b/fs/quota/netlink.c
index d67908b407d9..16e8abb7709b 100644
--- a/fs/quota/netlink.c
+++ b/fs/quota/netlink.c
@@ -30,7 +30,7 @@ static struct genl_family quota_genl_family = {
  *
  */
 
-void quota_send_warning(short type, unsigned int id, dev_t dev,
+void quota_send_warning(struct kqid qid, dev_t dev,
 			const char warntype)
 {
 	static atomic_t seq;
@@ -56,10 +56,11 @@ void quota_send_warning(short type, unsigned int id, dev_t dev,
 		  "VFS: Cannot store netlink header in quota warning.\n");
 		goto err_out;
 	}
-	ret = nla_put_u32(skb, QUOTA_NL_A_QTYPE, type);
+	ret = nla_put_u32(skb, QUOTA_NL_A_QTYPE, qid.type);
 	if (ret)
 		goto attr_err_out;
-	ret = nla_put_u64(skb, QUOTA_NL_A_EXCESS_ID, id);
+	ret = nla_put_u64(skb, QUOTA_NL_A_EXCESS_ID,
+			  from_kqid_munged(&init_user_ns, qid));
 	if (ret)
 		goto attr_err_out;
 	ret = nla_put_u32(skb, QUOTA_NL_A_WARNING, warntype);
@@ -71,7 +72,8 @@ void quota_send_warning(short type, unsigned int id, dev_t dev,
 	ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MINOR, MINOR(dev));
 	if (ret)
 		goto attr_err_out;
-	ret = nla_put_u64(skb, QUOTA_NL_A_CAUSED_ID, current_uid());
+	ret = nla_put_u64(skb, QUOTA_NL_A_CAUSED_ID,
+			  from_kuid_munged(&init_user_ns, current_uid()));
 	if (ret)
 		goto attr_err_out;
 	genlmsg_end(skb, msg_head);
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
index bcb60542fcf1..0c7fa54f309e 100644
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -578,9 +578,11 @@ xfs_quota_warn(
 	/* no warnings for project quotas - we just return ENOSPC later */
 	if (dqp->dq_flags & XFS_DQ_PROJ)
 		return;
-	quota_send_warning((dqp->dq_flags & XFS_DQ_USER) ? USRQUOTA : GRPQUOTA,
-			   be32_to_cpu(dqp->q_core.d_id), mp->m_super->s_dev,
-			   type);
+	quota_send_warning(make_kqid(&init_user_ns,
+				     (dqp->dq_flags & XFS_DQ_USER) ?
+				     USRQUOTA : GRPQUOTA,
+				     be32_to_cpu(dqp->q_core.d_id)),
+			   mp->m_super->s_dev, type);
 }
 
 /*
-- 
cgit 


From aca645a6a54e001e004f1f1e0eafd94f994bb1b3 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sun, 16 Sep 2012 03:11:50 -0700
Subject: userns: Modify dqget to take struct kqid

Modify dqget to take struct kqid instead of a type and an identifier
pair.

Modify the callers of dqget in ocfs2 and dquot to take generate
a struct kqid so they can continue to call dqget.  The conversion
to create struct kqid should all be the final conversions that
are needed in those code paths.

Cc: Jan Kara <jack@suse.cz>
Cc: Mark Fasheh <mfasheh@suse.com>
Cc: Joel Becker <jlbec@evilplan.org>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/ocfs2/file.c        |  6 ++----
 fs/ocfs2/quota_local.c |  4 +++-
 fs/quota/dquot.c       | 20 +++++++++++---------
 3 files changed, 16 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 46a1f6d75104..5a4ee77cec51 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1184,8 +1184,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
 		if (attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid
 		    && OCFS2_HAS_RO_COMPAT_FEATURE(sb,
 		    OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) {
-			transfer_to[USRQUOTA] = dqget(sb, attr->ia_uid,
-						      USRQUOTA);
+			transfer_to[USRQUOTA] = dqget(sb, make_kqid_uid(attr->ia_uid));
 			if (!transfer_to[USRQUOTA]) {
 				status = -ESRCH;
 				goto bail_unlock;
@@ -1194,8 +1193,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
 		if (attr->ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid
 		    && OCFS2_HAS_RO_COMPAT_FEATURE(sb,
 		    OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) {
-			transfer_to[GRPQUOTA] = dqget(sb, attr->ia_gid,
-						      GRPQUOTA);
+			transfer_to[GRPQUOTA] = dqget(sb, make_kqid_gid(attr->ia_gid));
 			if (!transfer_to[GRPQUOTA]) {
 				status = -ESRCH;
 				goto bail_unlock;
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index f100bf70a906..020f0ba29ee5 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -501,7 +501,9 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode,
 			}
 			dqblk = (struct ocfs2_local_disk_dqblk *)(qbh->b_data +
 				ol_dqblk_block_off(sb, chunk, bit));
-			dquot = dqget(sb, le64_to_cpu(dqblk->dqb_id), type);
+			dquot = dqget(sb,
+				      make_kqid(&init_user_ns, type,
+						le64_to_cpu(dqblk->dqb_id)));
 			if (!dquot) {
 				status = -EIO;
 				mlog(ML_ERROR, "Failed to get quota structure "
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 80d337822462..53e377a59b05 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -829,8 +829,10 @@ static struct dquot *get_empty_dquot(struct super_block *sb, int type)
  *   a) checking for quota flags under dq_list_lock and
  *   b) getting a reference to dquot before we release dq_list_lock
  */
-struct dquot *dqget(struct super_block *sb, unsigned int id, int type)
+struct dquot *dqget(struct super_block *sb, struct kqid qid)
 {
+	unsigned int type = qid.type;
+	unsigned int id = from_kqid(&init_user_ns, qid);
 	unsigned int hashent = hashfn(sb, id, type);
 	struct dquot *dquot = NULL, *empty = NULL;
 
@@ -1390,7 +1392,6 @@ static int dquot_active(const struct inode *inode)
  */
 static void __dquot_initialize(struct inode *inode, int type)
 {
-	unsigned int id = 0;
 	int cnt;
 	struct dquot *got[MAXQUOTAS];
 	struct super_block *sb = inode->i_sb;
@@ -1403,18 +1404,19 @@ static void __dquot_initialize(struct inode *inode, int type)
 
 	/* First get references to structures we might need. */
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+		struct kqid qid;
 		got[cnt] = NULL;
 		if (type != -1 && cnt != type)
 			continue;
 		switch (cnt) {
 		case USRQUOTA:
-			id = inode->i_uid;
+			qid = make_kqid_uid(inode->i_uid);
 			break;
 		case GRPQUOTA:
-			id = inode->i_gid;
+			qid = make_kqid_gid(inode->i_gid);
 			break;
 		}
-		got[cnt] = dqget(sb, id, cnt);
+		got[cnt] = dqget(sb, qid);
 	}
 
 	down_write(&sb_dqopt(sb)->dqptr_sem);
@@ -1898,9 +1900,9 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
 		return 0;
 
 	if (iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid)
-		transfer_to[USRQUOTA] = dqget(sb, iattr->ia_uid, USRQUOTA);
+		transfer_to[USRQUOTA] = dqget(sb, make_kqid_uid(iattr->ia_uid));
 	if (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)
-		transfer_to[GRPQUOTA] = dqget(sb, iattr->ia_gid, GRPQUOTA);
+		transfer_to[GRPQUOTA] = dqget(sb, make_kqid_gid(iattr->ia_gid));
 
 	ret = __dquot_transfer(inode, transfer_to);
 	dqput_all(transfer_to);
@@ -2381,7 +2383,7 @@ int dquot_get_dqblk(struct super_block *sb, struct kqid qid,
 {
 	struct dquot *dquot;
 
-	dquot = dqget(sb, qid.type, from_kqid(&init_user_ns, qid));
+	dquot = dqget(sb, qid);
 	if (!dquot)
 		return -ESRCH;
 	do_get_dqblk(dquot, di);
@@ -2494,7 +2496,7 @@ int dquot_set_dqblk(struct super_block *sb, struct kqid qid,
 	struct dquot *dquot;
 	int rc;
 
-	dquot = dqget(sb, qid.type, from_kqid(&init_user_ns, qid));
+	dquot = dqget(sb, qid);
 	if (!dquot) {
 		rc = -ESRCH;
 		goto out;
-- 
cgit 


From 4c376dcae892e5b5daf8576c864061d076d4e4dc Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sun, 16 Sep 2012 03:56:19 -0700
Subject: userns: Convert struct dquot dq_id to be a struct kqid

Change struct dquot dq_id to a struct kqid and remove the now
unecessary dq_type.

Make minimal changes to dquot, quota_tree, quota_v1, quota_v2, ext3,
ext4, and ocfs2 to deal with the change in quota structures and
signatures.  The ocfs2 changes are larger than most because of the
extensive tracing throughout the ocfs2 quota code that prints out
dq_id.

quota_tree.c:get_index is modified to take a struct kqid instead of a
qid_t because all of it's callers pass in dquot->dq_id and it allows
me to introduce only a single conversion.

The rest of the changes are either just replacing dq_type with dq_id.type,
adding conversions to deal with the change in type and occassionally
adding qid_eq to allow quota id comparisons in a user namespace safe way.

Cc: Mark Fasheh <mfasheh@suse.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Jan Kara <jack@suse.cz>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andreas Dilger <adilger.kernel@dilger.ca>
Cc: Theodore Tso <tytso@mit.edu>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/ext3/super.c         |  2 +-
 fs/ext4/super.c         |  2 +-
 fs/ocfs2/quota_global.c | 43 ++++++++++++++++++++-------------
 fs/ocfs2/quota_local.c  | 11 +++++----
 fs/quota/dquot.c        | 63 +++++++++++++++++++++++++------------------------
 fs/quota/quota_tree.c   | 22 ++++++++++-------
 fs/quota/quota_v1.c     | 12 ++++++----
 fs/quota/quota_v2.c     | 26 +++++++++++---------
 8 files changed, 101 insertions(+), 80 deletions(-)

(limited to 'fs')

diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index ff9bcdc5b0d5..73e42f5c7009 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2814,7 +2814,7 @@ static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf)
 
 static inline struct inode *dquot_to_inode(struct dquot *dquot)
 {
-	return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type];
+	return sb_dqopt(dquot->dq_sb)->files[dquot->dq_id.type];
 }
 
 static int ext3_write_dquot(struct dquot *dquot)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index d76ec8277d3f..78e6036ff244 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -4796,7 +4796,7 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
 
 static inline struct inode *dquot_to_inode(struct dquot *dquot)
 {
-	return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type];
+	return sb_dqopt(dquot->dq_sb)->files[dquot->dq_id.type];
 }
 
 static int ext4_write_dquot(struct dquot *dquot)
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 0a86e302655f..332a281f217e 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -95,7 +95,7 @@ static void ocfs2_global_mem2diskdqb(void *dp, struct dquot *dquot)
 	struct ocfs2_global_disk_dqblk *d = dp;
 	struct mem_dqblk *m = &dquot->dq_dqb;
 
-	d->dqb_id = cpu_to_le32(dquot->dq_id);
+	d->dqb_id = cpu_to_le32(from_kqid(&init_user_ns, dquot->dq_id));
 	d->dqb_use_count = cpu_to_le32(OCFS2_DQUOT(dquot)->dq_use_count);
 	d->dqb_ihardlimit = cpu_to_le64(m->dqb_ihardlimit);
 	d->dqb_isoftlimit = cpu_to_le64(m->dqb_isoftlimit);
@@ -112,11 +112,14 @@ static int ocfs2_global_is_id(void *dp, struct dquot *dquot)
 {
 	struct ocfs2_global_disk_dqblk *d = dp;
 	struct ocfs2_mem_dqinfo *oinfo =
-			sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
+			sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv;
 
 	if (qtree_entry_unused(&oinfo->dqi_gi, dp))
 		return 0;
-	return le32_to_cpu(d->dqb_id) == dquot->dq_id;
+
+	return qid_eq(make_kqid(&init_user_ns, dquot->dq_id.type,
+				le32_to_cpu(d->dqb_id)),
+		      dquot->dq_id);
 }
 
 struct qtree_fmt_operations ocfs2_global_ops = {
@@ -475,7 +478,7 @@ int __ocfs2_sync_dquot(struct dquot *dquot, int freeing)
 {
 	int err, err2;
 	struct super_block *sb = dquot->dq_sb;
-	int type = dquot->dq_type;
+	int type = dquot->dq_id.type;
 	struct ocfs2_mem_dqinfo *info = sb_dqinfo(sb, type)->dqi_priv;
 	struct ocfs2_global_disk_dqblk dqblk;
 	s64 spacechange, inodechange;
@@ -504,7 +507,8 @@ int __ocfs2_sync_dquot(struct dquot *dquot, int freeing)
 	olditime = dquot->dq_dqb.dqb_itime;
 	oldbtime = dquot->dq_dqb.dqb_btime;
 	ocfs2_global_disk2memdqb(dquot, &dqblk);
-	trace_ocfs2_sync_dquot(dquot->dq_id, dquot->dq_dqb.dqb_curspace,
+	trace_ocfs2_sync_dquot(from_kqid(&init_user_ns, dquot->dq_id),
+			       dquot->dq_dqb.dqb_curspace,
 			       (long long)spacechange,
 			       dquot->dq_dqb.dqb_curinodes,
 			       (long long)inodechange);
@@ -555,8 +559,8 @@ int __ocfs2_sync_dquot(struct dquot *dquot, int freeing)
 	err = ocfs2_qinfo_lock(info, freeing);
 	if (err < 0) {
 		mlog(ML_ERROR, "Failed to lock quota info, losing quota write"
-			       " (type=%d, id=%u)\n", dquot->dq_type,
-			       (unsigned)dquot->dq_id);
+			       " (type=%d, id=%u)\n", dquot->dq_id.type,
+			       (unsigned)from_kqid(&init_user_ns, dquot->dq_id));
 		goto out;
 	}
 	if (freeing)
@@ -591,9 +595,10 @@ static int ocfs2_sync_dquot_helper(struct dquot *dquot, unsigned long type)
 	struct ocfs2_super *osb = OCFS2_SB(sb);
 	int status = 0;
 
-	trace_ocfs2_sync_dquot_helper(dquot->dq_id, dquot->dq_type,
+	trace_ocfs2_sync_dquot_helper(from_kqid(&init_user_ns, dquot->dq_id),
+				      dquot->dq_id.type,
 				      type, sb->s_id);
-	if (type != dquot->dq_type)
+	if (type != dquot->dq_id.type)
 		goto out;
 	status = ocfs2_lock_global_qf(oinfo, 1);
 	if (status < 0)
@@ -643,7 +648,8 @@ static int ocfs2_write_dquot(struct dquot *dquot)
 	struct ocfs2_super *osb = OCFS2_SB(dquot->dq_sb);
 	int status = 0;
 
-	trace_ocfs2_write_dquot(dquot->dq_id, dquot->dq_type);
+	trace_ocfs2_write_dquot(from_kqid(&init_user_ns, dquot->dq_id),
+				dquot->dq_id.type);
 
 	handle = ocfs2_start_trans(osb, OCFS2_QWRITE_CREDITS);
 	if (IS_ERR(handle)) {
@@ -677,11 +683,12 @@ static int ocfs2_release_dquot(struct dquot *dquot)
 {
 	handle_t *handle;
 	struct ocfs2_mem_dqinfo *oinfo =
-			sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
+			sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv;
 	struct ocfs2_super *osb = OCFS2_SB(dquot->dq_sb);
 	int status = 0;
 
-	trace_ocfs2_release_dquot(dquot->dq_id, dquot->dq_type);
+	trace_ocfs2_release_dquot(from_kqid(&init_user_ns, dquot->dq_id),
+				  dquot->dq_id.type);
 
 	mutex_lock(&dquot->dq_lock);
 	/* Check whether we are not racing with some other dqget() */
@@ -691,7 +698,7 @@ static int ocfs2_release_dquot(struct dquot *dquot)
 	if (status < 0)
 		goto out;
 	handle = ocfs2_start_trans(osb,
-		ocfs2_calc_qdel_credits(dquot->dq_sb, dquot->dq_type));
+		ocfs2_calc_qdel_credits(dquot->dq_sb, dquot->dq_id.type));
 	if (IS_ERR(handle)) {
 		status = PTR_ERR(handle);
 		mlog_errno(status);
@@ -733,13 +740,14 @@ static int ocfs2_acquire_dquot(struct dquot *dquot)
 	int ex = 0;
 	struct super_block *sb = dquot->dq_sb;
 	struct ocfs2_super *osb = OCFS2_SB(sb);
-	int type = dquot->dq_type;
+	int type = dquot->dq_id.type;
 	struct ocfs2_mem_dqinfo *info = sb_dqinfo(sb, type)->dqi_priv;
 	struct inode *gqinode = info->dqi_gqinode;
 	int need_alloc = ocfs2_global_qinit_alloc(sb, type);
 	handle_t *handle;
 
-	trace_ocfs2_acquire_dquot(dquot->dq_id, type);
+	trace_ocfs2_acquire_dquot(from_kqid(&init_user_ns, dquot->dq_id),
+				  type);
 	mutex_lock(&dquot->dq_lock);
 	/*
 	 * We need an exclusive lock, because we're going to update use count
@@ -821,12 +829,13 @@ static int ocfs2_mark_dquot_dirty(struct dquot *dquot)
 	int sync = 0;
 	int status;
 	struct super_block *sb = dquot->dq_sb;
-	int type = dquot->dq_type;
+	int type = dquot->dq_id.type;
 	struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv;
 	handle_t *handle;
 	struct ocfs2_super *osb = OCFS2_SB(sb);
 
-	trace_ocfs2_mark_dquot_dirty(dquot->dq_id, type);
+	trace_ocfs2_mark_dquot_dirty(from_kqid(&init_user_ns, dquot->dq_id),
+				     type);
 
 	/* In case user set some limits, sync dquot immediately to global
 	 * quota file so that information propagates quicker */
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index 020f0ba29ee5..27fe7ee4874c 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -883,7 +883,8 @@ static void olq_set_dquot(struct buffer_head *bh, void *private)
 	dqblk = (struct ocfs2_local_disk_dqblk *)(bh->b_data
 		+ ol_dqblk_block_offset(sb, od->dq_local_off));
 
-	dqblk->dqb_id = cpu_to_le64(od->dq_dquot.dq_id);
+	dqblk->dqb_id = cpu_to_le64(from_kqid(&init_user_ns,
+					      od->dq_dquot.dq_id));
 	spin_lock(&dq_data_lock);
 	dqblk->dqb_spacemod = cpu_to_le64(od->dq_dquot.dq_dqb.dqb_curspace -
 					  od->dq_origspace);
@@ -893,7 +894,7 @@ static void olq_set_dquot(struct buffer_head *bh, void *private)
 	trace_olq_set_dquot(
 		(unsigned long long)le64_to_cpu(dqblk->dqb_spacemod),
 		(unsigned long long)le64_to_cpu(dqblk->dqb_inodemod),
-		od->dq_dquot.dq_id);
+		from_kqid(&init_user_ns, od->dq_dquot.dq_id));
 }
 
 /* Write dquot to local quota file */
@@ -902,7 +903,7 @@ int ocfs2_local_write_dquot(struct dquot *dquot)
 	struct super_block *sb = dquot->dq_sb;
 	struct ocfs2_dquot *od = OCFS2_DQUOT(dquot);
 	struct buffer_head *bh;
-	struct inode *lqinode = sb_dqopt(sb)->files[dquot->dq_type];
+	struct inode *lqinode = sb_dqopt(sb)->files[dquot->dq_id.type];
 	int status;
 
 	status = ocfs2_read_quota_phys_block(lqinode, od->dq_local_phys_blk,
@@ -1223,7 +1224,7 @@ static void olq_alloc_dquot(struct buffer_head *bh, void *private)
 int ocfs2_create_local_dquot(struct dquot *dquot)
 {
 	struct super_block *sb = dquot->dq_sb;
-	int type = dquot->dq_type;
+	int type = dquot->dq_id.type;
 	struct inode *lqinode = sb_dqopt(sb)->files[type];
 	struct ocfs2_quota_chunk *chunk;
 	struct ocfs2_dquot *od = OCFS2_DQUOT(dquot);
@@ -1277,7 +1278,7 @@ out:
 int ocfs2_local_release_dquot(handle_t *handle, struct dquot *dquot)
 {
 	int status;
-	int type = dquot->dq_type;
+	int type = dquot->dq_id.type;
 	struct ocfs2_dquot *od = OCFS2_DQUOT(dquot);
 	struct super_block *sb = dquot->dq_sb;
 	struct ocfs2_local_disk_chunk *dchunk;
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 53e377a59b05..efaeed35476f 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -267,7 +267,7 @@ hashfn(const struct super_block *sb, unsigned int id, int type)
 static inline void insert_dquot_hash(struct dquot *dquot)
 {
 	struct hlist_head *head;
-	head = dquot_hash + hashfn(dquot->dq_sb, dquot->dq_id, dquot->dq_type);
+	head = dquot_hash + hashfn(dquot->dq_sb, from_kqid(&init_user_ns, dquot->dq_id), dquot->dq_id.type);
 	hlist_add_head(&dquot->dq_hash, head);
 }
 
@@ -279,13 +279,13 @@ static inline void remove_dquot_hash(struct dquot *dquot)
 static struct dquot *find_dquot(unsigned int hashent, struct super_block *sb,
 				unsigned int id, int type)
 {
+	struct kqid qid = make_kqid(&init_user_ns, type, id);
 	struct hlist_node *node;
 	struct dquot *dquot;
 
 	hlist_for_each (node, dquot_hash+hashent) {
 		dquot = hlist_entry(node, struct dquot, dq_hash);
-		if (dquot->dq_sb == sb && dquot->dq_id == id &&
-		    dquot->dq_type == type)
+		if (dquot->dq_sb == sb && qid_eq(dquot->dq_id, qid))
 			return dquot;
 	}
 	return NULL;
@@ -351,7 +351,7 @@ int dquot_mark_dquot_dirty(struct dquot *dquot)
 	spin_lock(&dq_list_lock);
 	if (!test_and_set_bit(DQ_MOD_B, &dquot->dq_flags)) {
 		list_add(&dquot->dq_dirty, &sb_dqopt(dquot->dq_sb)->
-				info[dquot->dq_type].dqi_dirty_list);
+				info[dquot->dq_id.type].dqi_dirty_list);
 		ret = 0;
 	}
 	spin_unlock(&dq_list_lock);
@@ -410,17 +410,17 @@ int dquot_acquire(struct dquot *dquot)
 	mutex_lock(&dquot->dq_lock);
 	mutex_lock(&dqopt->dqio_mutex);
 	if (!test_bit(DQ_READ_B, &dquot->dq_flags))
-		ret = dqopt->ops[dquot->dq_type]->read_dqblk(dquot);
+		ret = dqopt->ops[dquot->dq_id.type]->read_dqblk(dquot);
 	if (ret < 0)
 		goto out_iolock;
 	set_bit(DQ_READ_B, &dquot->dq_flags);
 	/* Instantiate dquot if needed */
 	if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags) && !dquot->dq_off) {
-		ret = dqopt->ops[dquot->dq_type]->commit_dqblk(dquot);
+		ret = dqopt->ops[dquot->dq_id.type]->commit_dqblk(dquot);
 		/* Write the info if needed */
-		if (info_dirty(&dqopt->info[dquot->dq_type])) {
-			ret2 = dqopt->ops[dquot->dq_type]->write_file_info(
-						dquot->dq_sb, dquot->dq_type);
+		if (info_dirty(&dqopt->info[dquot->dq_id.type])) {
+			ret2 = dqopt->ops[dquot->dq_id.type]->write_file_info(
+					dquot->dq_sb, dquot->dq_id.type);
 		}
 		if (ret < 0)
 			goto out_iolock;
@@ -455,7 +455,7 @@ int dquot_commit(struct dquot *dquot)
 	/* Inactive dquot can be only if there was error during read/init
 	 * => we have better not writing it */
 	if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags))
-		ret = dqopt->ops[dquot->dq_type]->commit_dqblk(dquot);
+		ret = dqopt->ops[dquot->dq_id.type]->commit_dqblk(dquot);
 	else
 		ret = -EIO;
 out_sem:
@@ -477,12 +477,12 @@ int dquot_release(struct dquot *dquot)
 	if (atomic_read(&dquot->dq_count) > 1)
 		goto out_dqlock;
 	mutex_lock(&dqopt->dqio_mutex);
-	if (dqopt->ops[dquot->dq_type]->release_dqblk) {
-		ret = dqopt->ops[dquot->dq_type]->release_dqblk(dquot);
+	if (dqopt->ops[dquot->dq_id.type]->release_dqblk) {
+		ret = dqopt->ops[dquot->dq_id.type]->release_dqblk(dquot);
 		/* Write the info */
-		if (info_dirty(&dqopt->info[dquot->dq_type])) {
-			ret2 = dqopt->ops[dquot->dq_type]->write_file_info(
-						dquot->dq_sb, dquot->dq_type);
+		if (info_dirty(&dqopt->info[dquot->dq_id.type])) {
+			ret2 = dqopt->ops[dquot->dq_id.type]->write_file_info(
+						dquot->dq_sb, dquot->dq_id.type);
 		}
 		if (ret >= 0)
 			ret = ret2;
@@ -521,7 +521,7 @@ restart:
 	list_for_each_entry_safe(dquot, tmp, &inuse_list, dq_inuse) {
 		if (dquot->dq_sb != sb)
 			continue;
-		if (dquot->dq_type != type)
+		if (dquot->dq_id.type != type)
 			continue;
 		/* Wait for dquot users */
 		if (atomic_read(&dquot->dq_count)) {
@@ -741,7 +741,8 @@ void dqput(struct dquot *dquot)
 #ifdef CONFIG_QUOTA_DEBUG
 	if (!atomic_read(&dquot->dq_count)) {
 		quota_error(dquot->dq_sb, "trying to free free dquot of %s %d",
-			    quotatypes[dquot->dq_type], dquot->dq_id);
+			    quotatypes[dquot->dq_id.type],
+			    from_kqid(&init_user_ns, dquot->dq_id));
 		BUG();
 	}
 #endif
@@ -752,7 +753,7 @@ we_slept:
 		/* We have more than one user... nothing to do */
 		atomic_dec(&dquot->dq_count);
 		/* Releasing dquot during quotaoff phase? */
-		if (!sb_has_quota_active(dquot->dq_sb, dquot->dq_type) &&
+		if (!sb_has_quota_active(dquot->dq_sb, dquot->dq_id.type) &&
 		    atomic_read(&dquot->dq_count) == 1)
 			wake_up(&dquot->dq_wait_unused);
 		spin_unlock(&dq_list_lock);
@@ -815,7 +816,7 @@ static struct dquot *get_empty_dquot(struct super_block *sb, int type)
 	INIT_LIST_HEAD(&dquot->dq_dirty);
 	init_waitqueue_head(&dquot->dq_wait_unused);
 	dquot->dq_sb = sb;
-	dquot->dq_type = type;
+	dquot->dq_id.type = type;
 	atomic_set(&dquot->dq_count, 1);
 
 	return dquot;
@@ -859,7 +860,7 @@ we_slept:
 		}
 		dquot = empty;
 		empty = NULL;
-		dquot->dq_id = id;
+		dquot->dq_id = qid;
 		/* all dquots go on the inuse_list */
 		put_inuse(dquot);
 		/* hash it first so it can be found */
@@ -1219,8 +1220,8 @@ static void prepare_warning(struct dquot_warn *warn, struct dquot *dquot,
 		return;
 	warn->w_type = warntype;
 	warn->w_sb = dquot->dq_sb;
-	warn->w_dq_id = dquot->dq_id;
-	warn->w_dq_type = dquot->dq_type;
+	warn->w_dq_id = from_kqid(&init_user_ns, dquot->dq_id);
+	warn->w_dq_type = dquot->dq_id.type;
 }
 
 /*
@@ -1245,7 +1246,7 @@ static void flush_warnings(struct dquot_warn *warn)
 
 static int ignore_hardlimit(struct dquot *dquot)
 {
-	struct mem_dqinfo *info = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_type];
+	struct mem_dqinfo *info = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_id.type];
 
 	return capable(CAP_SYS_RESOURCE) &&
 	       (info->dqi_format->qf_fmt_id != QFMT_VFS_OLD ||
@@ -1258,7 +1259,7 @@ static int check_idq(struct dquot *dquot, qsize_t inodes,
 {
 	qsize_t newinodes = dquot->dq_dqb.dqb_curinodes + inodes;
 
-	if (!sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_type) ||
+	if (!sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_id.type) ||
 	    test_bit(DQ_FAKE_B, &dquot->dq_flags))
 		return 0;
 
@@ -1283,7 +1284,7 @@ static int check_idq(struct dquot *dquot, qsize_t inodes,
 	    dquot->dq_dqb.dqb_itime == 0) {
 		prepare_warning(warn, dquot, QUOTA_NL_ISOFTWARN);
 		dquot->dq_dqb.dqb_itime = get_seconds() +
-		    sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_igrace;
+		    sb_dqopt(dquot->dq_sb)->info[dquot->dq_id.type].dqi_igrace;
 	}
 
 	return 0;
@@ -1296,7 +1297,7 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc,
 	qsize_t tspace;
 	struct super_block *sb = dquot->dq_sb;
 
-	if (!sb_has_quota_limits_enabled(sb, dquot->dq_type) ||
+	if (!sb_has_quota_limits_enabled(sb, dquot->dq_id.type) ||
 	    test_bit(DQ_FAKE_B, &dquot->dq_flags))
 		return 0;
 
@@ -1327,7 +1328,7 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc,
 		if (!prealloc) {
 			prepare_warning(warn, dquot, QUOTA_NL_BSOFTWARN);
 			dquot->dq_dqb.dqb_btime = get_seconds() +
-			    sb_dqopt(sb)->info[dquot->dq_type].dqi_bgrace;
+			    sb_dqopt(sb)->info[dquot->dq_id.type].dqi_bgrace;
 		}
 		else
 			/*
@@ -1346,7 +1347,7 @@ static int info_idq_free(struct dquot *dquot, qsize_t inodes)
 
 	if (test_bit(DQ_FAKE_B, &dquot->dq_flags) ||
 	    dquot->dq_dqb.dqb_curinodes <= dquot->dq_dqb.dqb_isoftlimit ||
-	    !sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_type))
+	    !sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_id.type))
 		return QUOTA_NL_NOWARN;
 
 	newinodes = dquot->dq_dqb.dqb_curinodes - inodes;
@@ -2362,9 +2363,9 @@ static void do_get_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
 
 	memset(di, 0, sizeof(*di));
 	di->d_version = FS_DQUOT_VERSION;
-	di->d_flags = dquot->dq_type == USRQUOTA ?
+	di->d_flags = dquot->dq_id.type == USRQUOTA ?
 			FS_USER_QUOTA : FS_GROUP_QUOTA;
-	di->d_id = dquot->dq_id;
+	di->d_id = from_kqid_munged(current_user_ns(), dquot->dq_id);
 
 	spin_lock(&dq_data_lock);
 	di->d_blk_hardlimit = stoqb(dm->dqb_bhardlimit);
@@ -2403,7 +2404,7 @@ static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
 {
 	struct mem_dqblk *dm = &dquot->dq_dqb;
 	int check_blim = 0, check_ilim = 0;
-	struct mem_dqinfo *dqi = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_type];
+	struct mem_dqinfo *dqi = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_id.type];
 
 	if (di->d_fieldmask & ~VFS_FS_DQ_MASK)
 		return -EINVAL;
diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c
index e41c1becf096..d65877fbe8f4 100644
--- a/fs/quota/quota_tree.c
+++ b/fs/quota/quota_tree.c
@@ -22,9 +22,10 @@ MODULE_LICENSE("GPL");
 
 #define __QUOTA_QT_PARANOIA
 
-static int get_index(struct qtree_mem_dqinfo *info, qid_t id, int depth)
+static int get_index(struct qtree_mem_dqinfo *info, struct kqid qid, int depth)
 {
 	unsigned int epb = info->dqi_usable_bs >> 2;
+	qid_t id = from_kqid(&init_user_ns, qid);
 
 	depth = info->dqi_qtree_depth - depth - 1;
 	while (depth--)
@@ -244,7 +245,7 @@ static uint find_free_dqentry(struct qtree_mem_dqinfo *info,
 		/* This is enough as the block is already zeroed and the entry
 		 * list is empty... */
 		info->dqi_free_entry = blk;
-		mark_info_dirty(dquot->dq_sb, dquot->dq_type);
+		mark_info_dirty(dquot->dq_sb, dquot->dq_id.type);
 	}
 	/* Block will be full? */
 	if (le16_to_cpu(dh->dqdh_entries) + 1 >= qtree_dqstr_in_blk(info)) {
@@ -357,7 +358,7 @@ static inline int dq_insert_tree(struct qtree_mem_dqinfo *info,
  */
 int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
 {
-	int type = dquot->dq_type;
+	int type = dquot->dq_id.type;
 	struct super_block *sb = dquot->dq_sb;
 	ssize_t ret;
 	char *ddquot = getdqbuf(info->dqi_entry_size);
@@ -538,8 +539,9 @@ static loff_t find_block_dqentry(struct qtree_mem_dqinfo *info,
 		ddquot += info->dqi_entry_size;
 	}
 	if (i == qtree_dqstr_in_blk(info)) {
-		quota_error(dquot->dq_sb, "Quota for id %u referenced "
-			    "but not present", dquot->dq_id);
+		quota_error(dquot->dq_sb,
+			    "Quota for id %u referenced but not present",
+			    from_kqid(&init_user_ns, dquot->dq_id));
 		ret = -EIO;
 		goto out_buf;
 	} else {
@@ -589,7 +591,7 @@ static inline loff_t find_dqentry(struct qtree_mem_dqinfo *info,
 
 int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
 {
-	int type = dquot->dq_type;
+	int type = dquot->dq_id.type;
 	struct super_block *sb = dquot->dq_sb;
 	loff_t offset;
 	char *ddquot;
@@ -607,8 +609,10 @@ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
 		offset = find_dqentry(info, dquot);
 		if (offset <= 0) {	/* Entry not present? */
 			if (offset < 0)
-				quota_error(sb, "Can't read quota structure "
-					    "for id %u", dquot->dq_id);
+				quota_error(sb,"Can't read quota structure "
+					    "for id %u",
+					    from_kqid(&init_user_ns,
+						      dquot->dq_id));
 			dquot->dq_off = 0;
 			set_bit(DQ_FAKE_B, &dquot->dq_flags);
 			memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk));
@@ -626,7 +630,7 @@ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
 		if (ret >= 0)
 			ret = -EIO;
 		quota_error(sb, "Error while reading quota structure for id %u",
-			    dquot->dq_id);
+			    from_kqid(&init_user_ns, dquot->dq_id));
 		set_bit(DQ_FAKE_B, &dquot->dq_flags);
 		memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk));
 		kfree(ddquot);
diff --git a/fs/quota/quota_v1.c b/fs/quota/quota_v1.c
index 34b37a67bb16..469c6848b322 100644
--- a/fs/quota/quota_v1.c
+++ b/fs/quota/quota_v1.c
@@ -54,7 +54,7 @@ static void v1_mem2disk_dqblk(struct v1_disk_dqblk *d, struct mem_dqblk *m)
 
 static int v1_read_dqblk(struct dquot *dquot)
 {
-	int type = dquot->dq_type;
+	int type = dquot->dq_id.type;
 	struct v1_disk_dqblk dqblk;
 
 	if (!sb_dqopt(dquot->dq_sb)->files[type])
@@ -63,7 +63,8 @@ static int v1_read_dqblk(struct dquot *dquot)
 	/* Set structure to 0s in case read fails/is after end of file */
 	memset(&dqblk, 0, sizeof(struct v1_disk_dqblk));
 	dquot->dq_sb->s_op->quota_read(dquot->dq_sb, type, (char *)&dqblk,
-			sizeof(struct v1_disk_dqblk), v1_dqoff(dquot->dq_id));
+			sizeof(struct v1_disk_dqblk),
+			v1_dqoff(from_kqid(&init_user_ns, dquot->dq_id)));
 
 	v1_disk2mem_dqblk(&dquot->dq_dqb, &dqblk);
 	if (dquot->dq_dqb.dqb_bhardlimit == 0 &&
@@ -78,12 +79,13 @@ static int v1_read_dqblk(struct dquot *dquot)
 
 static int v1_commit_dqblk(struct dquot *dquot)
 {
-	short type = dquot->dq_type;
+	short type = dquot->dq_id.type;
 	ssize_t ret;
 	struct v1_disk_dqblk dqblk;
 
 	v1_mem2disk_dqblk(&dqblk, &dquot->dq_dqb);
-	if (dquot->dq_id == 0) {
+	if (((type == USRQUOTA) && uid_eq(dquot->dq_id.uid, GLOBAL_ROOT_UID)) ||
+	    ((type == GRPQUOTA) && gid_eq(dquot->dq_id.gid, GLOBAL_ROOT_GID))) {
 		dqblk.dqb_btime =
 			sb_dqopt(dquot->dq_sb)->info[type].dqi_bgrace;
 		dqblk.dqb_itime =
@@ -93,7 +95,7 @@ static int v1_commit_dqblk(struct dquot *dquot)
 	if (sb_dqopt(dquot->dq_sb)->files[type])
 		ret = dquot->dq_sb->s_op->quota_write(dquot->dq_sb, type,
 			(char *)&dqblk, sizeof(struct v1_disk_dqblk),
-			v1_dqoff(dquot->dq_id));
+			v1_dqoff(from_kqid(&init_user_ns, dquot->dq_id)));
 	if (ret != sizeof(struct v1_disk_dqblk)) {
 		quota_error(dquot->dq_sb, "dquota write failed");
 		if (ret >= 0)
diff --git a/fs/quota/quota_v2.c b/fs/quota/quota_v2.c
index f1ab3604db5a..02751ec695c5 100644
--- a/fs/quota/quota_v2.c
+++ b/fs/quota/quota_v2.c
@@ -196,7 +196,7 @@ static void v2r0_mem2diskdqb(void *dp, struct dquot *dquot)
 	struct v2r0_disk_dqblk *d = dp;
 	struct mem_dqblk *m = &dquot->dq_dqb;
 	struct qtree_mem_dqinfo *info =
-			sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
+			sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv;
 
 	d->dqb_ihardlimit = cpu_to_le32(m->dqb_ihardlimit);
 	d->dqb_isoftlimit = cpu_to_le32(m->dqb_isoftlimit);
@@ -206,7 +206,7 @@ static void v2r0_mem2diskdqb(void *dp, struct dquot *dquot)
 	d->dqb_bsoftlimit = cpu_to_le32(v2_stoqb(m->dqb_bsoftlimit));
 	d->dqb_curspace = cpu_to_le64(m->dqb_curspace);
 	d->dqb_btime = cpu_to_le64(m->dqb_btime);
-	d->dqb_id = cpu_to_le32(dquot->dq_id);
+	d->dqb_id = cpu_to_le32(from_kqid(&init_user_ns, dquot->dq_id));
 	if (qtree_entry_unused(info, dp))
 		d->dqb_itime = cpu_to_le64(1);
 }
@@ -215,11 +215,13 @@ static int v2r0_is_id(void *dp, struct dquot *dquot)
 {
 	struct v2r0_disk_dqblk *d = dp;
 	struct qtree_mem_dqinfo *info =
-			sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
+			sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv;
 
 	if (qtree_entry_unused(info, dp))
 		return 0;
-	return le32_to_cpu(d->dqb_id) == dquot->dq_id;
+	return qid_eq(make_kqid(&init_user_ns, dquot->dq_id.type,
+				le32_to_cpu(d->dqb_id)),
+		      dquot->dq_id);
 }
 
 static void v2r1_disk2memdqb(struct dquot *dquot, void *dp)
@@ -247,7 +249,7 @@ static void v2r1_mem2diskdqb(void *dp, struct dquot *dquot)
 	struct v2r1_disk_dqblk *d = dp;
 	struct mem_dqblk *m = &dquot->dq_dqb;
 	struct qtree_mem_dqinfo *info =
-			sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
+			sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv;
 
 	d->dqb_ihardlimit = cpu_to_le64(m->dqb_ihardlimit);
 	d->dqb_isoftlimit = cpu_to_le64(m->dqb_isoftlimit);
@@ -257,7 +259,7 @@ static void v2r1_mem2diskdqb(void *dp, struct dquot *dquot)
 	d->dqb_bsoftlimit = cpu_to_le64(v2_stoqb(m->dqb_bsoftlimit));
 	d->dqb_curspace = cpu_to_le64(m->dqb_curspace);
 	d->dqb_btime = cpu_to_le64(m->dqb_btime);
-	d->dqb_id = cpu_to_le32(dquot->dq_id);
+	d->dqb_id = cpu_to_le32(from_kqid(&init_user_ns, dquot->dq_id));
 	if (qtree_entry_unused(info, dp))
 		d->dqb_itime = cpu_to_le64(1);
 }
@@ -266,26 +268,28 @@ static int v2r1_is_id(void *dp, struct dquot *dquot)
 {
 	struct v2r1_disk_dqblk *d = dp;
 	struct qtree_mem_dqinfo *info =
-			sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
+			sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv;
 
 	if (qtree_entry_unused(info, dp))
 		return 0;
-	return le32_to_cpu(d->dqb_id) == dquot->dq_id;
+	return qid_eq(make_kqid(&init_user_ns, dquot->dq_id.type,
+				le32_to_cpu(d->dqb_id)),
+		      dquot->dq_id);
 }
 
 static int v2_read_dquot(struct dquot *dquot)
 {
-	return qtree_read_dquot(sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv, dquot);
+	return qtree_read_dquot(sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv, dquot);
 }
 
 static int v2_write_dquot(struct dquot *dquot)
 {
-	return qtree_write_dquot(sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv, dquot);
+	return qtree_write_dquot(sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv, dquot);
 }
 
 static int v2_release_dquot(struct dquot *dquot)
 {
-	return qtree_release_dquot(sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv, dquot);
+	return qtree_release_dquot(sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv, dquot);
 }
 
 static int v2_free_file_info(struct super_block *sb, int type)
-- 
cgit 


From 7b9c7321ca6b13d2baeb82b3c9c9f78e9885bcf5 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sun, 16 Sep 2012 04:05:34 -0700
Subject: userns: Convert struct dquot_warn

Convert w_dq_id to be a struct kquid and remove the now unncessary
w_dq_type.

This is a simple conversion and enough other places have already
been converted that this actually reduces the code complexity
by a little bit, when removing now unnecessary type conversions.

Cc: Jan Kara <jack@suse.cz>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/quota/dquot.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index efaeed35476f..f3db8456dc8a 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1132,8 +1132,7 @@ static void dquot_decr_space(struct dquot *dquot, qsize_t number)
 
 struct dquot_warn {
 	struct super_block *w_sb;
-	qid_t w_dq_id;
-	short w_dq_type;
+	struct kqid w_dq_id;
 	short w_type;
 };
 
@@ -1157,11 +1156,11 @@ static int need_print_warning(struct dquot_warn *warn)
 	if (!flag_print_warnings)
 		return 0;
 
-	switch (warn->w_dq_type) {
+	switch (warn->w_dq_id.type) {
 		case USRQUOTA:
-			return current_fsuid() == warn->w_dq_id;
+			return current_fsuid() == warn->w_dq_id.uid;
 		case GRPQUOTA:
-			return in_group_p(warn->w_dq_id);
+			return in_group_p(warn->w_dq_id.gid);
 	}
 	return 0;
 }
@@ -1187,7 +1186,7 @@ static void print_warning(struct dquot_warn *warn)
 		tty_write_message(tty, ": warning, ");
 	else
 		tty_write_message(tty, ": write failed, ");
-	tty_write_message(tty, quotatypes[warn->w_dq_type]);
+	tty_write_message(tty, quotatypes[warn->w_dq_id.type]);
 	switch (warntype) {
 		case QUOTA_NL_IHARDWARN:
 			msg = " file limit reached.\r\n";
@@ -1220,8 +1219,7 @@ static void prepare_warning(struct dquot_warn *warn, struct dquot *dquot,
 		return;
 	warn->w_type = warntype;
 	warn->w_sb = dquot->dq_sb;
-	warn->w_dq_id = from_kqid(&init_user_ns, dquot->dq_id);
-	warn->w_dq_type = dquot->dq_id.type;
+	warn->w_dq_id = dquot->dq_id;
 }
 
 /*
@@ -1239,7 +1237,7 @@ static void flush_warnings(struct dquot_warn *warn)
 #ifdef CONFIG_PRINT_QUOTA_WARNING
 		print_warning(&warn[i]);
 #endif
-		quota_send_warning(make_kqid(&init_user_ns, warn[i].w_dq_type, warn[i].w_dq_id),
+		quota_send_warning(warn[i].w_dq_id,
 				   warn[i].w_sb->s_dev, warn[i].w_type);
 	}
 }
-- 
cgit 


From 1a06d420ce9d60b98f5bdf5fd6e4200abfbd3c35 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sun, 16 Sep 2012 05:45:30 -0700
Subject: userns: Convert quota

Now that the type changes are done, here is the final set of
changes to make the quota code work when user namespaces are enabled.

Small cleanups and fixes to make the code build when user namespaces
are enabled.

Cc: Jan Kara <jack@suse.cz>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/quota/dquot.c | 29 ++++++++++++++---------------
 1 file changed, 14 insertions(+), 15 deletions(-)

(limited to 'fs')

diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index f3db8456dc8a..c4564d0a4a9b 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -253,8 +253,10 @@ static qsize_t inode_get_rsv_space(struct inode *inode);
 static void __dquot_initialize(struct inode *inode, int type);
 
 static inline unsigned int
-hashfn(const struct super_block *sb, unsigned int id, int type)
+hashfn(const struct super_block *sb, struct kqid qid)
 {
+	unsigned int id = from_kqid(&init_user_ns, qid);
+	int type = qid.type;
 	unsigned long tmp;
 
 	tmp = (((unsigned long)sb>>L1_CACHE_SHIFT) ^ id) * (MAXQUOTAS - type);
@@ -267,7 +269,7 @@ hashfn(const struct super_block *sb, unsigned int id, int type)
 static inline void insert_dquot_hash(struct dquot *dquot)
 {
 	struct hlist_head *head;
-	head = dquot_hash + hashfn(dquot->dq_sb, from_kqid(&init_user_ns, dquot->dq_id), dquot->dq_id.type);
+	head = dquot_hash + hashfn(dquot->dq_sb, dquot->dq_id);
 	hlist_add_head(&dquot->dq_hash, head);
 }
 
@@ -277,9 +279,8 @@ static inline void remove_dquot_hash(struct dquot *dquot)
 }
 
 static struct dquot *find_dquot(unsigned int hashent, struct super_block *sb,
-				unsigned int id, int type)
+				struct kqid qid)
 {
-	struct kqid qid = make_kqid(&init_user_ns, type, id);
 	struct hlist_node *node;
 	struct dquot *dquot;
 
@@ -816,7 +817,7 @@ static struct dquot *get_empty_dquot(struct super_block *sb, int type)
 	INIT_LIST_HEAD(&dquot->dq_dirty);
 	init_waitqueue_head(&dquot->dq_wait_unused);
 	dquot->dq_sb = sb;
-	dquot->dq_id.type = type;
+	dquot->dq_id = make_kqid_invalid(type);
 	atomic_set(&dquot->dq_count, 1);
 
 	return dquot;
@@ -832,28 +833,26 @@ static struct dquot *get_empty_dquot(struct super_block *sb, int type)
  */
 struct dquot *dqget(struct super_block *sb, struct kqid qid)
 {
-	unsigned int type = qid.type;
-	unsigned int id = from_kqid(&init_user_ns, qid);
-	unsigned int hashent = hashfn(sb, id, type);
+	unsigned int hashent = hashfn(sb, qid);
 	struct dquot *dquot = NULL, *empty = NULL;
 
-        if (!sb_has_quota_active(sb, type))
+        if (!sb_has_quota_active(sb, qid.type))
 		return NULL;
 we_slept:
 	spin_lock(&dq_list_lock);
 	spin_lock(&dq_state_lock);
-	if (!sb_has_quota_active(sb, type)) {
+	if (!sb_has_quota_active(sb, qid.type)) {
 		spin_unlock(&dq_state_lock);
 		spin_unlock(&dq_list_lock);
 		goto out;
 	}
 	spin_unlock(&dq_state_lock);
 
-	dquot = find_dquot(hashent, sb, id, type);
+	dquot = find_dquot(hashent, sb, qid);
 	if (!dquot) {
 		if (!empty) {
 			spin_unlock(&dq_list_lock);
-			empty = get_empty_dquot(sb, type);
+			empty = get_empty_dquot(sb, qid.type);
 			if (!empty)
 				schedule();	/* Try to wait for a moment... */
 			goto we_slept;
@@ -1158,7 +1157,7 @@ static int need_print_warning(struct dquot_warn *warn)
 
 	switch (warn->w_dq_id.type) {
 		case USRQUOTA:
-			return current_fsuid() == warn->w_dq_id.uid;
+			return uid_eq(current_fsuid(), warn->w_dq_id.uid);
 		case GRPQUOTA:
 			return in_group_p(warn->w_dq_id.gid);
 	}
@@ -1898,9 +1897,9 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
 	if (!dquot_active(inode))
 		return 0;
 
-	if (iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid)
+	if (iattr->ia_valid & ATTR_UID && !uid_eq(iattr->ia_uid, inode->i_uid))
 		transfer_to[USRQUOTA] = dqget(sb, make_kqid_uid(iattr->ia_uid));
-	if (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)
+	if (iattr->ia_valid & ATTR_GID && !gid_eq(iattr->ia_gid, inode->i_gid))
 		transfer_to[GRPQUOTA] = dqget(sb, make_kqid_gid(iattr->ia_gid));
 
 	ret = __dquot_transfer(inode, transfer_to);
-- 
cgit 


From 84f4141ee3ea11035f741d4298cb6bbad1850fcf Mon Sep 17 00:00:00 2001
From: Dave Kleikamp <dave.kleikamp@oracle.com>
Date: Tue, 18 Sep 2012 11:27:22 -0500
Subject: jfs: Fix do_div precision in commit b40c2e66

In a hasty fix to replace a 64-bit division with do_div, I
unintentionally assigned the divisor to a 32-bit variable.

Signed-off-by: Dave Kleikamp <dave.kleikamp@oracle.com>
Cc: Tino Reichardt <milky-kernel@mcmilk.de>
---
 fs/jfs/jfs_dmap.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index 174feb6a73c1..9a55f53be5ff 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -1641,14 +1641,15 @@ s64 dbDiscardAG(struct inode *ip, int agno, s64 minlen)
 
 	/* max blkno / nblocks pairs to trim */
 	int count = 0, range_cnt;
+	u64 max_ranges;
 
 	/* prevent others from writing new stuff here, while trimming */
 	IWRITE_LOCK(ipbmap, RDWRLOCK_DMAP);
 
 	nblocks = bmp->db_agfree[agno];
-	range_cnt = nblocks;
-	do_div(range_cnt, (int)minlen);
-	range_cnt = min(range_cnt + 1, 32 * 1024);
+	max_ranges = nblocks;
+	do_div(max_ranges, minlen);
+	range_cnt = min_t(u64, max_ranges + 1, 32 * 1024);
 	totrim = kmalloc(sizeof(struct range2trim) * range_cnt, GFP_NOFS);
 	if (totrim == NULL) {
 		jfs_error(bmp->db_ipbmap->i_sb,
-- 
cgit 


From 170782eb89462d30302cec12378253115b492b38 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 7 Feb 2012 16:25:39 -0800
Subject: userns: Convert fat to use kuid/kgid where appropriate

Acked-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/fat/fat.h   |  4 ++--
 fs/fat/file.c  |  6 +++---
 fs/fat/inode.c | 18 ++++++++++++------
 3 files changed, 17 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index 2deeeb86f331..7d8e0dcac5d5 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -23,8 +23,8 @@
 #define FAT_ERRORS_RO		3      /* remount r/o on error */
 
 struct fat_mount_options {
-	uid_t fs_uid;
-	gid_t fs_gid;
+	kuid_t fs_uid;
+	kgid_t fs_gid;
 	unsigned short fs_fmask;
 	unsigned short fs_dmask;
 	unsigned short codepage;  /* Codepage for shortname conversions */
diff --git a/fs/fat/file.c b/fs/fat/file.c
index e007b8bd8e5e..a62e0ecbe2db 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -352,7 +352,7 @@ static int fat_allow_set_time(struct msdos_sb_info *sbi, struct inode *inode)
 {
 	umode_t allow_utime = sbi->options.allow_utime;
 
-	if (current_fsuid() != inode->i_uid) {
+	if (!uid_eq(current_fsuid(), inode->i_uid)) {
 		if (in_group_p(inode->i_gid))
 			allow_utime >>= 3;
 		if (allow_utime & MAY_WRITE)
@@ -407,9 +407,9 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr)
 	}
 
 	if (((attr->ia_valid & ATTR_UID) &&
-	     (attr->ia_uid != sbi->options.fs_uid)) ||
+	     (!uid_eq(attr->ia_uid, sbi->options.fs_uid))) ||
 	    ((attr->ia_valid & ATTR_GID) &&
-	     (attr->ia_gid != sbi->options.fs_gid)) ||
+	     (!gid_eq(attr->ia_gid, sbi->options.fs_gid))) ||
 	    ((attr->ia_valid & ATTR_MODE) &&
 	     (attr->ia_mode & ~FAT_VALID_MODE)))
 		error = -EPERM;
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 05e897fe9866..47d9eb0be886 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -791,10 +791,12 @@ static int fat_show_options(struct seq_file *m, struct dentry *root)
 	struct fat_mount_options *opts = &sbi->options;
 	int isvfat = opts->isvfat;
 
-	if (opts->fs_uid != 0)
-		seq_printf(m, ",uid=%u", opts->fs_uid);
-	if (opts->fs_gid != 0)
-		seq_printf(m, ",gid=%u", opts->fs_gid);
+	if (!uid_eq(opts->fs_uid, GLOBAL_ROOT_UID))
+		seq_printf(m, ",uid=%u",
+				from_kuid_munged(&init_user_ns, opts->fs_uid));
+	if (!gid_eq(opts->fs_gid, GLOBAL_ROOT_GID))
+		seq_printf(m, ",gid=%u",
+				from_kgid_munged(&init_user_ns, opts->fs_gid));
 	seq_printf(m, ",fmask=%04o", opts->fs_fmask);
 	seq_printf(m, ",dmask=%04o", opts->fs_dmask);
 	if (opts->allow_utime)
@@ -1037,12 +1039,16 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat,
 		case Opt_uid:
 			if (match_int(&args[0], &option))
 				return 0;
-			opts->fs_uid = option;
+			opts->fs_uid = make_kuid(current_user_ns(), option);
+			if (!uid_valid(opts->fs_uid))
+				return 0;
 			break;
 		case Opt_gid:
 			if (match_int(&args[0], &option))
 				return 0;
-			opts->fs_gid = option;
+			opts->fs_gid = make_kgid(current_user_ns(), option);
+			if (!gid_valid(opts->fs_gid))
+				return 0;
 			break;
 		case Opt_umask:
 			if (match_octal(&args[0], &option))
-- 
cgit 


From 44a075bde9ab32d13bb2ff89f3e72fd869f272c4 Mon Sep 17 00:00:00 2001
From: Wang Sheng-Hui <shhuiw@gmail.com>
Date: Fri, 21 Sep 2012 08:21:01 +0800
Subject: btrfs: fix the commment for the action flags in delayed-ref.h

The action field has been merged into struct btrfs_delayed_ref_node,
and no struct btrfs_delayed_ref is available now.

Signed-off-by: Wang Sheng-Hui <shhuiw@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 fs/btrfs/delayed-ref.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 413927fb9957..25589456d1c9 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -18,7 +18,7 @@
 #ifndef __DELAYED_REF__
 #define __DELAYED_REF__
 
-/* these are the possible values of struct btrfs_delayed_ref->action */
+/* these are the possible values of struct btrfs_delayed_ref_node->action */
 #define BTRFS_ADD_DELAYED_REF    1 /* add one backref to the tree */
 #define BTRFS_DROP_DELAYED_REF   2 /* delete one backref from the tree */
 #define BTRFS_ADD_DELAYED_EXTENT 3 /* record a full extent allocation */
-- 
cgit 


From 7dfd8cc536dc4984ee92d9a8863ea705b955bc37 Mon Sep 17 00:00:00 2001
From: Liu Bo <liub.liubo@gmail.com>
Date: Thu, 20 Sep 2012 15:49:57 +0800
Subject: fs/fs-writeback.c: cleanup riteback_sb_inodes kerneldoc

Argument @only_this_sb has been removed.

Signed-off-by: Liu Bo <liub.liubo@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 fs/fs-writeback.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'fs')

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 8f660dd6137a..3451d3769598 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -582,10 +582,6 @@ static long writeback_chunk_size(struct backing_dev_info *bdi,
 /*
  * Write a portion of b_io inodes which belong to @sb.
  *
- * If @only_this_sb is true, then find and write all such
- * inodes. Otherwise write only ones which go sequentially
- * in reverse order.
- *
  * Return the number of pages and/or inodes written.
  */
 static long writeback_sb_inodes(struct super_block *sb,
-- 
cgit 


From a0eb3a05a8cbe9cd1a41dde3d1b2e5bcc10634f2 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 7 Feb 2012 16:19:25 -0800
Subject: userns: Convert hugetlbfs to use kuid/kgid where appropriate

Note sysctl_hugetlb_shm_group can only be written in the root user
in the initial user namespace, so we can assume sysctl_hugetlb_shm_group
is in the initial user namespace.

Cc: William Irwin <wli@holomorphy.com>
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/hugetlbfs/inode.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 8349a899912e..6e572c4fbf68 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -42,8 +42,8 @@ static const struct inode_operations hugetlbfs_dir_inode_operations;
 static const struct inode_operations hugetlbfs_inode_operations;
 
 struct hugetlbfs_config {
-	uid_t   uid;
-	gid_t   gid;
+	kuid_t   uid;
+	kgid_t   gid;
 	umode_t mode;
 	long	nr_blocks;
 	long	nr_inodes;
@@ -785,13 +785,17 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
 		case Opt_uid:
 			if (match_int(&args[0], &option))
  				goto bad_val;
-			pconfig->uid = option;
+			pconfig->uid = make_kuid(current_user_ns(), option);
+			if (!uid_valid(pconfig->uid))
+				goto bad_val;
 			break;
 
 		case Opt_gid:
 			if (match_int(&args[0], &option))
  				goto bad_val;
-			pconfig->gid = option;
+			pconfig->gid = make_kgid(current_user_ns(), option);
+			if (!gid_valid(pconfig->gid))
+				goto bad_val;
 			break;
 
 		case Opt_mode:
@@ -924,7 +928,9 @@ static struct vfsmount *hugetlbfs_vfsmount;
 
 static int can_do_hugetlb_shm(void)
 {
-	return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group);
+	kgid_t shm_group;
+	shm_group = make_kgid(&init_user_ns, sysctl_hugetlb_shm_group);
+	return capable(CAP_IPC_LOCK) || in_group_p(shm_group);
 }
 
 struct file *hugetlb_file_setup(const char *name, unsigned long addr,
-- 
cgit 


From c010d1ff4f69c9f4aa331dfd8266262fb1b478ce Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 7 Feb 2012 15:58:38 -0800
Subject: userns: Convert adfs to use kuid and kgid where appropriate

Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/adfs/adfs.h  |  4 ++--
 fs/adfs/inode.c |  4 ++--
 fs/adfs/super.c | 21 +++++++++++++--------
 3 files changed, 17 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h
index 718ac1f440c6..585adafb0cc2 100644
--- a/fs/adfs/adfs.h
+++ b/fs/adfs/adfs.h
@@ -46,8 +46,8 @@ struct adfs_sb_info {
 	struct adfs_discmap *s_map;	/* bh list containing map		 */
 	struct adfs_dir_ops *s_dir;	/* directory operations			 */
 
-	uid_t		s_uid;		/* owner uid				 */
-	gid_t		s_gid;		/* owner gid				 */
+	kuid_t		s_uid;		/* owner uid				 */
+	kgid_t		s_gid;		/* owner gid				 */
 	umode_t		s_owner_mask;	/* ADFS owner perm -> unix perm		 */
 	umode_t		s_other_mask;	/* ADFS other perm -> unix perm		 */
 	int		s_ftsuffix;	/* ,xyz hex filetype suffix option */
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index 1dab6a174d6a..e9bad5093a3f 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -304,8 +304,8 @@ adfs_notify_change(struct dentry *dentry, struct iattr *attr)
 	 * we can't change the UID or GID of any file -
 	 * we have a global UID/GID in the superblock
 	 */
-	if ((ia_valid & ATTR_UID && attr->ia_uid != ADFS_SB(sb)->s_uid) ||
-	    (ia_valid & ATTR_GID && attr->ia_gid != ADFS_SB(sb)->s_gid))
+	if ((ia_valid & ATTR_UID && !uid_eq(attr->ia_uid, ADFS_SB(sb)->s_uid)) ||
+	    (ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, ADFS_SB(sb)->s_gid)))
 		error = -EPERM;
 
 	if (error)
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index bdaec92353c2..22a0d7ed5fa1 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -15,6 +15,7 @@
 #include <linux/seq_file.h>
 #include <linux/slab.h>
 #include <linux/statfs.h>
+#include <linux/user_namespace.h>
 #include "adfs.h"
 #include "dir_f.h"
 #include "dir_fplus.h"
@@ -130,10 +131,10 @@ static int adfs_show_options(struct seq_file *seq, struct dentry *root)
 {
 	struct adfs_sb_info *asb = ADFS_SB(root->d_sb);
 
-	if (asb->s_uid != 0)
-		seq_printf(seq, ",uid=%u", asb->s_uid);
-	if (asb->s_gid != 0)
-		seq_printf(seq, ",gid=%u", asb->s_gid);
+	if (!uid_eq(asb->s_uid, GLOBAL_ROOT_UID))
+		seq_printf(seq, ",uid=%u", from_kuid_munged(&init_user_ns, asb->s_uid));
+	if (!gid_eq(asb->s_gid, GLOBAL_ROOT_GID))
+		seq_printf(seq, ",gid=%u", from_kgid_munged(&init_user_ns, asb->s_gid));
 	if (asb->s_owner_mask != ADFS_DEFAULT_OWNER_MASK)
 		seq_printf(seq, ",ownmask=%o", asb->s_owner_mask);
 	if (asb->s_other_mask != ADFS_DEFAULT_OTHER_MASK)
@@ -175,12 +176,16 @@ static int parse_options(struct super_block *sb, char *options)
 		case Opt_uid:
 			if (match_int(args, &option))
 				return -EINVAL;
-			asb->s_uid = option;
+			asb->s_uid = make_kuid(current_user_ns(), option);
+			if (!uid_valid(asb->s_uid))
+				return -EINVAL;
 			break;
 		case Opt_gid:
 			if (match_int(args, &option))
 				return -EINVAL;
-			asb->s_gid = option;
+			asb->s_gid = make_kgid(current_user_ns(), option);
+			if (!gid_valid(asb->s_gid))
+				return -EINVAL;
 			break;
 		case Opt_ownmask:
 			if (match_octal(args, &option))
@@ -369,8 +374,8 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
 	sb->s_fs_info = asb;
 
 	/* set default options */
-	asb->s_uid = 0;
-	asb->s_gid = 0;
+	asb->s_uid = GLOBAL_ROOT_UID;
+	asb->s_gid = GLOBAL_ROOT_GID;
 	asb->s_owner_mask = ADFS_DEFAULT_OWNER_MASK;
 	asb->s_other_mask = ADFS_DEFAULT_OTHER_MASK;
 	asb->s_ftsuffix = 0;
-- 
cgit 


From 31aba059bb9522ff730301a7d567117fcff0952c Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 10 Feb 2012 10:51:24 -0800
Subject: userns: Convert befs to use kuid/kgid where appropriate

Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/befs/befs.h     |  4 ++--
 fs/befs/linuxvfs.c | 27 +++++++++++++++++++--------
 2 files changed, 21 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/befs/befs.h b/fs/befs/befs.h
index d9a40abda6b7..b26642839156 100644
--- a/fs/befs/befs.h
+++ b/fs/befs/befs.h
@@ -20,8 +20,8 @@ typedef u64 befs_blocknr_t;
  */
 
 typedef struct befs_mount_options {
-	gid_t gid;
-	uid_t uid;
+	kgid_t gid;
+	kuid_t uid;
 	int use_gid;
 	int use_uid;
 	int debug;
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index cf7f3c67c8b7..7f73a692bfd0 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -15,6 +15,7 @@
 #include <linux/vfs.h>
 #include <linux/parser.h>
 #include <linux/namei.h>
+#include <linux/sched.h>
 
 #include "befs.h"
 #include "btree.h"
@@ -352,9 +353,11 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino)
 	 */   
 
 	inode->i_uid = befs_sb->mount_opts.use_uid ?
-	    befs_sb->mount_opts.uid : (uid_t) fs32_to_cpu(sb, raw_inode->uid);
+		befs_sb->mount_opts.uid :
+		make_kuid(&init_user_ns, fs32_to_cpu(sb, raw_inode->uid));
 	inode->i_gid = befs_sb->mount_opts.use_gid ?
-	    befs_sb->mount_opts.gid : (gid_t) fs32_to_cpu(sb, raw_inode->gid);
+		befs_sb->mount_opts.gid :
+		make_kgid(&init_user_ns, fs32_to_cpu(sb, raw_inode->gid));
 
 	set_nlink(inode, 1);
 
@@ -674,10 +677,12 @@ parse_options(char *options, befs_mount_options * opts)
 	char *p;
 	substring_t args[MAX_OPT_ARGS];
 	int option;
+	kuid_t uid;
+	kgid_t gid;
 
 	/* Initialize options */
-	opts->uid = 0;
-	opts->gid = 0;
+	opts->uid = GLOBAL_ROOT_UID;
+	opts->gid = GLOBAL_ROOT_GID;
 	opts->use_uid = 0;
 	opts->use_gid = 0;
 	opts->iocharset = NULL;
@@ -696,23 +701,29 @@ parse_options(char *options, befs_mount_options * opts)
 		case Opt_uid:
 			if (match_int(&args[0], &option))
 				return 0;
-			if (option < 0) {
+			uid = INVALID_UID;
+			if (option >= 0)
+				uid = make_kuid(current_user_ns(), option);
+			if (!uid_valid(uid)) {
 				printk(KERN_ERR "BeFS: Invalid uid %d, "
 						"using default\n", option);
 				break;
 			}
-			opts->uid = option;
+			opts->uid = uid;
 			opts->use_uid = 1;
 			break;
 		case Opt_gid:
 			if (match_int(&args[0], &option))
 				return 0;
-			if (option < 0) {
+			gid = INVALID_GID;
+			if (option >= 0)
+				gid = make_kgid(current_user_ns(), option);
+			if (!gid_valid(gid)) {
 				printk(KERN_ERR "BeFS: Invalid gid %d, "
 						"using default\n", option);
 				break;
 			}
-			opts->gid = option;
+			opts->gid = gid;
 			opts->use_gid = 1;
 			break;
 		case Opt_charset:
-- 
cgit 


From a7d9cfe97b450b27c82e6e41b2fde6214708560d Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 10 Feb 2012 11:06:08 -0800
Subject: userns: Convert cramfs to use kuid/kgid where appropriate

Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/cramfs/inode.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 28cca01ca9c9..c6c3f91ecf06 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -90,8 +90,8 @@ static struct inode *get_cramfs_inode(struct super_block *sb,
 	}
 
 	inode->i_mode = cramfs_inode->mode;
-	inode->i_uid = cramfs_inode->uid;
-	inode->i_gid = cramfs_inode->gid;
+	i_uid_write(inode, cramfs_inode->uid);
+	i_gid_write(inode, cramfs_inode->gid);
 
 	/* if the lower 2 bits are zero, the inode contains data */
 	if (!(inode->i_ino & 3)) {
-- 
cgit 


From cdf8c58a3546443073cab77d07a88ee439319c19 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 7 Feb 2012 16:24:33 -0800
Subject: userns: Convert ecryptfs to use kuid/kgid where appropriate

Cc: Tyler Hicks <tyhicks@canonical.com>
Cc: Dustin Kirkland <dustin.kirkland@gazzang.com>
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/ecryptfs/main.c      | 5 +++--
 fs/ecryptfs/messaging.c | 5 ++---
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 2768138eefee..1d6ce91b7061 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -544,11 +544,12 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
 		goto out_free;
 	}
 
-	if (check_ruid && path.dentry->d_inode->i_uid != current_uid()) {
+	if (check_ruid && !uid_eq(path.dentry->d_inode->i_uid, current_uid())) {
 		rc = -EPERM;
 		printk(KERN_ERR "Mount of device (uid: %d) not owned by "
 		       "requested user (uid: %d)\n",
-		       path.dentry->d_inode->i_uid, current_uid());
+			i_uid_read(path.dentry->d_inode),
+			from_kuid(&init_user_ns, current_uid()));
 		goto out_free;
 	}
 
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c
index b29bb8bfa8d9..5fa2471796c2 100644
--- a/fs/ecryptfs/messaging.c
+++ b/fs/ecryptfs/messaging.c
@@ -33,7 +33,7 @@ static struct hlist_head *ecryptfs_daemon_hash;
 struct mutex ecryptfs_daemon_hash_mux;
 static int ecryptfs_hash_bits;
 #define ecryptfs_current_euid_hash(uid) \
-		hash_long((unsigned long)current_euid(), ecryptfs_hash_bits)
+	hash_long((unsigned long)from_kuid(&init_user_ns, current_euid()), ecryptfs_hash_bits)
 
 static u32 ecryptfs_msg_counter;
 static struct ecryptfs_msg_ctx *ecryptfs_msg_ctx_arr;
@@ -121,8 +121,7 @@ int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon)
 	hlist_for_each_entry(*daemon, elem,
 			    &ecryptfs_daemon_hash[ecryptfs_current_euid_hash()],
 			    euid_chain) {
-		if ((*daemon)->file->f_cred->euid == current_euid() &&
-		    (*daemon)->file->f_cred->user_ns == current_user_ns()) {
+		if (uid_eq((*daemon)->file->f_cred->euid, current_euid())) {
 			rc = 0;
 			goto out;
 		}
-- 
cgit 


From 5d4ea4da6a6fb5c91e433db4addf3bb23866d821 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 10 Feb 2012 11:10:33 -0800
Subject: userns: Convert efs to use kuid/kgid where appropriate

Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/efs/inode.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/efs/inode.c b/fs/efs/inode.c
index bc84f365d75c..f3913eb2c474 100644
--- a/fs/efs/inode.c
+++ b/fs/efs/inode.c
@@ -97,8 +97,8 @@ struct inode *efs_iget(struct super_block *super, unsigned long ino)
     
 	inode->i_mode  = be16_to_cpu(efs_inode->di_mode);
 	set_nlink(inode, be16_to_cpu(efs_inode->di_nlink));
-	inode->i_uid   = (uid_t)be16_to_cpu(efs_inode->di_uid);
-	inode->i_gid   = (gid_t)be16_to_cpu(efs_inode->di_gid);
+	i_uid_write(inode, (uid_t)be16_to_cpu(efs_inode->di_uid));
+	i_gid_write(inode, (gid_t)be16_to_cpu(efs_inode->di_gid));
 	inode->i_size  = be32_to_cpu(efs_inode->di_size);
 	inode->i_atime.tv_sec = be32_to_cpu(efs_inode->di_atime);
 	inode->i_mtime.tv_sec = be32_to_cpu(efs_inode->di_mtime);
-- 
cgit 


From d001b0536562f816af7eb9947e49de58f504958a Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 10 Feb 2012 11:11:19 -0800
Subject: userns: Convert exofs to use kuid/kgid where appropriate

Cc: Benny Halevy <bhalevy@tonian.com>
Acked-by: Boaz Harrosh <bharrosh@panasas.com>
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/exofs/inode.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 5badb0c039de..190c3d69e569 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -1163,8 +1163,8 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
 
 	/* copy stuff from on-disk struct to in-memory struct */
 	inode->i_mode = le16_to_cpu(fcb.i_mode);
-	inode->i_uid = le32_to_cpu(fcb.i_uid);
-	inode->i_gid = le32_to_cpu(fcb.i_gid);
+	i_uid_write(inode, le32_to_cpu(fcb.i_uid));
+	i_gid_write(inode, le32_to_cpu(fcb.i_gid));
 	set_nlink(inode, le16_to_cpu(fcb.i_links_count));
 	inode->i_ctime.tv_sec = (signed)le32_to_cpu(fcb.i_ctime);
 	inode->i_atime.tv_sec = (signed)le32_to_cpu(fcb.i_atime);
@@ -1376,8 +1376,8 @@ static int exofs_update_inode(struct inode *inode, int do_sync)
 	fcb = &args->fcb;
 
 	fcb->i_mode = cpu_to_le16(inode->i_mode);
-	fcb->i_uid = cpu_to_le32(inode->i_uid);
-	fcb->i_gid = cpu_to_le32(inode->i_gid);
+	fcb->i_uid = cpu_to_le32(i_uid_read(inode));
+	fcb->i_gid = cpu_to_le32(i_gid_read(inode));
 	fcb->i_links_count = cpu_to_le16(inode->i_nlink);
 	fcb->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec);
 	fcb->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
-- 
cgit 


From 43b5e4ccd463a5b42e0531c32628ac462081cab8 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 7 Feb 2012 16:26:59 -0800
Subject: userns: Convert hfs to use kuid and kgid where appropriate

Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/hfs/hfs_fs.h |  4 ++--
 fs/hfs/inode.c  |  4 ++--
 fs/hfs/super.c  | 16 +++++++++++++---
 3 files changed, 17 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index 8275175acf6e..693df9fe52b2 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -134,8 +134,8 @@ struct hfs_sb_info {
 						   permissions on all files */
 	umode_t s_dir_umask;			/* The umask applied to the
 						   permissions on all dirs */
-	uid_t s_uid;				/* The uid of all files */
-	gid_t s_gid;				/* The gid of all files */
+	kuid_t s_uid;				/* The uid of all files */
+	kgid_t s_gid;				/* The gid of all files */
 
 	int session, part;
 	struct nls_table *nls_io, *nls_disk;
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index ee1bc55677f1..5d5c22da1960 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -594,9 +594,9 @@ int hfs_inode_setattr(struct dentry *dentry, struct iattr * attr)
 
 	/* no uig/gid changes and limit which mode bits can be set */
 	if (((attr->ia_valid & ATTR_UID) &&
-	     (attr->ia_uid != hsb->s_uid)) ||
+	     (!uid_eq(attr->ia_uid, hsb->s_uid))) ||
 	    ((attr->ia_valid & ATTR_GID) &&
-	     (attr->ia_gid != hsb->s_gid)) ||
+	     (!gid_eq(attr->ia_gid, hsb->s_gid))) ||
 	    ((attr->ia_valid & ATTR_MODE) &&
 	     ((S_ISDIR(inode->i_mode) &&
 	       (attr->ia_mode != inode->i_mode)) ||
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 4eb873e0c07b..0b63d135a092 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -138,7 +138,9 @@ static int hfs_show_options(struct seq_file *seq, struct dentry *root)
 		seq_printf(seq, ",creator=%.4s", (char *)&sbi->s_creator);
 	if (sbi->s_type != cpu_to_be32(0x3f3f3f3f))
 		seq_printf(seq, ",type=%.4s", (char *)&sbi->s_type);
-	seq_printf(seq, ",uid=%u,gid=%u", sbi->s_uid, sbi->s_gid);
+	seq_printf(seq, ",uid=%u,gid=%u",
+			from_kuid_munged(&init_user_ns, sbi->s_uid),
+			from_kgid_munged(&init_user_ns, sbi->s_gid));
 	if (sbi->s_file_umask != 0133)
 		seq_printf(seq, ",file_umask=%o", sbi->s_file_umask);
 	if (sbi->s_dir_umask != 0022)
@@ -254,14 +256,22 @@ static int parse_options(char *options, struct hfs_sb_info *hsb)
 				printk(KERN_ERR "hfs: uid requires an argument\n");
 				return 0;
 			}
-			hsb->s_uid = (uid_t)tmp;
+			hsb->s_uid = make_kuid(current_user_ns(), (uid_t)tmp);
+			if (!uid_valid(hsb->s_uid)) {
+				printk(KERN_ERR "hfs: invalid uid %d\n", tmp);
+				return 0;
+			}
 			break;
 		case opt_gid:
 			if (match_int(&args[0], &tmp)) {
 				printk(KERN_ERR "hfs: gid requires an argument\n");
 				return 0;
 			}
-			hsb->s_gid = (gid_t)tmp;
+			hsb->s_gid = make_kgid(current_user_ns(), (gid_t)tmp);
+			if (!gid_valid(hsb->s_gid)) {
+				printk(KERN_ERR "hfs: invalid gid %d\n", tmp);
+				return 0;
+			}
 			break;
 		case opt_umask:
 			if (match_octal(&args[0], &tmp)) {
-- 
cgit 


From 16525e3f146fbba1ae43740c7d3895d4f396a768 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 7 Feb 2012 16:27:17 -0800
Subject: userns: Convert hfsplus to use kuid and kgid where appropriate

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/hfsplus/catalog.c    |  4 ++--
 fs/hfsplus/hfsplus_fs.h |  4 ++--
 fs/hfsplus/inode.c      |  8 ++++----
 fs/hfsplus/options.c    | 15 ++++++++++++---
 4 files changed, 20 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c
index ec2a9c23f0c9..798d9c4c5e71 100644
--- a/fs/hfsplus/catalog.c
+++ b/fs/hfsplus/catalog.c
@@ -80,8 +80,8 @@ void hfsplus_cat_set_perms(struct inode *inode, struct hfsplus_perm *perms)
 
 	perms->userflags = HFSPLUS_I(inode)->userflags;
 	perms->mode = cpu_to_be16(inode->i_mode);
-	perms->owner = cpu_to_be32(inode->i_uid);
-	perms->group = cpu_to_be32(inode->i_gid);
+	perms->owner = cpu_to_be32(i_uid_read(inode));
+	perms->group = cpu_to_be32(i_gid_read(inode));
 
 	if (S_ISREG(inode->i_mode))
 		perms->dev = cpu_to_be32(inode->i_nlink);
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index 558dbb463a4e..c571de224b15 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -149,8 +149,8 @@ struct hfsplus_sb_info {
 	u32 type;
 
 	umode_t umask;
-	uid_t uid;
-	gid_t gid;
+	kuid_t uid;
+	kgid_t gid;
 
 	int part, session;
 	unsigned long flags;
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 3d8b4a675ba0..2172aa5976f5 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -233,12 +233,12 @@ static void hfsplus_get_perms(struct inode *inode,
 
 	mode = be16_to_cpu(perms->mode);
 
-	inode->i_uid = be32_to_cpu(perms->owner);
-	if (!inode->i_uid && !mode)
+	i_uid_write(inode, be32_to_cpu(perms->owner));
+	if (!i_uid_read(inode) && !mode)
 		inode->i_uid = sbi->uid;
 
-	inode->i_gid = be32_to_cpu(perms->group);
-	if (!inode->i_gid && !mode)
+	i_gid_write(inode, be32_to_cpu(perms->group));
+	if (!i_gid_read(inode) && !mode)
 		inode->i_gid = sbi->gid;
 
 	if (dir) {
diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c
index 06fa5618600c..ed257c671615 100644
--- a/fs/hfsplus/options.c
+++ b/fs/hfsplus/options.c
@@ -135,14 +135,22 @@ int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi)
 				printk(KERN_ERR "hfs: uid requires an argument\n");
 				return 0;
 			}
-			sbi->uid = (uid_t)tmp;
+			sbi->uid = make_kuid(current_user_ns(), (uid_t)tmp);
+			if (!uid_valid(sbi->uid)) {
+				printk(KERN_ERR "hfs: invalid uid specified\n");
+				return 0;
+			}
 			break;
 		case opt_gid:
 			if (match_int(&args[0], &tmp)) {
 				printk(KERN_ERR "hfs: gid requires an argument\n");
 				return 0;
 			}
-			sbi->gid = (gid_t)tmp;
+			sbi->gid = make_kgid(current_user_ns(), (gid_t)tmp);
+			if (!gid_valid(sbi->gid)) {
+				printk(KERN_ERR "hfs: invalid gid specified\n");
+				return 0;
+			}
 			break;
 		case opt_part:
 			if (match_int(&args[0], &sbi->part)) {
@@ -215,7 +223,8 @@ int hfsplus_show_options(struct seq_file *seq, struct dentry *root)
 	if (sbi->type != HFSPLUS_DEF_CR_TYPE)
 		seq_printf(seq, ",type=%.4s", (char *)&sbi->type);
 	seq_printf(seq, ",umask=%o,uid=%u,gid=%u", sbi->umask,
-		sbi->uid, sbi->gid);
+			from_kuid_munged(&init_user_ns, sbi->uid),
+			from_kgid_munged(&init_user_ns, sbi->gid));
 	if (sbi->part >= 0)
 		seq_printf(seq, ",part=%u", sbi->part);
 	if (sbi->session >= 0)
-- 
cgit 


From ba64e2b9e368fbe588ed5e3bb1494cc1dc4664a4 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 10 Feb 2012 11:35:50 -0800
Subject: userns: Convert isofs to use kuid/kgid where appropriate

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/isofs/inode.c | 17 +++++++++++------
 fs/isofs/isofs.h |  4 ++--
 fs/isofs/rock.c  |  4 ++--
 3 files changed, 15 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 29037c365ba4..a7d8e6cc5e0c 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -21,6 +21,7 @@
 #include <linux/cdrom.h>
 #include <linux/parser.h>
 #include <linux/mpage.h>
+#include <linux/user_namespace.h>
 
 #include "isofs.h"
 #include "zisofs.h"
@@ -171,8 +172,8 @@ struct iso9660_options{
 	unsigned int blocksize;
 	umode_t fmode;
 	umode_t dmode;
-	gid_t gid;
-	uid_t uid;
+	kgid_t gid;
+	kuid_t uid;
 	char *iocharset;
 	/* LVE */
 	s32 session;
@@ -383,8 +384,8 @@ static int parse_options(char *options, struct iso9660_options *popt)
 	popt->fmode = popt->dmode = ISOFS_INVALID_MODE;
 	popt->uid_set = 0;
 	popt->gid_set = 0;
-	popt->gid = 0;
-	popt->uid = 0;
+	popt->gid = GLOBAL_ROOT_GID;
+	popt->uid = GLOBAL_ROOT_UID;
 	popt->iocharset = NULL;
 	popt->utf8 = 0;
 	popt->overriderockperm = 0;
@@ -460,13 +461,17 @@ static int parse_options(char *options, struct iso9660_options *popt)
 		case Opt_uid:
 			if (match_int(&args[0], &option))
 				return 0;
-			popt->uid = option;
+			popt->uid = make_kuid(current_user_ns(), option);
+			if (!uid_valid(popt->uid))
+				return 0;
 			popt->uid_set = 1;
 			break;
 		case Opt_gid:
 			if (match_int(&args[0], &option))
 				return 0;
-			popt->gid = option;
+			popt->gid = make_kgid(current_user_ns(), option);
+			if (!gid_valid(popt->gid))
+				return 0;
 			popt->gid_set = 1;
 			break;
 		case Opt_mode:
diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h
index 3620ad1ea9bc..99167238518d 100644
--- a/fs/isofs/isofs.h
+++ b/fs/isofs/isofs.h
@@ -52,8 +52,8 @@ struct isofs_sb_info {
 
 	umode_t s_fmode;
 	umode_t s_dmode;
-	gid_t s_gid;
-	uid_t s_uid;
+	kgid_t s_gid;
+	kuid_t s_uid;
 	struct nls_table *s_nls_iocharset; /* Native language support table */
 };
 
diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c
index 70e79d0c756a..c0bf42472e40 100644
--- a/fs/isofs/rock.c
+++ b/fs/isofs/rock.c
@@ -364,8 +364,8 @@ repeat:
 		case SIG('P', 'X'):
 			inode->i_mode = isonum_733(rr->u.PX.mode);
 			set_nlink(inode, isonum_733(rr->u.PX.n_links));
-			inode->i_uid = isonum_733(rr->u.PX.uid);
-			inode->i_gid = isonum_733(rr->u.PX.gid);
+			i_uid_write(inode, isonum_733(rr->u.PX.uid));
+			i_gid_write(inode, isonum_733(rr->u.PX.gid));
 			break;
 		case SIG('P', 'N'):
 			{
-- 
cgit 


From 1a0a994ebe851206d02469782da6c1f9a0547d7d Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 10 Feb 2012 11:41:28 -0800
Subject: userns: Convert logfs to use kuid/kgid where appropriate

Cc: Joern Engel <joern@logfs.org>
Cc: Prasad Joshi <prasadjoshi.linux@gmail.com>
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/logfs/inode.c     | 4 ++--
 fs/logfs/readwrite.c | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c
index a422f42238b2..43f61c2013f9 100644
--- a/fs/logfs/inode.c
+++ b/fs/logfs/inode.c
@@ -192,8 +192,8 @@ static void logfs_init_inode(struct super_block *sb, struct inode *inode)
 	li->li_height	= 0;
 	li->li_used_bytes = 0;
 	li->li_block	= NULL;
-	inode->i_uid	= 0;
-	inode->i_gid	= 0;
+	i_uid_write(inode, 0);
+	i_gid_write(inode, 0);
 	inode->i_size	= 0;
 	inode->i_blocks	= 0;
 	inode->i_ctime	= CURRENT_TIME;
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index f1cb512c5019..a8d492d69213 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -119,8 +119,8 @@ static void logfs_disk_to_inode(struct logfs_disk_inode *di, struct inode*inode)
 	inode->i_mode	= be16_to_cpu(di->di_mode);
 	li->li_height	= di->di_height;
 	li->li_flags	= be32_to_cpu(di->di_flags);
-	inode->i_uid	= be32_to_cpu(di->di_uid);
-	inode->i_gid	= be32_to_cpu(di->di_gid);
+	i_uid_write(inode, be32_to_cpu(di->di_uid));
+	i_gid_write(inode, be32_to_cpu(di->di_gid));
 	inode->i_size	= be64_to_cpu(di->di_size);
 	logfs_set_blocks(inode, be64_to_cpu(di->di_used_bytes));
 	inode->i_atime	= be64_to_timespec(di->di_atime);
@@ -156,8 +156,8 @@ static void logfs_inode_to_disk(struct inode *inode, struct logfs_disk_inode*di)
 	di->di_height	= li->li_height;
 	di->di_pad	= 0;
 	di->di_flags	= cpu_to_be32(li->li_flags);
-	di->di_uid	= cpu_to_be32(inode->i_uid);
-	di->di_gid	= cpu_to_be32(inode->i_gid);
+	di->di_uid	= cpu_to_be32(i_uid_read(inode));
+	di->di_gid	= cpu_to_be32(i_gid_read(inode));
 	di->di_size	= cpu_to_be64(i_size_read(inode));
 	di->di_used_bytes = cpu_to_be64(li->li_used_bytes);
 	di->di_atime	= timespec_to_be64(inode->i_atime);
-- 
cgit 


From f303bdc55e9e74890eadc836c1edd8b5c21a7b89 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 10 Feb 2012 11:45:03 -0800
Subject: userns: Convert minix to use kuid/kgid where appropriate

Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/minix/inode.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 2a503ad020d5..d0e42c678923 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -460,8 +460,8 @@ static struct inode *V1_minix_iget(struct inode *inode)
 		return ERR_PTR(-EIO);
 	}
 	inode->i_mode = raw_inode->i_mode;
-	inode->i_uid = (uid_t)raw_inode->i_uid;
-	inode->i_gid = (gid_t)raw_inode->i_gid;
+	i_uid_write(inode, raw_inode->i_uid);
+	i_gid_write(inode, raw_inode->i_gid);
 	set_nlink(inode, raw_inode->i_nlinks);
 	inode->i_size = raw_inode->i_size;
 	inode->i_mtime.tv_sec = inode->i_atime.tv_sec = inode->i_ctime.tv_sec = raw_inode->i_time;
@@ -493,8 +493,8 @@ static struct inode *V2_minix_iget(struct inode *inode)
 		return ERR_PTR(-EIO);
 	}
 	inode->i_mode = raw_inode->i_mode;
-	inode->i_uid = (uid_t)raw_inode->i_uid;
-	inode->i_gid = (gid_t)raw_inode->i_gid;
+	i_uid_write(inode, raw_inode->i_uid);
+	i_gid_write(inode, raw_inode->i_gid);
 	set_nlink(inode, raw_inode->i_nlinks);
 	inode->i_size = raw_inode->i_size;
 	inode->i_mtime.tv_sec = raw_inode->i_mtime;
@@ -545,8 +545,8 @@ static struct buffer_head * V1_minix_update_inode(struct inode * inode)
 	if (!raw_inode)
 		return NULL;
 	raw_inode->i_mode = inode->i_mode;
-	raw_inode->i_uid = fs_high2lowuid(inode->i_uid);
-	raw_inode->i_gid = fs_high2lowgid(inode->i_gid);
+	raw_inode->i_uid = fs_high2lowuid(i_uid_read(inode));
+	raw_inode->i_gid = fs_high2lowgid(i_gid_read(inode));
 	raw_inode->i_nlinks = inode->i_nlink;
 	raw_inode->i_size = inode->i_size;
 	raw_inode->i_time = inode->i_mtime.tv_sec;
@@ -572,8 +572,8 @@ static struct buffer_head * V2_minix_update_inode(struct inode * inode)
 	if (!raw_inode)
 		return NULL;
 	raw_inode->i_mode = inode->i_mode;
-	raw_inode->i_uid = fs_high2lowuid(inode->i_uid);
-	raw_inode->i_gid = fs_high2lowgid(inode->i_gid);
+	raw_inode->i_uid = fs_high2lowuid(i_uid_read(inode));
+	raw_inode->i_gid = fs_high2lowgid(i_gid_read(inode));
 	raw_inode->i_nlinks = inode->i_nlink;
 	raw_inode->i_size = inode->i_size;
 	raw_inode->i_mtime = inode->i_mtime.tv_sec;
-- 
cgit 


From 305d3d0dbc22aa62fa4d8b9224826fd8c7104bc5 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 10 Feb 2012 12:31:23 -0800
Subject: userns: Convert nillfs2 to use kuid/kgid where appropriate

Acked-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/nilfs2/inode.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 6e2c3db976b2..4d31d2cca7fd 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -401,8 +401,8 @@ int nilfs_read_inode_common(struct inode *inode,
 	int err;
 
 	inode->i_mode = le16_to_cpu(raw_inode->i_mode);
-	inode->i_uid = (uid_t)le32_to_cpu(raw_inode->i_uid);
-	inode->i_gid = (gid_t)le32_to_cpu(raw_inode->i_gid);
+	i_uid_write(inode, le32_to_cpu(raw_inode->i_uid));
+	i_gid_write(inode, le32_to_cpu(raw_inode->i_gid));
 	set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
 	inode->i_size = le64_to_cpu(raw_inode->i_size);
 	inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime);
@@ -590,8 +590,8 @@ void nilfs_write_inode_common(struct inode *inode,
 	struct nilfs_inode_info *ii = NILFS_I(inode);
 
 	raw_inode->i_mode = cpu_to_le16(inode->i_mode);
-	raw_inode->i_uid = cpu_to_le32(inode->i_uid);
-	raw_inode->i_gid = cpu_to_le32(inode->i_gid);
+	raw_inode->i_uid = cpu_to_le32(i_uid_read(inode));
+	raw_inode->i_gid = cpu_to_le32(i_gid_read(inode));
 	raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
 	raw_inode->i_size = cpu_to_le64(inode->i_size);
 	raw_inode->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
-- 
cgit 


From b29f7751c9a880e842e48f421daf313b997ddd65 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 7 Feb 2012 16:29:36 -0800
Subject: userns: Convert ntfs to use kuid and kgid where appropriate

Cc: Anton Altaparmakov <anton@tuxera.com>
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/ntfs/inode.c  |  7 ++++---
 fs/ntfs/super.c  | 39 ++++++++++++++++++++++++++++++++-------
 fs/ntfs/volume.h |  5 +++--
 3 files changed, 39 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index c6dbd3db6ca8..1d27331e6fc9 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -2124,7 +2124,8 @@ int ntfs_read_inode_mount(struct inode *vi)
 			 * ntfs_read_inode() will have set up the default ones.
 			 */
 			/* Set uid and gid to root. */
-			vi->i_uid = vi->i_gid = 0;
+			vi->i_uid = GLOBAL_ROOT_UID;
+			vi->i_gid = GLOBAL_ROOT_GID;
 			/* Regular file. No access for anyone. */
 			vi->i_mode = S_IFREG;
 			/* No VFS initiated operations allowed for $MFT. */
@@ -2312,8 +2313,8 @@ int ntfs_show_options(struct seq_file *sf, struct dentry *root)
 	ntfs_volume *vol = NTFS_SB(root->d_sb);
 	int i;
 
-	seq_printf(sf, ",uid=%i", vol->uid);
-	seq_printf(sf, ",gid=%i", vol->gid);
+	seq_printf(sf, ",uid=%i", from_kuid_munged(&init_user_ns, vol->uid));
+	seq_printf(sf, ",gid=%i", from_kgid_munged(&init_user_ns, vol->gid));
 	if (vol->fmask == vol->dmask)
 		seq_printf(sf, ",umask=0%o", vol->fmask);
 	else {
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 2bc149d6a784..da01c165067d 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -102,8 +102,8 @@ static bool parse_options(ntfs_volume *vol, char *opt)
 	char *p, *v, *ov;
 	static char *utf8 = "utf8";
 	int errors = 0, sloppy = 0;
-	uid_t uid = (uid_t)-1;
-	gid_t gid = (gid_t)-1;
+	kuid_t uid = INVALID_UID;
+	kgid_t gid = INVALID_GID;
 	umode_t fmask = (umode_t)-1, dmask = (umode_t)-1;
 	int mft_zone_multiplier = -1, on_errors = -1;
 	int show_sys_files = -1, case_sensitive = -1, disable_sparse = -1;
@@ -128,6 +128,30 @@ static bool parse_options(ntfs_volume *vol, char *opt)
 		if (*v)							\
 			goto needs_val;					\
 	}
+#define NTFS_GETOPT_UID(option, variable)				\
+	if (!strcmp(p, option)) {					\
+		uid_t uid_value;					\
+		if (!v || !*v)						\
+			goto needs_arg;					\
+		uid_value = simple_strtoul(ov = v, &v, 0);		\
+		if (*v)							\
+			goto needs_val;					\
+		variable = make_kuid(current_user_ns(), uid_value);	\
+		if (!uid_valid(variable))				\
+			goto needs_val;					\
+	}
+#define NTFS_GETOPT_GID(option, variable)				\
+	if (!strcmp(p, option)) {					\
+		gid_t gid_value;					\
+		if (!v || !*v)						\
+			goto needs_arg;					\
+		gid_value = simple_strtoul(ov = v, &v, 0);		\
+		if (*v)							\
+			goto needs_val;					\
+		variable = make_kgid(current_user_ns(), gid_value);	\
+		if (!gid_valid(variable))				\
+			goto needs_val;					\
+	}
 #define NTFS_GETOPT_OCTAL(option, variable)				\
 	if (!strcmp(p, option)) {					\
 		if (!v || !*v)						\
@@ -165,8 +189,8 @@ static bool parse_options(ntfs_volume *vol, char *opt)
 	while ((p = strsep(&opt, ","))) {
 		if ((v = strchr(p, '=')))
 			*v++ = 0;
-		NTFS_GETOPT("uid", uid)
-		else NTFS_GETOPT("gid", gid)
+		NTFS_GETOPT_UID("uid", uid)
+		else NTFS_GETOPT_GID("gid", gid)
 		else NTFS_GETOPT_OCTAL("umask", fmask = dmask)
 		else NTFS_GETOPT_OCTAL("fmask", fmask)
 		else NTFS_GETOPT_OCTAL("dmask", dmask)
@@ -283,9 +307,9 @@ no_mount_options:
 		vol->on_errors = on_errors;
 	if (!vol->on_errors || vol->on_errors == ON_ERRORS_RECOVER)
 		vol->on_errors |= ON_ERRORS_CONTINUE;
-	if (uid != (uid_t)-1)
+	if (uid_valid(uid))
 		vol->uid = uid;
-	if (gid != (gid_t)-1)
+	if (gid_valid(gid))
 		vol->gid = gid;
 	if (fmask != (umode_t)-1)
 		vol->fmask = fmask;
@@ -1023,7 +1047,8 @@ static bool load_and_init_mft_mirror(ntfs_volume *vol)
 	 * ntfs_read_inode() will have set up the default ones.
 	 */
 	/* Set uid and gid to root. */
-	tmp_ino->i_uid = tmp_ino->i_gid = 0;
+	tmp_ino->i_uid = GLOBAL_ROOT_UID;
+	tmp_ino->i_gid = GLOBAL_ROOT_GID;
 	/* Regular file.  No access for anyone. */
 	tmp_ino->i_mode = S_IFREG;
 	/* No VFS initiated operations allowed for $MFTMirr. */
diff --git a/fs/ntfs/volume.h b/fs/ntfs/volume.h
index 15e3ba8d521a..4f579b02bc76 100644
--- a/fs/ntfs/volume.h
+++ b/fs/ntfs/volume.h
@@ -25,6 +25,7 @@
 #define _LINUX_NTFS_VOLUME_H
 
 #include <linux/rwsem.h>
+#include <linux/uidgid.h>
 
 #include "types.h"
 #include "layout.h"
@@ -46,8 +47,8 @@ typedef struct {
 					   sized blocks on the device. */
 	/* Configuration provided by user at mount time. */
 	unsigned long flags;		/* Miscellaneous flags, see below. */
-	uid_t uid;			/* uid that files will be mounted as. */
-	gid_t gid;			/* gid that files will be mounted as. */
+	kuid_t uid;			/* uid that files will be mounted as. */
+	kgid_t gid;			/* gid that files will be mounted as. */
 	umode_t fmask;			/* The mask for file permissions. */
 	umode_t dmask;			/* The mask for directory
 					   permissions. */
-- 
cgit 


From 80fcbe751f01bea34759bebd3d213c4ee244a719 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 7 Feb 2012 16:29:49 -0800
Subject: userns: Convert omfs to use kuid and kgid where appropriate

Acked-by: Bob Copeland <me@bobcopeland.com>
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/omfs/inode.c | 8 ++++++--
 fs/omfs/omfs.h  | 4 ++--
 2 files changed, 8 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index e6213b3725d1..25d715c7c87a 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -391,12 +391,16 @@ static int parse_options(char *options, struct omfs_sb_info *sbi)
 		case Opt_uid:
 			if (match_int(&args[0], &option))
 				return 0;
-			sbi->s_uid = option;
+			sbi->s_uid = make_kuid(current_user_ns(), option);
+			if (!uid_valid(sbi->s_uid))
+				return 0;
 			break;
 		case Opt_gid:
 			if (match_int(&args[0], &option))
 				return 0;
-			sbi->s_gid = option;
+			sbi->s_gid = make_kgid(current_user_ns(), option);
+			if (!gid_valid(sbi->s_gid))
+				return 0;
 			break;
 		case Opt_umask:
 			if (match_octal(&args[0], &option))
diff --git a/fs/omfs/omfs.h b/fs/omfs/omfs.h
index 8941f12c6b01..f0f8bc75e609 100644
--- a/fs/omfs/omfs.h
+++ b/fs/omfs/omfs.h
@@ -19,8 +19,8 @@ struct omfs_sb_info {
 	unsigned long **s_imap;
 	int s_imap_size;
 	struct mutex s_bitmap_lock;
-	int s_uid;
-	int s_gid;
+	kuid_t s_uid;
+	kgid_t s_gid;
 	int s_dmask;
 	int s_fmask;
 };
-- 
cgit 


From 511728d778e8d441e42df68101722548a0c97fe3 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 10 Feb 2012 12:11:12 -0800
Subject: userns: Convert the qnx4 filesystem to use kuid/kgid where
 appropriate

Acked-by: Anders Larsen <al@alarsen.net>
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/qnx4/inode.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 552e994e3aa1..5c3c7b02e17b 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -312,8 +312,8 @@ struct inode *qnx4_iget(struct super_block *sb, unsigned long ino)
 	    (ino % QNX4_INODES_PER_BLOCK);
 
 	inode->i_mode    = le16_to_cpu(raw_inode->di_mode);
-	inode->i_uid     = (uid_t)le16_to_cpu(raw_inode->di_uid);
-	inode->i_gid     = (gid_t)le16_to_cpu(raw_inode->di_gid);
+	i_uid_write(inode, (uid_t)le16_to_cpu(raw_inode->di_uid));
+	i_gid_write(inode, (gid_t)le16_to_cpu(raw_inode->di_gid));
 	set_nlink(inode, le16_to_cpu(raw_inode->di_nlink));
 	inode->i_size    = le32_to_cpu(raw_inode->di_size);
 	inode->i_mtime.tv_sec   = le32_to_cpu(raw_inode->di_mtime);
-- 
cgit 


From 85a03d1bba3b9132adc1aa8f4b8b9e11bac429ec Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sat, 7 Apr 2012 17:58:48 -0700
Subject: userns: Convert the qnx6 filesystem to use kuid/kgid where
 appropriate

Cc: Kai Bankett <chaosman@ontika.net>
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/qnx6/inode.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c
index 2049c814bda4..f4eef0b5e7b5 100644
--- a/fs/qnx6/inode.c
+++ b/fs/qnx6/inode.c
@@ -574,8 +574,8 @@ struct inode *qnx6_iget(struct super_block *sb, unsigned ino)
 	raw_inode = ((struct qnx6_inode_entry *)page_address(page)) + offs;
 
 	inode->i_mode    = fs16_to_cpu(sbi, raw_inode->di_mode);
-	inode->i_uid     = (uid_t)fs32_to_cpu(sbi, raw_inode->di_uid);
-	inode->i_gid     = (gid_t)fs32_to_cpu(sbi, raw_inode->di_gid);
+	i_uid_write(inode, (uid_t)fs32_to_cpu(sbi, raw_inode->di_uid));
+	i_gid_write(inode, (gid_t)fs32_to_cpu(sbi, raw_inode->di_gid));
 	inode->i_size    = fs64_to_cpu(sbi, raw_inode->di_size);
 	inode->i_mtime.tv_sec   = fs32_to_cpu(sbi, raw_inode->di_mtime);
 	inode->i_mtime.tv_nsec = 0;
-- 
cgit 


From a726ecce75896b2045f1646a2d7e31a5562b25a0 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 10 Feb 2012 12:19:23 -0800
Subject: userns: Convert the sysv filesystem to use kuid/kgid where
 appropriate

Cc: Christoph Hellwig <hch@infradead.org>
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/sysv/inode.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index 80e1e2b18df1..b23ab736685d 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -202,8 +202,8 @@ struct inode *sysv_iget(struct super_block *sb, unsigned int ino)
 	}
 	/* SystemV FS: kludge permissions if ino==SYSV_ROOT_INO ?? */
 	inode->i_mode = fs16_to_cpu(sbi, raw_inode->i_mode);
-	inode->i_uid = (uid_t)fs16_to_cpu(sbi, raw_inode->i_uid);
-	inode->i_gid = (gid_t)fs16_to_cpu(sbi, raw_inode->i_gid);
+	i_uid_write(inode, (uid_t)fs16_to_cpu(sbi, raw_inode->i_uid));
+	i_gid_write(inode, (gid_t)fs16_to_cpu(sbi, raw_inode->i_gid));
 	set_nlink(inode, fs16_to_cpu(sbi, raw_inode->i_nlink));
 	inode->i_size = fs32_to_cpu(sbi, raw_inode->i_size);
 	inode->i_atime.tv_sec = fs32_to_cpu(sbi, raw_inode->i_atime);
@@ -256,8 +256,8 @@ static int __sysv_write_inode(struct inode *inode, int wait)
 	}
 
 	raw_inode->i_mode = cpu_to_fs16(sbi, inode->i_mode);
-	raw_inode->i_uid = cpu_to_fs16(sbi, fs_high2lowuid(inode->i_uid));
-	raw_inode->i_gid = cpu_to_fs16(sbi, fs_high2lowgid(inode->i_gid));
+	raw_inode->i_uid = cpu_to_fs16(sbi, fs_high2lowuid(i_uid_read(inode)));
+	raw_inode->i_gid = cpu_to_fs16(sbi, fs_high2lowgid(i_gid_read(inode)));
 	raw_inode->i_nlink = cpu_to_fs16(sbi, inode->i_nlink);
 	raw_inode->i_size = cpu_to_fs32(sbi, inode->i_size);
 	raw_inode->i_atime = cpu_to_fs32(sbi, inode->i_atime.tv_sec);
-- 
cgit 


From 2f83ffa874ccb6ea4596220188d3e54b20203124 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 10 Feb 2012 11:26:34 -0800
Subject: userns: Convert freevxfs to use kuid/kgid where appropriate

Cc: Christoph Hellwig <hch@infradead.org>
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/freevxfs/vxfs_inode.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index ef67c95f12d4..f47df72cef17 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -224,8 +224,8 @@ vxfs_iinit(struct inode *ip, struct vxfs_inode_info *vip)
 {
 
 	ip->i_mode = vxfs_transmod(vip);
-	ip->i_uid = (uid_t)vip->vii_uid;
-	ip->i_gid = (gid_t)vip->vii_gid;
+	i_uid_write(ip, (uid_t)vip->vii_uid);
+	i_gid_write(ip, (gid_t)vip->vii_gid);
 
 	set_nlink(ip, vip->vii_nlink);
 	ip->i_size = vip->vii_size;
-- 
cgit 


From 29f82ae56e8798f7907d60145e0186082800d130 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 7 Feb 2012 16:28:57 -0800
Subject: userns: Convert hostfs to use kuid and kgid where appropriate

Cc: Jeff Dike <jdike@addtoit.com>
Cc: Richard Weinberger <richard@nod.at>
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/hostfs/hostfs_kern.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 124146543aa7..6c9f3a9d5e21 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -542,8 +542,8 @@ static int read_name(struct inode *ino, char *name)
 	ino->i_ino = st.ino;
 	ino->i_mode = st.mode;
 	set_nlink(ino, st.nlink);
-	ino->i_uid = st.uid;
-	ino->i_gid = st.gid;
+	i_uid_write(ino, st.uid);
+	i_gid_write(ino, st.gid);
 	ino->i_atime = st.atime;
 	ino->i_mtime = st.mtime;
 	ino->i_ctime = st.ctime;
@@ -808,11 +808,11 @@ int hostfs_setattr(struct dentry *dentry, struct iattr *attr)
 	}
 	if (attr->ia_valid & ATTR_UID) {
 		attrs.ia_valid |= HOSTFS_ATTR_UID;
-		attrs.ia_uid = attr->ia_uid;
+		attrs.ia_uid = from_kuid(&init_user_ns, attr->ia_uid);
 	}
 	if (attr->ia_valid & ATTR_GID) {
 		attrs.ia_valid |= HOSTFS_ATTR_GID;
-		attrs.ia_gid = attr->ia_gid;
+		attrs.ia_gid = from_kgid(&init_user_ns, attr->ia_gid);
 	}
 	if (attr->ia_valid & ATTR_SIZE) {
 		attrs.ia_valid |= HOSTFS_ATTR_SIZE;
-- 
cgit 


From d2b31ca644fdc8704de3367a6a56a5c958c77f53 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 1 Jun 2012 16:14:19 -0600
Subject: userns: Teach security_path_chown to take kuids and kgids

Don't make the security modules deal with raw user space uid and
gids instead pass in a kuid_t and a kgid_t so that security modules
only have to deal with internal kernel uids and gids.

Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: James Morris <james.l.morris@oracle.com>
Cc: John Johansen <john.johansen@canonical.com>
Cc: Kentaro Takeda <takedakn@nttdata.co.jp>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/open.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/open.c b/fs/open.c
index f3d96e7e7b19..2b2573980d0f 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -534,7 +534,7 @@ static int chown_common(struct path *path, uid_t user, gid_t group)
 		newattrs.ia_valid |=
 			ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV;
 	mutex_lock(&inode->i_mutex);
-	error = security_path_chown(path, user, group);
+	error = security_path_chown(path, uid, gid);
 	if (!error)
 		error = notify_change(path->dentry, &newattrs);
 	mutex_unlock(&inode->i_mutex);
-- 
cgit 


From 8fed10be0029acda5564f03b9cc1fc4cb7470bae Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 7 Feb 2012 16:20:16 -0800
Subject: userns: Convert affs to use kuid/kgid wherwe appropriate

Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/affs/affs.h  |  4 ++--
 fs/affs/inode.c | 20 ++++++++++----------
 fs/affs/super.c | 18 +++++++++++-------
 3 files changed, 23 insertions(+), 19 deletions(-)

(limited to 'fs')

diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index 6e216419f340..3952121f2f28 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -88,8 +88,8 @@ struct affs_sb_info {
 	u32 s_root_block;		/* FFS root block number. */
 	int s_hashsize;			/* Size of hash table. */
 	unsigned long s_flags;		/* See below. */
-	uid_t s_uid;			/* uid to override */
-	gid_t s_gid;			/* gid to override */
+	kuid_t s_uid;			/* uid to override */
+	kgid_t s_gid;			/* gid to override */
 	umode_t s_mode;			/* mode to override */
 	struct buffer_head *s_root_bh;	/* Cached root block. */
 	struct mutex s_bmlock;		/* Protects bitmap access. */
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index 8bc4a59f4e7e..15c484268229 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -80,17 +80,17 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino)
 	if (id == 0 || sbi->s_flags & SF_SETUID)
 		inode->i_uid = sbi->s_uid;
 	else if (id == 0xFFFF && sbi->s_flags & SF_MUFS)
-		inode->i_uid = 0;
+		i_uid_write(inode, 0);
 	else
-		inode->i_uid = id;
+		i_uid_write(inode, id);
 
 	id = be16_to_cpu(tail->gid);
 	if (id == 0 || sbi->s_flags & SF_SETGID)
 		inode->i_gid = sbi->s_gid;
 	else if (id == 0xFFFF && sbi->s_flags & SF_MUFS)
-		inode->i_gid = 0;
+		i_gid_write(inode, 0);
 	else
-		inode->i_gid = id;
+		i_gid_write(inode, id);
 
 	switch (be32_to_cpu(tail->stype)) {
 	case ST_ROOT:
@@ -193,13 +193,13 @@ affs_write_inode(struct inode *inode, struct writeback_control *wbc)
 		tail->size = cpu_to_be32(inode->i_size);
 		secs_to_datestamp(inode->i_mtime.tv_sec,&tail->change);
 		if (!(inode->i_ino == AFFS_SB(sb)->s_root_block)) {
-			uid = inode->i_uid;
-			gid = inode->i_gid;
+			uid = i_uid_read(inode);
+			gid = i_gid_read(inode);
 			if (AFFS_SB(sb)->s_flags & SF_MUFS) {
-				if (inode->i_uid == 0 || inode->i_uid == 0xFFFF)
-					uid = inode->i_uid ^ ~0;
-				if (inode->i_gid == 0 || inode->i_gid == 0xFFFF)
-					gid = inode->i_gid ^ ~0;
+				if (uid == 0 || uid == 0xFFFF)
+					uid = uid ^ ~0;
+				if (gid == 0 || gid == 0xFFFF)
+					gid = gid ^ ~0;
 			}
 			if (!(AFFS_SB(sb)->s_flags & SF_SETUID))
 				tail->uid = cpu_to_be16(uid);
diff --git a/fs/affs/super.c b/fs/affs/super.c
index c70f1e5fc024..966c8c06b9b3 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -188,7 +188,7 @@ static const match_table_t tokens = {
 };
 
 static int
-parse_options(char *options, uid_t *uid, gid_t *gid, int *mode, int *reserved, s32 *root,
+parse_options(char *options, kuid_t *uid, kgid_t *gid, int *mode, int *reserved, s32 *root,
 		int *blocksize, char **prefix, char *volume, unsigned long *mount_opts)
 {
 	char *p;
@@ -253,13 +253,17 @@ parse_options(char *options, uid_t *uid, gid_t *gid, int *mode, int *reserved, s
 		case Opt_setgid:
 			if (match_int(&args[0], &option))
 				return 0;
-			*gid = option;
+			*gid = make_kgid(current_user_ns(), option);
+			if (!gid_valid(*gid))
+				return 0;
 			*mount_opts |= SF_SETGID;
 			break;
 		case Opt_setuid:
 			if (match_int(&args[0], &option))
 				return 0;
-			*uid = option;
+			*uid = make_kuid(current_user_ns(), option);
+			if (!uid_valid(*uid))
+				return 0;
 			*mount_opts |= SF_SETUID;
 			break;
 		case Opt_verbose:
@@ -301,8 +305,8 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent)
 	int			 num_bm;
 	int			 i, j;
 	s32			 key;
-	uid_t			 uid;
-	gid_t			 gid;
+	kuid_t			 uid;
+	kgid_t			 gid;
 	int			 reserved;
 	unsigned long		 mount_flags;
 	int			 tmp_flags;	/* fix remount prototype... */
@@ -527,8 +531,8 @@ affs_remount(struct super_block *sb, int *flags, char *data)
 {
 	struct affs_sb_info	*sbi = AFFS_SB(sb);
 	int			 blocksize;
-	uid_t			 uid;
-	gid_t			 gid;
+	kuid_t			 uid;
+	kgid_t			 gid;
 	int			 mode;
 	int			 reserved;
 	int			 root_block;
-- 
cgit 


From 7f5b82b835ee62b365f39373e35162eb7a072c5f Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 25 Apr 2012 03:57:31 -0700
Subject: userns: Convert bfs to use kuid/kgid where appropriate

Cc: "Tigran A. Aivazian" <tigran@aivazian.fsnet.co.uk>
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/bfs/inode.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 9870417c26e7..b242beba58ed 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -76,8 +76,8 @@ struct inode *bfs_iget(struct super_block *sb, unsigned long ino)
 	BFS_I(inode)->i_sblock =  le32_to_cpu(di->i_sblock);
 	BFS_I(inode)->i_eblock =  le32_to_cpu(di->i_eblock);
 	BFS_I(inode)->i_dsk_ino = le16_to_cpu(di->i_ino);
-	inode->i_uid =  le32_to_cpu(di->i_uid);
-	inode->i_gid =  le32_to_cpu(di->i_gid);
+	i_uid_write(inode, le32_to_cpu(di->i_uid));
+	i_gid_write(inode,  le32_to_cpu(di->i_gid));
 	set_nlink(inode, le32_to_cpu(di->i_nlink));
 	inode->i_size = BFS_FILESIZE(di);
 	inode->i_blocks = BFS_FILEBLOCKS(di);
@@ -139,8 +139,8 @@ static int bfs_write_inode(struct inode *inode, struct writeback_control *wbc)
 
 	di->i_ino = cpu_to_le16(ino);
 	di->i_mode = cpu_to_le32(inode->i_mode);
-	di->i_uid = cpu_to_le32(inode->i_uid);
-	di->i_gid = cpu_to_le32(inode->i_gid);
+	di->i_uid = cpu_to_le32(i_uid_read(inode));
+	di->i_gid = cpu_to_le32(i_gid_read(inode));
 	di->i_nlink = cpu_to_le32(inode->i_nlink);
 	di->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
 	di->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec);
-- 
cgit 


From 2f2f43d3c7b1da8dba56716dd1be196b6f57bf9b Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 10 Feb 2012 11:05:07 -0800
Subject: userns: Convert btrfs to use kuid/kgid where appropriate

Cc: Chris Mason <chris.mason@fusionio.com>
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/btrfs/delayed-inode.c | 8 ++++----
 fs/btrfs/inode.c         | 8 ++++----
 fs/btrfs/ioctl.c         | 6 +++---
 3 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 335605c8ceab..f908c5180795 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1715,8 +1715,8 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
 				  struct btrfs_inode_item *inode_item,
 				  struct inode *inode)
 {
-	btrfs_set_stack_inode_uid(inode_item, inode->i_uid);
-	btrfs_set_stack_inode_gid(inode_item, inode->i_gid);
+	btrfs_set_stack_inode_uid(inode_item, i_uid_read(inode));
+	btrfs_set_stack_inode_gid(inode_item, i_gid_read(inode));
 	btrfs_set_stack_inode_size(inode_item, BTRFS_I(inode)->disk_i_size);
 	btrfs_set_stack_inode_mode(inode_item, inode->i_mode);
 	btrfs_set_stack_inode_nlink(inode_item, inode->i_nlink);
@@ -1764,8 +1764,8 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev)
 
 	inode_item = &delayed_node->inode_item;
 
-	inode->i_uid = btrfs_stack_inode_uid(inode_item);
-	inode->i_gid = btrfs_stack_inode_gid(inode_item);
+	i_uid_write(inode, btrfs_stack_inode_uid(inode_item));
+	i_gid_write(inode, btrfs_stack_inode_gid(inode_item));
 	btrfs_i_size_write(inode, btrfs_stack_inode_size(inode_item));
 	inode->i_mode = btrfs_stack_inode_mode(inode_item);
 	set_nlink(inode, btrfs_stack_inode_nlink(inode_item));
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 83baec24946d..53687149c077 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2570,8 +2570,8 @@ static void btrfs_read_locked_inode(struct inode *inode)
 				    struct btrfs_inode_item);
 	inode->i_mode = btrfs_inode_mode(leaf, inode_item);
 	set_nlink(inode, btrfs_inode_nlink(leaf, inode_item));
-	inode->i_uid = btrfs_inode_uid(leaf, inode_item);
-	inode->i_gid = btrfs_inode_gid(leaf, inode_item);
+	i_uid_write(inode, btrfs_inode_uid(leaf, inode_item));
+	i_gid_write(inode, btrfs_inode_gid(leaf, inode_item));
 	btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item));
 
 	tspec = btrfs_inode_atime(inode_item);
@@ -2649,8 +2649,8 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
 			    struct btrfs_inode_item *item,
 			    struct inode *inode)
 {
-	btrfs_set_inode_uid(leaf, item, inode->i_uid);
-	btrfs_set_inode_gid(leaf, item, inode->i_gid);
+	btrfs_set_inode_uid(leaf, item, i_uid_read(inode));
+	btrfs_set_inode_gid(leaf, item, i_gid_read(inode));
 	btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size);
 	btrfs_set_inode_mode(leaf, item, inode->i_mode);
 	btrfs_set_inode_nlink(leaf, item, inode->i_nlink);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index bc2f6ffff3cf..1292682c537f 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -575,13 +575,13 @@ fail:
 */
 static inline int btrfs_check_sticky(struct inode *dir, struct inode *inode)
 {
-	uid_t fsuid = current_fsuid();
+	kuid_t fsuid = current_fsuid();
 
 	if (!(dir->i_mode & S_ISVTX))
 		return 0;
-	if (inode->i_uid == fsuid)
+	if (uid_eq(inode->i_uid, fsuid))
 		return 0;
-	if (dir->i_uid == fsuid)
+	if (uid_eq(dir->i_uid, fsuid))
 		return 0;
 	return !capable(CAP_FOWNER);
 }
-- 
cgit 


From 0e1a43c71612cd0b6b50da03040c85fbf3d24211 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 7 Feb 2012 16:27:53 -0800
Subject: userns: Convert hpfs to use kuid and kgid where appropriate

Cc: Mikulas Patocka <mikulas@artax.karlin.mff.cuni.cz>
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/hpfs/hpfs_fn.h |  4 ++--
 fs/hpfs/inode.c   | 19 +++++++++++--------
 fs/hpfs/namei.c   |  8 ++++----
 fs/hpfs/super.c   | 18 +++++++++++-------
 4 files changed, 28 insertions(+), 21 deletions(-)

(limited to 'fs')

diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index ac1ead194db5..7102aaecc244 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -63,8 +63,8 @@ struct hpfs_sb_info {
 	unsigned sb_dmap;		/* sector number of dnode bit map */
 	unsigned sb_n_free;		/* free blocks for statfs, or -1 */
 	unsigned sb_n_free_dnodes;	/* free dnodes for statfs, or -1 */
-	uid_t sb_uid;			/* uid from mount options */
-	gid_t sb_gid;			/* gid from mount options */
+	kuid_t sb_uid;			/* uid from mount options */
+	kgid_t sb_gid;			/* gid from mount options */
 	umode_t sb_mode;		/* mode from mount options */
 	unsigned sb_eas : 2;		/* eas: 0-ignore, 1-ro, 2-rw */
 	unsigned sb_err : 2;		/* on errs: 0-cont, 1-ro, 2-panic */
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index ed671e0ea784..804a9a842cbc 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -7,6 +7,7 @@
  */
 
 #include <linux/slab.h>
+#include <linux/user_namespace.h>
 #include "hpfs_fn.h"
 
 void hpfs_init_inode(struct inode *i)
@@ -60,14 +61,14 @@ void hpfs_read_inode(struct inode *i)
 	if (hpfs_sb(i->i_sb)->sb_eas) {
 		if ((ea = hpfs_get_ea(i->i_sb, fnode, "UID", &ea_size))) {
 			if (ea_size == 2) {
-				i->i_uid = le16_to_cpu(*(__le16*)ea);
+				i_uid_write(i, le16_to_cpu(*(__le16*)ea));
 				hpfs_inode->i_ea_uid = 1;
 			}
 			kfree(ea);
 		}
 		if ((ea = hpfs_get_ea(i->i_sb, fnode, "GID", &ea_size))) {
 			if (ea_size == 2) {
-				i->i_gid = le16_to_cpu(*(__le16*)ea);
+				i_gid_write(i, le16_to_cpu(*(__le16*)ea));
 				hpfs_inode->i_ea_gid = 1;
 			}
 			kfree(ea);
@@ -149,13 +150,13 @@ static void hpfs_write_inode_ea(struct inode *i, struct fnode *fnode)
 		hpfs_error(i->i_sb, "fnode %08x has some unknown HPFS386 stuctures", i->i_ino);
 	} else*/ if (hpfs_sb(i->i_sb)->sb_eas >= 2) {
 		__le32 ea;
-		if ((i->i_uid != hpfs_sb(i->i_sb)->sb_uid) || hpfs_inode->i_ea_uid) {
-			ea = cpu_to_le32(i->i_uid);
+		if (!uid_eq(i->i_uid, hpfs_sb(i->i_sb)->sb_uid) || hpfs_inode->i_ea_uid) {
+			ea = cpu_to_le32(i_uid_read(i));
 			hpfs_set_ea(i, fnode, "UID", (char*)&ea, 2);
 			hpfs_inode->i_ea_uid = 1;
 		}
-		if ((i->i_gid != hpfs_sb(i->i_sb)->sb_gid) || hpfs_inode->i_ea_gid) {
-			ea = cpu_to_le32(i->i_gid);
+		if (!gid_eq(i->i_gid, hpfs_sb(i->i_sb)->sb_gid) || hpfs_inode->i_ea_gid) {
+			ea = cpu_to_le32(i_gid_read(i));
 			hpfs_set_ea(i, fnode, "GID", (char *)&ea, 2);
 			hpfs_inode->i_ea_gid = 1;
 		}
@@ -261,9 +262,11 @@ int hpfs_setattr(struct dentry *dentry, struct iattr *attr)
 	hpfs_lock(inode->i_sb);
 	if (inode->i_ino == hpfs_sb(inode->i_sb)->sb_root)
 		goto out_unlock;
-	if ((attr->ia_valid & ATTR_UID) && attr->ia_uid >= 0x10000)
+	if ((attr->ia_valid & ATTR_UID) &&
+	    from_kuid(&init_user_ns, attr->ia_uid) >= 0x10000)
 		goto out_unlock;
-	if ((attr->ia_valid & ATTR_GID) && attr->ia_gid >= 0x10000)
+	if ((attr->ia_valid & ATTR_GID) &&
+	    from_kgid(&init_user_ns, attr->ia_gid) >= 0x10000)
 		goto out_unlock;
 	if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size > inode->i_size)
 		goto out_unlock;
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index bc9082482f68..345713d2f8f3 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -91,8 +91,8 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 	inc_nlink(dir);
 	insert_inode_hash(result);
 
-	if (result->i_uid != current_fsuid() ||
-	    result->i_gid != current_fsgid() ||
+	if (!uid_eq(result->i_uid, current_fsuid()) ||
+	    !gid_eq(result->i_gid, current_fsgid()) ||
 	    result->i_mode != (mode | S_IFDIR)) {
 		result->i_uid = current_fsuid();
 		result->i_gid = current_fsgid();
@@ -179,8 +179,8 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, b
 
 	insert_inode_hash(result);
 
-	if (result->i_uid != current_fsuid() ||
-	    result->i_gid != current_fsgid() ||
+	if (!uid_eq(result->i_uid, current_fsuid()) ||
+	    !gid_eq(result->i_gid, current_fsgid()) ||
 	    result->i_mode != (mode | S_IFREG)) {
 		result->i_uid = current_fsuid();
 		result->i_gid = current_fsgid();
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index 706a12c083ea..a152783602d9 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -251,7 +251,7 @@ static const match_table_t tokens = {
 	{Opt_err, NULL},
 };
 
-static int parse_opts(char *opts, uid_t *uid, gid_t *gid, umode_t *umask,
+static int parse_opts(char *opts, kuid_t *uid, kgid_t *gid, umode_t *umask,
 		      int *lowercase, int *eas, int *chk, int *errs,
 		      int *chkdsk, int *timeshift)
 {
@@ -276,12 +276,16 @@ static int parse_opts(char *opts, uid_t *uid, gid_t *gid, umode_t *umask,
 		case Opt_uid:
 			if (match_int(args, &option))
 				return 0;
-			*uid = option;
+			*uid = make_kuid(current_user_ns(), option);
+			if (!uid_valid(*uid))
+				return 0;
 			break;
 		case Opt_gid:
 			if (match_int(args, &option))
 				return 0;
-			*gid = option;
+			*gid = make_kgid(current_user_ns(), option);
+			if (!gid_valid(*gid))
+				return 0;
 			break;
 		case Opt_umask:
 			if (match_octal(args, &option))
@@ -378,8 +382,8 @@ HPFS filesystem options:\n\
 
 static int hpfs_remount_fs(struct super_block *s, int *flags, char *data)
 {
-	uid_t uid;
-	gid_t gid;
+	kuid_t uid;
+	kgid_t gid;
 	umode_t umask;
 	int lowercase, eas, chk, errs, chkdsk, timeshift;
 	int o;
@@ -455,8 +459,8 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
 	struct hpfs_sb_info *sbi;
 	struct inode *root;
 
-	uid_t uid;
-	gid_t gid;
+	kuid_t uid;
+	kgid_t gid;
 	umode_t umask;
 	int lowercase, eas, chk, errs, chkdsk, timeshift;
 
-- 
cgit 


From 0cfe53d3c3875e1dd565b30737cd5c6691c00188 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 7 Feb 2012 16:28:39 -0800
Subject: userns: Convert jffs2 to use kuid and kgid where appropriate

- General routine uid/gid conversion work
- When storing posix acls treat ACL_USER and ACL_GROUP separately
  so I can call from_kuid or from_kgid as appropriate.
- When reading posix acls treat ACL_USER and ACL_GROUP separately
  so I can call make_kuid or make_kgid as appropriate.

Cc: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/jffs2/acl.c      | 26 ++++++++++++++++++++------
 fs/jffs2/file.c     |  8 ++++----
 fs/jffs2/fs.c       | 24 +++++++++++++-----------
 fs/jffs2/os-linux.h |  4 ++--
 4 files changed, 39 insertions(+), 23 deletions(-)

(limited to 'fs')

diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index 42e4edc17a90..223283c30111 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -94,15 +94,23 @@ static struct posix_acl *jffs2_acl_from_medium(void *value, size_t size)
 			case ACL_MASK:
 			case ACL_OTHER:
 				value += sizeof(struct jffs2_acl_entry_short);
-				acl->a_entries[i].e_id = ACL_UNDEFINED_ID;
 				break;
 
 			case ACL_USER:
+				value += sizeof(struct jffs2_acl_entry);
+				if (value > end)
+					goto fail;
+				acl->a_entries[i].e_uid =
+					make_kuid(&init_user_ns,
+						  je32_to_cpu(entry->e_id));
+				break;
 			case ACL_GROUP:
 				value += sizeof(struct jffs2_acl_entry);
 				if (value > end)
 					goto fail;
-				acl->a_entries[i].e_id = je32_to_cpu(entry->e_id);
+				acl->a_entries[i].e_gid =
+					make_kgid(&init_user_ns,
+						  je32_to_cpu(entry->e_id));
 				break;
 
 			default:
@@ -131,13 +139,19 @@ static void *jffs2_acl_to_medium(const struct posix_acl *acl, size_t *size)
 	header->a_version = cpu_to_je32(JFFS2_ACL_VERSION);
 	e = header + 1;
 	for (i=0; i < acl->a_count; i++) {
+		const struct posix_acl_entry *acl_e = &acl->a_entries[i];
 		entry = e;
-		entry->e_tag = cpu_to_je16(acl->a_entries[i].e_tag);
-		entry->e_perm = cpu_to_je16(acl->a_entries[i].e_perm);
-		switch(acl->a_entries[i].e_tag) {
+		entry->e_tag = cpu_to_je16(acl_e->e_tag);
+		entry->e_perm = cpu_to_je16(acl_e->e_perm);
+		switch(acl_e->e_tag) {
 			case ACL_USER:
+				entry->e_id = cpu_to_je32(
+					from_kuid(&init_user_ns, acl_e->e_uid));
+				e += sizeof(struct jffs2_acl_entry);
+				break;
 			case ACL_GROUP:
-				entry->e_id = cpu_to_je32(acl->a_entries[i].e_id);
+				entry->e_id = cpu_to_je32(
+					from_kgid(&init_user_ns, acl_e->e_gid));
 				e += sizeof(struct jffs2_acl_entry);
 				break;
 
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index db3889ba8818..60ef3fb707ff 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -175,8 +175,8 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
 		ri.ino = cpu_to_je32(f->inocache->ino);
 		ri.version = cpu_to_je32(++f->highest_version);
 		ri.mode = cpu_to_jemode(inode->i_mode);
-		ri.uid = cpu_to_je16(inode->i_uid);
-		ri.gid = cpu_to_je16(inode->i_gid);
+		ri.uid = cpu_to_je16(i_uid_read(inode));
+		ri.gid = cpu_to_je16(i_gid_read(inode));
 		ri.isize = cpu_to_je32(max((uint32_t)inode->i_size, pageofs));
 		ri.atime = ri.ctime = ri.mtime = cpu_to_je32(get_seconds());
 		ri.offset = cpu_to_je32(inode->i_size);
@@ -283,8 +283,8 @@ static int jffs2_write_end(struct file *filp, struct address_space *mapping,
 	/* Set the fields that the generic jffs2_write_inode_range() code can't find */
 	ri->ino = cpu_to_je32(inode->i_ino);
 	ri->mode = cpu_to_jemode(inode->i_mode);
-	ri->uid = cpu_to_je16(inode->i_uid);
-	ri->gid = cpu_to_je16(inode->i_gid);
+	ri->uid = cpu_to_je16(i_uid_read(inode));
+	ri->gid = cpu_to_je16(i_gid_read(inode));
 	ri->isize = cpu_to_je32((uint32_t)inode->i_size);
 	ri->atime = ri->ctime = ri->mtime = cpu_to_je32(get_seconds());
 
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 3d3092eda811..fe3c0527545f 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -99,8 +99,10 @@ int jffs2_do_setattr (struct inode *inode, struct iattr *iattr)
 	ri->ino = cpu_to_je32(inode->i_ino);
 	ri->version = cpu_to_je32(++f->highest_version);
 
-	ri->uid = cpu_to_je16((ivalid & ATTR_UID)?iattr->ia_uid:inode->i_uid);
-	ri->gid = cpu_to_je16((ivalid & ATTR_GID)?iattr->ia_gid:inode->i_gid);
+	ri->uid = cpu_to_je16((ivalid & ATTR_UID)?
+		from_kuid(&init_user_ns, iattr->ia_uid):i_uid_read(inode));
+	ri->gid = cpu_to_je16((ivalid & ATTR_GID)?
+		from_kgid(&init_user_ns, iattr->ia_gid):i_gid_read(inode));
 
 	if (ivalid & ATTR_MODE)
 		ri->mode = cpu_to_jemode(iattr->ia_mode);
@@ -147,8 +149,8 @@ int jffs2_do_setattr (struct inode *inode, struct iattr *iattr)
 	inode->i_ctime = ITIME(je32_to_cpu(ri->ctime));
 	inode->i_mtime = ITIME(je32_to_cpu(ri->mtime));
 	inode->i_mode = jemode_to_cpu(ri->mode);
-	inode->i_uid = je16_to_cpu(ri->uid);
-	inode->i_gid = je16_to_cpu(ri->gid);
+	i_uid_write(inode, je16_to_cpu(ri->uid));
+	i_gid_write(inode, je16_to_cpu(ri->gid));
 
 
 	old_metadata = f->metadata;
@@ -276,8 +278,8 @@ struct inode *jffs2_iget(struct super_block *sb, unsigned long ino)
 		return ERR_PTR(ret);
 	}
 	inode->i_mode = jemode_to_cpu(latest_node.mode);
-	inode->i_uid = je16_to_cpu(latest_node.uid);
-	inode->i_gid = je16_to_cpu(latest_node.gid);
+	i_uid_write(inode, je16_to_cpu(latest_node.uid));
+	i_gid_write(inode, je16_to_cpu(latest_node.gid));
 	inode->i_size = je32_to_cpu(latest_node.isize);
 	inode->i_atime = ITIME(je32_to_cpu(latest_node.atime));
 	inode->i_mtime = ITIME(je32_to_cpu(latest_node.mtime));
@@ -440,14 +442,14 @@ struct inode *jffs2_new_inode (struct inode *dir_i, umode_t mode, struct jffs2_r
 
 	memset(ri, 0, sizeof(*ri));
 	/* Set OS-specific defaults for new inodes */
-	ri->uid = cpu_to_je16(current_fsuid());
+	ri->uid = cpu_to_je16(from_kuid(&init_user_ns, current_fsuid()));
 
 	if (dir_i->i_mode & S_ISGID) {
-		ri->gid = cpu_to_je16(dir_i->i_gid);
+		ri->gid = cpu_to_je16(i_gid_read(dir_i));
 		if (S_ISDIR(mode))
 			mode |= S_ISGID;
 	} else {
-		ri->gid = cpu_to_je16(current_fsgid());
+		ri->gid = cpu_to_je16(from_kgid(&init_user_ns, current_fsgid()));
 	}
 
 	/* POSIX ACLs have to be processed now, at least partly.
@@ -467,8 +469,8 @@ struct inode *jffs2_new_inode (struct inode *dir_i, umode_t mode, struct jffs2_r
 	set_nlink(inode, 1);
 	inode->i_ino = je32_to_cpu(ri->ino);
 	inode->i_mode = jemode_to_cpu(ri->mode);
-	inode->i_gid = je16_to_cpu(ri->gid);
-	inode->i_uid = je16_to_cpu(ri->uid);
+	i_gid_write(inode, je16_to_cpu(ri->gid));
+	i_uid_write(inode, je16_to_cpu(ri->uid));
 	inode->i_atime = inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC;
 	ri->atime = ri->mtime = ri->ctime = cpu_to_je32(I_SEC(inode->i_mtime));
 
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h
index bcd983d7e7f9..d200a9b8fd5e 100644
--- a/fs/jffs2/os-linux.h
+++ b/fs/jffs2/os-linux.h
@@ -27,8 +27,8 @@ struct kvec;
 
 #define JFFS2_F_I_SIZE(f) (OFNI_EDONI_2SFFJ(f)->i_size)
 #define JFFS2_F_I_MODE(f) (OFNI_EDONI_2SFFJ(f)->i_mode)
-#define JFFS2_F_I_UID(f) (OFNI_EDONI_2SFFJ(f)->i_uid)
-#define JFFS2_F_I_GID(f) (OFNI_EDONI_2SFFJ(f)->i_gid)
+#define JFFS2_F_I_UID(f) (i_uid_read(OFNI_EDONI_2SFFJ(f)))
+#define JFFS2_F_I_GID(f) (i_gid_read(OFNI_EDONI_2SFFJ(f)))
 #define JFFS2_F_I_RDEV(f) (OFNI_EDONI_2SFFJ(f)->i_rdev)
 
 #define ITIME(sec) ((struct timespec){sec, 0})
-- 
cgit 


From c18cdc1a3ec643b5c6c0d65aac1a6bf8e461778f Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 10 Feb 2012 11:40:34 -0800
Subject: userns: Convert jfs to use kuid/kgid where appropriate

Cc: Dave Kleikamp <shaggy@kernel.org>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/jfs/file.c       |  4 ++--
 fs/jfs/jfs_imap.c   | 22 ++++++++++++----------
 fs/jfs/jfs_incore.h |  8 ++++----
 fs/jfs/super.c      | 22 +++++++++++++++-------
 4 files changed, 33 insertions(+), 23 deletions(-)

(limited to 'fs')

diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index 844f9460cb11..9d3afd157f99 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -108,8 +108,8 @@ int jfs_setattr(struct dentry *dentry, struct iattr *iattr)
 
 	if (is_quota_modification(inode, iattr))
 		dquot_initialize(inode);
-	if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) ||
-	    (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) {
+	if ((iattr->ia_valid & ATTR_UID && !uid_eq(iattr->ia_uid, inode->i_uid)) ||
+	    (iattr->ia_valid & ATTR_GID && !gid_eq(iattr->ia_gid, inode->i_gid))) {
 		rc = dquot_transfer(inode, iattr);
 		if (rc)
 			return rc;
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index 1b6f15f191b3..6ba4006e011b 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -3078,15 +3078,15 @@ static int copy_from_dinode(struct dinode * dip, struct inode *ip)
 	}
 	set_nlink(ip, le32_to_cpu(dip->di_nlink));
 
-	jfs_ip->saved_uid = le32_to_cpu(dip->di_uid);
-	if (sbi->uid == -1)
+	jfs_ip->saved_uid = make_kuid(&init_user_ns, le32_to_cpu(dip->di_uid));
+	if (!uid_valid(sbi->uid))
 		ip->i_uid = jfs_ip->saved_uid;
 	else {
 		ip->i_uid = sbi->uid;
 	}
 
-	jfs_ip->saved_gid = le32_to_cpu(dip->di_gid);
-	if (sbi->gid == -1)
+	jfs_ip->saved_gid = make_kgid(&init_user_ns, le32_to_cpu(dip->di_gid));
+	if (!gid_valid(sbi->gid))
 		ip->i_gid = jfs_ip->saved_gid;
 	else {
 		ip->i_gid = sbi->gid;
@@ -3150,14 +3150,16 @@ static void copy_to_dinode(struct dinode * dip, struct inode *ip)
 	dip->di_size = cpu_to_le64(ip->i_size);
 	dip->di_nblocks = cpu_to_le64(PBLK2LBLK(ip->i_sb, ip->i_blocks));
 	dip->di_nlink = cpu_to_le32(ip->i_nlink);
-	if (sbi->uid == -1)
-		dip->di_uid = cpu_to_le32(ip->i_uid);
+	if (!uid_valid(sbi->uid))
+		dip->di_uid = cpu_to_le32(i_uid_read(ip));
 	else
-		dip->di_uid = cpu_to_le32(jfs_ip->saved_uid);
-	if (sbi->gid == -1)
-		dip->di_gid = cpu_to_le32(ip->i_gid);
+		dip->di_uid =cpu_to_le32(from_kuid(&init_user_ns,
+						   jfs_ip->saved_uid));
+	if (!gid_valid(sbi->gid))
+		dip->di_gid = cpu_to_le32(i_gid_read(ip));
 	else
-		dip->di_gid = cpu_to_le32(jfs_ip->saved_gid);
+		dip->di_gid = cpu_to_le32(from_kgid(&init_user_ns,
+						    jfs_ip->saved_gid));
 	jfs_get_inode_flags(jfs_ip);
 	/*
 	 * mode2 is only needed for storing the higher order bits.
diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h
index 584a4a1a6e81..680605d7bf15 100644
--- a/fs/jfs/jfs_incore.h
+++ b/fs/jfs/jfs_incore.h
@@ -38,8 +38,8 @@
 struct jfs_inode_info {
 	int	fileset;	/* fileset number (always 16)*/
 	uint	mode2;		/* jfs-specific mode		*/
-	uint	saved_uid;	/* saved for uid mount option */
-	uint	saved_gid;	/* saved for gid mount option */
+	kuid_t	saved_uid;	/* saved for uid mount option */
+	kgid_t	saved_gid;	/* saved for gid mount option */
 	pxd_t	ixpxd;		/* inode extent descriptor	*/
 	dxd_t	acl;		/* dxd describing acl	*/
 	dxd_t	ea;		/* dxd describing ea	*/
@@ -192,8 +192,8 @@ struct jfs_sb_info {
 	uint		state;		/* mount/recovery state	*/
 	unsigned long	flag;		/* mount time flags */
 	uint		p_state;	/* state prior to going no integrity */
-	uint		uid;		/* uid to override on-disk uid */
-	uint		gid;		/* gid to override on-disk gid */
+	kuid_t		uid;		/* uid to override on-disk uid */
+	kgid_t		gid;		/* gid to override on-disk gid */
 	uint		umask;		/* umask to override on-disk umask */
 };
 
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index c55c7452d285..706692f24033 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -321,13 +321,19 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize,
 		case Opt_uid:
 		{
 			char *uid = args[0].from;
-			sbi->uid = simple_strtoul(uid, &uid, 0);
+			uid_t val = simple_strtoul(uid, &uid, 0);
+			sbi->uid = make_kuid(current_user_ns(), val);
+			if (!uid_valid(sbi->uid))
+				goto cleanup;
 			break;
 		}
 		case Opt_gid:
 		{
 			char *gid = args[0].from;
-			sbi->gid = simple_strtoul(gid, &gid, 0);
+			gid_t val = simple_strtoul(gid, &gid, 0);
+			sbi->gid = make_kgid(current_user_ns(), val);
+			if (!gid_valid(sbi->gid))
+				goto cleanup;
 			break;
 		}
 		case Opt_umask:
@@ -443,7 +449,9 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
 	sb->s_fs_info = sbi;
 	sb->s_max_links = JFS_LINK_MAX;
 	sbi->sb = sb;
-	sbi->uid = sbi->gid = sbi->umask = -1;
+	sbi->uid = INVALID_UID;
+	sbi->gid = INVALID_GID;
+	sbi->umask = -1;
 
 	/* initialize the mount flag and determine the default error handler */
 	flag = JFS_ERR_REMOUNT_RO;
@@ -617,10 +625,10 @@ static int jfs_show_options(struct seq_file *seq, struct dentry *root)
 {
 	struct jfs_sb_info *sbi = JFS_SBI(root->d_sb);
 
-	if (sbi->uid != -1)
-		seq_printf(seq, ",uid=%d", sbi->uid);
-	if (sbi->gid != -1)
-		seq_printf(seq, ",gid=%d", sbi->gid);
+	if (uid_valid(sbi->uid))
+		seq_printf(seq, ",uid=%d", from_kuid(&init_user_ns, sbi->uid));
+	if (gid_valid(sbi->gid))
+		seq_printf(seq, ",gid=%d", from_kgid(&init_user_ns, sbi->gid));
 	if (sbi->umask != -1)
 		seq_printf(seq, ",umask=%03o", sbi->umask);
 	if (sbi->flag & JFS_NOINTEGRITY)
-- 
cgit 


From df814654f364369dfb2fe3c870f3544ce69aa78c Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 7 Feb 2012 16:30:06 -0800
Subject: userns: Convert reiserfs to use kuid and kgid where appropriate

Cc: reiserfs-devel@vger.kernel.org
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/reiserfs/inode.c     | 26 +++++++++++++-------------
 fs/reiserfs/xattr_acl.c | 20 +++++++++++++++++---
 2 files changed, 30 insertions(+), 16 deletions(-)

(limited to 'fs')

diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index a6d4268fb6c1..7119f488d9cd 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1155,8 +1155,8 @@ static void init_inode(struct inode *inode, struct treepath *path)
 		set_inode_sd_version(inode, STAT_DATA_V1);
 		inode->i_mode = sd_v1_mode(sd);
 		set_nlink(inode, sd_v1_nlink(sd));
-		inode->i_uid = sd_v1_uid(sd);
-		inode->i_gid = sd_v1_gid(sd);
+		i_uid_write(inode, sd_v1_uid(sd));
+		i_gid_write(inode, sd_v1_gid(sd));
 		inode->i_size = sd_v1_size(sd);
 		inode->i_atime.tv_sec = sd_v1_atime(sd);
 		inode->i_mtime.tv_sec = sd_v1_mtime(sd);
@@ -1200,9 +1200,9 @@ static void init_inode(struct inode *inode, struct treepath *path)
 
 		inode->i_mode = sd_v2_mode(sd);
 		set_nlink(inode, sd_v2_nlink(sd));
-		inode->i_uid = sd_v2_uid(sd);
+		i_uid_write(inode, sd_v2_uid(sd));
 		inode->i_size = sd_v2_size(sd);
-		inode->i_gid = sd_v2_gid(sd);
+		i_gid_write(inode, sd_v2_gid(sd));
 		inode->i_mtime.tv_sec = sd_v2_mtime(sd);
 		inode->i_atime.tv_sec = sd_v2_atime(sd);
 		inode->i_ctime.tv_sec = sd_v2_ctime(sd);
@@ -1258,9 +1258,9 @@ static void inode2sd(void *sd, struct inode *inode, loff_t size)
 
 	set_sd_v2_mode(sd_v2, inode->i_mode);
 	set_sd_v2_nlink(sd_v2, inode->i_nlink);
-	set_sd_v2_uid(sd_v2, inode->i_uid);
+	set_sd_v2_uid(sd_v2, i_uid_read(inode));
 	set_sd_v2_size(sd_v2, size);
-	set_sd_v2_gid(sd_v2, inode->i_gid);
+	set_sd_v2_gid(sd_v2, i_gid_read(inode));
 	set_sd_v2_mtime(sd_v2, inode->i_mtime.tv_sec);
 	set_sd_v2_atime(sd_v2, inode->i_atime.tv_sec);
 	set_sd_v2_ctime(sd_v2, inode->i_ctime.tv_sec);
@@ -1280,8 +1280,8 @@ static void inode2sd_v1(void *sd, struct inode *inode, loff_t size)
 	struct stat_data_v1 *sd_v1 = (struct stat_data_v1 *)sd;
 
 	set_sd_v1_mode(sd_v1, inode->i_mode);
-	set_sd_v1_uid(sd_v1, inode->i_uid);
-	set_sd_v1_gid(sd_v1, inode->i_gid);
+	set_sd_v1_uid(sd_v1, i_uid_read(inode));
+	set_sd_v1_gid(sd_v1, i_gid_read(inode));
 	set_sd_v1_nlink(sd_v1, inode->i_nlink);
 	set_sd_v1_size(sd_v1, size);
 	set_sd_v1_atime(sd_v1, inode->i_atime.tv_sec);
@@ -1869,7 +1869,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
 		goto out_bad_inode;
 	}
 	if (old_format_only(sb)) {
-		if (inode->i_uid & ~0xffff || inode->i_gid & ~0xffff) {
+		if (i_uid_read(inode) & ~0xffff || i_gid_read(inode) & ~0xffff) {
 			pathrelse(&path_to_key);
 			/* i_uid or i_gid is too big to be stored in stat data v3.5 */
 			err = -EINVAL;
@@ -3140,16 +3140,16 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
 		}
 	}
 
-	if ((((attr->ia_valid & ATTR_UID) && (attr->ia_uid & ~0xffff)) ||
-	     ((attr->ia_valid & ATTR_GID) && (attr->ia_gid & ~0xffff))) &&
+	if ((((attr->ia_valid & ATTR_UID) && (from_kuid(&init_user_ns, attr->ia_uid) & ~0xffff)) ||
+	     ((attr->ia_valid & ATTR_GID) && (from_kgid(&init_user_ns, attr->ia_gid) & ~0xffff))) &&
 	    (get_inode_sd_version(inode) == STAT_DATA_V1)) {
 		/* stat data of format v3.5 has 16 bit uid and gid */
 		error = -EINVAL;
 		goto out;
 	}
 
-	if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
-	    (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
+	if ((ia_valid & ATTR_UID && !uid_eq(attr->ia_uid, inode->i_uid)) ||
+	    (ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid))) {
 		struct reiserfs_transaction_handle th;
 		int jbegin_count =
 		    2 *
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index 87d6911c659d..d7c01ef64eda 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -121,15 +121,23 @@ static struct posix_acl *posix_acl_from_disk(const void *value, size_t size)
 		case ACL_OTHER:
 			value = (char *)value +
 			    sizeof(reiserfs_acl_entry_short);
-			acl->a_entries[n].e_id = ACL_UNDEFINED_ID;
 			break;
 
 		case ACL_USER:
+			value = (char *)value + sizeof(reiserfs_acl_entry);
+			if ((char *)value > end)
+				goto fail;
+			acl->a_entries[n].e_uid = 
+				make_kuid(&init_user_ns,
+					  le32_to_cpu(entry->e_id));
+			break;
 		case ACL_GROUP:
 			value = (char *)value + sizeof(reiserfs_acl_entry);
 			if ((char *)value > end)
 				goto fail;
-			acl->a_entries[n].e_id = le32_to_cpu(entry->e_id);
+			acl->a_entries[n].e_gid =
+				make_kgid(&init_user_ns,
+					  le32_to_cpu(entry->e_id));
 			break;
 
 		default:
@@ -164,13 +172,19 @@ static void *posix_acl_to_disk(const struct posix_acl *acl, size_t * size)
 	ext_acl->a_version = cpu_to_le32(REISERFS_ACL_VERSION);
 	e = (char *)ext_acl + sizeof(reiserfs_acl_header);
 	for (n = 0; n < acl->a_count; n++) {
+		const struct posix_acl_entry *acl_e = &acl->a_entries[n];
 		reiserfs_acl_entry *entry = (reiserfs_acl_entry *) e;
 		entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag);
 		entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm);
 		switch (acl->a_entries[n].e_tag) {
 		case ACL_USER:
+			entry->e_id = cpu_to_le32(
+				from_kuid(&init_user_ns, acl_e->e_uid));
+			e += sizeof(reiserfs_acl_entry);
+			break;
 		case ACL_GROUP:
-			entry->e_id = cpu_to_le32(acl->a_entries[n].e_id);
+			entry->e_id = cpu_to_le32(
+				from_kgid(&init_user_ns, acl_e->e_gid));
 			e += sizeof(reiserfs_acl_entry);
 			break;
 
-- 
cgit 


From 61293ee2749bc2414725da37e50308154ff91574 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 10 Feb 2012 12:14:27 -0800
Subject: userns: Convert squashfs to use kuid/kgid where appropriate

Cc: Phillip Lougher <phillip@squashfs.org.uk>
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/squashfs/inode.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/squashfs/inode.c b/fs/squashfs/inode.c
index 81afbccfa843..a1ce5ce60632 100644
--- a/fs/squashfs/inode.c
+++ b/fs/squashfs/inode.c
@@ -56,16 +56,20 @@
 static int squashfs_new_inode(struct super_block *sb, struct inode *inode,
 				struct squashfs_base_inode *sqsh_ino)
 {
+	uid_t i_uid;
+	gid_t i_gid;
 	int err;
 
-	err = squashfs_get_id(sb, le16_to_cpu(sqsh_ino->uid), &inode->i_uid);
+	err = squashfs_get_id(sb, le16_to_cpu(sqsh_ino->uid), &i_uid);
 	if (err)
 		return err;
 
-	err = squashfs_get_id(sb, le16_to_cpu(sqsh_ino->guid), &inode->i_gid);
+	err = squashfs_get_id(sb, le16_to_cpu(sqsh_ino->guid), &i_gid);
 	if (err)
 		return err;
 
+	i_uid_write(inode, i_uid);
+	i_gid_write(inode, i_gid);
 	inode->i_ino = le32_to_cpu(sqsh_ino->inode_number);
 	inode->i_mtime.tv_sec = le32_to_cpu(sqsh_ino->mtime);
 	inode->i_atime.tv_sec = inode->i_mtime.tv_sec;
-- 
cgit 


From 39241beb78f69925b0475ad78f06f0e0589fb71b Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 7 Feb 2012 15:50:56 -0800
Subject: userns: Convert ubifs to use kuid/kgid

Cc: Artem Bityutskiy <dedekind1@gmail.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/ubifs/budget.c  | 4 ++--
 fs/ubifs/debug.c   | 4 ++--
 fs/ubifs/journal.c | 4 ++--
 fs/ubifs/sb.c      | 4 ++--
 fs/ubifs/super.c   | 4 ++--
 fs/ubifs/ubifs.h   | 4 ++--
 6 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index bc4f94b28706..969489e478bc 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -272,8 +272,8 @@ long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs)
  */
 static int can_use_rp(struct ubifs_info *c)
 {
-	if (current_fsuid() == c->rp_uid || capable(CAP_SYS_RESOURCE) ||
-	    (c->rp_gid != 0 && in_group_p(c->rp_gid)))
+	if (uid_eq(current_fsuid(), c->rp_uid) || capable(CAP_SYS_RESOURCE) ||
+	    (!gid_eq(c->rp_gid, GLOBAL_ROOT_GID) && in_group_p(c->rp_gid)))
 		return 1;
 	return 0;
 }
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index bb3167257aab..340d1afc1302 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -243,8 +243,8 @@ void ubifs_dump_inode(struct ubifs_info *c, const struct inode *inode)
 	printk(KERN_ERR "\tsize           %llu\n",
 	       (unsigned long long)i_size_read(inode));
 	printk(KERN_ERR "\tnlink          %u\n", inode->i_nlink);
-	printk(KERN_ERR "\tuid            %u\n", (unsigned int)inode->i_uid);
-	printk(KERN_ERR "\tgid            %u\n", (unsigned int)inode->i_gid);
+	printk(KERN_ERR "\tuid            %u\n", (unsigned int)i_uid_read(inode));
+	printk(KERN_ERR "\tgid            %u\n", (unsigned int)i_gid_read(inode));
 	printk(KERN_ERR "\tatime          %u.%u\n",
 	       (unsigned int)inode->i_atime.tv_sec,
 	       (unsigned int)inode->i_atime.tv_nsec);
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index 12c0f154ca83..afaad07f3b29 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -469,8 +469,8 @@ static void pack_inode(struct ubifs_info *c, struct ubifs_ino_node *ino,
 	ino->ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
 	ino->mtime_sec  = cpu_to_le64(inode->i_mtime.tv_sec);
 	ino->mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
-	ino->uid   = cpu_to_le32(inode->i_uid);
-	ino->gid   = cpu_to_le32(inode->i_gid);
+	ino->uid   = cpu_to_le32(i_uid_read(inode));
+	ino->gid   = cpu_to_le32(i_gid_read(inode));
 	ino->mode  = cpu_to_le32(inode->i_mode);
 	ino->flags = cpu_to_le32(ui->flags);
 	ino->size  = cpu_to_le64(ui->ui_size);
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c
index 15e2fc5aa60b..52c21f4190f6 100644
--- a/fs/ubifs/sb.c
+++ b/fs/ubifs/sb.c
@@ -611,8 +611,8 @@ int ubifs_read_superblock(struct ubifs_info *c)
 	c->fanout        = le32_to_cpu(sup->fanout);
 	c->lsave_cnt     = le32_to_cpu(sup->lsave_cnt);
 	c->rp_size       = le64_to_cpu(sup->rp_size);
-	c->rp_uid        = le32_to_cpu(sup->rp_uid);
-	c->rp_gid        = le32_to_cpu(sup->rp_gid);
+	c->rp_uid        = make_kuid(&init_user_ns, le32_to_cpu(sup->rp_uid));
+	c->rp_gid        = make_kgid(&init_user_ns, le32_to_cpu(sup->rp_gid));
 	sup_flags        = le32_to_cpu(sup->flags);
 	if (!c->mount_opts.override_compr)
 		c->default_compr = le16_to_cpu(sup->default_compr);
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 1c766c39c038..f39bad9db61c 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -130,8 +130,8 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum)
 
 	inode->i_flags |= (S_NOCMTIME | S_NOATIME);
 	set_nlink(inode, le32_to_cpu(ino->nlink));
-	inode->i_uid   = le32_to_cpu(ino->uid);
-	inode->i_gid   = le32_to_cpu(ino->gid);
+	i_uid_write(inode, le32_to_cpu(ino->uid));
+	i_gid_write(inode, le32_to_cpu(ino->gid));
 	inode->i_atime.tv_sec  = (int64_t)le64_to_cpu(ino->atime_sec);
 	inode->i_atime.tv_nsec = le32_to_cpu(ino->atime_nsec);
 	inode->i_mtime.tv_sec  = (int64_t)le64_to_cpu(ino->mtime_sec);
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 1e5a08623d11..64f2367c2f4c 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -1426,8 +1426,8 @@ struct ubifs_info {
 
 	long long rp_size;
 	long long report_rp_size;
-	uid_t rp_uid;
-	gid_t rp_gid;
+	kuid_t rp_uid;
+	kgid_t rp_gid;
 
 	/* The below fields are used only during mounting and re-mounting */
 	unsigned int empty:1;
-- 
cgit 


From c2ba138a27ddac4abbc931599dbce907c868910a Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 10 Feb 2012 12:20:35 -0800
Subject: userns: Convert the udf filesystem to use kuid/kgid where appropriate

Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/udf/inode.c  | 12 ++++++------
 fs/udf/super.c  | 20 ++++++++++++--------
 fs/udf/udf_sb.h |  4 ++--
 3 files changed, 20 insertions(+), 16 deletions(-)

(limited to 'fs')

diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index fafaad795cd6..1825dc0af728 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -1309,14 +1309,14 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
 	}
 
 	read_lock(&sbi->s_cred_lock);
-	inode->i_uid = le32_to_cpu(fe->uid);
-	if (inode->i_uid == -1 ||
+	i_uid_write(inode, le32_to_cpu(fe->uid));
+	if (!uid_valid(inode->i_uid) ||
 	    UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_UID_IGNORE) ||
 	    UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_UID_SET))
 		inode->i_uid = UDF_SB(inode->i_sb)->s_uid;
 
-	inode->i_gid = le32_to_cpu(fe->gid);
-	if (inode->i_gid == -1 ||
+	i_gid_write(inode, le32_to_cpu(fe->gid));
+	if (!gid_valid(inode->i_gid) ||
 	    UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_GID_IGNORE) ||
 	    UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_GID_SET))
 		inode->i_gid = UDF_SB(inode->i_sb)->s_gid;
@@ -1539,12 +1539,12 @@ static int udf_update_inode(struct inode *inode, int do_sync)
 	if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_UID_FORGET))
 		fe->uid = cpu_to_le32(-1);
 	else
-		fe->uid = cpu_to_le32(inode->i_uid);
+		fe->uid = cpu_to_le32(i_uid_read(inode));
 
 	if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_GID_FORGET))
 		fe->gid = cpu_to_le32(-1);
 	else
-		fe->gid = cpu_to_le32(inode->i_gid);
+		fe->gid = cpu_to_le32(i_gid_read(inode));
 
 	udfperms = ((inode->i_mode & S_IRWXO)) |
 		   ((inode->i_mode & S_IRWXG) << 2) |
diff --git a/fs/udf/super.c b/fs/udf/super.c
index dcbf98722afc..38c705574b92 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -199,8 +199,8 @@ struct udf_options {
 	unsigned int rootdir;
 	unsigned int flags;
 	umode_t umask;
-	gid_t gid;
-	uid_t uid;
+	kgid_t gid;
+	kuid_t uid;
 	umode_t fmode;
 	umode_t dmode;
 	struct nls_table *nls_map;
@@ -335,9 +335,9 @@ static int udf_show_options(struct seq_file *seq, struct dentry *root)
 	if (UDF_QUERY_FLAG(sb, UDF_FLAG_GID_IGNORE))
 		seq_puts(seq, ",gid=ignore");
 	if (UDF_QUERY_FLAG(sb, UDF_FLAG_UID_SET))
-		seq_printf(seq, ",uid=%u", sbi->s_uid);
+		seq_printf(seq, ",uid=%u", from_kuid(&init_user_ns, sbi->s_uid));
 	if (UDF_QUERY_FLAG(sb, UDF_FLAG_GID_SET))
-		seq_printf(seq, ",gid=%u", sbi->s_gid);
+		seq_printf(seq, ",gid=%u", from_kgid(&init_user_ns, sbi->s_gid));
 	if (sbi->s_umask != 0)
 		seq_printf(seq, ",umask=%ho", sbi->s_umask);
 	if (sbi->s_fmode != UDF_INVALID_MODE)
@@ -516,13 +516,17 @@ static int udf_parse_options(char *options, struct udf_options *uopt,
 		case Opt_gid:
 			if (match_int(args, &option))
 				return 0;
-			uopt->gid = option;
+			uopt->gid = make_kgid(current_user_ns(), option);
+			if (!gid_valid(uopt->gid))
+				return 0;
 			uopt->flags |= (1 << UDF_FLAG_GID_SET);
 			break;
 		case Opt_uid:
 			if (match_int(args, &option))
 				return 0;
-			uopt->uid = option;
+			uopt->uid = make_kuid(current_user_ns(), option);
+			if (!uid_valid(uopt->uid))
+				return 0;
 			uopt->flags |= (1 << UDF_FLAG_UID_SET);
 			break;
 		case Opt_umask:
@@ -1931,8 +1935,8 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
 	struct udf_sb_info *sbi;
 
 	uopt.flags = (1 << UDF_FLAG_USE_AD_IN_ICB) | (1 << UDF_FLAG_STRICT);
-	uopt.uid = -1;
-	uopt.gid = -1;
+	uopt.uid = INVALID_UID;
+	uopt.gid = INVALID_GID;
 	uopt.umask = 0;
 	uopt.fmode = UDF_INVALID_MODE;
 	uopt.dmode = UDF_INVALID_MODE;
diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h
index 42ad69ac9576..5f027227f085 100644
--- a/fs/udf/udf_sb.h
+++ b/fs/udf/udf_sb.h
@@ -128,8 +128,8 @@ struct udf_sb_info {
 
 	/* Default permissions */
 	umode_t			s_umask;
-	gid_t			s_gid;
-	uid_t			s_uid;
+	kgid_t			s_gid;
+	kuid_t			s_uid;
 	umode_t			s_fmode;
 	umode_t			s_dmode;
 	/* Lock protecting consistency of above permission settings */
-- 
cgit 


From 72235465864d84cedb2d9f26f8e1de824ee20339 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 10 Feb 2012 12:21:22 -0800
Subject: userns: Convert the ufs filesystem to use kuid/kgid where appropriate

Cc: Evgeniy Dushistov <dushistov@mail.ru>
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/ufs/inode.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index dd7c89d8a1c1..eb6d0b7dc879 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -597,8 +597,8 @@ static int ufs1_read_inode(struct inode *inode, struct ufs_inode *ufs_inode)
 	/*
 	 * Linux now has 32-bit uid and gid, so we can support EFT.
 	 */
-	inode->i_uid = ufs_get_inode_uid(sb, ufs_inode);
-	inode->i_gid = ufs_get_inode_gid(sb, ufs_inode);
+	i_uid_write(inode, ufs_get_inode_uid(sb, ufs_inode));
+	i_gid_write(inode, ufs_get_inode_gid(sb, ufs_inode));
 
 	inode->i_size = fs64_to_cpu(sb, ufs_inode->ui_size);
 	inode->i_atime.tv_sec = fs32_to_cpu(sb, ufs_inode->ui_atime.tv_sec);
@@ -645,8 +645,8 @@ static int ufs2_read_inode(struct inode *inode, struct ufs2_inode *ufs2_inode)
         /*
          * Linux now has 32-bit uid and gid, so we can support EFT.
          */
-	inode->i_uid = fs32_to_cpu(sb, ufs2_inode->ui_uid);
-	inode->i_gid = fs32_to_cpu(sb, ufs2_inode->ui_gid);
+	i_uid_write(inode, fs32_to_cpu(sb, ufs2_inode->ui_uid));
+	i_gid_write(inode, fs32_to_cpu(sb, ufs2_inode->ui_gid));
 
 	inode->i_size = fs64_to_cpu(sb, ufs2_inode->ui_size);
 	inode->i_atime.tv_sec = fs64_to_cpu(sb, ufs2_inode->ui_atime);
@@ -745,8 +745,8 @@ static void ufs1_update_inode(struct inode *inode, struct ufs_inode *ufs_inode)
 	ufs_inode->ui_mode = cpu_to_fs16(sb, inode->i_mode);
 	ufs_inode->ui_nlink = cpu_to_fs16(sb, inode->i_nlink);
 
-	ufs_set_inode_uid(sb, ufs_inode, inode->i_uid);
-	ufs_set_inode_gid(sb, ufs_inode, inode->i_gid);
+	ufs_set_inode_uid(sb, ufs_inode, i_uid_read(inode));
+	ufs_set_inode_gid(sb, ufs_inode, i_gid_read(inode));
 		
 	ufs_inode->ui_size = cpu_to_fs64(sb, inode->i_size);
 	ufs_inode->ui_atime.tv_sec = cpu_to_fs32(sb, inode->i_atime.tv_sec);
@@ -789,8 +789,8 @@ static void ufs2_update_inode(struct inode *inode, struct ufs2_inode *ufs_inode)
 	ufs_inode->ui_mode = cpu_to_fs16(sb, inode->i_mode);
 	ufs_inode->ui_nlink = cpu_to_fs16(sb, inode->i_nlink);
 
-	ufs_inode->ui_uid = cpu_to_fs32(sb, inode->i_uid);
-	ufs_inode->ui_gid = cpu_to_fs32(sb, inode->i_gid);
+	ufs_inode->ui_uid = cpu_to_fs32(sb, i_uid_read(inode));
+	ufs_inode->ui_gid = cpu_to_fs32(sb, i_gid_read(inode));
 
 	ufs_inode->ui_size = cpu_to_fs64(sb, inode->i_size);
 	ufs_inode->ui_atime = cpu_to_fs64(sb, inode->i_atime.tv_sec);
-- 
cgit 


From 84c3b84860440a9e3a3666c14112f41311b8f623 Mon Sep 17 00:00:00 2001
From: Jaeden Amero <jaeden.amero@ni.com>
Date: Wed, 19 Sep 2012 15:34:31 +0100
Subject: compat_ioctl: Add RS-485 IOCTLs to the list

The RS-485 TIOCSRS485 and TIOCGRS485 ioctls are 32-bit compatible, so
in order to call them on 64-bit systems from 32-bit user mode, we add
them to the ioctl pointer list as compatible ioctls.

Signed-off-by: Jaeden Amero <jaeden.amero@ni.com>
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/compat_ioctl.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index debdfe0fc809..85dfebfe6820 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -866,6 +866,8 @@ COMPATIBLE_IOCTL(TIOCGPTN)
 COMPATIBLE_IOCTL(TIOCSPTLCK)
 COMPATIBLE_IOCTL(TIOCSERGETLSR)
 COMPATIBLE_IOCTL(TIOCSIG)
+COMPATIBLE_IOCTL(TIOCSRS485)
+COMPATIBLE_IOCTL(TIOCGRS485)
 #ifdef TCGETS2
 COMPATIBLE_IOCTL(TCGETS2)
 COMPATIBLE_IOCTL(TCSETS2)
-- 
cgit 


From 1f981697432daea3c0abf938b39174dcfb29340e Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Thu, 26 Jul 2012 11:26:36 +0100
Subject: GFS2: Merge two nearly identical xattr functions

There were two functions in the xattr code which were nearly
identical, the only difference being that one was copy data into
the unstuffed xattrs and the other was copying data out from it.

This patch merges the two functions such that the code which deal
with iteration over the unstuffed xattrs is no longer duplicated.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/xattr.c | 94 ++++++++++++++++++---------------------------------------
 1 file changed, 30 insertions(+), 64 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 27a0b4a901f5..5404ed1582ff 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -448,17 +448,18 @@ ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size)
 }
 
 /**
- * ea_get_unstuffed - actually copies the unstuffed data into the
- *                    request buffer
+ * ea_iter_unstuffed - copies the unstuffed xattr data to/from the
+ *                     request buffer
  * @ip: The GFS2 inode
  * @ea: The extended attribute header structure
- * @data: The data to be copied
+ * @din: The data to be copied in
+ * @dout: The data to be copied out (one of din,dout will be NULL)
  *
  * Returns: errno
  */
 
-static int ea_get_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
-			    char *data)
+static int gfs2_iter_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
+			       const char *din, char *dout)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	struct buffer_head **bh;
@@ -467,6 +468,8 @@ static int ea_get_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
 	__be64 *dataptrs = GFS2_EA2DATAPTRS(ea);
 	unsigned int x;
 	int error = 0;
+	unsigned char *pos;
+	unsigned cp_size;
 
 	bh = kcalloc(nptrs, sizeof(struct buffer_head *), GFP_NOFS);
 	if (!bh)
@@ -497,12 +500,21 @@ static int ea_get_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
 			goto out;
 		}
 
-		memcpy(data, bh[x]->b_data + sizeof(struct gfs2_meta_header),
-		       (sdp->sd_jbsize > amount) ? amount : sdp->sd_jbsize);
+		pos = bh[x]->b_data + sizeof(struct gfs2_meta_header);
+		cp_size = (sdp->sd_jbsize > amount) ? amount : sdp->sd_jbsize;
 
-		amount -= sdp->sd_jbsize;
-		data += sdp->sd_jbsize;
+		if (dout) {
+			memcpy(dout, pos, cp_size);
+			dout += sdp->sd_jbsize;
+		}
+
+		if (din) {
+			gfs2_trans_add_bh(ip->i_gl, bh[x], 1);
+			memcpy(pos, din, cp_size);
+			din += sdp->sd_jbsize;
+		}
 
+		amount -= sdp->sd_jbsize;
 		brelse(bh[x]);
 	}
 
@@ -523,7 +535,7 @@ static int gfs2_ea_get_copy(struct gfs2_inode *ip, struct gfs2_ea_location *el,
 		memcpy(data, GFS2_EA2DATA(el->el_ea), len);
 		return len;
 	}
-	ret = ea_get_unstuffed(ip, el->el_ea, data);
+	ret = gfs2_iter_unstuffed(ip, el->el_ea, NULL, data);
 	if (ret < 0)
 		return ret;
 	return len;
@@ -1220,69 +1232,23 @@ static int gfs2_xattr_set(struct dentry *dentry, const char *name,
 				size, flags, type);
 }
 
+
 static int ea_acl_chmod_unstuffed(struct gfs2_inode *ip,
 				  struct gfs2_ea_header *ea, char *data)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-	struct buffer_head **bh;
 	unsigned int amount = GFS2_EA_DATA_LEN(ea);
 	unsigned int nptrs = DIV_ROUND_UP(amount, sdp->sd_jbsize);
-	__be64 *dataptrs = GFS2_EA2DATAPTRS(ea);
-	unsigned int x;
-	int error;
-
-	bh = kcalloc(nptrs, sizeof(struct buffer_head *), GFP_NOFS);
-	if (!bh)
-		return -ENOMEM;
-
-	error = gfs2_trans_begin(sdp, nptrs + RES_DINODE, 0);
-	if (error)
-		goto out;
-
-	for (x = 0; x < nptrs; x++) {
-		error = gfs2_meta_read(ip->i_gl, be64_to_cpu(*dataptrs), 0,
-				       bh + x);
-		if (error) {
-			while (x--)
-				brelse(bh[x]);
-			goto fail;
-		}
-		dataptrs++;
-	}
-
-	for (x = 0; x < nptrs; x++) {
-		error = gfs2_meta_wait(sdp, bh[x]);
-		if (error) {
-			for (; x < nptrs; x++)
-				brelse(bh[x]);
-			goto fail;
-		}
-		if (gfs2_metatype_check(sdp, bh[x], GFS2_METATYPE_ED)) {
-			for (; x < nptrs; x++)
-				brelse(bh[x]);
-			error = -EIO;
-			goto fail;
-		}
-
-		gfs2_trans_add_bh(ip->i_gl, bh[x], 1);
-
-		memcpy(bh[x]->b_data + sizeof(struct gfs2_meta_header), data,
-		       (sdp->sd_jbsize > amount) ? amount : sdp->sd_jbsize);
-
-		amount -= sdp->sd_jbsize;
-		data += sdp->sd_jbsize;
-
-		brelse(bh[x]);
-	}
+	int ret;
 
-out:
-	kfree(bh);
-	return error;
+	ret = gfs2_trans_begin(sdp, nptrs + RES_DINODE, 0);
+	if (ret)
+		return ret;
 
-fail:
+	ret = gfs2_iter_unstuffed(ip, ea, data, NULL);
 	gfs2_trans_end(sdp);
-	kfree(bh);
-	return error;
+
+	return ret;
 }
 
 int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data)
-- 
cgit 


From 71f890f7f758f340215d48fed5223f9cce05b652 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Mon, 30 Jul 2012 14:53:19 +0100
Subject: GFS2: Remove rs_requested field from reservations

The rs_requested field is left over from the original allocation
code, however this should have been a parameter passed to the
various functions from gfs2_inplace_reserve() and not a member of the
reservation structure as the value is not required after the
initial allocation.

This also helps simplify the code since we no longer need to set
the rs_requested to zero. Also the gfs2_inplace_release()
function can also be simplified since the reservation structure
will always be defined when it is called, and the only remaining
task is to unlock the rgrp if required. It can also now be
called unconditionally too, resulting in a further simplification.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/aops.c   |  9 +++++----
 fs/gfs2/file.c   |  4 ++--
 fs/gfs2/incore.h |  3 ---
 fs/gfs2/inode.c  |  9 +++------
 fs/gfs2/quota.c  |  9 +++++----
 fs/gfs2/rgrp.c   | 35 ++++++++---------------------------
 fs/gfs2/rgrp.h   |  8 --------
 fs/gfs2/trans.h  |  7 +++----
 fs/gfs2/xattr.c  |  2 +-
 9 files changed, 27 insertions(+), 59 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index d6526347d386..00eaa83871b7 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -612,6 +612,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
 	struct gfs2_sbd *sdp = GFS2_SB(mapping->host);
 	struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
 	unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
+	unsigned requested = 0;
 	int alloc_required;
 	int error = 0;
 	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
@@ -641,7 +642,8 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
 		if (error)
 			goto out_unlock;
 
-		error = gfs2_inplace_reserve(ip, data_blocks + ind_blocks);
+		requested = data_blocks + ind_blocks;
+		error = gfs2_inplace_reserve(ip, requested);
 		if (error)
 			goto out_qunlock;
 	}
@@ -654,7 +656,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
 	if (&ip->i_inode == sdp->sd_rindex)
 		rblocks += 2 * RES_STATFS;
 	if (alloc_required)
-		rblocks += gfs2_rg_blocks(ip);
+		rblocks += gfs2_rg_blocks(ip, requested);
 
 	error = gfs2_trans_begin(sdp, rblocks,
 				 PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize);
@@ -868,8 +870,7 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
 	brelse(dibh);
 failed:
 	gfs2_trans_end(sdp);
-	if (gfs2_mb_reserved(ip))
-		gfs2_inplace_release(ip);
+	gfs2_inplace_release(ip);
 	if (ip->i_res->rs_qa_qd_num)
 		gfs2_quota_unlock(ip);
 	if (inode == sdp->sd_rindex) {
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 382000ffac1f..30e21997a1a1 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -441,7 +441,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 		rblocks += data_blocks ? data_blocks : 1;
 	if (ind_blocks || data_blocks) {
 		rblocks += RES_STATFS + RES_QUOTA;
-		rblocks += gfs2_rg_blocks(ip);
+		rblocks += gfs2_rg_blocks(ip, data_blocks + ind_blocks);
 	}
 	ret = gfs2_trans_begin(sdp, rblocks, 0);
 	if (ret)
@@ -845,7 +845,7 @@ retry:
 				&max_bytes, &data_blocks, &ind_blocks);
 
 		rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA +
-			  RES_RG_HDR + gfs2_rg_blocks(ip);
+			  RES_RG_HDR + gfs2_rg_blocks(ip, data_blocks + ind_blocks);
 		if (gfs2_is_jdata(ip))
 			rblocks += data_blocks ? data_blocks : 1;
 
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index aaecc8085fc5..52078a161ecd 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -250,9 +250,6 @@ struct gfs2_blkreserv {
 	/* components used during write (step 1): */
 	atomic_t rs_sizehint;         /* hint of the write size */
 
-	/* components used during inplace_reserve (step 2): */
-	u32 rs_requested; /* Filled in by caller of gfs2_inplace_reserve() */
-
 	/* components used during get_local_rgrp (step 3): */
 	struct gfs2_rgrpd *rs_rgd;    /* pointer to the gfs2_rgrpd */
 	struct gfs2_holder rs_rgd_gh; /* Filled in by get_local_rgrp */
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 753af3d86bbc..f2709ea887da 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -737,10 +737,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 		brelse(bh);
 
 	gfs2_trans_end(sdp);
-	/* Check if we reserved space in the rgrp. Function link_dinode may
-	   not, depending on whether alloc is required. */
-	if (gfs2_mb_reserved(dip))
-		gfs2_inplace_release(dip);
+	gfs2_inplace_release(dip);
 	gfs2_quota_unlock(dip);
 	mark_inode_dirty(inode);
 	gfs2_glock_dq_uninit_m(2, ghs);
@@ -897,7 +894,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
 			goto out_gunlock_q;
 
 		error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
-					 gfs2_rg_blocks(dip) +
+					 gfs2_rg_blocks(dip, sdp->sd_max_dirres) +
 					 2 * RES_DINODE + RES_STATFS +
 					 RES_QUOTA, 0);
 		if (error)
@@ -1378,7 +1375,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 			goto out_gunlock_q;
 
 		error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
-					 gfs2_rg_blocks(ndip) +
+					 gfs2_rg_blocks(ndip, sdp->sd_max_dirres) +
 					 4 * RES_DINODE + 4 * RES_LEAF +
 					 RES_STATFS + RES_QUOTA + 4, 0);
 		if (error)
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index a3bde91645c2..420bc3805ccc 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -765,6 +765,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
 	struct gfs2_holder *ghs, i_gh;
 	unsigned int qx, x;
 	struct gfs2_quota_data *qd;
+	unsigned reserved;
 	loff_t offset;
 	unsigned int nalloc = 0, blocks;
 	int error;
@@ -811,13 +812,13 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
 	 * two blocks need to be updated instead of 1 */
 	blocks = num_qd * data_blocks + RES_DINODE + num_qd + 3;
 
-	error = gfs2_inplace_reserve(ip, 1 +
-				     (nalloc * (data_blocks + ind_blocks)));
+	reserved = 1 + (nalloc * (data_blocks + ind_blocks));
+	error = gfs2_inplace_reserve(ip, reserved);
 	if (error)
 		goto out_alloc;
 
 	if (nalloc)
-		blocks += gfs2_rg_blocks(ip) + nalloc * ind_blocks + RES_STATFS;
+		blocks += gfs2_rg_blocks(ip, reserved) + nalloc * ind_blocks + RES_STATFS;
 
 	error = gfs2_trans_begin(sdp, blocks, 0);
 	if (error)
@@ -1598,7 +1599,7 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
 		error = gfs2_inplace_reserve(ip, blocks);
 		if (error)
 			goto out_i;
-		blocks += gfs2_rg_blocks(ip);
+		blocks += gfs2_rg_blocks(ip, blocks);
 	}
 
 	/* Some quotas span block boundaries and can update two blocks,
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index c9ed814eeb6f..a2b43bb83499 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -539,7 +539,6 @@ static void __rs_deltree(struct gfs2_blkreserv *rs)
 	   E.g. We can't set rs_rgd to NULL because the rgd glock is held and
 	   dequeued through this pointer.
 	   Can't: atomic_set(&rs->rs_sizehint, 0);
-	   Can't: rs->rs_requested = 0;
 	   Can't: rs->rs_rgd = NULL;*/
 	rs->rs_bi = NULL;
 	rs->rs_biblk = 0;
@@ -1350,7 +1349,7 @@ static u32 unclaimed_blocks(struct gfs2_rgrpd *rgd)
  * Returns: 0 if successful or BFITNOENT if there isn't enough free space
  */
 
-static int rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
+static int rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, unsigned requested)
 {
 	struct gfs2_bitmap *bi = rgd->rd_bits;
 	const u32 length = rgd->rd_length;
@@ -1422,8 +1421,7 @@ do_search:
 			   what we can. If there's not enough, keep looking. */
 			if (nonzero == NULL)
 				rsv_bytes = search_bytes;
-			else if ((nonzero - ptr) * GFS2_NBBY >=
-				 ip->i_res->rs_requested)
+			else if ((nonzero - ptr) * GFS2_NBBY >= requested)
 				rsv_bytes = (nonzero - ptr);
 
 			if (rsv_bytes) {
@@ -1461,17 +1459,16 @@ skip:
  * Returns: 1 on success (it fits), 0 on failure (it doesn't fit)
  */
 
-static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
+static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
+			unsigned requested)
 {
-	struct gfs2_blkreserv *rs = ip->i_res;
-
 	if (rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR))
 		return 0;
 	/* Look for a multi-block reservation. */
 	if (unclaimed_blocks(rgd) >= RGRP_RSRV_MINBLKS &&
-	    rg_mblk_search(rgd, ip) != BFITNOENT)
+	    rg_mblk_search(rgd, ip, requested) != BFITNOENT)
 		return 1;
-	if (unclaimed_blocks(rgd) >= rs->rs_requested)
+	if (unclaimed_blocks(rgd) >= requested)
 		return 1;
 
 	return 0;
@@ -1562,7 +1559,6 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
 
 	if (sdp->sd_args.ar_rgrplvb)
 		flags |= GL_SKIP;
-	rs->rs_requested = requested;
 	if (gfs2_assert_warn(sdp, requested)) {
 		error = -EINVAL;
 		goto out;
@@ -1606,7 +1602,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
 		case 0:
 			if (gfs2_rs_active(rs)) {
 				if (unclaimed_blocks(rs->rs_rgd) +
-				    rs->rs_free >= rs->rs_requested) {
+				    rs->rs_free >= requested) {
 					ip->i_rgd = rs->rs_rgd;
 					return 0;
 				}
@@ -1616,7 +1612,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
 				   and look for a suitable rgrp. */
 				gfs2_rs_deltree(rs);
 			}
-			if (try_rgrp_fit(rs->rs_rgd, ip)) {
+			if (try_rgrp_fit(rs->rs_rgd, ip, requested)) {
 				if (sdp->sd_args.ar_rgrplvb)
 					gfs2_rgrp_bh_get(rs->rs_rgd);
 				ip->i_rgd = rs->rs_rgd;
@@ -1656,8 +1652,6 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
 	error = -ENOSPC;
 
 out:
-	if (error)
-		rs->rs_requested = 0;
 	return error;
 }
 
@@ -1672,15 +1666,8 @@ void gfs2_inplace_release(struct gfs2_inode *ip)
 {
 	struct gfs2_blkreserv *rs = ip->i_res;
 
-	if (!rs)
-		return;
-
-	if (!rs->rs_free)
-		gfs2_rs_deltree(rs);
-
 	if (rs->rs_rgd_gh.gh_gl)
 		gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
-	rs->rs_requested = 0;
 }
 
 /**
@@ -2021,12 +2008,6 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
 	int error;
 	struct gfs2_bitmap *bi;
 
-	/* Only happens if there is a bug in gfs2, return something distinctive
-	 * to ensure that it is noticed.
-	 */
-	if (ip->i_res->rs_requested == 0)
-		return -ECANCELED;
-
 	/* If we have a reservation, claim blocks from it. */
 	if (gfs2_rs_active(ip->i_res)) {
 		BUG_ON(!ip->i_res->rs_free);
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index ca6e26729b86..0b0e9cc7e3d9 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -73,14 +73,6 @@ extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
 				   const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed);
 extern int gfs2_fitrim(struct file *filp, void __user *argp);
 
-/* This is how to tell if a multi-block reservation is "inplace" reserved: */
-static inline int gfs2_mb_reserved(struct gfs2_inode *ip)
-{
-	if (ip->i_res && ip->i_res->rs_requested)
-		return 1;
-	return 0;
-}
-
 /* This is how to tell if a multi-block reservation is in the rgrp tree: */
 static inline int gfs2_rs_active(struct gfs2_blkreserv *rs)
 {
diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h
index 41f42cdccbb8..bf2ae9aeee7a 100644
--- a/fs/gfs2/trans.h
+++ b/fs/gfs2/trans.h
@@ -28,11 +28,10 @@ struct gfs2_glock;
 
 /* reserve either the number of blocks to be allocated plus the rg header
  * block, or all of the blocks in the rg, whichever is smaller */
-static inline unsigned int gfs2_rg_blocks(const struct gfs2_inode *ip)
+static inline unsigned int gfs2_rg_blocks(const struct gfs2_inode *ip, unsigned requested)
 {
-	const struct gfs2_blkreserv *rs = ip->i_res;
-	if (rs && rs->rs_requested < ip->i_rgd->rd_length)
-		return rs->rs_requested + 1;
+	if (requested < ip->i_rgd->rd_length)
+		return requested + 1;
 	return ip->i_rgd->rd_length;
 }
 
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 5404ed1582ff..db330e5518cd 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -739,7 +739,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
 		goto out_gunlock_q;
 
 	error = gfs2_trans_begin(GFS2_SB(&ip->i_inode),
-				 blks + gfs2_rg_blocks(ip) +
+				 blks + gfs2_rg_blocks(ip, blks) +
 				 RES_DINODE + RES_STATFS + RES_QUOTA, 0);
 	if (error)
 		goto out_ipres;
-- 
cgit 


From 4a993fb1503d11496974bd86c0b7123f63d9c8a2 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Tue, 31 Jul 2012 15:21:20 +0100
Subject: GFS2: Add structure to contain rgrp, bitmap, offset tuple

This patch introduces a new structure, gfs2_rbm, which is a
tuple of a resource group, a bitmap within the resource group
and an offset within that bitmap. This is designed to make
manipulating these sets of variables easier. There is also a
new helper function which converts this representation back
to a disk block address.

In addition, the rbtree nodes which are used for the reservations
were not being correctly initialised, which is now fixed. Also,
the tracing was not passing through the inode where it should
have been. That is mostly fixed aside from one corner case. This
needs to be revisited since there can also be a NULL rgrp in
some cases which results in the device being incorrect in the
trace.

This is intended to be the first step towards cleaning up some
of the allocation code, and some further bug fixes.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/bmap.c       |   2 +-
 fs/gfs2/incore.h     |  15 ++++-
 fs/gfs2/rgrp.c       | 175 ++++++++++++++++++++++++---------------------------
 fs/gfs2/rgrp.h       |  16 ++---
 fs/gfs2/super.c      |   2 +-
 fs/gfs2/trace_gfs2.h |  10 +--
 6 files changed, 105 insertions(+), 115 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 49cd7dd4a9fa..1fd3ae237bdd 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -786,7 +786,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
 		goto out_rlist;
 
 	if (gfs2_rs_active(ip->i_res)) /* needs to be done with the rgrp glock held */
-		gfs2_rs_deltree(ip->i_res);
+		gfs2_rs_deltree(ip, ip->i_res);
 
 	error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE +
 				 RES_INDIRECT + RES_STATFS + RES_QUOTA,
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 52078a161ecd..d5e254604c72 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -102,6 +102,17 @@ struct gfs2_rgrpd {
 	u32 rd_rs_cnt;                  /* count of current reservations */
 };
 
+struct gfs2_rbm {
+	struct gfs2_rgrpd *rgd;
+	struct gfs2_bitmap *bi;	/* Bitmap must belong to the rgd */
+	u32 offset;		/* The offset is bitmap relative */
+};
+
+static inline u64 gfs2_rbm_to_block(const struct gfs2_rbm *rbm)
+{
+	return rbm->rgd->rd_data0 + (rbm->bi->bi_start * GFS2_NBBY) + rbm->offset;
+}
+
 enum gfs2_state_bits {
 	BH_Pinned = BH_PrivateStart,
 	BH_Escaped = BH_PrivateStart + 1,
@@ -251,13 +262,11 @@ struct gfs2_blkreserv {
 	atomic_t rs_sizehint;         /* hint of the write size */
 
 	/* components used during get_local_rgrp (step 3): */
-	struct gfs2_rgrpd *rs_rgd;    /* pointer to the gfs2_rgrpd */
+	struct gfs2_rbm rs_rbm;
 	struct gfs2_holder rs_rgd_gh; /* Filled in by get_local_rgrp */
 	struct rb_node rs_node;       /* link to other block reservations */
 
 	/* components used during block searches and assignments (step 4): */
-	struct gfs2_bitmap *rs_bi;    /* bitmap for the current allocation */
-	u32 rs_biblk;                 /* start block relative to the bi */
 	u32 rs_free;                  /* how many blocks are still free */
 
 	/* ancillary quota stuff */
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index a2b43bb83499..eaa41885a00d 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -192,7 +192,7 @@ static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state)
  */
 static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs)
 {
-	u64 startblk = gfs2_rs_startblk(rs);
+	u64 startblk = gfs2_rbm_to_block(&rs->rs_rbm);
 
 	if (blk >= startblk + rs->rs_free)
 		return 1;
@@ -487,6 +487,8 @@ int gfs2_rs_alloc(struct gfs2_inode *ip)
 	if (!res)
 		error = -ENOMEM;
 
+	rb_init_node(&res->rs_node);
+
 	down_write(&ip->i_rw_mutex);
 	if (ip->i_res)
 		kmem_cache_free(gfs2_rsrv_cachep, res);
@@ -499,8 +501,8 @@ int gfs2_rs_alloc(struct gfs2_inode *ip)
 static void dump_rs(struct seq_file *seq, struct gfs2_blkreserv *rs)
 {
 	gfs2_print_dbg(seq, "  r: %llu s:%llu b:%u f:%u\n",
-		       rs->rs_rgd->rd_addr, gfs2_rs_startblk(rs), rs->rs_biblk,
-		       rs->rs_free);
+		       rs->rs_rbm.rgd->rd_addr, gfs2_rbm_to_block(&rs->rs_rbm), 
+		       rs->rs_rbm.offset, rs->rs_free);
 }
 
 /**
@@ -508,40 +510,28 @@ static void dump_rs(struct seq_file *seq, struct gfs2_blkreserv *rs)
  * @rs: The reservation to remove
  *
  */
-static void __rs_deltree(struct gfs2_blkreserv *rs)
+static void __rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs)
 {
 	struct gfs2_rgrpd *rgd;
 
 	if (!gfs2_rs_active(rs))
 		return;
 
-	rgd = rs->rs_rgd;
-	/* We can't do this: The reason is that when the rgrp is invalidated,
-	   it's in the "middle" of acquiring the glock, but the HOLDER bit
-	   isn't set yet:
-	   BUG_ON(!gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl));*/
-	trace_gfs2_rs(NULL, rs, TRACE_RS_TREEDEL);
-
-	if (!RB_EMPTY_ROOT(&rgd->rd_rstree))
-		rb_erase(&rs->rs_node, &rgd->rd_rstree);
+	rgd = rs->rs_rbm.rgd;
+	trace_gfs2_rs(ip, rs, TRACE_RS_TREEDEL);
+	rb_erase(&rs->rs_node, &rgd->rd_rstree);
+	rb_init_node(&rs->rs_node);
 	BUG_ON(!rgd->rd_rs_cnt);
 	rgd->rd_rs_cnt--;
 
 	if (rs->rs_free) {
 		/* return reserved blocks to the rgrp and the ip */
-		BUG_ON(rs->rs_rgd->rd_reserved < rs->rs_free);
-		rs->rs_rgd->rd_reserved -= rs->rs_free;
+		BUG_ON(rs->rs_rbm.rgd->rd_reserved < rs->rs_free);
+		rs->rs_rbm.rgd->rd_reserved -= rs->rs_free;
 		rs->rs_free = 0;
-		clear_bit(GBF_FULL, &rs->rs_bi->bi_flags);
+		clear_bit(GBF_FULL, &rs->rs_rbm.bi->bi_flags);
 		smp_mb__after_clear_bit();
 	}
-	/* We can't change any of the step 1 or step 2 components of the rs.
-	   E.g. We can't set rs_rgd to NULL because the rgd glock is held and
-	   dequeued through this pointer.
-	   Can't: atomic_set(&rs->rs_sizehint, 0);
-	   Can't: rs->rs_rgd = NULL;*/
-	rs->rs_bi = NULL;
-	rs->rs_biblk = 0;
 }
 
 /**
@@ -549,17 +539,16 @@ static void __rs_deltree(struct gfs2_blkreserv *rs)
  * @rs: The reservation to remove
  *
  */
-void gfs2_rs_deltree(struct gfs2_blkreserv *rs)
+void gfs2_rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs)
 {
 	struct gfs2_rgrpd *rgd;
 
-	if (!gfs2_rs_active(rs))
-		return;
-
-	rgd = rs->rs_rgd;
-	spin_lock(&rgd->rd_rsspin);
-	__rs_deltree(rs);
-	spin_unlock(&rgd->rd_rsspin);
+	rgd = rs->rs_rbm.rgd;
+	if (rgd) {
+		spin_lock(&rgd->rd_rsspin);
+		__rs_deltree(ip, rs);
+		spin_unlock(&rgd->rd_rsspin);
+	}
 }
 
 /**
@@ -571,7 +560,7 @@ void gfs2_rs_delete(struct gfs2_inode *ip)
 {
 	down_write(&ip->i_rw_mutex);
 	if (ip->i_res) {
-		gfs2_rs_deltree(ip->i_res);
+		gfs2_rs_deltree(ip, ip->i_res);
 		trace_gfs2_rs(ip, ip->i_res, TRACE_RS_DELETE);
 		BUG_ON(ip->i_res->rs_free);
 		kmem_cache_free(gfs2_rsrv_cachep, ip->i_res);
@@ -596,7 +585,7 @@ static void return_all_reservations(struct gfs2_rgrpd *rgd)
 	spin_lock(&rgd->rd_rsspin);
 	while ((n = rb_first(&rgd->rd_rstree))) {
 		rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
-		__rs_deltree(rs);
+		__rs_deltree(NULL, rs);
 	}
 	spin_unlock(&rgd->rd_rsspin);
 }
@@ -1284,7 +1273,7 @@ static struct gfs2_blkreserv *rs_insert(struct gfs2_bitmap *bi,
 	struct rb_node **newn, *parent = NULL;
 	int rc;
 	struct gfs2_blkreserv *rs = ip->i_res;
-	struct gfs2_rgrpd *rgd = rs->rs_rgd;
+	struct gfs2_rgrpd *rgd = rs->rs_rbm.rgd;
 	u64 fsblock = gfs2_bi2rgd_blk(bi, biblk) + rgd->rd_data0;
 
 	spin_lock(&rgd->rd_rsspin);
@@ -1312,8 +1301,8 @@ static struct gfs2_blkreserv *rs_insert(struct gfs2_bitmap *bi,
 	/* Do our reservation work */
 	rs = ip->i_res;
 	rs->rs_free = amount;
-	rs->rs_biblk = biblk;
-	rs->rs_bi = bi;
+	rs->rs_rbm.offset = biblk;
+	rs->rs_rbm.bi = bi;
 	rb_link_node(&rs->rs_node, parent, newn);
 	rb_insert_color(&rs->rs_node, &rgd->rd_rstree);
 
@@ -1564,34 +1553,34 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
 		goto out;
 	}
 	if (gfs2_rs_active(rs)) {
-		begin = rs->rs_rgd;
+		begin = rs->rs_rbm.rgd;
 		flags = 0; /* Yoda: Do or do not. There is no try */
 	} else if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) {
-		rs->rs_rgd = begin = ip->i_rgd;
+		rs->rs_rbm.rgd = begin = ip->i_rgd;
 	} else {
-		rs->rs_rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1);
+		rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1);
 	}
-	if (rs->rs_rgd == NULL)
+	if (rs->rs_rbm.rgd == NULL)
 		return -EBADSLT;
 
 	while (loops < 3) {
 		rg_locked = 0;
 
-		if (gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl)) {
+		if (gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) {
 			rg_locked = 1;
 			error = 0;
 		} else if (!loops && !gfs2_rs_active(rs) &&
-			   rs->rs_rgd->rd_rs_cnt > RGRP_RSRV_MAX_CONTENDERS) {
+			   rs->rs_rbm.rgd->rd_rs_cnt > RGRP_RSRV_MAX_CONTENDERS) {
 			/* If the rgrp already is maxed out for contenders,
 			   we can eliminate it as a "first pass" without even
 			   requesting the rgrp glock. */
 			error = GLR_TRYFAILED;
 		} else {
-			error = gfs2_glock_nq_init(rs->rs_rgd->rd_gl,
+			error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl,
 						   LM_ST_EXCLUSIVE, flags,
 						   &rs->rs_rgd_gh);
 			if (!error && sdp->sd_args.ar_rgrplvb) {
-				error = update_rgrp_lvb(rs->rs_rgd);
+				error = update_rgrp_lvb(rs->rs_rbm.rgd);
 				if (error) {
 					gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
 					return error;
@@ -1601,36 +1590,36 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
 		switch (error) {
 		case 0:
 			if (gfs2_rs_active(rs)) {
-				if (unclaimed_blocks(rs->rs_rgd) +
+				if (unclaimed_blocks(rs->rs_rbm.rgd) +
 				    rs->rs_free >= requested) {
-					ip->i_rgd = rs->rs_rgd;
+					ip->i_rgd = rs->rs_rbm.rgd;
 					return 0;
 				}
 				/* We have a multi-block reservation, but the
 				   rgrp doesn't have enough free blocks to
 				   satisfy the request. Free the reservation
 				   and look for a suitable rgrp. */
-				gfs2_rs_deltree(rs);
+				gfs2_rs_deltree(ip, rs);
 			}
-			if (try_rgrp_fit(rs->rs_rgd, ip, requested)) {
+			if (try_rgrp_fit(rs->rs_rbm.rgd, ip, requested)) {
 				if (sdp->sd_args.ar_rgrplvb)
-					gfs2_rgrp_bh_get(rs->rs_rgd);
-				ip->i_rgd = rs->rs_rgd;
+					gfs2_rgrp_bh_get(rs->rs_rbm.rgd);
+				ip->i_rgd = rs->rs_rbm.rgd;
 				return 0;
 			}
-			if (rs->rs_rgd->rd_flags & GFS2_RDF_CHECK) {
+			if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK) {
 				if (sdp->sd_args.ar_rgrplvb)
-					gfs2_rgrp_bh_get(rs->rs_rgd);
-				try_rgrp_unlink(rs->rs_rgd, &last_unlinked,
+					gfs2_rgrp_bh_get(rs->rs_rbm.rgd);
+				try_rgrp_unlink(rs->rs_rbm.rgd, &last_unlinked,
 						ip->i_no_addr);
 			}
 			if (!rg_locked)
 				gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
 			/* fall through */
 		case GLR_TRYFAILED:
-			rs->rs_rgd = gfs2_rgrpd_get_next(rs->rs_rgd);
-			rs->rs_rgd = rs->rs_rgd ? : begin; /* if NULL, wrap */
-			if (rs->rs_rgd != begin) /* If we didn't wrap */
+			rs->rs_rbm.rgd = gfs2_rgrpd_get_next(rs->rs_rbm.rgd);
+			rs->rs_rbm.rgd = rs->rs_rbm.rgd ? : begin; /* if NULL, wrap */
+			if (rs->rs_rbm.rgd != begin) /* If we didn't wrap */
 				break;
 
 			flags &= ~LM_FLAG_TRY;
@@ -1776,11 +1765,11 @@ do_search:
 			if (rs == NULL)
 				break;
 
-			BUG_ON(rs->rs_bi != bi);
+			BUG_ON(rs->rs_rbm.bi != bi);
 			biblk = BFITNOENT;
 			/* This should jump to the first block after the
 			   reservation. */
-			goal = rs->rs_biblk + rs->rs_free;
+			goal = rs->rs_rbm.offset + rs->rs_free;
 			if (goal >= bi->bi_len * GFS2_NBBY)
 				break;
 		}
@@ -1805,9 +1794,7 @@ skip:
 
 /**
  * gfs2_alloc_extent - allocate an extent from a given bitmap
- * @rgd: the resource group descriptor
- * @bi: the bitmap within the rgrp
- * @blk: the block within the bitmap
+ * @rbm: the resource group information
  * @dinode: TRUE if the first block we allocate is for a dinode
  * @n: The extent length
  *
@@ -1815,9 +1802,12 @@ skip:
  * Set the found bits to @new_state to change block's allocation state.
  * Returns: starting block number of the extent (fs scope)
  */
-static u64 gfs2_alloc_extent(struct gfs2_rgrpd *rgd, struct gfs2_bitmap *bi,
-			     u32 blk, bool dinode, unsigned int *n)
+static u64 gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode,
+			     unsigned int *n)
 {
+	struct gfs2_rgrpd *rgd = rbm->rgd;
+	struct gfs2_bitmap *bi = rbm->bi;
+	u32 blk = rbm->offset;
 	const unsigned int elen = *n;
 	u32 goal, rgblk;
 	const u8 *buffer = NULL;
@@ -1956,21 +1946,21 @@ static u64 claim_reserved_blks(struct gfs2_inode *ip, bool dinode,
 			       unsigned int *nblocks)
 {
 	struct gfs2_blkreserv *rs = ip->i_res;
-	struct gfs2_rgrpd *rgd = rs->rs_rgd;
+	struct gfs2_rgrpd *rgd = rs->rs_rbm.rgd;
 	struct gfs2_bitmap *bi;
-	u64 start_block = gfs2_rs_startblk(rs);
+	u64 start_block = gfs2_rbm_to_block(&rs->rs_rbm);
 	const unsigned int elen = *nblocks;
 
-	bi = rs->rs_bi;
+	bi = rs->rs_rbm.bi;
 	gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
 
 	for (*nblocks = 0; *nblocks < elen && rs->rs_free; (*nblocks)++) {
 		if (gfs2_testbit(rgd, bi->bi_bh->b_data + bi->bi_offset,
-				 bi->bi_len, rs->rs_biblk) != GFS2_BLKST_FREE)
+				 bi->bi_len, rs->rs_rbm.offset) != GFS2_BLKST_FREE)
 			break;
-		gfs2_setbit(rgd, bi->bi_clone, bi, rs->rs_biblk,
+		gfs2_setbit(rgd, bi->bi_clone, bi, rs->rs_rbm.offset,
 			    dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
-		rs->rs_biblk++;
+		rs->rs_rbm.offset++;
 		rs->rs_free--;
 
 		BUG_ON(!rgd->rd_reserved);
@@ -1980,7 +1970,7 @@ static u64 claim_reserved_blks(struct gfs2_inode *ip, bool dinode,
 
 	trace_gfs2_rs(ip, rs, TRACE_RS_CLAIM);
 	if (!rs->rs_free || *nblocks != elen)
-		gfs2_rs_deltree(rs);
+		gfs2_rs_deltree(ip, rs);
 
 	return start_block;
 }
@@ -2001,40 +1991,37 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	struct buffer_head *dibh;
-	struct gfs2_rgrpd *rgd;
+	struct gfs2_rbm rbm = { .rgd = ip->i_rgd, };
 	unsigned int ndata;
-	u32 goal, blk; /* block, within the rgrp scope */
+	u32 goal; /* block, within the rgrp scope */
 	u64 block; /* block, within the file system scope */
 	int error;
-	struct gfs2_bitmap *bi;
 
 	/* If we have a reservation, claim blocks from it. */
 	if (gfs2_rs_active(ip->i_res)) {
 		BUG_ON(!ip->i_res->rs_free);
-		rgd = ip->i_res->rs_rgd;
+		rbm.rgd = ip->i_res->rs_rbm.rgd;
 		block = claim_reserved_blks(ip, dinode, nblocks);
 		if (*nblocks)
 			goto found_blocks;
 	}
 
-	rgd = ip->i_rgd;
-
-	if (!dinode && rgrp_contains_block(rgd, ip->i_goal))
-		goal = ip->i_goal - rgd->rd_data0;
+	if (!dinode && rgrp_contains_block(rbm.rgd, ip->i_goal))
+		goal = ip->i_goal - rbm.rgd->rd_data0;
 	else
-		goal = rgd->rd_last_alloc;
+		goal = rbm.rgd->rd_last_alloc;
 
-	blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, &bi);
+	rbm.offset = rgblk_search(rbm.rgd, goal, GFS2_BLKST_FREE, &rbm.bi);
 
 	/* Since all blocks are reserved in advance, this shouldn't happen */
-	if (blk == BFITNOENT) {
+	if (rbm.offset == BFITNOENT) {
 		printk(KERN_WARNING "BFITNOENT, nblocks=%u\n", *nblocks);
 		printk(KERN_WARNING "FULL=%d\n",
-		       test_bit(GBF_FULL, &rgd->rd_bits->bi_flags));
+		       test_bit(GBF_FULL, &rbm.rgd->rd_bits->bi_flags));
 		goto rgrp_error;
 	}
 
-	block = gfs2_alloc_extent(rgd, bi, blk, dinode, nblocks);
+	block = gfs2_alloc_extent(&rbm, dinode, nblocks);
 found_blocks:
 	ndata = *nblocks;
 	if (dinode)
@@ -2052,22 +2039,22 @@ found_blocks:
 			brelse(dibh);
 		}
 	}
-	if (rgd->rd_free < *nblocks) {
+	if (rbm.rgd->rd_free < *nblocks) {
 		printk(KERN_WARNING "nblocks=%u\n", *nblocks);
 		goto rgrp_error;
 	}
 
-	rgd->rd_free -= *nblocks;
+	rbm.rgd->rd_free -= *nblocks;
 	if (dinode) {
-		rgd->rd_dinodes++;
-		*generation = rgd->rd_igeneration++;
+		rbm.rgd->rd_dinodes++;
+		*generation = rbm.rgd->rd_igeneration++;
 		if (*generation == 0)
-			*generation = rgd->rd_igeneration++;
+			*generation = rbm.rgd->rd_igeneration++;
 	}
 
-	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
-	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
-	gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
+	gfs2_trans_add_bh(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh, 1);
+	gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data);
+	gfs2_rgrp_ondisk2lvb(rbm.rgd->rd_rgl, rbm.rgd->rd_bits[0].bi_bh->b_data);
 
 	gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0);
 	if (dinode)
@@ -2081,14 +2068,14 @@ found_blocks:
 		gfs2_quota_change(ip, ndata, ip->i_inode.i_uid,
 				  ip->i_inode.i_gid);
 
-	rgd->rd_free_clone -= *nblocks;
-	trace_gfs2_block_alloc(ip, rgd, block, *nblocks,
+	rbm.rgd->rd_free_clone -= *nblocks;
+	trace_gfs2_block_alloc(ip, rbm.rgd, block, *nblocks,
 			       dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
 	*bn = block;
 	return 0;
 
 rgrp_error:
-	gfs2_rgrp_error(rgd);
+	gfs2_rgrp_error(rbm.rgd);
 	return -EIO;
 }
 
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index 0b0e9cc7e3d9..c98f6af07e1c 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -46,7 +46,7 @@ extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n,
 			     bool dinode, u64 *generation);
 
 extern int gfs2_rs_alloc(struct gfs2_inode *ip);
-extern void gfs2_rs_deltree(struct gfs2_blkreserv *rs);
+extern void gfs2_rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs);
 extern void gfs2_rs_delete(struct gfs2_inode *ip);
 extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta);
 extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
@@ -73,22 +73,16 @@ extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
 				   const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed);
 extern int gfs2_fitrim(struct file *filp, void __user *argp);
 
-/* This is how to tell if a multi-block reservation is in the rgrp tree: */
-static inline int gfs2_rs_active(struct gfs2_blkreserv *rs)
+/* This is how to tell if a reservation is in the rgrp tree: */
+static inline bool gfs2_rs_active(struct gfs2_blkreserv *rs)
 {
-	if (rs && rs->rs_bi)
-		return 1;
-	return 0;
+	return rs && !RB_EMPTY_NODE(&rs->rs_node);
 }
 
+
 static inline u32 gfs2_bi2rgd_blk(const struct gfs2_bitmap *bi, u32 blk)
 {
 	return (bi->bi_start * GFS2_NBBY) + blk;
 }
 
-static inline u64 gfs2_rs_startblk(const struct gfs2_blkreserv *rs)
-{
-	return gfs2_bi2rgd_blk(rs->rs_bi, rs->rs_biblk) + rs->rs_rgd->rd_data0;
-}
-
 #endif /* __RGRP_DOT_H__ */
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index fc3168f47a14..3cbac6803038 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1557,7 +1557,7 @@ out_truncate:
 out_unlock:
 	/* Error path for case 1 */
 	if (gfs2_rs_active(ip->i_res))
-		gfs2_rs_deltree(ip->i_res);
+		gfs2_rs_deltree(ip, ip->i_res);
 
 	if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags))
 		gfs2_glock_dq(&ip->i_iopen_gh);
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
index a25c252fe412..b947aa4dfca4 100644
--- a/fs/gfs2/trace_gfs2.h
+++ b/fs/gfs2/trace_gfs2.h
@@ -526,12 +526,12 @@ TRACE_EVENT(gfs2_rs,
 	),
 
 	TP_fast_assign(
-		__entry->dev		= rs->rs_rgd ? rs->rs_rgd->rd_sbd->sd_vfs->s_dev : 0;
-		__entry->rd_addr	= rs->rs_rgd ? rs->rs_rgd->rd_addr : 0;
-		__entry->rd_free_clone	= rs->rs_rgd ? rs->rs_rgd->rd_free_clone : 0;
-		__entry->rd_reserved	= rs->rs_rgd ? rs->rs_rgd->rd_reserved : 0;
+		__entry->dev		= rs->rs_rbm.rgd ? rs->rs_rbm.rgd->rd_sbd->sd_vfs->s_dev : 0;
+		__entry->rd_addr	= rs->rs_rbm.rgd ? rs->rs_rbm.rgd->rd_addr : 0;
+		__entry->rd_free_clone	= rs->rs_rbm.rgd ? rs->rs_rbm.rgd->rd_free_clone : 0;
+		__entry->rd_reserved	= rs->rs_rbm.rgd ? rs->rs_rbm.rgd->rd_reserved : 0;
 		__entry->inum		= ip ? ip->i_no_addr : 0;
-		__entry->start		= gfs2_rs_startblk(rs);
+		__entry->start		= gfs2_rbm_to_block(&rs->rs_rbm);
 		__entry->free		= rs->rs_free;
 		__entry->func		= func;
 	),
-- 
cgit 


From 5b924ae2dcb1cc5e78445a0cedb5a3673bb5ad8a Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 1 Aug 2012 20:35:05 +0100
Subject: GFS2: Replace rgblk_search with gfs2_rbm_find

This is part of a series of patches which are introducing the
gfs2_rbm structure throughout the block allocation code. The
main aim of this part is to create a search function which can
deal directly with struct gfs2_rbm. In this case it specifies
the initial position at which to start the search and also the
point at which the search terminates.

The net result of this is to clean up the search code and make
it rather more readable, and the various possible exceptions which
may occur during the search are partitioned into their own functions.

There are some bug fixes too. We should not be checking the reservations
while allocating extents - the time for that is when we are searching
for where to put the extent, not when we've already made that decision.

Also, rgblk_search had two uses, and in only one of those cases did
it make sense to check for reservations. This is fixed in the new
gfs2_rbm_find function, which has a cleaner interface.

The reservation checking has been improved by always checking for
contiguous reservations, and returning the first free block after
all contiguous reservations. This is done under the spin lock to
ensure consistancy of the tree.

The allocation of extents is now in all cases done by the existing
allocation code, and if there is an active reservation, that is updated
after the fact. Again this is done under the spin lock, since it entails
changing the lookup key for the reservation in question.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/incore.h |   7 +
 fs/gfs2/rgrp.c   | 460 ++++++++++++++++++++++++++++++-------------------------
 2 files changed, 257 insertions(+), 210 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index d5e254604c72..99d7c64b5091 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -113,6 +113,13 @@ static inline u64 gfs2_rbm_to_block(const struct gfs2_rbm *rbm)
 	return rbm->rgd->rd_data0 + (rbm->bi->bi_start * GFS2_NBBY) + rbm->offset;
 }
 
+static inline bool gfs2_rbm_eq(const struct gfs2_rbm *rbm1,
+			       const struct gfs2_rbm *rbm2)
+{
+	return (rbm1->rgd == rbm2->rgd) && (rbm1->bi == rbm2->bi) && 
+	       (rbm1->offset == rbm2->offset);
+}
+
 enum gfs2_state_bits {
 	BH_Pinned = BH_PrivateStart,
 	BH_Escaped = BH_PrivateStart + 1,
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index eaa41885a00d..bd3b926949d0 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -67,10 +67,6 @@ static const char valid_change[16] = {
 	        1, 0, 0, 0
 };
 
-static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
-			unsigned char old_state,
-			struct gfs2_bitmap **rbi);
-
 /**
  * gfs2_setbit - Set a bit in the bitmaps
  * @rgd: the resource group descriptor
@@ -201,36 +197,6 @@ static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs)
 	return 0;
 }
 
-/**
- * rs_find - Find a rgrp multi-block reservation that contains a given block
- * @rgd: The rgrp
- * @rgblk: The block we're looking for, relative to the rgrp
- */
-static struct gfs2_blkreserv *rs_find(struct gfs2_rgrpd *rgd, u32 rgblk)
-{
-	struct rb_node **newn;
-	int rc;
-	u64 fsblk = rgblk + rgd->rd_data0;
-
-	spin_lock(&rgd->rd_rsspin);
-	newn = &rgd->rd_rstree.rb_node;
-	while (*newn) {
-		struct gfs2_blkreserv *cur =
-			rb_entry(*newn, struct gfs2_blkreserv, rs_node);
-		rc = rs_cmp(fsblk, 1, cur);
-		if (rc < 0)
-			newn = &((*newn)->rb_left);
-		else if (rc > 0)
-			newn = &((*newn)->rb_right);
-		else {
-			spin_unlock(&rgd->rd_rsspin);
-			return cur;
-		}
-	}
-	spin_unlock(&rgd->rd_rsspin);
-	return NULL;
-}
-
 /**
  * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing
  *       a block in a given allocation state.
@@ -1306,9 +1272,6 @@ static struct gfs2_blkreserv *rs_insert(struct gfs2_bitmap *bi,
 	rb_link_node(&rs->rs_node, parent, newn);
 	rb_insert_color(&rs->rs_node, &rgd->rd_rstree);
 
-	/* Do our inode accounting for the reservation */
-	/*BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));*/
-
 	/* Do our rgrp accounting for the reservation */
 	rgd->rd_reserved += amount; /* blocks reserved */
 	rgd->rd_rs_cnt++; /* number of in-tree reservations */
@@ -1463,6 +1426,199 @@ static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
 	return 0;
 }
 
+/**
+ * gfs2_next_unreserved_block - Return next block that is not reserved
+ * @rgd: The resource group
+ * @block: The starting block
+ * @ip: Ignore any reservations for this inode
+ *
+ * If the block does not appear in any reservation, then return the
+ * block number unchanged. If it does appear in the reservation, then
+ * keep looking through the tree of reservations in order to find the
+ * first block number which is not reserved.
+ */
+
+static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block,
+				      const struct gfs2_inode *ip)
+{
+	struct gfs2_blkreserv *rs;
+	struct rb_node *n;
+	int rc;
+
+	spin_lock(&rgd->rd_rsspin);
+	n = rb_first(&rgd->rd_rstree);
+	while (n) {
+		rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
+		rc = rs_cmp(block, 1, rs);
+		if (rc < 0)
+			n = n->rb_left;
+		else if (rc > 0)
+			n = n->rb_right;
+		else
+			break;
+	}
+
+	if (n) {
+		while ((rs_cmp(block, 1, rs) == 0) && (ip->i_res != rs)) {
+			block = gfs2_rbm_to_block(&rs->rs_rbm) + rs->rs_free;
+			n = rb_next(&rs->rs_node);
+			if (n == NULL)
+				break;
+			rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
+		}
+	}
+
+	spin_unlock(&rgd->rd_rsspin);
+	return block;
+}
+
+/**
+ * gfs2_rbm_from_block - Set the rbm based upon rgd and block number
+ * @rbm: The rbm with rgd already set correctly
+ * @block: The block number (filesystem relative)
+ *
+ * This sets the bi and offset members of an rbm based on a
+ * resource group and a filesystem relative block number. The
+ * resource group must be set in the rbm on entry, the bi and
+ * offset members will be set by this function.
+ *
+ * Returns: 0 on success, or an error code
+ */
+
+static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block)
+{
+	u64 rblock = block - rbm->rgd->rd_data0;
+	u32 goal = (u32)rblock;
+	int x;
+
+	if (WARN_ON_ONCE(rblock > UINT_MAX))
+		return -EINVAL;
+
+	for (x = 0; x < rbm->rgd->rd_length; x++) {
+		rbm->bi = rbm->rgd->rd_bits + x;
+		if (goal < (rbm->bi->bi_start + rbm->bi->bi_len) * GFS2_NBBY) {
+			rbm->offset = goal - (rbm->bi->bi_start * GFS2_NBBY);
+			return 0;
+		}
+	}
+
+	return -E2BIG;
+}
+
+/**
+ * gfs2_reservation_check_and_update - Check for reservations during block alloc
+ * @rbm: The current position in the resource group
+ *
+ * This checks the current position in the rgrp to see whether there is
+ * a reservation covering this block. If not then this function is a
+ * no-op. If there is, then the position is moved to the end of the
+ * contiguous reservation(s) so that we are pointing at the first
+ * non-reserved block.
+ *
+ * Returns: 0 if no reservation, 1 if @rbm has changed, otherwise an error
+ */
+
+static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm,
+					     const struct gfs2_inode *ip)
+{
+	u64 block = gfs2_rbm_to_block(rbm);
+	u64 nblock;
+	int ret;
+
+	nblock = gfs2_next_unreserved_block(rbm->rgd, block, ip);
+	if (nblock == block)
+		return 0;
+	ret = gfs2_rbm_from_block(rbm, nblock);
+	if (ret < 0)
+		return ret;
+	return 1;
+}
+
+/**
+ * gfs2_rbm_find - Look for blocks of a particular state
+ * @rbm: Value/result starting position and final position
+ * @state: The state which we want to find
+ * @ip: If set, check for reservations
+ * @nowrap: Stop looking at the end of the rgrp, rather than wrapping
+ *          around until we've reached the starting point.
+ *
+ * Side effects:
+ * - If looking for free blocks, we set GBF_FULL on each bitmap which
+ *   has no free blocks in it.
+ *
+ * Returns: 0 on success, -ENOSPC if there is no block of the requested state
+ */
+
+static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state,
+			 const struct gfs2_inode *ip, bool nowrap)
+{
+	struct buffer_head *bh;
+	struct gfs2_bitmap *initial_bi;
+	u32 initial_offset;
+	u32 offset;
+	u8 *buffer;
+	int index;
+	int n = 0;
+	int iters = rbm->rgd->rd_length;
+	int ret;
+
+	/* If we are not starting at the beginning of a bitmap, then we
+	 * need to add one to the bitmap count to ensure that we search
+	 * the starting bitmap twice.
+	 */
+	if (rbm->offset != 0)
+		iters++;
+
+	while(1) {
+		if (test_bit(GBF_FULL, &rbm->bi->bi_flags) &&
+		    (state == GFS2_BLKST_FREE))
+			goto next_bitmap;
+
+		bh = rbm->bi->bi_bh;
+		buffer = bh->b_data + rbm->bi->bi_offset;
+		WARN_ON(!buffer_uptodate(bh));
+		if (state != GFS2_BLKST_UNLINKED && rbm->bi->bi_clone)
+			buffer = rbm->bi->bi_clone + rbm->bi->bi_offset;
+find_next:
+		initial_offset = rbm->offset;
+		offset = gfs2_bitfit(buffer, rbm->bi->bi_len, rbm->offset, state);
+		if (offset == BFITNOENT)
+			goto bitmap_full;
+		rbm->offset = offset;
+		if (ip == NULL)
+			return 0;
+
+		initial_bi = rbm->bi;
+		ret = gfs2_reservation_check_and_update(rbm, ip);
+		if (ret == 0)
+			return 0;
+		if (ret > 0) {
+			n += (rbm->bi - initial_bi);
+			goto find_next;
+		}
+		return ret;
+
+bitmap_full:	/* Mark bitmap as full and fall through */
+		if ((state == GFS2_BLKST_FREE) && initial_offset == 0)
+			set_bit(GBF_FULL, &rbm->bi->bi_flags);
+
+next_bitmap:	/* Find next bitmap in the rgrp */
+		rbm->offset = 0;
+		index = rbm->bi - rbm->rgd->rd_bits;
+		index++;
+		if (index == rbm->rgd->rd_length)
+			index = 0;
+		rbm->bi = &rbm->rgd->rd_bits[index];
+		if ((index == 0) && nowrap)
+			break;
+		n++;
+		if (n >= iters)
+			break;
+	}
+
+	return -ENOSPC;
+}
+
 /**
  * try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes
  * @rgd: The rgrp
@@ -1475,34 +1631,33 @@ static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
 
 static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip)
 {
-	u32 goal = 0, block;
-	u64 no_addr;
+	u64 block;
 	struct gfs2_sbd *sdp = rgd->rd_sbd;
 	struct gfs2_glock *gl;
 	struct gfs2_inode *ip;
 	int error;
 	int found = 0;
-	struct gfs2_bitmap *bi;
+	struct gfs2_rbm rbm = { .rgd = rgd, .bi = rgd->rd_bits, .offset = 0 };
 
-	while (goal < rgd->rd_data) {
+	while (1) {
 		down_write(&sdp->sd_log_flush_lock);
-		block = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED, &bi);
+		error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, NULL, true);
 		up_write(&sdp->sd_log_flush_lock);
-		if (block == BFITNOENT)
+		if (error == -ENOSPC)
+			break;
+		if (WARN_ON_ONCE(error))
 			break;
 
-		block = gfs2_bi2rgd_blk(bi, block);
-		/* rgblk_search can return a block < goal, so we need to
-		   keep it marching forward. */
-		no_addr = block + rgd->rd_data0;
-		goal = max(block + 1, goal + 1);
-		if (*last_unlinked != NO_BLOCK && no_addr <= *last_unlinked)
+		block = gfs2_rbm_to_block(&rbm);
+		if (gfs2_rbm_from_block(&rbm, block + 1))
+			break;
+		if (*last_unlinked != NO_BLOCK && block <= *last_unlinked)
 			continue;
-		if (no_addr == skip)
+		if (block == skip)
 			continue;
-		*last_unlinked = no_addr;
+		*last_unlinked = block;
 
-		error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &gl);
+		error = gfs2_glock_get(sdp, block, &gfs2_inode_glops, CREATE, &gl);
 		if (error)
 			continue;
 
@@ -1692,105 +1847,6 @@ static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
 	return type;
 }
 
-/**
- * rgblk_search - find a block in @state
- * @rgd: the resource group descriptor
- * @goal: the goal block within the RG (start here to search for avail block)
- * @state: GFS2_BLKST_XXX the before-allocation state to find
- * @rbi: address of the pointer to the bitmap containing the block found
- *
- * Walk rgrp's bitmap to find bits that represent a block in @state.
- *
- * This function never fails, because we wouldn't call it unless we
- * know (from reservation results, etc.) that a block is available.
- *
- * Scope of @goal is just within rgrp, not the whole filesystem.
- * Scope of @returned block is just within bitmap, not the whole filesystem.
- *
- * Returns: the block number found relative to the bitmap rbi
- */
-
-static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, unsigned char state,
-			struct gfs2_bitmap **rbi)
-{
-	struct gfs2_bitmap *bi = NULL;
-	const u32 length = rgd->rd_length;
-	u32 biblk = BFITNOENT;
-	unsigned int buf, x;
-	const u8 *buffer = NULL;
-
-	*rbi = NULL;
-	/* Find bitmap block that contains bits for goal block */
-	for (buf = 0; buf < length; buf++) {
-		bi = rgd->rd_bits + buf;
-		/* Convert scope of "goal" from rgrp-wide to within found bit block */
-		if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY) {
-			goal -= bi->bi_start * GFS2_NBBY;
-			goto do_search;
-		}
-	}
-	buf = 0;
-	goal = 0;
-
-do_search:
-	/* Search (up to entire) bitmap in this rgrp for allocatable block.
-	   "x <= length", instead of "x < length", because we typically start
-	   the search in the middle of a bit block, but if we can't find an
-	   allocatable block anywhere else, we want to be able wrap around and
-	   search in the first part of our first-searched bit block.  */
-	for (x = 0; x <= length; x++) {
-		bi = rgd->rd_bits + buf;
-
-		if (test_bit(GBF_FULL, &bi->bi_flags) &&
-		    (state == GFS2_BLKST_FREE))
-			goto skip;
-
-		/* The GFS2_BLKST_UNLINKED state doesn't apply to the clone
-		   bitmaps, so we must search the originals for that. */
-		buffer = bi->bi_bh->b_data + bi->bi_offset;
-		WARN_ON(!buffer_uptodate(bi->bi_bh));
-		if (state != GFS2_BLKST_UNLINKED && bi->bi_clone)
-			buffer = bi->bi_clone + bi->bi_offset;
-
-		while (1) {
-			struct gfs2_blkreserv *rs;
-			u32 rgblk;
-
-			biblk = gfs2_bitfit(buffer, bi->bi_len, goal, state);
-			if (biblk == BFITNOENT)
-				break;
-			/* Check if this block is reserved() */
-			rgblk = gfs2_bi2rgd_blk(bi, biblk);
-			rs = rs_find(rgd, rgblk);
-			if (rs == NULL)
-				break;
-
-			BUG_ON(rs->rs_rbm.bi != bi);
-			biblk = BFITNOENT;
-			/* This should jump to the first block after the
-			   reservation. */
-			goal = rs->rs_rbm.offset + rs->rs_free;
-			if (goal >= bi->bi_len * GFS2_NBBY)
-				break;
-		}
-		if (biblk != BFITNOENT)
-			break;
-
-		if ((goal == 0) && (state == GFS2_BLKST_FREE))
-			set_bit(GBF_FULL, &bi->bi_flags);
-
-		/* Try next bitmap block (wrap back to rgrp header if at end) */
-skip:
-		buf++;
-		buf %= length;
-		goal = 0;
-	}
-
-	if (biblk != BFITNOENT)
-		*rbi = bi;
-
-	return biblk;
-}
 
 /**
  * gfs2_alloc_extent - allocate an extent from a given bitmap
@@ -1809,9 +1865,8 @@ static u64 gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode,
 	struct gfs2_bitmap *bi = rbm->bi;
 	u32 blk = rbm->offset;
 	const unsigned int elen = *n;
-	u32 goal, rgblk;
+	u32 goal;
 	const u8 *buffer = NULL;
-	struct gfs2_blkreserv *rs;
 
 	*n = 0;
 	buffer = bi->bi_bh->b_data + bi->bi_offset;
@@ -1824,10 +1879,6 @@ static u64 gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode,
 		goal++;
 		if (goal >= (bi->bi_len * GFS2_NBBY))
 			break;
-		rgblk = gfs2_bi2rgd_blk(bi, goal);
-		rs = rs_find(rgd, rgblk);
-		if (rs) /* Oops, we bumped into someone's reservation */
-			break;
 		if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) !=
 		    GFS2_BLKST_FREE)
 			break;
@@ -1933,46 +1984,41 @@ static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd)
 }
 
 /**
- * claim_reserved_blks - Claim previously reserved blocks
- * @ip: the inode that's claiming the reservation
- * @dinode: 1 if this block is a dinode block, otherwise data block
- * @nblocks: desired extent length
+ * gfs2_adjust_reservation - Adjust (or remove) a reservation after allocation
+ * @ip: The inode we have just allocated blocks for
+ * @rbm: The start of the allocated blocks
+ * @len: The extent length
  *
- * Lay claim to previously reserved blocks.
- * Returns: Starting block number of the blocks claimed.
- * Sets *nblocks to the actual extent length allocated.
+ * Adjusts a reservation after an allocation has taken place. If the
+ * reservation does not match the allocation, or if it is now empty
+ * then it is removed.
  */
-static u64 claim_reserved_blks(struct gfs2_inode *ip, bool dinode,
-			       unsigned int *nblocks)
+
+static void gfs2_adjust_reservation(struct gfs2_inode *ip,
+				    const struct gfs2_rbm *rbm, unsigned len)
 {
 	struct gfs2_blkreserv *rs = ip->i_res;
-	struct gfs2_rgrpd *rgd = rs->rs_rbm.rgd;
-	struct gfs2_bitmap *bi;
-	u64 start_block = gfs2_rbm_to_block(&rs->rs_rbm);
-	const unsigned int elen = *nblocks;
-
-	bi = rs->rs_rbm.bi;
-	gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
+	struct gfs2_rgrpd *rgd = rbm->rgd;
+	unsigned rlen;
+	u64 block;
+	int ret;
 
-	for (*nblocks = 0; *nblocks < elen && rs->rs_free; (*nblocks)++) {
-		if (gfs2_testbit(rgd, bi->bi_bh->b_data + bi->bi_offset,
-				 bi->bi_len, rs->rs_rbm.offset) != GFS2_BLKST_FREE)
-			break;
-		gfs2_setbit(rgd, bi->bi_clone, bi, rs->rs_rbm.offset,
-			    dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
-		rs->rs_rbm.offset++;
-		rs->rs_free--;
-
-		BUG_ON(!rgd->rd_reserved);
-		rgd->rd_reserved--;
-		dinode = false;
+	spin_lock(&rgd->rd_rsspin);
+	if (gfs2_rs_active(rs)) {
+		if (gfs2_rbm_eq(&rs->rs_rbm, rbm)) {
+			block = gfs2_rbm_to_block(rbm);
+			ret = gfs2_rbm_from_block(&rs->rs_rbm, block + len);
+			rlen = min(rs->rs_free, len);
+			rs->rs_free -= rlen;
+			rgd->rd_reserved -= rlen;
+			trace_gfs2_rs(ip, rs, TRACE_RS_CLAIM);
+			if (rs->rs_free && !ret)
+				goto out;
+		}
+		__rs_deltree(ip, rs);
 	}
-
-	trace_gfs2_rs(ip, rs, TRACE_RS_CLAIM);
-	if (!rs->rs_free || *nblocks != elen)
-		gfs2_rs_deltree(ip, rs);
-
-	return start_block;
+out:
+	spin_unlock(&rgd->rd_rsspin);
 }
 
 /**
@@ -1993,36 +2039,30 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
 	struct buffer_head *dibh;
 	struct gfs2_rbm rbm = { .rgd = ip->i_rgd, };
 	unsigned int ndata;
-	u32 goal; /* block, within the rgrp scope */
+	u64 goal;
 	u64 block; /* block, within the file system scope */
 	int error;
 
-	/* If we have a reservation, claim blocks from it. */
-	if (gfs2_rs_active(ip->i_res)) {
-		BUG_ON(!ip->i_res->rs_free);
-		rbm.rgd = ip->i_res->rs_rbm.rgd;
-		block = claim_reserved_blks(ip, dinode, nblocks);
-		if (*nblocks)
-			goto found_blocks;
-	}
-
-	if (!dinode && rgrp_contains_block(rbm.rgd, ip->i_goal))
-		goal = ip->i_goal - rbm.rgd->rd_data0;
+	if (gfs2_rs_active(ip->i_res))
+		goal = gfs2_rbm_to_block(&ip->i_res->rs_rbm);
+	else if (!dinode && rgrp_contains_block(rbm.rgd, ip->i_goal))
+		goal = ip->i_goal;
 	else
-		goal = rbm.rgd->rd_last_alloc;
+		goal = rbm.rgd->rd_last_alloc + rbm.rgd->rd_data0;
 
-	rbm.offset = rgblk_search(rbm.rgd, goal, GFS2_BLKST_FREE, &rbm.bi);
+	gfs2_rbm_from_block(&rbm, goal);
+	error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, ip, false);
 
 	/* Since all blocks are reserved in advance, this shouldn't happen */
-	if (rbm.offset == BFITNOENT) {
-		printk(KERN_WARNING "BFITNOENT, nblocks=%u\n", *nblocks);
-		printk(KERN_WARNING "FULL=%d\n",
-		       test_bit(GBF_FULL, &rbm.rgd->rd_bits->bi_flags));
+	if (error) {
+		fs_warn(sdp, "error=%d, nblocks=%u, full=%d\n", error, *nblocks,
+			test_bit(GBF_FULL, &rbm.rgd->rd_bits->bi_flags));
 		goto rgrp_error;
 	}
 
 	block = gfs2_alloc_extent(&rbm, dinode, nblocks);
-found_blocks:
+	if (gfs2_rs_active(ip->i_res))
+		gfs2_adjust_reservation(ip, &rbm, *nblocks);
 	ndata = *nblocks;
 	if (dinode)
 		ndata--;
-- 
cgit 


From 3983903a712e74548fa08ef25d68e55b8e4349c6 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Fri, 3 Aug 2012 11:10:30 +0100
Subject: GFS2: Update gfs2_get_block_type() to use rbm

Use the new gfs2_rbm_from_block() function to replace an open
coded version of the same code.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/rgrp.c | 25 ++++++-------------------
 1 file changed, 6 insertions(+), 19 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index bd3b926949d0..0c1be38f8370 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1824,27 +1824,14 @@ void gfs2_inplace_release(struct gfs2_inode *ip)
 
 static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
 {
-	struct gfs2_bitmap *bi = NULL;
-	u32 length, rgrp_block, buf_block;
-	unsigned int buf;
-	unsigned char type;
-
-	length = rgd->rd_length;
-	rgrp_block = block - rgd->rd_data0;
-
-	for (buf = 0; buf < length; buf++) {
-		bi = rgd->rd_bits + buf;
-		if (rgrp_block < (bi->bi_start + bi->bi_len) * GFS2_NBBY)
-			break;
-	}
-
-	gfs2_assert(rgd->rd_sbd, buf < length);
-	buf_block = rgrp_block - bi->bi_start * GFS2_NBBY;
+	struct gfs2_rbm rbm = { .rgd = rgd, };
+	int ret;
 
-	type = gfs2_testbit(rgd, bi->bi_bh->b_data + bi->bi_offset,
-			   bi->bi_len, buf_block);
+	ret = gfs2_rbm_from_block(&rbm, block);
+	WARN_ON_ONCE(ret != 0);
 
-	return type;
+	return gfs2_testbit(rgd, rbm.bi->bi_bh->b_data + rbm.bi->bi_offset,
+			    rbm.bi->bi_len, rbm.offset);
 }
 
 
-- 
cgit 


From 3b1d0b9d0b6f4b76293c8f30cc95aa946bd34150 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Fri, 3 Aug 2012 11:23:28 +0100
Subject: GFS2: Update rgblk_free() to use rbm

Replace open coded version with a call to gfs2_rbm_from_block()

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/rgrp.c | 44 ++++++++++++++------------------------------
 1 file changed, 14 insertions(+), 30 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 0c1be38f8370..06476b34a1b9 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1890,46 +1890,30 @@ static u64 gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode,
 static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
 				     u32 blen, unsigned char new_state)
 {
-	struct gfs2_rgrpd *rgd;
-	struct gfs2_bitmap *bi = NULL;
-	u32 length, rgrp_blk, buf_blk;
-	unsigned int buf;
+	struct gfs2_rbm rbm;
 
-	rgd = gfs2_blk2rgrpd(sdp, bstart, 1);
-	if (!rgd) {
+	rbm.rgd = gfs2_blk2rgrpd(sdp, bstart, 1);
+	if (!rbm.rgd) {
 		if (gfs2_consist(sdp))
 			fs_err(sdp, "block = %llu\n", (unsigned long long)bstart);
 		return NULL;
 	}
 
-	length = rgd->rd_length;
-
-	rgrp_blk = bstart - rgd->rd_data0;
-
 	while (blen--) {
-		for (buf = 0; buf < length; buf++) {
-			bi = rgd->rd_bits + buf;
-			if (rgrp_blk < (bi->bi_start + bi->bi_len) * GFS2_NBBY)
-				break;
+		gfs2_rbm_from_block(&rbm, bstart);
+		bstart++;
+		if (!rbm.bi->bi_clone) {
+			rbm.bi->bi_clone = kmalloc(rbm.bi->bi_bh->b_size,
+						   GFP_NOFS | __GFP_NOFAIL);
+			memcpy(rbm.bi->bi_clone + rbm.bi->bi_offset,
+			       rbm.bi->bi_bh->b_data + rbm.bi->bi_offset,
+			       rbm.bi->bi_len);
 		}
-
-		gfs2_assert(rgd->rd_sbd, buf < length);
-
-		buf_blk = rgrp_blk - bi->bi_start * GFS2_NBBY;
-		rgrp_blk++;
-
-		if (!bi->bi_clone) {
-			bi->bi_clone = kmalloc(bi->bi_bh->b_size,
-					       GFP_NOFS | __GFP_NOFAIL);
-			memcpy(bi->bi_clone + bi->bi_offset,
-			       bi->bi_bh->b_data + bi->bi_offset,
-			       bi->bi_len);
-		}
-		gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
-		gfs2_setbit(rgd, NULL, bi, buf_blk, new_state);
+		gfs2_trans_add_bh(rbm.rgd->rd_gl, rbm.bi->bi_bh, 1);
+		gfs2_setbit(rbm.rgd, NULL, rbm.bi, rbm.offset, new_state);
 	}
 
-	return rgd;
+	return rbm.rgd;
 }
 
 /**
-- 
cgit 


From 24d634e8f3b43fe2eb7c7d66567de7aba8edc308 Mon Sep 17 00:00:00 2001
From: Michel Lespinasse <walken@google.com>
Date: Sun, 5 Aug 2012 22:04:08 -0700
Subject: GFS2: Use RB_CLEAR_NODE() rather than rb_init_node()

gfs2 calls RB_EMPTY_NODE() to check if nodes are not on an rbtree.
The corresponding initialization function is RB_CLEAR_NODE().
rb_init_node() was never clearly defined and is going away.

Signed-off-by: Michel Lespinasse <walken@google.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/rgrp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 06476b34a1b9..7ce22d8c489b 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -453,7 +453,7 @@ int gfs2_rs_alloc(struct gfs2_inode *ip)
 	if (!res)
 		error = -ENOMEM;
 
-	rb_init_node(&res->rs_node);
+	RB_CLEAR_NODE(&res->rs_node);
 
 	down_write(&ip->i_rw_mutex);
 	if (ip->i_res)
@@ -486,7 +486,7 @@ static void __rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs)
 	rgd = rs->rs_rbm.rgd;
 	trace_gfs2_rs(ip, rs, TRACE_RS_TREEDEL);
 	rb_erase(&rs->rs_node, &rgd->rd_rstree);
-	rb_init_node(&rs->rs_node);
+	RB_CLEAR_NODE(&rs->rs_node);
 	BUG_ON(!rgd->rd_rs_cnt);
 	rgd->rd_rs_cnt--;
 
-- 
cgit 


From 5d50d5324612d28c47b9361e5424f13a19c888cd Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Tue, 7 Aug 2012 13:47:12 +0100
Subject: GFS2: Fix case where reservation finished at end of rgrp

One corner case which the original patch failed to take into
account was when there is a reservation which ended such that
the following block was one beyond the end of the rgrp in
question. This extra test fixes that case.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Reported-by: Bob Peterson <rpeterso@redhat.com>
Tested-by: Bob Peterson <rpeterso@redhat.com>
---
 fs/gfs2/rgrp.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'fs')

diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 7ce22d8c489b..c17029a92b8f 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1596,6 +1596,12 @@ find_next:
 			n += (rbm->bi - initial_bi);
 			goto find_next;
 		}
+		if (ret == -E2BIG) {
+			index = 0;
+			rbm->offset = 0;
+			n += (rbm->bi - initial_bi);
+			goto res_covered_end_of_rgrp;
+		}
 		return ret;
 
 bitmap_full:	/* Mark bitmap as full and fall through */
@@ -1608,6 +1614,7 @@ next_bitmap:	/* Find next bitmap in the rgrp */
 		index++;
 		if (index == rbm->rgd->rd_length)
 			index = 0;
+res_covered_end_of_rgrp:
 		rbm->bi = &rbm->rgd->rd_bits[index];
 		if ((index == 0) && nowrap)
 			break;
-- 
cgit 


From 8d8b752a0f55282498b918f6a28f87ee2bcf19c3 Mon Sep 17 00:00:00 2001
From: Bob Peterson <rpeterso@redhat.com>
Date: Tue, 7 Aug 2012 13:28:17 -0400
Subject: GFS2: rbm code cleanup

This patch fixes a few small rbm related things. First, it fixes
a corner case where the rbm needs to switch bitmaps and wasn't
adjusting its buffer pointer. Second, there's a white space issue
fixed. Third, the logic in function gfs2_rbm_from_block was optimized
a bit. Lastly, a check for goal block overflows was added to function
gfs2_alloc_blocks.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/rgrp.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index c17029a92b8f..c26711882a6b 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -467,7 +467,7 @@ int gfs2_rs_alloc(struct gfs2_inode *ip)
 static void dump_rs(struct seq_file *seq, struct gfs2_blkreserv *rs)
 {
 	gfs2_print_dbg(seq, "  r: %llu s:%llu b:%u f:%u\n",
-		       rs->rs_rbm.rgd->rd_addr, gfs2_rbm_to_block(&rs->rs_rbm), 
+		       rs->rs_rbm.rgd->rd_addr, gfs2_rbm_to_block(&rs->rs_rbm),
 		       rs->rs_rbm.offset, rs->rs_free);
 }
 
@@ -1493,16 +1493,18 @@ static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block)
 
 	if (WARN_ON_ONCE(rblock > UINT_MAX))
 		return -EINVAL;
+	if (block >= rbm->rgd->rd_data0 + rbm->rgd->rd_data)
+		return -E2BIG;
 
 	for (x = 0; x < rbm->rgd->rd_length; x++) {
 		rbm->bi = rbm->rgd->rd_bits + x;
 		if (goal < (rbm->bi->bi_start + rbm->bi->bi_len) * GFS2_NBBY) {
 			rbm->offset = goal - (rbm->bi->bi_start * GFS2_NBBY);
-			return 0;
+			break;
 		}
 	}
 
-	return -E2BIG;
+	return 0;
 }
 
 /**
@@ -1579,7 +1581,6 @@ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state,
 		WARN_ON(!buffer_uptodate(bh));
 		if (state != GFS2_BLKST_UNLINKED && rbm->bi->bi_clone)
 			buffer = rbm->bi->bi_clone + rbm->bi->bi_offset;
-find_next:
 		initial_offset = rbm->offset;
 		offset = gfs2_bitfit(buffer, rbm->bi->bi_len, rbm->offset, state);
 		if (offset == BFITNOENT)
@@ -1594,7 +1595,7 @@ find_next:
 			return 0;
 		if (ret > 0) {
 			n += (rbm->bi - initial_bi);
-			goto find_next;
+			goto next_iter;
 		}
 		if (ret == -E2BIG) {
 			index = 0;
@@ -1619,6 +1620,7 @@ res_covered_end_of_rgrp:
 		if ((index == 0) && nowrap)
 			break;
 		n++;
+next_iter:
 		if (n >= iters)
 			break;
 	}
@@ -2028,6 +2030,10 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
 	else
 		goal = rbm.rgd->rd_last_alloc + rbm.rgd->rd_data0;
 
+	if ((goal < rbm.rgd->rd_data0) ||
+	    (goal >= rbm.rgd->rd_data0 + rbm.rgd->rd_data))
+		rbm.rgd = gfs2_blk2rgrpd(sdp, goal, 1);
+
 	gfs2_rbm_from_block(&rbm, goal);
 	error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, ip, false);
 
-- 
cgit 


From 8e711e100fd63b9cbf095030d45fd5ee2fa4c995 Mon Sep 17 00:00:00 2001
From: Bob Peterson <rpeterso@redhat.com>
Date: Thu, 9 Aug 2012 12:48:42 -0500
Subject: GFS2: change function gfs2_direct_IO to use a normal gfs2_glock_dq

This patch changes function gfs2_direct_IO so that it uses a normal
call to gfs2_glock_dq rather than a call to a multiple-dq of one item.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/aops.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 00eaa83871b7..01c4975da4bc 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -1024,7 +1024,7 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
 				  offset, nr_segs, gfs2_get_block_direct,
 				  NULL, NULL, 0);
 out:
-	gfs2_glock_dq_m(1, &gh);
+	gfs2_glock_dq(&gh);
 	gfs2_holder_uninit(&gh);
 	return rv;
 }
-- 
cgit 


From 4abb6ad9eae0aebfcec4f188a408447cc4ea1cb4 Mon Sep 17 00:00:00 2001
From: Bob Peterson <rpeterso@redhat.com>
Date: Thu, 9 Aug 2012 12:48:43 -0500
Subject: GFS2: inline __gfs2_glock_schedule_for_reclaim

Since function gfs2_glock_schedule_for_reclaim is only two
significant lines, we can eliminate it, simplifying the code
and making it more readable.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/glock.c | 19 +++----------------
 1 file changed, 3 insertions(+), 16 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 1ed81f40da0d..67f3e42d4bd2 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -185,20 +185,6 @@ static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl)
 	spin_unlock(&lru_lock);
 }
 
-/**
- * __gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list
- * @gl: the glock
- *
- * If the glock is demotable, then we add it (or move it) to the end
- * of the glock LRU list.
- */
-
-static void __gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
-{
-	if (demote_ok(gl))
-		gfs2_glock_add_to_lru(gl);
-}
-
 /**
  * gfs2_glock_put_nolock() - Decrement reference count on glock
  * @gl: The glock to put
@@ -1121,8 +1107,9 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
 		    !test_bit(GLF_DEMOTE, &gl->gl_flags))
 			fast_path = 1;
 	}
-	if (!test_bit(GLF_LFLUSH, &gl->gl_flags))
-		__gfs2_glock_schedule_for_reclaim(gl);
+	if (!test_bit(GLF_LFLUSH, &gl->gl_flags) && demote_ok(gl))
+		gfs2_glock_add_to_lru(gl);
+
 	trace_gfs2_glock_queue(gh, 0);
 	spin_unlock(&gl->gl_spin);
 	if (likely(fast_path))
-- 
cgit 


From 07a790494260e102eb42840537af82e84d2ab766 Mon Sep 17 00:00:00 2001
From: Bob Peterson <rpeterso@redhat.com>
Date: Thu, 9 Aug 2012 12:48:44 -0500
Subject: GFS2: Combine functions gfs2_glock_wait and wait_on_holder

Function gfs2_glock_wait only called function wait_on_holder and
returned its return code, so they were combined for readability.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/glock.c | 23 +++++++++--------------
 1 file changed, 9 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 67f3e42d4bd2..5c8790960735 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -869,7 +869,14 @@ static int gfs2_glock_demote_wait(void *word)
 	return 0;
 }
 
-static void wait_on_holder(struct gfs2_holder *gh)
+/**
+ * gfs2_glock_wait - wait on a glock acquisition
+ * @gh: the glock holder
+ *
+ * Returns: 0 on success
+ */
+
+int gfs2_glock_wait(struct gfs2_holder *gh)
 {
 	unsigned long time1 = jiffies;
 
@@ -880,6 +887,7 @@ static void wait_on_holder(struct gfs2_holder *gh)
 		gh->gh_gl->gl_hold_time = min(gh->gh_gl->gl_hold_time +
 					      GL_GLOCK_HOLD_INCR,
 					      GL_GLOCK_MAX_HOLD);
+	return gh->gh_error;
 }
 
 static void wait_on_demote(struct gfs2_glock *gl)
@@ -915,19 +923,6 @@ static void handle_callback(struct gfs2_glock *gl, unsigned int state,
 	trace_gfs2_demote_rq(gl);
 }
 
-/**
- * gfs2_glock_wait - wait on a glock acquisition
- * @gh: the glock holder
- *
- * Returns: 0 on success
- */
-
-int gfs2_glock_wait(struct gfs2_holder *gh)
-{
-	wait_on_holder(gh);
-	return gh->gh_error;
-}
-
 void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...)
 {
 	struct va_format vaf;
-- 
cgit 


From 81e1d45061d09f296d7664af70d3334840d123a0 Mon Sep 17 00:00:00 2001
From: Bob Peterson <rpeterso@redhat.com>
Date: Thu, 9 Aug 2012 12:48:45 -0500
Subject: GFS2: Combine functions gfs2_glock_dq_wait and wait_on_demote

Function gfs2_glock_dq_wait called two-line function wait_on_demote,
so they were combined.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/glock.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 5c8790960735..fca6a873dcf3 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -890,12 +890,6 @@ int gfs2_glock_wait(struct gfs2_holder *gh)
 	return gh->gh_error;
 }
 
-static void wait_on_demote(struct gfs2_glock *gl)
-{
-	might_sleep();
-	wait_on_bit(&gl->gl_flags, GLF_DEMOTE, gfs2_glock_demote_wait, TASK_UNINTERRUPTIBLE);
-}
-
 /**
  * handle_callback - process a demote request
  * @gl: the glock
@@ -1123,7 +1117,8 @@ void gfs2_glock_dq_wait(struct gfs2_holder *gh)
 {
 	struct gfs2_glock *gl = gh->gh_gl;
 	gfs2_glock_dq(gh);
-	wait_on_demote(gl);
+	might_sleep();
+	wait_on_bit(&gl->gl_flags, GLF_DEMOTE, gfs2_glock_demote_wait, TASK_UNINTERRUPTIBLE);
 }
 
 /**
-- 
cgit 


From e5dc76b9afcfb936818261d65f6f9e1f2c346d59 Mon Sep 17 00:00:00 2001
From: Bob Peterson <rpeterso@redhat.com>
Date: Thu, 9 Aug 2012 12:48:46 -0500
Subject: GFS2: Eliminate redundant calls to may_grant

Function add_to_queue was checking may_grant for the passed-in
holder for every iteration of its gh2 loop. Now it only checks it
once at the beginning to see if a try lock is futile.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/glock.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index fca6a873dcf3..e6c2fd53cab2 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -954,7 +954,7 @@ __acquires(&gl->gl_spin)
 	struct gfs2_sbd *sdp = gl->gl_sbd;
 	struct list_head *insert_pt = NULL;
 	struct gfs2_holder *gh2;
-	int try_lock = 0;
+	int try_futile = 0;
 
 	BUG_ON(gh->gh_owner_pid == NULL);
 	if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags))
@@ -962,7 +962,7 @@ __acquires(&gl->gl_spin)
 
 	if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) {
 		if (test_bit(GLF_LOCK, &gl->gl_flags))
-			try_lock = 1;
+			try_futile = !may_grant(gl, gh);
 		if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags))
 			goto fail;
 	}
@@ -971,9 +971,8 @@ __acquires(&gl->gl_spin)
 		if (unlikely(gh2->gh_owner_pid == gh->gh_owner_pid &&
 		    (gh->gh_gl->gl_ops->go_type != LM_TYPE_FLOCK)))
 			goto trap_recursive;
-		if (try_lock &&
-		    !(gh2->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) &&
-		    !may_grant(gl, gh)) {
+		if (try_futile &&
+		    !(gh2->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) {
 fail:
 			gh->gh_error = GLR_TRYFAILED;
 			gfs2_holder_wake(gh);
-- 
cgit 


From 29c05b205d4d30248982d5167bb142146db89bd3 Mon Sep 17 00:00:00 2001
From: Bob Peterson <rpeterso@redhat.com>
Date: Thu, 9 Aug 2012 12:48:47 -0500
Subject: GFS2: Eliminate unnecessary check for state > 3 in bitfit

Function gfs2_bitfit was checking for state > 3, but that's
impossible since it is only called from rgblk_search, which receives
only GFS2_BLKST_ constants.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/rgrp.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index c26711882a6b..47d2346575ad 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -228,8 +228,6 @@ static u32 gfs2_bitfit(const u8 *buf, const unsigned int len,
 	u64 mask = 0x5555555555555555ULL;
 	u32 bit;
 
-	BUG_ON(state > 3);
-
 	/* Mask off bits we don't care about at the start of the search */
 	mask <<= spoint;
 	tmp = gfs2_bit_search(ptr, mask, state);
-- 
cgit 


From c04a2ef3a8b51cf58d8ff18bb4d6af17252f0fb6 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Mon, 13 Aug 2012 11:14:57 +0100
Subject: GFS2: Use rbm for gfs2_testbit()

Change the arguments to gfs2_testbit() so that it now just takes an
rbm specifying the position of the two bit entry to return.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/rgrp.c | 72 +++++++++++++++++++++++-----------------------------------
 1 file changed, 28 insertions(+), 44 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 47d2346575ad..3a288cec5af0 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -117,30 +117,21 @@ static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf2,
 
 /**
  * gfs2_testbit - test a bit in the bitmaps
- * @rgd: the resource group descriptor
- * @buffer: the buffer that holds the bitmaps
- * @buflen: the length (in bytes) of the buffer
- * @block: the block to read
+ * @rbm: The bit to test
  *
+ * Returns: The two bit block state of the requested bit
  */
 
-static inline unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd,
-					 const unsigned char *buffer,
-					 unsigned int buflen, u32 block)
+static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm)
 {
-	const unsigned char *byte, *end;
-	unsigned char cur_state;
+	const u8 *buffer = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset;
+	const u8 *byte;
 	unsigned int bit;
 
-	byte = buffer + (block / GFS2_NBBY);
-	bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE;
-	end = buffer + buflen;
-
-	gfs2_assert(rgd->rd_sbd, byte < end);
-
-	cur_state = (*byte >> bit) & GFS2_BIT_MASK;
+	byte = buffer + (rbm->offset / GFS2_NBBY);
+	bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE;
 
-	return cur_state;
+	return (*byte >> bit) & GFS2_BIT_MASK;
 }
 
 /**
@@ -1837,8 +1828,7 @@ static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
 	ret = gfs2_rbm_from_block(&rbm, block);
 	WARN_ON_ONCE(ret != 0);
 
-	return gfs2_testbit(rgd, rbm.bi->bi_bh->b_data + rbm.bi->bi_offset,
-			    rbm.bi->bi_len, rbm.offset);
+	return gfs2_testbit(&rbm);
 }
 
 
@@ -1846,42 +1836,35 @@ static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
  * gfs2_alloc_extent - allocate an extent from a given bitmap
  * @rbm: the resource group information
  * @dinode: TRUE if the first block we allocate is for a dinode
- * @n: The extent length
+ * @n: The extent length (value/result)
  *
- * Add the found bitmap buffer to the transaction.
+ * Add the bitmap buffer to the transaction.
  * Set the found bits to @new_state to change block's allocation state.
- * Returns: starting block number of the extent (fs scope)
  */
-static u64 gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode,
+static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode,
 			     unsigned int *n)
 {
-	struct gfs2_rgrpd *rgd = rbm->rgd;
-	struct gfs2_bitmap *bi = rbm->bi;
-	u32 blk = rbm->offset;
+	struct gfs2_rbm pos = { .rgd = rbm->rgd, };
 	const unsigned int elen = *n;
-	u32 goal;
-	const u8 *buffer = NULL;
+	u64 block;
+	int ret;
 
-	*n = 0;
-	buffer = bi->bi_bh->b_data + bi->bi_offset;
-	gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
-	gfs2_setbit(rgd, bi->bi_clone, bi, blk,
+	*n = 1;
+	block = gfs2_rbm_to_block(rbm);
+	gfs2_trans_add_bh(rbm->rgd->rd_gl, rbm->bi->bi_bh, 1);
+	gfs2_setbit(rbm->rgd, rbm->bi->bi_clone, rbm->bi, rbm->offset,
 		    dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
-	(*n)++;
-	goal = blk;
+	block++;
 	while (*n < elen) {
-		goal++;
-		if (goal >= (bi->bi_len * GFS2_NBBY))
-			break;
-		if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) !=
-		    GFS2_BLKST_FREE)
+		ret = gfs2_rbm_from_block(&pos, block);
+		WARN_ON(ret);
+		if (gfs2_testbit(&pos) != GFS2_BLKST_FREE)
 			break;
-		gfs2_setbit(rgd, bi->bi_clone, bi, goal, GFS2_BLKST_USED);
+		gfs2_trans_add_bh(pos.rgd->rd_gl, pos.bi->bi_bh, 1);
+		gfs2_setbit(pos.rgd, pos.bi->bi_clone, pos.bi, pos.offset, GFS2_BLKST_USED);
 		(*n)++;
+		block++;
 	}
-	blk = gfs2_bi2rgd_blk(bi, blk);
-	rgd->rd_last_alloc = blk + *n - 1;
-	return rgd->rd_data0 + blk;
 }
 
 /**
@@ -2042,7 +2025,8 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
 		goto rgrp_error;
 	}
 
-	block = gfs2_alloc_extent(&rbm, dinode, nblocks);
+	gfs2_alloc_extent(&rbm, dinode, nblocks);
+	block = gfs2_rbm_to_block(&rbm);
 	if (gfs2_rs_active(ip->i_res))
 		gfs2_adjust_reservation(ip, &rbm, *nblocks);
 	ndata = *nblocks;
-- 
cgit 


From 3e6339dd2850353781513dec42a8d56690e9e1db Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Mon, 13 Aug 2012 11:37:51 +0100
Subject: GFS2: Use rbm for gfs2_setbit()

Use the rbm structure for gfs2_setbit() in order to simplify the
arguments to the function. We have to add a bool to control whether
the clone bitmap should be updated (if it exists) but otherwise it
is a more or less direct substitution.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/rgrp.c | 46 ++++++++++++++++++++--------------------------
 1 file changed, 20 insertions(+), 26 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 3a288cec5af0..55a2651666c9 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -69,47 +69,42 @@ static const char valid_change[16] = {
 
 /**
  * gfs2_setbit - Set a bit in the bitmaps
- * @rgd: the resource group descriptor
- * @buf2: the clone buffer that holds the bitmaps
- * @bi: the bitmap structure
- * @block: the block to set
+ * @rbm: The position of the bit to set
+ * @do_clone: Also set the clone bitmap, if it exists
  * @new_state: the new state of the block
  *
  */
 
-static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf2,
-			       struct gfs2_bitmap *bi, u32 block,
+static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone,
 			       unsigned char new_state)
 {
 	unsigned char *byte1, *byte2, *end, cur_state;
-	unsigned int buflen = bi->bi_len;
-	const unsigned int bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE;
+	unsigned int buflen = rbm->bi->bi_len;
+	const unsigned int bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE;
 
-	byte1 = bi->bi_bh->b_data + bi->bi_offset + (block / GFS2_NBBY);
-	end = bi->bi_bh->b_data + bi->bi_offset + buflen;
+	byte1 = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset + (rbm->offset / GFS2_NBBY);
+	end = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset + buflen;
 
 	BUG_ON(byte1 >= end);
 
 	cur_state = (*byte1 >> bit) & GFS2_BIT_MASK;
 
 	if (unlikely(!valid_change[new_state * 4 + cur_state])) {
-		printk(KERN_WARNING "GFS2: buf_blk = 0x%llx old_state=%d, "
-		       "new_state=%d\n",
-		       (unsigned long long)block, cur_state, new_state);
-		printk(KERN_WARNING "GFS2: rgrp=0x%llx bi_start=0x%lx\n",
-		       (unsigned long long)rgd->rd_addr,
-		       (unsigned long)bi->bi_start);
-		printk(KERN_WARNING "GFS2: bi_offset=0x%lx bi_len=0x%lx\n",
-		       (unsigned long)bi->bi_offset,
-		       (unsigned long)bi->bi_len);
+		printk(KERN_WARNING "GFS2: buf_blk = 0x%x old_state=%d, "
+		       "new_state=%d\n", rbm->offset, cur_state, new_state);
+		printk(KERN_WARNING "GFS2: rgrp=0x%llx bi_start=0x%x\n",
+		       (unsigned long long)rbm->rgd->rd_addr,
+		       rbm->bi->bi_start);
+		printk(KERN_WARNING "GFS2: bi_offset=0x%x bi_len=0x%x\n",
+		       rbm->bi->bi_offset, rbm->bi->bi_len);
 		dump_stack();
-		gfs2_consist_rgrpd(rgd);
+		gfs2_consist_rgrpd(rbm->rgd);
 		return;
 	}
 	*byte1 ^= (cur_state ^ new_state) << bit;
 
-	if (buf2) {
-		byte2 = buf2 + bi->bi_offset + (block / GFS2_NBBY);
+	if (do_clone && rbm->bi->bi_clone) {
+		byte2 = rbm->bi->bi_clone + rbm->bi->bi_offset + (rbm->offset / GFS2_NBBY);
 		cur_state = (*byte2 >> bit) & GFS2_BIT_MASK;
 		*byte2 ^= (cur_state ^ new_state) << bit;
 	}
@@ -1852,8 +1847,7 @@ static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode,
 	*n = 1;
 	block = gfs2_rbm_to_block(rbm);
 	gfs2_trans_add_bh(rbm->rgd->rd_gl, rbm->bi->bi_bh, 1);
-	gfs2_setbit(rbm->rgd, rbm->bi->bi_clone, rbm->bi, rbm->offset,
-		    dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
+	gfs2_setbit(rbm, true, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
 	block++;
 	while (*n < elen) {
 		ret = gfs2_rbm_from_block(&pos, block);
@@ -1861,7 +1855,7 @@ static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode,
 		if (gfs2_testbit(&pos) != GFS2_BLKST_FREE)
 			break;
 		gfs2_trans_add_bh(pos.rgd->rd_gl, pos.bi->bi_bh, 1);
-		gfs2_setbit(pos.rgd, pos.bi->bi_clone, pos.bi, pos.offset, GFS2_BLKST_USED);
+		gfs2_setbit(&pos, true, GFS2_BLKST_USED);
 		(*n)++;
 		block++;
 	}
@@ -1900,7 +1894,7 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
 			       rbm.bi->bi_len);
 		}
 		gfs2_trans_add_bh(rbm.rgd->rd_gl, rbm.bi->bi_bh, 1);
-		gfs2_setbit(rbm.rgd, NULL, rbm.bi, rbm.offset, new_state);
+		gfs2_setbit(&rbm, false, new_state);
 	}
 
 	return rbm.rgd;
-- 
cgit 


From 2b9731e8bb1bdc6b9da149f4e482a071747207f3 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Mon, 20 Aug 2012 17:07:49 +0100
Subject: GFS2: Fix ->show_options() for statfs slow

The ->show_options() function for GFS2 was not correctly displaying
the value when statfs slow in in use.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Reported-by: Milos Jakubicek <xjakub@fi.muni.cz>
---
 fs/gfs2/super.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 3cbac6803038..79cac7057691 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1366,6 +1366,8 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root)
 	val = sdp->sd_tune.gt_statfs_quantum;
 	if (val != 30)
 		seq_printf(s, ",statfs_quantum=%d", val);
+	else if (sdp->sd_tune.gt_statfs_slow)
+		seq_puts(s, ",statfs_quantum=0");
 	val = sdp->sd_tune.gt_quota_quantum;
 	if (val != 60)
 		seq_printf(s, ",quota_quantum=%d", val);
-- 
cgit 


From 137834a696fd51ef8c710a0ad854b585027c7df0 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Thu, 23 Aug 2012 13:43:40 +0100
Subject: GFS2: Fall back to ignoring reservations, if there are no other
 blocks left

When we get to the stage of allocating blocks, we know that the
resource group in question must contain enough free blocks, otherwise
gfs2_inplace_reserve() would have failed. So if we are left with only
free blocks which are reserved, then we must use those. This can happen
if another node has sneeked in and use some blocks reserved on this
node, for example. Generally this will happen very rarely and only
when the resouce group is nearly full.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/rgrp.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'fs')

diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 55a2651666c9..30c864e70298 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -2012,6 +2012,11 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
 	gfs2_rbm_from_block(&rbm, goal);
 	error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, ip, false);
 
+	if (error == -ENOSPC) {
+		gfs2_rbm_from_block(&rbm, goal);
+		error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, false);
+	}
+
 	/* Since all blocks are reserved in advance, this shouldn't happen */
 	if (error) {
 		fs_warn(sdp, "error=%d, nblocks=%u, full=%d\n", error, *nblocks,
-- 
cgit 


From 9e733d3923fb0e4eeae7b827019332d246576a22 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Thu, 23 Aug 2012 15:37:59 +0100
Subject: GFS2: Improve block reservation tracing

This patch improves the tracing of block reservations by
removing some corner cases and also providing more useful
detail in the traces.

A new field is added to the reservation structure to contain
the inode number. This is used since in certain contexts it is
not possible to access the inode itself to obtain this information.
As a result we can then display the inode number for all tracepoints
and also in case we dump the resource group.

The "del" tracepoint operation has been removed. This could be called
with the reservation rgrp set to NULL. That resulted in not printing
the device number, and thus making the information largely useless
anyway. Also, the conditional on the rgrp being NULL can then be
removed from the tracepoint. After this change, all the block
reservation tracepoint calls will be called with the rgrp information.

The existing ins,clm and tdel calls to the block reservation tracepoint
are sufficient to track the entire life of the block reservation.

In gfs2_block_alloc() the error detection is updated to print out
the inode number of the problematic inode. This can then be compared
against the information in the glock dump,tracepoints, etc.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/incore.h     |  6 ++----
 fs/gfs2/rgrp.c       | 22 ++++++++++------------
 fs/gfs2/trace_gfs2.h | 18 ++++++++----------
 3 files changed, 20 insertions(+), 26 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 99d7c64b5091..6aaa07c7c731 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -268,13 +268,11 @@ struct gfs2_blkreserv {
 	/* components used during write (step 1): */
 	atomic_t rs_sizehint;         /* hint of the write size */
 
-	/* components used during get_local_rgrp (step 3): */
-	struct gfs2_rbm rs_rbm;
 	struct gfs2_holder rs_rgd_gh; /* Filled in by get_local_rgrp */
 	struct rb_node rs_node;       /* link to other block reservations */
-
-	/* components used during block searches and assignments (step 4): */
+	struct gfs2_rbm rs_rbm;       /* Start of reservation */
 	u32 rs_free;                  /* how many blocks are still free */
+	u64 rs_inum;                  /* Inode number for reservation */
 
 	/* ancillary quota stuff */
 	struct gfs2_quota_data *rs_qa_qd[2 * MAXQUOTAS];
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 30c864e70298..87ee0b70f818 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -448,10 +448,11 @@ int gfs2_rs_alloc(struct gfs2_inode *ip)
 	return error;
 }
 
-static void dump_rs(struct seq_file *seq, struct gfs2_blkreserv *rs)
+static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs)
 {
-	gfs2_print_dbg(seq, "  r: %llu s:%llu b:%u f:%u\n",
-		       rs->rs_rbm.rgd->rd_addr, gfs2_rbm_to_block(&rs->rs_rbm),
+	gfs2_print_dbg(seq, "  B: n:%llu s:%llu b:%u f:%u\n",
+		       (unsigned long long)rs->rs_inum,
+		       (unsigned long long)gfs2_rbm_to_block(&rs->rs_rbm),
 		       rs->rs_rbm.offset, rs->rs_free);
 }
 
@@ -468,7 +469,7 @@ static void __rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs)
 		return;
 
 	rgd = rs->rs_rbm.rgd;
-	trace_gfs2_rs(ip, rs, TRACE_RS_TREEDEL);
+	trace_gfs2_rs(rs, TRACE_RS_TREEDEL);
 	rb_erase(&rs->rs_node, &rgd->rd_rstree);
 	RB_CLEAR_NODE(&rs->rs_node);
 	BUG_ON(!rgd->rd_rs_cnt);
@@ -511,7 +512,6 @@ void gfs2_rs_delete(struct gfs2_inode *ip)
 	down_write(&ip->i_rw_mutex);
 	if (ip->i_res) {
 		gfs2_rs_deltree(ip, ip->i_res);
-		trace_gfs2_rs(ip, ip->i_res, TRACE_RS_DELETE);
 		BUG_ON(ip->i_res->rs_free);
 		kmem_cache_free(gfs2_rsrv_cachep, ip->i_res);
 		ip->i_res = NULL;
@@ -1253,6 +1253,7 @@ static struct gfs2_blkreserv *rs_insert(struct gfs2_bitmap *bi,
 	rs->rs_free = amount;
 	rs->rs_rbm.offset = biblk;
 	rs->rs_rbm.bi = bi;
+	rs->rs_inum = ip->i_no_addr;
 	rb_link_node(&rs->rs_node, parent, newn);
 	rb_insert_color(&rs->rs_node, &rgd->rd_rstree);
 
@@ -1260,7 +1261,7 @@ static struct gfs2_blkreserv *rs_insert(struct gfs2_bitmap *bi,
 	rgd->rd_reserved += amount; /* blocks reserved */
 	rgd->rd_rs_cnt++; /* number of in-tree reservations */
 	spin_unlock(&rgd->rd_rsspin);
-	trace_gfs2_rs(ip, rs, TRACE_RS_INSERT);
+	trace_gfs2_rs(rs, TRACE_RS_INSERT);
 	return rs;
 }
 
@@ -1966,7 +1967,7 @@ static void gfs2_adjust_reservation(struct gfs2_inode *ip,
 			rlen = min(rs->rs_free, len);
 			rs->rs_free -= rlen;
 			rgd->rd_reserved -= rlen;
-			trace_gfs2_rs(ip, rs, TRACE_RS_CLAIM);
+			trace_gfs2_rs(rs, TRACE_RS_CLAIM);
 			if (rs->rs_free && !ret)
 				goto out;
 		}
@@ -2005,10 +2006,6 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
 	else
 		goal = rbm.rgd->rd_last_alloc + rbm.rgd->rd_data0;
 
-	if ((goal < rbm.rgd->rd_data0) ||
-	    (goal >= rbm.rgd->rd_data0 + rbm.rgd->rd_data))
-		rbm.rgd = gfs2_blk2rgrpd(sdp, goal, 1);
-
 	gfs2_rbm_from_block(&rbm, goal);
 	error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, ip, false);
 
@@ -2019,7 +2016,8 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
 
 	/* Since all blocks are reserved in advance, this shouldn't happen */
 	if (error) {
-		fs_warn(sdp, "error=%d, nblocks=%u, full=%d\n", error, *nblocks,
+		fs_warn(sdp, "inum=%llu error=%d, nblocks=%u, full=%d\n",
+			(unsigned long long)ip->i_no_addr, error, *nblocks,
 			test_bit(GBF_FULL, &rbm.rgd->rd_bits->bi_flags));
 		goto rgrp_error;
 	}
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
index b947aa4dfca4..bbdc78af60ca 100644
--- a/fs/gfs2/trace_gfs2.h
+++ b/fs/gfs2/trace_gfs2.h
@@ -509,10 +509,9 @@ TRACE_EVENT(gfs2_block_alloc,
 /* Keep track of multi-block reservations as they are allocated/freed */
 TRACE_EVENT(gfs2_rs,
 
-	TP_PROTO(const struct gfs2_inode *ip, const struct gfs2_blkreserv *rs,
-		 u8 func),
+	TP_PROTO(const struct gfs2_blkreserv *rs, u8 func),
 
-	TP_ARGS(ip, rs, func),
+	TP_ARGS(rs, func),
 
 	TP_STRUCT__entry(
 		__field(        dev_t,  dev                     )
@@ -526,18 +525,17 @@ TRACE_EVENT(gfs2_rs,
 	),
 
 	TP_fast_assign(
-		__entry->dev		= rs->rs_rbm.rgd ? rs->rs_rbm.rgd->rd_sbd->sd_vfs->s_dev : 0;
-		__entry->rd_addr	= rs->rs_rbm.rgd ? rs->rs_rbm.rgd->rd_addr : 0;
-		__entry->rd_free_clone	= rs->rs_rbm.rgd ? rs->rs_rbm.rgd->rd_free_clone : 0;
-		__entry->rd_reserved	= rs->rs_rbm.rgd ? rs->rs_rbm.rgd->rd_reserved : 0;
-		__entry->inum		= ip ? ip->i_no_addr : 0;
+		__entry->dev		= rs->rs_rbm.rgd->rd_sbd->sd_vfs->s_dev;
+		__entry->rd_addr	= rs->rs_rbm.rgd->rd_addr;
+		__entry->rd_free_clone	= rs->rs_rbm.rgd->rd_free_clone;
+		__entry->rd_reserved	= rs->rs_rbm.rgd->rd_reserved;
+		__entry->inum		= rs->rs_inum;
 		__entry->start		= gfs2_rbm_to_block(&rs->rs_rbm);
 		__entry->free		= rs->rs_free;
 		__entry->func		= func;
 	),
 
-	TP_printk("%u,%u bmap %llu resrv %llu rg:%llu rf:%lu rr:%lu %s "
-		  "f:%lu",
+	TP_printk("%u,%u bmap %llu resrv %llu rg:%llu rf:%lu rr:%lu %s f:%lu",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long long)__entry->inum,
 		  (unsigned long long)__entry->start,
-- 
cgit 


From c743ffd09fa7d3464c6f74767a3ae2ca5dc3ebf7 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Sat, 25 Aug 2012 18:21:47 +0100
Subject: GFS2: Fix unclaimed_blocks() wrapping bug and clean up

When rgd->rd_free_clone is less than rgd->rd_reserved, the
unclaimed_blocks() calculation would wrap and produce
incorrect results. This patch checks for this condition
when this function is called from gfs2_mblk_search()

In addition, the use of this particular function in other
places in the code has been dropped by means of a general
clean up of gfs2_inplace_reserve(). This function is now
much easier to follow.

Also the setting of the rgd->rd_last_alloc field is corrected.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/rgrp.c | 193 ++++++++++++++++++++++++++-------------------------------
 1 file changed, 88 insertions(+), 105 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 87ee0b70f818..886954126704 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1231,7 +1231,7 @@ static struct gfs2_blkreserv *rs_insert(struct gfs2_bitmap *bi,
 	BUG_ON(!ip->i_res);
 	BUG_ON(gfs2_rs_active(rs));
 	/* Figure out where to put new node */
-	/*BUG_ON(!gfs2_glock_is_locked_by_me(rgd->rd_gl));*/
+
 	while (*newn) {
 		struct gfs2_blkreserv *cur =
 			rb_entry(*newn, struct gfs2_blkreserv, rs_node);
@@ -1276,17 +1276,16 @@ static u32 unclaimed_blocks(struct gfs2_rgrpd *rgd)
 /**
  * rg_mblk_search - find a group of multiple free blocks
  * @rgd: the resource group descriptor
- * @rs: the block reservation
  * @ip: pointer to the inode for which we're reserving blocks
+ * @requested: number of blocks required for this allocation
  *
  * This is very similar to rgblk_search, except we're looking for whole
  * 64-bit words that represent a chunk of 32 free blocks. I'm only focusing
  * on aligned dwords for speed's sake.
  *
- * Returns: 0 if successful or BFITNOENT if there isn't enough free space
  */
 
-static int rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, unsigned requested)
+static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, unsigned requested)
 {
 	struct gfs2_bitmap *bi = rgd->rd_bits;
 	const u32 length = rgd->rd_length;
@@ -1299,11 +1298,16 @@ static int rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, unsigne
 	u32 best_rs_bytes, unclaimed;
 	int best_rs_blocks;
 
+	if ((rgd->rd_free_clone < rgd->rd_reserved) ||
+	    (unclaimed_blocks(rgd) < max(requested, RGRP_RSRV_MINBLKS)))
+		return;
+
 	/* Find bitmap block that contains bits for goal block */
 	if (rgrp_contains_block(rgd, ip->i_goal))
 		goal = ip->i_goal - rgd->rd_data0;
 	else
 		goal = rgd->rd_last_alloc;
+
 	for (buf = 0; buf < length; buf++) {
 		bi = rgd->rd_bits + buf;
 		/* Convert scope of "goal" from rgrp-wide to within
@@ -1366,10 +1370,8 @@ do_search:
 				BUG_ON(blk >= bi->bi_len * GFS2_NBBY);
 				rs = rs_insert(bi, ip, blk,
 					       rsv_bytes * GFS2_NBBY);
-				if (IS_ERR(rs))
-					return PTR_ERR(rs);
 				if (rs)
-					return 0;
+					return;
 			}
 			ptr += ALIGN(search_bytes, sizeof(u64));
 		}
@@ -1380,35 +1382,6 @@ skip:
 		buf %= length;
 		goal = 0;
 	}
-
-	return BFITNOENT;
-}
-
-/**
- * try_rgrp_fit - See if a given reservation will fit in a given RG
- * @rgd: the RG data
- * @ip: the inode
- *
- * If there's room for the requested blocks to be allocated from the RG:
- * This will try to get a multi-block reservation first, and if that doesn't
- * fit, it will take what it can.
- *
- * Returns: 1 on success (it fits), 0 on failure (it doesn't fit)
- */
-
-static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
-			unsigned requested)
-{
-	if (rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR))
-		return 0;
-	/* Look for a multi-block reservation. */
-	if (unclaimed_blocks(rgd) >= RGRP_RSRV_MINBLKS &&
-	    rg_mblk_search(rgd, ip, requested) != BFITNOENT)
-		return 1;
-	if (unclaimed_blocks(rgd) >= requested)
-		return 1;
-
-	return 0;
 }
 
 /**
@@ -1678,6 +1651,19 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
 	return;
 }
 
+static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin)
+{
+	struct gfs2_rgrpd *rgd = *pos;
+
+	rgd = gfs2_rgrpd_get_next(rgd);
+	if (rgd == NULL)
+		rgd = gfs2_rgrpd_get_next(NULL);
+	*pos = rgd;
+	if (rgd != begin) /* If we didn't wrap */
+		return true;
+	return false;
+}
+
 /**
  * gfs2_inplace_reserve - Reserve space in the filesystem
  * @ip: the inode to reserve space for
@@ -1697,10 +1683,8 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
 
 	if (sdp->sd_args.ar_rgrplvb)
 		flags |= GL_SKIP;
-	if (gfs2_assert_warn(sdp, requested)) {
-		error = -EINVAL;
-		goto out;
-	}
+	if (gfs2_assert_warn(sdp, requested))
+		return -EINVAL;
 	if (gfs2_rs_active(rs)) {
 		begin = rs->rs_rbm.rgd;
 		flags = 0; /* Yoda: Do or do not. There is no try */
@@ -1713,84 +1697,82 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
 		return -EBADSLT;
 
 	while (loops < 3) {
-		rg_locked = 0;
-
-		if (gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) {
-			rg_locked = 1;
-			error = 0;
-		} else if (!loops && !gfs2_rs_active(rs) &&
-			   rs->rs_rbm.rgd->rd_rs_cnt > RGRP_RSRV_MAX_CONTENDERS) {
-			/* If the rgrp already is maxed out for contenders,
-			   we can eliminate it as a "first pass" without even
-			   requesting the rgrp glock. */
-			error = GLR_TRYFAILED;
-		} else {
+		rg_locked = 1;
+
+		if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) {
+			rg_locked = 0;
 			error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl,
 						   LM_ST_EXCLUSIVE, flags,
 						   &rs->rs_rgd_gh);
-			if (!error && sdp->sd_args.ar_rgrplvb) {
+			if (error == GLR_TRYFAILED)
+				goto next_rgrp;
+			if (unlikely(error))
+				return error;
+			if (sdp->sd_args.ar_rgrplvb) {
 				error = update_rgrp_lvb(rs->rs_rbm.rgd);
-				if (error) {
+				if (unlikely(error)) {
 					gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
 					return error;
 				}
 			}
 		}
-		switch (error) {
-		case 0:
-			if (gfs2_rs_active(rs)) {
-				if (unclaimed_blocks(rs->rs_rbm.rgd) +
-				    rs->rs_free >= requested) {
-					ip->i_rgd = rs->rs_rbm.rgd;
-					return 0;
-				}
-				/* We have a multi-block reservation, but the
-				   rgrp doesn't have enough free blocks to
-				   satisfy the request. Free the reservation
-				   and look for a suitable rgrp. */
-				gfs2_rs_deltree(ip, rs);
-			}
-			if (try_rgrp_fit(rs->rs_rbm.rgd, ip, requested)) {
-				if (sdp->sd_args.ar_rgrplvb)
-					gfs2_rgrp_bh_get(rs->rs_rbm.rgd);
-				ip->i_rgd = rs->rs_rbm.rgd;
-				return 0;
-			}
-			if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK) {
-				if (sdp->sd_args.ar_rgrplvb)
-					gfs2_rgrp_bh_get(rs->rs_rbm.rgd);
-				try_rgrp_unlink(rs->rs_rbm.rgd, &last_unlinked,
-						ip->i_no_addr);
-			}
-			if (!rg_locked)
-				gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
-			/* fall through */
-		case GLR_TRYFAILED:
-			rs->rs_rbm.rgd = gfs2_rgrpd_get_next(rs->rs_rbm.rgd);
-			rs->rs_rbm.rgd = rs->rs_rbm.rgd ? : begin; /* if NULL, wrap */
-			if (rs->rs_rbm.rgd != begin) /* If we didn't wrap */
-				break;
 
-			flags &= ~LM_FLAG_TRY;
-			loops++;
-			/* Check that fs hasn't grown if writing to rindex */
-			if (ip == GFS2_I(sdp->sd_rindex) &&
-			    !sdp->sd_rindex_uptodate) {
-				error = gfs2_ri_update(ip);
-				if (error)
-					goto out;
-			} else if (loops == 2)
-				/* Flushing the log may release space */
-				gfs2_log_flush(sdp, NULL);
-			break;
-		default:
-			goto out;
+		/* Skip unuseable resource groups */
+		if (rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR))
+			goto skip_rgrp;
+
+		if (sdp->sd_args.ar_rgrplvb)
+			gfs2_rgrp_bh_get(rs->rs_rbm.rgd);
+
+		/* Get a reservation if we don't already have one */
+		if (!gfs2_rs_active(rs))
+			rg_mblk_search(rs->rs_rbm.rgd, ip, requested);
+
+		/* Skip rgrps when we can't get a reservation on first pass */
+		if (!gfs2_rs_active(rs) && (loops < 1))
+			goto check_rgrp;
+
+		/* If rgrp has enough free space, use it */
+		if (rs->rs_rbm.rgd->rd_free_clone >= requested) {
+			ip->i_rgd = rs->rs_rbm.rgd;
+			return 0;
 		}
+
+		/* Drop reservation, if we couldn't use reserved rgrp */
+		if (gfs2_rs_active(rs))
+			gfs2_rs_deltree(ip, rs);
+check_rgrp:
+		/* Check for unlinked inodes which can be reclaimed */
+		if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK)
+			try_rgrp_unlink(rs->rs_rbm.rgd, &last_unlinked,
+					ip->i_no_addr);
+skip_rgrp:
+		/* Unlock rgrp if required */
+		if (!rg_locked)
+			gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
+next_rgrp:
+		/* Find the next rgrp, and continue looking */
+		if (gfs2_select_rgrp(&rs->rs_rbm.rgd, begin))
+			continue;
+
+		/* If we've scanned all the rgrps, but found no free blocks
+		 * then this checks for some less likely conditions before
+		 * trying again.
+		 */
+		flags &= ~LM_FLAG_TRY;
+		loops++;
+		/* Check that fs hasn't grown if writing to rindex */
+		if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) {
+			error = gfs2_ri_update(ip);
+			if (error)
+				return error;
+		}
+		/* Flushing the log may release space */
+		if (loops == 2)
+			gfs2_log_flush(sdp, NULL);
 	}
-	error = -ENOSPC;
 
-out:
-	return error;
+	return -ENOSPC;
 }
 
 /**
@@ -2024,6 +2006,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
 
 	gfs2_alloc_extent(&rbm, dinode, nblocks);
 	block = gfs2_rbm_to_block(&rbm);
+	rbm.rgd->rd_last_alloc = block - rbm.rgd->rd_data0;
 	if (gfs2_rs_active(ip->i_res))
 		gfs2_adjust_reservation(ip, &rbm, *nblocks);
 	ndata = *nblocks;
-- 
cgit 


From 0688a5ecea61a36ba12d17a18ab9f8712145cfa2 Mon Sep 17 00:00:00 2001
From: Bob Peterson <rpeterso@redhat.com>
Date: Tue, 28 Aug 2012 08:45:56 -0400
Subject: GFS2: Stop block extents at the end of bitmaps

This patch stops multiple block allocations if a nonzero
return code is received from gfs2_rbm_from_block. Without
this patch, if enough pressure is put on the file system,
you get a kernel warning quickly followed by:
BUG: unable to handle kernel NULL pointer dereference at (null)
IP: [<ffffffffa04f47e8>] gfs2_alloc_blocks+0x2c8/0x880 [gfs2]
With this patch, things run normally.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/rgrp.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 886954126704..defb8265ce52 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1834,8 +1834,7 @@ static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode,
 	block++;
 	while (*n < elen) {
 		ret = gfs2_rbm_from_block(&pos, block);
-		WARN_ON(ret);
-		if (gfs2_testbit(&pos) != GFS2_BLKST_FREE)
+		if (ret || gfs2_testbit(&pos) != GFS2_BLKST_FREE)
 			break;
 		gfs2_trans_add_bh(pos.rgd->rd_gl, pos.bi->bi_bh, 1);
 		gfs2_setbit(&pos, true, GFS2_BLKST_USED);
-- 
cgit 


From 56aa72d0fcc9c4a3af4d0111d8d7f336b63adff9 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 5 Sep 2012 16:55:11 -0400
Subject: GFS2: Get rid of I_MUTEX_QUOTA usage

GFS2 uses i_mutex on its system quota inode to synchronize writes to
quota file. Since this is an internal inode to GFS2 (not part of directory
hiearchy or visible by user) we are safe to define locking rules for it. So
let's just get it its own locking class to make it clear.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/ops_fstype.c | 8 ++++++++
 fs/gfs2/quota.c      | 2 +-
 2 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index e5af9dc420ef..e443966c8106 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -19,6 +19,7 @@
 #include <linux/mount.h>
 #include <linux/gfs2_ondisk.h>
 #include <linux/quotaops.h>
+#include <linux/lockdep.h>
 
 #include "gfs2.h"
 #include "incore.h"
@@ -766,6 +767,7 @@ fail:
 	return error;
 }
 
+static struct lock_class_key gfs2_quota_imutex_key;
 
 static int init_inodes(struct gfs2_sbd *sdp, int undo)
 {
@@ -803,6 +805,12 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo)
 		fs_err(sdp, "can't get quota file inode: %d\n", error);
 		goto fail_rindex;
 	}
+	/*
+	 * i_mutex on quota files is special. Since this inode is hidden system
+	 * file, we are safe to define locking ourselves.
+	 */
+	lockdep_set_class(&sdp->sd_quota_inode->i_mutex,
+			  &gfs2_quota_imutex_key);
 
 	error = gfs2_rindex_update(sdp);
 	if (error)
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 420bc3805ccc..4021deca61ef 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -782,7 +782,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
 		return -ENOMEM;
 
 	sort(qda, num_qd, sizeof(struct gfs2_quota_data *), sort_qd, NULL);
-	mutex_lock_nested(&ip->i_inode.i_mutex, I_MUTEX_QUOTA);
+	mutex_lock(&ip->i_inode.i_mutex);
 	for (qx = 0; qx < num_qd; qx++) {
 		error = gfs2_glock_nq_init(qda[qx]->qd_gl, LM_ST_EXCLUSIVE,
 					   GL_NOCACHE, &ghs[qx]);
-- 
cgit 


From ff7f4cb461163967a9dbb8c569e2447b7520654f Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Mon, 10 Sep 2012 10:03:50 +0100
Subject: GFS2: Consolidate free block searching functions

With the recently added block reservation code, an additional function
was added to search for free blocks. This had a restriction of only being
able to search for aligned extents of free blocks. As a result the
allocation patterns when reserving blocks were suboptimal when the
existing allocation of blocks for an inode was not aligned to the same
boundary.

This patch resolves that problem by adding the ability for gfs2_rbm_find
to search for extents of a particular minimum size. We can then use
gfs2_rbm_find for both looking for reservations, and also looking for
free blocks on an individual basis when we actually come to do the
allocation later on. As a result we only need a single set of code
to deal with both situations.

The function gfs2_rbm_from_block() is moved up rgrp.c so that it
occurs before all of its callers.

Many thanks are due to Bob for helping track down the final issue in
this patch. That fix to the rb_tree traversal and to not share
block reservations from a dirctory to its children is included here.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Signed-off-by: Bob Peterson <rpeterso@redhat.com>
---
 fs/gfs2/incore.h |   1 -
 fs/gfs2/inode.c  |  11 +-
 fs/gfs2/rgrp.c   | 367 +++++++++++++++++++++++++++++--------------------------
 fs/gfs2/rgrp.h   |   6 -
 4 files changed, 195 insertions(+), 190 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 6aaa07c7c731..3d469d37345e 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -99,7 +99,6 @@ struct gfs2_rgrpd {
 #define GFS2_RDF_MASK		0xf0000000 /* mask for internal flags */
 	spinlock_t rd_rsspin;           /* protects reservation related vars */
 	struct rb_root rd_rstree;       /* multi-block reservation tree */
-	u32 rd_rs_cnt;                  /* count of current reservations */
 };
 
 struct gfs2_rbm {
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index f2709ea887da..381893ceefa4 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -712,14 +712,9 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 	if (error)
 		goto fail_gunlock2;
 
-	/* The newly created inode needs a reservation so it can allocate
-	   xattrs. At the same time, we want new blocks allocated to the new
-	   dinode to be as contiguous as possible. Since we allocated the
-	   dinode block under the directory's reservation, we transfer
-	   ownership of that reservation to the new inode. The directory
-	   doesn't need a reservation unless it needs a new allocation. */
-	ip->i_res = dip->i_res;
-	dip->i_res = NULL;
+	error = gfs2_rs_alloc(ip);
+	if (error)
+		goto fail_gunlock2;
 
 	error = gfs2_acl_create(dip, inode);
 	if (error)
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index defb8265ce52..b933cdcda7f4 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -35,9 +35,6 @@
 #define BFITNOENT ((u32)~0)
 #define NO_BLOCK ((u64)~0)
 
-#define RSRV_CONTENTION_FACTOR 4
-#define RGRP_RSRV_MAX_CONTENDERS 2
-
 #if BITS_PER_LONG == 32
 #define LBITMASK   (0x55555555UL)
 #define LBITSKIP55 (0x55555555UL)
@@ -67,6 +64,10 @@ static const char valid_change[16] = {
 	        1, 0, 0, 0
 };
 
+static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 minext,
+                         const struct gfs2_inode *ip, bool nowrap);
+
+
 /**
  * gfs2_setbit - Set a bit in the bitmaps
  * @rbm: The position of the bit to set
@@ -234,6 +235,130 @@ static u32 gfs2_bitfit(const u8 *buf, const unsigned int len,
 	return (((const unsigned char *)ptr - buf) * GFS2_NBBY) + bit;
 }
 
+/**
+ * gfs2_rbm_from_block - Set the rbm based upon rgd and block number
+ * @rbm: The rbm with rgd already set correctly
+ * @block: The block number (filesystem relative)
+ *
+ * This sets the bi and offset members of an rbm based on a
+ * resource group and a filesystem relative block number. The
+ * resource group must be set in the rbm on entry, the bi and
+ * offset members will be set by this function.
+ *
+ * Returns: 0 on success, or an error code
+ */
+
+static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block)
+{
+	u64 rblock = block - rbm->rgd->rd_data0;
+	u32 goal = (u32)rblock;
+	int x;
+
+	if (WARN_ON_ONCE(rblock > UINT_MAX))
+		return -EINVAL;
+	if (block >= rbm->rgd->rd_data0 + rbm->rgd->rd_data)
+		return -E2BIG;
+
+	for (x = 0; x < rbm->rgd->rd_length; x++) {
+		rbm->bi = rbm->rgd->rd_bits + x;
+		if (goal < (rbm->bi->bi_start + rbm->bi->bi_len) * GFS2_NBBY) {
+			rbm->offset = goal - (rbm->bi->bi_start * GFS2_NBBY);
+			break;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * gfs2_unaligned_extlen - Look for free blocks which are not byte aligned
+ * @rbm: Position to search (value/result)
+ * @n_unaligned: Number of unaligned blocks to check
+ * @len: Decremented for each block found (terminate on zero)
+ *
+ * Returns: true if a non-free block is encountered
+ */
+
+static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *len)
+{
+	u64 block;
+	u32 n;
+	u8 res;
+
+	for (n = 0; n < n_unaligned; n++) {
+		res = gfs2_testbit(rbm);
+		if (res != GFS2_BLKST_FREE)
+			return true;
+		(*len)--;
+		if (*len == 0)
+			return true;
+		block = gfs2_rbm_to_block(rbm);
+		if (gfs2_rbm_from_block(rbm, block + 1))
+			return true;
+	}
+
+	return false;
+}
+
+/**
+ * gfs2_free_extlen - Return extent length of free blocks
+ * @rbm: Starting position
+ * @len: Max length to check
+ *
+ * Starting at the block specified by the rbm, see how many free blocks
+ * there are, not reading more than len blocks ahead. This can be done
+ * using memchr_inv when the blocks are byte aligned, but has to be done
+ * on a block by block basis in case of unaligned blocks. Also this
+ * function can cope with bitmap boundaries (although it must stop on
+ * a resource group boundary)
+ *
+ * Returns: Number of free blocks in the extent
+ */
+
+static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len)
+{
+	struct gfs2_rbm rbm = *rrbm;
+	u32 n_unaligned = rbm.offset & 3;
+	u32 size = len;
+	u32 bytes;
+	u32 chunk_size;
+	u8 *ptr, *start, *end;
+	u64 block;
+
+	if (n_unaligned &&
+	    gfs2_unaligned_extlen(&rbm, 4 - n_unaligned, &len))
+		goto out;
+
+	/* Start is now byte aligned */
+	while (len > 3) {
+		start = rbm.bi->bi_bh->b_data;
+		if (rbm.bi->bi_clone)
+			start = rbm.bi->bi_clone;
+		end = start + rbm.bi->bi_bh->b_size;
+		start += rbm.bi->bi_offset;
+		BUG_ON(rbm.offset & 3);
+		start += (rbm.offset / GFS2_NBBY);
+		bytes = min_t(u32, len / GFS2_NBBY, (end - start));
+		ptr = memchr_inv(start, 0, bytes);
+		chunk_size = ((ptr == NULL) ? bytes : (ptr - start));
+		chunk_size *= GFS2_NBBY;
+		BUG_ON(len < chunk_size);
+		len -= chunk_size;
+		block = gfs2_rbm_to_block(&rbm);
+		gfs2_rbm_from_block(&rbm, block + chunk_size);
+		n_unaligned = 3;
+		if (ptr)
+			break;
+		n_unaligned = len & 3;
+	}
+
+	/* Deal with any bits left over at the end */
+	if (n_unaligned)
+		gfs2_unaligned_extlen(&rbm, n_unaligned, &len);
+out:
+	return size - len;
+}
+
 /**
  * gfs2_bitcount - count the number of bits in a certain state
  * @rgd: the resource group descriptor
@@ -472,8 +597,6 @@ static void __rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs)
 	trace_gfs2_rs(rs, TRACE_RS_TREEDEL);
 	rb_erase(&rs->rs_node, &rgd->rd_rstree);
 	RB_CLEAR_NODE(&rs->rs_node);
-	BUG_ON(!rgd->rd_rs_cnt);
-	rgd->rd_rs_cnt--;
 
 	if (rs->rs_free) {
 		/* return reserved blocks to the rgrp and the ip */
@@ -1208,179 +1331,85 @@ out:
 
 /**
  * rs_insert - insert a new multi-block reservation into the rgrp's rb_tree
- * @bi: the bitmap with the blocks
  * @ip: the inode structure
- * @biblk: the 32-bit block number relative to the start of the bitmap
- * @amount: the number of blocks to reserve
  *
- * Returns: NULL - reservation was already taken, so not inserted
- *          pointer to the inserted reservation
  */
-static struct gfs2_blkreserv *rs_insert(struct gfs2_bitmap *bi,
-				       struct gfs2_inode *ip, u32 biblk,
-				       int amount)
+static void rs_insert(struct gfs2_inode *ip)
 {
 	struct rb_node **newn, *parent = NULL;
 	int rc;
 	struct gfs2_blkreserv *rs = ip->i_res;
 	struct gfs2_rgrpd *rgd = rs->rs_rbm.rgd;
-	u64 fsblock = gfs2_bi2rgd_blk(bi, biblk) + rgd->rd_data0;
+	u64 fsblock = gfs2_rbm_to_block(&rs->rs_rbm);
 
-	spin_lock(&rgd->rd_rsspin);
-	newn = &rgd->rd_rstree.rb_node;
-	BUG_ON(!ip->i_res);
 	BUG_ON(gfs2_rs_active(rs));
-	/* Figure out where to put new node */
 
+	spin_lock(&rgd->rd_rsspin);
+	newn = &rgd->rd_rstree.rb_node;
 	while (*newn) {
 		struct gfs2_blkreserv *cur =
 			rb_entry(*newn, struct gfs2_blkreserv, rs_node);
 
 		parent = *newn;
-		rc = rs_cmp(fsblock, amount, cur);
+		rc = rs_cmp(fsblock, rs->rs_free, cur);
 		if (rc > 0)
 			newn = &((*newn)->rb_right);
 		else if (rc < 0)
 			newn = &((*newn)->rb_left);
 		else {
 			spin_unlock(&rgd->rd_rsspin);
-			return NULL; /* reservation already in use */
+			WARN_ON(1);
+			return;
 		}
 	}
 
-	/* Do our reservation work */
-	rs = ip->i_res;
-	rs->rs_free = amount;
-	rs->rs_rbm.offset = biblk;
-	rs->rs_rbm.bi = bi;
-	rs->rs_inum = ip->i_no_addr;
 	rb_link_node(&rs->rs_node, parent, newn);
 	rb_insert_color(&rs->rs_node, &rgd->rd_rstree);
 
 	/* Do our rgrp accounting for the reservation */
-	rgd->rd_reserved += amount; /* blocks reserved */
-	rgd->rd_rs_cnt++; /* number of in-tree reservations */
+	rgd->rd_reserved += rs->rs_free; /* blocks reserved */
 	spin_unlock(&rgd->rd_rsspin);
 	trace_gfs2_rs(rs, TRACE_RS_INSERT);
-	return rs;
 }
 
 /**
- * unclaimed_blocks - return number of blocks that aren't spoken for
- */
-static u32 unclaimed_blocks(struct gfs2_rgrpd *rgd)
-{
-	return rgd->rd_free_clone - rgd->rd_reserved;
-}
-
-/**
- * rg_mblk_search - find a group of multiple free blocks
+ * rg_mblk_search - find a group of multiple free blocks to form a reservation
  * @rgd: the resource group descriptor
  * @ip: pointer to the inode for which we're reserving blocks
  * @requested: number of blocks required for this allocation
  *
- * This is very similar to rgblk_search, except we're looking for whole
- * 64-bit words that represent a chunk of 32 free blocks. I'm only focusing
- * on aligned dwords for speed's sake.
- *
  */
 
-static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, unsigned requested)
+static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
+			   unsigned requested)
 {
-	struct gfs2_bitmap *bi = rgd->rd_bits;
-	const u32 length = rgd->rd_length;
-	u32 blk;
-	unsigned int buf, x, search_bytes;
-	u8 *buffer = NULL;
-	u8 *ptr, *end, *nonzero;
-	u32 goal, rsv_bytes;
-	struct gfs2_blkreserv *rs;
-	u32 best_rs_bytes, unclaimed;
-	int best_rs_blocks;
+	struct gfs2_rbm rbm = { .rgd = rgd, };
+	u64 goal;
+	struct gfs2_blkreserv *rs = ip->i_res;
+	u32 extlen;
+	u32 free_blocks = rgd->rd_free_clone - rgd->rd_reserved;
+	int ret;
 
-	if ((rgd->rd_free_clone < rgd->rd_reserved) ||
-	    (unclaimed_blocks(rgd) < max(requested, RGRP_RSRV_MINBLKS)))
+	extlen = max_t(u32, atomic_read(&rs->rs_sizehint), requested);
+	extlen = clamp(extlen, RGRP_RSRV_MINBLKS, free_blocks);
+	if ((rgd->rd_free_clone < rgd->rd_reserved) || (free_blocks < extlen))
 		return;
 
 	/* Find bitmap block that contains bits for goal block */
 	if (rgrp_contains_block(rgd, ip->i_goal))
-		goal = ip->i_goal - rgd->rd_data0;
+		goal = ip->i_goal;
 	else
-		goal = rgd->rd_last_alloc;
+		goal = rgd->rd_last_alloc + rgd->rd_data0;
 
-	for (buf = 0; buf < length; buf++) {
-		bi = rgd->rd_bits + buf;
-		/* Convert scope of "goal" from rgrp-wide to within
-		   found bit block */
-		if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY) {
-			goal -= bi->bi_start * GFS2_NBBY;
-			goto do_search;
-		}
-	}
-	buf = 0;
-	goal = 0;
-
-do_search:
-	best_rs_blocks = max_t(int, atomic_read(&ip->i_res->rs_sizehint),
-			       (RGRP_RSRV_MINBLKS * rgd->rd_length));
-	best_rs_bytes = (best_rs_blocks *
-			 (1 + (RSRV_CONTENTION_FACTOR * rgd->rd_rs_cnt))) /
-		GFS2_NBBY; /* 1 + is for our not-yet-created reservation */
-	best_rs_bytes = ALIGN(best_rs_bytes, sizeof(u64));
-	unclaimed = unclaimed_blocks(rgd);
-	if (best_rs_bytes * GFS2_NBBY > unclaimed)
-		best_rs_bytes = unclaimed >> GFS2_BIT_SIZE;
-
-	for (x = 0; x <= length; x++) {
-		bi = rgd->rd_bits + buf;
-
-		if (test_bit(GBF_FULL, &bi->bi_flags))
-			goto skip;
+	if (WARN_ON(gfs2_rbm_from_block(&rbm, goal)))
+		return;
 
-		WARN_ON(!buffer_uptodate(bi->bi_bh));
-		if (bi->bi_clone)
-			buffer = bi->bi_clone + bi->bi_offset;
-		else
-			buffer = bi->bi_bh->b_data + bi->bi_offset;
-
-		/* We have to keep the reservations aligned on u64 boundaries
-		   otherwise we could get situations where a byte can't be
-		   used because it's after a reservation, but a free bit still
-		   is within the reservation's area. */
-		ptr = buffer + ALIGN(goal >> GFS2_BIT_SIZE, sizeof(u64));
-		end = (buffer + bi->bi_len);
-		while (ptr < end) {
-			rsv_bytes = 0;
-			if ((ptr + best_rs_bytes) <= end)
-				search_bytes = best_rs_bytes;
-			else
-				search_bytes = end - ptr;
-			BUG_ON(!search_bytes);
-			nonzero = memchr_inv(ptr, 0, search_bytes);
-			/* If the lot is all zeroes, reserve the whole size. If
-			   there's enough zeroes to satisfy the request, use
-			   what we can. If there's not enough, keep looking. */
-			if (nonzero == NULL)
-				rsv_bytes = search_bytes;
-			else if ((nonzero - ptr) * GFS2_NBBY >= requested)
-				rsv_bytes = (nonzero - ptr);
-
-			if (rsv_bytes) {
-				blk = ((ptr - buffer) * GFS2_NBBY);
-				BUG_ON(blk >= bi->bi_len * GFS2_NBBY);
-				rs = rs_insert(bi, ip, blk,
-					       rsv_bytes * GFS2_NBBY);
-				if (rs)
-					return;
-			}
-			ptr += ALIGN(search_bytes, sizeof(u64));
-		}
-skip:
-		/* Try next bitmap block (wrap back to rgrp header
-		   if at end) */
-		buf++;
-		buf %= length;
-		goal = 0;
+	ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, extlen, ip, true);
+	if (ret == 0) {
+		rs->rs_rbm = rbm;
+		rs->rs_free = extlen;
+		rs->rs_inum = ip->i_no_addr;
+		rs_insert(ip);
 	}
 }
 
@@ -1388,6 +1417,7 @@ skip:
  * gfs2_next_unreserved_block - Return next block that is not reserved
  * @rgd: The resource group
  * @block: The starting block
+ * @length: The required length
  * @ip: Ignore any reservations for this inode
  *
  * If the block does not appear in any reservation, then return the
@@ -1397,6 +1427,7 @@ skip:
  */
 
 static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block,
+				      u32 length,
 				      const struct gfs2_inode *ip)
 {
 	struct gfs2_blkreserv *rs;
@@ -1404,10 +1435,10 @@ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block,
 	int rc;
 
 	spin_lock(&rgd->rd_rsspin);
-	n = rb_first(&rgd->rd_rstree);
+	n = rgd->rd_rstree.rb_node;
 	while (n) {
 		rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
-		rc = rs_cmp(block, 1, rs);
+		rc = rs_cmp(block, length, rs);
 		if (rc < 0)
 			n = n->rb_left;
 		else if (rc > 0)
@@ -1417,9 +1448,9 @@ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block,
 	}
 
 	if (n) {
-		while ((rs_cmp(block, 1, rs) == 0) && (ip->i_res != rs)) {
+		while ((rs_cmp(block, length, rs) == 0) && (ip->i_res != rs)) {
 			block = gfs2_rbm_to_block(&rs->rs_rbm) + rs->rs_free;
-			n = rb_next(&rs->rs_node);
+			n = n->rb_right;
 			if (n == NULL)
 				break;
 			rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
@@ -1430,44 +1461,11 @@ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block,
 	return block;
 }
 
-/**
- * gfs2_rbm_from_block - Set the rbm based upon rgd and block number
- * @rbm: The rbm with rgd already set correctly
- * @block: The block number (filesystem relative)
- *
- * This sets the bi and offset members of an rbm based on a
- * resource group and a filesystem relative block number. The
- * resource group must be set in the rbm on entry, the bi and
- * offset members will be set by this function.
- *
- * Returns: 0 on success, or an error code
- */
-
-static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block)
-{
-	u64 rblock = block - rbm->rgd->rd_data0;
-	u32 goal = (u32)rblock;
-	int x;
-
-	if (WARN_ON_ONCE(rblock > UINT_MAX))
-		return -EINVAL;
-	if (block >= rbm->rgd->rd_data0 + rbm->rgd->rd_data)
-		return -E2BIG;
-
-	for (x = 0; x < rbm->rgd->rd_length; x++) {
-		rbm->bi = rbm->rgd->rd_bits + x;
-		if (goal < (rbm->bi->bi_start + rbm->bi->bi_len) * GFS2_NBBY) {
-			rbm->offset = goal - (rbm->bi->bi_start * GFS2_NBBY);
-			break;
-		}
-	}
-
-	return 0;
-}
-
 /**
  * gfs2_reservation_check_and_update - Check for reservations during block alloc
  * @rbm: The current position in the resource group
+ * @ip: The inode for which we are searching for blocks
+ * @minext: The minimum extent length
  *
  * This checks the current position in the rgrp to see whether there is
  * a reservation covering this block. If not then this function is a
@@ -1479,15 +1477,33 @@ static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block)
  */
 
 static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm,
-					     const struct gfs2_inode *ip)
+					     const struct gfs2_inode *ip,
+					     u32 minext)
 {
 	u64 block = gfs2_rbm_to_block(rbm);
+	u32 extlen = 1;
 	u64 nblock;
 	int ret;
 
-	nblock = gfs2_next_unreserved_block(rbm->rgd, block, ip);
+	/*
+	 * If we have a minimum extent length, then skip over any extent
+	 * which is less than the min extent length in size.
+	 */
+	if (minext) {
+		extlen = gfs2_free_extlen(rbm, minext);
+		nblock = block + extlen;
+		if (extlen < minext)
+			goto fail;
+	}
+
+	/*
+	 * Check the extent which has been found against the reservations
+	 * and skip if parts of it are already reserved
+	 */
+	nblock = gfs2_next_unreserved_block(rbm->rgd, block, extlen, ip);
 	if (nblock == block)
 		return 0;
+fail:
 	ret = gfs2_rbm_from_block(rbm, nblock);
 	if (ret < 0)
 		return ret;
@@ -1498,6 +1514,7 @@ static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm,
  * gfs2_rbm_find - Look for blocks of a particular state
  * @rbm: Value/result starting position and final position
  * @state: The state which we want to find
+ * @minext: The requested extent length (0 for a single block)
  * @ip: If set, check for reservations
  * @nowrap: Stop looking at the end of the rgrp, rather than wrapping
  *          around until we've reached the starting point.
@@ -1509,7 +1526,7 @@ static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm,
  * Returns: 0 on success, -ENOSPC if there is no block of the requested state
  */
 
-static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state,
+static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 minext,
 			 const struct gfs2_inode *ip, bool nowrap)
 {
 	struct buffer_head *bh;
@@ -1548,7 +1565,7 @@ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state,
 			return 0;
 
 		initial_bi = rbm->bi;
-		ret = gfs2_reservation_check_and_update(rbm, ip);
+		ret = gfs2_reservation_check_and_update(rbm, ip, minext);
 		if (ret == 0)
 			return 0;
 		if (ret > 0) {
@@ -1608,7 +1625,7 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
 
 	while (1) {
 		down_write(&sdp->sd_log_flush_lock);
-		error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, NULL, true);
+		error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, 0, NULL, true);
 		up_write(&sdp->sd_log_flush_lock);
 		if (error == -ENOSPC)
 			break;
@@ -1988,11 +2005,11 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
 		goal = rbm.rgd->rd_last_alloc + rbm.rgd->rd_data0;
 
 	gfs2_rbm_from_block(&rbm, goal);
-	error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, ip, false);
+	error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, 0, ip, false);
 
 	if (error == -ENOSPC) {
 		gfs2_rbm_from_block(&rbm, goal);
-		error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, false);
+		error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, 0, NULL, false);
 	}
 
 	/* Since all blocks are reserved in advance, this shouldn't happen */
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index c98f6af07e1c..24077958dcf6 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -79,10 +79,4 @@ static inline bool gfs2_rs_active(struct gfs2_blkreserv *rs)
 	return rs && !RB_EMPTY_NODE(&rs->rs_node);
 }
 
-
-static inline u32 gfs2_bi2rgd_blk(const struct gfs2_bitmap *bi, u32 blk)
-{
-	return (bi->bi_start * GFS2_NBBY) + blk;
-}
-
 #endif /* __RGRP_DOT_H__ */
-- 
cgit 


From 3701530aed9711fcfe9b29d3de0f7a27c8dbc8ae Mon Sep 17 00:00:00 2001
From: Bob Peterson <rpeterso@redhat.com>
Date: Wed, 12 Sep 2012 09:40:31 -0400
Subject: GFS2: Fix infinite loop in rbm_find

This patch fixes an infinite loop in gfs2_rbm_find that was introduced
by the previous patch. The problem occurred when the length was less
than 3 but the rbm block was byte-aligned, causing it to improperly
return a extent length of zero, which caused it to spin.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Tested-by: Bob Peterson <rpeterso@redhat.com>
Tested-by: Barry Marson <bmarson@redhat.com>
---
 fs/gfs2/rgrp.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index b933cdcda7f4..3cc402ce6fea 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -329,6 +329,7 @@ static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len)
 	    gfs2_unaligned_extlen(&rbm, 4 - n_unaligned, &len))
 		goto out;
 
+	n_unaligned = len & 3;
 	/* Start is now byte aligned */
 	while (len > 3) {
 		start = rbm.bi->bi_bh->b_data;
-- 
cgit 


From a0b4df294387c777c3091a43e9970572e10261b1 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Mon, 17 Sep 2012 21:50:31 -0500
Subject: GFS2: fix s_writers.counter imbalance in gfs2_ail_empty_gl

gfs2_ail_empty_gl() contains an "inline version" of gfs2_trans_begin(),
so it needs an explicit sb_start_intwrite() as well, to balance the
sb_end_intwrite() which will be called by gfs2_trans_end().

With this, xfstest 068 passes on lock_nolock local gfs2.
Without it, we reach a writer count of -1 and get stuck.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/glops.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 4bdcf3784187..32cc4fde975c 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -94,6 +94,7 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
 	/* A shortened, inline version of gfs2_trans_begin() */
 	tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64));
 	tr.tr_ip = (unsigned long)__builtin_return_address(0);
+	sb_start_intwrite(sdp->sd_vfs);
 	gfs2_log_reserve(sdp, tr.tr_reserved);
 	BUG_ON(current->journal_info);
 	current->journal_info = &tr;
-- 
cgit 


From 2216db70c95a9610f5279b8da53bec614d98270f Mon Sep 17 00:00:00 2001
From: Benjamin Marzinski <bmarzins@redhat.com>
Date: Thu, 20 Sep 2012 09:52:58 -0500
Subject: GFS2: Write out dirty inode metadata in delayed deletes

If a dirty GFS2 inode was being deleted but was in use by another node, its
metadata was not getting written out before GFS2 checked for dirty buffers in
gfs2_ail_flush().  GFS2 was relying on inode_go_sync() to write out the
metadata when the other node tried to free the file, but it failed the error
check before it got that far. This patch writes out the metadata before calling
gfs2_ail_flush()

Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/super.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'fs')

diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 79cac7057691..a8d90f2f576c 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1545,6 +1545,11 @@ static void gfs2_evict_inode(struct inode *inode)
 
 out_truncate:
 	gfs2_log_flush(sdp, ip->i_gl);
+	if (test_bit(GLF_DIRTY, &ip->i_gl->gl_flags)) {
+		struct address_space *metamapping = gfs2_glock2aspace(ip->i_gl);
+		filemap_fdatawrite(metamapping);
+		filemap_fdatawait(metamapping);
+	}
 	write_inode_now(inode, 1);
 	gfs2_ail_flush(ip->i_gl, 0);
 
-- 
cgit 


From ed6875e0d6c28e4a6b44da04d6d4363b3d92d630 Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <pshilovsky@samba.org>
Date: Tue, 18 Sep 2012 16:20:25 -0700
Subject: CIFS: Move unlink code to ops struct

Reviewed-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Pavel Shilovsky <pshilovsky@samba.org>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/cifsglob.h  |  6 ++++++
 fs/cifs/cifsproto.h |  9 ++++++---
 fs/cifs/cifssmb.c   | 16 ++++++++--------
 fs/cifs/inode.c     | 33 ++++++++++++++++++++++-----------
 fs/cifs/smb1ops.c   |  2 ++
 5 files changed, 44 insertions(+), 22 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 977dc0e85ccb..843356fa262d 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -256,6 +256,12 @@ struct smb_version_operations {
 	/* remove directory */
 	int (*rmdir)(const unsigned int, struct cifs_tcon *, const char *,
 		     struct cifs_sb_info *);
+	/* unlink file */
+	int (*unlink)(const unsigned int, struct cifs_tcon *, const char *,
+		      struct cifs_sb_info *);
+	/* open, rename and delete file */
+	int (*rename_pending_delete)(const char *, struct dentry *,
+				     const unsigned int);
 };
 
 struct smb_version_values {
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index f1bbf8305d3a..79c088469757 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -144,6 +144,11 @@ extern int cifs_get_file_info_unix(struct file *filp);
 extern int cifs_get_inode_info_unix(struct inode **pinode,
 			const unsigned char *search_path,
 			struct super_block *sb, unsigned int xid);
+extern int cifs_set_file_info(struct inode *inode, struct iattr *attrs,
+			      unsigned int xid, char *full_path, __u32 dosattr);
+extern int cifs_rename_pending_delete(const char *full_path,
+				      struct dentry *dentry,
+				      const unsigned int xid);
 extern int cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb,
 			      struct cifs_fattr *fattr, struct inode *inode,
 			      const char *path, const __u16 *pfid);
@@ -303,9 +308,7 @@ extern int CIFSPOSIXDelFile(const unsigned int xid, struct cifs_tcon *tcon,
 			const struct nls_table *nls_codepage,
 			int remap_special_chars);
 extern int CIFSSMBDelFile(const unsigned int xid, struct cifs_tcon *tcon,
-			const char *name,
-			const struct nls_table *nls_codepage,
-			int remap_special_chars);
+			  const char *name, struct cifs_sb_info *cifs_sb);
 extern int CIFSSMBRename(const unsigned int xid, struct cifs_tcon *tcon,
 			const char *fromName, const char *toName,
 			const struct nls_table *nls_codepage,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index f0cf934ba877..2dddf01d2b66 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -902,15 +902,15 @@ PsxDelete:
 }
 
 int
-CIFSSMBDelFile(const unsigned int xid, struct cifs_tcon *tcon,
-	       const char *fileName, const struct nls_table *nls_codepage,
-	       int remap)
+CIFSSMBDelFile(const unsigned int xid, struct cifs_tcon *tcon, const char *name,
+	       struct cifs_sb_info *cifs_sb)
 {
 	DELETE_FILE_REQ *pSMB = NULL;
 	DELETE_FILE_RSP *pSMBr = NULL;
 	int rc = 0;
 	int bytes_returned;
 	int name_len;
+	int remap = cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR;
 
 DelFileRetry:
 	rc = smb_init(SMB_COM_DELETE, 1, tcon, (void **) &pSMB,
@@ -919,15 +919,15 @@ DelFileRetry:
 		return rc;
 
 	if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) {
-		name_len =
-		    cifsConvertToUTF16((__le16 *) pSMB->fileName, fileName,
-				       PATH_MAX, nls_codepage, remap);
+		name_len = cifsConvertToUTF16((__le16 *) pSMB->fileName, name,
+					      PATH_MAX, cifs_sb->local_nls,
+					      remap);
 		name_len++;	/* trailing null */
 		name_len *= 2;
 	} else {		/* BB improve check for buffer overruns BB */
-		name_len = strnlen(fileName, PATH_MAX);
+		name_len = strnlen(name, PATH_MAX);
 		name_len++;	/* trailing null */
-		strncpy(pSMB->fileName, fileName, name_len);
+		strncpy(pSMB->fileName, name, name_len);
 	}
 	pSMB->SearchAttributes =
 	    cpu_to_le16(ATTR_READONLY | ATTR_HIDDEN | ATTR_SYSTEM);
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index cb79c7edecb0..bb39ea475a20 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -876,9 +876,9 @@ out:
 	return inode;
 }
 
-static int
+int
 cifs_set_file_info(struct inode *inode, struct iattr *attrs, unsigned int xid,
-		    char *full_path, __u32 dosattr)
+		   char *full_path, __u32 dosattr)
 {
 	int rc;
 	int oplock = 0;
@@ -993,13 +993,13 @@ out:
 }
 
 /*
- * open the given file (if it isn't already), set the DELETE_ON_CLOSE bit
+ * Open the given file (if it isn't already), set the DELETE_ON_CLOSE bit
  * and rename it to a random name that hopefully won't conflict with
  * anything else.
  */
-static int
-cifs_rename_pending_delete(char *full_path, struct dentry *dentry,
-			   unsigned int xid)
+int
+cifs_rename_pending_delete(const char *full_path, struct dentry *dentry,
+			   const unsigned int xid)
 {
 	int oplock = 0;
 	int rc;
@@ -1136,6 +1136,7 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry)
 	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
 	struct tcon_link *tlink;
 	struct cifs_tcon *tcon;
+	struct TCP_Server_Info *server;
 	struct iattr *attrs = NULL;
 	__u32 dosattr = 0, origattr = 0;
 
@@ -1145,6 +1146,7 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry)
 	if (IS_ERR(tlink))
 		return PTR_ERR(tlink);
 	tcon = tlink_tcon(tlink);
+	server = tcon->ses->server;
 
 	xid = get_xid();
 
@@ -1167,8 +1169,12 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry)
 	}
 
 retry_std_delete:
-	rc = CIFSSMBDelFile(xid, tcon, full_path, cifs_sb->local_nls,
-			cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
+	if (!server->ops->unlink) {
+		rc = -ENOSYS;
+		goto psx_del_no_retry;
+	}
+
+	rc = server->ops->unlink(xid, tcon, full_path, cifs_sb);
 
 psx_del_no_retry:
 	if (!rc) {
@@ -1177,9 +1183,14 @@ psx_del_no_retry:
 	} else if (rc == -ENOENT) {
 		d_drop(dentry);
 	} else if (rc == -ETXTBSY) {
-		rc = cifs_rename_pending_delete(full_path, dentry, xid);
-		if (rc == 0)
-			cifs_drop_nlink(inode);
+		if (server->ops->rename_pending_delete) {
+			rc = server->ops->rename_pending_delete(full_path,
+								dentry, xid);
+			if (rc == 0)
+				cifs_drop_nlink(inode);
+		}
+		if (rc == -ETXTBSY)
+			rc = -EBUSY;
 	} else if ((rc == -EACCES) && (dosattr == 0) && inode) {
 		attrs = kzalloc(sizeof(*attrs), GFP_KERNEL);
 		if (attrs == NULL) {
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index 3129ac74b819..725fa6195867 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -644,6 +644,8 @@ struct smb_version_operations smb1_operations = {
 	.mkdir = CIFSSMBMkDir,
 	.mkdir_setinfo = cifs_mkdir_setinfo,
 	.rmdir = CIFSSMBRmDir,
+	.unlink = CIFSSMBDelFile,
+	.rename_pending_delete = cifs_rename_pending_delete,
 };
 
 struct smb_version_values smb1_values = {
-- 
cgit 


From cbe6f439f5762c7fb4d2dd9293f5fdbfc4cd68f8 Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <pshilovsky@samba.org>
Date: Tue, 18 Sep 2012 16:20:25 -0700
Subject: CIFS: Add SMB2 support for unlink

Signed-off-by: Pavel Shilovsky <pshilovsky@samba.org>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/smb2inode.c | 9 +++++++++
 fs/cifs/smb2ops.c   | 1 +
 fs/cifs/smb2proto.h | 2 ++
 3 files changed, 12 insertions(+)

(limited to 'fs')

diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c
index 2aa5cb08c526..02a9bda4248c 100644
--- a/fs/cifs/smb2inode.c
+++ b/fs/cifs/smb2inode.c
@@ -161,3 +161,12 @@ smb2_rmdir(const unsigned int xid, struct cifs_tcon *tcon, const char *name,
 				  0, CREATE_NOT_FILE | CREATE_DELETE_ON_CLOSE,
 				  NULL, SMB2_OP_DELETE);
 }
+
+int
+smb2_unlink(const unsigned int xid, struct cifs_tcon *tcon, const char *name,
+	    struct cifs_sb_info *cifs_sb)
+{
+	return smb2_open_op_close(xid, tcon, cifs_sb, name, DELETE, FILE_OPEN,
+				  0, CREATE_DELETE_ON_CLOSE, NULL,
+				  SMB2_OP_DELETE);
+}
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 826209bf3684..bf9b318cfa4b 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -321,6 +321,7 @@ struct smb_version_operations smb21_operations = {
 	.mkdir = smb2_mkdir,
 	.mkdir_setinfo = smb2_mkdir_setinfo,
 	.rmdir = smb2_rmdir,
+	.unlink = smb2_unlink,
 };
 
 struct smb_version_values smb21_values = {
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index bfaa7b148afd..f4ac72799f70 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -59,6 +59,8 @@ extern void smb2_mkdir_setinfo(struct inode *inode, const char *full_path,
 			       struct cifs_tcon *tcon, const unsigned int xid);
 extern int smb2_rmdir(const unsigned int xid, struct cifs_tcon *tcon,
 		      const char *name, struct cifs_sb_info *cifs_sb);
+extern int smb2_unlink(const unsigned int xid, struct cifs_tcon *tcon,
+		       const char *name, struct cifs_sb_info *cifs_sb);
 
 /*
  * SMB2 Worker functions - most of protocol specific implementation details
-- 
cgit 


From 4b4de76e35518fc0c636f628abca8c1b19ad6689 Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <pshilovsky@samba.org>
Date: Tue, 18 Sep 2012 16:20:26 -0700
Subject: CIFS: Replace netfid with cifs_fid struct in cifsFileInfo

This is help us to extend the code for future protocols that can use
another fid mechanism (as SMB2 that has it divided into two parts:
persistent and violatile).

Also rename variables and refactor the code around the changes.

Reviewed-by: Jeff Layton <jlayton@samba.org>
Signed-off-by: Pavel Shilovsky <pshilovsky@samba.org>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/cifsacl.c  |   2 +-
 fs/cifs/cifsglob.h |   6 ++-
 fs/cifs/cifssmb.c  |   4 +-
 fs/cifs/file.c     | 116 ++++++++++++++++++++++++++---------------------------
 fs/cifs/inode.c    |  10 ++---
 fs/cifs/ioctl.c    |  13 ++++--
 fs/cifs/misc.c     |   2 +-
 fs/cifs/readdir.c  |  10 ++---
 fs/cifs/smb1ops.c  |   2 +-
 9 files changed, 87 insertions(+), 78 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 05f4dc263a23..2ee5c54797fa 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -1222,7 +1222,7 @@ struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *cifs_sb,
 	if (!open_file)
 		return get_cifs_acl_by_path(cifs_sb, path, pacllen);
 
-	pntsd = get_cifs_acl_by_fid(cifs_sb, open_file->netfid, pacllen);
+	pntsd = get_cifs_acl_by_fid(cifs_sb, open_file->fid.netfid, pacllen);
 	cifsFileInfo_put(open_file);
 	return pntsd;
 }
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 843356fa262d..a2a3865dee1b 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -746,6 +746,10 @@ struct cifs_search_info {
 	bool smallBuf:1; /* so we know which buf_release function to call */
 };
 
+struct cifs_fid {
+	__u16 netfid;
+};
+
 struct cifsFileInfo {
 	struct list_head tlist;	/* pointer to next fid owned by tcon */
 	struct list_head flist;	/* next fid (file instance) for this inode */
@@ -755,7 +759,7 @@ struct cifsFileInfo {
 				 */
 	unsigned int uid;	/* allows finding which FileInfo structure */
 	__u32 pid;		/* process id who opened file */
-	__u16 netfid;		/* file id from remote */
+	struct cifs_fid fid;	/* file id from remote */
 	/* BB add lock scope info here if needed */ ;
 	/* lock scope id (0 if none) */
 	struct dentry *dentry;
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 2dddf01d2b66..4c48b9c60b26 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1632,7 +1632,7 @@ cifs_async_readv(struct cifs_readdata *rdata)
 	smb->hdr.PidHigh = cpu_to_le16((__u16)(rdata->pid >> 16));
 
 	smb->AndXCommand = 0xFF;	/* none */
-	smb->Fid = rdata->cfile->netfid;
+	smb->Fid = rdata->cfile->fid.netfid;
 	smb->OffsetLow = cpu_to_le32(rdata->offset & 0xFFFFFFFF);
 	if (wct == 12)
 		smb->OffsetHigh = cpu_to_le32(rdata->offset >> 32);
@@ -2084,7 +2084,7 @@ cifs_async_writev(struct cifs_writedata *wdata)
 	smb->hdr.PidHigh = cpu_to_le16((__u16)(wdata->pid >> 16));
 
 	smb->AndXCommand = 0xFF;	/* none */
-	smb->Fid = wdata->cfile->netfid;
+	smb->Fid = wdata->cfile->fid.netfid;
 	smb->OffsetLow = cpu_to_le32(wdata->offset & 0xFFFFFFFF);
 	if (wct == 14)
 		smb->OffsetHigh = cpu_to_le32(wdata->offset >> 32);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 71e9ad9f5961..712f2a4d0d49 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -247,39 +247,39 @@ cifs_new_fileinfo(__u16 fileHandle, struct file *file,
 {
 	struct dentry *dentry = file->f_path.dentry;
 	struct inode *inode = dentry->d_inode;
-	struct cifsInodeInfo *pCifsInode = CIFS_I(inode);
-	struct cifsFileInfo *pCifsFile;
-
-	pCifsFile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
-	if (pCifsFile == NULL)
-		return pCifsFile;
-
-	pCifsFile->count = 1;
-	pCifsFile->netfid = fileHandle;
-	pCifsFile->pid = current->tgid;
-	pCifsFile->uid = current_fsuid();
-	pCifsFile->dentry = dget(dentry);
-	pCifsFile->f_flags = file->f_flags;
-	pCifsFile->invalidHandle = false;
-	pCifsFile->tlink = cifs_get_tlink(tlink);
-	mutex_init(&pCifsFile->fh_mutex);
-	INIT_WORK(&pCifsFile->oplock_break, cifs_oplock_break);
-	INIT_LIST_HEAD(&pCifsFile->llist);
+	struct cifsInodeInfo *cinode = CIFS_I(inode);
+	struct cifsFileInfo *cfile;
+
+	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
+	if (cfile == NULL)
+		return cfile;
+
+	cfile->count = 1;
+	cfile->fid.netfid = fileHandle;
+	cfile->pid = current->tgid;
+	cfile->uid = current_fsuid();
+	cfile->dentry = dget(dentry);
+	cfile->f_flags = file->f_flags;
+	cfile->invalidHandle = false;
+	cfile->tlink = cifs_get_tlink(tlink);
+	mutex_init(&cfile->fh_mutex);
+	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
+	INIT_LIST_HEAD(&cfile->llist);
 
 	spin_lock(&cifs_file_list_lock);
-	list_add(&pCifsFile->tlist, &(tlink_tcon(tlink)->openFileList));
+	list_add(&cfile->tlist, &(tlink_tcon(tlink)->openFileList));
 	/* if readable file instance put first in list*/
 	if (file->f_mode & FMODE_READ)
-		list_add(&pCifsFile->flist, &pCifsInode->openFileList);
+		list_add(&cfile->flist, &cinode->openFileList);
 	else
-		list_add_tail(&pCifsFile->flist, &pCifsInode->openFileList);
+		list_add_tail(&cfile->flist, &cinode->openFileList);
 	spin_unlock(&cifs_file_list_lock);
 
-	cifs_set_oplock_level(pCifsInode, oplock);
-	pCifsInode->can_cache_brlcks = pCifsInode->clientCanCacheAll;
+	cifs_set_oplock_level(cinode, oplock);
+	cinode->can_cache_brlcks = cinode->clientCanCacheAll;
 
-	file->private_data = pCifsFile;
-	return pCifsFile;
+	file->private_data = cfile;
+	return cfile;
 }
 
 static void cifs_del_lock_waiters(struct cifsLockInfo *lock);
@@ -336,7 +336,7 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
 		unsigned int xid;
 		int rc;
 		xid = get_xid();
-		rc = CIFSSMBClose(xid, tcon, cifs_file->netfid);
+		rc = CIFSSMBClose(xid, tcon, cifs_file->fid.netfid);
 		free_xid(xid);
 	}
 
@@ -561,7 +561,7 @@ static int cifs_reopen_file(struct cifsFileInfo *pCifsFile, bool can_flush)
 	}
 
 reopen_success:
-	pCifsFile->netfid = netfid;
+	pCifsFile->fid.netfid = netfid;
 	pCifsFile->invalidHandle = false;
 	mutex_unlock(&pCifsFile->fh_mutex);
 	pCifsInode = CIFS_I(inode);
@@ -609,39 +609,37 @@ int cifs_closedir(struct inode *inode, struct file *file)
 {
 	int rc = 0;
 	unsigned int xid;
-	struct cifsFileInfo *pCFileStruct = file->private_data;
-	char *ptmp;
+	struct cifsFileInfo *cfile = file->private_data;
+	char *tmp;
 
 	cFYI(1, "Closedir inode = 0x%p", inode);
 
 	xid = get_xid();
 
-	if (pCFileStruct) {
-		struct cifs_tcon *pTcon = tlink_tcon(pCFileStruct->tlink);
+	if (cfile) {
+		struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
 
 		cFYI(1, "Freeing private data in close dir");
 		spin_lock(&cifs_file_list_lock);
-		if (!pCFileStruct->srch_inf.endOfSearch &&
-		    !pCFileStruct->invalidHandle) {
-			pCFileStruct->invalidHandle = true;
+		if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) {
+			cfile->invalidHandle = true;
 			spin_unlock(&cifs_file_list_lock);
-			rc = CIFSFindClose(xid, pTcon, pCFileStruct->netfid);
-			cFYI(1, "Closing uncompleted readdir with rc %d",
-				 rc);
+			rc = CIFSFindClose(xid, tcon, cfile->fid.netfid);
+			cFYI(1, "Closing uncompleted readdir with rc %d", rc);
 			/* not much we can do if it fails anyway, ignore rc */
 			rc = 0;
 		} else
 			spin_unlock(&cifs_file_list_lock);
-		ptmp = pCFileStruct->srch_inf.ntwrk_buf_start;
-		if (ptmp) {
+		tmp = cfile->srch_inf.ntwrk_buf_start;
+		if (tmp) {
 			cFYI(1, "closedir free smb buf in srch struct");
-			pCFileStruct->srch_inf.ntwrk_buf_start = NULL;
-			if (pCFileStruct->srch_inf.smallBuf)
-				cifs_small_buf_release(ptmp);
+			cfile->srch_inf.ntwrk_buf_start = NULL;
+			if (cfile->srch_inf.smallBuf)
+				cifs_small_buf_release(tmp);
 			else
-				cifs_buf_release(ptmp);
+				cifs_buf_release(tmp);
 		}
-		cifs_put_tlink(pCFileStruct->tlink);
+		cifs_put_tlink(cfile->tlink);
 		kfree(file->private_data);
 		file->private_data = NULL;
 	}
@@ -932,7 +930,8 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
 			cur->OffsetLow = cpu_to_le32((u32)li->offset);
 			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
 			if (++num == max_num) {
-				stored_rc = cifs_lockv(xid, tcon, cfile->netfid,
+				stored_rc = cifs_lockv(xid, tcon,
+						       cfile->fid.netfid,
 						       (__u8)li->type, 0, num,
 						       buf);
 				if (stored_rc)
@@ -944,7 +943,7 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
 		}
 
 		if (num) {
-			stored_rc = cifs_lockv(xid, tcon, cfile->netfid,
+			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
 					       (__u8)types[i], 0, num, buf);
 			if (stored_rc)
 				rc = stored_rc;
@@ -1038,7 +1037,7 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile)
 			type = CIFS_WRLCK;
 		lck = list_entry(el, struct lock_to_push, llist);
 		lck->pid = flock->fl_pid;
-		lck->netfid = cfile->netfid;
+		lck->netfid = cfile->fid.netfid;
 		lck->length = length;
 		lck->type = type;
 		lck->offset = flock->fl_start;
@@ -1137,7 +1136,7 @@ static int
 cifs_mandatory_lock(unsigned int xid, struct cifsFileInfo *cfile, __u64 offset,
 		    __u64 length, __u32 type, int lock, int unlock, bool wait)
 {
-	return CIFSSMBLock(xid, tlink_tcon(cfile->tlink), cfile->netfid,
+	return CIFSSMBLock(xid, tlink_tcon(cfile->tlink), cfile->fid.netfid,
 			   current->tgid, length, offset, unlock, lock,
 			   (__u8)type, wait, 0);
 }
@@ -1151,7 +1150,7 @@ cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
 	struct TCP_Server_Info *server = tcon->ses->server;
-	__u16 netfid = cfile->netfid;
+	__u16 netfid = cfile->fid.netfid;
 
 	if (posix_lck) {
 		int posix_lock_type;
@@ -1295,7 +1294,8 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
 			 */
 			list_move(&li->llist, &tmp_llist);
 			if (++num == max_num) {
-				stored_rc = cifs_lockv(xid, tcon, cfile->netfid,
+				stored_rc = cifs_lockv(xid, tcon,
+						       cfile->fid.netfid,
 						       li->type, num, 0, buf);
 				if (stored_rc) {
 					/*
@@ -1318,7 +1318,7 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
 				cur++;
 		}
 		if (num) {
-			stored_rc = cifs_lockv(xid, tcon, cfile->netfid,
+			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
 					       types[i], num, 0, buf);
 			if (stored_rc) {
 				cifs_move_llist(&tmp_llist, &cfile->llist);
@@ -1343,7 +1343,7 @@ cifs_setlk(struct file *file,  struct file_lock *flock, __u32 type,
 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
 	struct TCP_Server_Info *server = tcon->ses->server;
-	__u16 netfid = cfile->netfid;
+	__u16 netfid = cfile->fid.netfid;
 
 	if (posix_lck) {
 		int posix_lock_type;
@@ -1423,7 +1423,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
 			tcon->ses->server);
 
 	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
-	netfid = cfile->netfid;
+	netfid = cfile->fid.netfid;
 	cinode = CIFS_I(file->f_path.dentry->d_inode);
 
 	if (cap_unix(tcon->ses) &&
@@ -1514,7 +1514,7 @@ static ssize_t cifs_write(struct cifsFileInfo *open_file, __u32 pid,
 			/* iov[0] is reserved for smb header */
 			iov[1].iov_base = (char *)write_data + total_written;
 			iov[1].iov_len = len;
-			io_parms.netfid = open_file->netfid;
+			io_parms.netfid = open_file->fid.netfid;
 			io_parms.pid = pid;
 			io_parms.tcon = pTcon;
 			io_parms.offset = *poffset;
@@ -2078,7 +2078,7 @@ int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
 
 	tcon = tlink_tcon(smbfile->tlink);
 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
-		rc = CIFSSMBFlush(xid, tcon, smbfile->netfid);
+		rc = CIFSSMBFlush(xid, tcon, smbfile->fid.netfid);
 
 	free_xid(xid);
 	mutex_unlock(&inode->i_mutex);
@@ -2106,7 +2106,7 @@ int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 
 	tcon = tlink_tcon(smbfile->tlink);
 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
-		rc = CIFSSMBFlush(xid, tcon, smbfile->netfid);
+		rc = CIFSSMBFlush(xid, tcon, smbfile->fid.netfid);
 
 	free_xid(xid);
 	mutex_unlock(&inode->i_mutex);
@@ -2802,7 +2802,7 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
 				if (rc != 0)
 					break;
 			}
-			io_parms.netfid = open_file->netfid;
+			io_parms.netfid = open_file->fid.netfid;
 			io_parms.pid = pid;
 			io_parms.tcon = tcon;
 			io_parms.offset = *poffset;
@@ -3374,7 +3374,7 @@ void cifs_oplock_break(struct work_struct *work)
 	 * disconnected since oplock already released by the server
 	 */
 	if (!cfile->oplock_break_cancelled) {
-		rc = CIFSSMBLock(0, tlink_tcon(cfile->tlink), cfile->netfid,
+		rc = CIFSSMBLock(0, tlink_tcon(cfile->tlink), cfile->fid.netfid,
 				 current->tgid, 0, 0, 0, 0,
 				 LOCKING_ANDX_OPLOCK_RELEASE, false,
 				 cinode->clientCanCacheRead ? 1 : 0);
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index bb39ea475a20..ea7428a82a31 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -294,7 +294,7 @@ int cifs_get_file_info_unix(struct file *filp)
 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
 
 	xid = get_xid();
-	rc = CIFSSMBUnixQFileInfo(xid, tcon, cfile->netfid, &find_data);
+	rc = CIFSSMBUnixQFileInfo(xid, tcon, cfile->fid.netfid, &find_data);
 	if (!rc) {
 		cifs_unix_basic_to_fattr(&fattr, &find_data, cifs_sb);
 	} else if (rc == -EREMOTE) {
@@ -562,7 +562,7 @@ int cifs_get_file_info(struct file *filp)
 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
 
 	xid = get_xid();
-	rc = CIFSSMBQFileInfo(xid, tcon, cfile->netfid, &find_data);
+	rc = CIFSSMBQFileInfo(xid, tcon, cfile->fid.netfid, &find_data);
 	switch (rc) {
 	case 0:
 		cifs_all_info_to_fattr(&fattr, &find_data, cifs_sb, false);
@@ -930,7 +930,7 @@ cifs_set_file_info(struct inode *inode, struct iattr *attrs, unsigned int xid,
 	 */
 	open_file = find_writable_file(cifsInode, true);
 	if (open_file) {
-		netfid = open_file->netfid;
+		netfid = open_file->fid.netfid;
 		netpid = open_file->pid;
 		pTcon = tlink_tcon(open_file->tlink);
 		goto set_via_filehandle;
@@ -1887,7 +1887,7 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs,
 	 */
 	open_file = find_writable_file(cifsInode, true);
 	if (open_file) {
-		__u16 nfid = open_file->netfid;
+		__u16 nfid = open_file->fid.netfid;
 		__u32 npid = open_file->pid;
 		pTcon = tlink_tcon(open_file->tlink);
 		rc = CIFSSMBSetFileSize(xid, pTcon, attrs->ia_size, nfid,
@@ -2061,7 +2061,7 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
 	args->device = 0;
 	open_file = find_writable_file(cifsInode, true);
 	if (open_file) {
-		u16 nfid = open_file->netfid;
+		u16 nfid = open_file->fid.netfid;
 		u32 npid = open_file->pid;
 		pTcon = tlink_tcon(open_file->tlink);
 		rc = CIFSSMBUnixSetFileInfo(xid, pTcon, args, nfid, npid);
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index ae082a66de2f..5b3481bd3d96 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -75,8 +75,9 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
 			tcon = tlink_tcon(pSMBFile->tlink);
 			caps = le64_to_cpu(tcon->fsUnixInfo.Capability);
 			if (CIFS_UNIX_EXTATTR_CAP & caps) {
-				rc = CIFSGetExtAttr(xid, tcon, pSMBFile->netfid,
-					&ExtAttrBits, &ExtAttrMask);
+				rc = CIFSGetExtAttr(xid, tcon,
+						    pSMBFile->fid.netfid,
+						    &ExtAttrBits, &ExtAttrMask);
 				if (rc == 0)
 					rc = put_user(ExtAttrBits &
 						FS_FL_USER_VISIBLE,
@@ -94,8 +95,12 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
 					rc = -EFAULT;
 					break;
 				}
-				/* rc= CIFSGetExtAttr(xid,tcon,pSMBFile->netfid,
-					extAttrBits, &ExtAttrMask);*/
+				/*
+				 * rc = CIFSGetExtAttr(xid, tcon,
+				 *		       pSMBFile->fid.netfid,
+				 *		       extAttrBits,
+				 *		       &ExtAttrMask);
+				 */
 			}
 			cFYI(1, "set flags not implemented yet");
 			break;
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index ce41fee07e5b..a921b0712eff 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -466,7 +466,7 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv)
 			list_for_each(tmp2, &tcon->openFileList) {
 				netfile = list_entry(tmp2, struct cifsFileInfo,
 						     tlist);
-				if (pSMB->Fid != netfile->netfid)
+				if (pSMB->Fid != netfile->fid.netfid)
 					continue;
 
 				cFYI(1, "file id match, oplock break");
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index d87f82678bc7..9e76e3b3289b 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -279,7 +279,7 @@ ffirst_retry:
 		search_flags |= CIFS_SEARCH_BACKUP_SEARCH;
 
 	rc = CIFSFindFirst(xid, tcon, full_path, cifs_sb->local_nls,
-		&cifsFile->netfid, search_flags, &cifsFile->srch_inf,
+		&cifsFile->fid.netfid, search_flags, &cifsFile->srch_inf,
 		cifs_sb->mnt_cifs_flags &
 			CIFS_MOUNT_MAP_SPECIAL_CHR, CIFS_DIR_SEP(cifs_sb));
 	if (rc == 0)
@@ -545,7 +545,7 @@ static int find_cifs_entry(const unsigned int xid, struct cifs_tcon *pTcon,
 		    !cifsFile->invalidHandle) {
 			cifsFile->invalidHandle = true;
 			spin_unlock(&cifs_file_list_lock);
-			CIFSFindClose(xid, pTcon, cifsFile->netfid);
+			CIFSFindClose(xid, pTcon, cifsFile->fid.netfid);
 		} else
 			spin_unlock(&cifs_file_list_lock);
 		if (cifsFile->srch_inf.ntwrk_buf_start) {
@@ -577,8 +577,8 @@ static int find_cifs_entry(const unsigned int xid, struct cifs_tcon *pTcon,
 	while ((index_to_find >= cifsFile->srch_inf.index_of_last_entry) &&
 	      (rc == 0) && !cifsFile->srch_inf.endOfSearch) {
 		cFYI(1, "calling findnext2");
-		rc = CIFSFindNext(xid, pTcon, cifsFile->netfid, search_flags,
-				  &cifsFile->srch_inf);
+		rc = CIFSFindNext(xid, pTcon, cifsFile->fid.netfid,
+				  search_flags, &cifsFile->srch_inf);
 		/* FindFirst/Next set last_entry to NULL on malformed reply */
 		if (cifsFile->srch_inf.last_entry)
 			cifs_save_resume_key(cifsFile->srch_inf.last_entry,
@@ -781,7 +781,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
 			}
 		} /* else {
 			cifsFile->invalidHandle = true;
-			CIFSFindClose(xid, pTcon, cifsFile->netfid);
+			CIFSFindClose(xid, pTcon, cifsFile->fid.netfid);
 		} */
 
 		pTcon = tlink_tcon(cifsFile->tlink);
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index 725fa6195867..b170da0a882d 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -63,7 +63,7 @@ send_nt_cancel(struct TCP_Server_Info *server, void *buf,
 static bool
 cifs_compare_fids(struct cifsFileInfo *ob1, struct cifsFileInfo *ob2)
 {
-	return ob1->netfid == ob2->netfid;
+	return ob1->fid.netfid == ob2->fid.netfid;
 }
 
 static unsigned int
-- 
cgit 


From fb1214e48f735cdb68446adb77ec37aa3de60697 Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <pshilovsky@samba.org>
Date: Tue, 18 Sep 2012 16:20:26 -0700
Subject: CIFS: Move open code to ops struct

Acked-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Pavel Shilovsky <pshilovsky@samba.org>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/cifsglob.h  |  7 ++++++
 fs/cifs/cifsproto.h |  7 +++---
 fs/cifs/dir.c       | 17 ++++++++-------
 fs/cifs/file.c      | 63 +++++++++++++++++++++++++----------------------------
 fs/cifs/smb1ops.c   | 29 ++++++++++++++++++++++++
 5 files changed, 79 insertions(+), 44 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index a2a3865dee1b..e649fac7d6fe 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -171,6 +171,7 @@ struct cifs_tcon;
 struct dfs_info3_param;
 struct cifs_fattr;
 struct smb_vol;
+struct cifs_fid;
 
 struct smb_version_operations {
 	int (*send_cancel)(struct TCP_Server_Info *, void *,
@@ -262,6 +263,12 @@ struct smb_version_operations {
 	/* open, rename and delete file */
 	int (*rename_pending_delete)(const char *, struct dentry *,
 				     const unsigned int);
+	/* open a file for non-posix mounts */
+	int (*open)(const unsigned int, struct cifs_tcon *, const char *, int,
+		    int, int, struct cifs_fid *, __u32 *, FILE_ALL_INFO *,
+		    struct cifs_sb_info *);
+	/* set fid protocol-specific info */
+	void (*set_fid)(struct cifsFileInfo *, struct cifs_fid *, __u32);
 };
 
 struct smb_version_values {
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 79c088469757..b67f7758f422 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -121,9 +121,10 @@ extern struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time,
 				      int offset);
 extern void cifs_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock);
 
-extern struct cifsFileInfo *cifs_new_fileinfo(__u16 fileHandle,
-				struct file *file, struct tcon_link *tlink,
-				__u32 oplock);
+extern struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid,
+					      struct file *file,
+					      struct tcon_link *tlink,
+					      __u32 oplock);
 extern int cifs_posix_open(char *full_path, struct inode **inode,
 			   struct super_block *sb, int mode,
 			   unsigned int f_flags, __u32 *oplock, __u16 *netfid,
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 781025be48bc..70823dc4f960 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -377,11 +377,12 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
 	unsigned int xid;
 	struct tcon_link *tlink;
 	struct cifs_tcon *tcon;
-	__u16 fileHandle;
+	struct cifs_fid fid;
 	__u32 oplock;
-	struct cifsFileInfo *pfile_info;
+	struct cifsFileInfo *file_info;
 
-	/* Posix open is only called (at lookup time) for file create now.  For
+	/*
+	 * Posix open is only called (at lookup time) for file create now. For
 	 * opens (rather than creates), because we do not know if it is a file
 	 * or directory yet, and current Samba no longer allows us to do posix
 	 * open on dirs, we could end up wasting an open call on what turns out
@@ -415,20 +416,20 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
 	tcon = tlink_tcon(tlink);
 
 	rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode,
-			    &oplock, &fileHandle, opened);
+			    &oplock, &fid.netfid, opened);
 
 	if (rc)
 		goto out;
 
 	rc = finish_open(file, direntry, generic_file_open, opened);
 	if (rc) {
-		CIFSSMBClose(xid, tcon, fileHandle);
+		CIFSSMBClose(xid, tcon, fid.netfid);
 		goto out;
 	}
 
-	pfile_info = cifs_new_fileinfo(fileHandle, file, tlink, oplock);
-	if (pfile_info == NULL) {
-		CIFSSMBClose(xid, tcon, fileHandle);
+	file_info = cifs_new_fileinfo(&fid, file, tlink, oplock);
+	if (file_info == NULL) {
+		CIFSSMBClose(xid, tcon, fid.netfid);
 		rc = -ENOMEM;
 	}
 
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 712f2a4d0d49..1246cf74e1fb 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -169,16 +169,19 @@ posix_open_ret:
 
 static int
 cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
-	     struct cifs_tcon *tcon, unsigned int f_flags, __u32 *poplock,
-	     __u16 *pnetfid, unsigned int xid)
+	     struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
+	     struct cifs_fid *fid, unsigned int xid)
 {
 	int rc;
-	int desiredAccess;
+	int desired_access;
 	int disposition;
 	int create_options = CREATE_NOT_DIR;
 	FILE_ALL_INFO *buf;
 
-	desiredAccess = cifs_convert_flags(f_flags);
+	if (!tcon->ses->server->ops->open)
+		return -ENOSYS;
+
+	desired_access = cifs_convert_flags(f_flags);
 
 /*********************************************************************
  *  open flag mapping table:
@@ -215,16 +218,9 @@ cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
 	if (backup_cred(cifs_sb))
 		create_options |= CREATE_OPEN_BACKUP_INTENT;
 
-	if (tcon->ses->capabilities & CAP_NT_SMBS)
-		rc = CIFSSMBOpen(xid, tcon, full_path, disposition,
-			 desiredAccess, create_options, pnetfid, poplock, buf,
-			 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
-				 & CIFS_MOUNT_MAP_SPECIAL_CHR);
-	else
-		rc = SMBLegacyOpen(xid, tcon, full_path, disposition,
-			desiredAccess, CREATE_NOT_DIR, pnetfid, poplock, buf,
-			cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
-				& CIFS_MOUNT_MAP_SPECIAL_CHR);
+	rc = tcon->ses->server->ops->open(xid, tcon, full_path, disposition,
+					  desired_access, create_options, fid,
+					  oplock, buf, cifs_sb);
 
 	if (rc)
 		goto out;
@@ -234,7 +230,7 @@ cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
 					      xid);
 	else
 		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
-					 xid, pnetfid);
+					 xid, &fid->netfid);
 
 out:
 	kfree(buf);
@@ -242,7 +238,7 @@ out:
 }
 
 struct cifsFileInfo *
-cifs_new_fileinfo(__u16 fileHandle, struct file *file,
+cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
 		  struct tcon_link *tlink, __u32 oplock)
 {
 	struct dentry *dentry = file->f_path.dentry;
@@ -255,7 +251,6 @@ cifs_new_fileinfo(__u16 fileHandle, struct file *file,
 		return cfile;
 
 	cfile->count = 1;
-	cfile->fid.netfid = fileHandle;
 	cfile->pid = current->tgid;
 	cfile->uid = current_fsuid();
 	cfile->dentry = dget(dentry);
@@ -265,6 +260,7 @@ cifs_new_fileinfo(__u16 fileHandle, struct file *file,
 	mutex_init(&cfile->fh_mutex);
 	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
 	INIT_LIST_HEAD(&cfile->llist);
+	tlink_tcon(tlink)->ses->server->ops->set_fid(cfile, fid, oplock);
 
 	spin_lock(&cifs_file_list_lock);
 	list_add(&cfile->tlist, &(tlink_tcon(tlink)->openFileList));
@@ -275,9 +271,6 @@ cifs_new_fileinfo(__u16 fileHandle, struct file *file,
 		list_add_tail(&cfile->flist, &cinode->openFileList);
 	spin_unlock(&cifs_file_list_lock);
 
-	cifs_set_oplock_level(cinode, oplock);
-	cinode->can_cache_brlcks = cinode->clientCanCacheAll;
-
 	file->private_data = cfile;
 	return cfile;
 }
@@ -364,10 +357,10 @@ int cifs_open(struct inode *inode, struct file *file)
 	struct cifs_sb_info *cifs_sb;
 	struct cifs_tcon *tcon;
 	struct tcon_link *tlink;
-	struct cifsFileInfo *pCifsFile = NULL;
+	struct cifsFileInfo *cfile = NULL;
 	char *full_path = NULL;
 	bool posix_open_ok = false;
-	__u16 netfid;
+	struct cifs_fid fid;
 
 	xid = get_xid();
 
@@ -399,7 +392,7 @@ int cifs_open(struct inode *inode, struct file *file)
 		/* can not refresh inode info since size could be stale */
 		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
 				cifs_sb->mnt_file_mode /* ignored */,
-				file->f_flags, &oplock, &netfid, xid);
+				file->f_flags, &oplock, &fid.netfid, xid);
 		if (rc == 0) {
 			cFYI(1, "posix open succeeded");
 			posix_open_ok = true;
@@ -415,20 +408,22 @@ int cifs_open(struct inode *inode, struct file *file)
 		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
 			 (rc != -EOPNOTSUPP)) /* path not found or net err */
 			goto out;
-		/* else fallthrough to retry open the old way on network i/o
-		   or DFS errors */
+		/*
+		 * Else fallthrough to retry open the old way on network i/o
+		 * or DFS errors.
+		 */
 	}
 
 	if (!posix_open_ok) {
 		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
-				  file->f_flags, &oplock, &netfid, xid);
+				  file->f_flags, &oplock, &fid, xid);
 		if (rc)
 			goto out;
 	}
 
-	pCifsFile = cifs_new_fileinfo(netfid, file, tlink, oplock);
-	if (pCifsFile == NULL) {
-		CIFSSMBClose(xid, tcon, netfid);
+	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
+	if (cfile == NULL) {
+		CIFSSMBClose(xid, tcon, fid.netfid);
 		rc = -ENOMEM;
 		goto out;
 	}
@@ -436,8 +431,10 @@ int cifs_open(struct inode *inode, struct file *file)
 	cifs_fscache_set_inode_cookie(inode, file);
 
 	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
-		/* time to set mode which we can not set earlier due to
-		   problems creating new read-only files */
+		/*
+		 * Time to set mode which we can not set earlier due to
+		 * problems creating new read-only files.
+		 */
 		struct cifs_unix_set_info_args args = {
 			.mode	= inode->i_mode,
 			.uid	= NO_CHANGE_64,
@@ -447,8 +444,8 @@ int cifs_open(struct inode *inode, struct file *file)
 			.mtime	= NO_CHANGE_64,
 			.device	= 0,
 		};
-		CIFSSMBUnixSetFileInfo(xid, tcon, &args, netfid,
-					pCifsFile->pid);
+		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
+				       cfile->pid);
 	}
 
 out:
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index b170da0a882d..907b30865000 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -607,6 +607,33 @@ cifs_mkdir_setinfo(struct inode *inode, const char *full_path,
 		cifsInode->cifsAttrs = dosattrs;
 }
 
+static int
+cifs_open_file(const unsigned int xid, struct cifs_tcon *tcon, const char *path,
+	       int disposition, int desired_access, int create_options,
+	       struct cifs_fid *fid, __u32 *oplock, FILE_ALL_INFO *buf,
+	       struct cifs_sb_info *cifs_sb)
+{
+	if (!(tcon->ses->capabilities & CAP_NT_SMBS))
+		return SMBLegacyOpen(xid, tcon, path, disposition,
+				     desired_access, CREATE_NOT_DIR,
+				     &fid->netfid, oplock, buf,
+				     cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
+						& CIFS_MOUNT_MAP_SPECIAL_CHR);
+	return CIFSSMBOpen(xid, tcon, path, disposition, desired_access,
+			   create_options, &fid->netfid, oplock, buf,
+			   cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
+						CIFS_MOUNT_MAP_SPECIAL_CHR);
+}
+
+static void
+cifs_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock)
+{
+	struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
+	cfile->fid.netfid = fid->netfid;
+	cifs_set_oplock_level(cinode, oplock);
+	cinode->can_cache_brlcks = cinode->clientCanCacheAll;
+}
+
 struct smb_version_operations smb1_operations = {
 	.send_cancel = send_nt_cancel,
 	.compare_fids = cifs_compare_fids,
@@ -646,6 +673,8 @@ struct smb_version_operations smb1_operations = {
 	.rmdir = CIFSSMBRmDir,
 	.unlink = CIFSSMBDelFile,
 	.rename_pending_delete = cifs_rename_pending_delete,
+	.open = cifs_open_file,
+	.set_fid = cifs_set_fid,
 };
 
 struct smb_version_values smb1_values = {
-- 
cgit 


From 0ff78a221bf7839a7f20be9929433d17e868e987 Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <pshilovsky@samba.org>
Date: Tue, 18 Sep 2012 16:20:26 -0700
Subject: CIFS: Move close code to ops struct

Signed-off-by: Pavel Shilovsky <pshilovsky@samba.org>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/cifsglob.h |  2 ++
 fs/cifs/file.c     | 10 +++++++---
 fs/cifs/smb1ops.c  |  8 ++++++++
 3 files changed, 17 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index e649fac7d6fe..39bf2f3e866a 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -269,6 +269,8 @@ struct smb_version_operations {
 		    struct cifs_sb_info *);
 	/* set fid protocol-specific info */
 	void (*set_fid)(struct cifsFileInfo *, struct cifs_fid *, __u32);
+	/* close a file */
+	int (*close)(const unsigned int, struct cifs_tcon *, struct cifs_fid *);
 };
 
 struct smb_version_values {
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 1246cf74e1fb..14938ee4f6e4 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -326,10 +326,13 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
 	cancel_work_sync(&cifs_file->oplock_break);
 
 	if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
+		struct TCP_Server_Info *server = tcon->ses->server;
 		unsigned int xid;
-		int rc;
+		int rc = -ENOSYS;
+
 		xid = get_xid();
-		rc = CIFSSMBClose(xid, tcon, cifs_file->fid.netfid);
+		if (server->ops->close)
+			rc = server->ops->close(xid, tcon, &cifs_file->fid);
 		free_xid(xid);
 	}
 
@@ -423,7 +426,8 @@ int cifs_open(struct inode *inode, struct file *file)
 
 	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
 	if (cfile == NULL) {
-		CIFSSMBClose(xid, tcon, fid.netfid);
+		if (tcon->ses->server->ops->close)
+			tcon->ses->server->ops->close(xid, tcon, &fid);
 		rc = -ENOMEM;
 		goto out;
 	}
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index 907b30865000..bb758476e139 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -634,6 +634,13 @@ cifs_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock)
 	cinode->can_cache_brlcks = cinode->clientCanCacheAll;
 }
 
+static int
+cifs_close_file(const unsigned int xid, struct cifs_tcon *tcon,
+		struct cifs_fid *fid)
+{
+	return CIFSSMBClose(xid, tcon, fid->netfid);
+}
+
 struct smb_version_operations smb1_operations = {
 	.send_cancel = send_nt_cancel,
 	.compare_fids = cifs_compare_fids,
@@ -675,6 +682,7 @@ struct smb_version_operations smb1_operations = {
 	.rename_pending_delete = cifs_rename_pending_delete,
 	.open = cifs_open_file,
 	.set_fid = cifs_set_fid,
+	.close = cifs_close_file,
 };
 
 struct smb_version_values smb1_values = {
-- 
cgit 


From f0df737ee820ec62055baf2b28e24db4fb1ad71d Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <pshilovsky@samba.org>
Date: Tue, 18 Sep 2012 16:20:26 -0700
Subject: CIFS: Add open/close file support for SMB2

Signed-off-by: Pavel Shilovsky <pshilovsky@samba.org>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/Makefile    |  2 +-
 fs/cifs/cifsglob.h  |  4 +++
 fs/cifs/smb2file.c  | 86 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/cifs/smb2inode.c |  4 +--
 fs/cifs/smb2ops.c   | 22 +++++++++++++-
 fs/cifs/smb2pdu.c   | 53 +++++++++++++++++++++++++--------
 fs/cifs/smb2pdu.h   |  4 +++
 fs/cifs/smb2proto.h | 14 ++++++++-
 8 files changed, 172 insertions(+), 17 deletions(-)
 create mode 100644 fs/cifs/smb2file.c

(limited to 'fs')

diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile
index feee94309271..aa0d68b086eb 100644
--- a/fs/cifs/Makefile
+++ b/fs/cifs/Makefile
@@ -17,4 +17,4 @@ cifs-$(CONFIG_CIFS_DFS_UPCALL) += dns_resolve.o cifs_dfs_ref.o
 cifs-$(CONFIG_CIFS_FSCACHE) += fscache.o cache.o
 
 cifs-$(CONFIG_CIFS_SMB2) += smb2ops.o smb2maperror.o smb2transport.o \
-			    smb2misc.o smb2pdu.o smb2inode.o
+			    smb2misc.o smb2pdu.o smb2inode.o smb2file.o
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 39bf2f3e866a..8a69dae81d3a 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -757,6 +757,10 @@ struct cifs_search_info {
 
 struct cifs_fid {
 	__u16 netfid;
+#ifdef CONFIG_CIFS_SMB2
+	__u64 persistent_fid;	/* persist file id for smb2 */
+	__u64 volatile_fid;	/* volatile file id for smb2 */
+#endif
 };
 
 struct cifsFileInfo {
diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c
new file mode 100644
index 000000000000..a7618dfb7712
--- /dev/null
+++ b/fs/cifs/smb2file.c
@@ -0,0 +1,86 @@
+/*
+ *   fs/cifs/smb2file.c
+ *
+ *   Copyright (C) International Business Machines  Corp., 2002, 2011
+ *   Author(s): Steve French (sfrench@us.ibm.com),
+ *              Pavel Shilovsky ((pshilovsky@samba.org) 2012
+ *
+ *   This library is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU Lesser General Public License as published
+ *   by the Free Software Foundation; either version 2.1 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This library is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ *   the GNU Lesser General Public License for more details.
+ *
+ *   You should have received a copy of the GNU Lesser General Public License
+ *   along with this library; if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include <linux/fs.h>
+#include <linux/stat.h>
+#include <linux/slab.h>
+#include <linux/pagemap.h>
+#include <asm/div64.h>
+#include "cifsfs.h"
+#include "cifspdu.h"
+#include "cifsglob.h"
+#include "cifsproto.h"
+#include "cifs_debug.h"
+#include "cifs_fs_sb.h"
+#include "cifs_unicode.h"
+#include "fscache.h"
+#include "smb2proto.h"
+
+int
+smb2_open_file(const unsigned int xid, struct cifs_tcon *tcon, const char *path,
+	       int disposition, int desired_access, int create_options,
+	       struct cifs_fid *fid, __u32 *oplock, FILE_ALL_INFO *buf,
+	       struct cifs_sb_info *cifs_sb)
+{
+	int rc;
+	__le16 *smb2_path;
+	struct smb2_file_all_info *smb2_data = NULL;
+
+	smb2_path = cifs_convert_path_to_utf16(path, cifs_sb);
+	if (smb2_path == NULL) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + MAX_NAME * 2,
+			    GFP_KERNEL);
+	if (smb2_data == NULL) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	desired_access |= FILE_READ_ATTRIBUTES;
+
+	rc = SMB2_open(xid, tcon, smb2_path, &fid->persistent_fid,
+		       &fid->volatile_fid, desired_access, disposition,
+		       0, 0, smb2_data);
+	if (rc)
+		goto out;
+
+	if (buf) {
+		/* open response does not have IndexNumber field - get it */
+		rc = SMB2_get_srv_num(xid, tcon, fid->persistent_fid,
+				      fid->volatile_fid,
+				      &smb2_data->IndexNumber);
+		if (rc) {
+			/* let get_inode_info disable server inode numbers */
+			smb2_data->IndexNumber = 0;
+			rc = 0;
+		}
+		move_smb2_info_to_cifs(buf, smb2_data);
+	}
+
+out:
+	*oplock = 0;
+	kfree(smb2_data);
+	kfree(smb2_path);
+	return rc;
+}
diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c
index 02a9bda4248c..ee3a1ef686dd 100644
--- a/fs/cifs/smb2inode.c
+++ b/fs/cifs/smb2inode.c
@@ -54,7 +54,7 @@ smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon,
 
 	rc = SMB2_open(xid, tcon, utf16_path, &persistent_fid, &volatile_fid,
 		       desired_access, create_disposition, file_attributes,
-		       create_options);
+		       create_options, NULL);
 	if (rc) {
 		kfree(utf16_path);
 		return rc;
@@ -86,7 +86,7 @@ smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon,
 	return rc;
 }
 
-static void
+void
 move_smb2_info_to_cifs(FILE_ALL_INFO *dst, struct smb2_file_all_info *src)
 {
 	memcpy(dst, src, (size_t)(&src->CurrentByteOffset) - (size_t)src);
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index bf9b318cfa4b..eba12b31dc60 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -170,7 +170,7 @@ smb2_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon,
 		return -ENOMEM;
 
 	rc = SMB2_open(xid, tcon, utf16_path, &persistent_fid, &volatile_fid,
-		       FILE_READ_ATTRIBUTES, FILE_OPEN, 0, 0);
+		       FILE_READ_ATTRIBUTES, FILE_OPEN, 0, 0, NULL);
 	if (rc) {
 		kfree(utf16_path);
 		return rc;
@@ -292,6 +292,23 @@ smb2_print_stats(struct seq_file *m, struct cifs_tcon *tcon)
 #endif
 }
 
+static void
+smb2_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock)
+{
+	/* struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); */
+	cfile->fid.persistent_fid = fid->persistent_fid;
+	cfile->fid.volatile_fid = fid->volatile_fid;
+	/* cifs_set_oplock_level(cinode, oplock); */
+	/* cinode->can_cache_brlcks = cinode->clientCanCacheAll; */
+}
+
+static int
+smb2_close_file(const unsigned int xid, struct cifs_tcon *tcon,
+		struct cifs_fid *fid)
+{
+	return SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid);
+}
+
 struct smb_version_operations smb21_operations = {
 	.setup_request = smb2_setup_request,
 	.setup_async_request = smb2_setup_async_request,
@@ -322,6 +339,9 @@ struct smb_version_operations smb21_operations = {
 	.mkdir_setinfo = smb2_mkdir_setinfo,
 	.rmdir = smb2_rmdir,
 	.unlink = smb2_unlink,
+	.open = smb2_open_file,
+	.set_fid = smb2_set_fid,
+	.close = smb2_close_file,
 };
 
 struct smb_version_values smb21_values = {
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 62b3f17d0613..231e9701ab85 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -833,7 +833,8 @@ SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon)
 int
 SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path,
 	  u64 *persistent_fid, u64 *volatile_fid, __u32 desired_access,
-	  __u32 create_disposition, __u32 file_attributes, __u32 create_options)
+	  __u32 create_disposition, __u32 file_attributes, __u32 create_options,
+	  struct smb2_file_all_info *buf)
 {
 	struct smb2_create_req *req;
 	struct smb2_create_rsp *rsp;
@@ -856,9 +857,9 @@ SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path,
 	if (rc)
 		return rc;
 
-	if (enable_oplocks)
+	/* if (server->oplocks)
 		req->RequestedOplockLevel = SMB2_OPLOCK_LEVEL_BATCH;
-	else
+	else */
 		req->RequestedOplockLevel = SMB2_OPLOCK_LEVEL_NONE;
 	req->ImpersonationLevel = IL_IMPERSONATION;
 	req->DesiredAccess = cpu_to_le32(desired_access);
@@ -906,6 +907,15 @@ SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path,
 	}
 	*persistent_fid = rsp->PersistentFileId;
 	*volatile_fid = rsp->VolatileFileId;
+
+	if (buf) {
+		memcpy(buf, &rsp->CreationTime, 32);
+		buf->AllocationSize = rsp->AllocationSize;
+		buf->EndOfFile = rsp->EndofFile;
+		buf->Attributes = rsp->FileAttributes;
+		buf->NumberOfLinks = cpu_to_le32(1);
+		buf->DeletePending = 0;
+	}
 creat_exit:
 	free_rsp_buf(resp_buftype, rsp);
 	return rc;
@@ -1019,10 +1029,10 @@ validate_and_copy_buf(unsigned int offset, unsigned int buffer_length,
 	return 0;
 }
 
-int
-SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon,
-		u64 persistent_fid, u64 volatile_fid,
-		struct smb2_file_all_info *data)
+static int
+query_info(const unsigned int xid, struct cifs_tcon *tcon,
+	   u64 persistent_fid, u64 volatile_fid, u8 info_class,
+	   size_t output_len, size_t min_len, void *data)
 {
 	struct smb2_query_info_req *req;
 	struct smb2_query_info_rsp *rsp = NULL;
@@ -1044,14 +1054,13 @@ SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon,
 		return rc;
 
 	req->InfoType = SMB2_O_INFO_FILE;
-	req->FileInfoClass = FILE_ALL_INFORMATION;
+	req->FileInfoClass = info_class;
 	req->PersistentFileId = persistent_fid;
 	req->VolatileFileId = volatile_fid;
 	/* 4 for rfc1002 length field and 1 for Buffer */
 	req->InputBufferOffset =
 		cpu_to_le16(sizeof(struct smb2_query_info_req) - 1 - 4);
-	req->OutputBufferLength =
-		cpu_to_le32(sizeof(struct smb2_file_all_info) + MAX_NAME * 2);
+	req->OutputBufferLength = cpu_to_le32(output_len);
 
 	iov[0].iov_base = (char *)req;
 	/* 4 for rfc1002 length field */
@@ -1067,14 +1076,34 @@ SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon,
 
 	rc = validate_and_copy_buf(le16_to_cpu(rsp->OutputBufferOffset),
 				   le32_to_cpu(rsp->OutputBufferLength),
-				   &rsp->hdr, sizeof(struct smb2_file_all_info),
-				   (char *)data);
+				   &rsp->hdr, min_len, data);
 
 qinf_exit:
 	free_rsp_buf(resp_buftype, rsp);
 	return rc;
 }
 
+int
+SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon,
+		u64 persistent_fid, u64 volatile_fid,
+		struct smb2_file_all_info *data)
+{
+	return query_info(xid, tcon, persistent_fid, volatile_fid,
+			  FILE_ALL_INFORMATION,
+			  sizeof(struct smb2_file_all_info) + MAX_NAME * 2,
+			  sizeof(struct smb2_file_all_info), data);
+}
+
+int
+SMB2_get_srv_num(const unsigned int xid, struct cifs_tcon *tcon,
+		 u64 persistent_fid, u64 volatile_fid, __le64 *uniqueid)
+{
+	return query_info(xid, tcon, persistent_fid, volatile_fid,
+			  FILE_INTERNAL_INFORMATION,
+			  sizeof(struct smb2_file_internal_info),
+			  sizeof(struct smb2_file_internal_info), uniqueid);
+}
+
 /*
  * This is a no-op for now. We're not really interested in the reply, but
  * rather in the fact that the server sent one and that server->lstrp
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index 15dc8eea8273..0e962cbf8166 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -548,6 +548,10 @@ struct smb2_query_info_rsp {
 #define FILEID_GLOBAL_TX_DIRECTORY_INFORMATION 50
 #define FILE_STANDARD_LINK_INFORMATION	54
 
+struct smb2_file_internal_info {
+	__le64 IndexNumber;
+} __packed; /* level 6 Query */
+
 /*
  * This level 18, although with struct with same name is different from cifs
  * level 0x107. Level 0x107 has an extra u64 between AccessFlags and
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index f4ac72799f70..624d344e1a57 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -48,6 +48,8 @@ extern int smb2_setup_async_request(struct TCP_Server_Info *server,
 				    struct mid_q_entry **ret_mid);
 extern void smb2_echo_request(struct work_struct *work);
 
+extern void move_smb2_info_to_cifs(FILE_ALL_INFO *dst,
+				   struct smb2_file_all_info *src);
 extern int smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
 				struct cifs_sb_info *cifs_sb,
 				const char *full_path, FILE_ALL_INFO *data,
@@ -62,6 +64,12 @@ extern int smb2_rmdir(const unsigned int xid, struct cifs_tcon *tcon,
 extern int smb2_unlink(const unsigned int xid, struct cifs_tcon *tcon,
 		       const char *name, struct cifs_sb_info *cifs_sb);
 
+extern int smb2_open_file(const unsigned int xid, struct cifs_tcon *tcon,
+			  const char *full_path, int disposition,
+			  int desired_access, int create_options,
+			  struct cifs_fid *fid, __u32 *oplock,
+			  FILE_ALL_INFO *buf, struct cifs_sb_info *cifs_sb);
+
 /*
  * SMB2 Worker functions - most of protocol specific implementation details
  * are contained within these calls.
@@ -77,12 +85,16 @@ extern int SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon);
 extern int SMB2_open(const unsigned int xid, struct cifs_tcon *tcon,
 		     __le16 *path, u64 *persistent_fid, u64 *volatile_fid,
 		     __u32 desired_access, __u32 create_disposition,
-		     __u32 file_attributes, __u32 create_options);
+		     __u32 file_attributes, __u32 create_options,
+		     struct smb2_file_all_info *buf);
 extern int SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
 		      u64 persistent_file_id, u64 volatile_file_id);
 extern int SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon,
 			   u64 persistent_file_id, u64 volatile_file_id,
 			   struct smb2_file_all_info *data);
+extern int SMB2_get_srv_num(const unsigned int xid, struct cifs_tcon *tcon,
+			    u64 persistent_fid, u64 volatile_fid,
+			    __le64 *uniqueid);
 extern int SMB2_echo(struct TCP_Server_Info *server);
 
 #endif			/* _SMB2PROTO_H */
-- 
cgit 


From 4ad6504453644f57f7b6cf9c7bfc9d1372c5ad15 Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <pshilovsky@samba.org>
Date: Tue, 18 Sep 2012 16:20:26 -0700
Subject: CIFS: Move guery file info code to ops struct

and make cifs_get_file_info(_unix) calls static.

Signed-off-by: Pavel Shilovsky <pshilovsky@samba.org>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/cifsglob.h  |  3 +++
 fs/cifs/cifsproto.h |  2 --
 fs/cifs/inode.c     | 12 +++++++++---
 fs/cifs/smb1ops.c   |  8 ++++++++
 4 files changed, 20 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 8a69dae81d3a..500ecb921b85 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -236,6 +236,9 @@ struct smb_version_operations {
 	int (*query_path_info)(const unsigned int, struct cifs_tcon *,
 			       struct cifs_sb_info *, const char *,
 			       FILE_ALL_INFO *, bool *);
+	/* query file data from the server */
+	int (*query_file_info)(const unsigned int, struct cifs_tcon *,
+			       struct cifs_fid *, FILE_ALL_INFO *);
 	/* get server index number */
 	int (*get_srv_inum)(const unsigned int, struct cifs_tcon *,
 			    struct cifs_sb_info *, const char *,
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index b67f7758f422..2c6ad78a16cc 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -137,11 +137,9 @@ extern void cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr);
 extern struct inode *cifs_iget(struct super_block *sb,
 			       struct cifs_fattr *fattr);
 
-extern int cifs_get_file_info(struct file *filp);
 extern int cifs_get_inode_info(struct inode **inode, const char *full_path,
 			       FILE_ALL_INFO *data, struct super_block *sb,
 			       int xid, const __u16 *fid);
-extern int cifs_get_file_info_unix(struct file *filp);
 extern int cifs_get_inode_info_unix(struct inode **pinode,
 			const unsigned char *search_path,
 			struct super_block *sb, unsigned int xid);
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index ea7428a82a31..c6f6b02cf3b5 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -282,7 +282,8 @@ cifs_create_dfs_fattr(struct cifs_fattr *fattr, struct super_block *sb)
 	fattr->cf_flags |= CIFS_FATTR_DFS_REFERRAL;
 }
 
-int cifs_get_file_info_unix(struct file *filp)
+static int
+cifs_get_file_info_unix(struct file *filp)
 {
 	int rc;
 	unsigned int xid;
@@ -550,7 +551,8 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
 	fattr->cf_gid = cifs_sb->mnt_gid;
 }
 
-int cifs_get_file_info(struct file *filp)
+static int
+cifs_get_file_info(struct file *filp)
 {
 	int rc;
 	unsigned int xid;
@@ -560,9 +562,13 @@ int cifs_get_file_info(struct file *filp)
 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
 	struct cifsFileInfo *cfile = filp->private_data;
 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
+	struct TCP_Server_Info *server = tcon->ses->server;
+
+	if (!server->ops->query_file_info)
+		return -ENOSYS;
 
 	xid = get_xid();
-	rc = CIFSSMBQFileInfo(xid, tcon, cfile->fid.netfid, &find_data);
+	rc = server->ops->query_file_info(xid, tcon, &cfile->fid, &find_data);
 	switch (rc) {
 	case 0:
 		cifs_all_info_to_fattr(&fattr, &find_data, cifs_sb, false);
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index bb758476e139..cbbc122a501a 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -489,6 +489,13 @@ cifs_get_srv_inum(const unsigned int xid, struct cifs_tcon *tcon,
 						CIFS_MOUNT_MAP_SPECIAL_CHR);
 }
 
+static int
+cifs_query_file_info(const unsigned int xid, struct cifs_tcon *tcon,
+		     struct cifs_fid *fid, FILE_ALL_INFO *data)
+{
+	return CIFSSMBQFileInfo(xid, tcon, fid->netfid, data);
+}
+
 static char *
 cifs_build_path_to_root(struct smb_vol *vol, struct cifs_sb_info *cifs_sb,
 			struct cifs_tcon *tcon)
@@ -672,6 +679,7 @@ struct smb_version_operations smb1_operations = {
 	.qfs_tcon = cifs_qfs_tcon,
 	.is_path_accessible = cifs_is_path_accessible,
 	.query_path_info = cifs_query_path_info,
+	.query_file_info = cifs_query_file_info,
 	.get_srv_inum = cifs_get_srv_inum,
 	.build_path_to_root = cifs_build_path_to_root,
 	.echo = CIFSSMBEcho,
-- 
cgit 


From b7546bc54c4acf1a79c83030fa0591cd24875125 Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <pshilovsky@samba.org>
Date: Tue, 18 Sep 2012 16:20:27 -0700
Subject: CIFS: Add SMB2 support for query_file_info

Signed-off-by: Pavel Shilovsky <pshilovsky@samba.org>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/smb2ops.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'fs')

diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index eba12b31dc60..0fd580168252 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -190,6 +190,26 @@ smb2_get_srv_inum(const unsigned int xid, struct cifs_tcon *tcon,
 	return 0;
 }
 
+static int
+smb2_query_file_info(const unsigned int xid, struct cifs_tcon *tcon,
+		     struct cifs_fid *fid, FILE_ALL_INFO *data)
+{
+	int rc;
+	struct smb2_file_all_info *smb2_data;
+
+	smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + MAX_NAME * 2,
+			    GFP_KERNEL);
+	if (smb2_data == NULL)
+		return -ENOMEM;
+
+	rc = SMB2_query_info(xid, tcon, fid->persistent_fid, fid->volatile_fid,
+			     smb2_data);
+	if (!rc)
+		move_smb2_info_to_cifs(data, smb2_data);
+	kfree(smb2_data);
+	return rc;
+}
+
 static char *
 smb2_build_path_to_root(struct smb_vol *vol, struct cifs_sb_info *cifs_sb,
 			struct cifs_tcon *tcon)
@@ -334,6 +354,7 @@ struct smb_version_operations smb21_operations = {
 	.echo = SMB2_echo,
 	.query_path_info = smb2_query_path_info,
 	.get_srv_inum = smb2_get_srv_inum,
+	.query_file_info = smb2_query_file_info,
 	.build_path_to_root = smb2_build_path_to_root,
 	.mkdir = smb2_mkdir,
 	.mkdir_setinfo = smb2_mkdir_setinfo,
-- 
cgit 


From 253641388a49259f6bfefecfb14fa057ca58dc21 Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <pshilovsky@samba.org>
Date: Tue, 18 Sep 2012 16:20:27 -0700
Subject: CIFS: Move create code use ops struct

Signed-off-by: Pavel Shilovsky <pshilovsky@samba.org>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/dir.c     | 95 ++++++++++++++++++++++++++++++-------------------------
 fs/cifs/file.c    | 10 +++---
 fs/cifs/smb1ops.c |  2 +-
 3 files changed, 58 insertions(+), 49 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 70823dc4f960..b99a1670dad4 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -160,17 +160,18 @@ check_name(struct dentry *direntry)
 static int
 cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid,
 	       struct tcon_link *tlink, unsigned oflags, umode_t mode,
-	       __u32 *oplock, __u16 *fileHandle, int *created)
+	       __u32 *oplock, struct cifs_fid *fid, int *created)
 {
 	int rc = -ENOENT;
 	int create_options = CREATE_NOT_DIR;
-	int desiredAccess;
+	int desired_access;
 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
 	struct cifs_tcon *tcon = tlink_tcon(tlink);
 	char *full_path = NULL;
 	FILE_ALL_INFO *buf = NULL;
 	struct inode *newinode = NULL;
 	int disposition;
+	struct TCP_Server_Info *server = tcon->ses->server;
 
 	*oplock = 0;
 	if (tcon->ses->server->oplocks)
@@ -185,8 +186,8 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid,
 	if (tcon->unix_ext && cap_unix(tcon->ses) && !tcon->broken_posix_open &&
 	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
 			le64_to_cpu(tcon->fsUnixInfo.Capability))) {
-		rc = cifs_posix_open(full_path, &newinode,
-			inode->i_sb, mode, oflags, oplock, fileHandle, xid);
+		rc = cifs_posix_open(full_path, &newinode, inode->i_sb, mode,
+				     oflags, oplock, &fid->netfid, xid);
 		switch (rc) {
 		case 0:
 			if (newinode == NULL) {
@@ -202,7 +203,7 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid,
 				 * close it and proceed as if it were a normal
 				 * lookup.
 				 */
-				CIFSSMBClose(xid, tcon, *fileHandle);
+				CIFSSMBClose(xid, tcon, fid->netfid);
 				goto cifs_create_get_file_info;
 			}
 			/* success, no need to query */
@@ -244,11 +245,11 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid,
 		 */
 	}
 
-	desiredAccess = 0;
+	desired_access = 0;
 	if (OPEN_FMODE(oflags) & FMODE_READ)
-		desiredAccess |= GENERIC_READ; /* is this too little? */
+		desired_access |= GENERIC_READ; /* is this too little? */
 	if (OPEN_FMODE(oflags) & FMODE_WRITE)
-		desiredAccess |= GENERIC_WRITE;
+		desired_access |= GENERIC_WRITE;
 
 	disposition = FILE_OVERWRITE_IF;
 	if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
@@ -260,8 +261,15 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid,
 	else
 		cFYI(1, "Create flag not set in create function");
 
-	/* BB add processing to set equivalent of mode - e.g. via CreateX with
-	   ACLs */
+	/*
+	 * BB add processing to set equivalent of mode - e.g. via CreateX with
+	 * ACLs
+	 */
+
+	if (!server->ops->open) {
+		rc = -ENOSYS;
+		goto out;
+	}
 
 	buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
 	if (buf == NULL) {
@@ -279,28 +287,18 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid,
 	if (backup_cred(cifs_sb))
 		create_options |= CREATE_OPEN_BACKUP_INTENT;
 
-	if (tcon->ses->capabilities & CAP_NT_SMBS)
-		rc = CIFSSMBOpen(xid, tcon, full_path, disposition,
-			 desiredAccess, create_options,
-			 fileHandle, oplock, buf, cifs_sb->local_nls,
-			 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
-	else
-		rc = -EIO; /* no NT SMB support fall into legacy open below */
-
-	if (rc == -EIO) {
-		/* old server, retry the open legacy style */
-		rc = SMBLegacyOpen(xid, tcon, full_path, disposition,
-			desiredAccess, create_options,
-			fileHandle, oplock, buf, cifs_sb->local_nls,
-			cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
-	}
+	rc = server->ops->open(xid, tcon, full_path, disposition,
+			       desired_access, create_options, fid, oplock,
+			       buf, cifs_sb);
 	if (rc) {
 		cFYI(1, "cifs_create returned 0x%x", rc);
 		goto out;
 	}
 
-	/* If Open reported that we actually created a file
-	   then we now have to set the mode if possible */
+	/*
+	 * If Open reported that we actually created a file then we now have to
+	 * set the mode if possible.
+	 */
 	if ((tcon->unix_ext) && (*oplock & CIFS_CREATE_ACTION)) {
 		struct cifs_unix_set_info_args args = {
 				.mode	= mode,
@@ -321,11 +319,13 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid,
 			args.uid = NO_CHANGE_64;
 			args.gid = NO_CHANGE_64;
 		}
-		CIFSSMBUnixSetFileInfo(xid, tcon, &args, *fileHandle,
-					current->tgid);
+		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid->netfid,
+				       current->tgid);
 	} else {
-		/* BB implement mode setting via Windows security
-		   descriptors e.g. */
+		/*
+		 * BB implement mode setting via Windows security
+		 * descriptors e.g.
+		 */
 		/* CIFSSMBWinSetPerms(xid,tcon,path,mode,-1,-1,nls);*/
 
 		/* Could set r/o dos attribute if mode & 0222 == 0 */
@@ -334,11 +334,11 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid,
 cifs_create_get_file_info:
 	/* server might mask mode so we have to query for it */
 	if (tcon->unix_ext)
-		rc = cifs_get_inode_info_unix(&newinode, full_path,
-					      inode->i_sb, xid);
+		rc = cifs_get_inode_info_unix(&newinode, full_path, inode->i_sb,
+					      xid);
 	else {
-		rc = cifs_get_inode_info(&newinode, full_path, buf,
-					 inode->i_sb, xid, fileHandle);
+		rc = cifs_get_inode_info(&newinode, full_path, buf, inode->i_sb,
+					 xid, &fid->netfid);
 		if (newinode) {
 			if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)
 				newinode->i_mode = mode;
@@ -356,7 +356,8 @@ cifs_create_get_file_info:
 cifs_create_set_dentry:
 	if (rc != 0) {
 		cFYI(1, "Create worked, get_inode_info failed rc = %d", rc);
-		CIFSSMBClose(xid, tcon, *fileHandle);
+		if (server->ops->close)
+			server->ops->close(xid, tcon, fid);
 		goto out;
 	}
 	d_drop(direntry);
@@ -377,6 +378,7 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
 	unsigned int xid;
 	struct tcon_link *tlink;
 	struct cifs_tcon *tcon;
+	struct TCP_Server_Info *server;
 	struct cifs_fid fid;
 	__u32 oplock;
 	struct cifsFileInfo *file_info;
@@ -414,22 +416,25 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
 		goto out_free_xid;
 
 	tcon = tlink_tcon(tlink);
+	server = tcon->ses->server;
 
 	rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode,
-			    &oplock, &fid.netfid, opened);
+			    &oplock, &fid, opened);
 
 	if (rc)
 		goto out;
 
 	rc = finish_open(file, direntry, generic_file_open, opened);
 	if (rc) {
-		CIFSSMBClose(xid, tcon, fid.netfid);
+		if (server->ops->close)
+			server->ops->close(xid, tcon, &fid);
 		goto out;
 	}
 
 	file_info = cifs_new_fileinfo(&fid, file, tlink, oplock);
 	if (file_info == NULL) {
-		CIFSSMBClose(xid, tcon, fid.netfid);
+		if (server->ops->close)
+			server->ops->close(xid, tcon, &fid);
 		rc = -ENOMEM;
 	}
 
@@ -454,7 +459,9 @@ int cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode,
 	 */
 	unsigned oflags = O_EXCL | O_CREAT | O_RDWR;
 	struct tcon_link *tlink;
-	__u16 fileHandle;
+	struct cifs_tcon *tcon;
+	struct TCP_Server_Info *server;
+	struct cifs_fid fid;
 	__u32 oplock;
 	int created = FILE_CREATED;
 
@@ -467,9 +474,11 @@ int cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode,
 		goto out_free_xid;
 
 	rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode,
-			    &oplock, &fileHandle, &created);
-	if (!rc)
-		CIFSSMBClose(xid, tlink_tcon(tlink), fileHandle);
+			    &oplock, &fid, &created);
+	tcon = tlink_tcon(tlink);
+	server = tcon->ses->server;
+	if (!rc && server->ops->close)
+		server->ops->close(xid, tcon, &fid);
 
 	cifs_put_tlink(tlink);
 out_free_xid:
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 14938ee4f6e4..dbca2b293e55 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -312,13 +312,13 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
 	if (list_empty(&cifsi->openFileList)) {
 		cFYI(1, "closing last open instance for inode %p",
 			cifs_file->dentry->d_inode);
-
-		/* in strict cache mode we need invalidate mapping on the last
-		   close  because it may cause a error when we open this file
-		   again and get at least level II oplock */
+		/*
+		 * In strict cache mode we need invalidate mapping on the last
+		 * close  because it may cause a error when we open this file
+		 * again and get at least level II oplock.
+		 */
 		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
 			CIFS_I(inode)->invalid_mapping = true;
-
 		cifs_set_oplock_level(cifsi, 0);
 	}
 	spin_unlock(&cifs_file_list_lock);
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index cbbc122a501a..dd64754ed8cb 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -622,7 +622,7 @@ cifs_open_file(const unsigned int xid, struct cifs_tcon *tcon, const char *path,
 {
 	if (!(tcon->ses->capabilities & CAP_NT_SMBS))
 		return SMBLegacyOpen(xid, tcon, path, disposition,
-				     desired_access, CREATE_NOT_DIR,
+				     desired_access, create_options,
 				     &fid->netfid, oplock, buf,
 				     cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
 						& CIFS_MOUNT_MAP_SPECIAL_CHR);
-- 
cgit 


From 2ae78ba85cd7b4c3a164434ab1f6c9e515c6fa1d Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <pshilovsky@samba.org>
Date: Tue, 18 Sep 2012 16:20:27 -0700
Subject: CIFS: Move reopen code to ops struct

Signed-off-by: Pavel Shilovsky <pshilovsky@samba.org>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/file.c | 121 ++++++++++++++++++++++++++++++---------------------------
 1 file changed, 64 insertions(+), 57 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index dbca2b293e55..628ee17007f8 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -459,59 +459,66 @@ out:
 	return rc;
 }
 
-/* Try to reacquire byte range locks that were released when session */
-/* to server was lost */
+/*
+ * Try to reacquire byte range locks that were released when session
+ * to server was lost
+ */
 static int cifs_relock_file(struct cifsFileInfo *cifsFile)
 {
 	int rc = 0;
 
-/* BB list all locks open on this file and relock */
+	/* BB list all locks open on this file and relock */
 
 	return rc;
 }
 
-static int cifs_reopen_file(struct cifsFileInfo *pCifsFile, bool can_flush)
+static int
+cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
 {
 	int rc = -EACCES;
 	unsigned int xid;
 	__u32 oplock;
 	struct cifs_sb_info *cifs_sb;
 	struct cifs_tcon *tcon;
-	struct cifsInodeInfo *pCifsInode;
+	struct TCP_Server_Info *server;
+	struct cifsInodeInfo *cinode;
 	struct inode *inode;
 	char *full_path = NULL;
-	int desiredAccess;
+	int desired_access;
 	int disposition = FILE_OPEN;
 	int create_options = CREATE_NOT_DIR;
-	__u16 netfid;
+	struct cifs_fid fid;
 
 	xid = get_xid();
-	mutex_lock(&pCifsFile->fh_mutex);
-	if (!pCifsFile->invalidHandle) {
-		mutex_unlock(&pCifsFile->fh_mutex);
+	mutex_lock(&cfile->fh_mutex);
+	if (!cfile->invalidHandle) {
+		mutex_unlock(&cfile->fh_mutex);
 		rc = 0;
 		free_xid(xid);
 		return rc;
 	}
 
-	inode = pCifsFile->dentry->d_inode;
+	inode = cfile->dentry->d_inode;
 	cifs_sb = CIFS_SB(inode->i_sb);
-	tcon = tlink_tcon(pCifsFile->tlink);
+	tcon = tlink_tcon(cfile->tlink);
+	server = tcon->ses->server;
 
-/* can not grab rename sem here because various ops, including
-   those that already have the rename sem can end up causing writepage
-   to get called and if the server was down that means we end up here,
-   and we can never tell if the caller already has the rename_sem */
-	full_path = build_path_from_dentry(pCifsFile->dentry);
+	/*
+	 * Can not grab rename sem here because various ops, including those
+	 * that already have the rename sem can end up causing writepage to get
+	 * called and if the server was down that means we end up here, and we
+	 * can never tell if the caller already has the rename_sem.
+	 */
+	full_path = build_path_from_dentry(cfile->dentry);
 	if (full_path == NULL) {
 		rc = -ENOMEM;
-		mutex_unlock(&pCifsFile->fh_mutex);
+		mutex_unlock(&cfile->fh_mutex);
 		free_xid(xid);
 		return rc;
 	}
 
-	cFYI(1, "inode = 0x%p file flags 0x%x for %s",
-		 inode, pCifsFile->f_flags, full_path);
+	cFYI(1, "inode = 0x%p file flags 0x%x for %s", inode, cfile->f_flags,
+	     full_path);
 
 	if (tcon->ses->server->oplocks)
 		oplock = REQ_OPLOCK;
@@ -525,69 +532,69 @@ static int cifs_reopen_file(struct cifsFileInfo *pCifsFile, bool can_flush)
 		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
 		 * original open. Must mask them off for a reopen.
 		 */
-		unsigned int oflags = pCifsFile->f_flags &
+		unsigned int oflags = cfile->f_flags &
 						~(O_CREAT | O_EXCL | O_TRUNC);
 
 		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
-				cifs_sb->mnt_file_mode /* ignored */,
-				oflags, &oplock, &netfid, xid);
+				     cifs_sb->mnt_file_mode /* ignored */,
+				     oflags, &oplock, &fid.netfid, xid);
 		if (rc == 0) {
 			cFYI(1, "posix reopen succeeded");
 			goto reopen_success;
 		}
-		/* fallthrough to retry open the old way on errors, especially
-		   in the reconnect path it is important to retry hard */
+		/*
+		 * fallthrough to retry open the old way on errors, especially
+		 * in the reconnect path it is important to retry hard
+		 */
 	}
 
-	desiredAccess = cifs_convert_flags(pCifsFile->f_flags);
+	desired_access = cifs_convert_flags(cfile->f_flags);
 
 	if (backup_cred(cifs_sb))
 		create_options |= CREATE_OPEN_BACKUP_INTENT;
 
-	/* Can not refresh inode by passing in file_info buf to be returned
-	   by SMBOpen and then calling get_inode_info with returned buf
-	   since file might have write behind data that needs to be flushed
-	   and server version of file size can be stale. If we knew for sure
-	   that inode was not dirty locally we could do this */
-
-	rc = CIFSSMBOpen(xid, tcon, full_path, disposition, desiredAccess,
-			 create_options, &netfid, &oplock, NULL,
-			 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
-				CIFS_MOUNT_MAP_SPECIAL_CHR);
+	/*
+	 * Can not refresh inode by passing in file_info buf to be returned by
+	 * CIFSSMBOpen and then calling get_inode_info with returned buf since
+	 * file might have write behind data that needs to be flushed and server
+	 * version of file size can be stale. If we knew for sure that inode was
+	 * not dirty locally we could do this.
+	 */
+	rc = server->ops->open(xid, tcon, full_path, disposition,
+			       desired_access, create_options, &fid, &oplock,
+			       NULL, cifs_sb);
 	if (rc) {
-		mutex_unlock(&pCifsFile->fh_mutex);
-		cFYI(1, "cifs_open returned 0x%x", rc);
+		mutex_unlock(&cfile->fh_mutex);
+		cFYI(1, "cifs_reopen returned 0x%x", rc);
 		cFYI(1, "oplock: %d", oplock);
 		goto reopen_error_exit;
 	}
 
 reopen_success:
-	pCifsFile->fid.netfid = netfid;
-	pCifsFile->invalidHandle = false;
-	mutex_unlock(&pCifsFile->fh_mutex);
-	pCifsInode = CIFS_I(inode);
+	cfile->invalidHandle = false;
+	mutex_unlock(&cfile->fh_mutex);
+	cinode = CIFS_I(inode);
 
 	if (can_flush) {
 		rc = filemap_write_and_wait(inode->i_mapping);
 		mapping_set_error(inode->i_mapping, rc);
 
 		if (tcon->unix_ext)
-			rc = cifs_get_inode_info_unix(&inode,
-				full_path, inode->i_sb, xid);
+			rc = cifs_get_inode_info_unix(&inode, full_path,
+						      inode->i_sb, xid);
 		else
-			rc = cifs_get_inode_info(&inode,
-				full_path, NULL, inode->i_sb,
-				xid, NULL);
-	} /* else we are writing out data to server already
-	     and could deadlock if we tried to flush data, and
-	     since we do not know if we have data that would
-	     invalidate the current end of file on the server
-	     we can not go to the server to get the new inod
-	     info */
-
-	cifs_set_oplock_level(pCifsInode, oplock);
-
-	cifs_relock_file(pCifsFile);
+			rc = cifs_get_inode_info(&inode, full_path, NULL,
+						 inode->i_sb, xid, NULL);
+	}
+	/*
+	 * Else we are writing out data to server already and could deadlock if
+	 * we tried to flush data, and since we do not know if we have data that
+	 * would invalidate the current end of file on the server we can not go
+	 * to the server to get the new inode info.
+	 */
+
+	server->ops->set_fid(cfile, &fid, oplock);
+	cifs_relock_file(cfile);
 
 reopen_error_exit:
 	kfree(full_path);
-- 
cgit 


From 1d8c4c0009deda22b436b1f0ab9f2885863717fc Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <pshilovsky@samba.org>
Date: Tue, 18 Sep 2012 16:20:27 -0700
Subject: CIFS: Make flush code use ops struct

Signed-off-by: Pavel Shilovsky <pshilovsky@samba.org>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/cifsglob.h |  2 ++
 fs/cifs/file.c     | 20 ++++++++++++++++----
 fs/cifs/smb1ops.c  |  8 ++++++++
 3 files changed, 26 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 500ecb921b85..abb831019039 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -274,6 +274,8 @@ struct smb_version_operations {
 	void (*set_fid)(struct cifsFileInfo *, struct cifs_fid *, __u32);
 	/* close a file */
 	int (*close)(const unsigned int, struct cifs_tcon *, struct cifs_fid *);
+	/* send a flush request to the server */
+	int (*flush)(const unsigned int, struct cifs_tcon *, struct cifs_fid *);
 };
 
 struct smb_version_values {
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 628ee17007f8..aa1dccf0df9e 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2062,6 +2062,7 @@ int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
 	unsigned int xid;
 	int rc = 0;
 	struct cifs_tcon *tcon;
+	struct TCP_Server_Info *server;
 	struct cifsFileInfo *smbfile = file->private_data;
 	struct inode *inode = file->f_path.dentry->d_inode;
 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
@@ -2085,8 +2086,13 @@ int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
 	}
 
 	tcon = tlink_tcon(smbfile->tlink);
-	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
-		rc = CIFSSMBFlush(xid, tcon, smbfile->fid.netfid);
+	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
+		server = tcon->ses->server;
+		if (server->ops->flush)
+			rc = server->ops->flush(xid, tcon, &smbfile->fid);
+		else
+			rc = -ENOSYS;
+	}
 
 	free_xid(xid);
 	mutex_unlock(&inode->i_mutex);
@@ -2098,6 +2104,7 @@ int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 	unsigned int xid;
 	int rc = 0;
 	struct cifs_tcon *tcon;
+	struct TCP_Server_Info *server;
 	struct cifsFileInfo *smbfile = file->private_data;
 	struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
 	struct inode *inode = file->f_mapping->host;
@@ -2113,8 +2120,13 @@ int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 		file->f_path.dentry->d_name.name, datasync);
 
 	tcon = tlink_tcon(smbfile->tlink);
-	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
-		rc = CIFSSMBFlush(xid, tcon, smbfile->fid.netfid);
+	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
+		server = tcon->ses->server;
+		if (server->ops->flush)
+			rc = server->ops->flush(xid, tcon, &smbfile->fid);
+		else
+			rc = -ENOSYS;
+	}
 
 	free_xid(xid);
 	mutex_unlock(&inode->i_mutex);
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index dd64754ed8cb..df20dd9e64ca 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -648,6 +648,13 @@ cifs_close_file(const unsigned int xid, struct cifs_tcon *tcon,
 	return CIFSSMBClose(xid, tcon, fid->netfid);
 }
 
+static int
+cifs_flush_file(const unsigned int xid, struct cifs_tcon *tcon,
+		struct cifs_fid *fid)
+{
+	return CIFSSMBFlush(xid, tcon, fid->netfid);
+}
+
 struct smb_version_operations smb1_operations = {
 	.send_cancel = send_nt_cancel,
 	.compare_fids = cifs_compare_fids,
@@ -691,6 +698,7 @@ struct smb_version_operations smb1_operations = {
 	.open = cifs_open_file,
 	.set_fid = cifs_set_fid,
 	.close = cifs_close_file,
+	.flush = cifs_flush_file,
 };
 
 struct smb_version_values smb1_values = {
-- 
cgit 


From 7a5cfb1965854132f2f382eade8c6ce2eeb6f692 Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <pshilovsky@samba.org>
Date: Tue, 18 Sep 2012 16:20:28 -0700
Subject: CIFS: Add SMB2 support for flush

Signed-off-by: Pavel Shilovsky <pshilovsky@samba.org>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/smb2ops.c   |  8 ++++++++
 fs/cifs/smb2pdu.c   | 38 ++++++++++++++++++++++++++++++++++++++
 fs/cifs/smb2pdu.h   | 15 +++++++++++++++
 fs/cifs/smb2proto.h |  2 ++
 4 files changed, 63 insertions(+)

(limited to 'fs')

diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 0fd580168252..d81e1daeb8f0 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -329,6 +329,13 @@ smb2_close_file(const unsigned int xid, struct cifs_tcon *tcon,
 	return SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid);
 }
 
+static int
+smb2_flush_file(const unsigned int xid, struct cifs_tcon *tcon,
+		struct cifs_fid *fid)
+{
+	return SMB2_flush(xid, tcon, fid->persistent_fid, fid->volatile_fid);
+}
+
 struct smb_version_operations smb21_operations = {
 	.setup_request = smb2_setup_request,
 	.setup_async_request = smb2_setup_async_request,
@@ -363,6 +370,7 @@ struct smb_version_operations smb21_operations = {
 	.open = smb2_open_file,
 	.set_fid = smb2_set_fid,
 	.close = smb2_close_file,
+	.flush = smb2_flush_file,
 };
 
 struct smb_version_values smb21_values = {
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 231e9701ab85..ff374063f4e2 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1152,3 +1152,41 @@ SMB2_echo(struct TCP_Server_Info *server)
 	cifs_small_buf_release(req);
 	return rc;
 }
+
+int
+SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
+	   u64 volatile_fid)
+{
+	struct smb2_flush_req *req;
+	struct TCP_Server_Info *server;
+	struct cifs_ses *ses = tcon->ses;
+	struct kvec iov[1];
+	int resp_buftype;
+	int rc = 0;
+
+	cFYI(1, "Flush");
+
+	if (ses && (ses->server))
+		server = ses->server;
+	else
+		return -EIO;
+
+	rc = small_smb2_init(SMB2_FLUSH, tcon, (void **) &req);
+	if (rc)
+		return rc;
+
+	req->PersistentFileId = persistent_fid;
+	req->VolatileFileId = volatile_fid;
+
+	iov[0].iov_base = (char *)req;
+	/* 4 for rfc1002 length field */
+	iov[0].iov_len = get_rfc1002_length(req) + 4;
+
+	rc = SendReceive2(xid, ses, iov, 1, &resp_buftype, 0);
+
+	if ((rc != 0) && tcon)
+		cifs_stats_fail_inc(tcon, SMB2_FLUSH_HE);
+
+	free_rsp_buf(resp_buftype, iov[0].iov_base);
+	return rc;
+}
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index 0e962cbf8166..f5bf63f66971 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -453,6 +453,21 @@ struct smb2_close_rsp {
 	__le32 Attributes;
 } __packed;
 
+struct smb2_flush_req {
+	struct smb2_hdr hdr;
+	__le16 StructureSize;	/* Must be 24 */
+	__le16 Reserved1;
+	__le32 Reserved2;
+	__u64  PersistentFileId; /* opaque endianness */
+	__u64  VolatileFileId; /* opaque endianness */
+} __packed;
+
+struct smb2_flush_rsp {
+	struct smb2_hdr hdr;
+	__le16 StructureSize;
+	__le16 Reserved;
+} __packed;
+
 struct smb2_echo_req {
 	struct smb2_hdr hdr;
 	__le16 StructureSize;	/* Must be 4 */
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index 624d344e1a57..51e6cd185c79 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -89,6 +89,8 @@ extern int SMB2_open(const unsigned int xid, struct cifs_tcon *tcon,
 		     struct smb2_file_all_info *buf);
 extern int SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
 		      u64 persistent_file_id, u64 volatile_file_id);
+extern int SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon,
+		      u64 persistent_file_id, u64 volatile_file_id);
 extern int SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon,
 			   u64 persistent_file_id, u64 volatile_file_id,
 			   struct smb2_file_all_info *data);
-- 
cgit 


From 24985c53d5b04a56ac7c8ae7f74b8cb807e2ed2f Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <pshilovsky@samba.org>
Date: Tue, 18 Sep 2012 16:20:28 -0700
Subject: CIFS: Move r/wsize negotiating to ops struct

Signed-off-by: Pavel Shilovsky <pshilovsky@samba.org>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/cifsglob.h |  61 +++++++++++++++++++++++
 fs/cifs/connect.c  | 144 +----------------------------------------------------
 fs/cifs/smb1ops.c  |  86 ++++++++++++++++++++++++++++++++
 3 files changed, 149 insertions(+), 142 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index abb831019039..e5cb1941e251 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -213,6 +213,10 @@ struct smb_version_operations {
 	bool (*need_neg)(struct TCP_Server_Info *);
 	/* negotiate to the server */
 	int (*negotiate)(const unsigned int, struct cifs_ses *);
+	/* set negotiated write size */
+	unsigned int (*negotiate_wsize)(struct cifs_tcon *, struct smb_vol *);
+	/* set negotiated read size */
+	unsigned int (*negotiate_rsize)(struct cifs_tcon *, struct smb_vol *);
 	/* setup smb sessionn */
 	int (*sess_setup)(const unsigned int, struct cifs_ses *,
 			  const struct nls_table *);
@@ -515,6 +519,63 @@ get_next_mid(struct TCP_Server_Info *server)
 	return server->ops->get_next_mid(server);
 }
 
+/*
+ * When the server supports very large reads and writes via POSIX extensions,
+ * we can allow up to 2^24-1, minus the size of a READ/WRITE_AND_X header, not
+ * including the RFC1001 length.
+ *
+ * Note that this might make for "interesting" allocation problems during
+ * writeback however as we have to allocate an array of pointers for the
+ * pages. A 16M write means ~32kb page array with PAGE_CACHE_SIZE == 4096.
+ *
+ * For reads, there is a similar problem as we need to allocate an array
+ * of kvecs to handle the receive, though that should only need to be done
+ * once.
+ */
+#define CIFS_MAX_WSIZE ((1<<24) - 1 - sizeof(WRITE_REQ) + 4)
+#define CIFS_MAX_RSIZE ((1<<24) - sizeof(READ_RSP) + 4)
+
+/*
+ * When the server doesn't allow large posix writes, only allow a rsize/wsize
+ * of 2^17-1 minus the size of the call header. That allows for a read or
+ * write up to the maximum size described by RFC1002.
+ */
+#define CIFS_MAX_RFC1002_WSIZE ((1<<17) - 1 - sizeof(WRITE_REQ) + 4)
+#define CIFS_MAX_RFC1002_RSIZE ((1<<17) - 1 - sizeof(READ_RSP) + 4)
+
+/*
+ * The default wsize is 1M. find_get_pages seems to return a maximum of 256
+ * pages in a single call. With PAGE_CACHE_SIZE == 4k, this means we can fill
+ * a single wsize request with a single call.
+ */
+#define CIFS_DEFAULT_IOSIZE (1024 * 1024)
+
+/*
+ * Windows only supports a max of 60kb reads and 65535 byte writes. Default to
+ * those values when posix extensions aren't in force. In actuality here, we
+ * use 65536 to allow for a write that is a multiple of 4k. Most servers seem
+ * to be ok with the extra byte even though Windows doesn't send writes that
+ * are that large.
+ *
+ * Citation:
+ *
+ * http://blogs.msdn.com/b/openspecification/archive/2009/04/10/smb-maximum-transmit-buffer-size-and-performance-tuning.aspx
+ */
+#define CIFS_DEFAULT_NON_POSIX_RSIZE (60 * 1024)
+#define CIFS_DEFAULT_NON_POSIX_WSIZE (65536)
+
+/*
+ * On hosts with high memory, we can't currently support wsize/rsize that are
+ * larger than we can kmap at once. Cap the rsize/wsize at
+ * LAST_PKMAP * PAGE_SIZE. We'll never be able to fill a read or write request
+ * larger than that anyway.
+ */
+#ifdef CONFIG_HIGHMEM
+#define CIFS_KMAP_SIZE_LIMIT   (LAST_PKMAP * PAGE_CACHE_SIZE)
+#else /* CONFIG_HIGHMEM */
+#define CIFS_KMAP_SIZE_LIMIT   (1<<24)
+#endif /* CONFIG_HIGHMEM */
+
 /*
  * Macros to allow the TCP_Server_Info->net field and related code to drop out
  * when CONFIG_NET_NS isn't set.
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 6df6fa14cba8..c31b30b572e0 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -3261,146 +3261,6 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
 			   "mount option supported");
 }
 
-/*
- * When the server supports very large reads and writes via POSIX extensions,
- * we can allow up to 2^24-1, minus the size of a READ/WRITE_AND_X header, not
- * including the RFC1001 length.
- *
- * Note that this might make for "interesting" allocation problems during
- * writeback however as we have to allocate an array of pointers for the
- * pages. A 16M write means ~32kb page array with PAGE_CACHE_SIZE == 4096.
- *
- * For reads, there is a similar problem as we need to allocate an array
- * of kvecs to handle the receive, though that should only need to be done
- * once.
- */
-#define CIFS_MAX_WSIZE ((1<<24) - 1 - sizeof(WRITE_REQ) + 4)
-#define CIFS_MAX_RSIZE ((1<<24) - sizeof(READ_RSP) + 4)
-
-/*
- * When the server doesn't allow large posix writes, only allow a rsize/wsize
- * of 2^17-1 minus the size of the call header. That allows for a read or
- * write up to the maximum size described by RFC1002.
- */
-#define CIFS_MAX_RFC1002_WSIZE ((1<<17) - 1 - sizeof(WRITE_REQ) + 4)
-#define CIFS_MAX_RFC1002_RSIZE ((1<<17) - 1 - sizeof(READ_RSP) + 4)
-
-/*
- * The default wsize is 1M. find_get_pages seems to return a maximum of 256
- * pages in a single call. With PAGE_CACHE_SIZE == 4k, this means we can fill
- * a single wsize request with a single call.
- */
-#define CIFS_DEFAULT_IOSIZE (1024 * 1024)
-
-/*
- * Windows only supports a max of 60kb reads and 65535 byte writes. Default to
- * those values when posix extensions aren't in force. In actuality here, we
- * use 65536 to allow for a write that is a multiple of 4k. Most servers seem
- * to be ok with the extra byte even though Windows doesn't send writes that
- * are that large.
- *
- * Citation:
- *
- * http://blogs.msdn.com/b/openspecification/archive/2009/04/10/smb-maximum-transmit-buffer-size-and-performance-tuning.aspx
- */
-#define CIFS_DEFAULT_NON_POSIX_RSIZE (60 * 1024)
-#define CIFS_DEFAULT_NON_POSIX_WSIZE (65536)
-
-/*
- * On hosts with high memory, we can't currently support wsize/rsize that are
- * larger than we can kmap at once. Cap the rsize/wsize at
- * LAST_PKMAP * PAGE_SIZE. We'll never be able to fill a read or write request
- * larger than that anyway.
- */
-#ifdef CONFIG_HIGHMEM
-#define CIFS_KMAP_SIZE_LIMIT	(LAST_PKMAP * PAGE_CACHE_SIZE)
-#else /* CONFIG_HIGHMEM */
-#define CIFS_KMAP_SIZE_LIMIT	(1<<24)
-#endif /* CONFIG_HIGHMEM */
-
-static unsigned int
-cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info)
-{
-	__u64 unix_cap = le64_to_cpu(tcon->fsUnixInfo.Capability);
-	struct TCP_Server_Info *server = tcon->ses->server;
-	unsigned int wsize;
-
-	/* start with specified wsize, or default */
-	if (pvolume_info->wsize)
-		wsize = pvolume_info->wsize;
-	else if (tcon->unix_ext && (unix_cap & CIFS_UNIX_LARGE_WRITE_CAP))
-		wsize = CIFS_DEFAULT_IOSIZE;
-	else
-		wsize = CIFS_DEFAULT_NON_POSIX_WSIZE;
-
-	/* can server support 24-bit write sizes? (via UNIX extensions) */
-	if (!tcon->unix_ext || !(unix_cap & CIFS_UNIX_LARGE_WRITE_CAP))
-		wsize = min_t(unsigned int, wsize, CIFS_MAX_RFC1002_WSIZE);
-
-	/*
-	 * no CAP_LARGE_WRITE_X or is signing enabled without CAP_UNIX set?
-	 * Limit it to max buffer offered by the server, minus the size of the
-	 * WRITEX header, not including the 4 byte RFC1001 length.
-	 */
-	if (!(server->capabilities & CAP_LARGE_WRITE_X) ||
-	    (!(server->capabilities & CAP_UNIX) &&
-	     (server->sec_mode & (SECMODE_SIGN_ENABLED|SECMODE_SIGN_REQUIRED))))
-		wsize = min_t(unsigned int, wsize,
-				server->maxBuf - sizeof(WRITE_REQ) + 4);
-
-	/* limit to the amount that we can kmap at once */
-	wsize = min_t(unsigned int, wsize, CIFS_KMAP_SIZE_LIMIT);
-
-	/* hard limit of CIFS_MAX_WSIZE */
-	wsize = min_t(unsigned int, wsize, CIFS_MAX_WSIZE);
-
-	return wsize;
-}
-
-static unsigned int
-cifs_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info)
-{
-	__u64 unix_cap = le64_to_cpu(tcon->fsUnixInfo.Capability);
-	struct TCP_Server_Info *server = tcon->ses->server;
-	unsigned int rsize, defsize;
-
-	/*
-	 * Set default value...
-	 *
-	 * HACK alert! Ancient servers have very small buffers. Even though
-	 * MS-CIFS indicates that servers are only limited by the client's
-	 * bufsize for reads, testing against win98se shows that it throws
-	 * INVALID_PARAMETER errors if you try to request too large a read.
-	 * OS/2 just sends back short reads.
-	 *
-	 * If the server doesn't advertise CAP_LARGE_READ_X, then assume that
-	 * it can't handle a read request larger than its MaxBufferSize either.
-	 */
-	if (tcon->unix_ext && (unix_cap & CIFS_UNIX_LARGE_READ_CAP))
-		defsize = CIFS_DEFAULT_IOSIZE;
-	else if (server->capabilities & CAP_LARGE_READ_X)
-		defsize = CIFS_DEFAULT_NON_POSIX_RSIZE;
-	else
-		defsize = server->maxBuf - sizeof(READ_RSP);
-
-	rsize = pvolume_info->rsize ? pvolume_info->rsize : defsize;
-
-	/*
-	 * no CAP_LARGE_READ_X? Then MS-CIFS states that we must limit this to
-	 * the client's MaxBufferSize.
-	 */
-	if (!(server->capabilities & CAP_LARGE_READ_X))
-		rsize = min_t(unsigned int, CIFSMaxBufSize, rsize);
-
-	/* limit to the amount that we can kmap at once */
-	rsize = min_t(unsigned int, rsize, CIFS_KMAP_SIZE_LIMIT);
-
-	/* hard limit of CIFS_MAX_RSIZE */
-	rsize = min_t(unsigned int, rsize, CIFS_MAX_RSIZE);
-
-	return rsize;
-}
-
 static void
 cleanup_volume_info_contents(struct smb_vol *volume_info)
 {
@@ -3651,8 +3511,8 @@ try_mount_again:
 	if (!tcon->ipc && server->ops->qfs_tcon)
 		server->ops->qfs_tcon(xid, tcon);
 
-	cifs_sb->wsize = cifs_negotiate_wsize(tcon, volume_info);
-	cifs_sb->rsize = cifs_negotiate_rsize(tcon, volume_info);
+	cifs_sb->wsize = server->ops->negotiate_wsize(tcon, volume_info);
+	cifs_sb->rsize = server->ops->negotiate_rsize(tcon, volume_info);
 
 	/* tune readahead according to rsize */
 	cifs_sb->bdi.ra_pages = cifs_sb->rsize / PAGE_CACHE_SIZE;
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index df20dd9e64ca..7e8a2bdd69c8 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -17,6 +17,7 @@
  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 
+#include <linux/pagemap.h>
 #include "cifsglob.h"
 #include "cifsproto.h"
 #include "cifs_debug.h"
@@ -410,6 +411,89 @@ cifs_negotiate(const unsigned int xid, struct cifs_ses *ses)
 	return rc;
 }
 
+static unsigned int
+cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
+{
+	__u64 unix_cap = le64_to_cpu(tcon->fsUnixInfo.Capability);
+	struct TCP_Server_Info *server = tcon->ses->server;
+	unsigned int wsize;
+
+	/* start with specified wsize, or default */
+	if (volume_info->wsize)
+		wsize = volume_info->wsize;
+	else if (tcon->unix_ext && (unix_cap & CIFS_UNIX_LARGE_WRITE_CAP))
+		wsize = CIFS_DEFAULT_IOSIZE;
+	else
+		wsize = CIFS_DEFAULT_NON_POSIX_WSIZE;
+
+	/* can server support 24-bit write sizes? (via UNIX extensions) */
+	if (!tcon->unix_ext || !(unix_cap & CIFS_UNIX_LARGE_WRITE_CAP))
+		wsize = min_t(unsigned int, wsize, CIFS_MAX_RFC1002_WSIZE);
+
+	/*
+	 * no CAP_LARGE_WRITE_X or is signing enabled without CAP_UNIX set?
+	 * Limit it to max buffer offered by the server, minus the size of the
+	 * WRITEX header, not including the 4 byte RFC1001 length.
+	 */
+	if (!(server->capabilities & CAP_LARGE_WRITE_X) ||
+	    (!(server->capabilities & CAP_UNIX) &&
+	     (server->sec_mode & (SECMODE_SIGN_ENABLED|SECMODE_SIGN_REQUIRED))))
+		wsize = min_t(unsigned int, wsize,
+				server->maxBuf - sizeof(WRITE_REQ) + 4);
+
+	/* limit to the amount that we can kmap at once */
+	wsize = min_t(unsigned int, wsize, CIFS_KMAP_SIZE_LIMIT);
+
+	/* hard limit of CIFS_MAX_WSIZE */
+	wsize = min_t(unsigned int, wsize, CIFS_MAX_WSIZE);
+
+	return wsize;
+}
+
+static unsigned int
+cifs_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
+{
+	__u64 unix_cap = le64_to_cpu(tcon->fsUnixInfo.Capability);
+	struct TCP_Server_Info *server = tcon->ses->server;
+	unsigned int rsize, defsize;
+
+	/*
+	 * Set default value...
+	 *
+	 * HACK alert! Ancient servers have very small buffers. Even though
+	 * MS-CIFS indicates that servers are only limited by the client's
+	 * bufsize for reads, testing against win98se shows that it throws
+	 * INVALID_PARAMETER errors if you try to request too large a read.
+	 * OS/2 just sends back short reads.
+	 *
+	 * If the server doesn't advertise CAP_LARGE_READ_X, then assume that
+	 * it can't handle a read request larger than its MaxBufferSize either.
+	 */
+	if (tcon->unix_ext && (unix_cap & CIFS_UNIX_LARGE_READ_CAP))
+		defsize = CIFS_DEFAULT_IOSIZE;
+	else if (server->capabilities & CAP_LARGE_READ_X)
+		defsize = CIFS_DEFAULT_NON_POSIX_RSIZE;
+	else
+		defsize = server->maxBuf - sizeof(READ_RSP);
+
+	rsize = volume_info->rsize ? volume_info->rsize : defsize;
+
+	/*
+	 * no CAP_LARGE_READ_X? Then MS-CIFS states that we must limit this to
+	 * the client's MaxBufferSize.
+	 */
+	if (!(server->capabilities & CAP_LARGE_READ_X))
+		rsize = min_t(unsigned int, CIFSMaxBufSize, rsize);
+
+	/* limit to the amount that we can kmap at once */
+	rsize = min_t(unsigned int, rsize, CIFS_KMAP_SIZE_LIMIT);
+
+	/* hard limit of CIFS_MAX_RSIZE */
+	rsize = min_t(unsigned int, rsize, CIFS_MAX_RSIZE);
+
+	return rsize;
+}
+
 static void
 cifs_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon)
 {
@@ -678,6 +762,8 @@ struct smb_version_operations smb1_operations = {
 	.check_trans2 = cifs_check_trans2,
 	.need_neg = cifs_need_neg,
 	.negotiate = cifs_negotiate,
+	.negotiate_wsize = cifs_negotiate_wsize,
+	.negotiate_rsize = cifs_negotiate_rsize,
 	.sess_setup = CIFS_SessSetup,
 	.logoff = CIFSSMBLogoff,
 	.tree_connect = CIFSTCon,
-- 
cgit 


From 3a3bab509f3f0e7295caab24e9102ce303edb50b Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <pshilovsky@samba.org>
Date: Tue, 18 Sep 2012 16:20:28 -0700
Subject: CIFS: Add SMB2 r/wsize negotiating

Signed-off-by: Pavel Shilovsky <pshilovsky@samba.org>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/smb2ops.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

(limited to 'fs')

diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index d81e1daeb8f0..20afb756e97a 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -17,6 +17,7 @@
  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 
+#include <linux/pagemap.h>
 #include "cifsglob.h"
 #include "smb2pdu.h"
 #include "smb2proto.h"
@@ -157,6 +158,48 @@ smb2_negotiate(const unsigned int xid, struct cifs_ses *ses)
 	return rc;
 }
 
+static unsigned int
+smb2_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
+{
+	struct TCP_Server_Info *server = tcon->ses->server;
+	unsigned int wsize;
+
+	/* start with specified wsize, or default */
+	wsize = volume_info->wsize ? volume_info->wsize : CIFS_DEFAULT_IOSIZE;
+	wsize = min_t(unsigned int, wsize, server->max_write);
+	/*
+	 * limit write size to 2 ** 16, because we don't support multicredit
+	 * requests now.
+	 */
+	wsize = min_t(unsigned int, wsize, 2 << 15);
+
+	/* limit to the amount that we can kmap at once */
+	wsize = min_t(unsigned int, wsize, CIFS_KMAP_SIZE_LIMIT);
+
+	return wsize;
+}
+
+static unsigned int
+smb2_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
+{
+	struct TCP_Server_Info *server = tcon->ses->server;
+	unsigned int rsize;
+
+	/* start with specified rsize, or default */
+	rsize = volume_info->rsize ? volume_info->rsize : CIFS_DEFAULT_IOSIZE;
+	rsize = min_t(unsigned int, rsize, server->max_read);
+	/*
+	 * limit write size to 2 ** 16, because we don't support multicredit
+	 * requests now.
+	 */
+	rsize = min_t(unsigned int, rsize, 2 << 15);
+
+	/* limit to the amount that we can kmap at once */
+	rsize = min_t(unsigned int, rsize, CIFS_KMAP_SIZE_LIMIT);
+
+	return rsize;
+}
+
 static int
 smb2_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon,
 			struct cifs_sb_info *cifs_sb, const char *full_path)
@@ -352,6 +395,8 @@ struct smb_version_operations smb21_operations = {
 	.print_stats = smb2_print_stats,
 	.need_neg = smb2_need_neg,
 	.negotiate = smb2_negotiate,
+	.negotiate_wsize = smb2_negotiate_wsize,
+	.negotiate_rsize = smb2_negotiate_rsize,
 	.sess_setup = SMB2_sess_setup,
 	.logoff = SMB2_logoff,
 	.tree_connect = SMB2_tcon,
-- 
cgit 


From fc9c59662e0cd37577556d0de865268baeb9b293 Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <pshilovsky@samba.org>
Date: Tue, 18 Sep 2012 16:20:28 -0700
Subject: CIFS: Move async read to ops struct

Signed-off-by: Pavel Shilovsky <pshilovsky@samba.org>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/cifsglob.h     | 3 +++
 fs/cifs/file.c         | 8 +++++++-
 fs/cifs/smb1ops.c      | 1 +
 fs/cifs/smb2maperror.c | 3 ++-
 4 files changed, 13 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index e5cb1941e251..fcf81c05635f 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -172,6 +172,7 @@ struct dfs_info3_param;
 struct cifs_fattr;
 struct smb_vol;
 struct cifs_fid;
+struct cifs_readdata;
 
 struct smb_version_operations {
 	int (*send_cancel)(struct TCP_Server_Info *, void *,
@@ -280,6 +281,8 @@ struct smb_version_operations {
 	int (*close)(const unsigned int, struct cifs_tcon *, struct cifs_fid *);
 	/* send a flush request to the server */
 	int (*flush)(const unsigned int, struct cifs_tcon *, struct cifs_fid *);
+	/* async read from the server */
+	int (*async_readv)(struct cifs_readdata *);
 };
 
 struct smb_version_values {
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index aa1dccf0df9e..ec7c2e6bcbdf 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2488,6 +2488,9 @@ static int
 cifs_retry_async_readv(struct cifs_readdata *rdata)
 {
 	int rc;
+	struct TCP_Server_Info *server;
+
+	server = tlink_tcon(rdata->cfile->tlink)->ses->server;
 
 	do {
 		if (rdata->cfile->invalidHandle) {
@@ -2495,7 +2498,7 @@ cifs_retry_async_readv(struct cifs_readdata *rdata)
 			if (rc != 0)
 				continue;
 		}
-		rc = cifs_async_readv(rdata);
+		rc = server->ops->async_readv(rdata);
 	} while (rc == -EAGAIN);
 
 	return rc;
@@ -2647,6 +2650,9 @@ cifs_iovec_read(struct file *file, const struct iovec *iov,
 	open_file = file->private_data;
 	tcon = tlink_tcon(open_file->tlink);
 
+	if (!tcon->ses->server->ops->async_readv)
+		return -ENOSYS;
+
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
 		pid = open_file->pid;
 	else
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index 7e8a2bdd69c8..e2dbd22cb136 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -785,6 +785,7 @@ struct smb_version_operations smb1_operations = {
 	.set_fid = cifs_set_fid,
 	.close = cifs_close_file,
 	.flush = cifs_flush_file,
+	.async_readv = cifs_async_readv,
 };
 
 struct smb_version_values smb1_values = {
diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c
index be41478acc05..eaf5466d4041 100644
--- a/fs/cifs/smb2maperror.c
+++ b/fs/cifs/smb2maperror.c
@@ -2455,7 +2455,8 @@ map_smb2_to_linux_error(char *buf, bool log_err)
 		return 0;
 
 	/* mask facility */
-	if (log_err && (smb2err != (STATUS_MORE_PROCESSING_REQUIRED)))
+	if (log_err && (smb2err != STATUS_MORE_PROCESSING_REQUIRED) &&
+	    (smb2err != STATUS_END_OF_FILE))
 		smb2_print_status(smb2err);
 	else if (cifsFYI & CIFS_RC)
 		smb2_print_status(smb2err);
-- 
cgit 


From 09a4707e7638247302c6d798061aed117141fb74 Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <pshilovsky@samba.org>
Date: Tue, 18 Sep 2012 16:20:29 -0700
Subject: CIFS: Add SMB2 support for cifs_iovec_read

Signed-off-by: Pavel Shilovsky <pshilovsky@samba.org>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/cifsglob.h  |  25 ++++++++++
 fs/cifs/cifsproto.h |  20 +-------
 fs/cifs/cifssmb.c   |   2 +-
 fs/cifs/file.c      |   4 ++
 fs/cifs/smb2glob.h  |   6 +++
 fs/cifs/smb2misc.c  |   3 ++
 fs/cifs/smb2ops.c   |  19 ++++++++
 fs/cifs/smb2pdu.c   | 137 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/cifs/smb2pdu.h   |  28 +++++++++++
 fs/cifs/smb2proto.h |   1 +
 10 files changed, 225 insertions(+), 20 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index fcf81c05635f..93dd582bb8d1 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -857,12 +857,37 @@ struct cifsFileInfo {
 
 struct cifs_io_parms {
 	__u16 netfid;
+#ifdef CONFIG_CIFS_SMB2
+	__u64 persistent_fid;	/* persist file id for smb2 */
+	__u64 volatile_fid;	/* volatile file id for smb2 */
+#endif
 	__u32 pid;
 	__u64 offset;
 	unsigned int length;
 	struct cifs_tcon *tcon;
 };
 
+struct cifs_readdata;
+
+/* asynchronous read support */
+struct cifs_readdata {
+	struct kref			refcount;
+	struct list_head		list;
+	struct completion		done;
+	struct cifsFileInfo		*cfile;
+	struct address_space		*mapping;
+	__u64				offset;
+	unsigned int			bytes;
+	pid_t				pid;
+	int				result;
+	struct list_head		pages;
+	struct work_struct		work;
+	int (*marshal_iov) (struct cifs_readdata *rdata,
+			    unsigned int remaining);
+	unsigned int			nr_iov;
+	struct kvec			iov[1];
+};
+
 /*
  * Take a reference on the file private data. Must be called with
  * cifs_file_list_lock held.
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 2c6ad78a16cc..6656eb5dbf70 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -464,27 +464,9 @@ extern int E_md4hash(const unsigned char *passwd, unsigned char *p16,
 extern int SMBencrypt(unsigned char *passwd, const unsigned char *c8,
 			unsigned char *p24);
 
-/* asynchronous read support */
-struct cifs_readdata {
-	struct kref			refcount;
-	struct list_head		list;
-	struct completion		done;
-	struct cifsFileInfo		*cfile;
-	struct address_space		*mapping;
-	__u64				offset;
-	unsigned int			bytes;
-	pid_t				pid;
-	int				result;
-	struct list_head		pages;
-	struct work_struct		work;
-	int (*marshal_iov) (struct cifs_readdata *rdata,
-			    unsigned int remaining);
-	unsigned int			nr_iov;
-	struct kvec			iov[1];
-};
-
 void cifs_readdata_release(struct kref *refcount);
 int cifs_async_readv(struct cifs_readdata *rdata);
+int cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid);
 
 /* asynchronous write support */
 struct cifs_writedata {
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 4c48b9c60b26..8a07f218266f 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1440,7 +1440,7 @@ cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid)
 	return 0;
 }
 
-static int
+int
 cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
 {
 	int length, len;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index ec7c2e6bcbdf..29ac8ee46039 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2732,6 +2732,10 @@ restart_loop:
 	cifs_stats_bytes_read(tcon, total_read);
 	*poffset += total_read;
 
+	/* mask nodata case */
+	if (rc == -ENODATA)
+		rc = 0;
+
 	return total_read ? total_read : rc;
 }
 
diff --git a/fs/cifs/smb2glob.h b/fs/cifs/smb2glob.h
index 33c1d89090c0..11505d73ff32 100644
--- a/fs/cifs/smb2glob.h
+++ b/fs/cifs/smb2glob.h
@@ -41,4 +41,10 @@
 #define SMB2_OP_RENAME 6
 #define SMB2_OP_DELETE 7
 
+/* Used when constructing chained read requests. */
+#define CHAINED_REQUEST 1
+#define START_OF_CHAIN 2
+#define END_OF_CHAIN 4
+#define RELATED_REQUEST 8
+
 #endif	/* _SMB2_GLOB_H */
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index e4d3b9964167..9275883c8530 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -244,6 +244,9 @@ smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr)
 		    ((struct smb2_query_info_rsp *)hdr)->OutputBufferLength);
 		break;
 	case SMB2_READ:
+		*off = ((struct smb2_read_rsp *)hdr)->DataOffset;
+		*len = le32_to_cpu(((struct smb2_read_rsp *)hdr)->DataLength);
+		break;
 	case SMB2_QUERY_DIRECTORY:
 	case SMB2_IOCTL:
 	case SMB2_CHANGE_NOTIFY:
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 20afb756e97a..d9ca357d9809 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -379,6 +379,20 @@ smb2_flush_file(const unsigned int xid, struct cifs_tcon *tcon,
 	return SMB2_flush(xid, tcon, fid->persistent_fid, fid->volatile_fid);
 }
 
+static unsigned int
+smb2_read_data_offset(char *buf)
+{
+	struct smb2_read_rsp *rsp = (struct smb2_read_rsp *)buf;
+	return rsp->DataOffset;
+}
+
+static unsigned int
+smb2_read_data_length(char *buf)
+{
+	struct smb2_read_rsp *rsp = (struct smb2_read_rsp *)buf;
+	return le32_to_cpu(rsp->DataLength);
+}
+
 struct smb_version_operations smb21_operations = {
 	.setup_request = smb2_setup_request,
 	.setup_async_request = smb2_setup_async_request,
@@ -388,6 +402,9 @@ struct smb_version_operations smb21_operations = {
 	.get_credits_field = smb2_get_credits_field,
 	.get_credits = smb2_get_credits,
 	.get_next_mid = smb2_get_next_mid,
+	.read_data_offset = smb2_read_data_offset,
+	.read_data_length = smb2_read_data_length,
+	.map_error = map_smb2_to_linux_error,
 	.find_mid = smb2_find_mid,
 	.check_message = smb2_check_message,
 	.dump_detail = smb2_dump_detail,
@@ -416,12 +433,14 @@ struct smb_version_operations smb21_operations = {
 	.set_fid = smb2_set_fid,
 	.close = smb2_close_file,
 	.flush = smb2_flush_file,
+	.async_readv = smb2_async_readv,
 };
 
 struct smb_version_values smb21_values = {
 	.version_string = SMB21_VERSION_STRING,
 	.header_size = sizeof(struct smb2_hdr),
 	.max_header_size = MAX_SMB2_HDR_SIZE,
+	.read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
 	.lock_cmd = SMB2_LOCK,
 	.cap_unix = 0,
 	.cap_nt_find = SMB2_NT_FIND,
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index ff374063f4e2..e18671852d41 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -31,6 +31,7 @@
 #include <linux/fs.h>
 #include <linux/kernel.h>
 #include <linux/vfs.h>
+#include <linux/task_io_accounting_ops.h>
 #include <linux/uaccess.h>
 #include <linux/xattr.h>
 #include "smb2pdu.h"
@@ -42,6 +43,7 @@
 #include "cifs_debug.h"
 #include "ntlmssp.h"
 #include "smb2status.h"
+#include "smb2glob.h"
 
 /*
  *  The following table defines the expected "StructureSize" of SMB2 requests
@@ -1190,3 +1192,138 @@ SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
 	free_rsp_buf(resp_buftype, iov[0].iov_base);
 	return rc;
 }
+
+/*
+ * To form a chain of read requests, any read requests after the first should
+ * have the end_of_chain boolean set to true.
+ */
+static int
+smb2_new_read_req(struct kvec *iov, struct cifs_io_parms *io_parms,
+		  unsigned int remaining_bytes, int request_type)
+{
+	int rc = -EACCES;
+	struct smb2_read_req *req = NULL;
+
+	rc = small_smb2_init(SMB2_READ, io_parms->tcon, (void **) &req);
+	if (rc)
+		return rc;
+	if (io_parms->tcon->ses->server == NULL)
+		return -ECONNABORTED;
+
+	req->hdr.ProcessId = cpu_to_le32(io_parms->pid);
+
+	req->PersistentFileId = io_parms->persistent_fid;
+	req->VolatileFileId = io_parms->volatile_fid;
+	req->ReadChannelInfoOffset = 0; /* reserved */
+	req->ReadChannelInfoLength = 0; /* reserved */
+	req->Channel = 0; /* reserved */
+	req->MinimumCount = 0;
+	req->Length = cpu_to_le32(io_parms->length);
+	req->Offset = cpu_to_le64(io_parms->offset);
+
+	if (request_type & CHAINED_REQUEST) {
+		if (!(request_type & END_OF_CHAIN)) {
+			/* 4 for rfc1002 length field */
+			req->hdr.NextCommand =
+				cpu_to_le32(get_rfc1002_length(req) + 4);
+		} else /* END_OF_CHAIN */
+			req->hdr.NextCommand = 0;
+		if (request_type & RELATED_REQUEST) {
+			req->hdr.Flags |= SMB2_FLAGS_RELATED_OPERATIONS;
+			/*
+			 * Related requests use info from previous read request
+			 * in chain.
+			 */
+			req->hdr.SessionId = 0xFFFFFFFF;
+			req->hdr.TreeId = 0xFFFFFFFF;
+			req->PersistentFileId = 0xFFFFFFFF;
+			req->VolatileFileId = 0xFFFFFFFF;
+		}
+	}
+	if (remaining_bytes > io_parms->length)
+		req->RemainingBytes = cpu_to_le32(remaining_bytes);
+	else
+		req->RemainingBytes = 0;
+
+	iov[0].iov_base = (char *)req;
+	/* 4 for rfc1002 length field */
+	iov[0].iov_len = get_rfc1002_length(req) + 4;
+	return rc;
+}
+
+static void
+smb2_readv_callback(struct mid_q_entry *mid)
+{
+	struct cifs_readdata *rdata = mid->callback_data;
+	struct cifs_tcon *tcon = tlink_tcon(rdata->cfile->tlink);
+	struct TCP_Server_Info *server = tcon->ses->server;
+	struct smb2_hdr *buf = (struct smb2_hdr *)rdata->iov[0].iov_base;
+	unsigned int credits_received = 1;
+
+	cFYI(1, "%s: mid=%llu state=%d result=%d bytes=%u", __func__,
+		mid->mid, mid->mid_state, rdata->result, rdata->bytes);
+
+	switch (mid->mid_state) {
+	case MID_RESPONSE_RECEIVED:
+		credits_received = le16_to_cpu(buf->CreditRequest);
+		/* result already set, check signature */
+		/* if (server->sec_mode &
+		    (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
+			if (smb2_verify_signature(mid->resp_buf, server))
+				cERROR(1, "Unexpected SMB signature"); */
+		/* FIXME: should this be counted toward the initiating task? */
+		task_io_account_read(rdata->bytes);
+		cifs_stats_bytes_read(tcon, rdata->bytes);
+		break;
+	case MID_REQUEST_SUBMITTED:
+	case MID_RETRY_NEEDED:
+		rdata->result = -EAGAIN;
+		break;
+	default:
+		if (rdata->result != -ENODATA)
+			rdata->result = -EIO;
+	}
+
+	if (rdata->result)
+		cifs_stats_fail_inc(tcon, SMB2_READ_HE);
+
+	queue_work(cifsiod_wq, &rdata->work);
+	DeleteMidQEntry(mid);
+	add_credits(server, credits_received, 0);
+}
+
+/* smb2_async_readv - send an async write, and set up mid to handle result */
+int
+smb2_async_readv(struct cifs_readdata *rdata)
+{
+	int rc;
+	struct smb2_hdr *buf;
+	struct cifs_io_parms io_parms;
+
+	cFYI(1, "%s: offset=%llu bytes=%u", __func__,
+		rdata->offset, rdata->bytes);
+
+	io_parms.tcon = tlink_tcon(rdata->cfile->tlink);
+	io_parms.offset = rdata->offset;
+	io_parms.length = rdata->bytes;
+	io_parms.persistent_fid = rdata->cfile->fid.persistent_fid;
+	io_parms.volatile_fid = rdata->cfile->fid.volatile_fid;
+	io_parms.pid = rdata->pid;
+	rc = smb2_new_read_req(&rdata->iov[0], &io_parms, 0, 0);
+	if (rc)
+		return rc;
+
+	buf = (struct smb2_hdr *)rdata->iov[0].iov_base;
+	/* 4 for rfc1002 length field */
+	rdata->iov[0].iov_len = get_rfc1002_length(rdata->iov[0].iov_base) + 4;
+
+	kref_get(&rdata->refcount);
+	rc = cifs_call_async(io_parms.tcon->ses->server, rdata->iov, 1,
+			     cifs_readv_receive, smb2_readv_callback,
+			     rdata, 0);
+	if (rc)
+		kref_put(&rdata->refcount, cifs_readdata_release);
+
+	cifs_small_buf_release(buf);
+	return rc;
+}
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index f5bf63f66971..4abb58106809 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -468,6 +468,34 @@ struct smb2_flush_rsp {
 	__le16 Reserved;
 } __packed;
 
+struct smb2_read_req {
+	struct smb2_hdr hdr;
+	__le16 StructureSize; /* Must be 49 */
+	__u8   Padding; /* offset from start of SMB2 header to place read */
+	__u8   Reserved;
+	__le32 Length;
+	__le64 Offset;
+	__u64  PersistentFileId; /* opaque endianness */
+	__u64  VolatileFileId; /* opaque endianness */
+	__le32 MinimumCount;
+	__le32 Channel; /* Reserved MBZ */
+	__le32 RemainingBytes;
+	__le16 ReadChannelInfoOffset; /* Reserved MBZ */
+	__le16 ReadChannelInfoLength; /* Reserved MBZ */
+	__u8   Buffer[1];
+} __packed;
+
+struct smb2_read_rsp {
+	struct smb2_hdr hdr;
+	__le16 StructureSize; /* Must be 17 */
+	__u8   DataOffset;
+	__u8   Reserved;
+	__le32 DataLength;
+	__le32 DataRemaining;
+	__u32  Reserved2;
+	__u8   Buffer[1];
+} __packed;
+
 struct smb2_echo_req {
 	struct smb2_hdr hdr;
 	__le16 StructureSize;	/* Must be 4 */
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index 51e6cd185c79..f442e4699974 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -97,6 +97,7 @@ extern int SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon,
 extern int SMB2_get_srv_num(const unsigned int xid, struct cifs_tcon *tcon,
 			    u64 persistent_fid, u64 volatile_fid,
 			    __le64 *uniqueid);
+extern int smb2_async_readv(struct cifs_readdata *rdata);
 extern int SMB2_echo(struct TCP_Server_Info *server);
 
 #endif			/* _SMB2PROTO_H */
-- 
cgit 


From c9de5c80d536e556568ccd45b9599870d4993b7d Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <pshilovsky@samba.org>
Date: Tue, 18 Sep 2012 16:20:29 -0700
Subject: CIFS: Move async write to ops struct

Signed-off-by: Pavel Shilovsky <pshilovsky@samba.org>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/cifsglob.h |  3 +++
 fs/cifs/cifssmb.c  |  4 +++-
 fs/cifs/file.c     | 13 +++++++++++--
 fs/cifs/smb1ops.c  |  1 +
 4 files changed, 18 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 93dd582bb8d1..aef167470654 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -173,6 +173,7 @@ struct cifs_fattr;
 struct smb_vol;
 struct cifs_fid;
 struct cifs_readdata;
+struct cifs_writedata;
 
 struct smb_version_operations {
 	int (*send_cancel)(struct TCP_Server_Info *, void *,
@@ -283,6 +284,8 @@ struct smb_version_operations {
 	int (*flush)(const unsigned int, struct cifs_tcon *, struct cifs_fid *);
 	/* async read from the server */
 	int (*async_readv)(struct cifs_readdata *);
+	/* async write to the server */
+	int (*async_writev)(struct cifs_writedata *);
 };
 
 struct smb_version_values {
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 8a07f218266f..2a9b27387708 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1926,6 +1926,7 @@ cifs_writev_requeue(struct cifs_writedata *wdata)
 {
 	int i, rc;
 	struct inode *inode = wdata->cfile->dentry->d_inode;
+	struct TCP_Server_Info *server;
 
 	for (i = 0; i < wdata->nr_pages; i++) {
 		lock_page(wdata->pages[i]);
@@ -1933,7 +1934,8 @@ cifs_writev_requeue(struct cifs_writedata *wdata)
 	}
 
 	do {
-		rc = cifs_async_writev(wdata);
+		server = tlink_tcon(wdata->cfile->tlink)->ses->server;
+		rc = server->ops->async_writev(wdata);
 	} while (rc == -EAGAIN);
 
 	for (i = 0; i < wdata->nr_pages; i++) {
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 29ac8ee46039..703c1648b068 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1754,6 +1754,7 @@ static int cifs_writepages(struct address_space *mapping,
 	bool done = false, scanned = false, range_whole = false;
 	pgoff_t end, index;
 	struct cifs_writedata *wdata;
+	struct TCP_Server_Info *server;
 	struct page *page;
 	int rc = 0;
 
@@ -1904,7 +1905,8 @@ retry:
 				break;
 			}
 			wdata->pid = wdata->cfile->pid;
-			rc = cifs_async_writev(wdata);
+			server = tlink_tcon(wdata->cfile->tlink)->ses->server;
+			rc = server->ops->async_writev(wdata);
 		} while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN);
 
 		for (i = 0; i < nr_pages; ++i)
@@ -2235,6 +2237,9 @@ static int
 cifs_uncached_retry_writev(struct cifs_writedata *wdata)
 {
 	int rc;
+	struct TCP_Server_Info *server;
+
+	server = tlink_tcon(wdata->cfile->tlink)->ses->server;
 
 	do {
 		if (wdata->cfile->invalidHandle) {
@@ -2242,7 +2247,7 @@ cifs_uncached_retry_writev(struct cifs_writedata *wdata)
 			if (rc != 0)
 				continue;
 		}
-		rc = cifs_async_writev(wdata);
+		rc = server->ops->async_writev(wdata);
 	} while (rc == -EAGAIN);
 
 	return rc;
@@ -2277,6 +2282,10 @@ cifs_iovec_write(struct file *file, const struct iovec *iov,
 	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
 	open_file = file->private_data;
 	tcon = tlink_tcon(open_file->tlink);
+
+	if (!tcon->ses->server->ops->async_writev)
+		return -ENOSYS;
+
 	offset = *poffset;
 
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index e2dbd22cb136..50c3697af5aa 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -786,6 +786,7 @@ struct smb_version_operations smb1_operations = {
 	.close = cifs_close_file,
 	.flush = cifs_flush_file,
 	.async_readv = cifs_async_readv,
+	.async_writev = cifs_async_writev,
 };
 
 struct smb_version_values smb1_values = {
-- 
cgit 


From 33319141252fd14b58cf13685156c23dcaac2527 Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <pshilovsky@samba.org>
Date: Tue, 18 Sep 2012 16:20:29 -0700
Subject: CIFS: Add SMB2 support for cifs_iovec_write

Signed-off-by: Pavel Shilovsky <pshilovsky@samba.org>
Signed-off-by: Steve French <smfrench@gmail.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/cifsfs.c    |   4 ++
 fs/cifs/cifsglob.h  |  47 ++++++++++++++++++++
 fs/cifs/cifsproto.h |  18 --------
 fs/cifs/cifssmb.c   |  26 -----------
 fs/cifs/smb2ops.c   |   1 +
 fs/cifs/smb2pdu.c   | 123 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/cifs/smb2pdu.h   |  30 +++++++++++++
 fs/cifs/smb2proto.h |   1 +
 8 files changed, 206 insertions(+), 44 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index db8a404a51dd..2829f374dbf7 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -89,6 +89,10 @@ extern mempool_t *cifs_mid_poolp;
 
 struct workqueue_struct	*cifsiod_wq;
 
+#ifdef CONFIG_HIGHMEM
+DEFINE_MUTEX(cifs_kmap_mutex);
+#endif
+
 static int
 cifs_read_super(struct super_block *sb)
 {
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index aef167470654..330f6259bb6d 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -582,6 +582,33 @@ get_next_mid(struct TCP_Server_Info *server)
 #define CIFS_KMAP_SIZE_LIMIT   (1<<24)
 #endif /* CONFIG_HIGHMEM */
 
+#ifdef CONFIG_HIGHMEM
+/*
+ * On arches that have high memory, kmap address space is limited. By
+ * serializing the kmap operations on those arches, we ensure that we don't
+ * end up with a bunch of threads in writeback with partially mapped page
+ * arrays, stuck waiting for kmap to come back. That situation prevents
+ * progress and can deadlock.
+ */
+
+extern struct mutex cifs_kmap_mutex;
+
+static inline void
+cifs_kmap_lock(void)
+{
+	mutex_lock(&cifs_kmap_mutex);
+}
+
+static inline void
+cifs_kmap_unlock(void)
+{
+	mutex_unlock(&cifs_kmap_mutex);
+}
+#else /* !CONFIG_HIGHMEM */
+#define cifs_kmap_lock() do { ; } while (0)
+#define cifs_kmap_unlock() do { ; } while (0)
+#endif /* CONFIG_HIGHMEM */
+
 /*
  * Macros to allow the TCP_Server_Info->net field and related code to drop out
  * when CONFIG_NET_NS isn't set.
@@ -891,6 +918,26 @@ struct cifs_readdata {
 	struct kvec			iov[1];
 };
 
+struct cifs_writedata;
+
+/* asynchronous write support */
+struct cifs_writedata {
+	struct kref			refcount;
+	struct list_head		list;
+	struct completion		done;
+	enum writeback_sync_modes	sync_mode;
+	struct work_struct		work;
+	struct cifsFileInfo		*cfile;
+	__u64				offset;
+	pid_t				pid;
+	unsigned int			bytes;
+	int				result;
+	void (*marshal_iov) (struct kvec *iov,
+			     struct cifs_writedata *wdata);
+	unsigned int			nr_pages;
+	struct page			*pages[1];
+};
+
 /*
  * Take a reference on the file private data. Must be called with
  * cifs_file_list_lock held.
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 6656eb5dbf70..8b320c7b582c 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -468,24 +468,6 @@ void cifs_readdata_release(struct kref *refcount);
 int cifs_async_readv(struct cifs_readdata *rdata);
 int cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid);
 
-/* asynchronous write support */
-struct cifs_writedata {
-	struct kref			refcount;
-	struct list_head		list;
-	struct completion		done;
-	enum writeback_sync_modes	sync_mode;
-	struct work_struct		work;
-	struct cifsFileInfo		*cfile;
-	__u64				offset;
-	pid_t				pid;
-	unsigned int			bytes;
-	int				result;
-	void (*marshal_iov) (struct kvec *iov,
-			     struct cifs_writedata *wdata);
-	unsigned int			nr_pages;
-	struct page			*pages[1];
-};
-
 int cifs_async_writev(struct cifs_writedata *wdata);
 void cifs_writev_complete(struct work_struct *work);
 struct cifs_writedata *cifs_writedata_alloc(unsigned int nr_pages,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 2a9b27387708..f27b13ea08b8 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -86,32 +86,6 @@ static struct {
 #endif /* CONFIG_CIFS_WEAK_PW_HASH */
 #endif /* CIFS_POSIX */
 
-#ifdef CONFIG_HIGHMEM
-/*
- * On arches that have high memory, kmap address space is limited. By
- * serializing the kmap operations on those arches, we ensure that we don't
- * end up with a bunch of threads in writeback with partially mapped page
- * arrays, stuck waiting for kmap to come back. That situation prevents
- * progress and can deadlock.
- */
-static DEFINE_MUTEX(cifs_kmap_mutex);
-
-static inline void
-cifs_kmap_lock(void)
-{
-	mutex_lock(&cifs_kmap_mutex);
-}
-
-static inline void
-cifs_kmap_unlock(void)
-{
-	mutex_unlock(&cifs_kmap_mutex);
-}
-#else /* !CONFIG_HIGHMEM */
-#define cifs_kmap_lock() do { ; } while(0)
-#define cifs_kmap_unlock() do { ; } while(0)
-#endif /* CONFIG_HIGHMEM */
-
 /*
  * Mark as invalid, all open files on tree connections since they
  * were closed when session to server was lost.
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index d9ca357d9809..da31235023fb 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -434,6 +434,7 @@ struct smb_version_operations smb21_operations = {
 	.close = smb2_close_file,
 	.flush = smb2_flush_file,
 	.async_readv = smb2_async_readv,
+	.async_writev = smb2_async_writev,
 };
 
 struct smb_version_values smb21_values = {
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index e18671852d41..cb6acc7b1ca8 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -33,6 +33,7 @@
 #include <linux/vfs.h>
 #include <linux/task_io_accounting_ops.h>
 #include <linux/uaccess.h>
+#include <linux/pagemap.h>
 #include <linux/xattr.h>
 #include "smb2pdu.h"
 #include "cifsglob.h"
@@ -1327,3 +1328,125 @@ smb2_async_readv(struct cifs_readdata *rdata)
 	cifs_small_buf_release(buf);
 	return rc;
 }
+
+/*
+ * Check the mid_state and signature on received buffer (if any), and queue the
+ * workqueue completion task.
+ */
+static void
+smb2_writev_callback(struct mid_q_entry *mid)
+{
+	struct cifs_writedata *wdata = mid->callback_data;
+	struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink);
+	unsigned int written;
+	struct smb2_write_rsp *rsp = (struct smb2_write_rsp *)mid->resp_buf;
+	unsigned int credits_received = 1;
+
+	switch (mid->mid_state) {
+	case MID_RESPONSE_RECEIVED:
+		credits_received = le16_to_cpu(rsp->hdr.CreditRequest);
+		wdata->result = smb2_check_receive(mid, tcon->ses->server, 0);
+		if (wdata->result != 0)
+			break;
+
+		written = le32_to_cpu(rsp->DataLength);
+		/*
+		 * Mask off high 16 bits when bytes written as returned
+		 * by the server is greater than bytes requested by the
+		 * client. OS/2 servers are known to set incorrect
+		 * CountHigh values.
+		 */
+		if (written > wdata->bytes)
+			written &= 0xFFFF;
+
+		if (written < wdata->bytes)
+			wdata->result = -ENOSPC;
+		else
+			wdata->bytes = written;
+		break;
+	case MID_REQUEST_SUBMITTED:
+	case MID_RETRY_NEEDED:
+		wdata->result = -EAGAIN;
+		break;
+	default:
+		wdata->result = -EIO;
+		break;
+	}
+
+	if (wdata->result)
+		cifs_stats_fail_inc(tcon, SMB2_WRITE_HE);
+
+	queue_work(cifsiod_wq, &wdata->work);
+	DeleteMidQEntry(mid);
+	add_credits(tcon->ses->server, credits_received, 0);
+}
+
+/* smb2_async_writev - send an async write, and set up mid to handle result */
+int
+smb2_async_writev(struct cifs_writedata *wdata)
+{
+	int i, rc = -EACCES;
+	struct smb2_write_req *req = NULL;
+	struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink);
+	struct kvec *iov = NULL;
+
+	rc = small_smb2_init(SMB2_WRITE, tcon, (void **) &req);
+	if (rc)
+		goto async_writev_out;
+
+	/* 1 iov per page + 1 for header */
+	iov = kzalloc((wdata->nr_pages + 1) * sizeof(*iov), GFP_NOFS);
+	if (iov == NULL) {
+		rc = -ENOMEM;
+		goto async_writev_out;
+	}
+
+	req->hdr.ProcessId = cpu_to_le32(wdata->cfile->pid);
+
+	req->PersistentFileId = wdata->cfile->fid.persistent_fid;
+	req->VolatileFileId = wdata->cfile->fid.volatile_fid;
+	req->WriteChannelInfoOffset = 0;
+	req->WriteChannelInfoLength = 0;
+	req->Channel = 0;
+	req->Offset = cpu_to_le64(wdata->offset);
+	/* 4 for rfc1002 length field */
+	req->DataOffset = cpu_to_le16(
+				offsetof(struct smb2_write_req, Buffer) - 4);
+	req->RemainingBytes = 0;
+
+	/* 4 for rfc1002 length field and 1 for Buffer */
+	iov[0].iov_len = get_rfc1002_length(req) + 4 - 1;
+	iov[0].iov_base = (char *)req;
+
+	/*
+	 * This function should marshal up the page array into the kvec
+	 * array, reserving [0] for the header. It should kmap the pages
+	 * and set the iov_len properly for each one. It may also set
+	 * wdata->bytes too.
+	 */
+	cifs_kmap_lock();
+	wdata->marshal_iov(iov, wdata);
+	cifs_kmap_unlock();
+
+	cFYI(1, "async write at %llu %u bytes", wdata->offset, wdata->bytes);
+
+	req->Length = cpu_to_le32(wdata->bytes);
+
+	inc_rfc1001_len(&req->hdr, wdata->bytes - 1 /* Buffer */);
+
+	kref_get(&wdata->refcount);
+	rc = cifs_call_async(tcon->ses->server, iov, wdata->nr_pages + 1,
+			     NULL, smb2_writev_callback, wdata, 0);
+
+	if (rc)
+		kref_put(&wdata->refcount, cifs_writedata_release);
+
+	/* send is done, unmap pages */
+	for (i = 0; i < wdata->nr_pages; i++)
+		kunmap(wdata->pages[i]);
+
+async_writev_out:
+	cifs_small_buf_release(req);
+	kfree(iov);
+	return rc;
+}
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index 4abb58106809..21ec9ed280f9 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -496,6 +496,36 @@ struct smb2_read_rsp {
 	__u8   Buffer[1];
 } __packed;
 
+/* For write request Flags field below the following flag is defined: */
+#define SMB2_WRITEFLAG_WRITE_THROUGH 0x00000001
+
+struct smb2_write_req {
+	struct smb2_hdr hdr;
+	__le16 StructureSize; /* Must be 49 */
+	__le16 DataOffset; /* offset from start of SMB2 header to write data */
+	__le32 Length;
+	__le64 Offset;
+	__u64  PersistentFileId; /* opaque endianness */
+	__u64  VolatileFileId; /* opaque endianness */
+	__le32 Channel; /* Reserved MBZ */
+	__le32 RemainingBytes;
+	__le16 WriteChannelInfoOffset; /* Reserved MBZ */
+	__le16 WriteChannelInfoLength; /* Reserved MBZ */
+	__le32 Flags;
+	__u8   Buffer[1];
+} __packed;
+
+struct smb2_write_rsp {
+	struct smb2_hdr hdr;
+	__le16 StructureSize; /* Must be 17 */
+	__u8   DataOffset;
+	__u8   Reserved;
+	__le32 DataLength;
+	__le32 DataRemaining;
+	__u32  Reserved2;
+	__u8   Buffer[1];
+} __packed;
+
 struct smb2_echo_req {
 	struct smb2_hdr hdr;
 	__le16 StructureSize;	/* Must be 4 */
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index f442e4699974..ec983be0ba31 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -98,6 +98,7 @@ extern int SMB2_get_srv_num(const unsigned int xid, struct cifs_tcon *tcon,
 			    u64 persistent_fid, u64 volatile_fid,
 			    __le64 *uniqueid);
 extern int smb2_async_readv(struct cifs_readdata *rdata);
+extern int smb2_async_writev(struct cifs_writedata *wdata);
 extern int SMB2_echo(struct TCP_Server_Info *server);
 
 #endif			/* _SMB2PROTO_H */
-- 
cgit 


From f9c6e234c3ca64b8d49336908df99948518d6261 Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <pshilovsky@samba.org>
Date: Tue, 18 Sep 2012 16:20:29 -0700
Subject: CIFS: Move readpage code to ops struct

Signed-off-by: Pavel Shilovsky <pshilovsky@samba.org>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/cifsglob.h |  5 +++++
 fs/cifs/file.c     | 28 +++++++++++++++++-----------
 fs/cifs/smb1ops.c  | 10 ++++++++++
 3 files changed, 32 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 330f6259bb6d..5b1751d81901 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -174,6 +174,7 @@ struct smb_vol;
 struct cifs_fid;
 struct cifs_readdata;
 struct cifs_writedata;
+struct cifs_io_parms;
 
 struct smb_version_operations {
 	int (*send_cancel)(struct TCP_Server_Info *, void *,
@@ -286,6 +287,10 @@ struct smb_version_operations {
 	int (*async_readv)(struct cifs_readdata *);
 	/* async write to the server */
 	int (*async_writev)(struct cifs_writedata *);
+	/* sync read from the server */
+	int (*sync_read)(const unsigned int, struct cifsFileInfo *,
+			 struct cifs_io_parms *, unsigned int *, char **,
+			 int *);
 };
 
 struct smb_version_values {
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 703c1648b068..fae03c52f314 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2782,8 +2782,8 @@ ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
 	return cifs_user_readv(iocb, iov, nr_segs, pos);
 }
 
-static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
-			 loff_t *poffset)
+static ssize_t
+cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
 {
 	int rc = -EACCES;
 	unsigned int bytes_read = 0;
@@ -2792,8 +2792,9 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
 	unsigned int rsize;
 	struct cifs_sb_info *cifs_sb;
 	struct cifs_tcon *tcon;
+	struct TCP_Server_Info *server;
 	unsigned int xid;
-	char *current_offset;
+	char *cur_offset;
 	struct cifsFileInfo *open_file;
 	struct cifs_io_parms io_parms;
 	int buf_type = CIFS_NO_BUFFER;
@@ -2812,6 +2813,12 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
 	}
 	open_file = file->private_data;
 	tcon = tlink_tcon(open_file->tlink);
+	server = tcon->ses->server;
+
+	if (!server->ops->sync_read) {
+		free_xid(xid);
+		return -ENOSYS;
+	}
 
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
 		pid = open_file->pid;
@@ -2821,9 +2828,8 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
 	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
 		cFYI(1, "attempting read on write only file instance");
 
-	for (total_read = 0, current_offset = read_data;
-	     read_size > total_read;
-	     total_read += bytes_read, current_offset += bytes_read) {
+	for (total_read = 0, cur_offset = read_data; read_size > total_read;
+	     total_read += bytes_read, cur_offset += bytes_read) {
 		current_read_size = min_t(uint, read_size - total_read, rsize);
 		/*
 		 * For windows me and 9x we do not want to request more than it
@@ -2841,13 +2847,13 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
 				if (rc != 0)
 					break;
 			}
-			io_parms.netfid = open_file->fid.netfid;
 			io_parms.pid = pid;
 			io_parms.tcon = tcon;
-			io_parms.offset = *poffset;
+			io_parms.offset = *offset;
 			io_parms.length = current_read_size;
-			rc = CIFSSMBRead(xid, &io_parms, &bytes_read,
-					 &current_offset, &buf_type);
+			rc = server->ops->sync_read(xid, open_file, &io_parms,
+						    &bytes_read, &cur_offset,
+						    &buf_type);
 		}
 		if (rc || (bytes_read == 0)) {
 			if (total_read) {
@@ -2858,7 +2864,7 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
 			}
 		} else {
 			cifs_stats_bytes_read(tcon, total_read);
-			*poffset += bytes_read;
+			*offset += bytes_read;
 		}
 	}
 	free_xid(xid);
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index 50c3697af5aa..cea958ee8b7a 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -739,6 +739,15 @@ cifs_flush_file(const unsigned int xid, struct cifs_tcon *tcon,
 	return CIFSSMBFlush(xid, tcon, fid->netfid);
 }
 
+static int
+cifs_sync_read(const unsigned int xid, struct cifsFileInfo *cfile,
+	       struct cifs_io_parms *parms, unsigned int *bytes_read,
+	       char **buf, int *buf_type)
+{
+	parms->netfid = cfile->fid.netfid;
+	return CIFSSMBRead(xid, parms, bytes_read, buf, buf_type);
+}
+
 struct smb_version_operations smb1_operations = {
 	.send_cancel = send_nt_cancel,
 	.compare_fids = cifs_compare_fids,
@@ -787,6 +796,7 @@ struct smb_version_operations smb1_operations = {
 	.flush = cifs_flush_file,
 	.async_readv = cifs_async_readv,
 	.async_writev = cifs_async_writev,
+	.sync_read = cifs_sync_read,
 };
 
 struct smb_version_values smb1_values = {
-- 
cgit 


From d8e050398d23ef7c019c96200b80d73f4e5cec0c Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <pshilovsky@samba.org>
Date: Tue, 18 Sep 2012 16:20:30 -0700
Subject: CIFS: Add readpage support for SMB2

Signed-off-by: Pavel Shilovsky <pshilovsky@samba.org>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/smb2ops.c   | 12 ++++++++++++
 fs/cifs/smb2pdu.c   | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/cifs/smb2proto.h |  2 ++
 3 files changed, 65 insertions(+)

(limited to 'fs')

diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index da31235023fb..fb289d2abae7 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -393,6 +393,17 @@ smb2_read_data_length(char *buf)
 	return le32_to_cpu(rsp->DataLength);
 }
 
+
+static int
+smb2_sync_read(const unsigned int xid, struct cifsFileInfo *cfile,
+	       struct cifs_io_parms *parms, unsigned int *bytes_read,
+	       char **buf, int *buf_type)
+{
+	parms->persistent_fid = cfile->fid.persistent_fid;
+	parms->volatile_fid = cfile->fid.volatile_fid;
+	return SMB2_read(xid, parms, bytes_read, buf, buf_type);
+}
+
 struct smb_version_operations smb21_operations = {
 	.setup_request = smb2_setup_request,
 	.setup_async_request = smb2_setup_async_request,
@@ -435,6 +446,7 @@ struct smb_version_operations smb21_operations = {
 	.flush = smb2_flush_file,
 	.async_readv = smb2_async_readv,
 	.async_writev = smb2_async_writev,
+	.sync_read = smb2_sync_read,
 };
 
 struct smb_version_values smb21_values = {
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index cb6acc7b1ca8..23c569386f32 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1329,6 +1329,57 @@ smb2_async_readv(struct cifs_readdata *rdata)
 	return rc;
 }
 
+int
+SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms,
+	  unsigned int *nbytes, char **buf, int *buf_type)
+{
+	int resp_buftype, rc = -EACCES;
+	struct smb2_read_rsp *rsp = NULL;
+	struct kvec iov[1];
+
+	*nbytes = 0;
+	rc = smb2_new_read_req(iov, io_parms, 0, 0);
+	if (rc)
+		return rc;
+
+	rc = SendReceive2(xid, io_parms->tcon->ses, iov, 1,
+			  &resp_buftype, CIFS_LOG_ERROR);
+
+	rsp = (struct smb2_read_rsp *)iov[0].iov_base;
+
+	if (rsp->hdr.Status == STATUS_END_OF_FILE) {
+		free_rsp_buf(resp_buftype, iov[0].iov_base);
+		return 0;
+	}
+
+	if (rc) {
+		cifs_stats_fail_inc(io_parms->tcon, SMB2_READ_HE);
+		cERROR(1, "Send error in read = %d", rc);
+	} else {
+		*nbytes = le32_to_cpu(rsp->DataLength);
+		if ((*nbytes > CIFS_MAX_MSGSIZE) ||
+		    (*nbytes > io_parms->length)) {
+			cFYI(1, "bad length %d for count %d", *nbytes,
+				io_parms->length);
+			rc = -EIO;
+			*nbytes = 0;
+		}
+	}
+
+	if (*buf) {
+		memcpy(*buf, (char *)rsp->hdr.ProtocolId + rsp->DataOffset,
+		       *nbytes);
+		free_rsp_buf(resp_buftype, iov[0].iov_base);
+	} else if (resp_buftype != CIFS_NO_BUFFER) {
+		*buf = iov[0].iov_base;
+		if (resp_buftype == CIFS_SMALL_BUFFER)
+			*buf_type = CIFS_SMALL_BUFFER;
+		else if (resp_buftype == CIFS_LARGE_BUFFER)
+			*buf_type = CIFS_LARGE_BUFFER;
+	}
+	return rc;
+}
+
 /*
  * Check the mid_state and signature on received buffer (if any), and queue the
  * workqueue completion task.
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index ec983be0ba31..97e62f3df410 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -98,6 +98,8 @@ extern int SMB2_get_srv_num(const unsigned int xid, struct cifs_tcon *tcon,
 			    u64 persistent_fid, u64 volatile_fid,
 			    __le64 *uniqueid);
 extern int smb2_async_readv(struct cifs_readdata *rdata);
+extern int SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms,
+		     unsigned int *nbytes, char **buf, int *buf_type);
 extern int smb2_async_writev(struct cifs_writedata *wdata);
 extern int SMB2_echo(struct TCP_Server_Info *server);
 
-- 
cgit 


From ba9ad7257ae50b8aa72a3f44da839830e065363c Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <pshilovsky@samba.org>
Date: Tue, 18 Sep 2012 16:20:30 -0700
Subject: CIFS: Move writepage to ops struct

Signed-off-by: Pavel Shilovsky <pshilovsky@samba.org>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/cifsglob.h  |  4 ++++
 fs/cifs/cifsproto.h |  3 +--
 fs/cifs/cifssmb.c   |  6 ++----
 fs/cifs/file.c      | 36 ++++++++++++++++++++----------------
 fs/cifs/smb1ops.c   | 11 +++++++++++
 5 files changed, 38 insertions(+), 22 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 5b1751d81901..48c7c83a7a99 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -291,6 +291,10 @@ struct smb_version_operations {
 	int (*sync_read)(const unsigned int, struct cifsFileInfo *,
 			 struct cifs_io_parms *, unsigned int *, char **,
 			 int *);
+	/* sync write to the server */
+	int (*sync_write)(const unsigned int, struct cifsFileInfo *,
+			  struct cifs_io_parms *, unsigned int *, struct kvec *,
+			  unsigned long);
 };
 
 struct smb_version_values {
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 8b320c7b582c..f20d0c8ee7ce 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -369,8 +369,7 @@ extern int CIFSSMBWrite(const unsigned int xid, struct cifs_io_parms *io_parms,
 			unsigned int *nbytes, const char *buf,
 			const char __user *ubuf, const int long_op);
 extern int CIFSSMBWrite2(const unsigned int xid, struct cifs_io_parms *io_parms,
-			unsigned int *nbytes, struct kvec *iov, const int nvec,
-			const int long_op);
+			unsigned int *nbytes, struct kvec *iov, const int nvec);
 extern int CIFSGetSrvInodeNumber(const unsigned int xid, struct cifs_tcon *tcon,
 				 const char *search_name, __u64 *inode_number,
 				 const struct nls_table *nls_codepage,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index f27b13ea08b8..733119dad493 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -2123,8 +2123,7 @@ async_writev_out:
 
 int
 CIFSSMBWrite2(const unsigned int xid, struct cifs_io_parms *io_parms,
-	      unsigned int *nbytes, struct kvec *iov, int n_vec,
-	      const int long_op)
+	      unsigned int *nbytes, struct kvec *iov, int n_vec)
 {
 	int rc = -EACCES;
 	WRITE_REQ *pSMB = NULL;
@@ -2195,8 +2194,7 @@ CIFSSMBWrite2(const unsigned int xid, struct cifs_io_parms *io_parms,
 		iov[0].iov_len = smb_hdr_len + 8;
 
 
-	rc = SendReceive2(xid, tcon->ses, iov, n_vec + 1, &resp_buf_type,
-			  long_op);
+	rc = SendReceive2(xid, tcon->ses, iov, n_vec + 1, &resp_buf_type, 0);
 	cifs_stats_inc(&tcon->stats.cifs_stats.num_writes);
 	if (rc) {
 		cFYI(1, "Send error Write2 = %d", rc);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index fae03c52f314..39fff77e38d4 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1477,15 +1477,16 @@ cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
 		cifsi->server_eof = end_of_write;
 }
 
-static ssize_t cifs_write(struct cifsFileInfo *open_file, __u32 pid,
-			  const char *write_data, size_t write_size,
-			  loff_t *poffset)
+static ssize_t
+cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
+	   size_t write_size, loff_t *offset)
 {
 	int rc = 0;
 	unsigned int bytes_written = 0;
 	unsigned int total_written;
 	struct cifs_sb_info *cifs_sb;
-	struct cifs_tcon *pTcon;
+	struct cifs_tcon *tcon;
+	struct TCP_Server_Info *server;
 	unsigned int xid;
 	struct dentry *dentry = open_file->dentry;
 	struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode);
@@ -1494,9 +1495,13 @@ static ssize_t cifs_write(struct cifsFileInfo *open_file, __u32 pid,
 	cifs_sb = CIFS_SB(dentry->d_sb);
 
 	cFYI(1, "write %zd bytes to offset %lld of %s", write_size,
-	   *poffset, dentry->d_name.name);
+	     *offset, dentry->d_name.name);
+
+	tcon = tlink_tcon(open_file->tlink);
+	server = tcon->ses->server;
 
-	pTcon = tlink_tcon(open_file->tlink);
+	if (!server->ops->sync_write)
+		return -ENOSYS;
 
 	xid = get_xid();
 
@@ -1522,13 +1527,12 @@ static ssize_t cifs_write(struct cifsFileInfo *open_file, __u32 pid,
 			/* iov[0] is reserved for smb header */
 			iov[1].iov_base = (char *)write_data + total_written;
 			iov[1].iov_len = len;
-			io_parms.netfid = open_file->fid.netfid;
 			io_parms.pid = pid;
-			io_parms.tcon = pTcon;
-			io_parms.offset = *poffset;
+			io_parms.tcon = tcon;
+			io_parms.offset = *offset;
 			io_parms.length = len;
-			rc = CIFSSMBWrite2(xid, &io_parms, &bytes_written, iov,
-					   1, 0);
+			rc = server->ops->sync_write(xid, open_file, &io_parms,
+						     &bytes_written, iov, 1);
 		}
 		if (rc || (bytes_written == 0)) {
 			if (total_written)
@@ -1539,18 +1543,18 @@ static ssize_t cifs_write(struct cifsFileInfo *open_file, __u32 pid,
 			}
 		} else {
 			spin_lock(&dentry->d_inode->i_lock);
-			cifs_update_eof(cifsi, *poffset, bytes_written);
+			cifs_update_eof(cifsi, *offset, bytes_written);
 			spin_unlock(&dentry->d_inode->i_lock);
-			*poffset += bytes_written;
+			*offset += bytes_written;
 		}
 	}
 
-	cifs_stats_bytes_written(pTcon, total_written);
+	cifs_stats_bytes_written(tcon, total_written);
 
 	if (total_written > 0) {
 		spin_lock(&dentry->d_inode->i_lock);
-		if (*poffset > dentry->d_inode->i_size)
-			i_size_write(dentry->d_inode, *poffset);
+		if (*offset > dentry->d_inode->i_size)
+			i_size_write(dentry->d_inode, *offset);
 		spin_unlock(&dentry->d_inode->i_lock);
 	}
 	mark_inode_dirty_sync(dentry->d_inode);
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index cea958ee8b7a..aa55c2fbf793 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -748,6 +748,16 @@ cifs_sync_read(const unsigned int xid, struct cifsFileInfo *cfile,
 	return CIFSSMBRead(xid, parms, bytes_read, buf, buf_type);
 }
 
+static int
+cifs_sync_write(const unsigned int xid, struct cifsFileInfo *cfile,
+		struct cifs_io_parms *parms, unsigned int *written,
+		struct kvec *iov, unsigned long nr_segs)
+{
+
+	parms->netfid = cfile->fid.netfid;
+	return CIFSSMBWrite2(xid, parms, written, iov, nr_segs);
+}
+
 struct smb_version_operations smb1_operations = {
 	.send_cancel = send_nt_cancel,
 	.compare_fids = cifs_compare_fids,
@@ -797,6 +807,7 @@ struct smb_version_operations smb1_operations = {
 	.async_readv = cifs_async_readv,
 	.async_writev = cifs_async_writev,
 	.sync_read = cifs_sync_read,
+	.sync_write = cifs_sync_write,
 };
 
 struct smb_version_values smb1_values = {
-- 
cgit 


From 009d344398bb3e844b31eb9e6a7860748c6f6dd3 Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <pshilovsky@samba.org>
Date: Tue, 18 Sep 2012 16:20:30 -0700
Subject: CIFS: Add writepage support for SMB2

Signed-off-by: Pavel Shilovsky <pshilovsky@samba.org>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/smb2ops.c   | 12 +++++++++++
 fs/cifs/smb2pdu.c   | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/cifs/smb2proto.h |  2 ++
 3 files changed, 75 insertions(+)

(limited to 'fs')

diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index fb289d2abae7..f9c3dbee9010 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -404,6 +404,17 @@ smb2_sync_read(const unsigned int xid, struct cifsFileInfo *cfile,
 	return SMB2_read(xid, parms, bytes_read, buf, buf_type);
 }
 
+static int
+smb2_sync_write(const unsigned int xid, struct cifsFileInfo *cfile,
+		struct cifs_io_parms *parms, unsigned int *written,
+		struct kvec *iov, unsigned long nr_segs)
+{
+
+	parms->persistent_fid = cfile->fid.persistent_fid;
+	parms->volatile_fid = cfile->fid.volatile_fid;
+	return SMB2_write(xid, parms, written, iov, nr_segs);
+}
+
 struct smb_version_operations smb21_operations = {
 	.setup_request = smb2_setup_request,
 	.setup_async_request = smb2_setup_async_request,
@@ -447,6 +458,7 @@ struct smb_version_operations smb21_operations = {
 	.async_readv = smb2_async_readv,
 	.async_writev = smb2_async_writev,
 	.sync_read = smb2_sync_read,
+	.sync_write = smb2_sync_write,
 };
 
 struct smb_version_values smb21_values = {
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 23c569386f32..00dc45a7881c 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1501,3 +1501,64 @@ async_writev_out:
 	kfree(iov);
 	return rc;
 }
+
+/*
+ * SMB2_write function gets iov pointer to kvec array with n_vec as a length.
+ * The length field from io_parms must be at least 1 and indicates a number of
+ * elements with data to write that begins with position 1 in iov array. All
+ * data length is specified by count.
+ */
+int
+SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms,
+	   unsigned int *nbytes, struct kvec *iov, int n_vec)
+{
+	int rc = 0;
+	struct smb2_write_req *req = NULL;
+	struct smb2_write_rsp *rsp = NULL;
+	int resp_buftype;
+	*nbytes = 0;
+
+	if (n_vec < 1)
+		return rc;
+
+	rc = small_smb2_init(SMB2_WRITE, io_parms->tcon, (void **) &req);
+	if (rc)
+		return rc;
+
+	if (io_parms->tcon->ses->server == NULL)
+		return -ECONNABORTED;
+
+	req->hdr.ProcessId = cpu_to_le32(io_parms->pid);
+
+	req->PersistentFileId = io_parms->persistent_fid;
+	req->VolatileFileId = io_parms->volatile_fid;
+	req->WriteChannelInfoOffset = 0;
+	req->WriteChannelInfoLength = 0;
+	req->Channel = 0;
+	req->Length = cpu_to_le32(io_parms->length);
+	req->Offset = cpu_to_le64(io_parms->offset);
+	/* 4 for rfc1002 length field */
+	req->DataOffset = cpu_to_le16(
+				offsetof(struct smb2_write_req, Buffer) - 4);
+	req->RemainingBytes = 0;
+
+	iov[0].iov_base = (char *)req;
+	/* 4 for rfc1002 length field and 1 for Buffer */
+	iov[0].iov_len = get_rfc1002_length(req) + 4 - 1;
+
+	/* length of entire message including data to be written */
+	inc_rfc1001_len(req, io_parms->length - 1 /* Buffer */);
+
+	rc = SendReceive2(xid, io_parms->tcon->ses, iov, n_vec + 1,
+			  &resp_buftype, 0);
+
+	if (rc) {
+		cifs_stats_fail_inc(io_parms->tcon, SMB2_WRITE_HE);
+		cERROR(1, "Send error in write = %d", rc);
+	} else {
+		rsp = (struct smb2_write_rsp *)iov[0].iov_base;
+		*nbytes = le32_to_cpu(rsp->DataLength);
+		free_rsp_buf(resp_buftype, rsp);
+	}
+	return rc;
+}
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index 97e62f3df410..192d0c7d91eb 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -101,6 +101,8 @@ extern int smb2_async_readv(struct cifs_readdata *rdata);
 extern int SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms,
 		     unsigned int *nbytes, char **buf, int *buf_type);
 extern int smb2_async_writev(struct cifs_writedata *wdata);
+extern int SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms,
+		      unsigned int *nbytes, struct kvec *iov, int n_vec);
 extern int SMB2_echo(struct TCP_Server_Info *server);
 
 #endif			/* _SMB2PROTO_H */
-- 
cgit 


From 3c1bf7e48e9e463b65b1b90da4500a93dd2b27a7 Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <pshilovsky@samba.org>
Date: Tue, 18 Sep 2012 16:20:30 -0700
Subject: CIFS: Enable signing in SMB2

Use hmac-sha256 and rather than hmac-md5 that is used for CIFS/SMB.

Signature field in SMB2 header is 16 bytes instead of 8 bytes.

Automatically enable signing by client when requested by the server
when signing ability is available to the client.

Signed-off-by: Shirish Pargaonkar <shirishpargaonkar@gmail.com>
Signed-off-by: Sachin Prabhu <sprabhu@redhat.com>
Signed-off-by: Pavel Shilovsky <piastryyy@gmail.com>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/Kconfig         |   1 +
 fs/cifs/cifsencrypt.c   |  30 ++++++++-
 fs/cifs/cifsglob.h      |   2 +
 fs/cifs/cifsproto.h     |   1 +
 fs/cifs/smb2glob.h      |   4 ++
 fs/cifs/smb2pdu.c       |  52 +++++++++++++--
 fs/cifs/smb2proto.h     |   2 +
 fs/cifs/smb2transport.c | 165 +++++++++++++++++++++++++++++++++++++++++++++---
 fs/cifs/transport.c     |  24 +++----
 9 files changed, 253 insertions(+), 28 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index a08306a8bec9..802930179c2b 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -9,6 +9,7 @@ config CIFS
 	select CRYPTO_ARC4
 	select CRYPTO_ECB
 	select CRYPTO_DES
+	select CRYPTO_SHA256
 	help
 	  This is the client VFS module for the Common Internet File System
 	  (CIFS) protocol which is the successor to the Server Message Block
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 6a0d741159f0..724738c1a560 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -686,12 +686,17 @@ calc_seckey(struct cifs_ses *ses)
 void
 cifs_crypto_shash_release(struct TCP_Server_Info *server)
 {
+	if (server->secmech.hmacsha256)
+		crypto_free_shash(server->secmech.hmacsha256);
+
 	if (server->secmech.md5)
 		crypto_free_shash(server->secmech.md5);
 
 	if (server->secmech.hmacmd5)
 		crypto_free_shash(server->secmech.hmacmd5);
 
+	kfree(server->secmech.sdeschmacsha256);
+
 	kfree(server->secmech.sdeschmacmd5);
 
 	kfree(server->secmech.sdescmd5);
@@ -716,6 +721,13 @@ cifs_crypto_shash_allocate(struct TCP_Server_Info *server)
 		goto crypto_allocate_md5_fail;
 	}
 
+	server->secmech.hmacsha256 = crypto_alloc_shash("hmac(sha256)", 0, 0);
+	if (IS_ERR(server->secmech.hmacsha256)) {
+		cERROR(1, "could not allocate crypto hmacsha256\n");
+		rc = PTR_ERR(server->secmech.hmacsha256);
+		goto crypto_allocate_hmacsha256_fail;
+	}
+
 	size = sizeof(struct shash_desc) +
 			crypto_shash_descsize(server->secmech.hmacmd5);
 	server->secmech.sdeschmacmd5 = kmalloc(size, GFP_KERNEL);
@@ -727,7 +739,6 @@ cifs_crypto_shash_allocate(struct TCP_Server_Info *server)
 	server->secmech.sdeschmacmd5->shash.tfm = server->secmech.hmacmd5;
 	server->secmech.sdeschmacmd5->shash.flags = 0x0;
 
-
 	size = sizeof(struct shash_desc) +
 			crypto_shash_descsize(server->secmech.md5);
 	server->secmech.sdescmd5 = kmalloc(size, GFP_KERNEL);
@@ -739,12 +750,29 @@ cifs_crypto_shash_allocate(struct TCP_Server_Info *server)
 	server->secmech.sdescmd5->shash.tfm = server->secmech.md5;
 	server->secmech.sdescmd5->shash.flags = 0x0;
 
+	size = sizeof(struct shash_desc) +
+			crypto_shash_descsize(server->secmech.hmacsha256);
+	server->secmech.sdeschmacsha256 = kmalloc(size, GFP_KERNEL);
+	if (!server->secmech.sdeschmacsha256) {
+		cERROR(1, "%s: Can't alloc hmacsha256\n", __func__);
+		rc = -ENOMEM;
+		goto crypto_allocate_hmacsha256_sdesc_fail;
+	}
+	server->secmech.sdeschmacsha256->shash.tfm = server->secmech.hmacsha256;
+	server->secmech.sdeschmacsha256->shash.flags = 0x0;
+
 	return 0;
 
+crypto_allocate_hmacsha256_sdesc_fail:
+	kfree(server->secmech.sdescmd5);
+
 crypto_allocate_md5_sdesc_fail:
 	kfree(server->secmech.sdeschmacmd5);
 
 crypto_allocate_hmacmd5_sdesc_fail:
+	crypto_free_shash(server->secmech.hmacsha256);
+
+crypto_allocate_hmacsha256_fail:
 	crypto_free_shash(server->secmech.md5);
 
 crypto_allocate_md5_fail:
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 48c7c83a7a99..6217df707909 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -128,8 +128,10 @@ struct sdesc {
 struct cifs_secmech {
 	struct crypto_shash *hmacmd5; /* hmac-md5 hash function */
 	struct crypto_shash *md5; /* md5 hash function */
+	struct crypto_shash *hmacsha256; /* hmac-sha256 hash function */
 	struct sdesc *sdeschmacmd5;  /* ctxt to generate ntlmv2 hash, CR1 */
 	struct sdesc *sdescmd5; /* ctxt to generate cifs/smb signature */
+	struct sdesc *sdeschmacsha256;  /* ctxt to generate smb2 signature */
 };
 
 /* per smb session structure/fields */
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index f20d0c8ee7ce..541738fcaebc 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -65,6 +65,7 @@ extern char *cifs_compose_mount_options(const char *sb_mountdata,
 extern struct mid_q_entry *AllocMidQEntry(const struct smb_hdr *smb_buffer,
 					struct TCP_Server_Info *server);
 extern void DeleteMidQEntry(struct mid_q_entry *midEntry);
+extern void cifs_delete_mid(struct mid_q_entry *mid);
 extern void cifs_wake_up_task(struct mid_q_entry *mid);
 extern int cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov,
 			   unsigned int nvec, mid_receive_t *receive,
diff --git a/fs/cifs/smb2glob.h b/fs/cifs/smb2glob.h
index 11505d73ff32..8635574ea8b6 100644
--- a/fs/cifs/smb2glob.h
+++ b/fs/cifs/smb2glob.h
@@ -47,4 +47,8 @@
 #define END_OF_CHAIN 4
 #define RELATED_REQUEST 8
 
+#define SMB2_SIGNATURE_SIZE (16)
+#define SMB2_NTLMV2_SESSKEY_SIZE (16)
+#define SMB2_HMACSHA256_SIZE (32)
+
 #endif	/* _SMB2_GLOB_H */
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 00dc45a7881c..30c92c847fe8 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -118,9 +118,9 @@ smb2_hdr_assemble(struct smb2_hdr *hdr, __le16 smb2_cmd /* command */ ,
 	/* BB how does SMB2 do case sensitive? */
 	/* if (tcon->nocase)
 		hdr->Flags |= SMBFLG_CASELESS; */
-	/* if (tcon->ses && tcon->ses->server &&
+	if (tcon->ses && tcon->ses->server &&
 	    (tcon->ses->server->sec_mode & SECMODE_SIGN_REQUIRED))
-		hdr->Flags |= SMB2_FLAGS_SIGNED; */
+		hdr->Flags |= SMB2_FLAGS_SIGNED;
 out:
 	pdu->StructureSize2 = cpu_to_le16(parmsize);
 	return;
@@ -441,6 +441,38 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
 		rc = -EIO;
 		goto neg_exit;
 	}
+
+	cFYI(1, "sec_flags 0x%x", sec_flags);
+	if (sec_flags & CIFSSEC_MUST_SIGN) {
+		cFYI(1, "Signing required");
+		if (!(server->sec_mode & (SMB2_NEGOTIATE_SIGNING_REQUIRED |
+		      SMB2_NEGOTIATE_SIGNING_ENABLED))) {
+			cERROR(1, "signing required but server lacks support");
+			rc = -EOPNOTSUPP;
+			goto neg_exit;
+		}
+		server->sec_mode |= SECMODE_SIGN_REQUIRED;
+	} else if (sec_flags & CIFSSEC_MAY_SIGN) {
+		cFYI(1, "Signing optional");
+		if (server->sec_mode & SMB2_NEGOTIATE_SIGNING_REQUIRED) {
+			cFYI(1, "Server requires signing");
+			server->sec_mode |= SECMODE_SIGN_REQUIRED;
+		} else {
+			server->sec_mode &=
+				~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED);
+		}
+	} else {
+		cFYI(1, "Signing disabled");
+		if (server->sec_mode & SMB2_NEGOTIATE_SIGNING_REQUIRED) {
+			cERROR(1, "Server requires packet signing to be enabled"
+				  " in /proc/fs/cifs/SecurityFlags.");
+			rc = -EOPNOTSUPP;
+			goto neg_exit;
+		}
+		server->sec_mode &=
+			~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED);
+	}
+
 #ifdef CONFIG_SMB2_ASN1  /* BB REMOVEME when updated asn1.c ready */
 	rc = decode_neg_token_init(security_blob, blob_length,
 				   &server->sec_type);
@@ -669,6 +701,8 @@ SMB2_logoff(const unsigned int xid, struct cifs_ses *ses)
 
 	 /* since no tcon, smb2_init can not do this, so do here */
 	req->hdr.SessionId = ses->Suid;
+	if (server->sec_mode & SECMODE_SIGN_REQUIRED)
+		req->hdr.Flags |= SMB2_FLAGS_SIGNED;
 
 	rc = SendReceiveNoRsp(xid, ses, (char *) &req->hdr, 0);
 	/*
@@ -1268,10 +1302,16 @@ smb2_readv_callback(struct mid_q_entry *mid)
 	case MID_RESPONSE_RECEIVED:
 		credits_received = le16_to_cpu(buf->CreditRequest);
 		/* result already set, check signature */
-		/* if (server->sec_mode &
-		    (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
-			if (smb2_verify_signature(mid->resp_buf, server))
-				cERROR(1, "Unexpected SMB signature"); */
+		if (server->sec_mode &
+		    (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) {
+			int rc;
+
+			rc = smb2_verify_signature2(rdata->iov, rdata->nr_iov,
+						    server);
+			if (rc)
+				cERROR(1, "SMB signature verification returned "
+				       "error = %d", rc);
+		}
 		/* FIXME: should this be counted toward the initiating task? */
 		task_io_account_read(rdata->bytes);
 		cifs_stats_bytes_read(tcon, rdata->bytes);
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index 192d0c7d91eb..dbbdc39fa209 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -39,6 +39,8 @@ extern char *smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr);
 extern __le16 *cifs_convert_path_to_utf16(const char *from,
 					  struct cifs_sb_info *cifs_sb);
 
+extern int smb2_verify_signature2(struct kvec *, unsigned int,
+				  struct TCP_Server_Info *);
 extern int smb2_check_receive(struct mid_q_entry *mid,
 			      struct TCP_Server_Info *server, bool log_error);
 extern int smb2_setup_request(struct cifs_ses *ses, struct kvec *iov,
diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c
index 31f5d420b3ea..66479f252ae5 100644
--- a/fs/cifs/smb2transport.c
+++ b/fs/cifs/smb2transport.c
@@ -36,6 +36,149 @@
 #include "smb2proto.h"
 #include "cifs_debug.h"
 #include "smb2status.h"
+#include "smb2glob.h"
+
+static int
+smb2_calc_signature2(const struct kvec *iov, int n_vec,
+		     struct TCP_Server_Info *server)
+{
+	int i, rc;
+	unsigned char smb2_signature[SMB2_HMACSHA256_SIZE];
+	unsigned char *sigptr = smb2_signature;
+	struct smb2_hdr *smb2_pdu = (struct smb2_hdr *)iov[0].iov_base;
+
+	memset(smb2_signature, 0x0, SMB2_HMACSHA256_SIZE);
+	memset(smb2_pdu->Signature, 0x0, SMB2_SIGNATURE_SIZE);
+
+	rc = crypto_shash_setkey(server->secmech.hmacsha256,
+		server->session_key.response, SMB2_NTLMV2_SESSKEY_SIZE);
+	if (rc) {
+		cERROR(1, "%s: Could not update with response\n", __func__);
+		return rc;
+	}
+
+	rc = crypto_shash_init(&server->secmech.sdeschmacsha256->shash);
+	if (rc) {
+		cERROR(1, "%s: Could not init md5\n", __func__);
+		return rc;
+	}
+
+	for (i = 0; i < n_vec; i++) {
+		if (iov[i].iov_len == 0)
+			continue;
+		if (iov[i].iov_base == NULL) {
+			cERROR(1, "null iovec entry");
+			return -EIO;
+		}
+		/*
+		 * The first entry includes a length field (which does not get
+		 * signed that occupies the first 4 bytes before the header).
+		 */
+		if (i == 0) {
+			if (iov[0].iov_len <= 8) /* cmd field at offset 9 */
+				break; /* nothing to sign or corrupt header */
+			rc =
+			crypto_shash_update(
+				&server->secmech.sdeschmacsha256->shash,
+				iov[i].iov_base + 4, iov[i].iov_len - 4);
+		} else {
+			rc =
+			crypto_shash_update(
+				&server->secmech.sdeschmacsha256->shash,
+				iov[i].iov_base, iov[i].iov_len);
+		}
+		if (rc) {
+			cERROR(1, "%s: Could not update with payload\n",
+							__func__);
+			return rc;
+		}
+	}
+
+	rc = crypto_shash_final(&server->secmech.sdeschmacsha256->shash,
+				sigptr);
+	if (rc)
+		cERROR(1, "%s: Could not generate sha256 hash\n", __func__);
+
+	memcpy(smb2_pdu->Signature, sigptr, SMB2_SIGNATURE_SIZE);
+
+	return rc;
+}
+
+/* must be called with server->srv_mutex held */
+static int
+smb2_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server)
+{
+	int rc = 0;
+	struct smb2_hdr *smb2_pdu = iov[0].iov_base;
+
+	if (!(smb2_pdu->Flags & SMB2_FLAGS_SIGNED) ||
+	    server->tcpStatus == CifsNeedNegotiate)
+		return rc;
+
+	if (!server->session_estab) {
+		strncpy(smb2_pdu->Signature, "BSRSPYL", 8);
+		return rc;
+	}
+
+	rc = smb2_calc_signature2(iov, n_vec, server);
+
+	return rc;
+}
+
+int
+smb2_verify_signature2(struct kvec *iov, unsigned int n_vec,
+		       struct TCP_Server_Info *server)
+{
+	unsigned int rc;
+	char server_response_sig[16];
+	struct smb2_hdr *smb2_pdu = (struct smb2_hdr *)iov[0].iov_base;
+
+	if ((smb2_pdu->Command == SMB2_NEGOTIATE) ||
+	    (smb2_pdu->Command == SMB2_OPLOCK_BREAK) ||
+	    (!server->session_estab))
+		return 0;
+
+	/*
+	 * BB what if signatures are supposed to be on for session but
+	 * server does not send one? BB
+	 */
+
+	/* Do not need to verify session setups with signature "BSRSPYL " */
+	if (memcmp(smb2_pdu->Signature, "BSRSPYL ", 8) == 0)
+		cFYI(1, "dummy signature received for smb command 0x%x",
+			smb2_pdu->Command);
+
+	/*
+	 * Save off the origiginal signature so we can modify the smb and check
+	 * our calculated signature against what the server sent.
+	 */
+	memcpy(server_response_sig, smb2_pdu->Signature, SMB2_SIGNATURE_SIZE);
+
+	memset(smb2_pdu->Signature, 0, SMB2_SIGNATURE_SIZE);
+
+	mutex_lock(&server->srv_mutex);
+	rc = smb2_calc_signature2(iov, n_vec, server);
+	mutex_unlock(&server->srv_mutex);
+
+	if (rc)
+		return rc;
+
+	if (memcmp(server_response_sig, smb2_pdu->Signature,
+		   SMB2_SIGNATURE_SIZE))
+		return -EACCES;
+	else
+		return 0;
+}
+
+static int
+smb2_verify_signature(struct smb2_hdr *smb2_pdu, struct TCP_Server_Info *server)
+{
+	struct kvec iov;
+
+	iov.iov_base = (char *)smb2_pdu;
+	iov.iov_len = get_rfc1002_length(smb2_pdu) + 4;
+	return smb2_verify_signature2(&iov, 1, server);
+}
 
 /*
  * Set message id for the request. Should be called after wait_for_free_request
@@ -118,12 +261,15 @@ smb2_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server,
 
 	dump_smb(mid->resp_buf, min_t(u32, 80, len));
 	/* convert the length into a more usable form */
-	/* BB - uncomment with SMB2 signing implementation */
-	/* if ((len > 24) &&
+	if ((len > 24) &&
 	    (server->sec_mode & (SECMODE_SIGN_REQUIRED|SECMODE_SIGN_ENABLED))) {
-		if (smb2_verify_signature(mid->resp_buf, server))
-			cERROR(1, "Unexpected SMB signature");
-	} */
+		int rc;
+
+		rc = smb2_verify_signature(mid->resp_buf, server);
+		if (rc)
+			cERROR(1, "SMB signature verification returned error = "
+			       "%d", rc);
+	}
 
 	return map_smb2_to_linux_error(mid->resp_buf, log_error);
 }
@@ -141,9 +287,9 @@ smb2_setup_request(struct cifs_ses *ses, struct kvec *iov,
 	rc = smb2_get_mid_entry(ses, hdr, &mid);
 	if (rc)
 		return rc;
-	/* rc = smb2_sign_smb2(iov, nvec, ses->server);
+	rc = smb2_sign_smb2(iov, nvec, ses->server);
 	if (rc)
-		delete_mid(mid); */
+		cifs_delete_mid(mid);
 	*ret_mid = mid;
 	return rc;
 }
@@ -162,11 +308,12 @@ smb2_setup_async_request(struct TCP_Server_Info *server, struct kvec *iov,
 	if (mid == NULL)
 		return -ENOMEM;
 
-	/* rc = smb2_sign_smb2(iov, nvec, server);
+	rc = smb2_sign_smb2(iov, nvec, server);
 	if (rc) {
 		DeleteMidQEntry(mid);
 		return rc;
-	}*/
+	}
+
 	*ret_mid = mid;
 	return rc;
 }
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index d9b639b95fa8..c4d7825dfc0a 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -109,8 +109,8 @@ DeleteMidQEntry(struct mid_q_entry *midEntry)
 	mempool_free(midEntry, cifs_mid_poolp);
 }
 
-static void
-delete_mid(struct mid_q_entry *mid)
+void
+cifs_delete_mid(struct mid_q_entry *mid)
 {
 	spin_lock(&GlobalMid_Lock);
 	list_del(&mid->qhead);
@@ -419,7 +419,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov,
 	if (rc == 0)
 		return 0;
 
-	delete_mid(mid);
+	cifs_delete_mid(mid);
 	add_credits(server, 1, optype);
 	wake_up(&server->request_q);
 	return rc;
@@ -532,7 +532,7 @@ cifs_setup_request(struct cifs_ses *ses, struct kvec *iov,
 		return rc;
 	rc = cifs_sign_smbv(iov, nvec, ses->server, &mid->sequence_number);
 	if (rc)
-		delete_mid(mid);
+		cifs_delete_mid(mid);
 	*ret_mid = mid;
 	return rc;
 }
@@ -652,11 +652,11 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses,
 	rc = ses->server->ops->check_receive(midQ, ses->server,
 					     flags & CIFS_LOG_ERROR);
 
-	/* mark it so buf will not be freed by delete_mid */
+	/* mark it so buf will not be freed by cifs_delete_mid */
 	if ((flags & CIFS_NO_RESP) == 0)
 		midQ->resp_buf = NULL;
 out:
-	delete_mid(midQ);
+	cifs_delete_mid(midQ);
 	add_credits(ses->server, credits, optype);
 
 	return rc;
@@ -762,7 +762,7 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses,
 	memcpy(out_buf, midQ->resp_buf, *pbytes_returned + 4);
 	rc = cifs_check_receive(midQ, ses->server, 0);
 out:
-	delete_mid(midQ);
+	cifs_delete_mid(midQ);
 	add_credits(ses->server, 1, 0);
 
 	return rc;
@@ -846,7 +846,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon,
 
 	rc = cifs_sign_smb(in_buf, ses->server, &midQ->sequence_number);
 	if (rc) {
-		delete_mid(midQ);
+		cifs_delete_mid(midQ);
 		mutex_unlock(&ses->server->srv_mutex);
 		return rc;
 	}
@@ -859,7 +859,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon,
 	mutex_unlock(&ses->server->srv_mutex);
 
 	if (rc < 0) {
-		delete_mid(midQ);
+		cifs_delete_mid(midQ);
 		return rc;
 	}
 
@@ -880,7 +880,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon,
 			   blocking lock to return. */
 			rc = send_cancel(ses->server, in_buf, midQ);
 			if (rc) {
-				delete_mid(midQ);
+				cifs_delete_mid(midQ);
 				return rc;
 			}
 		} else {
@@ -892,7 +892,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon,
 			/* If we get -ENOLCK back the lock may have
 			   already been removed. Don't exit in this case. */
 			if (rc && rc != -ENOLCK) {
-				delete_mid(midQ);
+				cifs_delete_mid(midQ);
 				return rc;
 			}
 		}
@@ -929,7 +929,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon,
 	memcpy(out_buf, midQ->resp_buf, *pbytes_returned + 4);
 	rc = cifs_check_receive(midQ, ses->server, 0);
 out:
-	delete_mid(midQ);
+	cifs_delete_mid(midQ);
 	if (rstart && rc == -EACCES)
 		return -ERESTARTSYS;
 	return rc;
-- 
cgit 


From 8ceb984379462f94bdebef3288d569c6e1f912ea Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <pshilovsky@samba.org>
Date: Tue, 18 Sep 2012 16:20:30 -0700
Subject: CIFS: Move rename to ops struct

Signed-off-by: Pavel Shilovsky <pshilovsky@samba.org>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/cifsglob.h  |  3 ++
 fs/cifs/cifsproto.h |  5 ++-
 fs/cifs/cifssmb.c   | 22 +++++++------
 fs/cifs/inode.c     | 92 ++++++++++++++++++++++++++++-------------------------
 fs/cifs/smb1ops.c   |  1 +
 5 files changed, 66 insertions(+), 57 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 6217df707909..a0105c547ffd 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -275,6 +275,9 @@ struct smb_version_operations {
 	/* open, rename and delete file */
 	int (*rename_pending_delete)(const char *, struct dentry *,
 				     const unsigned int);
+	/* send rename request */
+	int (*rename)(const unsigned int, struct cifs_tcon *, const char *,
+		      const char *, struct cifs_sb_info *);
 	/* open a file for non-posix mounts */
 	int (*open)(const unsigned int, struct cifs_tcon *, const char *, int,
 		    int, int, struct cifs_fid *, __u32 *, FILE_ALL_INFO *,
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 541738fcaebc..eecd233c6912 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -310,9 +310,8 @@ extern int CIFSPOSIXDelFile(const unsigned int xid, struct cifs_tcon *tcon,
 extern int CIFSSMBDelFile(const unsigned int xid, struct cifs_tcon *tcon,
 			  const char *name, struct cifs_sb_info *cifs_sb);
 extern int CIFSSMBRename(const unsigned int xid, struct cifs_tcon *tcon,
-			const char *fromName, const char *toName,
-			const struct nls_table *nls_codepage,
-			int remap_special_chars);
+			 const char *from_name, const char *to_name,
+			 struct cifs_sb_info *cifs_sb);
 extern int CIFSSMBRenameOpenFile(const unsigned int xid, struct cifs_tcon *tcon,
 				 int netfid, const char *target_name,
 				 const struct nls_table *nls_codepage,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 733119dad493..b8cd335d6f11 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -2531,8 +2531,8 @@ CIFSSMBFlush(const unsigned int xid, struct cifs_tcon *tcon, int smb_file_id)
 
 int
 CIFSSMBRename(const unsigned int xid, struct cifs_tcon *tcon,
-	      const char *fromName, const char *toName,
-	      const struct nls_table *nls_codepage, int remap)
+	      const char *from_name, const char *to_name,
+	      struct cifs_sb_info *cifs_sb)
 {
 	int rc = 0;
 	RENAME_REQ *pSMB = NULL;
@@ -2540,6 +2540,7 @@ CIFSSMBRename(const unsigned int xid, struct cifs_tcon *tcon,
 	int bytes_returned;
 	int name_len, name_len2;
 	__u16 count;
+	int remap = cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR;
 
 	cFYI(1, "In CIFSSMBRename");
 renameRetry:
@@ -2554,9 +2555,9 @@ renameRetry:
 			ATTR_DIRECTORY);
 
 	if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) {
-		name_len =
-		    cifsConvertToUTF16((__le16 *) pSMB->OldFileName, fromName,
-				       PATH_MAX, nls_codepage, remap);
+		name_len = cifsConvertToUTF16((__le16 *) pSMB->OldFileName,
+					      from_name, PATH_MAX,
+					      cifs_sb->local_nls, remap);
 		name_len++;	/* trailing null */
 		name_len *= 2;
 		pSMB->OldFileName[name_len] = 0x04;	/* pad */
@@ -2564,17 +2565,18 @@ renameRetry:
 		pSMB->OldFileName[name_len + 1] = 0x00;
 		name_len2 =
 		    cifsConvertToUTF16((__le16 *)&pSMB->OldFileName[name_len+2],
-				       toName, PATH_MAX, nls_codepage, remap);
+				       to_name, PATH_MAX, cifs_sb->local_nls,
+				       remap);
 		name_len2 += 1 /* trailing null */  + 1 /* Signature word */ ;
 		name_len2 *= 2;	/* convert to bytes */
 	} else {	/* BB improve the check for buffer overruns BB */
-		name_len = strnlen(fromName, PATH_MAX);
+		name_len = strnlen(from_name, PATH_MAX);
 		name_len++;	/* trailing null */
-		strncpy(pSMB->OldFileName, fromName, name_len);
-		name_len2 = strnlen(toName, PATH_MAX);
+		strncpy(pSMB->OldFileName, from_name, name_len);
+		name_len2 = strnlen(to_name, PATH_MAX);
 		name_len2++;	/* trailing null */
 		pSMB->OldFileName[name_len] = 0x04;  /* 2nd buffer format */
-		strncpy(&pSMB->OldFileName[name_len + 1], toName, name_len2);
+		strncpy(&pSMB->OldFileName[name_len + 1], to_name, name_len2);
 		name_len2++;	/* trailing null */
 		name_len2++;	/* signature byte */
 	}
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index c6f6b02cf3b5..2f3235f08c3f 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1512,29 +1512,32 @@ rmdir_exit:
 }
 
 static int
-cifs_do_rename(unsigned int xid, struct dentry *from_dentry,
-	       const char *fromPath, struct dentry *to_dentry,
-	       const char *toPath)
+cifs_do_rename(const unsigned int xid, struct dentry *from_dentry,
+	       const char *from_path, struct dentry *to_dentry,
+	       const char *to_path)
 {
 	struct cifs_sb_info *cifs_sb = CIFS_SB(from_dentry->d_sb);
 	struct tcon_link *tlink;
-	struct cifs_tcon *pTcon;
+	struct cifs_tcon *tcon;
+	struct TCP_Server_Info *server;
 	__u16 srcfid;
 	int oplock, rc;
 
 	tlink = cifs_sb_tlink(cifs_sb);
 	if (IS_ERR(tlink))
 		return PTR_ERR(tlink);
-	pTcon = tlink_tcon(tlink);
+	tcon = tlink_tcon(tlink);
+	server = tcon->ses->server;
+
+	if (!server->ops->rename)
+		return -ENOSYS;
 
 	/* try path-based rename first */
-	rc = CIFSSMBRename(xid, pTcon, fromPath, toPath, cifs_sb->local_nls,
-			   cifs_sb->mnt_cifs_flags &
-				CIFS_MOUNT_MAP_SPECIAL_CHR);
+	rc = server->ops->rename(xid, tcon, from_path, to_path, cifs_sb);
 
 	/*
-	 * don't bother with rename by filehandle unless file is busy and
-	 * source Note that cross directory moves do not work with
+	 * Don't bother with rename by filehandle unless file is busy and
+	 * source. Note that cross directory moves do not work with
 	 * rename by filehandle to various Windows servers.
 	 */
 	if (rc == 0 || rc != -ETXTBSY)
@@ -1545,29 +1548,28 @@ cifs_do_rename(unsigned int xid, struct dentry *from_dentry,
 		goto do_rename_exit;
 
 	/* open the file to be renamed -- we need DELETE perms */
-	rc = CIFSSMBOpen(xid, pTcon, fromPath, FILE_OPEN, DELETE,
+	rc = CIFSSMBOpen(xid, tcon, from_path, FILE_OPEN, DELETE,
 			 CREATE_NOT_DIR, &srcfid, &oplock, NULL,
 			 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
 				CIFS_MOUNT_MAP_SPECIAL_CHR);
-
 	if (rc == 0) {
-		rc = CIFSSMBRenameOpenFile(xid, pTcon, srcfid,
+		rc = CIFSSMBRenameOpenFile(xid, tcon, srcfid,
 				(const char *) to_dentry->d_name.name,
 				cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
 					CIFS_MOUNT_MAP_SPECIAL_CHR);
-
-		CIFSSMBClose(xid, pTcon, srcfid);
+		CIFSSMBClose(xid, tcon, srcfid);
 	}
 do_rename_exit:
 	cifs_put_tlink(tlink);
 	return rc;
 }
 
-int cifs_rename(struct inode *source_dir, struct dentry *source_dentry,
-	struct inode *target_dir, struct dentry *target_dentry)
+int
+cifs_rename(struct inode *source_dir, struct dentry *source_dentry,
+	    struct inode *target_dir, struct dentry *target_dentry)
 {
-	char *fromName = NULL;
-	char *toName = NULL;
+	char *from_name = NULL;
+	char *to_name = NULL;
 	struct cifs_sb_info *cifs_sb;
 	struct tcon_link *tlink;
 	struct cifs_tcon *tcon;
@@ -1588,25 +1590,25 @@ int cifs_rename(struct inode *source_dir, struct dentry *source_dentry,
 	 * we already have the rename sem so we do not need to
 	 * grab it again here to protect the path integrity
 	 */
-	fromName = build_path_from_dentry(source_dentry);
-	if (fromName == NULL) {
+	from_name = build_path_from_dentry(source_dentry);
+	if (from_name == NULL) {
 		rc = -ENOMEM;
 		goto cifs_rename_exit;
 	}
 
-	toName = build_path_from_dentry(target_dentry);
-	if (toName == NULL) {
+	to_name = build_path_from_dentry(target_dentry);
+	if (to_name == NULL) {
 		rc = -ENOMEM;
 		goto cifs_rename_exit;
 	}
 
-	rc = cifs_do_rename(xid, source_dentry, fromName,
-			    target_dentry, toName);
+	rc = cifs_do_rename(xid, source_dentry, from_name, target_dentry,
+			    to_name);
 
 	if (rc == -EEXIST && tcon->unix_ext) {
 		/*
-		 * Are src and dst hardlinks of same inode? We can
-		 * only tell with unix extensions enabled
+		 * Are src and dst hardlinks of same inode? We can only tell
+		 * with unix extensions enabled.
 		 */
 		info_buf_source =
 			kmalloc(2 * sizeof(FILE_UNIX_BASIC_INFO),
@@ -1617,19 +1619,19 @@ int cifs_rename(struct inode *source_dir, struct dentry *source_dentry,
 		}
 
 		info_buf_target = info_buf_source + 1;
-		tmprc = CIFSSMBUnixQPathInfo(xid, tcon, fromName,
-					info_buf_source,
-					cifs_sb->local_nls,
-					cifs_sb->mnt_cifs_flags &
-					CIFS_MOUNT_MAP_SPECIAL_CHR);
+		tmprc = CIFSSMBUnixQPathInfo(xid, tcon, from_name,
+					     info_buf_source,
+					     cifs_sb->local_nls,
+					     cifs_sb->mnt_cifs_flags &
+						CIFS_MOUNT_MAP_SPECIAL_CHR);
 		if (tmprc != 0)
 			goto unlink_target;
 
-		tmprc = CIFSSMBUnixQPathInfo(xid, tcon, toName,
-					info_buf_target,
-					cifs_sb->local_nls,
-					cifs_sb->mnt_cifs_flags &
-					CIFS_MOUNT_MAP_SPECIAL_CHR);
+		tmprc = CIFSSMBUnixQPathInfo(xid, tcon, to_name,
+					     info_buf_target,
+					     cifs_sb->local_nls,
+					     cifs_sb->mnt_cifs_flags &
+						CIFS_MOUNT_MAP_SPECIAL_CHR);
 
 		if (tmprc == 0 && (info_buf_source->UniqueId ==
 				   info_buf_target->UniqueId)) {
@@ -1637,8 +1639,11 @@ int cifs_rename(struct inode *source_dir, struct dentry *source_dentry,
 			rc = 0;
 			goto cifs_rename_exit;
 		}
-	} /* else ... BB we could add the same check for Windows by
-		     checking the UniqueId via FILE_INTERNAL_INFO */
+	}
+	/*
+	 * else ... BB we could add the same check for Windows by
+	 * checking the UniqueId via FILE_INTERNAL_INFO
+	 */
 
 unlink_target:
 	/* Try unlinking the target dentry if it's not negative */
@@ -1646,15 +1651,14 @@ unlink_target:
 		tmprc = cifs_unlink(target_dir, target_dentry);
 		if (tmprc)
 			goto cifs_rename_exit;
-
-		rc = cifs_do_rename(xid, source_dentry, fromName,
-				    target_dentry, toName);
+		rc = cifs_do_rename(xid, source_dentry, from_name,
+				    target_dentry, to_name);
 	}
 
 cifs_rename_exit:
 	kfree(info_buf_source);
-	kfree(fromName);
-	kfree(toName);
+	kfree(from_name);
+	kfree(to_name);
 	free_xid(xid);
 	cifs_put_tlink(tlink);
 	return rc;
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index aa55c2fbf793..377392003655 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -800,6 +800,7 @@ struct smb_version_operations smb1_operations = {
 	.rmdir = CIFSSMBRmDir,
 	.unlink = CIFSSMBDelFile,
 	.rename_pending_delete = cifs_rename_pending_delete,
+	.rename = CIFSSMBRename,
 	.open = cifs_open_file,
 	.set_fid = cifs_set_fid,
 	.close = cifs_close_file,
-- 
cgit 


From 35143eb5c2e3ae6c91b29144449d23f05f573796 Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <pshilovsky@samba.org>
Date: Tue, 18 Sep 2012 16:20:31 -0700
Subject: CIFS: Add SMB2 support for rename operation

Signed-off-by: Pavel Shilovsky <piastryyy@gmail.com>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/smb2inode.c |  25 ++++++++++++
 fs/cifs/smb2ops.c   |   1 +
 fs/cifs/smb2pdu.c   | 107 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/cifs/smb2pdu.h   |  28 ++++++++++++++
 fs/cifs/smb2proto.h |   6 +++
 5 files changed, 167 insertions(+)

(limited to 'fs')

diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c
index ee3a1ef686dd..a6952bafe331 100644
--- a/fs/cifs/smb2inode.c
+++ b/fs/cifs/smb2inode.c
@@ -74,6 +74,10 @@ smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon,
 		 * SMB2_open() call.
 		 */
 		break;
+	case SMB2_OP_RENAME:
+		tmprc = SMB2_rename(xid, tcon, persistent_fid, volatile_fid,
+				    (__le16 *)data);
+		break;
 	default:
 		cERROR(1, "Invalid command");
 		break;
@@ -170,3 +174,24 @@ smb2_unlink(const unsigned int xid, struct cifs_tcon *tcon, const char *name,
 				  0, CREATE_DELETE_ON_CLOSE, NULL,
 				  SMB2_OP_DELETE);
 }
+
+int
+smb2_rename_path(const unsigned int xid, struct cifs_tcon *tcon,
+		 const char *from_name, const char *to_name,
+		 struct cifs_sb_info *cifs_sb)
+{
+	__le16 *smb2_to_name = NULL;
+	int rc;
+
+	smb2_to_name = cifs_convert_path_to_utf16(to_name, cifs_sb);
+	if (smb2_to_name == NULL) {
+		rc = -ENOMEM;
+		goto smb2_rename_path;
+	}
+
+	rc = smb2_open_op_close(xid, tcon, cifs_sb, from_name, DELETE,
+				FILE_OPEN, 0, 0, smb2_to_name, SMB2_OP_RENAME);
+smb2_rename_path:
+	kfree(smb2_to_name);
+	return rc;
+}
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index f9c3dbee9010..5aaccb0f3aae 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -451,6 +451,7 @@ struct smb_version_operations smb21_operations = {
 	.mkdir_setinfo = smb2_mkdir_setinfo,
 	.rmdir = smb2_rmdir,
 	.unlink = smb2_unlink,
+	.rename = smb2_rename_path,
 	.open = smb2_open_file,
 	.set_fid = smb2_set_fid,
 	.close = smb2_close_file,
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 30c92c847fe8..1dc11ce7934e 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1602,3 +1602,110 @@ SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms,
 	}
 	return rc;
 }
+
+static int
+send_set_info(const unsigned int xid, struct cifs_tcon *tcon,
+	       u64 persistent_fid, u64 volatile_fid, int info_class,
+	       unsigned int num, void **data, unsigned int *size)
+{
+	struct smb2_set_info_req *req;
+	struct smb2_set_info_rsp *rsp = NULL;
+	struct kvec *iov;
+	int rc = 0;
+	int resp_buftype;
+	unsigned int i;
+	struct TCP_Server_Info *server;
+	struct cifs_ses *ses = tcon->ses;
+
+	if (ses && (ses->server))
+		server = ses->server;
+	else
+		return -EIO;
+
+	if (!num)
+		return -EINVAL;
+
+	iov = kmalloc(sizeof(struct kvec) * num, GFP_KERNEL);
+	if (!iov)
+		return -ENOMEM;
+
+	rc = small_smb2_init(SMB2_SET_INFO, tcon, (void **) &req);
+	if (rc) {
+		kfree(iov);
+		return rc;
+	}
+
+	req->InfoType = SMB2_O_INFO_FILE;
+	req->FileInfoClass = info_class;
+	req->PersistentFileId = persistent_fid;
+	req->VolatileFileId = volatile_fid;
+
+	/* 4 for RFC1001 length and 1 for Buffer */
+	req->BufferOffset =
+			cpu_to_le16(sizeof(struct smb2_set_info_req) - 1 - 4);
+	req->BufferLength = cpu_to_le32(*size);
+
+	inc_rfc1001_len(req, *size - 1 /* Buffer */);
+
+	memcpy(req->Buffer, *data, *size);
+
+	iov[0].iov_base = (char *)req;
+	/* 4 for RFC1001 length */
+	iov[0].iov_len = get_rfc1002_length(req) + 4;
+
+	for (i = 1; i < num; i++) {
+		inc_rfc1001_len(req, size[i]);
+		le32_add_cpu(&req->BufferLength, size[i]);
+		iov[i].iov_base = (char *)data[i];
+		iov[i].iov_len = size[i];
+	}
+
+	rc = SendReceive2(xid, ses, iov, num, &resp_buftype, 0);
+	rsp = (struct smb2_set_info_rsp *)iov[0].iov_base;
+
+	if (rc != 0) {
+		cifs_stats_fail_inc(tcon, SMB2_SET_INFO_HE);
+		goto out;
+	}
+
+	if (rsp == NULL) {
+		rc = -EIO;
+		goto out;
+	}
+
+out:
+	free_rsp_buf(resp_buftype, rsp);
+	kfree(iov);
+	return rc;
+}
+
+int
+SMB2_rename(const unsigned int xid, struct cifs_tcon *tcon,
+	    u64 persistent_fid, u64 volatile_fid, __le16 *target_file)
+{
+	struct smb2_file_rename_info info;
+	void **data;
+	unsigned int size[2];
+	int rc;
+	int len = (2 * UniStrnlen((wchar_t *)target_file, PATH_MAX));
+
+	data = kmalloc(sizeof(void *) * 2, GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	info.ReplaceIfExists = 1; /* 1 = replace existing target with new */
+			      /* 0 = fail if target already exists */
+	info.RootDirectory = 0;  /* MBZ for network ops (why does spec say?) */
+	info.FileNameLength = cpu_to_le32(len);
+
+	data[0] = &info;
+	size[0] = sizeof(struct smb2_file_rename_info);
+
+	data[1] = target_file;
+	size[1] = len + 2 /* null */;
+
+	rc = send_set_info(xid, tcon, persistent_fid, volatile_fid,
+			   FILE_RENAME_INFORMATION, 2, data, size);
+	kfree(data);
+	return rc;
+}
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index 21ec9ed280f9..b03ca37d0d58 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -568,6 +568,25 @@ struct smb2_query_info_rsp {
 	__u8   Buffer[1];
 } __packed;
 
+struct smb2_set_info_req {
+	struct smb2_hdr hdr;
+	__le16 StructureSize; /* Must be 33 */
+	__u8   InfoType;
+	__u8   FileInfoClass;
+	__le32 BufferLength;
+	__le16 BufferOffset;
+	__u16  Reserved;
+	__le32 AdditionalInformation;
+	__u64  PersistentFileId; /* opaque endianness */
+	__u64  VolatileFileId; /* opaque endianness */
+	__u8   Buffer[1];
+} __packed;
+
+struct smb2_set_info_rsp {
+	struct smb2_hdr hdr;
+	__le16 StructureSize; /* Must be 2 */
+} __packed;
+
 /*
  *	PDU infolevel structure definitions
  *	BB consider moving to a different header
@@ -625,6 +644,15 @@ struct smb2_file_internal_info {
 	__le64 IndexNumber;
 } __packed; /* level 6 Query */
 
+struct smb2_file_rename_info { /* encoding of request for level 10 */
+	__u8   ReplaceIfExists; /* 1 = replace existing target with new */
+				/* 0 = fail if target already exists */
+	__u8   Reserved[7];
+	__u64  RootDirectory;  /* MBZ for network operations (why says spec?) */
+	__le32 FileNameLength;
+	char   FileName[0];     /* New name to be assigned */
+} __packed; /* level 10 Set */
+
 /*
  * This level 18, although with struct with same name is different from cifs
  * level 0x107. Level 0x107 has an extra u64 between AccessFlags and
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index dbbdc39fa209..b43036e8ad2a 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -65,6 +65,9 @@ extern int smb2_rmdir(const unsigned int xid, struct cifs_tcon *tcon,
 		      const char *name, struct cifs_sb_info *cifs_sb);
 extern int smb2_unlink(const unsigned int xid, struct cifs_tcon *tcon,
 		       const char *name, struct cifs_sb_info *cifs_sb);
+extern int smb2_rename_path(const unsigned int xid, struct cifs_tcon *tcon,
+			    const char *from_name, const char *to_name,
+			    struct cifs_sb_info *cifs_sb);
 
 extern int smb2_open_file(const unsigned int xid, struct cifs_tcon *tcon,
 			  const char *full_path, int disposition,
@@ -106,5 +109,8 @@ extern int smb2_async_writev(struct cifs_writedata *wdata);
 extern int SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms,
 		      unsigned int *nbytes, struct kvec *iov, int n_vec);
 extern int SMB2_echo(struct TCP_Server_Info *server);
+extern int SMB2_rename(const unsigned int xid, struct cifs_tcon *tcon,
+		       u64 persistent_fid, u64 volatile_fid,
+		       __le16 *target_file);
 
 #endif			/* _SMB2PROTO_H */
-- 
cgit 


From d6e906f1b571d15ff5778a049802f6ef6f70159a Mon Sep 17 00:00:00 2001
From: Steve French <smfrench@gmail.com>
Date: Tue, 18 Sep 2012 16:20:31 -0700
Subject: CIFS: Move hardlink to ops struct

Signed-off-by: Pavel Shilovsky <pshilovsky@samba.org>
Signed-off-by: Steve French <sfrench@us.ibm.com>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/cifsglob.h  |  4 +++
 fs/cifs/cifsproto.h |  8 +++---
 fs/cifs/cifssmb.c   | 20 ++++++++-------
 fs/cifs/link.c      | 74 +++++++++++++++++++++++++++++++----------------------
 fs/cifs/smb1ops.c   |  1 +
 5 files changed, 63 insertions(+), 44 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index a0105c547ffd..8595d498ad80 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -278,6 +278,10 @@ struct smb_version_operations {
 	/* send rename request */
 	int (*rename)(const unsigned int, struct cifs_tcon *, const char *,
 		      const char *, struct cifs_sb_info *);
+	/* send create hardlink request */
+	int (*create_hardlink)(const unsigned int, struct cifs_tcon *,
+			       const char *, const char *,
+			       struct cifs_sb_info *);
 	/* open a file for non-posix mounts */
 	int (*open)(const unsigned int, struct cifs_tcon *, const char *, int,
 		    int, int, struct cifs_fid *, __u32 *, FILE_ALL_INFO *,
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index eecd233c6912..3c50555c7735 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -316,11 +316,9 @@ extern int CIFSSMBRenameOpenFile(const unsigned int xid, struct cifs_tcon *tcon,
 				 int netfid, const char *target_name,
 				 const struct nls_table *nls_codepage,
 				 int remap_special_chars);
-extern int CIFSCreateHardLink(const unsigned int xid,
-			struct cifs_tcon *tcon,
-			const char *fromName, const char *toName,
-			const struct nls_table *nls_codepage,
-			int remap_special_chars);
+extern int CIFSCreateHardLink(const unsigned int xid, struct cifs_tcon *tcon,
+			      const char *from_name, const char *to_name,
+			      struct cifs_sb_info *cifs_sb);
 extern int CIFSUnixCreateHardLink(const unsigned int xid,
 			struct cifs_tcon *tcon,
 			const char *fromName, const char *toName,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index b8cd335d6f11..eb3d2cf76e6e 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -2924,8 +2924,8 @@ createHardLinkRetry:
 
 int
 CIFSCreateHardLink(const unsigned int xid, struct cifs_tcon *tcon,
-		   const char *fromName, const char *toName,
-		   const struct nls_table *nls_codepage, int remap)
+		   const char *from_name, const char *to_name,
+		   struct cifs_sb_info *cifs_sb)
 {
 	int rc = 0;
 	NT_RENAME_REQ *pSMB = NULL;
@@ -2933,6 +2933,7 @@ CIFSCreateHardLink(const unsigned int xid, struct cifs_tcon *tcon,
 	int bytes_returned;
 	int name_len, name_len2;
 	__u16 count;
+	int remap = cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR;
 
 	cFYI(1, "In CIFSCreateHardLink");
 winCreateHardLinkRetry:
@@ -2952,8 +2953,8 @@ winCreateHardLinkRetry:
 
 	if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) {
 		name_len =
-		    cifsConvertToUTF16((__le16 *) pSMB->OldFileName, fromName,
-				       PATH_MAX, nls_codepage, remap);
+		    cifsConvertToUTF16((__le16 *) pSMB->OldFileName, from_name,
+				       PATH_MAX, cifs_sb->local_nls, remap);
 		name_len++;	/* trailing null */
 		name_len *= 2;
 
@@ -2962,17 +2963,18 @@ winCreateHardLinkRetry:
 		pSMB->OldFileName[name_len + 1] = 0x00; /* pad */
 		name_len2 =
 		    cifsConvertToUTF16((__le16 *)&pSMB->OldFileName[name_len+2],
-				       toName, PATH_MAX, nls_codepage, remap);
+				       to_name, PATH_MAX, cifs_sb->local_nls,
+				       remap);
 		name_len2 += 1 /* trailing null */  + 1 /* Signature word */ ;
 		name_len2 *= 2;	/* convert to bytes */
 	} else {	/* BB improve the check for buffer overruns BB */
-		name_len = strnlen(fromName, PATH_MAX);
+		name_len = strnlen(from_name, PATH_MAX);
 		name_len++;	/* trailing null */
-		strncpy(pSMB->OldFileName, fromName, name_len);
-		name_len2 = strnlen(toName, PATH_MAX);
+		strncpy(pSMB->OldFileName, from_name, name_len);
+		name_len2 = strnlen(to_name, PATH_MAX);
 		name_len2++;	/* trailing null */
 		pSMB->OldFileName[name_len] = 0x04;	/* 2nd buffer format */
-		strncpy(&pSMB->OldFileName[name_len + 1], toName, name_len2);
+		strncpy(&pSMB->OldFileName[name_len + 1], to_name, name_len2);
 		name_len2++;	/* trailing null */
 		name_len2++;	/* signature byte */
 	}
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index e6ce3b112875..51dc2fb6e854 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -391,72 +391,86 @@ cifs_hardlink(struct dentry *old_file, struct inode *inode,
 {
 	int rc = -EACCES;
 	unsigned int xid;
-	char *fromName = NULL;
-	char *toName = NULL;
+	char *from_name = NULL;
+	char *to_name = NULL;
 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
 	struct tcon_link *tlink;
-	struct cifs_tcon *pTcon;
+	struct cifs_tcon *tcon;
+	struct TCP_Server_Info *server;
 	struct cifsInodeInfo *cifsInode;
 
 	tlink = cifs_sb_tlink(cifs_sb);
 	if (IS_ERR(tlink))
 		return PTR_ERR(tlink);
-	pTcon = tlink_tcon(tlink);
+	tcon = tlink_tcon(tlink);
 
 	xid = get_xid();
 
-	fromName = build_path_from_dentry(old_file);
-	toName = build_path_from_dentry(direntry);
-	if ((fromName == NULL) || (toName == NULL)) {
+	from_name = build_path_from_dentry(old_file);
+	to_name = build_path_from_dentry(direntry);
+	if ((from_name == NULL) || (to_name == NULL)) {
 		rc = -ENOMEM;
 		goto cifs_hl_exit;
 	}
 
-	if (pTcon->unix_ext)
-		rc = CIFSUnixCreateHardLink(xid, pTcon, fromName, toName,
+	if (tcon->unix_ext)
+		rc = CIFSUnixCreateHardLink(xid, tcon, from_name, to_name,
 					    cifs_sb->local_nls,
 					    cifs_sb->mnt_cifs_flags &
 						CIFS_MOUNT_MAP_SPECIAL_CHR);
 	else {
-		rc = CIFSCreateHardLink(xid, pTcon, fromName, toName,
-					cifs_sb->local_nls,
-					cifs_sb->mnt_cifs_flags &
-						CIFS_MOUNT_MAP_SPECIAL_CHR);
+		server = tcon->ses->server;
+		if (!server->ops->create_hardlink)
+			return -ENOSYS;
+		rc = server->ops->create_hardlink(xid, tcon, from_name, to_name,
+						  cifs_sb);
 		if ((rc == -EIO) || (rc == -EINVAL))
 			rc = -EOPNOTSUPP;
 	}
 
 	d_drop(direntry);	/* force new lookup from server of target */
 
-	/* if source file is cached (oplocked) revalidate will not go to server
-	   until the file is closed or oplock broken so update nlinks locally */
+	/*
+	 * if source file is cached (oplocked) revalidate will not go to server
+	 * until the file is closed or oplock broken so update nlinks locally
+	 */
 	if (old_file->d_inode) {
 		cifsInode = CIFS_I(old_file->d_inode);
 		if (rc == 0) {
 			spin_lock(&old_file->d_inode->i_lock);
 			inc_nlink(old_file->d_inode);
 			spin_unlock(&old_file->d_inode->i_lock);
-/* BB should we make this contingent on superblock flag NOATIME? */
-/*			old_file->d_inode->i_ctime = CURRENT_TIME;*/
-			/* parent dir timestamps will update from srv
-			within a second, would it really be worth it
-			to set the parent dir cifs inode time to zero
-			to force revalidate (faster) for it too? */
+			/*
+			 * BB should we make this contingent on superblock flag
+			 * NOATIME?
+			 */
+			/* old_file->d_inode->i_ctime = CURRENT_TIME; */
+			/*
+			 * parent dir timestamps will update from srv within a
+			 * second, would it really be worth it to set the parent
+			 * dir cifs inode time to zero to force revalidate
+			 * (faster) for it too?
+			 */
 		}
-		/* if not oplocked will force revalidate to get info
-		   on source file from srv */
+		/*
+		 * if not oplocked will force revalidate to get info on source
+		 * file from srv
+		 */
 		cifsInode->time = 0;
 
-		/* Will update parent dir timestamps from srv within a second.
-		   Would it really be worth it to set the parent dir (cifs
-		   inode) time field to zero to force revalidate on parent
-		   directory faster ie
-			CIFS_I(inode)->time = 0;  */
+		/*
+		 * Will update parent dir timestamps from srv within a second.
+		 * Would it really be worth it to set the parent dir (cifs
+		 * inode) time field to zero to force revalidate on parent
+		 * directory faster ie
+		 *
+		 * CIFS_I(inode)->time = 0;
+		 */
 	}
 
 cifs_hl_exit:
-	kfree(fromName);
-	kfree(toName);
+	kfree(from_name);
+	kfree(to_name);
 	free_xid(xid);
 	cifs_put_tlink(tlink);
 	return rc;
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index 377392003655..e5d63444f0ff 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -801,6 +801,7 @@ struct smb_version_operations smb1_operations = {
 	.unlink = CIFSSMBDelFile,
 	.rename_pending_delete = cifs_rename_pending_delete,
 	.rename = CIFSSMBRename,
+	.create_hardlink = CIFSCreateHardLink,
 	.open = cifs_open_file,
 	.set_fid = cifs_set_fid,
 	.close = cifs_close_file,
-- 
cgit 


From 568798cc6211553e2494a6876fa19d064c822e79 Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <pshilovsky@samba.org>
Date: Tue, 18 Sep 2012 16:20:31 -0700
Subject: CIFS: Add SMB2 support for hardlink operation

Signed-off-by: Pavel Shilovsky <pshilovsky@samba.org>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/smb2glob.h  |  1 +
 fs/cifs/smb2inode.c | 34 ++++++++++++++++++++++++++++------
 fs/cifs/smb2ops.c   |  1 +
 fs/cifs/smb2pdu.c   | 31 +++++++++++++++++++++++++++++++
 fs/cifs/smb2pdu.h   |  9 +++++++++
 fs/cifs/smb2proto.h |  6 ++++++
 6 files changed, 76 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/smb2glob.h b/fs/cifs/smb2glob.h
index 8635574ea8b6..21555d8744fd 100644
--- a/fs/cifs/smb2glob.h
+++ b/fs/cifs/smb2glob.h
@@ -40,6 +40,7 @@
 #define SMB2_OP_MKDIR 5
 #define SMB2_OP_RENAME 6
 #define SMB2_OP_DELETE 7
+#define SMB2_OP_HARDLINK 8
 
 /* Used when constructing chained read requests. */
 #define CHAINED_REQUEST 1
diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c
index a6952bafe331..1921c9c87ccd 100644
--- a/fs/cifs/smb2inode.c
+++ b/fs/cifs/smb2inode.c
@@ -78,6 +78,10 @@ smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon,
 		tmprc = SMB2_rename(xid, tcon, persistent_fid, volatile_fid,
 				    (__le16 *)data);
 		break;
+	case SMB2_OP_HARDLINK:
+		tmprc = SMB2_set_hardlink(xid, tcon, persistent_fid,
+					  volatile_fid, (__le16 *)data);
+		break;
 	default:
 		cERROR(1, "Invalid command");
 		break;
@@ -175,10 +179,10 @@ smb2_unlink(const unsigned int xid, struct cifs_tcon *tcon, const char *name,
 				  SMB2_OP_DELETE);
 }
 
-int
-smb2_rename_path(const unsigned int xid, struct cifs_tcon *tcon,
-		 const char *from_name, const char *to_name,
-		 struct cifs_sb_info *cifs_sb)
+static int
+smb2_set_path_attr(const unsigned int xid, struct cifs_tcon *tcon,
+		   const char *from_name, const char *to_name,
+		   struct cifs_sb_info *cifs_sb, __u32 access, int command)
 {
 	__le16 *smb2_to_name = NULL;
 	int rc;
@@ -189,9 +193,27 @@ smb2_rename_path(const unsigned int xid, struct cifs_tcon *tcon,
 		goto smb2_rename_path;
 	}
 
-	rc = smb2_open_op_close(xid, tcon, cifs_sb, from_name, DELETE,
-				FILE_OPEN, 0, 0, smb2_to_name, SMB2_OP_RENAME);
+	rc = smb2_open_op_close(xid, tcon, cifs_sb, from_name, access,
+				FILE_OPEN, 0, 0, smb2_to_name, command);
 smb2_rename_path:
 	kfree(smb2_to_name);
 	return rc;
 }
+
+int
+smb2_rename_path(const unsigned int xid, struct cifs_tcon *tcon,
+		 const char *from_name, const char *to_name,
+		 struct cifs_sb_info *cifs_sb)
+{
+	return smb2_set_path_attr(xid, tcon, from_name, to_name, cifs_sb,
+				  DELETE, SMB2_OP_RENAME);
+}
+
+int
+smb2_create_hardlink(const unsigned int xid, struct cifs_tcon *tcon,
+		     const char *from_name, const char *to_name,
+		     struct cifs_sb_info *cifs_sb)
+{
+	return smb2_set_path_attr(xid, tcon, from_name, to_name, cifs_sb,
+				  FILE_READ_ATTRIBUTES, SMB2_OP_HARDLINK);
+}
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 5aaccb0f3aae..75693e983e76 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -452,6 +452,7 @@ struct smb_version_operations smb21_operations = {
 	.rmdir = smb2_rmdir,
 	.unlink = smb2_unlink,
 	.rename = smb2_rename_path,
+	.create_hardlink = smb2_create_hardlink,
 	.open = smb2_open_file,
 	.set_fid = smb2_set_fid,
 	.close = smb2_close_file,
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 1dc11ce7934e..a684c4ab42d6 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1709,3 +1709,34 @@ SMB2_rename(const unsigned int xid, struct cifs_tcon *tcon,
 	kfree(data);
 	return rc;
 }
+
+int
+SMB2_set_hardlink(const unsigned int xid, struct cifs_tcon *tcon,
+		  u64 persistent_fid, u64 volatile_fid, __le16 *target_file)
+{
+	struct smb2_file_link_info info;
+	void **data;
+	unsigned int size[2];
+	int rc;
+	int len = (2 * UniStrnlen((wchar_t *)target_file, PATH_MAX));
+
+	data = kmalloc(sizeof(void *) * 2, GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	info.ReplaceIfExists = 0; /* 1 = replace existing link with new */
+			      /* 0 = fail if link already exists */
+	info.RootDirectory = 0;  /* MBZ for network ops (why does spec say?) */
+	info.FileNameLength = cpu_to_le32(len);
+
+	data[0] = &info;
+	size[0] = sizeof(struct smb2_file_link_info);
+
+	data[1] = target_file;
+	size[1] = len + 2 /* null */;
+
+	rc = send_set_info(xid, tcon, persistent_fid, volatile_fid,
+			   FILE_LINK_INFORMATION, 2, data, size);
+	kfree(data);
+	return rc;
+}
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index b03ca37d0d58..0f3c4828cd00 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -653,6 +653,15 @@ struct smb2_file_rename_info { /* encoding of request for level 10 */
 	char   FileName[0];     /* New name to be assigned */
 } __packed; /* level 10 Set */
 
+struct smb2_file_link_info { /* encoding of request for level 11 */
+	__u8   ReplaceIfExists; /* 1 = replace existing link with new */
+				/* 0 = fail if link already exists */
+	__u8   Reserved[7];
+	__u64  RootDirectory;  /* MBZ for network operations (why says spec?) */
+	__le32 FileNameLength;
+	char   FileName[0];     /* Name to be assigned to new link */
+} __packed; /* level 11 Set */
+
 /*
  * This level 18, although with struct with same name is different from cifs
  * level 0x107. Level 0x107 has an extra u64 between AccessFlags and
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index b43036e8ad2a..99f6945c8643 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -68,6 +68,9 @@ extern int smb2_unlink(const unsigned int xid, struct cifs_tcon *tcon,
 extern int smb2_rename_path(const unsigned int xid, struct cifs_tcon *tcon,
 			    const char *from_name, const char *to_name,
 			    struct cifs_sb_info *cifs_sb);
+extern int smb2_create_hardlink(const unsigned int xid, struct cifs_tcon *tcon,
+				const char *from_name, const char *to_name,
+				struct cifs_sb_info *cifs_sb);
 
 extern int smb2_open_file(const unsigned int xid, struct cifs_tcon *tcon,
 			  const char *full_path, int disposition,
@@ -112,5 +115,8 @@ extern int SMB2_echo(struct TCP_Server_Info *server);
 extern int SMB2_rename(const unsigned int xid, struct cifs_tcon *tcon,
 		       u64 persistent_fid, u64 volatile_fid,
 		       __le16 *target_file);
+extern int SMB2_set_hardlink(const unsigned int xid, struct cifs_tcon *tcon,
+			     u64 persistent_fid, u64 volatile_fid,
+			     __le16 *target_file);
 
 #endif			/* _SMB2PROTO_H */
-- 
cgit 


From d143341815bdc7c45d5289a3ab5743c838332518 Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <pshilovsky@samba.org>
Date: Tue, 18 Sep 2012 16:20:31 -0700
Subject: CIFS: Move set_file_size to ops struct

Signed-off-by: Pavel Shilovsky <pshilovsky@samba.org>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/cifsglob.h  |   6 +++
 fs/cifs/cifsproto.h |  10 ++---
 fs/cifs/cifssmb.c   |  40 ++++++++++----------
 fs/cifs/inode.c     | 106 +++++++++++++++++++++++++++-------------------------
 fs/cifs/smb1ops.c   |   2 +
 5 files changed, 89 insertions(+), 75 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 8595d498ad80..803c21218633 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -252,6 +252,12 @@ struct smb_version_operations {
 	int (*get_srv_inum)(const unsigned int, struct cifs_tcon *,
 			    struct cifs_sb_info *, const char *,
 			    u64 *uniqueid, FILE_ALL_INFO *);
+	/* set size by path */
+	int (*set_path_size)(const unsigned int, struct cifs_tcon *,
+			     const char *, __u64, struct cifs_sb_info *, bool);
+	/* set size by file handle */
+	int (*set_file_size)(const unsigned int, struct cifs_tcon *,
+			     struct cifsFileInfo *, __u64, bool);
 	/* build a full path to the root of the mount */
 	char * (*build_path_to_root)(struct smb_vol *, struct cifs_sb_info *,
 				     struct cifs_tcon *);
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 3c50555c7735..3d99fe9afccc 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -270,13 +270,11 @@ extern int CIFSSMBSetAttrLegacy(unsigned int xid, struct cifs_tcon *tcon,
 			const struct nls_table *nls_codepage);
 #endif /* possibly unneeded function */
 extern int CIFSSMBSetEOF(const unsigned int xid, struct cifs_tcon *tcon,
-			const char *fileName, __u64 size,
-			bool setAllocationSizeFlag,
-			const struct nls_table *nls_codepage,
-			int remap_special_chars);
+			 const char *file_name, __u64 size,
+			 struct cifs_sb_info *cifs_sb, bool set_allocation);
 extern int CIFSSMBSetFileSize(const unsigned int xid, struct cifs_tcon *tcon,
-			 __u64 size, __u16 fileHandle, __u32 opener_pid,
-			bool AllocSizeFlag);
+			      struct cifsFileInfo *cfile, __u64 size,
+			      bool set_allocation);
 
 struct cifs_unix_set_info_args {
 	__u64	ctime;
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index eb3d2cf76e6e..c4f43cf671dc 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -5395,16 +5395,16 @@ QFSPosixRetry:
 }
 
 
-/* We can not use write of zero bytes trick to
-   set file size due to need for large file support.  Also note that
-   this SetPathInfo is preferred to SetFileInfo based method in next
-   routine which is only needed to work around a sharing violation bug
-   in Samba which this routine can run into */
-
+/*
+ * We can not use write of zero bytes trick to set file size due to need for
+ * large file support. Also note that this SetPathInfo is preferred to
+ * SetFileInfo based method in next routine which is only needed to work around
+ * a sharing violation bugin Samba which this routine can run into.
+ */
 int
 CIFSSMBSetEOF(const unsigned int xid, struct cifs_tcon *tcon,
-	      const char *fileName, __u64 size, bool SetAllocation,
-	      const struct nls_table *nls_codepage, int remap)
+	      const char *file_name, __u64 size, struct cifs_sb_info *cifs_sb,
+	      bool set_allocation)
 {
 	struct smb_com_transaction2_spi_req *pSMB = NULL;
 	struct smb_com_transaction2_spi_rsp *pSMBr = NULL;
@@ -5412,6 +5412,8 @@ CIFSSMBSetEOF(const unsigned int xid, struct cifs_tcon *tcon,
 	int name_len;
 	int rc = 0;
 	int bytes_returned = 0;
+	int remap = cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR;
+
 	__u16 params, byte_count, data_count, param_offset, offset;
 
 	cFYI(1, "In SetEOF");
@@ -5423,14 +5425,14 @@ SetEOFRetry:
 
 	if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) {
 		name_len =
-		    cifsConvertToUTF16((__le16 *) pSMB->FileName, fileName,
-				       PATH_MAX, nls_codepage, remap);
+		    cifsConvertToUTF16((__le16 *) pSMB->FileName, file_name,
+				       PATH_MAX, cifs_sb->local_nls, remap);
 		name_len++;	/* trailing null */
 		name_len *= 2;
 	} else {	/* BB improve the check for buffer overruns BB */
-		name_len = strnlen(fileName, PATH_MAX);
+		name_len = strnlen(file_name, PATH_MAX);
 		name_len++;	/* trailing null */
-		strncpy(pSMB->FileName, fileName, name_len);
+		strncpy(pSMB->FileName, file_name, name_len);
 	}
 	params = 6 + name_len;
 	data_count = sizeof(struct file_end_of_file_info);
@@ -5444,7 +5446,7 @@ SetEOFRetry:
 	param_offset = offsetof(struct smb_com_transaction2_spi_req,
 				InformationLevel) - 4;
 	offset = param_offset + params;
-	if (SetAllocation) {
+	if (set_allocation) {
 		if (tcon->ses->capabilities & CAP_INFOLEVEL_PASSTHRU)
 			pSMB->InformationLevel =
 				cpu_to_le16(SMB_SET_FILE_ALLOCATION_INFO2);
@@ -5491,8 +5493,8 @@ SetEOFRetry:
 }
 
 int
-CIFSSMBSetFileSize(const unsigned int xid, struct cifs_tcon *tcon, __u64 size,
-		   __u16 fid, __u32 pid_of_opener, bool SetAllocation)
+CIFSSMBSetFileSize(const unsigned int xid, struct cifs_tcon *tcon,
+		   struct cifsFileInfo *cfile, __u64 size, bool set_allocation)
 {
 	struct smb_com_transaction2_sfi_req *pSMB  = NULL;
 	struct file_end_of_file_info *parm_data;
@@ -5506,8 +5508,8 @@ CIFSSMBSetFileSize(const unsigned int xid, struct cifs_tcon *tcon, __u64 size,
 	if (rc)
 		return rc;
 
-	pSMB->hdr.Pid = cpu_to_le16((__u16)pid_of_opener);
-	pSMB->hdr.PidHigh = cpu_to_le16((__u16)(pid_of_opener >> 16));
+	pSMB->hdr.Pid = cpu_to_le16((__u16)cfile->pid);
+	pSMB->hdr.PidHigh = cpu_to_le16((__u16)(cfile->pid >> 16));
 
 	params = 6;
 	pSMB->MaxSetupCount = 0;
@@ -5536,8 +5538,8 @@ CIFSSMBSetFileSize(const unsigned int xid, struct cifs_tcon *tcon, __u64 size,
 				+ offset);
 	pSMB->DataOffset = cpu_to_le16(offset);
 	parm_data->FileSize = cpu_to_le64(size);
-	pSMB->Fid = fid;
-	if (SetAllocation) {
+	pSMB->Fid = cfile->fid.netfid;
+	if (set_allocation) {
 		if (tcon->ses->capabilities & CAP_INFOLEVEL_PASSTHRU)
 			pSMB->InformationLevel =
 				cpu_to_le16(SMB_SET_FILE_ALLOCATION_INFO2);
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 2f3235f08c3f..85e1b0a405a8 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1883,7 +1883,8 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs,
 	struct cifsInodeInfo *cifsInode = CIFS_I(inode);
 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
 	struct tcon_link *tlink = NULL;
-	struct cifs_tcon *pTcon = NULL;
+	struct cifs_tcon *tcon = NULL;
+	struct TCP_Server_Info *server;
 	struct cifs_io_parms io_parms;
 
 	/*
@@ -1897,19 +1898,21 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs,
 	 */
 	open_file = find_writable_file(cifsInode, true);
 	if (open_file) {
-		__u16 nfid = open_file->fid.netfid;
-		__u32 npid = open_file->pid;
-		pTcon = tlink_tcon(open_file->tlink);
-		rc = CIFSSMBSetFileSize(xid, pTcon, attrs->ia_size, nfid,
-					npid, false);
+		tcon = tlink_tcon(open_file->tlink);
+		server = tcon->ses->server;
+		if (server->ops->set_file_size)
+			rc = server->ops->set_file_size(xid, tcon, open_file,
+							attrs->ia_size, false);
+		else
+			rc = -ENOSYS;
 		cifsFileInfo_put(open_file);
 		cFYI(1, "SetFSize for attrs rc = %d", rc);
 		if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
 			unsigned int bytes_written;
 
-			io_parms.netfid = nfid;
-			io_parms.pid = npid;
-			io_parms.tcon = pTcon;
+			io_parms.netfid = open_file->fid.netfid;
+			io_parms.pid = open_file->pid;
+			io_parms.tcon = tcon;
 			io_parms.offset = 0;
 			io_parms.length = attrs->ia_size;
 			rc = CIFSSMBWrite(xid, &io_parms, &bytes_written,
@@ -1919,52 +1922,55 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs,
 	} else
 		rc = -EINVAL;
 
-	if (rc != 0) {
-		if (pTcon == NULL) {
-			tlink = cifs_sb_tlink(cifs_sb);
-			if (IS_ERR(tlink))
-				return PTR_ERR(tlink);
-			pTcon = tlink_tcon(tlink);
-		}
+	if (!rc)
+		goto set_size_out;
 
-		/* Set file size by pathname rather than by handle
-		   either because no valid, writeable file handle for
-		   it was found or because there was an error setting
-		   it by handle */
-		rc = CIFSSMBSetEOF(xid, pTcon, full_path, attrs->ia_size,
-				   false, cifs_sb->local_nls,
+	if (tcon == NULL) {
+		tlink = cifs_sb_tlink(cifs_sb);
+		if (IS_ERR(tlink))
+			return PTR_ERR(tlink);
+		tcon = tlink_tcon(tlink);
+		server = tcon->ses->server;
+	}
+
+	/*
+	 * Set file size by pathname rather than by handle either because no
+	 * valid, writeable file handle for it was found or because there was
+	 * an error setting it by handle.
+	 */
+	if (server->ops->set_path_size)
+		rc = server->ops->set_path_size(xid, tcon, full_path,
+						attrs->ia_size, cifs_sb, false);
+	else
+		rc = -ENOSYS;
+	cFYI(1, "SetEOF by path (setattrs) rc = %d", rc);
+	if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
+		__u16 netfid;
+		int oplock = 0;
+
+		rc = SMBLegacyOpen(xid, tcon, full_path, FILE_OPEN,
+				   GENERIC_WRITE, CREATE_NOT_DIR, &netfid,
+				   &oplock, NULL, cifs_sb->local_nls,
 				   cifs_sb->mnt_cifs_flags &
-					CIFS_MOUNT_MAP_SPECIAL_CHR);
-		cFYI(1, "SetEOF by path (setattrs) rc = %d", rc);
-		if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
-			__u16 netfid;
-			int oplock = 0;
-
-			rc = SMBLegacyOpen(xid, pTcon, full_path,
-				FILE_OPEN, GENERIC_WRITE,
-				CREATE_NOT_DIR, &netfid, &oplock, NULL,
-				cifs_sb->local_nls,
-				cifs_sb->mnt_cifs_flags &
-					CIFS_MOUNT_MAP_SPECIAL_CHR);
-			if (rc == 0) {
-				unsigned int bytes_written;
-
-				io_parms.netfid = netfid;
-				io_parms.pid = current->tgid;
-				io_parms.tcon = pTcon;
-				io_parms.offset = 0;
-				io_parms.length = attrs->ia_size;
-				rc = CIFSSMBWrite(xid, &io_parms,
-						  &bytes_written,
-						  NULL, NULL,  1);
-				cFYI(1, "wrt seteof rc %d", rc);
-				CIFSSMBClose(xid, pTcon, netfid);
-			}
+						CIFS_MOUNT_MAP_SPECIAL_CHR);
+		if (rc == 0) {
+			unsigned int bytes_written;
+
+			io_parms.netfid = netfid;
+			io_parms.pid = current->tgid;
+			io_parms.tcon = tcon;
+			io_parms.offset = 0;
+			io_parms.length = attrs->ia_size;
+			rc = CIFSSMBWrite(xid, &io_parms, &bytes_written, NULL,
+					  NULL,  1);
+			cFYI(1, "wrt seteof rc %d", rc);
+			CIFSSMBClose(xid, tcon, netfid);
 		}
-		if (tlink)
-			cifs_put_tlink(tlink);
 	}
+	if (tlink)
+		cifs_put_tlink(tlink);
 
+set_size_out:
 	if (rc == 0) {
 		cifsInode->server_eof = attrs->ia_size;
 		cifs_setsize(inode, attrs->ia_size);
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index e5d63444f0ff..b73d2750296d 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -793,6 +793,8 @@ struct smb_version_operations smb1_operations = {
 	.query_path_info = cifs_query_path_info,
 	.query_file_info = cifs_query_file_info,
 	.get_srv_inum = cifs_get_srv_inum,
+	.set_path_size = CIFSSMBSetEOF,
+	.set_file_size = CIFSSMBSetFileSize,
 	.build_path_to_root = cifs_build_path_to_root,
 	.echo = CIFSSMBEcho,
 	.mkdir = CIFSSMBMkDir,
-- 
cgit 


From c839ff244ba2d54d0933596e29a4b03e3c846a9a Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <pshilovsky@samba.org>
Date: Tue, 18 Sep 2012 16:20:32 -0700
Subject: CIFS: Add SMB2 support for set_file_size

Signed-off-by: Pavel Shilovsky <pshilovsky@samba.org>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/smb2glob.h  |  1 +
 fs/cifs/smb2inode.c | 15 +++++++++++++++
 fs/cifs/smb2ops.c   | 11 +++++++++++
 fs/cifs/smb2pdu.c   | 26 +++++++++++++++++++++++---
 fs/cifs/smb2pdu.h   |  4 ++++
 fs/cifs/smb2proto.h |  6 ++++++
 6 files changed, 60 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/smb2glob.h b/fs/cifs/smb2glob.h
index 21555d8744fd..05d429b1c37e 100644
--- a/fs/cifs/smb2glob.h
+++ b/fs/cifs/smb2glob.h
@@ -41,6 +41,7 @@
 #define SMB2_OP_RENAME 6
 #define SMB2_OP_DELETE 7
 #define SMB2_OP_HARDLINK 8
+#define SMB2_OP_SET_EOF 9
 
 /* Used when constructing chained read requests. */
 #define CHAINED_REQUEST 1
diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c
index 1921c9c87ccd..290583092293 100644
--- a/fs/cifs/smb2inode.c
+++ b/fs/cifs/smb2inode.c
@@ -82,6 +82,10 @@ smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon,
 		tmprc = SMB2_set_hardlink(xid, tcon, persistent_fid,
 					  volatile_fid, (__le16 *)data);
 		break;
+	case SMB2_OP_SET_EOF:
+		tmprc = SMB2_set_eof(xid, tcon, persistent_fid, volatile_fid,
+				     current->tgid, (__le64 *)data);
+		break;
 	default:
 		cERROR(1, "Invalid command");
 		break;
@@ -217,3 +221,14 @@ smb2_create_hardlink(const unsigned int xid, struct cifs_tcon *tcon,
 	return smb2_set_path_attr(xid, tcon, from_name, to_name, cifs_sb,
 				  FILE_READ_ATTRIBUTES, SMB2_OP_HARDLINK);
 }
+
+int
+smb2_set_path_size(const unsigned int xid, struct cifs_tcon *tcon,
+		   const char *full_path, __u64 size,
+		   struct cifs_sb_info *cifs_sb, bool set_alloc)
+{
+	__le64 eof = cpu_to_le64(size);
+	return smb2_open_op_close(xid, tcon, cifs_sb, full_path,
+				  FILE_WRITE_DATA, FILE_OPEN, 0, 0, &eof,
+				  SMB2_OP_SET_EOF);
+}
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 75693e983e76..e0daf3a4dcc2 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -415,6 +415,15 @@ smb2_sync_write(const unsigned int xid, struct cifsFileInfo *cfile,
 	return SMB2_write(xid, parms, written, iov, nr_segs);
 }
 
+static int
+smb2_set_file_size(const unsigned int xid, struct cifs_tcon *tcon,
+		   struct cifsFileInfo *cfile, __u64 size, bool set_alloc)
+{
+	__le64 eof = cpu_to_le64(size);
+	return SMB2_set_eof(xid, tcon, cfile->fid.persistent_fid,
+			    cfile->fid.volatile_fid, cfile->pid, &eof);
+}
+
 struct smb_version_operations smb21_operations = {
 	.setup_request = smb2_setup_request,
 	.setup_async_request = smb2_setup_async_request,
@@ -446,6 +455,8 @@ struct smb_version_operations smb21_operations = {
 	.query_path_info = smb2_query_path_info,
 	.get_srv_inum = smb2_get_srv_inum,
 	.query_file_info = smb2_query_file_info,
+	.set_path_size = smb2_set_path_size,
+	.set_file_size = smb2_set_file_size,
 	.build_path_to_root = smb2_build_path_to_root,
 	.mkdir = smb2_mkdir,
 	.mkdir_setinfo = smb2_mkdir_setinfo,
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index a684c4ab42d6..74a8381400b1 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1605,7 +1605,7 @@ SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms,
 
 static int
 send_set_info(const unsigned int xid, struct cifs_tcon *tcon,
-	       u64 persistent_fid, u64 volatile_fid, int info_class,
+	       u64 persistent_fid, u64 volatile_fid, u32 pid, int info_class,
 	       unsigned int num, void **data, unsigned int *size)
 {
 	struct smb2_set_info_req *req;
@@ -1635,6 +1635,8 @@ send_set_info(const unsigned int xid, struct cifs_tcon *tcon,
 		return rc;
 	}
 
+	req->hdr.ProcessId = cpu_to_le32(pid);
+
 	req->InfoType = SMB2_O_INFO_FILE;
 	req->FileInfoClass = info_class;
 	req->PersistentFileId = persistent_fid;
@@ -1705,7 +1707,8 @@ SMB2_rename(const unsigned int xid, struct cifs_tcon *tcon,
 	size[1] = len + 2 /* null */;
 
 	rc = send_set_info(xid, tcon, persistent_fid, volatile_fid,
-			   FILE_RENAME_INFORMATION, 2, data, size);
+			   current->tgid, FILE_RENAME_INFORMATION, 2, data,
+			   size);
 	kfree(data);
 	return rc;
 }
@@ -1736,7 +1739,24 @@ SMB2_set_hardlink(const unsigned int xid, struct cifs_tcon *tcon,
 	size[1] = len + 2 /* null */;
 
 	rc = send_set_info(xid, tcon, persistent_fid, volatile_fid,
-			   FILE_LINK_INFORMATION, 2, data, size);
+			   current->tgid, FILE_LINK_INFORMATION, 2, data, size);
 	kfree(data);
 	return rc;
 }
+
+int
+SMB2_set_eof(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
+	     u64 volatile_fid, u32 pid, __le64 *eof)
+{
+	struct smb2_file_eof_info info;
+	void *data;
+	unsigned int size;
+
+	info.EndOfFile = *eof;
+
+	data = &info;
+	size = sizeof(struct smb2_file_eof_info);
+
+	return send_set_info(xid, tcon, persistent_fid, volatile_fid, pid,
+			     FILE_END_OF_FILE_INFORMATION, 1, &data, &size);
+}
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index 0f3c4828cd00..d775941f552a 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -690,4 +690,8 @@ struct smb2_file_all_info { /* data block encoding of response to level 18 */
 	char   FileName[1];
 } __packed; /* level 18 Query */
 
+struct smb2_file_eof_info { /* encoding of request for level 10 */
+	__le64 EndOfFile; /* new end of file value */
+} __packed; /* level 20 Set */
+
 #endif				/* _SMB2PDU_H */
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index 99f6945c8643..3d4866279530 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -56,6 +56,9 @@ extern int smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
 				struct cifs_sb_info *cifs_sb,
 				const char *full_path, FILE_ALL_INFO *data,
 				bool *adjust_tz);
+extern int smb2_set_path_size(const unsigned int xid, struct cifs_tcon *tcon,
+			      const char *full_path, __u64 size,
+			      struct cifs_sb_info *cifs_sb, bool set_alloc);
 extern int smb2_mkdir(const unsigned int xid, struct cifs_tcon *tcon,
 		      const char *name, struct cifs_sb_info *cifs_sb);
 extern void smb2_mkdir_setinfo(struct inode *inode, const char *full_path,
@@ -118,5 +121,8 @@ extern int SMB2_rename(const unsigned int xid, struct cifs_tcon *tcon,
 extern int SMB2_set_hardlink(const unsigned int xid, struct cifs_tcon *tcon,
 			     u64 persistent_fid, u64 volatile_fid,
 			     __le16 *target_file);
+extern int SMB2_set_eof(const unsigned int xid, struct cifs_tcon *tcon,
+			u64 persistent_fid, u64 volatile_fid, u32 pid,
+			__le64 *eof);
 
 #endif			/* _SMB2PROTO_H */
-- 
cgit 


From 6bdf6dbd662176c0da5c3ac8ed10ac94e7776c85 Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <pshilovsky@samba.org>
Date: Tue, 18 Sep 2012 16:20:32 -0700
Subject: CIFS: Move set_file_info to ops struct

Signed-off-by: Pavel Shilovsky <pshilovsky@samba.org>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/cifsglob.h |  3 +++
 fs/cifs/inode.c    | 79 +++++-------------------------------------------------
 fs/cifs/smb1ops.c  | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 88 insertions(+), 73 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 803c21218633..dff35830601f 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -258,6 +258,9 @@ struct smb_version_operations {
 	/* set size by file handle */
 	int (*set_file_size)(const unsigned int, struct cifs_tcon *,
 			     struct cifsFileInfo *, __u64, bool);
+	/* set attributes */
+	int (*set_file_info)(struct inode *, const char *, FILE_BASIC_INFO *,
+			     const unsigned int);
 	/* build a full path to the root of the mount */
 	char * (*build_path_to_root)(struct smb_vol *, struct cifs_sb_info *,
 				     struct cifs_tcon *);
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 85e1b0a405a8..e74e1bceb416 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -886,21 +886,18 @@ int
 cifs_set_file_info(struct inode *inode, struct iattr *attrs, unsigned int xid,
 		   char *full_path, __u32 dosattr)
 {
-	int rc;
-	int oplock = 0;
-	__u16 netfid;
-	__u32 netpid;
 	bool set_time = false;
-	struct cifsFileInfo *open_file;
-	struct cifsInodeInfo *cifsInode = CIFS_I(inode);
 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
-	struct tcon_link *tlink = NULL;
-	struct cifs_tcon *pTcon;
+	struct TCP_Server_Info *server;
 	FILE_BASIC_INFO	info_buf;
 
 	if (attrs == NULL)
 		return -EINVAL;
 
+	server = cifs_sb_master_tcon(cifs_sb)->ses->server;
+	if (!server->ops->set_file_info)
+		return -ENOSYS;
+
 	if (attrs->ia_valid & ATTR_ATIME) {
 		set_time = true;
 		info_buf.LastAccessTime =
@@ -931,71 +928,7 @@ cifs_set_file_info(struct inode *inode, struct iattr *attrs, unsigned int xid,
 	info_buf.CreationTime = 0;	/* don't change */
 	info_buf.Attributes = cpu_to_le32(dosattr);
 
-	/*
-	 * If the file is already open for write, just use that fileid
-	 */
-	open_file = find_writable_file(cifsInode, true);
-	if (open_file) {
-		netfid = open_file->fid.netfid;
-		netpid = open_file->pid;
-		pTcon = tlink_tcon(open_file->tlink);
-		goto set_via_filehandle;
-	}
-
-	tlink = cifs_sb_tlink(cifs_sb);
-	if (IS_ERR(tlink)) {
-		rc = PTR_ERR(tlink);
-		tlink = NULL;
-		goto out;
-	}
-	pTcon = tlink_tcon(tlink);
-
-	/*
-	 * NT4 apparently returns success on this call, but it doesn't
-	 * really work.
-	 */
-	if (!(pTcon->ses->flags & CIFS_SES_NT4)) {
-		rc = CIFSSMBSetPathInfo(xid, pTcon, full_path,
-				     &info_buf, cifs_sb->local_nls,
-				     cifs_sb->mnt_cifs_flags &
-					CIFS_MOUNT_MAP_SPECIAL_CHR);
-		if (rc == 0) {
-			cifsInode->cifsAttrs = dosattr;
-			goto out;
-		} else if (rc != -EOPNOTSUPP && rc != -EINVAL)
-			goto out;
-	}
-
-	cFYI(1, "calling SetFileInfo since SetPathInfo for "
-		 "times not supported by this server");
-	rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN,
-			 SYNCHRONIZE | FILE_WRITE_ATTRIBUTES,
-			 CREATE_NOT_DIR, &netfid, &oplock,
-			 NULL, cifs_sb->local_nls,
-			 cifs_sb->mnt_cifs_flags &
-				CIFS_MOUNT_MAP_SPECIAL_CHR);
-
-	if (rc != 0) {
-		if (rc == -EIO)
-			rc = -EINVAL;
-		goto out;
-	}
-
-	netpid = current->tgid;
-
-set_via_filehandle:
-	rc = CIFSSMBSetFileInfo(xid, pTcon, &info_buf, netfid, netpid);
-	if (!rc)
-		cifsInode->cifsAttrs = dosattr;
-
-	if (open_file == NULL)
-		CIFSSMBClose(xid, pTcon, netfid);
-	else
-		cifsFileInfo_put(open_file);
-out:
-	if (tlink != NULL)
-		cifs_put_tlink(tlink);
-	return rc;
+	return server->ops->set_file_info(inode, full_path, &info_buf, xid);
 }
 
 /*
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index b73d2750296d..ed311968437a 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -758,6 +758,84 @@ cifs_sync_write(const unsigned int xid, struct cifsFileInfo *cfile,
 	return CIFSSMBWrite2(xid, parms, written, iov, nr_segs);
 }
 
+static int
+smb_set_file_info(struct inode *inode, const char *full_path,
+		  FILE_BASIC_INFO *buf, const unsigned int xid)
+{
+	int oplock = 0;
+	int rc;
+	__u16 netfid;
+	__u32 netpid;
+	struct cifsFileInfo *open_file;
+	struct cifsInodeInfo *cinode = CIFS_I(inode);
+	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+	struct tcon_link *tlink = NULL;
+	struct cifs_tcon *tcon;
+	FILE_BASIC_INFO info_buf;
+
+	/* if the file is already open for write, just use that fileid */
+	open_file = find_writable_file(cinode, true);
+	if (open_file) {
+		netfid = open_file->fid.netfid;
+		netpid = open_file->pid;
+		tcon = tlink_tcon(open_file->tlink);
+		goto set_via_filehandle;
+	}
+
+	tlink = cifs_sb_tlink(cifs_sb);
+	if (IS_ERR(tlink)) {
+		rc = PTR_ERR(tlink);
+		tlink = NULL;
+		goto out;
+	}
+	tcon = tlink_tcon(tlink);
+
+	/*
+	 * NT4 apparently returns success on this call, but it doesn't really
+	 * work.
+	 */
+	if (!(tcon->ses->flags & CIFS_SES_NT4)) {
+		rc = CIFSSMBSetPathInfo(xid, tcon, full_path, buf,
+					cifs_sb->local_nls,
+					cifs_sb->mnt_cifs_flags &
+						CIFS_MOUNT_MAP_SPECIAL_CHR);
+		if (rc == 0) {
+			cinode->cifsAttrs = le32_to_cpu(buf->Attributes);
+			goto out;
+		} else if (rc != -EOPNOTSUPP && rc != -EINVAL)
+			goto out;
+	}
+
+	cFYI(1, "calling SetFileInfo since SetPathInfo for times not supported "
+		"by this server");
+	rc = CIFSSMBOpen(xid, tcon, full_path, FILE_OPEN,
+			 SYNCHRONIZE | FILE_WRITE_ATTRIBUTES, CREATE_NOT_DIR,
+			 &netfid, &oplock, NULL, cifs_sb->local_nls,
+			 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
+
+	if (rc != 0) {
+		if (rc == -EIO)
+			rc = -EINVAL;
+		goto out;
+	}
+
+	netpid = current->tgid;
+
+set_via_filehandle:
+	rc = CIFSSMBSetFileInfo(xid, tcon, &info_buf, netfid, netpid);
+	if (!rc)
+		cinode->cifsAttrs = le32_to_cpu(buf->Attributes);
+
+	if (open_file == NULL)
+		CIFSSMBClose(xid, tcon, netfid);
+	else
+		cifsFileInfo_put(open_file);
+out:
+	if (tlink != NULL)
+		cifs_put_tlink(tlink);
+	return rc;
+}
+
 struct smb_version_operations smb1_operations = {
 	.send_cancel = send_nt_cancel,
 	.compare_fids = cifs_compare_fids,
@@ -795,6 +873,7 @@ struct smb_version_operations smb1_operations = {
 	.get_srv_inum = cifs_get_srv_inum,
 	.set_path_size = CIFSSMBSetEOF,
 	.set_file_size = CIFSSMBSetFileSize,
+	.set_file_info = smb_set_file_info,
 	.build_path_to_root = cifs_build_path_to_root,
 	.echo = CIFSSMBEcho,
 	.mkdir = CIFSSMBMkDir,
-- 
cgit 


From 1feeaac753e0a9b3864740556b7840643642abdb Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <pshilovsky@samba.org>
Date: Tue, 18 Sep 2012 16:20:32 -0700
Subject: CIFS: Add set_file_info support for SMB2

Signed-off-by: Pavel Shilovsky <pshilovsky@samba.org>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/smb2inode.c | 22 ++++++++++++++++++++++
 fs/cifs/smb2ops.c   |  1 +
 fs/cifs/smb2pdu.c   | 11 +++++++++++
 fs/cifs/smb2proto.h |  5 +++++
 4 files changed, 39 insertions(+)

(limited to 'fs')

diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c
index 290583092293..1bd6b0f0c10e 100644
--- a/fs/cifs/smb2inode.c
+++ b/fs/cifs/smb2inode.c
@@ -86,6 +86,10 @@ smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon,
 		tmprc = SMB2_set_eof(xid, tcon, persistent_fid, volatile_fid,
 				     current->tgid, (__le64 *)data);
 		break;
+	case SMB2_OP_SET_INFO:
+		tmprc = SMB2_set_info(xid, tcon, persistent_fid, volatile_fid,
+				      (FILE_BASIC_INFO *)data);
+		break;
 	default:
 		cERROR(1, "Invalid command");
 		break;
@@ -232,3 +236,21 @@ smb2_set_path_size(const unsigned int xid, struct cifs_tcon *tcon,
 				  FILE_WRITE_DATA, FILE_OPEN, 0, 0, &eof,
 				  SMB2_OP_SET_EOF);
 }
+
+int
+smb2_set_file_info(struct inode *inode, const char *full_path,
+		   FILE_BASIC_INFO *buf, const unsigned int xid)
+{
+	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+	struct tcon_link *tlink;
+	int rc;
+
+	tlink = cifs_sb_tlink(cifs_sb);
+	if (IS_ERR(tlink))
+		return PTR_ERR(tlink);
+	rc = smb2_open_op_close(xid, tlink_tcon(tlink), cifs_sb, full_path,
+				FILE_WRITE_ATTRIBUTES, FILE_OPEN, 0, 0, buf,
+				SMB2_OP_SET_INFO);
+	cifs_put_tlink(tlink);
+	return rc;
+}
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index e0daf3a4dcc2..52bc93125726 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -457,6 +457,7 @@ struct smb_version_operations smb21_operations = {
 	.query_file_info = smb2_query_file_info,
 	.set_path_size = smb2_set_path_size,
 	.set_file_size = smb2_set_file_size,
+	.set_file_info = smb2_set_file_info,
 	.build_path_to_root = smb2_build_path_to_root,
 	.mkdir = smb2_mkdir,
 	.mkdir_setinfo = smb2_mkdir_setinfo,
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 74a8381400b1..a1314f99b4cc 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1760,3 +1760,14 @@ SMB2_set_eof(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
 	return send_set_info(xid, tcon, persistent_fid, volatile_fid, pid,
 			     FILE_END_OF_FILE_INFORMATION, 1, &data, &size);
 }
+
+int
+SMB2_set_info(const unsigned int xid, struct cifs_tcon *tcon,
+	      u64 persistent_fid, u64 volatile_fid, FILE_BASIC_INFO *buf)
+{
+	unsigned int size;
+	size = sizeof(FILE_BASIC_INFO);
+	return send_set_info(xid, tcon, persistent_fid, volatile_fid,
+			     current->tgid, FILE_BASIC_INFORMATION, 1,
+			     (void **)&buf, &size);
+}
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index 3d4866279530..277472433158 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -59,6 +59,8 @@ extern int smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
 extern int smb2_set_path_size(const unsigned int xid, struct cifs_tcon *tcon,
 			      const char *full_path, __u64 size,
 			      struct cifs_sb_info *cifs_sb, bool set_alloc);
+extern int smb2_set_file_info(struct inode *inode, const char *full_path,
+			      FILE_BASIC_INFO *buf, const unsigned int xid);
 extern int smb2_mkdir(const unsigned int xid, struct cifs_tcon *tcon,
 		      const char *name, struct cifs_sb_info *cifs_sb);
 extern void smb2_mkdir_setinfo(struct inode *inode, const char *full_path,
@@ -124,5 +126,8 @@ extern int SMB2_set_hardlink(const unsigned int xid, struct cifs_tcon *tcon,
 extern int SMB2_set_eof(const unsigned int xid, struct cifs_tcon *tcon,
 			u64 persistent_fid, u64 volatile_fid, u32 pid,
 			__le64 *eof);
+extern int SMB2_set_info(const unsigned int xid, struct cifs_tcon *tcon,
+			 u64 persistent_fid, u64 volatile_fid,
+			 FILE_BASIC_INFO *buf);
 
 #endif			/* _SMB2PROTO_H */
-- 
cgit 


From 92fc65a74a2be1388d774f7dbf82c9adea1745cf Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <pshilovsky@samba.org>
Date: Tue, 18 Sep 2012 16:20:32 -0700
Subject: CIFS: Move readdir code to ops struct

Signed-off-by: Pavel Shilovsky <pshilovsky@samba.org>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/cifsglob.h  |  15 +++++
 fs/cifs/cifsproto.h |   2 +-
 fs/cifs/file.c      |  60 ++++++++++---------
 fs/cifs/netmisc.c   |   3 +-
 fs/cifs/readdir.c   | 165 ++++++++++++++++++++++++++++------------------------
 fs/cifs/smb1ops.c   |  31 ++++++++++
 6 files changed, 173 insertions(+), 103 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index dff35830601f..9adf211ca95a 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -177,6 +177,7 @@ struct cifs_fid;
 struct cifs_readdata;
 struct cifs_writedata;
 struct cifs_io_parms;
+struct cifs_search_info;
 
 struct smb_version_operations {
 	int (*send_cancel)(struct TCP_Server_Info *, void *,
@@ -313,6 +314,20 @@ struct smb_version_operations {
 	int (*sync_write)(const unsigned int, struct cifsFileInfo *,
 			  struct cifs_io_parms *, unsigned int *, struct kvec *,
 			  unsigned long);
+	/* open dir, start readdir */
+	int (*query_dir_first)(const unsigned int, struct cifs_tcon *,
+			       const char *, struct cifs_sb_info *,
+			       struct cifs_fid *, __u16,
+			       struct cifs_search_info *);
+	/* continue readdir */
+	int (*query_dir_next)(const unsigned int, struct cifs_tcon *,
+			      struct cifs_fid *,
+			      __u16, struct cifs_search_info *srch_inf);
+	/* close dir */
+	int (*close_dir)(const unsigned int, struct cifs_tcon *,
+			 struct cifs_fid *);
+	/* calculate a size of SMB message */
+	unsigned int (*calc_smb_size)(void *);
 };
 
 struct smb_version_values {
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 3d99fe9afccc..c7ad9a8cf82a 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -100,7 +100,7 @@ extern void cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
 			    unsigned int bytes_written);
 extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, bool);
 extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *, bool);
-extern unsigned int smbCalcSize(struct smb_hdr *ptr);
+extern unsigned int smbCalcSize(void *buf);
 extern int decode_negTokenInit(unsigned char *security_blob, int length,
 			struct TCP_Server_Info *server);
 extern int cifs_convert_address(struct sockaddr *dst, const char *src, int len);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 39fff77e38d4..fb6b4413255b 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -618,39 +618,47 @@ int cifs_closedir(struct inode *inode, struct file *file)
 	int rc = 0;
 	unsigned int xid;
 	struct cifsFileInfo *cfile = file->private_data;
-	char *tmp;
+	struct cifs_tcon *tcon;
+	struct TCP_Server_Info *server;
+	char *buf;
 
 	cFYI(1, "Closedir inode = 0x%p", inode);
 
+	if (cfile == NULL)
+		return rc;
+
 	xid = get_xid();
+	tcon = tlink_tcon(cfile->tlink);
+	server = tcon->ses->server;
 
-	if (cfile) {
-		struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
+	cFYI(1, "Freeing private data in close dir");
+	spin_lock(&cifs_file_list_lock);
+	if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) {
+		cfile->invalidHandle = true;
+		spin_unlock(&cifs_file_list_lock);
+		if (server->ops->close_dir)
+			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
+		else
+			rc = -ENOSYS;
+		cFYI(1, "Closing uncompleted readdir with rc %d", rc);
+		/* not much we can do if it fails anyway, ignore rc */
+		rc = 0;
+	} else
+		spin_unlock(&cifs_file_list_lock);
 
-		cFYI(1, "Freeing private data in close dir");
-		spin_lock(&cifs_file_list_lock);
-		if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) {
-			cfile->invalidHandle = true;
-			spin_unlock(&cifs_file_list_lock);
-			rc = CIFSFindClose(xid, tcon, cfile->fid.netfid);
-			cFYI(1, "Closing uncompleted readdir with rc %d", rc);
-			/* not much we can do if it fails anyway, ignore rc */
-			rc = 0;
-		} else
-			spin_unlock(&cifs_file_list_lock);
-		tmp = cfile->srch_inf.ntwrk_buf_start;
-		if (tmp) {
-			cFYI(1, "closedir free smb buf in srch struct");
-			cfile->srch_inf.ntwrk_buf_start = NULL;
-			if (cfile->srch_inf.smallBuf)
-				cifs_small_buf_release(tmp);
-			else
-				cifs_buf_release(tmp);
-		}
-		cifs_put_tlink(cfile->tlink);
-		kfree(file->private_data);
-		file->private_data = NULL;
+	buf = cfile->srch_inf.ntwrk_buf_start;
+	if (buf) {
+		cFYI(1, "closedir free smb buf in srch struct");
+		cfile->srch_inf.ntwrk_buf_start = NULL;
+		if (cfile->srch_inf.smallBuf)
+			cifs_small_buf_release(buf);
+		else
+			cifs_buf_release(buf);
 	}
+
+	cifs_put_tlink(cfile->tlink);
+	kfree(file->private_data);
+	file->private_data = NULL;
 	/* BB can we lock the filestruct while this is going on? */
 	free_xid(xid);
 	return rc;
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index 581c225f7f50..e7bab3be5cf9 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -913,8 +913,9 @@ map_smb_to_linux_error(char *buf, bool logErr)
  * portion, the number of word parameters and the data portion of the message
  */
 unsigned int
-smbCalcSize(struct smb_hdr *ptr)
+smbCalcSize(void *buf)
 {
+	struct smb_hdr *ptr = (struct smb_hdr *)buf;
 	return (sizeof(struct smb_hdr) + (2 * ptr->WordCount) +
 		2 /* size of the bcc field */ + get_bcc(ptr));
 }
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 9e76e3b3289b..b0f4a428398d 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -220,7 +220,8 @@ int get_symlink_reparse_path(char *full_path, struct cifs_sb_info *cifs_sb,
 }
  */
 
-static int initiate_cifs_search(const unsigned int xid, struct file *file)
+static int
+initiate_cifs_search(const unsigned int xid, struct file *file)
 {
 	__u16 search_flags;
 	int rc = 0;
@@ -229,6 +230,7 @@ static int initiate_cifs_search(const unsigned int xid, struct file *file)
 	struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
 	struct tcon_link *tlink = NULL;
 	struct cifs_tcon *tcon;
+	struct TCP_Server_Info *server;
 
 	if (file->private_data == NULL) {
 		tlink = cifs_sb_tlink(cifs_sb);
@@ -248,6 +250,13 @@ static int initiate_cifs_search(const unsigned int xid, struct file *file)
 		tcon = tlink_tcon(cifsFile->tlink);
 	}
 
+	server = tcon->ses->server;
+
+	if (!server->ops->query_dir_first) {
+		rc = -ENOSYS;
+		goto error_exit;
+	}
+
 	cifsFile->invalidHandle = true;
 	cifsFile->srch_inf.endOfSearch = false;
 
@@ -278,10 +287,10 @@ ffirst_retry:
 	if (backup_cred(cifs_sb))
 		search_flags |= CIFS_SEARCH_BACKUP_SEARCH;
 
-	rc = CIFSFindFirst(xid, tcon, full_path, cifs_sb->local_nls,
-		&cifsFile->fid.netfid, search_flags, &cifsFile->srch_inf,
-		cifs_sb->mnt_cifs_flags &
-			CIFS_MOUNT_MAP_SPECIAL_CHR, CIFS_DIR_SEP(cifs_sb));
+	rc = server->ops->query_dir_first(xid, tcon, full_path, cifs_sb,
+					  &cifsFile->fid, search_flags,
+					  &cifsFile->srch_inf);
+
 	if (rc == 0)
 		cifsFile->invalidHandle = false;
 	/* BB add following call to handle readdir on new NTFS symlink errors
@@ -501,62 +510,67 @@ static int cifs_save_resume_key(const char *current_entry,
 	return rc;
 }
 
-/* find the corresponding entry in the search */
-/* Note that the SMB server returns search entries for . and .. which
-   complicates logic here if we choose to parse for them and we do not
-   assume that they are located in the findfirst return buffer.*/
-/* We start counting in the buffer with entry 2 and increment for every
-   entry (do not increment for . or .. entry) */
-static int find_cifs_entry(const unsigned int xid, struct cifs_tcon *pTcon,
-	struct file *file, char **ppCurrentEntry, int *num_to_ret)
+/*
+ * Find the corresponding entry in the search. Note that the SMB server returns
+ * search entries for . and .. which complicates logic here if we choose to
+ * parse for them and we do not assume that they are located in the findfirst
+ * return buffer. We start counting in the buffer with entry 2 and increment for
+ * every entry (do not increment for . or .. entry).
+ */
+static int
+find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon,
+		struct file *file, char **current_entry, int *num_to_ret)
 {
 	__u16 search_flags;
 	int rc = 0;
 	int pos_in_buf = 0;
 	loff_t first_entry_in_buffer;
 	loff_t index_to_find = file->f_pos;
-	struct cifsFileInfo *cifsFile = file->private_data;
+	struct cifsFileInfo *cfile = file->private_data;
 	struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
+	struct TCP_Server_Info *server = tcon->ses->server;
 	/* check if index in the buffer */
 
-	if ((cifsFile == NULL) || (ppCurrentEntry == NULL) ||
-	   (num_to_ret == NULL))
+	if (!server->ops->query_dir_first || !server->ops->query_dir_next)
+		return -ENOSYS;
+
+	if ((cfile == NULL) || (current_entry == NULL) || (num_to_ret == NULL))
 		return -ENOENT;
 
-	*ppCurrentEntry = NULL;
-	first_entry_in_buffer =
-		cifsFile->srch_inf.index_of_last_entry -
-			cifsFile->srch_inf.entries_in_buffer;
+	*current_entry = NULL;
+	first_entry_in_buffer = cfile->srch_inf.index_of_last_entry -
+					cfile->srch_inf.entries_in_buffer;
 
-	/* if first entry in buf is zero then is first buffer
-	in search response data which means it is likely . and ..
-	will be in this buffer, although some servers do not return
-	. and .. for the root of a drive and for those we need
-	to start two entries earlier */
+	/*
+	 * If first entry in buf is zero then is first buffer
+	 * in search response data which means it is likely . and ..
+	 * will be in this buffer, although some servers do not return
+	 * . and .. for the root of a drive and for those we need
+	 * to start two entries earlier.
+	 */
 
 	dump_cifs_file_struct(file, "In fce ");
-	if (((index_to_find < cifsFile->srch_inf.index_of_last_entry) &&
-	     is_dir_changed(file)) ||
-	   (index_to_find < first_entry_in_buffer)) {
+	if (((index_to_find < cfile->srch_inf.index_of_last_entry) &&
+	     is_dir_changed(file)) || (index_to_find < first_entry_in_buffer)) {
 		/* close and restart search */
 		cFYI(1, "search backing up - close and restart search");
 		spin_lock(&cifs_file_list_lock);
-		if (!cifsFile->srch_inf.endOfSearch &&
-		    !cifsFile->invalidHandle) {
-			cifsFile->invalidHandle = true;
+		if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) {
+			cfile->invalidHandle = true;
 			spin_unlock(&cifs_file_list_lock);
-			CIFSFindClose(xid, pTcon, cifsFile->fid.netfid);
+			if (server->ops->close)
+				server->ops->close(xid, tcon, &cfile->fid);
 		} else
 			spin_unlock(&cifs_file_list_lock);
-		if (cifsFile->srch_inf.ntwrk_buf_start) {
+		if (cfile->srch_inf.ntwrk_buf_start) {
 			cFYI(1, "freeing SMB ff cache buf on search rewind");
-			if (cifsFile->srch_inf.smallBuf)
-				cifs_small_buf_release(cifsFile->srch_inf.
+			if (cfile->srch_inf.smallBuf)
+				cifs_small_buf_release(cfile->srch_inf.
 						ntwrk_buf_start);
 			else
-				cifs_buf_release(cifsFile->srch_inf.
+				cifs_buf_release(cfile->srch_inf.
 						ntwrk_buf_start);
-			cifsFile->srch_inf.ntwrk_buf_start = NULL;
+			cfile->srch_inf.ntwrk_buf_start = NULL;
 		}
 		rc = initiate_cifs_search(xid, file);
 		if (rc) {
@@ -565,65 +579,64 @@ static int find_cifs_entry(const unsigned int xid, struct cifs_tcon *pTcon,
 			return rc;
 		}
 		/* FindFirst/Next set last_entry to NULL on malformed reply */
-		if (cifsFile->srch_inf.last_entry)
-			cifs_save_resume_key(cifsFile->srch_inf.last_entry,
-						cifsFile);
+		if (cfile->srch_inf.last_entry)
+			cifs_save_resume_key(cfile->srch_inf.last_entry, cfile);
 	}
 
 	search_flags = CIFS_SEARCH_CLOSE_AT_END | CIFS_SEARCH_RETURN_RESUME;
 	if (backup_cred(cifs_sb))
 		search_flags |= CIFS_SEARCH_BACKUP_SEARCH;
 
-	while ((index_to_find >= cifsFile->srch_inf.index_of_last_entry) &&
-	      (rc == 0) && !cifsFile->srch_inf.endOfSearch) {
+	while ((index_to_find >= cfile->srch_inf.index_of_last_entry) &&
+	       (rc == 0) && !cfile->srch_inf.endOfSearch) {
 		cFYI(1, "calling findnext2");
-		rc = CIFSFindNext(xid, pTcon, cifsFile->fid.netfid,
-				  search_flags, &cifsFile->srch_inf);
+		rc = server->ops->query_dir_next(xid, tcon, &cfile->fid,
+						 search_flags,
+						 &cfile->srch_inf);
 		/* FindFirst/Next set last_entry to NULL on malformed reply */
-		if (cifsFile->srch_inf.last_entry)
-			cifs_save_resume_key(cifsFile->srch_inf.last_entry,
-						cifsFile);
+		if (cfile->srch_inf.last_entry)
+			cifs_save_resume_key(cfile->srch_inf.last_entry, cfile);
 		if (rc)
 			return -ENOENT;
 	}
-	if (index_to_find < cifsFile->srch_inf.index_of_last_entry) {
+	if (index_to_find < cfile->srch_inf.index_of_last_entry) {
 		/* we found the buffer that contains the entry */
 		/* scan and find it */
 		int i;
-		char *current_entry;
-		char *end_of_smb = cifsFile->srch_inf.ntwrk_buf_start +
-			smbCalcSize((struct smb_hdr *)
-				cifsFile->srch_inf.ntwrk_buf_start);
-
-		current_entry = cifsFile->srch_inf.srch_entries_start;
-		first_entry_in_buffer = cifsFile->srch_inf.index_of_last_entry
-					- cifsFile->srch_inf.entries_in_buffer;
+		char *cur_ent;
+		char *end_of_smb = cfile->srch_inf.ntwrk_buf_start +
+			server->ops->calc_smb_size(
+					cfile->srch_inf.ntwrk_buf_start);
+
+		cur_ent = cfile->srch_inf.srch_entries_start;
+		first_entry_in_buffer = cfile->srch_inf.index_of_last_entry
+					- cfile->srch_inf.entries_in_buffer;
 		pos_in_buf = index_to_find - first_entry_in_buffer;
 		cFYI(1, "found entry - pos_in_buf %d", pos_in_buf);
 
-		for (i = 0; (i < (pos_in_buf)) && (current_entry != NULL); i++) {
+		for (i = 0; (i < (pos_in_buf)) && (cur_ent != NULL); i++) {
 			/* go entry by entry figuring out which is first */
-			current_entry = nxt_dir_entry(current_entry, end_of_smb,
-						cifsFile->srch_inf.info_level);
+			cur_ent = nxt_dir_entry(cur_ent, end_of_smb,
+						cfile->srch_inf.info_level);
 		}
-		if ((current_entry == NULL) && (i < pos_in_buf)) {
+		if ((cur_ent == NULL) && (i < pos_in_buf)) {
 			/* BB fixme - check if we should flag this error */
 			cERROR(1, "reached end of buf searching for pos in buf"
-			  " %d index to find %lld rc %d",
-			  pos_in_buf, index_to_find, rc);
+				  " %d index to find %lld rc %d", pos_in_buf,
+				  index_to_find, rc);
 		}
 		rc = 0;
-		*ppCurrentEntry = current_entry;
+		*current_entry = cur_ent;
 	} else {
 		cFYI(1, "index not in buffer - could not findnext into it");
 		return 0;
 	}
 
-	if (pos_in_buf >= cifsFile->srch_inf.entries_in_buffer) {
+	if (pos_in_buf >= cfile->srch_inf.entries_in_buffer) {
 		cFYI(1, "can not return entries pos_in_buf beyond last");
 		*num_to_ret = 0;
 	} else
-		*num_to_ret = cifsFile->srch_inf.entries_in_buffer - pos_in_buf;
+		*num_to_ret = cfile->srch_inf.entries_in_buffer - pos_in_buf;
 
 	return rc;
 }
@@ -723,7 +736,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
 	int rc = 0;
 	unsigned int xid;
 	int i;
-	struct cifs_tcon *pTcon;
+	struct cifs_tcon *tcon;
 	struct cifsFileInfo *cifsFile = NULL;
 	char *current_entry;
 	int num_to_fill = 0;
@@ -781,12 +794,12 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
 			}
 		} /* else {
 			cifsFile->invalidHandle = true;
-			CIFSFindClose(xid, pTcon, cifsFile->fid.netfid);
+			tcon->ses->server->close(xid, tcon, &cifsFile->fid);
 		} */
 
-		pTcon = tlink_tcon(cifsFile->tlink);
-		rc = find_cifs_entry(xid, pTcon, file,
-				&current_entry, &num_to_fill);
+		tcon = tlink_tcon(cifsFile->tlink);
+		rc = find_cifs_entry(xid, tcon, file, &current_entry,
+				     &num_to_fill);
 		if (rc) {
 			cFYI(1, "fce error %d", rc);
 			goto rddir2_exit;
@@ -798,7 +811,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
 		}
 		cFYI(1, "loop through %d times filling dir for net buf %p",
 			num_to_fill, cifsFile->srch_inf.ntwrk_buf_start);
-		max_len = smbCalcSize((struct smb_hdr *)
+		max_len = tcon->ses->server->ops->calc_smb_size(
 				cifsFile->srch_inf.ntwrk_buf_start);
 		end_of_smb = cifsFile->srch_inf.ntwrk_buf_start + max_len;
 
@@ -815,10 +828,12 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
 					  num_to_fill, i);
 				break;
 			}
-			/* if buggy server returns . and .. late do
-			we want to check for that here? */
-			rc = cifs_filldir(current_entry, file,
-					filldir, direntry, tmp_buf, max_len);
+			/*
+			 * if buggy server returns . and .. late do we want to
+			 * check for that here?
+			 */
+			rc = cifs_filldir(current_entry, file, filldir,
+					  direntry, tmp_buf, max_len);
 			if (rc == -EOVERFLOW) {
 				rc = 0;
 				break;
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index ed311968437a..068d609bd02a 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -836,6 +836,33 @@ out:
 	return rc;
 }
 
+static int
+cifs_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
+		     const char *path, struct cifs_sb_info *cifs_sb,
+		     struct cifs_fid *fid, __u16 search_flags,
+		     struct cifs_search_info *srch_inf)
+{
+	return CIFSFindFirst(xid, tcon, path, cifs_sb->local_nls,
+			     &fid->netfid, search_flags, srch_inf,
+			     cifs_sb->mnt_cifs_flags &
+			     CIFS_MOUNT_MAP_SPECIAL_CHR, CIFS_DIR_SEP(cifs_sb));
+}
+
+static int
+cifs_query_dir_next(const unsigned int xid, struct cifs_tcon *tcon,
+		    struct cifs_fid *fid, __u16 search_flags,
+		    struct cifs_search_info *srch_inf)
+{
+	return CIFSFindNext(xid, tcon, fid->netfid, search_flags, srch_inf);
+}
+
+static int
+cifs_close_dir(const unsigned int xid, struct cifs_tcon *tcon,
+	       struct cifs_fid *fid)
+{
+	return CIFSFindClose(xid, tcon, fid->netfid);
+}
+
 struct smb_version_operations smb1_operations = {
 	.send_cancel = send_nt_cancel,
 	.compare_fids = cifs_compare_fids,
@@ -891,6 +918,10 @@ struct smb_version_operations smb1_operations = {
 	.async_writev = cifs_async_writev,
 	.sync_read = cifs_sync_read,
 	.sync_write = cifs_sync_write,
+	.query_dir_first = cifs_query_dir_first,
+	.query_dir_next = cifs_query_dir_next,
+	.close_dir = cifs_close_dir,
+	.calc_smb_size = smbCalcSize,
 };
 
 struct smb_version_values smb1_values = {
-- 
cgit 


From d324f08d6a87149597817f4496ef0f7ac185e8da Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <pshilovsky@samba.org>
Date: Tue, 18 Sep 2012 16:20:33 -0700
Subject: CIFS: Add readdir support for SMB2

Signed-off-by: Pavel Shilovsky <pshilovsky@samba.org>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/smb2misc.c  |   8 ++-
 fs/cifs/smb2ops.c   |  57 ++++++++++++++++++
 fs/cifs/smb2pdu.c   | 168 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/cifs/smb2pdu.h   |  28 +++++++++
 fs/cifs/smb2proto.h |   5 +-
 5 files changed, 264 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index 9275883c8530..78225f517a60 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -248,6 +248,11 @@ smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr)
 		*len = le32_to_cpu(((struct smb2_read_rsp *)hdr)->DataLength);
 		break;
 	case SMB2_QUERY_DIRECTORY:
+		*off = le16_to_cpu(
+		  ((struct smb2_query_directory_rsp *)hdr)->OutputBufferOffset);
+		*len = le32_to_cpu(
+		  ((struct smb2_query_directory_rsp *)hdr)->OutputBufferLength);
+		break;
 	case SMB2_IOCTL:
 	case SMB2_CHANGE_NOTIFY:
 	default:
@@ -290,8 +295,9 @@ smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr)
  * portion, the number of word parameters and the data portion of the message.
  */
 unsigned int
-smb2_calc_size(struct smb2_hdr *hdr)
+smb2_calc_size(void *buf)
 {
+	struct smb2_hdr *hdr = (struct smb2_hdr *)buf;
 	struct smb2_pdu *pdu = (struct smb2_pdu *)hdr;
 	int offset; /* the offset from the beginning of SMB to data area */
 	int data_length; /* the length of the variable length data area */
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 52bc93125726..0fa086c1b5dc 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -424,6 +424,59 @@ smb2_set_file_size(const unsigned int xid, struct cifs_tcon *tcon,
 			    cfile->fid.volatile_fid, cfile->pid, &eof);
 }
 
+static int
+smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
+		     const char *path, struct cifs_sb_info *cifs_sb,
+		     struct cifs_fid *fid, __u16 search_flags,
+		     struct cifs_search_info *srch_inf)
+{
+	__le16 *utf16_path;
+	int rc;
+	__u64 persistent_fid, volatile_fid;
+
+	utf16_path = cifs_convert_path_to_utf16(path, cifs_sb);
+	if (!utf16_path)
+		return -ENOMEM;
+
+	rc = SMB2_open(xid, tcon, utf16_path, &persistent_fid, &volatile_fid,
+		       FILE_READ_ATTRIBUTES | FILE_READ_DATA, FILE_OPEN, 0, 0,
+		       NULL);
+	kfree(utf16_path);
+	if (rc) {
+		cERROR(1, "open dir failed");
+		return rc;
+	}
+
+	srch_inf->entries_in_buffer = 0;
+	srch_inf->index_of_last_entry = 0;
+	fid->persistent_fid = persistent_fid;
+	fid->volatile_fid = volatile_fid;
+
+	rc = SMB2_query_directory(xid, tcon, persistent_fid, volatile_fid, 0,
+				  srch_inf);
+	if (rc) {
+		cERROR(1, "query directory failed");
+		SMB2_close(xid, tcon, persistent_fid, volatile_fid);
+	}
+	return rc;
+}
+
+static int
+smb2_query_dir_next(const unsigned int xid, struct cifs_tcon *tcon,
+		    struct cifs_fid *fid, __u16 search_flags,
+		    struct cifs_search_info *srch_inf)
+{
+	return SMB2_query_directory(xid, tcon, fid->persistent_fid,
+				    fid->volatile_fid, 0, srch_inf);
+}
+
+static int
+smb2_close_dir(const unsigned int xid, struct cifs_tcon *tcon,
+	       struct cifs_fid *fid)
+{
+	return SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid);
+}
+
 struct smb_version_operations smb21_operations = {
 	.setup_request = smb2_setup_request,
 	.setup_async_request = smb2_setup_async_request,
@@ -473,6 +526,10 @@ struct smb_version_operations smb21_operations = {
 	.async_writev = smb2_async_writev,
 	.sync_read = smb2_sync_read,
 	.sync_write = smb2_sync_write,
+	.query_dir_first = smb2_query_dir_first,
+	.query_dir_next = smb2_query_dir_next,
+	.close_dir = smb2_close_dir,
+	.calc_smb_size = smb2_calc_size,
 };
 
 struct smb_version_values smb21_values = {
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index a1314f99b4cc..21b3a652e192 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -45,6 +45,7 @@
 #include "ntlmssp.h"
 #include "smb2status.h"
 #include "smb2glob.h"
+#include "cifspdu.h"
 
 /*
  *  The following table defines the expected "StructureSize" of SMB2 requests
@@ -1603,6 +1604,173 @@ SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms,
 	return rc;
 }
 
+static unsigned int
+num_entries(char *bufstart, char *end_of_buf, char **lastentry, size_t size)
+{
+	int len;
+	unsigned int entrycount = 0;
+	unsigned int next_offset = 0;
+	FILE_DIRECTORY_INFO *entryptr;
+
+	if (bufstart == NULL)
+		return 0;
+
+	entryptr = (FILE_DIRECTORY_INFO *)bufstart;
+
+	while (1) {
+		entryptr = (FILE_DIRECTORY_INFO *)
+					((char *)entryptr + next_offset);
+
+		if ((char *)entryptr + size > end_of_buf) {
+			cERROR(1, "malformed search entry would overflow");
+			break;
+		}
+
+		len = le32_to_cpu(entryptr->FileNameLength);
+		if ((char *)entryptr + len + size > end_of_buf) {
+			cERROR(1, "directory entry name would overflow frame "
+				  "end of buf %p", end_of_buf);
+			break;
+		}
+
+		*lastentry = (char *)entryptr;
+		entrycount++;
+
+		next_offset = le32_to_cpu(entryptr->NextEntryOffset);
+		if (!next_offset)
+			break;
+	}
+
+	return entrycount;
+}
+
+/*
+ * Readdir/FindFirst
+ */
+int
+SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
+		     u64 persistent_fid, u64 volatile_fid, int index,
+		     struct cifs_search_info *srch_inf)
+{
+	struct smb2_query_directory_req *req;
+	struct smb2_query_directory_rsp *rsp = NULL;
+	struct kvec iov[2];
+	int rc = 0;
+	int len;
+	int resp_buftype;
+	unsigned char *bufptr;
+	struct TCP_Server_Info *server;
+	struct cifs_ses *ses = tcon->ses;
+	__le16 asteriks = cpu_to_le16('*');
+	char *end_of_smb;
+	unsigned int output_size = CIFSMaxBufSize;
+	size_t info_buf_size;
+
+	if (ses && (ses->server))
+		server = ses->server;
+	else
+		return -EIO;
+
+	rc = small_smb2_init(SMB2_QUERY_DIRECTORY, tcon, (void **) &req);
+	if (rc)
+		return rc;
+
+	switch (srch_inf->info_level) {
+	case SMB_FIND_FILE_DIRECTORY_INFO:
+		req->FileInformationClass = FILE_DIRECTORY_INFORMATION;
+		info_buf_size = sizeof(FILE_DIRECTORY_INFO) - 1;
+		break;
+	case SMB_FIND_FILE_ID_FULL_DIR_INFO:
+		req->FileInformationClass = FILEID_FULL_DIRECTORY_INFORMATION;
+		info_buf_size = sizeof(SEARCH_ID_FULL_DIR_INFO) - 1;
+		break;
+	default:
+		cERROR(1, "info level %u isn't supported",
+		       srch_inf->info_level);
+		rc = -EINVAL;
+		goto qdir_exit;
+	}
+
+	req->FileIndex = cpu_to_le32(index);
+	req->PersistentFileId = persistent_fid;
+	req->VolatileFileId = volatile_fid;
+
+	len = 0x2;
+	bufptr = req->Buffer;
+	memcpy(bufptr, &asteriks, len);
+
+	req->FileNameOffset =
+		cpu_to_le16(sizeof(struct smb2_query_directory_req) - 1 - 4);
+	req->FileNameLength = cpu_to_le16(len);
+	/*
+	 * BB could be 30 bytes or so longer if we used SMB2 specific
+	 * buffer lengths, but this is safe and close enough.
+	 */
+	output_size = min_t(unsigned int, output_size, server->maxBuf);
+	output_size = min_t(unsigned int, output_size, 2 << 15);
+	req->OutputBufferLength = cpu_to_le32(output_size);
+
+	iov[0].iov_base = (char *)req;
+	/* 4 for RFC1001 length and 1 for Buffer */
+	iov[0].iov_len = get_rfc1002_length(req) + 4 - 1;
+
+	iov[1].iov_base = (char *)(req->Buffer);
+	iov[1].iov_len = len;
+
+	inc_rfc1001_len(req, len - 1 /* Buffer */);
+
+	rc = SendReceive2(xid, ses, iov, 2, &resp_buftype, 0);
+	if (rc) {
+		cifs_stats_fail_inc(tcon, SMB2_QUERY_DIRECTORY_HE);
+		goto qdir_exit;
+	}
+	rsp = (struct smb2_query_directory_rsp *)iov[0].iov_base;
+
+	rc = validate_buf(le16_to_cpu(rsp->OutputBufferOffset),
+			  le32_to_cpu(rsp->OutputBufferLength), &rsp->hdr,
+			  info_buf_size);
+	if (rc)
+		goto qdir_exit;
+
+	srch_inf->unicode = true;
+
+	if (srch_inf->ntwrk_buf_start) {
+		if (srch_inf->smallBuf)
+			cifs_small_buf_release(srch_inf->ntwrk_buf_start);
+		else
+			cifs_buf_release(srch_inf->ntwrk_buf_start);
+	}
+	srch_inf->ntwrk_buf_start = (char *)rsp;
+	srch_inf->srch_entries_start = srch_inf->last_entry = 4 /* rfclen */ +
+		(char *)&rsp->hdr + le16_to_cpu(rsp->OutputBufferOffset);
+	/* 4 for rfc1002 length field */
+	end_of_smb = get_rfc1002_length(rsp) + 4 + (char *)&rsp->hdr;
+	srch_inf->entries_in_buffer =
+			num_entries(srch_inf->srch_entries_start, end_of_smb,
+				    &srch_inf->last_entry, info_buf_size);
+	srch_inf->index_of_last_entry += srch_inf->entries_in_buffer;
+	cFYI(1, "num entries %d last_index %lld srch start %p srch end %p",
+		srch_inf->entries_in_buffer, srch_inf->index_of_last_entry,
+		srch_inf->srch_entries_start, srch_inf->last_entry);
+	if (resp_buftype == CIFS_LARGE_BUFFER)
+		srch_inf->smallBuf = false;
+	else if (resp_buftype == CIFS_SMALL_BUFFER)
+		srch_inf->smallBuf = true;
+	else
+		cERROR(1, "illegal search buffer type");
+
+	if (rsp->hdr.Status == STATUS_NO_MORE_FILES)
+		srch_inf->endOfSearch = 1;
+	else
+		srch_inf->endOfSearch = 0;
+
+	return rc;
+
+qdir_exit:
+	free_rsp_buf(resp_buftype, rsp);
+	return rc;
+}
+
 static int
 send_set_info(const unsigned int xid, struct cifs_tcon *tcon,
 	       u64 persistent_fid, u64 volatile_fid, u32 pid, int info_class,
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index d775941f552a..e6ddd1f79706 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -538,6 +538,34 @@ struct smb2_echo_rsp {
 	__u16  Reserved;
 } __packed;
 
+/* search (query_directory) Flags field */
+#define SMB2_RESTART_SCANS		0x01
+#define SMB2_RETURN_SINGLE_ENTRY	0x02
+#define SMB2_INDEX_SPECIFIED		0x04
+#define SMB2_REOPEN			0x10
+
+struct smb2_query_directory_req {
+	struct smb2_hdr hdr;
+	__le16 StructureSize; /* Must be 33 */
+	__u8   FileInformationClass;
+	__u8   Flags;
+	__le32 FileIndex;
+	__u64  PersistentFileId; /* opaque endianness */
+	__u64  VolatileFileId; /* opaque endianness */
+	__le16 FileNameOffset;
+	__le16 FileNameLength;
+	__le32 OutputBufferLength;
+	__u8   Buffer[1];
+} __packed;
+
+struct smb2_query_directory_rsp {
+	struct smb2_hdr hdr;
+	__le16 StructureSize; /* Must be 9 */
+	__le16 OutputBufferOffset;
+	__le32 OutputBufferLength;
+	__u8   Buffer[1];
+} __packed;
+
 /* Possible InfoType values */
 #define SMB2_O_INFO_FILE	0x01
 #define SMB2_O_INFO_FILESYSTEM	0x02
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index 277472433158..0d29db222a5a 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -34,7 +34,7 @@ struct statfs;
  */
 extern int map_smb2_to_linux_error(char *buf, bool log_err);
 extern int smb2_check_message(char *buf, unsigned int length);
-extern unsigned int smb2_calc_size(struct smb2_hdr *hdr);
+extern unsigned int smb2_calc_size(void *buf);
 extern char *smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr);
 extern __le16 *cifs_convert_path_to_utf16(const char *from,
 					  struct cifs_sb_info *cifs_sb);
@@ -117,6 +117,9 @@ extern int smb2_async_writev(struct cifs_writedata *wdata);
 extern int SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms,
 		      unsigned int *nbytes, struct kvec *iov, int n_vec);
 extern int SMB2_echo(struct TCP_Server_Info *server);
+extern int SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
+				u64 persistent_fid, u64 volatile_fid, int index,
+				struct cifs_search_info *srch_inf);
 extern int SMB2_rename(const unsigned int xid, struct cifs_tcon *tcon,
 		       u64 persistent_fid, u64 volatile_fid,
 		       __le16 *target_file);
-- 
cgit 


From 2e44b2887882134abf353b28867b82645e9f0856 Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <pshilovsky@samba.org>
Date: Tue, 18 Sep 2012 16:20:33 -0700
Subject: CIFS: Process oplocks for SMB2

Signed-off-by: Pavel Shilovsky <piastryyy@gmail.com>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/cifsglob.h  |  2 ++
 fs/cifs/connect.c   |  4 ++++
 fs/cifs/smb2file.c  | 24 ++++++++++++++++++++++--
 fs/cifs/smb2inode.c |  3 ++-
 fs/cifs/smb2ops.c   | 34 ++++++++++++++++++++++++++++++----
 fs/cifs/smb2pdu.c   | 10 ++++++----
 fs/cifs/smb2proto.h |  3 ++-
 7 files changed, 68 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 9adf211ca95a..3eb59ed6904a 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -328,6 +328,8 @@ struct smb_version_operations {
 			 struct cifs_fid *);
 	/* calculate a size of SMB message */
 	unsigned int (*calc_smb_size)(void *);
+	/* check for STATUS_PENDING and process it in a positive case */
+	bool (*is_status_pending)(char *, struct TCP_Server_Info *, int);
 };
 
 struct smb_version_values {
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index c31b30b572e0..549409b1c776 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -819,6 +819,10 @@ standard_receive3(struct TCP_Server_Info *server, struct mid_q_entry *mid)
 		cifs_dump_mem("Bad SMB: ", buf,
 			min_t(unsigned int, server->total_read, 48));
 
+	if (server->ops->is_status_pending &&
+	    server->ops->is_status_pending(buf, server, length))
+		return -1;
+
 	if (!mid)
 		return length;
 
diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c
index a7618dfb7712..5ff25e025215 100644
--- a/fs/cifs/smb2file.c
+++ b/fs/cifs/smb2file.c
@@ -34,6 +34,26 @@
 #include "fscache.h"
 #include "smb2proto.h"
 
+void
+smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock)
+{
+	oplock &= 0xFF;
+	if (oplock == SMB2_OPLOCK_LEVEL_EXCLUSIVE) {
+		cinode->clientCanCacheAll = true;
+		cinode->clientCanCacheRead = true;
+		cFYI(1, "Exclusive Oplock granted on inode %p",
+		     &cinode->vfs_inode);
+	} else if (oplock == SMB2_OPLOCK_LEVEL_II) {
+		cinode->clientCanCacheAll = false;
+		cinode->clientCanCacheRead = true;
+		cFYI(1, "Level II Oplock granted on inode %p",
+		    &cinode->vfs_inode);
+	} else {
+		cinode->clientCanCacheAll = false;
+		cinode->clientCanCacheRead = false;
+	}
+}
+
 int
 smb2_open_file(const unsigned int xid, struct cifs_tcon *tcon, const char *path,
 	       int disposition, int desired_access, int create_options,
@@ -58,10 +78,11 @@ smb2_open_file(const unsigned int xid, struct cifs_tcon *tcon, const char *path,
 	}
 
 	desired_access |= FILE_READ_ATTRIBUTES;
+	*oplock = SMB2_OPLOCK_LEVEL_EXCLUSIVE;
 
 	rc = SMB2_open(xid, tcon, smb2_path, &fid->persistent_fid,
 		       &fid->volatile_fid, desired_access, disposition,
-		       0, 0, smb2_data);
+		       0, 0, (__u8 *)oplock, smb2_data);
 	if (rc)
 		goto out;
 
@@ -79,7 +100,6 @@ smb2_open_file(const unsigned int xid, struct cifs_tcon *tcon, const char *path,
 	}
 
 out:
-	*oplock = 0;
 	kfree(smb2_data);
 	kfree(smb2_path);
 	return rc;
diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c
index 1bd6b0f0c10e..706482452df4 100644
--- a/fs/cifs/smb2inode.c
+++ b/fs/cifs/smb2inode.c
@@ -47,6 +47,7 @@ smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon,
 	int rc, tmprc = 0;
 	u64 persistent_fid, volatile_fid;
 	__le16 *utf16_path;
+	__u8 oplock = SMB2_OPLOCK_LEVEL_NONE;
 
 	utf16_path = cifs_convert_path_to_utf16(full_path, cifs_sb);
 	if (!utf16_path)
@@ -54,7 +55,7 @@ smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon,
 
 	rc = SMB2_open(xid, tcon, utf16_path, &persistent_fid, &volatile_fid,
 		       desired_access, create_disposition, file_attributes,
-		       create_options, NULL);
+		       create_options, &oplock, NULL);
 	if (rc) {
 		kfree(utf16_path);
 		return rc;
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 0fa086c1b5dc..2d88c90ded0c 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -23,6 +23,7 @@
 #include "smb2proto.h"
 #include "cifsproto.h"
 #include "cifs_debug.h"
+#include "smb2status.h"
 
 static int
 change_conf(struct TCP_Server_Info *server)
@@ -207,13 +208,14 @@ smb2_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon,
 	int rc;
 	__u64 persistent_fid, volatile_fid;
 	__le16 *utf16_path;
+	__u8 oplock = SMB2_OPLOCK_LEVEL_NONE;
 
 	utf16_path = cifs_convert_path_to_utf16(full_path, cifs_sb);
 	if (!utf16_path)
 		return -ENOMEM;
 
 	rc = SMB2_open(xid, tcon, utf16_path, &persistent_fid, &volatile_fid,
-		       FILE_READ_ATTRIBUTES, FILE_OPEN, 0, 0, NULL);
+		       FILE_READ_ATTRIBUTES, FILE_OPEN, 0, 0, &oplock, NULL);
 	if (rc) {
 		kfree(utf16_path);
 		return rc;
@@ -358,10 +360,10 @@ smb2_print_stats(struct seq_file *m, struct cifs_tcon *tcon)
 static void
 smb2_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock)
 {
-	/* struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); */
+	struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
 	cfile->fid.persistent_fid = fid->persistent_fid;
 	cfile->fid.volatile_fid = fid->volatile_fid;
-	/* cifs_set_oplock_level(cinode, oplock); */
+	smb2_set_oplock_level(cinode, oplock);
 	/* cinode->can_cache_brlcks = cinode->clientCanCacheAll; */
 }
 
@@ -432,6 +434,7 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
 {
 	__le16 *utf16_path;
 	int rc;
+	__u8 oplock = SMB2_OPLOCK_LEVEL_NONE;
 	__u64 persistent_fid, volatile_fid;
 
 	utf16_path = cifs_convert_path_to_utf16(path, cifs_sb);
@@ -440,7 +443,7 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
 
 	rc = SMB2_open(xid, tcon, utf16_path, &persistent_fid, &volatile_fid,
 		       FILE_READ_ATTRIBUTES | FILE_READ_DATA, FILE_OPEN, 0, 0,
-		       NULL);
+		       &oplock, NULL);
 	kfree(utf16_path);
 	if (rc) {
 		cERROR(1, "open dir failed");
@@ -477,6 +480,28 @@ smb2_close_dir(const unsigned int xid, struct cifs_tcon *tcon,
 	return SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid);
 }
 
+/*
+* If we negotiate SMB2 protocol and get STATUS_PENDING - update
+* the number of credits and return true. Otherwise - return false.
+*/
+static bool
+smb2_is_status_pending(char *buf, struct TCP_Server_Info *server, int length)
+{
+	struct smb2_hdr *hdr = (struct smb2_hdr *)buf;
+
+	if (le32_to_cpu(hdr->Status) != STATUS_PENDING)
+		return false;
+
+	if (!length) {
+		spin_lock(&server->req_lock);
+		server->credits += le16_to_cpu(hdr->CreditRequest);
+		spin_unlock(&server->req_lock);
+		wake_up(&server->request_q);
+	}
+
+	return true;
+}
+
 struct smb_version_operations smb21_operations = {
 	.setup_request = smb2_setup_request,
 	.setup_async_request = smb2_setup_async_request,
@@ -530,6 +555,7 @@ struct smb_version_operations smb21_operations = {
 	.query_dir_next = smb2_query_dir_next,
 	.close_dir = smb2_close_dir,
 	.calc_smb_size = smb2_calc_size,
+	.is_status_pending = smb2_is_status_pending,
 };
 
 struct smb_version_values smb21_values = {
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 21b3a652e192..e97c256c8a42 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -872,7 +872,7 @@ int
 SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path,
 	  u64 *persistent_fid, u64 *volatile_fid, __u32 desired_access,
 	  __u32 create_disposition, __u32 file_attributes, __u32 create_options,
-	  struct smb2_file_all_info *buf)
+	  __u8 *oplock, struct smb2_file_all_info *buf)
 {
 	struct smb2_create_req *req;
 	struct smb2_create_rsp *rsp;
@@ -895,9 +895,9 @@ SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path,
 	if (rc)
 		return rc;
 
-	/* if (server->oplocks)
-		req->RequestedOplockLevel = SMB2_OPLOCK_LEVEL_BATCH;
-	else */
+	if (server->oplocks)
+		req->RequestedOplockLevel = *oplock;
+	else
 		req->RequestedOplockLevel = SMB2_OPLOCK_LEVEL_NONE;
 	req->ImpersonationLevel = IL_IMPERSONATION;
 	req->DesiredAccess = cpu_to_le32(desired_access);
@@ -954,6 +954,8 @@ SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path,
 		buf->NumberOfLinks = cpu_to_le32(1);
 		buf->DeletePending = 0;
 	}
+
+	*oplock = rsp->OplockLevel;
 creat_exit:
 	free_rsp_buf(resp_buftype, rsp);
 	return rc;
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index 0d29db222a5a..d18339fcc8a2 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -82,6 +82,7 @@ extern int smb2_open_file(const unsigned int xid, struct cifs_tcon *tcon,
 			  int desired_access, int create_options,
 			  struct cifs_fid *fid, __u32 *oplock,
 			  FILE_ALL_INFO *buf, struct cifs_sb_info *cifs_sb);
+extern void smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock);
 
 /*
  * SMB2 Worker functions - most of protocol specific implementation details
@@ -99,7 +100,7 @@ extern int SMB2_open(const unsigned int xid, struct cifs_tcon *tcon,
 		     __le16 *path, u64 *persistent_fid, u64 *volatile_fid,
 		     __u32 desired_access, __u32 create_disposition,
 		     __u32 file_attributes, __u32 create_options,
-		     struct smb2_file_all_info *buf);
+		     __u8 *oplock, struct smb2_file_all_info *buf);
 extern int SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
 		      u64 persistent_file_id, u64 volatile_file_id);
 extern int SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon,
-- 
cgit 


From 95a3f2f377735ed13e42d3b8039aa1d73af2c90e Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <pshilovsky@samba.org>
Date: Tue, 18 Sep 2012 16:20:33 -0700
Subject: CIFS: Move oplock break to ops struct

Signed-off-by: Pavel Shilovsky <pshilovsky@samba.org>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/cifsglob.h |  4 ++++
 fs/cifs/file.c     |  7 +++----
 fs/cifs/smb1ops.c  | 10 ++++++++++
 3 files changed, 17 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 3eb59ed6904a..a95c56dc7058 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -178,6 +178,7 @@ struct cifs_readdata;
 struct cifs_writedata;
 struct cifs_io_parms;
 struct cifs_search_info;
+struct cifsInodeInfo;
 
 struct smb_version_operations {
 	int (*send_cancel)(struct TCP_Server_Info *, void *,
@@ -330,6 +331,9 @@ struct smb_version_operations {
 	unsigned int (*calc_smb_size)(void *);
 	/* check for STATUS_PENDING and process it in a positive case */
 	bool (*is_status_pending)(char *, struct TCP_Server_Info *, int);
+	/* send oplock break response */
+	int (*oplock_response)(struct cifs_tcon *, struct cifs_fid *,
+			       struct cifsInodeInfo *);
 };
 
 struct smb_version_values {
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index fb6b4413255b..2418618118b6 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -3404,6 +3404,7 @@ void cifs_oplock_break(struct work_struct *work)
 						  oplock_break);
 	struct inode *inode = cfile->dentry->d_inode;
 	struct cifsInodeInfo *cinode = CIFS_I(inode);
+	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
 	int rc = 0;
 
 	if (inode && S_ISREG(inode->i_mode)) {
@@ -3431,10 +3432,8 @@ void cifs_oplock_break(struct work_struct *work)
 	 * disconnected since oplock already released by the server
 	 */
 	if (!cfile->oplock_break_cancelled) {
-		rc = CIFSSMBLock(0, tlink_tcon(cfile->tlink), cfile->fid.netfid,
-				 current->tgid, 0, 0, 0, 0,
-				 LOCKING_ANDX_OPLOCK_RELEASE, false,
-				 cinode->clientCanCacheRead ? 1 : 0);
+		rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
+							     cinode);
 		cFYI(1, "Oplock release rc = %d", rc);
 	}
 }
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index 068d609bd02a..f55b2e3476e8 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -863,6 +863,15 @@ cifs_close_dir(const unsigned int xid, struct cifs_tcon *tcon,
 	return CIFSFindClose(xid, tcon, fid->netfid);
 }
 
+static int
+cifs_oplock_response(struct cifs_tcon *tcon, struct cifs_fid *fid,
+		     struct cifsInodeInfo *cinode)
+{
+	return CIFSSMBLock(0, tcon, fid->netfid, current->tgid, 0, 0, 0, 0,
+			   LOCKING_ANDX_OPLOCK_RELEASE, false,
+			   cinode->clientCanCacheRead ? 1 : 0);
+}
+
 struct smb_version_operations smb1_operations = {
 	.send_cancel = send_nt_cancel,
 	.compare_fids = cifs_compare_fids,
@@ -922,6 +931,7 @@ struct smb_version_operations smb1_operations = {
 	.query_dir_next = cifs_query_dir_next,
 	.close_dir = cifs_close_dir,
 	.calc_smb_size = smbCalcSize,
+	.oplock_response = cifs_oplock_response,
 };
 
 struct smb_version_values smb1_values = {
-- 
cgit 


From 983c88a497914d60c91f431b05a8449ddda19167 Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <pshilovsky@samba.org>
Date: Tue, 18 Sep 2012 16:20:33 -0700
Subject: CIFS: Add oplock break support for SMB2

Signed-off-by: Pavel Shilovsky <pshilovsky@samba.org>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/smb2misc.c  | 76 +++++++++++++++++++++++++++++++++++++++++++++++++++--
 fs/cifs/smb2ops.c   | 22 ++++++++++++++++
 fs/cifs/smb2pdu.c   | 30 +++++++++++++++++++++
 fs/cifs/smb2pdu.h   | 10 +++++++
 fs/cifs/smb2proto.h |  5 ++++
 5 files changed, 141 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index 78225f517a60..01479a3fee8d 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -142,8 +142,8 @@ smb2_check_message(char *buf, unsigned int length)
 	}
 
 	if (smb2_rsp_struct_sizes[command] != pdu->StructureSize2) {
-		if (hdr->Status == 0 ||
-		    pdu->StructureSize2 != SMB2_ERROR_STRUCTURE_SIZE2) {
+		if (command != SMB2_OPLOCK_BREAK_HE && (hdr->Status == 0 ||
+		    pdu->StructureSize2 != SMB2_ERROR_STRUCTURE_SIZE2)) {
 			/* error packets have 9 byte structure size */
 			cERROR(1, "Illegal response size %u for command %d",
 				   le16_to_cpu(pdu->StructureSize2), command);
@@ -162,6 +162,9 @@ smb2_check_message(char *buf, unsigned int length)
 	if (4 + len != clc_len) {
 		cFYI(1, "Calculated size %u length %u mismatch mid %llu",
 			clc_len, 4 + len, mid);
+		/* Windows 7 server returns 24 bytes more */
+		if (clc_len + 20 == len && command == SMB2_OPLOCK_BREAK_HE)
+			return 0;
 		/* server can return one byte more */
 		if (clc_len == 4 + len + 1)
 			return 0;
@@ -356,3 +359,72 @@ cifs_convert_path_to_utf16(const char *from, struct cifs_sb_info *cifs_sb)
 					CIFS_MOUNT_MAP_SPECIAL_CHR);
 	return to;
 }
+
+bool
+smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
+{
+	struct smb2_oplock_break *rsp = (struct smb2_oplock_break *)buffer;
+	struct list_head *tmp, *tmp1, *tmp2;
+	struct cifs_ses *ses;
+	struct cifs_tcon *tcon;
+	struct cifsInodeInfo *cinode;
+	struct cifsFileInfo *cfile;
+
+	cFYI(1, "Checking for oplock break");
+
+	if (rsp->hdr.Command != SMB2_OPLOCK_BREAK)
+		return false;
+
+	if (le16_to_cpu(rsp->StructureSize) !=
+				smb2_rsp_struct_sizes[SMB2_OPLOCK_BREAK_HE]) {
+		return false;
+	}
+
+	cFYI(1, "oplock level 0x%d", rsp->OplockLevel);
+
+	/* look up tcon based on tid & uid */
+	spin_lock(&cifs_tcp_ses_lock);
+	list_for_each(tmp, &server->smb_ses_list) {
+		ses = list_entry(tmp, struct cifs_ses, smb_ses_list);
+		list_for_each(tmp1, &ses->tcon_list) {
+			tcon = list_entry(tmp1, struct cifs_tcon, tcon_list);
+
+			cifs_stats_inc(&tcon->stats.cifs_stats.num_oplock_brks);
+			spin_lock(&cifs_file_list_lock);
+			list_for_each(tmp2, &tcon->openFileList) {
+				cfile = list_entry(tmp2, struct cifsFileInfo,
+						     tlist);
+				if (rsp->PersistentFid !=
+				    cfile->fid.persistent_fid ||
+				    rsp->VolatileFid !=
+				    cfile->fid.volatile_fid)
+					continue;
+
+				cFYI(1, "file id match, oplock break");
+				cinode = CIFS_I(cfile->dentry->d_inode);
+
+				if (!cinode->clientCanCacheAll &&
+				    rsp->OplockLevel == SMB2_OPLOCK_LEVEL_NONE)
+					cfile->oplock_break_cancelled = true;
+				else
+					cfile->oplock_break_cancelled = false;
+
+				smb2_set_oplock_level(cinode,
+				  rsp->OplockLevel ? SMB2_OPLOCK_LEVEL_II : 0);
+
+				queue_work(cifsiod_wq, &cfile->oplock_break);
+
+				spin_unlock(&cifs_file_list_lock);
+				spin_unlock(&cifs_tcp_ses_lock);
+				return true;
+			}
+			spin_unlock(&cifs_file_list_lock);
+			spin_unlock(&cifs_tcp_ses_lock);
+			cFYI(1, "No matching file for oplock break");
+			return true;
+		}
+	}
+	spin_unlock(&cifs_tcp_ses_lock);
+	cFYI(1, "Can not process oplock break for non-existent connection");
+	return false;
+}
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 2d88c90ded0c..8c72e0768d12 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -65,6 +65,17 @@ smb2_add_credits(struct TCP_Server_Info *server, const unsigned int add,
 	server->in_flight--;
 	if (server->in_flight == 0 && (optype & CIFS_OP_MASK) != CIFS_NEG_OP)
 		rc = change_conf(server);
+	/*
+	 * Sometimes server returns 0 credits on oplock break ack - we need to
+	 * rebalance credits in this case.
+	 */
+	else if (server->in_flight > 0 && server->oplock_credits == 0 &&
+		 server->oplocks) {
+		if (server->credits > 1) {
+			server->credits--;
+			server->oplock_credits++;
+		}
+	}
 	spin_unlock(&server->req_lock);
 	wake_up(&server->request_q);
 	if (rc)
@@ -502,6 +513,15 @@ smb2_is_status_pending(char *buf, struct TCP_Server_Info *server, int length)
 	return true;
 }
 
+static int
+smb2_oplock_response(struct cifs_tcon *tcon, struct cifs_fid *fid,
+		     struct cifsInodeInfo *cinode)
+{
+	return SMB2_oplock_break(0, tcon, fid->persistent_fid,
+				 fid->volatile_fid,
+				 cinode->clientCanCacheRead ? 1 : 0);
+}
+
 struct smb_version_operations smb21_operations = {
 	.setup_request = smb2_setup_request,
 	.setup_async_request = smb2_setup_async_request,
@@ -519,6 +539,7 @@ struct smb_version_operations smb21_operations = {
 	.dump_detail = smb2_dump_detail,
 	.clear_stats = smb2_clear_stats,
 	.print_stats = smb2_print_stats,
+	.is_oplock_break = smb2_is_valid_oplock_break,
 	.need_neg = smb2_need_neg,
 	.negotiate = smb2_negotiate,
 	.negotiate_wsize = smb2_negotiate_wsize,
@@ -556,6 +577,7 @@ struct smb_version_operations smb21_operations = {
 	.close_dir = smb2_close_dir,
 	.calc_smb_size = smb2_calc_size,
 	.is_status_pending = smb2_is_status_pending,
+	.oplock_response = smb2_oplock_response,
 };
 
 struct smb_version_values smb21_values = {
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index e97c256c8a42..566d86b0ad96 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1941,3 +1941,33 @@ SMB2_set_info(const unsigned int xid, struct cifs_tcon *tcon,
 			     current->tgid, FILE_BASIC_INFORMATION, 1,
 			     (void **)&buf, &size);
 }
+
+int
+SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon,
+		  const u64 persistent_fid, const u64 volatile_fid,
+		  __u8 oplock_level)
+{
+	int rc;
+	struct smb2_oplock_break *req = NULL;
+
+	cFYI(1, "SMB2_oplock_break");
+	rc = small_smb2_init(SMB2_OPLOCK_BREAK, tcon, (void **) &req);
+
+	if (rc)
+		return rc;
+
+	req->VolatileFid = volatile_fid;
+	req->PersistentFid = persistent_fid;
+	req->OplockLevel = oplock_level;
+	req->hdr.CreditRequest = cpu_to_le16(1);
+
+	rc = SendReceiveNoRsp(xid, tcon->ses, (char *) req, CIFS_OBREAK_OP);
+	/* SMB2 buffer freed by function above */
+
+	if (rc) {
+		cifs_stats_fail_inc(tcon, SMB2_OPLOCK_BREAK_HE);
+		cFYI(1, "Send error in Oplock Break = %d", rc);
+	}
+
+	return rc;
+}
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index e6ddd1f79706..8a0745764297 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -615,6 +615,16 @@ struct smb2_set_info_rsp {
 	__le16 StructureSize; /* Must be 2 */
 } __packed;
 
+struct smb2_oplock_break {
+	struct smb2_hdr hdr;
+	__le16 StructureSize; /* Must be 24 */
+	__u8   OplockLevel;
+	__u8   Reserved;
+	__le32 Reserved2;
+	__u64  PersistentFid;
+	__u64  VolatileFid;
+} __packed;
+
 /*
  *	PDU infolevel structure definitions
  *	BB consider moving to a different header
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index d18339fcc8a2..de554b716fdf 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -49,6 +49,8 @@ extern int smb2_setup_async_request(struct TCP_Server_Info *server,
 				    struct kvec *iov, unsigned int nvec,
 				    struct mid_q_entry **ret_mid);
 extern void smb2_echo_request(struct work_struct *work);
+extern bool smb2_is_valid_oplock_break(char *buffer,
+				       struct TCP_Server_Info *srv);
 
 extern void move_smb2_info_to_cifs(FILE_ALL_INFO *dst,
 				   struct smb2_file_all_info *src);
@@ -133,5 +135,8 @@ extern int SMB2_set_eof(const unsigned int xid, struct cifs_tcon *tcon,
 extern int SMB2_set_info(const unsigned int xid, struct cifs_tcon *tcon,
 			 u64 persistent_fid, u64 volatile_fid,
 			 FILE_BASIC_INFO *buf);
+extern int SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon,
+			     const u64 persistent_fid, const u64 volatile_fid,
+			     const __u8 oplock_level);
 
 #endif			/* _SMB2PROTO_H */
-- 
cgit 


From 76ec5e33846de386f44826f145cd725b92c23630 Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <pshilovsky@samba.org>
Date: Tue, 18 Sep 2012 16:20:33 -0700
Subject: CIFS: Move statfs to ops struct

Signed-off-by: Pavel Shilovsky <pshilovsky@samba.org>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/cifsfs.c   | 29 ++++-------------------------
 fs/cifs/cifsglob.h |  5 +++++
 fs/cifs/smb1ops.c  | 34 ++++++++++++++++++++++++++++++++++
 3 files changed, 43 insertions(+), 25 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 2829f374dbf7..4dda4890d776 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -51,7 +51,6 @@
 #ifdef CONFIG_CIFS_SMB2
 #include "smb2pdu.h"
 #endif
-#define CIFS_MAGIC_NUMBER 0xFF534D42	/* the first four bytes of SMB PDUs */
 
 int cifsFYI = 0;
 int cifsERROR = 1;
@@ -164,13 +163,12 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	struct super_block *sb = dentry->d_sb;
 	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
 	struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
-	int rc = -EOPNOTSUPP;
+	struct TCP_Server_Info *server = tcon->ses->server;
 	unsigned int xid;
+	int rc = 0;
 
 	xid = get_xid();
 
-	buf->f_type = CIFS_MAGIC_NUMBER;
-
 	/*
 	 * PATH_MAX may be too long - it would presumably be total path,
 	 * but note that some servers (includinng Samba 3) have a shorter
@@ -182,27 +180,8 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	buf->f_files = 0;	/* undefined */
 	buf->f_ffree = 0;	/* unlimited */
 
-	/*
-	 * We could add a second check for a QFS Unix capability bit
-	 */
-	if ((tcon->ses->capabilities & CAP_UNIX) &&
-	    (CIFS_POSIX_EXTENSIONS & le64_to_cpu(tcon->fsUnixInfo.Capability)))
-		rc = CIFSSMBQFSPosixInfo(xid, tcon, buf);
-
-	/*
-	 * Only need to call the old QFSInfo if failed on newer one,
-	 * e.g. by OS/2.
-	 **/
-	if (rc && (tcon->ses->capabilities & CAP_NT_SMBS))
-		rc = CIFSSMBQFSInfo(xid, tcon, buf);
-
-	/*
-	 * Some old Windows servers also do not support level 103, retry with
-	 * older level one if old server failed the previous call or we
-	 * bypassed it because we detected that this was an older LANMAN sess
-	 */
-	if (rc)
-		rc = SMBOldQFSInfo(xid, tcon, buf);
+	if (server->ops->queryfs)
+		rc = server->ops->queryfs(xid, tcon, buf);
 
 	free_xid(xid);
 	return 0;
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index a95c56dc7058..3c007fe641f9 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -32,6 +32,8 @@
 #include "smb2pdu.h"
 #endif
 
+#define CIFS_MAGIC_NUMBER 0xFF534D42      /* the first four bytes of SMB PDUs */
+
 /*
  * The sizes of various internal tables and strings
  */
@@ -334,6 +336,9 @@ struct smb_version_operations {
 	/* send oplock break response */
 	int (*oplock_response)(struct cifs_tcon *, struct cifs_fid *,
 			       struct cifsInodeInfo *);
+	/* query remote filesystem */
+	int (*queryfs)(const unsigned int, struct cifs_tcon *,
+		       struct kstatfs *);
 };
 
 struct smb_version_values {
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index f55b2e3476e8..f6c7a1c9a1b6 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -18,6 +18,7 @@
  */
 
 #include <linux/pagemap.h>
+#include <linux/vfs.h>
 #include "cifsglob.h"
 #include "cifsproto.h"
 #include "cifs_debug.h"
@@ -872,6 +873,38 @@ cifs_oplock_response(struct cifs_tcon *tcon, struct cifs_fid *fid,
 			   cinode->clientCanCacheRead ? 1 : 0);
 }
 
+static int
+cifs_queryfs(const unsigned int xid, struct cifs_tcon *tcon,
+	     struct kstatfs *buf)
+{
+	int rc = -EOPNOTSUPP;
+
+	buf->f_type = CIFS_MAGIC_NUMBER;
+
+	/*
+	 * We could add a second check for a QFS Unix capability bit
+	 */
+	if ((tcon->ses->capabilities & CAP_UNIX) &&
+	    (CIFS_POSIX_EXTENSIONS & le64_to_cpu(tcon->fsUnixInfo.Capability)))
+		rc = CIFSSMBQFSPosixInfo(xid, tcon, buf);
+
+	/*
+	 * Only need to call the old QFSInfo if failed on newer one,
+	 * e.g. by OS/2.
+	 **/
+	if (rc && (tcon->ses->capabilities & CAP_NT_SMBS))
+		rc = CIFSSMBQFSInfo(xid, tcon, buf);
+
+	/*
+	 * Some old Windows servers also do not support level 103, retry with
+	 * older level one if old server failed the previous call or we
+	 * bypassed it because we detected that this was an older LANMAN sess
+	 */
+	if (rc)
+		rc = SMBOldQFSInfo(xid, tcon, buf);
+	return rc;
+}
+
 struct smb_version_operations smb1_operations = {
 	.send_cancel = send_nt_cancel,
 	.compare_fids = cifs_compare_fids,
@@ -932,6 +965,7 @@ struct smb_version_operations smb1_operations = {
 	.close_dir = cifs_close_dir,
 	.calc_smb_size = smbCalcSize,
 	.oplock_response = cifs_oplock_response,
+	.queryfs = cifs_queryfs,
 };
 
 struct smb_version_values smb1_values = {
-- 
cgit 


From 6fc05c25ca35e65ee1759dd803f23576a268f5ec Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <pshilovsky@samba.org>
Date: Tue, 18 Sep 2012 16:20:34 -0700
Subject: CIFS: Add statfs support for SMB2

Signed-off-by: Pavel Shilovsky <pshilovsky@samba.org>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/smb2glob.h  |  2 ++
 fs/cifs/smb2ops.c   | 22 +++++++++++++++
 fs/cifs/smb2pdu.c   | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/cifs/smb2pdu.h   | 19 +++++++++++++
 fs/cifs/smb2proto.h |  3 ++
 5 files changed, 127 insertions(+)

(limited to 'fs')

diff --git a/fs/cifs/smb2glob.h b/fs/cifs/smb2glob.h
index 05d429b1c37e..7c0e2143e775 100644
--- a/fs/cifs/smb2glob.h
+++ b/fs/cifs/smb2glob.h
@@ -23,6 +23,8 @@
 #ifndef _SMB2_GLOB_H
 #define _SMB2_GLOB_H
 
+#define SMB2_MAGIC_NUMBER 0xFE534D42
+
 /*
  *****************************************************************
  * Constants go here
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 8c72e0768d12..3a8c68258026 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -18,12 +18,14 @@
  */
 
 #include <linux/pagemap.h>
+#include <linux/vfs.h>
 #include "cifsglob.h"
 #include "smb2pdu.h"
 #include "smb2proto.h"
 #include "cifsproto.h"
 #include "cifs_debug.h"
 #include "smb2status.h"
+#include "smb2glob.h"
 
 static int
 change_conf(struct TCP_Server_Info *server)
@@ -522,6 +524,25 @@ smb2_oplock_response(struct cifs_tcon *tcon, struct cifs_fid *fid,
 				 cinode->clientCanCacheRead ? 1 : 0);
 }
 
+static int
+smb2_queryfs(const unsigned int xid, struct cifs_tcon *tcon,
+	     struct kstatfs *buf)
+{
+	int rc;
+	u64 persistent_fid, volatile_fid;
+	__le16 srch_path = 0; /* Null - open root of share */
+	u8 oplock = SMB2_OPLOCK_LEVEL_NONE;
+
+	rc = SMB2_open(xid, tcon, &srch_path, &persistent_fid, &volatile_fid,
+		       FILE_READ_ATTRIBUTES, FILE_OPEN, 0, 0, &oplock, NULL);
+	if (rc)
+		return rc;
+	buf->f_type = SMB2_MAGIC_NUMBER;
+	rc = SMB2_QFS_info(xid, tcon, persistent_fid, volatile_fid, buf);
+	SMB2_close(xid, tcon, persistent_fid, volatile_fid);
+	return rc;
+}
+
 struct smb_version_operations smb21_operations = {
 	.setup_request = smb2_setup_request,
 	.setup_async_request = smb2_setup_async_request,
@@ -578,6 +599,7 @@ struct smb_version_operations smb21_operations = {
 	.calc_smb_size = smb2_calc_size,
 	.is_status_pending = smb2_is_status_pending,
 	.oplock_response = smb2_oplock_response,
+	.queryfs = smb2_queryfs,
 };
 
 struct smb_version_values smb21_values = {
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 566d86b0ad96..994c184ac9a9 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1971,3 +1971,84 @@ SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon,
 
 	return rc;
 }
+
+static void
+copy_fs_info_to_kstatfs(struct smb2_fs_full_size_info *pfs_inf,
+			struct kstatfs *kst)
+{
+	kst->f_bsize = le32_to_cpu(pfs_inf->BytesPerSector) *
+			  le32_to_cpu(pfs_inf->SectorsPerAllocationUnit);
+	kst->f_blocks = le64_to_cpu(pfs_inf->TotalAllocationUnits);
+	kst->f_bfree  = le64_to_cpu(pfs_inf->ActualAvailableAllocationUnits);
+	kst->f_bavail = le64_to_cpu(pfs_inf->CallerAvailableAllocationUnits);
+	return;
+}
+
+static int
+build_qfs_info_req(struct kvec *iov, struct cifs_tcon *tcon, int level,
+		   int outbuf_len, u64 persistent_fid, u64 volatile_fid)
+{
+	int rc;
+	struct smb2_query_info_req *req;
+
+	cFYI(1, "Query FSInfo level %d", level);
+
+	if ((tcon->ses == NULL) || (tcon->ses->server == NULL))
+		return -EIO;
+
+	rc = small_smb2_init(SMB2_QUERY_INFO, tcon, (void **) &req);
+	if (rc)
+		return rc;
+
+	req->InfoType = SMB2_O_INFO_FILESYSTEM;
+	req->FileInfoClass = level;
+	req->PersistentFileId = persistent_fid;
+	req->VolatileFileId = volatile_fid;
+	/* 4 for rfc1002 length field and 1 for pad */
+	req->InputBufferOffset =
+			cpu_to_le16(sizeof(struct smb2_query_info_req) - 1 - 4);
+	req->OutputBufferLength = cpu_to_le32(
+		outbuf_len + sizeof(struct smb2_query_info_rsp) - 1 - 4);
+
+	iov->iov_base = (char *)req;
+	/* 4 for rfc1002 length field */
+	iov->iov_len = get_rfc1002_length(req) + 4;
+	return 0;
+}
+
+int
+SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon,
+	      u64 persistent_fid, u64 volatile_fid, struct kstatfs *fsdata)
+{
+	struct smb2_query_info_rsp *rsp = NULL;
+	struct kvec iov;
+	int rc = 0;
+	int resp_buftype;
+	struct cifs_ses *ses = tcon->ses;
+	struct smb2_fs_full_size_info *info = NULL;
+
+	rc = build_qfs_info_req(&iov, tcon, FS_FULL_SIZE_INFORMATION,
+				sizeof(struct smb2_fs_full_size_info),
+				persistent_fid, volatile_fid);
+	if (rc)
+		return rc;
+
+	rc = SendReceive2(xid, ses, &iov, 1, &resp_buftype, 0);
+	if (rc) {
+		cifs_stats_fail_inc(tcon, SMB2_QUERY_INFO_HE);
+		goto qinf_exit;
+	}
+	rsp = (struct smb2_query_info_rsp *)iov.iov_base;
+
+	info = (struct smb2_fs_full_size_info *)(4 /* RFC1001 len */ +
+		le16_to_cpu(rsp->OutputBufferOffset) + (char *)&rsp->hdr);
+	rc = validate_buf(le16_to_cpu(rsp->OutputBufferOffset),
+			  le32_to_cpu(rsp->OutputBufferLength), &rsp->hdr,
+			  sizeof(struct smb2_fs_full_size_info));
+	if (!rc)
+		copy_fs_info_to_kstatfs(info, fsdata);
+
+qinf_exit:
+	free_rsp_buf(resp_buftype, iov.iov_base);
+	return rc;
+}
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index 8a0745764297..3c8e99ea88ad 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -630,6 +630,25 @@ struct smb2_oplock_break {
  *	BB consider moving to a different header
  */
 
+/* File System Information Classes */
+#define FS_VOLUME_INFORMATION		1 /* Query */
+#define FS_LABEL_INFORMATION		2 /* Set */
+#define FS_SIZE_INFORMATION		3 /* Query */
+#define FS_DEVICE_INFORMATION		4 /* Query */
+#define FS_ATTRIBUTE_INFORMATION	5 /* Query */
+#define FS_CONTROL_INFORMATION		6 /* Query, Set */
+#define FS_FULL_SIZE_INFORMATION	7 /* Query */
+#define FS_OBJECT_ID_INFORMATION	8 /* Query, Set */
+#define FS_DRIVER_PATH_INFORMATION	9 /* Query */
+
+struct smb2_fs_full_size_info {
+	__le64 TotalAllocationUnits;
+	__le64 CallerAvailableAllocationUnits;
+	__le64 ActualAvailableAllocationUnits;
+	__le32 SectorsPerAllocationUnit;
+	__le32 BytesPerSector;
+} __packed;
+
 /* partial list of QUERY INFO levels */
 #define FILE_DIRECTORY_INFORMATION	1
 #define FILE_FULL_DIRECTORY_INFORMATION 2
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index de554b716fdf..a73a963af8f4 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -138,5 +138,8 @@ extern int SMB2_set_info(const unsigned int xid, struct cifs_tcon *tcon,
 extern int SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon,
 			     const u64 persistent_fid, const u64 volatile_fid,
 			     const __u8 oplock_level);
+extern int SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon,
+			 u64 persistent_file_id, u64 volatile_file_id,
+			 struct kstatfs *FSData);
 
 #endif			/* _SMB2PROTO_H */
-- 
cgit 


From bf5ea0e2f29b00d4fe5f203d8e59120f797ce451 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Tue, 18 Sep 2012 16:20:34 -0700
Subject: cifs: change signing routines to deal with smb_rqst structs

We need a way to represent a call to be sent on the wire that does not
require having all of the page data kmapped. Behold the smb_rqst struct.
This new struct represents an array of kvecs immediately followed by an
array of pages.

Convert the signing routines to use these structs under the hood and
turn the existing functions for this into wrappers around that. For now,
we're just changing these functions to take different args. Later, we'll
teach them how to deal with arrays of pages.

Reviewed-by: Pavel Shilovsky <pshilovsky@samba.org>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/cifsencrypt.c | 26 ++++++++++++++++++--------
 fs/cifs/cifsglob.h    | 14 ++++++++++++++
 fs/cifs/cifsproto.h   |  7 +++++--
 fs/cifs/cifssmb.c     |  7 ++++---
 fs/cifs/transport.c   |  4 +++-
 5 files changed, 44 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 724738c1a560..af520522ef20 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -37,11 +37,13 @@
  * the sequence number before this function is called. Also, this function
  * should be called with the server->srv_mutex held.
  */
-static int cifs_calc_signature(const struct kvec *iov, int n_vec,
+static int cifs_calc_signature(struct smb_rqst *rqst,
 			struct TCP_Server_Info *server, char *signature)
 {
 	int i;
 	int rc;
+	struct kvec *iov = rqst->rq_iov;
+	int n_vec = rqst->rq_nvec;
 
 	if (iov == NULL || signature == NULL || server == NULL)
 		return -EINVAL;
@@ -99,12 +101,12 @@ static int cifs_calc_signature(const struct kvec *iov, int n_vec,
 }
 
 /* must be called with server->srv_mutex held */
-int cifs_sign_smbv(struct kvec *iov, int n_vec, struct TCP_Server_Info *server,
+int cifs_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server,
 		   __u32 *pexpected_response_sequence_number)
 {
 	int rc = 0;
 	char smb_signature[20];
-	struct smb_hdr *cifs_pdu = (struct smb_hdr *)iov[0].iov_base;
+	struct smb_hdr *cifs_pdu = (struct smb_hdr *)rqst->rq_iov[0].iov_base;
 
 	if ((cifs_pdu == NULL) || (server == NULL))
 		return -EINVAL;
@@ -125,7 +127,7 @@ int cifs_sign_smbv(struct kvec *iov, int n_vec, struct TCP_Server_Info *server,
 	*pexpected_response_sequence_number = server->sequence_number++;
 	server->sequence_number++;
 
-	rc = cifs_calc_signature(iov, n_vec, server, smb_signature);
+	rc = cifs_calc_signature(rqst, server, smb_signature);
 	if (rc)
 		memset(cifs_pdu->Signature.SecuritySignature, 0, 8);
 	else
@@ -134,6 +136,15 @@ int cifs_sign_smbv(struct kvec *iov, int n_vec, struct TCP_Server_Info *server,
 	return rc;
 }
 
+int cifs_sign_smbv(struct kvec *iov, int n_vec, struct TCP_Server_Info *server,
+		   __u32 *pexpected_response_sequence)
+{
+	struct smb_rqst rqst = { .rq_iov = iov,
+				 .rq_nvec = n_vec };
+
+	return cifs_sign_rqst(&rqst, server, pexpected_response_sequence);
+}
+
 /* must be called with server->srv_mutex held */
 int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server,
 		  __u32 *pexpected_response_sequence_number)
@@ -147,14 +158,14 @@ int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server,
 			      pexpected_response_sequence_number);
 }
 
-int cifs_verify_signature(struct kvec *iov, unsigned int nr_iov,
+int cifs_verify_signature(struct smb_rqst *rqst,
 			  struct TCP_Server_Info *server,
 			  __u32 expected_sequence_number)
 {
 	unsigned int rc;
 	char server_response_sig[8];
 	char what_we_think_sig_should_be[20];
-	struct smb_hdr *cifs_pdu = (struct smb_hdr *)iov[0].iov_base;
+	struct smb_hdr *cifs_pdu = (struct smb_hdr *)rqst->rq_iov[0].iov_base;
 
 	if (cifs_pdu == NULL || server == NULL)
 		return -EINVAL;
@@ -186,8 +197,7 @@ int cifs_verify_signature(struct kvec *iov, unsigned int nr_iov,
 	cifs_pdu->Signature.Sequence.Reserved = 0;
 
 	mutex_lock(&server->srv_mutex);
-	rc = cifs_calc_signature(iov, nr_iov, server,
-				 what_we_think_sig_should_be);
+	rc = cifs_calc_signature(rqst, server, what_we_think_sig_should_be);
 	mutex_unlock(&server->srv_mutex);
 
 	if (rc)
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 3c007fe641f9..5ea50dd316c5 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -162,6 +162,20 @@ struct cifs_cred {
  *****************************************************************
  */
 
+/*
+ * A smb_rqst represents a complete request to be issued to a server. It's
+ * formed by a kvec array, followed by an array of pages. Page data is assumed
+ * to start at the beginning of the first page.
+ */
+struct smb_rqst {
+	struct kvec	*rq_iov;	/* array of kvecs */
+	unsigned int	rq_nvec;	/* number of kvecs in array */
+	struct page	**rq_pages;	/* pointer to array of page ptrs */
+	unsigned int	rq_npages;	/* number pages in array */
+	unsigned int	rq_pagesz;	/* page size to use */
+	unsigned int	rq_tailsz;	/* length of last page */
+};
+
 enum smb_version {
 	Smb_1 = 1,
 	Smb_21,
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index c7ad9a8cf82a..8e071a5a7da4 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -24,6 +24,7 @@
 
 struct statfs;
 struct smb_vol;
+struct smb_rqst;
 
 /*
  *****************************************************************
@@ -394,10 +395,12 @@ extern void sesInfoFree(struct cifs_ses *);
 extern struct cifs_tcon *tconInfoAlloc(void);
 extern void tconInfoFree(struct cifs_tcon *);
 
-extern int cifs_sign_smb(struct smb_hdr *, struct TCP_Server_Info *, __u32 *);
+extern int cifs_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server,
+		   __u32 *pexpected_response_sequence_number);
 extern int cifs_sign_smbv(struct kvec *iov, int n_vec, struct TCP_Server_Info *,
 			  __u32 *);
-extern int cifs_verify_signature(struct kvec *iov, unsigned int nr_iov,
+extern int cifs_sign_smb(struct smb_hdr *, struct TCP_Server_Info *, __u32 *);
+extern int cifs_verify_signature(struct smb_rqst *rqst,
 				 struct TCP_Server_Info *server,
 				__u32 expected_sequence_number);
 extern int SMBNTencrypt(unsigned char *, unsigned char *, unsigned char *,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index c4f43cf671dc..6786b5ee5326 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1541,6 +1541,8 @@ cifs_readv_callback(struct mid_q_entry *mid)
 	struct cifs_readdata *rdata = mid->callback_data;
 	struct cifs_tcon *tcon = tlink_tcon(rdata->cfile->tlink);
 	struct TCP_Server_Info *server = tcon->ses->server;
+	struct smb_rqst rqst = { .rq_iov = rdata->iov,
+				 .rq_nvec = rdata->nr_iov };
 
 	cFYI(1, "%s: mid=%llu state=%d result=%d bytes=%u", __func__,
 		mid->mid, mid->mid_state, rdata->result, rdata->bytes);
@@ -1552,9 +1554,8 @@ cifs_readv_callback(struct mid_q_entry *mid)
 		    (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) {
 			int rc = 0;
 
-			rc = cifs_verify_signature(rdata->iov, rdata->nr_iov,
-						   server,
-						   mid->sequence_number + 1);
+			rc = cifs_verify_signature(&rqst, server,
+						  mid->sequence_number + 1);
 			if (rc)
 				cERROR(1, "SMB signature verification returned "
 				       "error = %d", rc);
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index c4d7825dfc0a..bc9ccddad937 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -504,11 +504,13 @@ cifs_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server,
 	if (server->sec_mode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) {
 		struct kvec iov;
 		int rc = 0;
+		struct smb_rqst rqst = { .rq_iov = &iov,
+					 .rq_nvec = 1 };
 
 		iov.iov_base = mid->resp_buf;
 		iov.iov_len = len;
 		/* FIXME: add code to kill session */
-		rc = cifs_verify_signature(&iov, 1, server,
+		rc = cifs_verify_signature(&rqst, server,
 					   mid->sequence_number + 1);
 		if (rc)
 			cERROR(1, "SMB signature verification returned error = "
-- 
cgit 


From 0b688cfc8b3472f5bad104abe0675a060e32ad7b Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Tue, 18 Sep 2012 16:20:34 -0700
Subject: cifs: change smb2 signing routines to use smb_rqst structs

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/smb2pdu.c       |  5 +++--
 fs/cifs/smb2proto.h     |  4 ++--
 fs/cifs/smb2transport.c | 44 ++++++++++++++++++++++----------------------
 3 files changed, 27 insertions(+), 26 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 994c184ac9a9..e188d137cab2 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1297,6 +1297,8 @@ smb2_readv_callback(struct mid_q_entry *mid)
 	struct TCP_Server_Info *server = tcon->ses->server;
 	struct smb2_hdr *buf = (struct smb2_hdr *)rdata->iov[0].iov_base;
 	unsigned int credits_received = 1;
+	struct smb_rqst rqst = { .rq_iov = rdata->iov,
+				 .rq_nvec = rdata->nr_iov };
 
 	cFYI(1, "%s: mid=%llu state=%d result=%d bytes=%u", __func__,
 		mid->mid, mid->mid_state, rdata->result, rdata->bytes);
@@ -1309,8 +1311,7 @@ smb2_readv_callback(struct mid_q_entry *mid)
 		    (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) {
 			int rc;
 
-			rc = smb2_verify_signature2(rdata->iov, rdata->nr_iov,
-						    server);
+			rc = smb2_verify_signature(&rqst, server);
 			if (rc)
 				cERROR(1, "SMB signature verification returned "
 				       "error = %d", rc);
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index a73a963af8f4..a9bda043e26e 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -26,6 +26,7 @@
 #include <linux/key-type.h>
 
 struct statfs;
+struct smb_rqst;
 
 /*
  *****************************************************************
@@ -39,8 +40,7 @@ extern char *smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr);
 extern __le16 *cifs_convert_path_to_utf16(const char *from,
 					  struct cifs_sb_info *cifs_sb);
 
-extern int smb2_verify_signature2(struct kvec *, unsigned int,
-				  struct TCP_Server_Info *);
+extern int smb2_verify_signature(struct smb_rqst *, struct TCP_Server_Info *);
 extern int smb2_check_receive(struct mid_q_entry *mid,
 			      struct TCP_Server_Info *server, bool log_error);
 extern int smb2_setup_request(struct cifs_ses *ses, struct kvec *iov,
diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c
index 66479f252ae5..1850d9ec3c90 100644
--- a/fs/cifs/smb2transport.c
+++ b/fs/cifs/smb2transport.c
@@ -39,12 +39,13 @@
 #include "smb2glob.h"
 
 static int
-smb2_calc_signature2(const struct kvec *iov, int n_vec,
-		     struct TCP_Server_Info *server)
+smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
 {
 	int i, rc;
 	unsigned char smb2_signature[SMB2_HMACSHA256_SIZE];
 	unsigned char *sigptr = smb2_signature;
+	struct kvec *iov = rqst->rq_iov;
+	int n_vec = rqst->rq_nvec;
 	struct smb2_hdr *smb2_pdu = (struct smb2_hdr *)iov[0].iov_base;
 
 	memset(smb2_signature, 0x0, SMB2_HMACSHA256_SIZE);
@@ -106,10 +107,10 @@ smb2_calc_signature2(const struct kvec *iov, int n_vec,
 
 /* must be called with server->srv_mutex held */
 static int
-smb2_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server)
+smb2_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server)
 {
 	int rc = 0;
-	struct smb2_hdr *smb2_pdu = iov[0].iov_base;
+	struct smb2_hdr *smb2_pdu = rqst->rq_iov[0].iov_base;
 
 	if (!(smb2_pdu->Flags & SMB2_FLAGS_SIGNED) ||
 	    server->tcpStatus == CifsNeedNegotiate)
@@ -120,18 +121,17 @@ smb2_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server)
 		return rc;
 	}
 
-	rc = smb2_calc_signature2(iov, n_vec, server);
+	rc = smb2_calc_signature(rqst, server);
 
 	return rc;
 }
 
 int
-smb2_verify_signature2(struct kvec *iov, unsigned int n_vec,
-		       struct TCP_Server_Info *server)
+smb2_verify_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
 {
 	unsigned int rc;
 	char server_response_sig[16];
-	struct smb2_hdr *smb2_pdu = (struct smb2_hdr *)iov[0].iov_base;
+	struct smb2_hdr *smb2_pdu = (struct smb2_hdr *)rqst->rq_iov[0].iov_base;
 
 	if ((smb2_pdu->Command == SMB2_NEGOTIATE) ||
 	    (smb2_pdu->Command == SMB2_OPLOCK_BREAK) ||
@@ -157,7 +157,7 @@ smb2_verify_signature2(struct kvec *iov, unsigned int n_vec,
 	memset(smb2_pdu->Signature, 0, SMB2_SIGNATURE_SIZE);
 
 	mutex_lock(&server->srv_mutex);
-	rc = smb2_calc_signature2(iov, n_vec, server);
+	rc = smb2_calc_signature(rqst, server);
 	mutex_unlock(&server->srv_mutex);
 
 	if (rc)
@@ -170,16 +170,6 @@ smb2_verify_signature2(struct kvec *iov, unsigned int n_vec,
 		return 0;
 }
 
-static int
-smb2_verify_signature(struct smb2_hdr *smb2_pdu, struct TCP_Server_Info *server)
-{
-	struct kvec iov;
-
-	iov.iov_base = (char *)smb2_pdu;
-	iov.iov_len = get_rfc1002_length(smb2_pdu) + 4;
-	return smb2_verify_signature2(&iov, 1, server);
-}
-
 /*
  * Set message id for the request. Should be called after wait_for_free_request
  * and when srv_mutex is held.
@@ -258,6 +248,12 @@ smb2_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server,
 		   bool log_error)
 {
 	unsigned int len = get_rfc1002_length(mid->resp_buf);
+	struct kvec iov;
+	struct smb_rqst rqst = { .rq_iov = &iov,
+				 .rq_nvec = 1 };
+
+	iov.iov_base = (char *)mid->resp_buf;
+	iov.iov_len = get_rfc1002_length(mid->resp_buf) + 4;
 
 	dump_smb(mid->resp_buf, min_t(u32, 80, len));
 	/* convert the length into a more usable form */
@@ -265,7 +261,7 @@ smb2_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server,
 	    (server->sec_mode & (SECMODE_SIGN_REQUIRED|SECMODE_SIGN_ENABLED))) {
 		int rc;
 
-		rc = smb2_verify_signature(mid->resp_buf, server);
+		rc = smb2_verify_signature(&rqst, server);
 		if (rc)
 			cERROR(1, "SMB signature verification returned error = "
 			       "%d", rc);
@@ -281,13 +277,15 @@ smb2_setup_request(struct cifs_ses *ses, struct kvec *iov,
 	int rc;
 	struct smb2_hdr *hdr = (struct smb2_hdr *)iov[0].iov_base;
 	struct mid_q_entry *mid;
+	struct smb_rqst rqst = { .rq_iov = iov,
+				 .rq_nvec = nvec };
 
 	smb2_seq_num_into_buf(ses->server, hdr);
 
 	rc = smb2_get_mid_entry(ses, hdr, &mid);
 	if (rc)
 		return rc;
-	rc = smb2_sign_smb2(iov, nvec, ses->server);
+	rc = smb2_sign_rqst(&rqst, ses->server);
 	if (rc)
 		cifs_delete_mid(mid);
 	*ret_mid = mid;
@@ -301,6 +299,8 @@ smb2_setup_async_request(struct TCP_Server_Info *server, struct kvec *iov,
 	int rc = 0;
 	struct smb2_hdr *hdr = (struct smb2_hdr *)iov[0].iov_base;
 	struct mid_q_entry *mid;
+	struct smb_rqst rqst = { .rq_iov = iov,
+				 .rq_nvec = nvec };
 
 	smb2_seq_num_into_buf(server, hdr);
 
@@ -308,7 +308,7 @@ smb2_setup_async_request(struct TCP_Server_Info *server, struct kvec *iov,
 	if (mid == NULL)
 		return -ENOMEM;
 
-	rc = smb2_sign_smb2(iov, nvec, server);
+	rc = smb2_sign_rqst(&rqst, server);
 	if (rc) {
 		DeleteMidQEntry(mid);
 		return rc;
-- 
cgit 


From 6f49f46b187df34539f1e5df2469b8a541897700 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Tue, 18 Sep 2012 16:20:34 -0700
Subject: cifs: convert send code to use smb_rqst structs

Again, just a change in the arguments and some function renaming here.
In later patches, we'll change this code to deal with page arrays.

In this patch, we add a new smb_send_rqst wrapper and have smb_sendv
call that. Then we move most of the existing smb_sendv code into a new
function -- smb_send_kvec. This seems a little redundant, but later
we'll flesh this out to deal with arrays of pages.

Reviewed-by: Pavel Shilovsky <pshilovsky@samba.org>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/transport.c | 135 ++++++++++++++++++++++++++++++++++------------------
 1 file changed, 90 insertions(+), 45 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index bc9ccddad937..766307b725bd 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -119,18 +119,29 @@ cifs_delete_mid(struct mid_q_entry *mid)
 	DeleteMidQEntry(mid);
 }
 
+/*
+ * smb_send_kvec - send an array of kvecs to the server
+ * @server:	Server to send the data to
+ * @iov:	Pointer to array of kvecs
+ * @n_vec:	length of kvec array
+ * @sent:	amount of data sent on socket is stored here
+ *
+ * Our basic "send data to server" function. Should be called with srv_mutex
+ * held. The caller is responsible for handling the results.
+ */
 static int
-smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
+smb_send_kvec(struct TCP_Server_Info *server, struct kvec *iov, size_t n_vec,
+		size_t *sent)
 {
 	int rc = 0;
 	int i = 0;
 	struct msghdr smb_msg;
-	unsigned int len = iov[0].iov_len;
-	unsigned int total_len;
-	int first_vec = 0;
-	unsigned int smb_buf_length = get_rfc1002_length(iov[0].iov_base);
+	unsigned int remaining;
+	size_t first_vec = 0;
 	struct socket *ssocket = server->ssocket;
 
+	*sent = 0;
+
 	if (ssocket == NULL)
 		return -ENOTSOCK; /* BB eventually add reconnect code here */
 
@@ -143,56 +154,60 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
 	else
 		smb_msg.msg_flags = MSG_NOSIGNAL;
 
-	total_len = 0;
+	remaining = 0;
 	for (i = 0; i < n_vec; i++)
-		total_len += iov[i].iov_len;
-
-	cFYI(1, "Sending smb:  total_len %d", total_len);
-	dump_smb(iov[0].iov_base, len);
+		remaining += iov[i].iov_len;
 
 	i = 0;
-	while (total_len) {
+	while (remaining) {
+		/*
+		 * If blocking send, we try 3 times, since each can block
+		 * for 5 seconds. For nonblocking  we have to try more
+		 * but wait increasing amounts of time allowing time for
+		 * socket to clear.  The overall time we wait in either
+		 * case to send on the socket is about 15 seconds.
+		 * Similarly we wait for 15 seconds for a response from
+		 * the server in SendReceive[2] for the server to send
+		 * a response back for most types of requests (except
+		 * SMB Write past end of file which can be slow, and
+		 * blocking lock operations). NFS waits slightly longer
+		 * than CIFS, but this can make it take longer for
+		 * nonresponsive servers to be detected and 15 seconds
+		 * is more than enough time for modern networks to
+		 * send a packet.  In most cases if we fail to send
+		 * after the retries we will kill the socket and
+		 * reconnect which may clear the network problem.
+		 */
 		rc = kernel_sendmsg(ssocket, &smb_msg, &iov[first_vec],
-				    n_vec - first_vec, total_len);
-		if ((rc == -ENOSPC) || (rc == -EAGAIN)) {
+				    n_vec - first_vec, remaining);
+		if (rc == -ENOSPC || rc == -EAGAIN) {
 			i++;
-			/*
-			 * If blocking send we try 3 times, since each can block
-			 * for 5 seconds. For nonblocking  we have to try more
-			 * but wait increasing amounts of time allowing time for
-			 * socket to clear.  The overall time we wait in either
-			 * case to send on the socket is about 15 seconds.
-			 * Similarly we wait for 15 seconds for a response from
-			 * the server in SendReceive[2] for the server to send
-			 * a response back for most types of requests (except
-			 * SMB Write past end of file which can be slow, and
-			 * blocking lock operations). NFS waits slightly longer
-			 * than CIFS, but this can make it take longer for
-			 * nonresponsive servers to be detected and 15 seconds
-			 * is more than enough time for modern networks to
-			 * send a packet.  In most cases if we fail to send
-			 * after the retries we will kill the socket and
-			 * reconnect which may clear the network problem.
-			 */
-			if ((i >= 14) || (!server->noblocksnd && (i > 2))) {
-				cERROR(1, "sends on sock %p stuck for 15 seconds",
-				    ssocket);
+			if (i >= 14 || (!server->noblocksnd && (i > 2))) {
+				cERROR(1, "sends on sock %p stuck for 15 "
+					  "seconds", ssocket);
 				rc = -EAGAIN;
 				break;
 			}
 			msleep(1 << i);
 			continue;
 		}
+
 		if (rc < 0)
 			break;
 
-		if (rc == total_len) {
-			total_len = 0;
+		/* send was at least partially successful */
+		*sent += rc;
+
+		if (rc == remaining) {
+			remaining = 0;
 			break;
-		} else if (rc > total_len) {
-			cERROR(1, "sent %d requested %d", rc, total_len);
+		}
+
+		if (rc > remaining) {
+			cERROR(1, "sent %d requested %d", rc, remaining);
 			break;
 		}
+
 		if (rc == 0) {
 			/* should never happen, letting socket clear before
 			   retrying is our only obvious option here */
@@ -200,7 +215,9 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
 			msleep(500);
 			continue;
 		}
-		total_len -= rc;
+
+		remaining -= rc;
+
 		/* the line below resets i */
 		for (i = first_vec; i < n_vec; i++) {
 			if (iov[i].iov_len) {
@@ -215,16 +232,35 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
 				}
 			}
 		}
+
 		i = 0; /* in case we get ENOSPC on the next send */
+		rc = 0;
 	}
+	return rc;
+}
+
+static int
+smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst)
+{
+	int rc;
+	struct kvec *iov = rqst->rq_iov;
+	int n_vec = rqst->rq_nvec;
+	unsigned int smb_buf_length = get_rfc1002_length(iov[0].iov_base);
+	size_t total_len;
+
+	cFYI(1, "Sending smb: smb_len=%u", smb_buf_length);
+	dump_smb(iov[0].iov_base, iov[0].iov_len);
+
+	rc = smb_send_kvec(server, iov, n_vec, &total_len);
 
 	if ((total_len > 0) && (total_len != smb_buf_length + 4)) {
-		cFYI(1, "partial send (%d remaining), terminating session",
-			total_len);
-		/* If we have only sent part of an SMB then the next SMB
-		   could be taken as the remainder of this one.  We need
-		   to kill the socket so the server throws away the partial
-		   SMB */
+		cFYI(1, "partial send (wanted=%u sent=%zu): terminating "
+			"session", smb_buf_length + 4, total_len);
+		/*
+		 * If we have only sent part of an SMB then the next SMB could
+		 * be taken as the remainder of this one. We need to kill the
+		 * socket so the server throws away the partial SMB
+		 */
 		server->tcpStatus = CifsNeedReconnect;
 	}
 
@@ -236,6 +272,15 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
 	return rc;
 }
 
+static int
+smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
+{
+	struct smb_rqst rqst = { .rq_iov = iov,
+				 .rq_nvec = n_vec };
+
+	return smb_send_rqst(server, &rqst);
+}
+
 int
 smb_send(struct TCP_Server_Info *server, struct smb_hdr *smb_buffer,
 	 unsigned int smb_buf_length)
-- 
cgit 


From b8eed28375a43e1c9aaa9d15af2a052aae0d0725 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Tue, 18 Sep 2012 16:20:35 -0700
Subject: cifs: cork the socket before a send and uncork it afterward

We want to send SMBs as "atomically" as possible. Prior to sending any
data on the socket, cork it to make sure that no non-full frames go
out. Afterward, uncork it to make sure all of the data gets pushed out
to the wire.

Note that this more or less renders the socket=TCP_NODELAY mount option
obsolete. When TCP_CORK and TCP_NODELAY are used on the same socket,
TCP_NODELAY is essentially ignored.

Acked-by: Pavel Shilovsky <pshilovsky@samba.org>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/connect.c   |  4 ++++
 fs/cifs/transport.c | 12 ++++++++++++
 2 files changed, 16 insertions(+)

(limited to 'fs')

diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 549409b1c776..5210bc82b1dc 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1680,6 +1680,10 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
 			if (string == NULL)
 				goto out_nomem;
 
+			/*
+			 * FIXME: since we now cork/uncork the socket while
+			 * 	  sending, should we deprecate this option?
+			 */
 			if (strnicmp(string, "TCP_NODELAY", 11) == 0)
 				vol->sockopt_tcp_nodelay = 1;
 			break;
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 766307b725bd..8ff4c5f36702 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -27,6 +27,7 @@
 #include <linux/net.h>
 #include <linux/delay.h>
 #include <linux/freezer.h>
+#include <linux/tcp.h>
 #include <asm/uaccess.h>
 #include <asm/processor.h>
 #include <linux/mempool.h>
@@ -247,12 +248,23 @@ smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst)
 	int n_vec = rqst->rq_nvec;
 	unsigned int smb_buf_length = get_rfc1002_length(iov[0].iov_base);
 	size_t total_len;
+	struct socket *ssocket = server->ssocket;
+	int val = 1;
 
 	cFYI(1, "Sending smb: smb_len=%u", smb_buf_length);
 	dump_smb(iov[0].iov_base, iov[0].iov_len);
 
+	/* cork the socket */
+	kernel_setsockopt(ssocket, SOL_TCP, TCP_CORK,
+				(char *)&val, sizeof(val));
+
 	rc = smb_send_kvec(server, iov, n_vec, &total_len);
 
+	/* uncork it */
+	val = 0;
+	kernel_setsockopt(ssocket, SOL_TCP, TCP_CORK,
+				(char *)&val, sizeof(val));
+
 	if ((total_len > 0) && (total_len != smb_buf_length + 4)) {
 		cFYI(1, "partial send (wanted=%u sent=%zu): terminating "
 			"session", smb_buf_length + 4, total_len);
-- 
cgit 


From 97bc00b39408a4180eeeaa976d02d37121488997 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Tue, 18 Sep 2012 16:20:35 -0700
Subject: cifs: teach smb_send_rqst how to handle arrays of pages

Add code that allows smb_send_rqst to send an array of pages after the
initial kvec array has been sent. For now, we simply kmap the page
array and send it using the standard smb_send_kvec function. Eventually,
we may want to convert this code to use kernel_sendpage under the hood
and avoid the kmap altogether for the page data.

Reviewed-by: Pavel Shilovsky <pshilovsky@samba.org>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/transport.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 54 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 8ff4c5f36702..381dbb778478 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -28,6 +28,7 @@
 #include <linux/delay.h>
 #include <linux/freezer.h>
 #include <linux/tcp.h>
+#include <linux/highmem.h>
 #include <asm/uaccess.h>
 #include <asm/processor.h>
 #include <linux/mempool.h>
@@ -240,6 +241,38 @@ smb_send_kvec(struct TCP_Server_Info *server, struct kvec *iov, size_t n_vec,
 	return rc;
 }
 
+/**
+ * rqst_page_to_kvec - Turn a slot in the smb_rqst page array into a kvec
+ * @rqst: pointer to smb_rqst
+ * @idx: index into the array of the page
+ * @iov: pointer to struct kvec that will hold the result
+ *
+ * Helper function to convert a slot in the rqst->rq_pages array into a kvec.
+ * The page will be kmapped and the address placed into iov_base. The length
+ * will then be adjusted according to the ptailoff.
+ */
+static void
+cifs_rqst_page_to_kvec(struct smb_rqst *rqst, unsigned int idx,
+			struct kvec *iov)
+{
+	/*
+	 * FIXME: We could avoid this kmap altogether if we used
+	 * kernel_sendpage instead of kernel_sendmsg. That will only
+	 * work if signing is disabled though as sendpage inlines the
+	 * page directly into the fraglist. If userspace modifies the
+	 * page after we calculate the signature, then the server will
+	 * reject it and may break the connection. kernel_sendmsg does
+	 * an extra copy of the data and avoids that issue.
+	 */
+	iov->iov_base = kmap(rqst->rq_pages[idx]);
+
+	/* if last page, don't send beyond this offset into page */
+	if (idx == (rqst->rq_npages - 1))
+		iov->iov_len = rqst->rq_tailsz;
+	else
+		iov->iov_len = rqst->rq_pagesz;
+}
+
 static int
 smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst)
 {
@@ -247,7 +280,8 @@ smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst)
 	struct kvec *iov = rqst->rq_iov;
 	int n_vec = rqst->rq_nvec;
 	unsigned int smb_buf_length = get_rfc1002_length(iov[0].iov_base);
-	size_t total_len;
+	unsigned int i;
+	size_t total_len = 0, sent;
 	struct socket *ssocket = server->ssocket;
 	int val = 1;
 
@@ -258,8 +292,26 @@ smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst)
 	kernel_setsockopt(ssocket, SOL_TCP, TCP_CORK,
 				(char *)&val, sizeof(val));
 
-	rc = smb_send_kvec(server, iov, n_vec, &total_len);
+	rc = smb_send_kvec(server, iov, n_vec, &sent);
+	if (rc < 0)
+		goto uncork;
+
+	total_len += sent;
+
+	/* now walk the page array and send each page in it */
+	for (i = 0; i < rqst->rq_npages; i++) {
+		struct kvec p_iov;
+
+		cifs_rqst_page_to_kvec(rqst, i, &p_iov);
+		rc = smb_send_kvec(server, &p_iov, 1, &sent);
+		kunmap(rqst->rq_pages[i]);
+		if (rc < 0)
+			break;
+
+		total_len += sent;
+	}
 
+uncork:
 	/* uncork it */
 	val = 0;
 	kernel_setsockopt(ssocket, SOL_TCP, TCP_CORK,
-- 
cgit 


From fb308a6f22f7f4f3574dab6b36c4a3598e50cf05 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Tue, 18 Sep 2012 16:20:35 -0700
Subject: cifs: teach signing routines how to deal with arrays of pages in a
 smb_rqst

Use the smb_send_rqst helper function to kmap each page in the array
and update the hash for that chunk.

Reviewed-by: Pavel Shilovsky <pshilovsky@samba.org>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/cifsencrypt.c   | 11 +++++++++++
 fs/cifs/cifsproto.h     |  2 ++
 fs/cifs/smb2transport.c | 11 +++++++++++
 fs/cifs/transport.c     |  2 +-
 4 files changed, 25 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index af520522ef20..652f5051be09 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -29,6 +29,7 @@
 #include "ntlmssp.h"
 #include <linux/ctype.h>
 #include <linux/random.h>
+#include <linux/highmem.h>
 
 /*
  * Calculate and return the CIFS signature based on the mac key and SMB PDU.
@@ -93,6 +94,16 @@ static int cifs_calc_signature(struct smb_rqst *rqst,
 		}
 	}
 
+	/* now hash over the rq_pages array */
+	for (i = 0; i < rqst->rq_npages; i++) {
+		struct kvec p_iov;
+
+		cifs_rqst_page_to_kvec(rqst, i, &p_iov);
+		crypto_shash_update(&server->secmech.sdescmd5->shash,
+					p_iov.iov_base, p_iov.iov_len);
+		kunmap(rqst->rq_pages[i]);
+	}
+
 	rc = crypto_shash_final(&server->secmech.sdescmd5->shash, signature);
 	if (rc)
 		cERROR(1, "%s: Could not generate md5 hash", __func__);
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 8e071a5a7da4..e97a1843ab98 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -36,6 +36,8 @@ extern struct smb_hdr *cifs_buf_get(void);
 extern void cifs_buf_release(void *);
 extern struct smb_hdr *cifs_small_buf_get(void);
 extern void cifs_small_buf_release(void *);
+extern void cifs_rqst_page_to_kvec(struct smb_rqst *rqst, unsigned int idx,
+					struct kvec *iov);
 extern int smb_send(struct TCP_Server_Info *, struct smb_hdr *,
 			unsigned int /* length */);
 extern unsigned int _get_xid(void);
diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c
index 1850d9ec3c90..9ca4bcfb34c6 100644
--- a/fs/cifs/smb2transport.c
+++ b/fs/cifs/smb2transport.c
@@ -30,6 +30,7 @@
 #include <linux/uaccess.h>
 #include <asm/processor.h>
 #include <linux/mempool.h>
+#include <linux/highmem.h>
 #include "smb2pdu.h"
 #include "cifsglob.h"
 #include "cifsproto.h"
@@ -95,6 +96,16 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
 		}
 	}
 
+	/* now hash over the rq_pages array */
+	for (i = 0; i < rqst->rq_npages; i++) {
+		struct kvec p_iov;
+
+		cifs_rqst_page_to_kvec(rqst, i, &p_iov);
+		crypto_shash_update(&server->secmech.sdeschmacsha256->shash,
+					p_iov.iov_base, p_iov.iov_len);
+		kunmap(rqst->rq_pages[i]);
+	}
+
 	rc = crypto_shash_final(&server->secmech.sdeschmacsha256->shash,
 				sigptr);
 	if (rc)
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 381dbb778478..b6097344cd5b 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -251,7 +251,7 @@ smb_send_kvec(struct TCP_Server_Info *server, struct kvec *iov, size_t n_vec,
  * The page will be kmapped and the address placed into iov_base. The length
  * will then be adjusted according to the ptailoff.
  */
-static void
+void
 cifs_rqst_page_to_kvec(struct smb_rqst *rqst, unsigned int idx,
 			struct kvec *iov)
 {
-- 
cgit 


From fec344e3f31aa911297cd3a4639432d983b1f324 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Tue, 18 Sep 2012 16:20:35 -0700
Subject: cifs: change cifs_call_async to use smb_rqst structs

For now, none of the callers populate rq_pages. That will be done for
writes in a later patch. While we're at it, change the prototype of
setup_async_request not to need a return pointer argument. Just
return the pointer to the mid_q_entry or an ERR_PTR.

Reviewed-by: Pavel Shilovsky <pshilovsky@samba.org>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/cifsglob.h      |  8 +++----
 fs/cifs/cifsproto.h     | 16 +++++++-------
 fs/cifs/cifssmb.c       | 20 +++++++++++-------
 fs/cifs/smb2pdu.c       | 15 +++++++++----
 fs/cifs/smb2proto.h     |  9 ++++----
 fs/cifs/smb2transport.c | 40 +++++++++++++++--------------------
 fs/cifs/transport.c     | 56 ++++++++++++++++++++++++-------------------------
 7 files changed, 85 insertions(+), 79 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 5ea50dd316c5..a81790005e57 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -201,11 +201,11 @@ struct smb_version_operations {
 			   struct mid_q_entry *);
 	bool (*compare_fids)(struct cifsFileInfo *, struct cifsFileInfo *);
 	/* setup request: allocate mid, sign message */
-	int (*setup_request)(struct cifs_ses *, struct kvec *, unsigned int,
-			     struct mid_q_entry **);
+	struct mid_q_entry *(*setup_request)(struct cifs_ses *,
+						struct smb_rqst *);
 	/* setup async request: allocate mid, sign message */
-	int (*setup_async_request)(struct TCP_Server_Info *, struct kvec *,
-				   unsigned int, struct mid_q_entry **);
+	struct mid_q_entry *(*setup_async_request)(struct TCP_Server_Info *,
+						struct smb_rqst *);
 	/* check response: verify signature, map error */
 	int (*check_receive)(struct mid_q_entry *, struct TCP_Server_Info *,
 			     bool);
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index e97a1843ab98..3b628f2af258 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -70,20 +70,20 @@ extern struct mid_q_entry *AllocMidQEntry(const struct smb_hdr *smb_buffer,
 extern void DeleteMidQEntry(struct mid_q_entry *midEntry);
 extern void cifs_delete_mid(struct mid_q_entry *mid);
 extern void cifs_wake_up_task(struct mid_q_entry *mid);
-extern int cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov,
-			   unsigned int nvec, mid_receive_t *receive,
-			   mid_callback_t *callback, void *cbdata,
-			   const int flags);
+extern int cifs_call_async(struct TCP_Server_Info *server,
+			struct smb_rqst *rqst,
+			mid_receive_t *receive, mid_callback_t *callback,
+			void *cbdata, const int flags);
 extern int SendReceive(const unsigned int /* xid */ , struct cifs_ses *,
 			struct smb_hdr * /* input */ ,
 			struct smb_hdr * /* out */ ,
 			int * /* bytes returned */ , const int);
 extern int SendReceiveNoRsp(const unsigned int xid, struct cifs_ses *ses,
 			    char *in_buf, int flags);
-extern int cifs_setup_request(struct cifs_ses *, struct kvec *, unsigned int,
-			      struct mid_q_entry **);
-extern int cifs_setup_async_request(struct TCP_Server_Info *, struct kvec *,
-				    unsigned int, struct mid_q_entry **);
+extern struct mid_q_entry *cifs_setup_request(struct cifs_ses *,
+				struct smb_rqst *);
+extern struct mid_q_entry *cifs_setup_async_request(struct TCP_Server_Info *,
+						struct smb_rqst *);
 extern int cifs_check_receive(struct mid_q_entry *mid,
 			struct TCP_Server_Info *server, bool log_error);
 extern int SendReceive2(const unsigned int /* xid */ , struct cifs_ses *,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 6786b5ee5326..2f86c84468cb 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -725,6 +725,8 @@ CIFSSMBEcho(struct TCP_Server_Info *server)
 	ECHO_REQ *smb;
 	int rc = 0;
 	struct kvec iov;
+	struct smb_rqst rqst = { .rq_iov = &iov,
+				 .rq_nvec = 1 };
 
 	cFYI(1, "In echo request");
 
@@ -742,7 +744,7 @@ CIFSSMBEcho(struct TCP_Server_Info *server)
 	iov.iov_base = smb;
 	iov.iov_len = be32_to_cpu(smb->hdr.smb_buf_length) + 4;
 
-	rc = cifs_call_async(server, &iov, 1, NULL, cifs_echo_callback,
+	rc = cifs_call_async(server, &rqst, NULL, cifs_echo_callback,
 			     server, CIFS_ASYNC_OP | CIFS_ECHO_OP);
 	if (rc)
 		cFYI(1, "Echo request failed: %d", rc);
@@ -1585,6 +1587,8 @@ cifs_async_readv(struct cifs_readdata *rdata)
 	READ_REQ *smb = NULL;
 	int wct;
 	struct cifs_tcon *tcon = tlink_tcon(rdata->cfile->tlink);
+	struct smb_rqst rqst = { .rq_iov = rdata->iov,
+				 .rq_nvec = 1 };
 
 	cFYI(1, "%s: offset=%llu bytes=%u", __func__,
 		rdata->offset, rdata->bytes);
@@ -1628,9 +1632,8 @@ cifs_async_readv(struct cifs_readdata *rdata)
 	rdata->iov[0].iov_len = be32_to_cpu(smb->hdr.smb_buf_length) + 4;
 
 	kref_get(&rdata->refcount);
-	rc = cifs_call_async(tcon->ses->server, rdata->iov, 1,
-			     cifs_readv_receive, cifs_readv_callback,
-			     rdata, 0);
+	rc = cifs_call_async(tcon->ses->server, &rqst, cifs_readv_receive,
+			     cifs_readv_callback, rdata, 0);
 
 	if (rc == 0)
 		cifs_stats_inc(&tcon->stats.cifs_stats.num_reads);
@@ -2035,6 +2038,7 @@ cifs_async_writev(struct cifs_writedata *wdata)
 	int wct;
 	struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink);
 	struct kvec *iov = NULL;
+	struct smb_rqst rqst = { };
 
 	if (tcon->ses->capabilities & CAP_LARGE_FILES) {
 		wct = 14;
@@ -2051,11 +2055,13 @@ cifs_async_writev(struct cifs_writedata *wdata)
 		goto async_writev_out;
 
 	/* 1 iov per page + 1 for header */
-	iov = kzalloc((wdata->nr_pages + 1) * sizeof(*iov), GFP_NOFS);
+	rqst.rq_nvec = wdata->nr_pages + 1;
+	iov = kzalloc((rqst.rq_nvec) * sizeof(*iov), GFP_NOFS);
 	if (iov == NULL) {
 		rc = -ENOMEM;
 		goto async_writev_out;
 	}
+	rqst.rq_iov = iov;
 
 	smb->hdr.Pid = cpu_to_le16((__u16)wdata->pid);
 	smb->hdr.PidHigh = cpu_to_le16((__u16)(wdata->pid >> 16));
@@ -2104,8 +2110,8 @@ cifs_async_writev(struct cifs_writedata *wdata)
 	}
 
 	kref_get(&wdata->refcount);
-	rc = cifs_call_async(tcon->ses->server, iov, wdata->nr_pages + 1,
-			     NULL, cifs_writev_callback, wdata, 0);
+	rc = cifs_call_async(tcon->ses->server, &rqst, NULL,
+				cifs_writev_callback, wdata, 0);
 
 	if (rc == 0)
 		cifs_stats_inc(&tcon->stats.cifs_stats.num_writes);
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index e188d137cab2..a04301b69b4e 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1171,6 +1171,8 @@ SMB2_echo(struct TCP_Server_Info *server)
 	struct smb2_echo_req *req;
 	int rc = 0;
 	struct kvec iov;
+	struct smb_rqst rqst = { .rq_iov = &iov,
+				 .rq_nvec = 1 };
 
 	cFYI(1, "In echo request");
 
@@ -1184,7 +1186,7 @@ SMB2_echo(struct TCP_Server_Info *server)
 	/* 4 for rfc1002 length field */
 	iov.iov_len = get_rfc1002_length(req) + 4;
 
-	rc = cifs_call_async(server, &iov, 1, NULL, smb2_echo_callback, server,
+	rc = cifs_call_async(server, &rqst, NULL, smb2_echo_callback, server,
 			     CIFS_ECHO_OP);
 	if (rc)
 		cFYI(1, "Echo request failed: %d", rc);
@@ -1344,6 +1346,8 @@ smb2_async_readv(struct cifs_readdata *rdata)
 	int rc;
 	struct smb2_hdr *buf;
 	struct cifs_io_parms io_parms;
+	struct smb_rqst rqst = { .rq_iov = rdata->iov,
+				 .rq_nvec = 1 };
 
 	cFYI(1, "%s: offset=%llu bytes=%u", __func__,
 		rdata->offset, rdata->bytes);
@@ -1363,7 +1367,7 @@ smb2_async_readv(struct cifs_readdata *rdata)
 	rdata->iov[0].iov_len = get_rfc1002_length(rdata->iov[0].iov_base) + 4;
 
 	kref_get(&rdata->refcount);
-	rc = cifs_call_async(io_parms.tcon->ses->server, rdata->iov, 1,
+	rc = cifs_call_async(io_parms.tcon->ses->server, &rqst,
 			     cifs_readv_receive, smb2_readv_callback,
 			     rdata, 0);
 	if (rc)
@@ -1484,6 +1488,7 @@ smb2_async_writev(struct cifs_writedata *wdata)
 	struct smb2_write_req *req = NULL;
 	struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink);
 	struct kvec *iov = NULL;
+	struct smb_rqst rqst;
 
 	rc = small_smb2_init(SMB2_WRITE, tcon, (void **) &req);
 	if (rc)
@@ -1495,6 +1500,8 @@ smb2_async_writev(struct cifs_writedata *wdata)
 		rc = -ENOMEM;
 		goto async_writev_out;
 	}
+	rqst.rq_iov = iov;
+	rqst.rq_nvec = wdata->nr_pages + 1;
 
 	req->hdr.ProcessId = cpu_to_le32(wdata->cfile->pid);
 
@@ -1530,8 +1537,8 @@ smb2_async_writev(struct cifs_writedata *wdata)
 	inc_rfc1001_len(&req->hdr, wdata->bytes - 1 /* Buffer */);
 
 	kref_get(&wdata->refcount);
-	rc = cifs_call_async(tcon->ses->server, iov, wdata->nr_pages + 1,
-			     NULL, smb2_writev_callback, wdata, 0);
+	rc = cifs_call_async(tcon->ses->server, &rqst, NULL,
+				smb2_writev_callback, wdata, 0);
 
 	if (rc)
 		kref_put(&wdata->refcount, cifs_writedata_release);
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index a9bda043e26e..aeb30dbdf8b8 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -43,11 +43,10 @@ extern __le16 *cifs_convert_path_to_utf16(const char *from,
 extern int smb2_verify_signature(struct smb_rqst *, struct TCP_Server_Info *);
 extern int smb2_check_receive(struct mid_q_entry *mid,
 			      struct TCP_Server_Info *server, bool log_error);
-extern int smb2_setup_request(struct cifs_ses *ses, struct kvec *iov,
-			      unsigned int nvec, struct mid_q_entry **ret_mid);
-extern int smb2_setup_async_request(struct TCP_Server_Info *server,
-				    struct kvec *iov, unsigned int nvec,
-				    struct mid_q_entry **ret_mid);
+extern struct mid_q_entry *smb2_setup_request(struct cifs_ses *ses,
+			      struct smb_rqst *rqst);
+extern struct mid_q_entry *smb2_setup_async_request(
+			struct TCP_Server_Info *server, struct smb_rqst *rqst);
 extern void smb2_echo_request(struct work_struct *work);
 extern bool smb2_is_valid_oplock_break(char *buffer,
 				       struct TCP_Server_Info *srv);
diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c
index 9ca4bcfb34c6..2a5fdf26f79f 100644
--- a/fs/cifs/smb2transport.c
+++ b/fs/cifs/smb2transport.c
@@ -281,50 +281,44 @@ smb2_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server,
 	return map_smb2_to_linux_error(mid->resp_buf, log_error);
 }
 
-int
-smb2_setup_request(struct cifs_ses *ses, struct kvec *iov,
-		   unsigned int nvec, struct mid_q_entry **ret_mid)
+struct mid_q_entry *
+smb2_setup_request(struct cifs_ses *ses, struct smb_rqst *rqst)
 {
 	int rc;
-	struct smb2_hdr *hdr = (struct smb2_hdr *)iov[0].iov_base;
+	struct smb2_hdr *hdr = (struct smb2_hdr *)rqst->rq_iov[0].iov_base;
 	struct mid_q_entry *mid;
-	struct smb_rqst rqst = { .rq_iov = iov,
-				 .rq_nvec = nvec };
 
 	smb2_seq_num_into_buf(ses->server, hdr);
 
 	rc = smb2_get_mid_entry(ses, hdr, &mid);
 	if (rc)
-		return rc;
-	rc = smb2_sign_rqst(&rqst, ses->server);
-	if (rc)
+		return ERR_PTR(rc);
+	rc = smb2_sign_rqst(rqst, ses->server);
+	if (rc) {
 		cifs_delete_mid(mid);
-	*ret_mid = mid;
-	return rc;
+		return ERR_PTR(rc);
+	}
+	return mid;
 }
 
-int
-smb2_setup_async_request(struct TCP_Server_Info *server, struct kvec *iov,
-			 unsigned int nvec, struct mid_q_entry **ret_mid)
+struct mid_q_entry *
+smb2_setup_async_request(struct TCP_Server_Info *server, struct smb_rqst *rqst)
 {
-	int rc = 0;
-	struct smb2_hdr *hdr = (struct smb2_hdr *)iov[0].iov_base;
+	int rc;
+	struct smb2_hdr *hdr = (struct smb2_hdr *)rqst->rq_iov[0].iov_base;
 	struct mid_q_entry *mid;
-	struct smb_rqst rqst = { .rq_iov = iov,
-				 .rq_nvec = nvec };
 
 	smb2_seq_num_into_buf(server, hdr);
 
 	mid = smb2_mid_entry_alloc(hdr, server);
 	if (mid == NULL)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
-	rc = smb2_sign_rqst(&rqst, server);
+	rc = smb2_sign_rqst(rqst, server);
 	if (rc) {
 		DeleteMidQEntry(mid);
-		return rc;
+		return ERR_PTR(rc);
 	}
 
-	*ret_mid = mid;
-	return rc;
+	return mid;
 }
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index b6097344cd5b..2126ab185045 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -454,12 +454,11 @@ wait_for_response(struct TCP_Server_Info *server, struct mid_q_entry *midQ)
 	return 0;
 }
 
-int
-cifs_setup_async_request(struct TCP_Server_Info *server, struct kvec *iov,
-			 unsigned int nvec, struct mid_q_entry **ret_mid)
+struct mid_q_entry *
+cifs_setup_async_request(struct TCP_Server_Info *server, struct smb_rqst *rqst)
 {
 	int rc;
-	struct smb_hdr *hdr = (struct smb_hdr *)iov[0].iov_base;
+	struct smb_hdr *hdr = (struct smb_hdr *)rqst->rq_iov[0].iov_base;
 	struct mid_q_entry *mid;
 
 	/* enable signing if server requires it */
@@ -468,16 +467,15 @@ cifs_setup_async_request(struct TCP_Server_Info *server, struct kvec *iov,
 
 	mid = AllocMidQEntry(hdr, server);
 	if (mid == NULL)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
-	rc = cifs_sign_smbv(iov, nvec, server, &mid->sequence_number);
+	rc = cifs_sign_rqst(rqst, server, &mid->sequence_number);
 	if (rc) {
 		DeleteMidQEntry(mid);
-		return rc;
+		return ERR_PTR(rc);
 	}
 
-	*ret_mid = mid;
-	return 0;
+	return mid;
 }
 
 /*
@@ -485,9 +483,9 @@ cifs_setup_async_request(struct TCP_Server_Info *server, struct kvec *iov,
  * the result. Caller is responsible for dealing with timeouts.
  */
 int
-cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov,
-		unsigned int nvec, mid_receive_t *receive,
-		mid_callback_t *callback, void *cbdata, const int flags)
+cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst,
+		mid_receive_t *receive, mid_callback_t *callback,
+		void *cbdata, const int flags)
 {
 	int rc, timeout, optype;
 	struct mid_q_entry *mid;
@@ -500,12 +498,12 @@ cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov,
 		return rc;
 
 	mutex_lock(&server->srv_mutex);
-	rc = server->ops->setup_async_request(server, iov, nvec, &mid);
-	if (rc) {
+	mid = server->ops->setup_async_request(server, rqst);
+	if (IS_ERR(mid)) {
 		mutex_unlock(&server->srv_mutex);
 		add_credits(server, 1, optype);
 		wake_up(&server->request_q);
-		return rc;
+		return PTR_ERR(mid);
 	}
 
 	mid->receive = receive;
@@ -520,7 +518,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov,
 
 
 	cifs_in_send_inc(server);
-	rc = smb_sendv(server, iov, nvec);
+	rc = smb_send_rqst(server, rqst);
 	cifs_in_send_dec(server);
 	cifs_save_when_sent(mid);
 	mutex_unlock(&server->srv_mutex);
@@ -630,22 +628,22 @@ cifs_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server,
 	return map_smb_to_linux_error(mid->resp_buf, log_error);
 }
 
-int
-cifs_setup_request(struct cifs_ses *ses, struct kvec *iov,
-		   unsigned int nvec, struct mid_q_entry **ret_mid)
+struct mid_q_entry *
+cifs_setup_request(struct cifs_ses *ses, struct smb_rqst *rqst)
 {
 	int rc;
-	struct smb_hdr *hdr = (struct smb_hdr *)iov[0].iov_base;
+	struct smb_hdr *hdr = (struct smb_hdr *)rqst->rq_iov[0].iov_base;
 	struct mid_q_entry *mid;
 
 	rc = allocate_mid(ses, hdr, &mid);
 	if (rc)
-		return rc;
-	rc = cifs_sign_smbv(iov, nvec, ses->server, &mid->sequence_number);
-	if (rc)
+		return ERR_PTR(rc);
+	rc = cifs_sign_rqst(rqst, ses->server, &mid->sequence_number);
+	if (rc) {
 		cifs_delete_mid(mid);
-	*ret_mid = mid;
-	return rc;
+		return ERR_PTR(rc);
+	}
+	return mid;
 }
 
 int
@@ -658,6 +656,8 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses,
 	struct mid_q_entry *midQ;
 	char *buf = iov[0].iov_base;
 	unsigned int credits = 1;
+	struct smb_rqst rqst = { .rq_iov = iov,
+				 .rq_nvec = n_vec };
 
 	timeout = flags & CIFS_TIMEOUT_MASK;
 	optype = flags & CIFS_OP_MASK;
@@ -695,13 +695,13 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses,
 
 	mutex_lock(&ses->server->srv_mutex);
 
-	rc = ses->server->ops->setup_request(ses, iov, n_vec, &midQ);
-	if (rc) {
+	midQ = ses->server->ops->setup_request(ses, &rqst);
+	if (IS_ERR(midQ)) {
 		mutex_unlock(&ses->server->srv_mutex);
 		cifs_small_buf_release(buf);
 		/* Update # of requests on wire to server */
 		add_credits(ses->server, 1, optype);
-		return rc;
+		return PTR_ERR(midQ);
 	}
 
 	midQ->mid_state = MID_REQUEST_SUBMITTED;
-- 
cgit 


From eddb079deb4deb1259f87425094c7a586fc59313 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Tue, 18 Sep 2012 16:20:35 -0700
Subject: cifs: convert async write code to pass in data via rq_pages array

Reviewed-by: Pavel Shilovsky <pshilovsky@samba.org>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/cifsglob.h |  4 ++--
 fs/cifs/cifssmb.c  | 39 +++++++++++----------------------------
 fs/cifs/file.c     | 48 ++++++++++--------------------------------------
 fs/cifs/smb2pdu.c  | 37 ++++++++++---------------------------
 4 files changed, 33 insertions(+), 95 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index a81790005e57..cc70ac0bac47 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -999,8 +999,8 @@ struct cifs_writedata {
 	pid_t				pid;
 	unsigned int			bytes;
 	int				result;
-	void (*marshal_iov) (struct kvec *iov,
-			     struct cifs_writedata *wdata);
+	unsigned int			pagesz;
+	unsigned int			tailsz;
 	unsigned int			nr_pages;
 	struct page			*pages[1];
 };
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 2f86c84468cb..a110e0784221 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -2033,11 +2033,11 @@ cifs_writev_callback(struct mid_q_entry *mid)
 int
 cifs_async_writev(struct cifs_writedata *wdata)
 {
-	int i, rc = -EACCES;
+	int rc = -EACCES;
 	WRITE_REQ *smb = NULL;
 	int wct;
 	struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink);
-	struct kvec *iov = NULL;
+	struct kvec iov;
 	struct smb_rqst rqst = { };
 
 	if (tcon->ses->capabilities & CAP_LARGE_FILES) {
@@ -2054,15 +2054,6 @@ cifs_async_writev(struct cifs_writedata *wdata)
 	if (rc)
 		goto async_writev_out;
 
-	/* 1 iov per page + 1 for header */
-	rqst.rq_nvec = wdata->nr_pages + 1;
-	iov = kzalloc((rqst.rq_nvec) * sizeof(*iov), GFP_NOFS);
-	if (iov == NULL) {
-		rc = -ENOMEM;
-		goto async_writev_out;
-	}
-	rqst.rq_iov = iov;
-
 	smb->hdr.Pid = cpu_to_le16((__u16)wdata->pid);
 	smb->hdr.PidHigh = cpu_to_le16((__u16)(wdata->pid >> 16));
 
@@ -2079,18 +2070,15 @@ cifs_async_writev(struct cifs_writedata *wdata)
 	    cpu_to_le16(offsetof(struct smb_com_write_req, Data) - 4);
 
 	/* 4 for RFC1001 length + 1 for BCC */
-	iov[0].iov_len = be32_to_cpu(smb->hdr.smb_buf_length) + 4 + 1;
-	iov[0].iov_base = smb;
+	iov.iov_len = be32_to_cpu(smb->hdr.smb_buf_length) + 4 + 1;
+	iov.iov_base = smb;
 
-	/*
-	 * This function should marshal up the page array into the kvec
-	 * array, reserving [0] for the header. It should kmap the pages
-	 * and set the iov_len properly for each one. It may also set
-	 * wdata->bytes too.
-	 */
-	cifs_kmap_lock();
-	wdata->marshal_iov(iov, wdata);
-	cifs_kmap_unlock();
+	rqst.rq_iov = &iov;
+	rqst.rq_nvec = 1;
+	rqst.rq_pages = wdata->pages;
+	rqst.rq_npages = wdata->nr_pages;
+	rqst.rq_pagesz = wdata->pagesz;
+	rqst.rq_tailsz = wdata->tailsz;
 
 	cFYI(1, "async write at %llu %u bytes", wdata->offset, wdata->bytes);
 
@@ -2106,7 +2094,7 @@ cifs_async_writev(struct cifs_writedata *wdata)
 				(struct smb_com_writex_req *)smb;
 		inc_rfc1001_len(&smbw->hdr, wdata->bytes + 5);
 		put_bcc(wdata->bytes + 5, &smbw->hdr);
-		iov[0].iov_len += 4; /* pad bigger by four bytes */
+		iov.iov_len += 4; /* pad bigger by four bytes */
 	}
 
 	kref_get(&wdata->refcount);
@@ -2118,13 +2106,8 @@ cifs_async_writev(struct cifs_writedata *wdata)
 	else
 		kref_put(&wdata->refcount, cifs_writedata_release);
 
-	/* send is done, unmap pages */
-	for (i = 0; i < wdata->nr_pages; i++)
-		kunmap(wdata->pages[i]);
-
 async_writev_out:
 	cifs_small_buf_release(smb);
-	kfree(iov);
 	return rc;
 }
 
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 2418618118b6..8a781226ae33 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1738,27 +1738,6 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
 	return rc;
 }
 
-/*
- * Marshal up the iov array, reserving the first one for the header. Also,
- * set wdata->bytes.
- */
-static void
-cifs_writepages_marshal_iov(struct kvec *iov, struct cifs_writedata *wdata)
-{
-	int i;
-	struct inode *inode = wdata->cfile->dentry->d_inode;
-	loff_t size = i_size_read(inode);
-
-	/* marshal up the pages into iov array */
-	wdata->bytes = 0;
-	for (i = 0; i < wdata->nr_pages; i++) {
-		iov[i + 1].iov_len = min(size - page_offset(wdata->pages[i]),
-					(loff_t)PAGE_CACHE_SIZE);
-		iov[i + 1].iov_base = kmap(wdata->pages[i]);
-		wdata->bytes += iov[i + 1].iov_len;
-	}
-}
-
 static int cifs_writepages(struct address_space *mapping,
 			   struct writeback_control *wbc)
 {
@@ -1769,6 +1748,7 @@ static int cifs_writepages(struct address_space *mapping,
 	struct TCP_Server_Info *server;
 	struct page *page;
 	int rc = 0;
+	loff_t isize = i_size_read(mapping->host);
 
 	/*
 	 * If wsize is smaller than the page cache size, default to writing
@@ -1873,7 +1853,7 @@ retry:
 			 */
 			set_page_writeback(page);
 
-			if (page_offset(page) >= mapping->host->i_size) {
+			if (page_offset(page) >= isize) {
 				done = true;
 				unlock_page(page);
 				end_page_writeback(page);
@@ -1904,7 +1884,12 @@ retry:
 		wdata->sync_mode = wbc->sync_mode;
 		wdata->nr_pages = nr_pages;
 		wdata->offset = page_offset(wdata->pages[0]);
-		wdata->marshal_iov = cifs_writepages_marshal_iov;
+		wdata->pagesz = PAGE_CACHE_SIZE;
+		wdata->tailsz =
+			min(isize - page_offset(wdata->pages[nr_pages - 1]),
+			    (loff_t)PAGE_CACHE_SIZE);
+		wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) +
+					wdata->tailsz;
 
 		do {
 			if (wdata->cfile != NULL)
@@ -2205,20 +2190,6 @@ size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
 	return num_pages;
 }
 
-static void
-cifs_uncached_marshal_iov(struct kvec *iov, struct cifs_writedata *wdata)
-{
-	int i;
-	size_t bytes = wdata->bytes;
-
-	/* marshal up the pages into iov array */
-	for (i = 0; i < wdata->nr_pages; i++) {
-		iov[i + 1].iov_len = min_t(size_t, bytes, PAGE_SIZE);
-		iov[i + 1].iov_base = kmap(wdata->pages[i]);
-		bytes -= iov[i + 1].iov_len;
-	}
-}
-
 static void
 cifs_uncached_writev_complete(struct work_struct *work)
 {
@@ -2339,7 +2310,8 @@ cifs_iovec_write(struct file *file, const struct iovec *iov,
 		wdata->cfile = cifsFileInfo_get(open_file);
 		wdata->pid = pid;
 		wdata->bytes = cur_len;
-		wdata->marshal_iov = cifs_uncached_marshal_iov;
+		wdata->pagesz = PAGE_SIZE;
+		wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
 		rc = cifs_uncached_retry_writev(wdata);
 		if (rc) {
 			kref_put(&wdata->refcount, cifs_writedata_release);
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index a04301b69b4e..68023d23702b 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1484,25 +1484,16 @@ smb2_writev_callback(struct mid_q_entry *mid)
 int
 smb2_async_writev(struct cifs_writedata *wdata)
 {
-	int i, rc = -EACCES;
+	int rc = -EACCES;
 	struct smb2_write_req *req = NULL;
 	struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink);
-	struct kvec *iov = NULL;
+	struct kvec iov;
 	struct smb_rqst rqst;
 
 	rc = small_smb2_init(SMB2_WRITE, tcon, (void **) &req);
 	if (rc)
 		goto async_writev_out;
 
-	/* 1 iov per page + 1 for header */
-	iov = kzalloc((wdata->nr_pages + 1) * sizeof(*iov), GFP_NOFS);
-	if (iov == NULL) {
-		rc = -ENOMEM;
-		goto async_writev_out;
-	}
-	rqst.rq_iov = iov;
-	rqst.rq_nvec = wdata->nr_pages + 1;
-
 	req->hdr.ProcessId = cpu_to_le32(wdata->cfile->pid);
 
 	req->PersistentFileId = wdata->cfile->fid.persistent_fid;
@@ -1517,18 +1508,15 @@ smb2_async_writev(struct cifs_writedata *wdata)
 	req->RemainingBytes = 0;
 
 	/* 4 for rfc1002 length field and 1 for Buffer */
-	iov[0].iov_len = get_rfc1002_length(req) + 4 - 1;
-	iov[0].iov_base = (char *)req;
+	iov.iov_len = get_rfc1002_length(req) + 4 - 1;
+	iov.iov_base = req;
 
-	/*
-	 * This function should marshal up the page array into the kvec
-	 * array, reserving [0] for the header. It should kmap the pages
-	 * and set the iov_len properly for each one. It may also set
-	 * wdata->bytes too.
-	 */
-	cifs_kmap_lock();
-	wdata->marshal_iov(iov, wdata);
-	cifs_kmap_unlock();
+	rqst.rq_iov = &iov;
+	rqst.rq_nvec = 1;
+	rqst.rq_pages = wdata->pages;
+	rqst.rq_npages = wdata->nr_pages;
+	rqst.rq_pagesz = wdata->pagesz;
+	rqst.rq_tailsz = wdata->tailsz;
 
 	cFYI(1, "async write at %llu %u bytes", wdata->offset, wdata->bytes);
 
@@ -1543,13 +1531,8 @@ smb2_async_writev(struct cifs_writedata *wdata)
 	if (rc)
 		kref_put(&wdata->refcount, cifs_writedata_release);
 
-	/* send is done, unmap pages */
-	for (i = 0; i < wdata->nr_pages; i++)
-		kunmap(wdata->pages[i]);
-
 async_writev_out:
 	cifs_small_buf_release(req);
-	kfree(iov);
 	return rc;
 }
 
-- 
cgit 


From c84ce4a7b22aa54017c61217f8a0e5e24d593da1 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Tue, 18 Sep 2012 16:20:36 -0700
Subject: cifs: remove the kmap size limit from wsize

Now that we're not kmapping so much at once, there's no need to cap
the wsize at the amount that can be simultaneously kmapped.

Reviewed-by: Pavel Shilovsky <pshilovsky@samba.org>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/smb1ops.c | 3 ---
 fs/cifs/smb2ops.c | 3 ---
 2 files changed, 6 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index f6c7a1c9a1b6..8727ef712a3c 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -442,9 +442,6 @@ cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
 		wsize = min_t(unsigned int, wsize,
 				server->maxBuf - sizeof(WRITE_REQ) + 4);
 
-	/* limit to the amount that we can kmap at once */
-	wsize = min_t(unsigned int, wsize, CIFS_KMAP_SIZE_LIMIT);
-
 	/* hard limit of CIFS_MAX_WSIZE */
 	wsize = min_t(unsigned int, wsize, CIFS_MAX_WSIZE);
 
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 3a8c68258026..a621d6125367 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -187,9 +187,6 @@ smb2_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
 	 */
 	wsize = min_t(unsigned int, wsize, 2 << 15);
 
-	/* limit to the amount that we can kmap at once */
-	wsize = min_t(unsigned int, wsize, CIFS_KMAP_SIZE_LIMIT);
-
 	return wsize;
 }
 
-- 
cgit 


From 67c1f5295150eb86d065d57b4515a472ecbf008f Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Tue, 18 Sep 2012 16:20:36 -0700
Subject: cifs: add deprecation warning to sockopt=TCP_NODELAY option

Now that we're using TCP_CORK on the socket, there's no value in
continuting to support this option. Schedule it for removal in 3.9.

Reviewed-by: Pavel Shilovsky <pshilovsky@samba.org>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
---
 fs/cifs/connect.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 5210bc82b1dc..443e39633107 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1680,12 +1680,13 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
 			if (string == NULL)
 				goto out_nomem;
 
-			/*
-			 * FIXME: since we now cork/uncork the socket while
-			 * 	  sending, should we deprecate this option?
-			 */
-			if (strnicmp(string, "TCP_NODELAY", 11) == 0)
+			if (strnicmp(string, "TCP_NODELAY", 11) == 0) {
+				printk(KERN_WARNING "CIFS: the "
+					"sockopt=TCP_NODELAY option has been "
+					"deprecated and will be removed "
+					"in 3.9\n");
 				vol->sockopt_tcp_nodelay = 1;
+			}
 			break;
 		case Opt_netbiosname:
 			string = match_strdup(args);
-- 
cgit 


From f4e49cd2dce2ccac6feae64fbb4e90f7d8baf370 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Tue, 18 Sep 2012 16:20:36 -0700
Subject: cifs: allocate kvec array for cifs_readdata as a separate allocation

Eventually, we're going to want to append a list of pages to
cifs_readdata instead of a list of kvecs. To prepare for that, turn
the kvec array allocation into a separate one and just keep a
pointer to it in the readdata.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
---
 fs/cifs/cifsglob.h |  2 +-
 fs/cifs/file.c     | 15 ++++++++++++---
 2 files changed, 13 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index cc70ac0bac47..737289b50ca5 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -982,7 +982,7 @@ struct cifs_readdata {
 	int (*marshal_iov) (struct cifs_readdata *rdata,
 			    unsigned int remaining);
 	unsigned int			nr_iov;
-	struct kvec			iov[1];
+	struct kvec			*iov;
 };
 
 struct cifs_writedata;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 8a781226ae33..61b7c834069f 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2410,19 +2410,27 @@ ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
 }
 
 static struct cifs_readdata *
-cifs_readdata_alloc(unsigned int nr_vecs, work_func_t complete)
+cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
 {
 	struct cifs_readdata *rdata;
+	struct kvec *iov;
 
-	rdata = kzalloc(sizeof(*rdata) +
-			sizeof(struct kvec) * nr_vecs, GFP_KERNEL);
+	iov = kzalloc(sizeof(*iov) * (nr_pages + 1), GFP_KERNEL);
+	if (!iov)
+		return (struct cifs_readdata *)iov;
+
+	rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
 	if (rdata != NULL) {
 		kref_init(&rdata->refcount);
 		INIT_LIST_HEAD(&rdata->list);
 		init_completion(&rdata->done);
 		INIT_WORK(&rdata->work, complete);
 		INIT_LIST_HEAD(&rdata->pages);
+		rdata->iov = iov;
+	} else {
+		kfree(iov);
 	}
+
 	return rdata;
 }
 
@@ -2435,6 +2443,7 @@ cifs_readdata_release(struct kref *refcount)
 	if (rdata->cfile)
 		cifsFileInfo_put(rdata->cfile);
 
+	kfree(rdata->iov);
 	kfree(rdata);
 }
 
-- 
cgit 


From c5fab6f4f081afcfcd7c1d444d9b900d6ef3e50b Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Wed, 19 Sep 2012 06:22:30 -0700
Subject: cifs: turn the pages list in cifs_readdata into an array

We'll need an array to put into a smb_rqst, so convert this into an array
instead of (ab)using the lru list_head.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
---
 fs/cifs/cifsglob.h |  3 +-
 fs/cifs/file.c     | 87 +++++++++++++++++++++++++++++++-----------------------
 2 files changed, 52 insertions(+), 38 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 737289b50ca5..b70863ebedf2 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -977,12 +977,13 @@ struct cifs_readdata {
 	unsigned int			bytes;
 	pid_t				pid;
 	int				result;
-	struct list_head		pages;
 	struct work_struct		work;
 	int (*marshal_iov) (struct cifs_readdata *rdata,
 			    unsigned int remaining);
 	unsigned int			nr_iov;
 	struct kvec			*iov;
+	unsigned int			nr_pages;
+	struct page			*pages[];
 };
 
 struct cifs_writedata;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 61b7c834069f..6eaf48270c97 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2419,13 +2419,13 @@ cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
 	if (!iov)
 		return (struct cifs_readdata *)iov;
 
-	rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
+	rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
+			GFP_KERNEL);
 	if (rdata != NULL) {
 		kref_init(&rdata->refcount);
 		INIT_LIST_HEAD(&rdata->list);
 		init_completion(&rdata->done);
 		INIT_WORK(&rdata->work, complete);
-		INIT_LIST_HEAD(&rdata->pages);
 		rdata->iov = iov;
 	} else {
 		kfree(iov);
@@ -2448,25 +2448,25 @@ cifs_readdata_release(struct kref *refcount)
 }
 
 static int
-cifs_read_allocate_pages(struct list_head *list, unsigned int npages)
+cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
 {
 	int rc = 0;
-	struct page *page, *tpage;
+	struct page *page;
 	unsigned int i;
 
-	for (i = 0; i < npages; i++) {
+	for (i = 0; i < nr_pages; i++) {
 		page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
 		if (!page) {
 			rc = -ENOMEM;
 			break;
 		}
-		list_add(&page->lru, list);
+		rdata->pages[i] = page;
 	}
 
 	if (rc) {
-		list_for_each_entry_safe(page, tpage, list, lru) {
-			list_del(&page->lru);
-			put_page(page);
+		for (i = 0; i < nr_pages; i++) {
+			put_page(rdata->pages[i]);
+			rdata->pages[i] = NULL;
 		}
 	}
 	return rc;
@@ -2475,13 +2475,13 @@ cifs_read_allocate_pages(struct list_head *list, unsigned int npages)
 static void
 cifs_uncached_readdata_release(struct kref *refcount)
 {
-	struct page *page, *tpage;
 	struct cifs_readdata *rdata = container_of(refcount,
 					struct cifs_readdata, refcount);
+	unsigned int i;
 
-	list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
-		list_del(&page->lru);
-		put_page(page);
+	for (i = 0; i < rdata->nr_pages; i++) {
+		put_page(rdata->pages[i]);
+		rdata->pages[i] = NULL;
 	}
 	cifs_readdata_release(refcount);
 }
@@ -2525,17 +2525,18 @@ cifs_readdata_to_iov(struct cifs_readdata *rdata, const struct iovec *iov,
 	int rc = 0;
 	struct iov_iter ii;
 	size_t pos = rdata->offset - offset;
-	struct page *page, *tpage;
 	ssize_t remaining = rdata->bytes;
 	unsigned char *pdata;
+	unsigned int i;
 
 	/* set up iov_iter and advance to the correct offset */
 	iov_iter_init(&ii, iov, nr_segs, iov_length(iov, nr_segs), 0);
 	iov_iter_advance(&ii, pos);
 
 	*copied = 0;
-	list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
+	for (i = 0; i < rdata->nr_pages; i++) {
 		ssize_t copy;
+		struct page *page = rdata->pages[i];
 
 		/* copy a whole page or whatever's left */
 		copy = min_t(ssize_t, remaining, PAGE_SIZE);
@@ -2555,9 +2556,6 @@ cifs_readdata_to_iov(struct cifs_readdata *rdata, const struct iovec *iov,
 				iov_iter_advance(&ii, copy);
 			}
 		}
-
-		list_del(&page->lru);
-		put_page(page);
 	}
 
 	return rc;
@@ -2568,13 +2566,12 @@ cifs_uncached_readv_complete(struct work_struct *work)
 {
 	struct cifs_readdata *rdata = container_of(work,
 						struct cifs_readdata, work);
+	unsigned int i;
 
 	/* if the result is non-zero then the pages weren't kmapped */
 	if (rdata->result == 0) {
-		struct page *page;
-
-		list_for_each_entry(page, &rdata->pages, lru)
-			kunmap(page);
+		for (i = 0; i < rdata->nr_pages; i++)
+			kunmap(rdata->pages[i]);
 	}
 
 	complete(&rdata->done);
@@ -2586,10 +2583,13 @@ cifs_uncached_read_marshal_iov(struct cifs_readdata *rdata,
 				unsigned int remaining)
 {
 	int len = 0;
-	struct page *page, *tpage;
+	unsigned int i;
+	unsigned int nr_pages = rdata->nr_pages;
 
 	rdata->nr_iov = 1;
-	list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
+	for (i = 0; i < nr_pages; i++) {
+		struct page *page = rdata->pages[i];
+
 		if (remaining >= PAGE_SIZE) {
 			/* enough data to fill the page */
 			rdata->iov[rdata->nr_iov].iov_base = kmap(page);
@@ -2616,7 +2616,8 @@ cifs_uncached_read_marshal_iov(struct cifs_readdata *rdata,
 			remaining = 0;
 		} else {
 			/* no need to hold page hostage */
-			list_del(&page->lru);
+			rdata->pages[i] = NULL;
+			rdata->nr_pages--;
 			put_page(page);
 		}
 	}
@@ -2675,11 +2676,12 @@ cifs_iovec_read(struct file *file, const struct iovec *iov,
 			goto error;
 		}
 
-		rc = cifs_read_allocate_pages(&rdata->pages, npages);
+		rc = cifs_read_allocate_pages(rdata, npages);
 		if (rc)
 			goto error;
 
 		rdata->cfile = cifsFileInfo_get(open_file);
+		rdata->nr_pages = npages;
 		rdata->offset = offset;
 		rdata->bytes = cur_len;
 		rdata->pid = pid;
@@ -2923,12 +2925,13 @@ int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
 static void
 cifs_readv_complete(struct work_struct *work)
 {
+	unsigned int i;
 	struct cifs_readdata *rdata = container_of(work,
 						struct cifs_readdata, work);
-	struct page *page, *tpage;
 
-	list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
-		list_del(&page->lru);
+	for (i = 0; i < rdata->nr_pages; i++) {
+		struct page *page = rdata->pages[i];
+
 		lru_cache_add_file(page);
 
 		if (rdata->result == 0) {
@@ -2943,6 +2946,7 @@ cifs_readv_complete(struct work_struct *work)
 			cifs_readpage_to_fscache(rdata->mapping->host, page);
 
 		page_cache_release(page);
+		rdata->pages[i] = NULL;
 	}
 	kref_put(&rdata->refcount, cifs_readdata_release);
 }
@@ -2951,9 +2955,10 @@ static int
 cifs_readpages_marshal_iov(struct cifs_readdata *rdata, unsigned int remaining)
 {
 	int len = 0;
-	struct page *page, *tpage;
+	unsigned int i;
 	u64 eof;
 	pgoff_t eof_index;
+	unsigned int nr_pages = rdata->nr_pages;
 
 	/* determine the eof that the server (probably) has */
 	eof = CIFS_I(rdata->mapping->host)->server_eof;
@@ -2961,7 +2966,9 @@ cifs_readpages_marshal_iov(struct cifs_readdata *rdata, unsigned int remaining)
 	cFYI(1, "eof=%llu eof_index=%lu", eof, eof_index);
 
 	rdata->nr_iov = 1;
-	list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
+	for (i = 0; i < nr_pages; i++) {
+		struct page *page = rdata->pages[i];
+
 		if (remaining >= PAGE_CACHE_SIZE) {
 			/* enough data to fill the page */
 			rdata->iov[rdata->nr_iov].iov_base = kmap(page);
@@ -2996,18 +3003,20 @@ cifs_readpages_marshal_iov(struct cifs_readdata *rdata, unsigned int remaining)
 			 * fill them until the writes are flushed.
 			 */
 			zero_user(page, 0, PAGE_CACHE_SIZE);
-			list_del(&page->lru);
 			lru_cache_add_file(page);
 			flush_dcache_page(page);
 			SetPageUptodate(page);
 			unlock_page(page);
 			page_cache_release(page);
+			rdata->pages[i] = NULL;
+			rdata->nr_pages--;
 		} else {
 			/* no need to hold page hostage */
-			list_del(&page->lru);
 			lru_cache_add_file(page);
 			unlock_page(page);
 			page_cache_release(page);
+			rdata->pages[i] = NULL;
+			rdata->nr_pages--;
 		}
 	}
 
@@ -3065,6 +3074,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
 	 * the rdata->pages, then we want them in increasing order.
 	 */
 	while (!list_empty(page_list)) {
+		unsigned int i;
 		unsigned int bytes = PAGE_CACHE_SIZE;
 		unsigned int expected_index;
 		unsigned int nr_pages = 1;
@@ -3135,13 +3145,16 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
 		rdata->bytes = bytes;
 		rdata->pid = pid;
 		rdata->marshal_iov = cifs_readpages_marshal_iov;
-		list_splice_init(&tmplist, &rdata->pages);
+
+		list_for_each_entry_safe(page, tpage, &tmplist, lru) {
+			list_del(&page->lru);
+			rdata->pages[rdata->nr_pages++] = page;
+		}
 
 		rc = cifs_retry_async_readv(rdata);
 		if (rc != 0) {
-			list_for_each_entry_safe(page, tpage, &rdata->pages,
-						 lru) {
-				list_del(&page->lru);
+			for (i = 0; i < rdata->nr_pages; i++) {
+				page = rdata->pages[i];
 				lru_cache_add_file(page);
 				unlock_page(page);
 				page_cache_release(page);
-- 
cgit 


From 8321fec436050b586cee448f2da0a6999e5172dd Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Wed, 19 Sep 2012 06:22:32 -0700
Subject: cifs: convert async read code to use pages array without kmapping

Replace the "marshal_iov" function with a "read_into_pages" function.
That function will copy the read data off the socket and into the
pages array, kmapping and reading pages one at a time.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
---
 fs/cifs/cifsglob.h |   7 +++-
 fs/cifs/cifssmb.c  |  27 +++++-------
 fs/cifs/file.c     | 121 ++++++++++++++++++++++++++---------------------------
 fs/cifs/smb2pdu.c  |   6 ++-
 4 files changed, 80 insertions(+), 81 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index b70863ebedf2..93e16200b2e8 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -978,8 +978,11 @@ struct cifs_readdata {
 	pid_t				pid;
 	int				result;
 	struct work_struct		work;
-	int (*marshal_iov) (struct cifs_readdata *rdata,
-			    unsigned int remaining);
+	int (*read_into_pages)(struct TCP_Server_Info *server,
+				struct cifs_readdata *rdata,
+				unsigned int len);
+	unsigned int			pagesz;
+	unsigned int			tailsz;
 	unsigned int			nr_iov;
 	struct kvec			*iov;
 	unsigned int			nr_pages;
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index a110e0784221..5d7bd757dcf1 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1496,6 +1496,7 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
 	/* set up first iov for signature check */
 	rdata->iov[0].iov_base = buf;
 	rdata->iov[0].iov_len = server->total_read;
+	rdata->nr_iov = 1;
 	cFYI(1, "0: iov_base=%p iov_len=%zu",
 		rdata->iov[0].iov_base, rdata->iov[0].iov_len);
 
@@ -1507,23 +1508,11 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
 		return cifs_readv_discard(server, mid);
 	}
 
-	/* marshal up the page array */
-	cifs_kmap_lock();
-	len = rdata->marshal_iov(rdata, data_len);
-	cifs_kmap_unlock();
-	data_len -= len;
-
-	/* issue the read if we have any iovecs left to fill */
-	if (rdata->nr_iov > 1) {
-		length = cifs_readv_from_socket(server, &rdata->iov[1],
-						rdata->nr_iov - 1, len);
-		if (length < 0)
-			return length;
-		server->total_read += length;
-	} else {
-		length = 0;
-	}
+	length = rdata->read_into_pages(server, rdata, data_len);
+	if (length < 0)
+		return length;
 
+	server->total_read += length;
 	rdata->bytes = length;
 
 	cFYI(1, "total_read=%u buflen=%u remaining=%u", server->total_read,
@@ -1544,7 +1533,11 @@ cifs_readv_callback(struct mid_q_entry *mid)
 	struct cifs_tcon *tcon = tlink_tcon(rdata->cfile->tlink);
 	struct TCP_Server_Info *server = tcon->ses->server;
 	struct smb_rqst rqst = { .rq_iov = rdata->iov,
-				 .rq_nvec = rdata->nr_iov };
+				 .rq_nvec = rdata->nr_iov,
+				 .rq_pages = rdata->pages,
+				 .rq_npages = rdata->nr_pages,
+				 .rq_pagesz = rdata->pagesz,
+				 .rq_tailsz = rdata->tailsz };
 
 	cFYI(1, "%s: mid=%llu state=%d result=%d bytes=%u", __func__,
 		mid->mid, mid->mid_state, rdata->result, rdata->bytes);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 6eaf48270c97..f3f1b1098a6c 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2566,63 +2566,57 @@ cifs_uncached_readv_complete(struct work_struct *work)
 {
 	struct cifs_readdata *rdata = container_of(work,
 						struct cifs_readdata, work);
-	unsigned int i;
-
-	/* if the result is non-zero then the pages weren't kmapped */
-	if (rdata->result == 0) {
-		for (i = 0; i < rdata->nr_pages; i++)
-			kunmap(rdata->pages[i]);
-	}
 
 	complete(&rdata->done);
 	kref_put(&rdata->refcount, cifs_uncached_readdata_release);
 }
 
 static int
-cifs_uncached_read_marshal_iov(struct cifs_readdata *rdata,
-				unsigned int remaining)
+cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
+			struct cifs_readdata *rdata, unsigned int len)
 {
-	int len = 0;
+	int total_read = 0, result = 0;
 	unsigned int i;
 	unsigned int nr_pages = rdata->nr_pages;
+	struct kvec iov;
 
-	rdata->nr_iov = 1;
+	rdata->tailsz = PAGE_SIZE;
 	for (i = 0; i < nr_pages; i++) {
 		struct page *page = rdata->pages[i];
 
-		if (remaining >= PAGE_SIZE) {
+		if (len >= PAGE_SIZE) {
 			/* enough data to fill the page */
-			rdata->iov[rdata->nr_iov].iov_base = kmap(page);
-			rdata->iov[rdata->nr_iov].iov_len = PAGE_SIZE;
-			cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
-				rdata->nr_iov, page->index,
-				rdata->iov[rdata->nr_iov].iov_base,
-				rdata->iov[rdata->nr_iov].iov_len);
-			++rdata->nr_iov;
-			len += PAGE_SIZE;
-			remaining -= PAGE_SIZE;
-		} else if (remaining > 0) {
+			iov.iov_base = kmap(page);
+			iov.iov_len = PAGE_SIZE;
+			cFYI(1, "%u: iov_base=%p iov_len=%zu",
+				i, iov.iov_base, iov.iov_len);
+			len -= PAGE_SIZE;
+		} else if (len > 0) {
 			/* enough for partial page, fill and zero the rest */
-			rdata->iov[rdata->nr_iov].iov_base = kmap(page);
-			rdata->iov[rdata->nr_iov].iov_len = remaining;
-			cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
-				rdata->nr_iov, page->index,
-				rdata->iov[rdata->nr_iov].iov_base,
-				rdata->iov[rdata->nr_iov].iov_len);
-			memset(rdata->iov[rdata->nr_iov].iov_base + remaining,
-				'\0', PAGE_SIZE - remaining);
-			++rdata->nr_iov;
-			len += remaining;
-			remaining = 0;
+			iov.iov_base = kmap(page);
+			iov.iov_len = len;
+			cFYI(1, "%u: iov_base=%p iov_len=%zu",
+				i, iov.iov_base, iov.iov_len);
+			memset(iov.iov_base + len, '\0', PAGE_SIZE - len);
+			rdata->tailsz = len;
+			len = 0;
 		} else {
 			/* no need to hold page hostage */
 			rdata->pages[i] = NULL;
 			rdata->nr_pages--;
 			put_page(page);
+			continue;
 		}
+
+		result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
+		kunmap(page);
+		if (result < 0)
+			break;
+
+		total_read += result;
 	}
 
-	return len;
+	return total_read > 0 ? total_read : result;
 }
 
 static ssize_t
@@ -2685,7 +2679,8 @@ cifs_iovec_read(struct file *file, const struct iovec *iov,
 		rdata->offset = offset;
 		rdata->bytes = cur_len;
 		rdata->pid = pid;
-		rdata->marshal_iov = cifs_uncached_read_marshal_iov;
+		rdata->pagesz = PAGE_SIZE;
+		rdata->read_into_pages = cifs_uncached_read_into_pages;
 
 		rc = cifs_retry_async_readv(rdata);
 error:
@@ -2935,7 +2930,6 @@ cifs_readv_complete(struct work_struct *work)
 		lru_cache_add_file(page);
 
 		if (rdata->result == 0) {
-			kunmap(page);
 			flush_dcache_page(page);
 			SetPageUptodate(page);
 		}
@@ -2952,47 +2946,42 @@ cifs_readv_complete(struct work_struct *work)
 }
 
 static int
-cifs_readpages_marshal_iov(struct cifs_readdata *rdata, unsigned int remaining)
+cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
+			struct cifs_readdata *rdata, unsigned int len)
 {
-	int len = 0;
+	int total_read = 0, result = 0;
 	unsigned int i;
 	u64 eof;
 	pgoff_t eof_index;
 	unsigned int nr_pages = rdata->nr_pages;
+	struct kvec iov;
 
 	/* determine the eof that the server (probably) has */
 	eof = CIFS_I(rdata->mapping->host)->server_eof;
 	eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
 	cFYI(1, "eof=%llu eof_index=%lu", eof, eof_index);
 
-	rdata->nr_iov = 1;
+	rdata->tailsz = PAGE_CACHE_SIZE;
 	for (i = 0; i < nr_pages; i++) {
 		struct page *page = rdata->pages[i];
 
-		if (remaining >= PAGE_CACHE_SIZE) {
+		if (len >= PAGE_CACHE_SIZE) {
 			/* enough data to fill the page */
-			rdata->iov[rdata->nr_iov].iov_base = kmap(page);
-			rdata->iov[rdata->nr_iov].iov_len = PAGE_CACHE_SIZE;
+			iov.iov_base = kmap(page);
+			iov.iov_len = PAGE_CACHE_SIZE;
 			cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
-				rdata->nr_iov, page->index,
-				rdata->iov[rdata->nr_iov].iov_base,
-				rdata->iov[rdata->nr_iov].iov_len);
-			++rdata->nr_iov;
-			len += PAGE_CACHE_SIZE;
-			remaining -= PAGE_CACHE_SIZE;
-		} else if (remaining > 0) {
+				i, page->index, iov.iov_base, iov.iov_len);
+			len -= PAGE_CACHE_SIZE;
+		} else if (len > 0) {
 			/* enough for partial page, fill and zero the rest */
-			rdata->iov[rdata->nr_iov].iov_base = kmap(page);
-			rdata->iov[rdata->nr_iov].iov_len = remaining;
+			iov.iov_base = kmap(page);
+			iov.iov_len = len;
 			cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
-				rdata->nr_iov, page->index,
-				rdata->iov[rdata->nr_iov].iov_base,
-				rdata->iov[rdata->nr_iov].iov_len);
-			memset(rdata->iov[rdata->nr_iov].iov_base + remaining,
-				'\0', PAGE_CACHE_SIZE - remaining);
-			++rdata->nr_iov;
-			len += remaining;
-			remaining = 0;
+				i, page->index, iov.iov_base, iov.iov_len);
+			memset(iov.iov_base + len,
+				'\0', PAGE_CACHE_SIZE - len);
+			rdata->tailsz = len;
+			len = 0;
 		} else if (page->index > eof_index) {
 			/*
 			 * The VFS will not try to do readahead past the
@@ -3010,6 +2999,7 @@ cifs_readpages_marshal_iov(struct cifs_readdata *rdata, unsigned int remaining)
 			page_cache_release(page);
 			rdata->pages[i] = NULL;
 			rdata->nr_pages--;
+			continue;
 		} else {
 			/* no need to hold page hostage */
 			lru_cache_add_file(page);
@@ -3017,10 +3007,18 @@ cifs_readpages_marshal_iov(struct cifs_readdata *rdata, unsigned int remaining)
 			page_cache_release(page);
 			rdata->pages[i] = NULL;
 			rdata->nr_pages--;
+			continue;
 		}
+
+		result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
+		kunmap(page);
+		if (result < 0)
+			break;
+
+		total_read += result;
 	}
 
-	return len;
+	return total_read > 0 ? total_read : result;
 }
 
 static int cifs_readpages(struct file *file, struct address_space *mapping,
@@ -3144,7 +3142,8 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
 		rdata->offset = offset;
 		rdata->bytes = bytes;
 		rdata->pid = pid;
-		rdata->marshal_iov = cifs_readpages_marshal_iov;
+		rdata->pagesz = PAGE_CACHE_SIZE;
+		rdata->read_into_pages = cifs_readpages_read_into_pages;
 
 		list_for_each_entry_safe(page, tpage, &tmplist, lru) {
 			list_del(&page->lru);
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 68023d23702b..e3efa47cd6ec 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1300,7 +1300,11 @@ smb2_readv_callback(struct mid_q_entry *mid)
 	struct smb2_hdr *buf = (struct smb2_hdr *)rdata->iov[0].iov_base;
 	unsigned int credits_received = 1;
 	struct smb_rqst rqst = { .rq_iov = rdata->iov,
-				 .rq_nvec = rdata->nr_iov };
+				 .rq_nvec = 1,
+				 .rq_pages = rdata->pages,
+				 .rq_npages = rdata->nr_pages,
+				 .rq_pagesz = rdata->pagesz,
+				 .rq_tailsz = rdata->tailsz };
 
 	cFYI(1, "%s: mid=%llu state=%d result=%d bytes=%u", __func__,
 		mid->mid, mid->mid_state, rdata->result, rdata->bytes);
-- 
cgit 


From 5819575ec6b82345e1a21a960d381c699a91c700 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Wed, 19 Sep 2012 06:22:34 -0700
Subject: cifs: replace kvec array in readdata with a single kvec

The array is no longer needed. We just need a single kvec to hold the
header for signature checking.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
---
 fs/cifs/cifsglob.h |  3 +--
 fs/cifs/cifssmb.c  | 29 ++++++++++++++---------------
 fs/cifs/file.c     |  9 ---------
 fs/cifs/smb2pdu.c  | 12 ++++++------
 4 files changed, 21 insertions(+), 32 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 93e16200b2e8..79e8b6f06021 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -981,10 +981,9 @@ struct cifs_readdata {
 	int (*read_into_pages)(struct TCP_Server_Info *server,
 				struct cifs_readdata *rdata,
 				unsigned int len);
+	struct kvec			iov;
 	unsigned int			pagesz;
 	unsigned int			tailsz;
-	unsigned int			nr_iov;
-	struct kvec			*iov;
 	unsigned int			nr_pages;
 	struct page			*pages[];
 };
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 5d7bd757dcf1..88bbb3ef95b3 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1436,10 +1436,10 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
 	len = min_t(unsigned int, buflen, server->vals->read_rsp_size) -
 							HEADER_SIZE(server) + 1;
 
-	rdata->iov[0].iov_base = buf + HEADER_SIZE(server) - 1;
-	rdata->iov[0].iov_len = len;
+	rdata->iov.iov_base = buf + HEADER_SIZE(server) - 1;
+	rdata->iov.iov_len = len;
 
-	length = cifs_readv_from_socket(server, rdata->iov, 1, len);
+	length = cifs_readv_from_socket(server, &rdata->iov, 1, len);
 	if (length < 0)
 		return length;
 	server->total_read += length;
@@ -1485,20 +1485,19 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
 	len = data_offset - server->total_read;
 	if (len > 0) {
 		/* read any junk before data into the rest of smallbuf */
-		rdata->iov[0].iov_base = buf + server->total_read;
-		rdata->iov[0].iov_len = len;
-		length = cifs_readv_from_socket(server, rdata->iov, 1, len);
+		rdata->iov.iov_base = buf + server->total_read;
+		rdata->iov.iov_len = len;
+		length = cifs_readv_from_socket(server, &rdata->iov, 1, len);
 		if (length < 0)
 			return length;
 		server->total_read += length;
 	}
 
 	/* set up first iov for signature check */
-	rdata->iov[0].iov_base = buf;
-	rdata->iov[0].iov_len = server->total_read;
-	rdata->nr_iov = 1;
+	rdata->iov.iov_base = buf;
+	rdata->iov.iov_len = server->total_read;
 	cFYI(1, "0: iov_base=%p iov_len=%zu",
-		rdata->iov[0].iov_base, rdata->iov[0].iov_len);
+		rdata->iov.iov_base, rdata->iov.iov_len);
 
 	/* how much data is in the response? */
 	data_len = server->ops->read_data_length(buf);
@@ -1532,8 +1531,8 @@ cifs_readv_callback(struct mid_q_entry *mid)
 	struct cifs_readdata *rdata = mid->callback_data;
 	struct cifs_tcon *tcon = tlink_tcon(rdata->cfile->tlink);
 	struct TCP_Server_Info *server = tcon->ses->server;
-	struct smb_rqst rqst = { .rq_iov = rdata->iov,
-				 .rq_nvec = rdata->nr_iov,
+	struct smb_rqst rqst = { .rq_iov = &rdata->iov,
+				 .rq_nvec = 1,
 				 .rq_pages = rdata->pages,
 				 .rq_npages = rdata->nr_pages,
 				 .rq_pagesz = rdata->pagesz,
@@ -1580,7 +1579,7 @@ cifs_async_readv(struct cifs_readdata *rdata)
 	READ_REQ *smb = NULL;
 	int wct;
 	struct cifs_tcon *tcon = tlink_tcon(rdata->cfile->tlink);
-	struct smb_rqst rqst = { .rq_iov = rdata->iov,
+	struct smb_rqst rqst = { .rq_iov = &rdata->iov,
 				 .rq_nvec = 1 };
 
 	cFYI(1, "%s: offset=%llu bytes=%u", __func__,
@@ -1621,8 +1620,8 @@ cifs_async_readv(struct cifs_readdata *rdata)
 	}
 
 	/* 4 for RFC1001 length + 1 for BCC */
-	rdata->iov[0].iov_base = smb;
-	rdata->iov[0].iov_len = be32_to_cpu(smb->hdr.smb_buf_length) + 4;
+	rdata->iov.iov_base = smb;
+	rdata->iov.iov_len = be32_to_cpu(smb->hdr.smb_buf_length) + 4;
 
 	kref_get(&rdata->refcount);
 	rc = cifs_call_async(tcon->ses->server, &rqst, cifs_readv_receive,
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index f3f1b1098a6c..2421ec28df14 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2413,11 +2413,6 @@ static struct cifs_readdata *
 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
 {
 	struct cifs_readdata *rdata;
-	struct kvec *iov;
-
-	iov = kzalloc(sizeof(*iov) * (nr_pages + 1), GFP_KERNEL);
-	if (!iov)
-		return (struct cifs_readdata *)iov;
 
 	rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
 			GFP_KERNEL);
@@ -2426,9 +2421,6 @@ cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
 		INIT_LIST_HEAD(&rdata->list);
 		init_completion(&rdata->done);
 		INIT_WORK(&rdata->work, complete);
-		rdata->iov = iov;
-	} else {
-		kfree(iov);
 	}
 
 	return rdata;
@@ -2443,7 +2435,6 @@ cifs_readdata_release(struct kref *refcount)
 	if (rdata->cfile)
 		cifsFileInfo_put(rdata->cfile);
 
-	kfree(rdata->iov);
 	kfree(rdata);
 }
 
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index e3efa47cd6ec..1b447612200e 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1297,9 +1297,9 @@ smb2_readv_callback(struct mid_q_entry *mid)
 	struct cifs_readdata *rdata = mid->callback_data;
 	struct cifs_tcon *tcon = tlink_tcon(rdata->cfile->tlink);
 	struct TCP_Server_Info *server = tcon->ses->server;
-	struct smb2_hdr *buf = (struct smb2_hdr *)rdata->iov[0].iov_base;
+	struct smb2_hdr *buf = (struct smb2_hdr *)rdata->iov.iov_base;
 	unsigned int credits_received = 1;
-	struct smb_rqst rqst = { .rq_iov = rdata->iov,
+	struct smb_rqst rqst = { .rq_iov = &rdata->iov,
 				 .rq_nvec = 1,
 				 .rq_pages = rdata->pages,
 				 .rq_npages = rdata->nr_pages,
@@ -1350,7 +1350,7 @@ smb2_async_readv(struct cifs_readdata *rdata)
 	int rc;
 	struct smb2_hdr *buf;
 	struct cifs_io_parms io_parms;
-	struct smb_rqst rqst = { .rq_iov = rdata->iov,
+	struct smb_rqst rqst = { .rq_iov = &rdata->iov,
 				 .rq_nvec = 1 };
 
 	cFYI(1, "%s: offset=%llu bytes=%u", __func__,
@@ -1362,13 +1362,13 @@ smb2_async_readv(struct cifs_readdata *rdata)
 	io_parms.persistent_fid = rdata->cfile->fid.persistent_fid;
 	io_parms.volatile_fid = rdata->cfile->fid.volatile_fid;
 	io_parms.pid = rdata->pid;
-	rc = smb2_new_read_req(&rdata->iov[0], &io_parms, 0, 0);
+	rc = smb2_new_read_req(&rdata->iov, &io_parms, 0, 0);
 	if (rc)
 		return rc;
 
-	buf = (struct smb2_hdr *)rdata->iov[0].iov_base;
+	buf = (struct smb2_hdr *)rdata->iov.iov_base;
 	/* 4 for rfc1002 length field */
-	rdata->iov[0].iov_len = get_rfc1002_length(rdata->iov[0].iov_base) + 4;
+	rdata->iov.iov_len = get_rfc1002_length(rdata->iov.iov_base) + 4;
 
 	kref_get(&rdata->refcount);
 	rc = cifs_call_async(io_parms.tcon->ses->server, &rqst,
-- 
cgit 


From 71953fc6e4ce5ac05b594d8e5866accf531aa969 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Wed, 19 Sep 2012 06:22:42 -0700
Subject: cifs: remove kmap lock and rsize limit

Now that we aren't abusing the kmap address space, there's no need for
this lock or to impose a limit on the rsize.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
---
 fs/cifs/cifsfs.c   |  4 ----
 fs/cifs/cifsglob.h | 39 ---------------------------------------
 fs/cifs/smb1ops.c  |  3 ---
 fs/cifs/smb2ops.c  |  3 ---
 4 files changed, 49 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 4dda4890d776..3a3e2fee0b3e 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -88,10 +88,6 @@ extern mempool_t *cifs_mid_poolp;
 
 struct workqueue_struct	*cifsiod_wq;
 
-#ifdef CONFIG_HIGHMEM
-DEFINE_MUTEX(cifs_kmap_mutex);
-#endif
-
 static int
 cifs_read_super(struct super_block *sb)
 {
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 79e8b6f06021..b2bb941d8ddd 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -637,45 +637,6 @@ get_next_mid(struct TCP_Server_Info *server)
 #define CIFS_DEFAULT_NON_POSIX_RSIZE (60 * 1024)
 #define CIFS_DEFAULT_NON_POSIX_WSIZE (65536)
 
-/*
- * On hosts with high memory, we can't currently support wsize/rsize that are
- * larger than we can kmap at once. Cap the rsize/wsize at
- * LAST_PKMAP * PAGE_SIZE. We'll never be able to fill a read or write request
- * larger than that anyway.
- */
-#ifdef CONFIG_HIGHMEM
-#define CIFS_KMAP_SIZE_LIMIT   (LAST_PKMAP * PAGE_CACHE_SIZE)
-#else /* CONFIG_HIGHMEM */
-#define CIFS_KMAP_SIZE_LIMIT   (1<<24)
-#endif /* CONFIG_HIGHMEM */
-
-#ifdef CONFIG_HIGHMEM
-/*
- * On arches that have high memory, kmap address space is limited. By
- * serializing the kmap operations on those arches, we ensure that we don't
- * end up with a bunch of threads in writeback with partially mapped page
- * arrays, stuck waiting for kmap to come back. That situation prevents
- * progress and can deadlock.
- */
-
-extern struct mutex cifs_kmap_mutex;
-
-static inline void
-cifs_kmap_lock(void)
-{
-	mutex_lock(&cifs_kmap_mutex);
-}
-
-static inline void
-cifs_kmap_unlock(void)
-{
-	mutex_unlock(&cifs_kmap_mutex);
-}
-#else /* !CONFIG_HIGHMEM */
-#define cifs_kmap_lock() do { ; } while (0)
-#define cifs_kmap_unlock() do { ; } while (0)
-#endif /* CONFIG_HIGHMEM */
-
 /*
  * Macros to allow the TCP_Server_Info->net field and related code to drop out
  * when CONFIG_NET_NS isn't set.
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index 8727ef712a3c..ed7f95532383 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -483,9 +483,6 @@ cifs_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
 	if (!(server->capabilities & CAP_LARGE_READ_X))
 		rsize = min_t(unsigned int, CIFSMaxBufSize, rsize);
 
-	/* limit to the amount that we can kmap at once */
-	rsize = min_t(unsigned int, rsize, CIFS_KMAP_SIZE_LIMIT);
-
 	/* hard limit of CIFS_MAX_RSIZE */
 	rsize = min_t(unsigned int, rsize, CIFS_MAX_RSIZE);
 
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index a621d6125367..b1dedf8cb372 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -205,9 +205,6 @@ smb2_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
 	 */
 	rsize = min_t(unsigned int, rsize, 2 << 15);
 
-	/* limit to the amount that we can kmap at once */
-	rsize = min_t(unsigned int, rsize, CIFS_KMAP_SIZE_LIMIT);
-
 	return rsize;
 }
 
-- 
cgit 


From 1c0bd60b560cdf63a263f8ff3cebe9f99fe7a47c Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <piastryyy@gmail.com>
Date: Wed, 19 Sep 2012 06:22:43 -0700
Subject: CIFS: Add NTLMSSP sec type to defaults

to let us negotiate SMB2 without specifying sec type explicitly.

Signed-off-by: Pavel Shilovsky <piastryyy@gmail.com>
---
 fs/cifs/cifsglob.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index b2bb941d8ddd..004672f9e16c 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -1328,7 +1328,7 @@ require use of the stronger protocol */
 #define   CIFSSEC_MUST_SEAL	0x40040 /* not supported yet */
 #define   CIFSSEC_MUST_NTLMSSP	0x80080 /* raw ntlmssp with ntlmv2 */
 
-#define   CIFSSEC_DEF (CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2)
+#define   CIFSSEC_DEF (CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_NTLMSSP)
 #define   CIFSSEC_MAX (CIFSSEC_MUST_SIGN | CIFSSEC_MUST_NTLMV2)
 #define   CIFSSEC_AUTH_MASK (CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_LANMAN | CIFSSEC_MAY_PLNTXT | CIFSSEC_MAY_KRB5 | CIFSSEC_MAY_NTLMSSP)
 /*
-- 
cgit 


From f45d34167c67b083b54690e349e77f59062ef0ea Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <pshilovsky@samba.org>
Date: Wed, 19 Sep 2012 06:22:43 -0700
Subject: CIFS: Remove spinlock dependence in brlock processing

Now we need to lock/unlock a spinlock while processing brlock ops
on the inode. Move brlocks of a fid to a separate list and attach
all such lists to the inode. This let us not hold a spinlock.

Signed-off-by: Pavel Shilovsky <pshilovsky@samba.org>
---
 fs/cifs/cifsfs.c   |  1 +
 fs/cifs/cifsglob.h | 18 +++++++-------
 fs/cifs/file.c     | 73 ++++++++++++++++++++++++++++++++----------------------
 3 files changed, 53 insertions(+), 39 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 3a3e2fee0b3e..e958d9438505 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -233,6 +233,7 @@ cifs_alloc_inode(struct super_block *sb)
 	   to zero by the VFS */
 /*	cifs_inode->vfs_inode.i_flags = S_NOATIME | S_NOCMTIME;*/
 	INIT_LIST_HEAD(&cifs_inode->openFileList);
+	INIT_LIST_HEAD(&cifs_inode->llist);
 	return &cifs_inode->vfs_inode;
 }
 
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 004672f9e16c..b2eb577b5f3f 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -890,13 +890,16 @@ struct cifs_fid {
 #endif
 };
 
+struct cifs_fid_locks {
+	struct list_head llist;
+	struct cifsFileInfo *cfile;	/* fid that owns locks */
+	struct list_head locks;		/* locks held by fid above */
+};
+
 struct cifsFileInfo {
 	struct list_head tlist;	/* pointer to next fid owned by tcon */
 	struct list_head flist;	/* next fid (file instance) for this inode */
-	struct list_head llist;	/*
-				 * brlocks held by this fid, protected by
-				 * lock_mutex from cifsInodeInfo structure
-				 */
+	struct cifs_fid_locks *llist;	/* brlocks held by this fid */
 	unsigned int uid;	/* allows finding which FileInfo structure */
 	__u32 pid;		/* process id who opened file */
 	struct cifs_fid fid;	/* file id from remote */
@@ -988,11 +991,8 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file);
 
 struct cifsInodeInfo {
 	bool can_cache_brlcks;
-	struct mutex lock_mutex;	/*
-					 * protect the field above and llist
-					 * from every cifsFileInfo structure
-					 * from openFileList
-					 */
+	struct list_head llist;	/* locks helb by this inode */
+	struct mutex lock_mutex;	/* protect the fields above */
 	/* BB add in lists for dirty pages i.e. write caching info for oplock */
 	struct list_head openFileList;
 	__u32 cifsAttrs; /* e.g. DOS archive bit, sparse, compressed, system */
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 2421ec28df14..63963730cb28 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -245,11 +245,25 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
 	struct inode *inode = dentry->d_inode;
 	struct cifsInodeInfo *cinode = CIFS_I(inode);
 	struct cifsFileInfo *cfile;
+	struct cifs_fid_locks *fdlocks;
 
 	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
 	if (cfile == NULL)
 		return cfile;
 
+	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
+	if (!fdlocks) {
+		kfree(cfile);
+		return NULL;
+	}
+
+	INIT_LIST_HEAD(&fdlocks->locks);
+	fdlocks->cfile = cfile;
+	cfile->llist = fdlocks;
+	mutex_lock(&cinode->lock_mutex);
+	list_add(&fdlocks->llist, &cinode->llist);
+	mutex_unlock(&cinode->lock_mutex);
+
 	cfile->count = 1;
 	cfile->pid = current->tgid;
 	cfile->uid = current_fsuid();
@@ -257,9 +271,8 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
 	cfile->f_flags = file->f_flags;
 	cfile->invalidHandle = false;
 	cfile->tlink = cifs_get_tlink(tlink);
-	mutex_init(&cfile->fh_mutex);
 	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
-	INIT_LIST_HEAD(&cfile->llist);
+	mutex_init(&cfile->fh_mutex);
 	tlink_tcon(tlink)->ses->server->ops->set_fid(cfile, fid, oplock);
 
 	spin_lock(&cifs_file_list_lock);
@@ -336,15 +349,18 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
 		free_xid(xid);
 	}
 
-	/* Delete any outstanding lock records. We'll lose them when the file
+	/*
+	 * Delete any outstanding lock records. We'll lose them when the file
 	 * is closed anyway.
 	 */
 	mutex_lock(&cifsi->lock_mutex);
-	list_for_each_entry_safe(li, tmp, &cifs_file->llist, llist) {
+	list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
 		list_del(&li->llist);
 		cifs_del_lock_waiters(li);
 		kfree(li);
 	}
+	list_del(&cifs_file->llist->llist);
+	kfree(cifs_file->llist);
 	mutex_unlock(&cifsi->lock_mutex);
 
 	cifs_put_tlink(cifs_file->tlink);
@@ -691,25 +707,24 @@ cifs_del_lock_waiters(struct cifsLockInfo *lock)
 }
 
 static bool
-cifs_find_fid_lock_conflict(struct cifsFileInfo *cfile, __u64 offset,
-			    __u64 length, __u8 type, struct cifsFileInfo *cur,
+cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
+			    __u64 length, __u8 type, struct cifsFileInfo *cfile,
 			    struct cifsLockInfo **conf_lock)
 {
 	struct cifsLockInfo *li;
+	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
 	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
 
-	list_for_each_entry(li, &cfile->llist, llist) {
+	list_for_each_entry(li, &fdlocks->locks, llist) {
 		if (offset + length <= li->offset ||
 		    offset >= li->offset + li->length)
 			continue;
-		else if ((type & server->vals->shared_lock_type) &&
-			 ((server->ops->compare_fids(cur, cfile) &&
-			   current->tgid == li->pid) || type == li->type))
+		if ((type & server->vals->shared_lock_type) &&
+		    ((server->ops->compare_fids(cfile, cur_cfile) &&
+		     current->tgid == li->pid) || type == li->type))
 			continue;
-		else {
-			*conf_lock = li;
-			return true;
-		}
+		*conf_lock = li;
+		return true;
 	}
 	return false;
 }
@@ -719,17 +734,15 @@ cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
 			__u8 type, struct cifsLockInfo **conf_lock)
 {
 	bool rc = false;
-	struct cifsFileInfo *fid, *tmp;
+	struct cifs_fid_locks *cur;
 	struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
 
-	spin_lock(&cifs_file_list_lock);
-	list_for_each_entry_safe(fid, tmp, &cinode->openFileList, flist) {
-		rc = cifs_find_fid_lock_conflict(fid, offset, length, type,
+	list_for_each_entry(cur, &cinode->llist, llist) {
+		rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
 						 cfile, conf_lock);
 		if (rc)
 			break;
 	}
-	spin_unlock(&cifs_file_list_lock);
 
 	return rc;
 }
@@ -777,7 +790,7 @@ cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
 {
 	struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
 	mutex_lock(&cinode->lock_mutex);
-	list_add_tail(&lock->llist, &cfile->llist);
+	list_add_tail(&lock->llist, &cfile->llist->locks);
 	mutex_unlock(&cinode->lock_mutex);
 }
 
@@ -803,7 +816,7 @@ try_again:
 	exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
 					lock->type, &conf_lock);
 	if (!exist && cinode->can_cache_brlcks) {
-		list_add_tail(&lock->llist, &cfile->llist);
+		list_add_tail(&lock->llist, &cfile->llist->locks);
 		mutex_unlock(&cinode->lock_mutex);
 		return rc;
 	}
@@ -937,7 +950,7 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
 	for (i = 0; i < 2; i++) {
 		cur = buf;
 		num = 0;
-		list_for_each_entry_safe(li, tmp, &cfile->llist, llist) {
+		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
 			if (li->type != types[i])
 				continue;
 			cur->Pid = cpu_to_le16(li->pid);
@@ -1279,7 +1292,7 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
 	for (i = 0; i < 2; i++) {
 		cur = buf;
 		num = 0;
-		list_for_each_entry_safe(li, tmp, &cfile->llist, llist) {
+		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
 			if (flock->fl_start > li->offset ||
 			    (flock->fl_start + length) <
 			    (li->offset + li->length))
@@ -1320,7 +1333,7 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
 					 * list to the head of the file's list.
 					 */
 					cifs_move_llist(&tmp_llist,
-							&cfile->llist);
+							&cfile->llist->locks);
 					rc = stored_rc;
 				} else
 					/*
@@ -1337,7 +1350,8 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
 			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
 					       types[i], num, 0, buf);
 			if (stored_rc) {
-				cifs_move_llist(&tmp_llist, &cfile->llist);
+				cifs_move_llist(&tmp_llist,
+						&cfile->llist->locks);
 				rc = stored_rc;
 			} else
 				cifs_free_llist(&tmp_llist);
@@ -1350,7 +1364,7 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
 }
 
 static int
-cifs_setlk(struct file *file,  struct file_lock *flock, __u32 type,
+cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
 	   bool wait_flag, bool posix_lck, int lock, int unlock,
 	   unsigned int xid)
 {
@@ -1359,7 +1373,6 @@ cifs_setlk(struct file *file,  struct file_lock *flock, __u32 type,
 	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
 	struct TCP_Server_Info *server = tcon->ses->server;
-	__u16 netfid = cfile->fid.netfid;
 
 	if (posix_lck) {
 		int posix_lock_type;
@@ -1376,9 +1389,9 @@ cifs_setlk(struct file *file,  struct file_lock *flock, __u32 type,
 		if (unlock == 1)
 			posix_lock_type = CIFS_UNLCK;
 
-		rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
-				      flock->fl_start, length, NULL,
-				      posix_lock_type, wait_flag);
+		rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
+				      current->tgid, flock->fl_start, length,
+				      NULL, posix_lock_type, wait_flag);
 		goto out;
 	}
 
-- 
cgit 


From d39a4f710b7a7be05b6ed9d4ab8fac754c139f8a Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <pshilovsky@samba.org>
Date: Wed, 19 Sep 2012 06:22:43 -0700
Subject: CIFS: Move brlock code to ops struct

Signed-off-by: Pavel Shilovsky <pshilovsky@samba.org>
---
 fs/cifs/cifsglob.h  |  8 ++++++++
 fs/cifs/cifsproto.h |  3 +++
 fs/cifs/file.c      | 42 +++++++++++++++++-------------------------
 fs/cifs/smb1ops.c   | 12 ++++++++++++
 4 files changed, 40 insertions(+), 25 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index b2eb577b5f3f..d3c72713fec4 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -353,6 +353,14 @@ struct smb_version_operations {
 	/* query remote filesystem */
 	int (*queryfs)(const unsigned int, struct cifs_tcon *,
 		       struct kstatfs *);
+	/* send mandatory brlock to the server */
+	int (*mand_lock)(const unsigned int, struct cifsFileInfo *, __u64,
+			 __u64, __u32, int, int, bool);
+	/* unlock range of mandatory locks */
+	int (*mand_unlock_range)(struct cifsFileInfo *, struct file_lock *,
+				 const unsigned int);
+	/* push brlocks from the cache to the server */
+	int (*push_mand_locks)(struct cifsFileInfo *);
 };
 
 struct smb_version_values {
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 3b628f2af258..a7e238f88898 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -124,6 +124,9 @@ extern u64 cifs_UnixTimeToNT(struct timespec);
 extern struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time,
 				      int offset);
 extern void cifs_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock);
+extern int cifs_unlock_range(struct cifsFileInfo *cfile,
+			     struct file_lock *flock, const unsigned int xid);
+extern int cifs_push_mandatory_locks(struct cifsFileInfo *cfile);
 
 extern struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid,
 					      struct file *file,
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 63963730cb28..90ab83647b82 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -903,7 +903,7 @@ try_again:
 	return rc;
 }
 
-static int
+int
 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
 {
 	unsigned int xid;
@@ -1111,7 +1111,7 @@ cifs_push_locks(struct cifsFileInfo *cfile)
 	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
 		return cifs_push_posix_locks(cfile);
 
-	return cifs_push_mandatory_locks(cfile);
+	return tcon->ses->server->ops->push_mand_locks(cfile);
 }
 
 static void
@@ -1161,15 +1161,6 @@ cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
 		cFYI(1, "Unknown type of lock");
 }
 
-static int
-cifs_mandatory_lock(unsigned int xid, struct cifsFileInfo *cfile, __u64 offset,
-		    __u64 length, __u32 type, int lock, int unlock, bool wait)
-{
-	return CIFSSMBLock(xid, tlink_tcon(cfile->tlink), cfile->fid.netfid,
-			   current->tgid, length, offset, unlock, lock,
-			   (__u8)type, wait, 0);
-}
-
 static int
 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
 	   bool wait_flag, bool posix_lck, unsigned int xid)
@@ -1203,11 +1194,11 @@ cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
 		return rc;
 
 	/* BB we could chain these into one lock request BB */
-	rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length, type,
-				 1, 0, false);
+	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
+				    1, 0, false);
 	if (rc == 0) {
-		rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length,
-					 type, 0, 1, false);
+		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
+					    type, 0, 1, false);
 		flock->fl_type = F_UNLCK;
 		if (rc != 0)
 			cERROR(1, "Error unlocking previously locked "
@@ -1220,13 +1211,14 @@ cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
 		return 0;
 	}
 
-	rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length,
-				 type | server->vals->shared_lock_type, 1, 0,
-				 false);
+	type &= ~server->vals->exclusive_lock_type;
+
+	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
+				    type | server->vals->shared_lock_type,
+				    1, 0, false);
 	if (rc == 0) {
-		rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length,
-					 type | server->vals->shared_lock_type,
-					 0, 1, false);
+		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
+			type | server->vals->shared_lock_type, 0, 1, false);
 		flock->fl_type = F_RDLCK;
 		if (rc != 0)
 			cERROR(1, "Error unlocking previously locked "
@@ -1256,7 +1248,7 @@ cifs_free_llist(struct list_head *llist)
 	}
 }
 
-static int
+int
 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
 		  unsigned int xid)
 {
@@ -1408,8 +1400,8 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
 		if (rc <= 0)
 			goto out;
 
-		rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length,
-					 type, 1, 0, wait_flag);
+		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
+					    type, 1, 0, wait_flag);
 		if (rc) {
 			kfree(lock);
 			goto out;
@@ -1417,7 +1409,7 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
 
 		cifs_lock_add(cfile, lock);
 	} else if (unlock)
-		rc = cifs_unlock_range(cfile, flock, xid);
+		rc = server->ops->mand_unlock_range(cfile, flock, xid);
 
 out:
 	if (flock->fl_flags & FL_POSIX)
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index ed7f95532383..5fb0fe5f72b6 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -899,6 +899,15 @@ cifs_queryfs(const unsigned int xid, struct cifs_tcon *tcon,
 	return rc;
 }
 
+static int
+cifs_mand_lock(const unsigned int xid, struct cifsFileInfo *cfile, __u64 offset,
+	       __u64 length, __u32 type, int lock, int unlock, bool wait)
+{
+	return CIFSSMBLock(xid, tlink_tcon(cfile->tlink), cfile->fid.netfid,
+			   current->tgid, length, offset, unlock, lock,
+			   (__u8)type, wait, 0);
+}
+
 struct smb_version_operations smb1_operations = {
 	.send_cancel = send_nt_cancel,
 	.compare_fids = cifs_compare_fids,
@@ -960,6 +969,9 @@ struct smb_version_operations smb1_operations = {
 	.calc_smb_size = smbCalcSize,
 	.oplock_response = cifs_oplock_response,
 	.queryfs = cifs_queryfs,
+	.mand_lock = cifs_mand_lock,
+	.mand_unlock_range = cifs_unlock_range,
+	.push_mand_locks = cifs_push_mandatory_locks,
 };
 
 struct smb_version_values smb1_values = {
-- 
cgit 


From 027e8eec31d8141a6231f772e10ccae60c9d5c13 Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <piastry@etersoft.ru>
Date: Wed, 19 Sep 2012 06:22:43 -0700
Subject: CIFS: Handle SMB2 lock flags

Signed-off-by: Pavel Shilovsky <piastry@etersoft.ru>
---
 fs/cifs/smb2ops.c | 12 ++++++++++++
 fs/cifs/smb2pdu.h |  5 +++++
 2 files changed, 17 insertions(+)

(limited to 'fs')

diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index b1dedf8cb372..e4a59d1f06b1 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -537,7 +537,15 @@ smb2_queryfs(const unsigned int xid, struct cifs_tcon *tcon,
 	return rc;
 }
 
+static bool
+smb2_compare_fids(struct cifsFileInfo *ob1, struct cifsFileInfo *ob2)
+{
+	return ob1->fid.persistent_fid == ob2->fid.persistent_fid &&
+	       ob1->fid.volatile_fid == ob2->fid.volatile_fid;
+}
+
 struct smb_version_operations smb21_operations = {
+	.compare_fids = smb2_compare_fids,
 	.setup_request = smb2_setup_request,
 	.setup_async_request = smb2_setup_async_request,
 	.check_receive = smb2_check_receive,
@@ -598,6 +606,10 @@ struct smb_version_operations smb21_operations = {
 
 struct smb_version_values smb21_values = {
 	.version_string = SMB21_VERSION_STRING,
+	.large_lock_type = 0,
+	.exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK,
+	.shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
+	.unlock_lock_type = SMB2_LOCKFLAG_UNLOCK,
 	.header_size = sizeof(struct smb2_hdr),
 	.max_header_size = MAX_SMB2_HDR_SIZE,
 	.read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index 3c8e99ea88ad..d2d132e94155 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -526,6 +526,11 @@ struct smb2_write_rsp {
 	__u8   Buffer[1];
 } __packed;
 
+#define SMB2_LOCKFLAG_SHARED_LOCK	0x0001
+#define SMB2_LOCKFLAG_EXCLUSIVE_LOCK	0x0002
+#define SMB2_LOCKFLAG_UNLOCK		0x0004
+#define SMB2_LOCKFLAG_FAIL_IMMEDIATELY	0x0010
+
 struct smb2_echo_req {
 	struct smb2_hdr hdr;
 	__le16 StructureSize;	/* Must be 4 */
-- 
cgit 


From f7ba7fe685bc3ed8fd0687870e68b2567d17357f Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <pshilovsky@etersoft.ru>
Date: Wed, 19 Sep 2012 06:22:43 -0700
Subject: CIFS: Add brlock support for SMB2

Signed-off-by: Pavel Shilovsky <pshilovsky@etersoft.ru>
---
 fs/cifs/cifsproto.h |  4 +++
 fs/cifs/file.c      |  8 ++---
 fs/cifs/smb2file.c  | 97 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/cifs/smb2ops.c   | 13 +++++++
 fs/cifs/smb2pdu.c   | 59 ++++++++++++++++++++++++++++++++
 fs/cifs/smb2pdu.h   | 24 +++++++++++++
 fs/cifs/smb2proto.h | 10 ++++++
 7 files changed, 210 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index a7e238f88898..15e38dc389fc 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -190,6 +190,10 @@ extern void cifs_dfs_release_automount_timer(void);
 void cifs_proc_init(void);
 void cifs_proc_clean(void);
 
+extern void cifs_move_llist(struct list_head *source, struct list_head *dest);
+extern void cifs_free_llist(struct list_head *llist);
+extern void cifs_del_lock_waiters(struct cifsLockInfo *lock);
+
 extern int cifs_negotiate_protocol(const unsigned int xid,
 				   struct cifs_ses *ses);
 extern int cifs_setup_session(const unsigned int xid, struct cifs_ses *ses,
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 90ab83647b82..e2a8e4456275 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -288,8 +288,6 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
 	return cfile;
 }
 
-static void cifs_del_lock_waiters(struct cifsLockInfo *lock);
-
 struct cifsFileInfo *
 cifsFileInfo_get(struct cifsFileInfo *cifs_file)
 {
@@ -696,7 +694,7 @@ cifs_lock_init(__u64 offset, __u64 length, __u8 type)
 	return lock;
 }
 
-static void
+void
 cifs_del_lock_waiters(struct cifsLockInfo *lock)
 {
 	struct cifsLockInfo *li, *tmp;
@@ -1229,7 +1227,7 @@ cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
 	return 0;
 }
 
-static void
+void
 cifs_move_llist(struct list_head *source, struct list_head *dest)
 {
 	struct list_head *li, *tmp;
@@ -1237,7 +1235,7 @@ cifs_move_llist(struct list_head *source, struct list_head *dest)
 		list_move(li, dest);
 }
 
-static void
+void
 cifs_free_llist(struct list_head *llist)
 {
 	struct cifsLockInfo *li, *tmp;
diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c
index 5ff25e025215..a25ea02149e7 100644
--- a/fs/cifs/smb2file.c
+++ b/fs/cifs/smb2file.c
@@ -104,3 +104,100 @@ out:
 	kfree(smb2_path);
 	return rc;
 }
+
+int
+smb2_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
+		  const unsigned int xid)
+{
+	int rc = 0, stored_rc;
+	unsigned int max_num, num = 0, max_buf;
+	struct smb2_lock_element *buf, *cur;
+	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
+	struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
+	struct cifsLockInfo *li, *tmp;
+	__u64 length = 1 + flock->fl_end - flock->fl_start;
+	struct list_head tmp_llist;
+
+	INIT_LIST_HEAD(&tmp_llist);
+
+	/*
+	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
+	 * and check it for zero before using.
+	 */
+	max_buf = tcon->ses->server->maxBuf;
+	if (!max_buf)
+		return -EINVAL;
+
+	max_num = max_buf / sizeof(struct smb2_lock_element);
+	buf = kzalloc(max_num * sizeof(struct smb2_lock_element), GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	cur = buf;
+
+	mutex_lock(&cinode->lock_mutex);
+	list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
+		if (flock->fl_start > li->offset ||
+		    (flock->fl_start + length) <
+		    (li->offset + li->length))
+			continue;
+		if (current->tgid != li->pid)
+			continue;
+		if (cinode->can_cache_brlcks) {
+			/*
+			 * We can cache brlock requests - simply remove a lock
+			 * from the file's list.
+			 */
+			list_del(&li->llist);
+			cifs_del_lock_waiters(li);
+			kfree(li);
+			continue;
+		}
+		cur->Length = cpu_to_le64(li->length);
+		cur->Offset = cpu_to_le64(li->offset);
+		cur->Flags = cpu_to_le32(SMB2_LOCKFLAG_UNLOCK);
+		/*
+		 * We need to save a lock here to let us add it again to the
+		 * file's list if the unlock range request fails on the server.
+		 */
+		list_move(&li->llist, &tmp_llist);
+		if (++num == max_num) {
+			stored_rc = smb2_lockv(xid, tcon,
+					       cfile->fid.persistent_fid,
+					       cfile->fid.volatile_fid,
+					       current->tgid, num, buf);
+			if (stored_rc) {
+				/*
+				 * We failed on the unlock range request - add
+				 * all locks from the tmp list to the head of
+				 * the file's list.
+				 */
+				cifs_move_llist(&tmp_llist,
+						&cfile->llist->locks);
+				rc = stored_rc;
+			} else
+				/*
+				 * The unlock range request succeed - free the
+				 * tmp list.
+				 */
+				cifs_free_llist(&tmp_llist);
+			cur = buf;
+			num = 0;
+		} else
+			cur++;
+	}
+	if (num) {
+		stored_rc = smb2_lockv(xid, tcon, cfile->fid.persistent_fid,
+				       cfile->fid.volatile_fid, current->tgid,
+				       num, buf);
+		if (stored_rc) {
+			cifs_move_llist(&tmp_llist, &cfile->llist->locks);
+			rc = stored_rc;
+		} else
+			cifs_free_llist(&tmp_llist);
+	}
+	mutex_unlock(&cinode->lock_mutex);
+
+	kfree(buf);
+	return rc;
+}
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index e4a59d1f06b1..caed2c57896d 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -544,6 +544,17 @@ smb2_compare_fids(struct cifsFileInfo *ob1, struct cifsFileInfo *ob2)
 	       ob1->fid.volatile_fid == ob2->fid.volatile_fid;
 }
 
+static int
+smb2_mand_lock(const unsigned int xid, struct cifsFileInfo *cfile, __u64 offset,
+	       __u64 length, __u32 type, int lock, int unlock, bool wait)
+{
+	if (unlock && !lock)
+		type = SMB2_LOCKFLAG_UNLOCK;
+	return SMB2_lock(xid, tlink_tcon(cfile->tlink),
+			 cfile->fid.persistent_fid, cfile->fid.volatile_fid,
+			 current->tgid, length, offset, type, wait);
+}
+
 struct smb_version_operations smb21_operations = {
 	.compare_fids = smb2_compare_fids,
 	.setup_request = smb2_setup_request,
@@ -602,6 +613,8 @@ struct smb_version_operations smb21_operations = {
 	.is_status_pending = smb2_is_status_pending,
 	.oplock_response = smb2_oplock_response,
 	.queryfs = smb2_queryfs,
+	.mand_lock = smb2_mand_lock,
+	.mand_unlock_range = smb2_unlock_range,
 };
 
 struct smb_version_values smb21_values = {
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 1b447612200e..d3e1cfca3379 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -2047,3 +2047,62 @@ qinf_exit:
 	free_rsp_buf(resp_buftype, iov.iov_base);
 	return rc;
 }
+
+int
+smb2_lockv(const unsigned int xid, struct cifs_tcon *tcon,
+	   const __u64 persist_fid, const __u64 volatile_fid, const __u32 pid,
+	   const __u32 num_lock, struct smb2_lock_element *buf)
+{
+	int rc = 0;
+	struct smb2_lock_req *req = NULL;
+	struct kvec iov[2];
+	int resp_buf_type;
+	unsigned int count;
+
+	cFYI(1, "smb2_lockv num lock %d", num_lock);
+
+	rc = small_smb2_init(SMB2_LOCK, tcon, (void **) &req);
+	if (rc)
+		return rc;
+
+	req->hdr.ProcessId = cpu_to_le32(pid);
+	req->LockCount = cpu_to_le16(num_lock);
+
+	req->PersistentFileId = persist_fid;
+	req->VolatileFileId = volatile_fid;
+
+	count = num_lock * sizeof(struct smb2_lock_element);
+	inc_rfc1001_len(req, count - sizeof(struct smb2_lock_element));
+
+	iov[0].iov_base = (char *)req;
+	/* 4 for rfc1002 length field and count for all locks */
+	iov[0].iov_len = get_rfc1002_length(req) + 4 - count;
+	iov[1].iov_base = (char *)buf;
+	iov[1].iov_len = count;
+
+	cifs_stats_inc(&tcon->stats.cifs_stats.num_locks);
+	rc = SendReceive2(xid, tcon->ses, iov, 2, &resp_buf_type, CIFS_NO_RESP);
+	if (rc) {
+		cFYI(1, "Send error in smb2_lockv = %d", rc);
+		cifs_stats_fail_inc(tcon, SMB2_LOCK_HE);
+	}
+
+	return rc;
+}
+
+int
+SMB2_lock(const unsigned int xid, struct cifs_tcon *tcon,
+	  const __u64 persist_fid, const __u64 volatile_fid, const __u32 pid,
+	  const __u64 length, const __u64 offset, const __u32 lock_flags,
+	  const bool wait)
+{
+	struct smb2_lock_element lock;
+
+	lock.Offset = cpu_to_le64(offset);
+	lock.Length = cpu_to_le64(length);
+	lock.Flags = cpu_to_le32(lock_flags);
+	if (!wait && lock_flags != SMB2_LOCKFLAG_UNLOCK)
+		lock.Flags |= cpu_to_le32(SMB2_LOCKFLAG_FAIL_IMMEDIATELY);
+
+	return smb2_lockv(xid, tcon, persist_fid, volatile_fid, pid, 1, &lock);
+}
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index d2d132e94155..889ee5e193d9 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -531,6 +531,30 @@ struct smb2_write_rsp {
 #define SMB2_LOCKFLAG_UNLOCK		0x0004
 #define SMB2_LOCKFLAG_FAIL_IMMEDIATELY	0x0010
 
+struct smb2_lock_element {
+	__le64 Offset;
+	__le64 Length;
+	__le32 Flags;
+	__le32 Reserved;
+} __packed;
+
+struct smb2_lock_req {
+	struct smb2_hdr hdr;
+	__le16 StructureSize; /* Must be 48 */
+	__le16 LockCount;
+	__le32 Reserved;
+	__u64  PersistentFileId; /* opaque endianness */
+	__u64  VolatileFileId; /* opaque endianness */
+	/* Followed by at least one */
+	struct smb2_lock_element locks[1];
+} __packed;
+
+struct smb2_lock_rsp {
+	struct smb2_hdr hdr;
+	__le16 StructureSize; /* Must be 4 */
+	__le16 Reserved;
+} __packed;
+
 struct smb2_echo_req {
 	struct smb2_hdr hdr;
 	__le16 StructureSize;	/* Must be 4 */
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index aeb30dbdf8b8..ab19152b092b 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -84,6 +84,8 @@ extern int smb2_open_file(const unsigned int xid, struct cifs_tcon *tcon,
 			  struct cifs_fid *fid, __u32 *oplock,
 			  FILE_ALL_INFO *buf, struct cifs_sb_info *cifs_sb);
 extern void smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock);
+extern int smb2_unlock_range(struct cifsFileInfo *cfile,
+			     struct file_lock *flock, const unsigned int xid);
 
 /*
  * SMB2 Worker functions - most of protocol specific implementation details
@@ -140,5 +142,13 @@ extern int SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon,
 extern int SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon,
 			 u64 persistent_file_id, u64 volatile_file_id,
 			 struct kstatfs *FSData);
+extern int SMB2_lock(const unsigned int xid, struct cifs_tcon *tcon,
+		     const __u64 persist_fid, const __u64 volatile_fid,
+		     const __u32 pid, const __u64 length, const __u64 offset,
+		     const __u32 lockFlags, const bool wait);
+extern int smb2_lockv(const unsigned int xid, struct cifs_tcon *tcon,
+		      const __u64 persist_fid, const __u64 volatile_fid,
+		      const __u32 pid, const __u32 num_lock,
+		      struct smb2_lock_element *buf);
 
 #endif			/* _SMB2PROTO_H */
-- 
cgit 


From b140799a11adb6023d5f96712874c37b71dab290 Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <pshilovsky@etersoft.ru>
Date: Wed, 19 Sep 2012 06:22:44 -0700
Subject: CIFS: Use brlock cache for SMB2

Signed-off-by: Pavel Shilovsky <pshilovsky@etersoft.ru>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/smb2file.c  | 91 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/cifs/smb2ops.c   |  3 +-
 fs/cifs/smb2proto.h |  1 +
 3 files changed, 94 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c
index a25ea02149e7..181e13d9f9db 100644
--- a/fs/cifs/smb2file.c
+++ b/fs/cifs/smb2file.c
@@ -201,3 +201,94 @@ smb2_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
 	kfree(buf);
 	return rc;
 }
+
+static int
+smb2_push_mand_fdlocks(struct cifs_fid_locks *fdlocks, const unsigned int xid,
+		       struct smb2_lock_element *buf, unsigned int max_num)
+{
+	int rc = 0, stored_rc;
+	struct cifsFileInfo *cfile = fdlocks->cfile;
+	struct cifsLockInfo *li;
+	unsigned int num = 0;
+	struct smb2_lock_element *cur = buf;
+	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
+
+	list_for_each_entry(li, &fdlocks->locks, llist) {
+		cur->Length = cpu_to_le64(li->length);
+		cur->Offset = cpu_to_le64(li->offset);
+		cur->Flags = cpu_to_le32(li->type |
+						SMB2_LOCKFLAG_FAIL_IMMEDIATELY);
+		if (++num == max_num) {
+			stored_rc = smb2_lockv(xid, tcon,
+					       cfile->fid.persistent_fid,
+					       cfile->fid.volatile_fid,
+					       current->tgid, num, buf);
+			if (stored_rc)
+				rc = stored_rc;
+			cur = buf;
+			num = 0;
+		} else
+			cur++;
+	}
+	if (num) {
+		stored_rc = smb2_lockv(xid, tcon,
+				       cfile->fid.persistent_fid,
+				       cfile->fid.volatile_fid,
+				       current->tgid, num, buf);
+		if (stored_rc)
+			rc = stored_rc;
+	}
+
+	return rc;
+}
+
+int
+smb2_push_mandatory_locks(struct cifsFileInfo *cfile)
+{
+	int rc = 0, stored_rc;
+	unsigned int xid;
+	unsigned int max_num, max_buf;
+	struct smb2_lock_element *buf;
+	struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
+	struct cifs_fid_locks *fdlocks;
+
+	xid = get_xid();
+	mutex_lock(&cinode->lock_mutex);
+	if (!cinode->can_cache_brlcks) {
+		mutex_unlock(&cinode->lock_mutex);
+		free_xid(xid);
+		return rc;
+	}
+
+	/*
+	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
+	 * and check it for zero before using.
+	 */
+	max_buf = tlink_tcon(cfile->tlink)->ses->server->maxBuf;
+	if (!max_buf) {
+		mutex_unlock(&cinode->lock_mutex);
+		free_xid(xid);
+		return -EINVAL;
+	}
+
+	max_num = max_buf / sizeof(struct smb2_lock_element);
+	buf = kzalloc(max_num * sizeof(struct smb2_lock_element), GFP_KERNEL);
+	if (!buf) {
+		mutex_unlock(&cinode->lock_mutex);
+		free_xid(xid);
+		return -ENOMEM;
+	}
+
+	list_for_each_entry(fdlocks, &cinode->llist, llist) {
+		stored_rc = smb2_push_mand_fdlocks(fdlocks, xid, buf, max_num);
+		if (stored_rc)
+			rc = stored_rc;
+	}
+
+	cinode->can_cache_brlcks = false;
+	kfree(buf);
+
+	mutex_unlock(&cinode->lock_mutex);
+	free_xid(xid);
+	return rc;
+}
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index caed2c57896d..0808b238219b 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -371,7 +371,7 @@ smb2_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock)
 	cfile->fid.persistent_fid = fid->persistent_fid;
 	cfile->fid.volatile_fid = fid->volatile_fid;
 	smb2_set_oplock_level(cinode, oplock);
-	/* cinode->can_cache_brlcks = cinode->clientCanCacheAll; */
+	cinode->can_cache_brlcks = cinode->clientCanCacheAll;
 }
 
 static int
@@ -615,6 +615,7 @@ struct smb_version_operations smb21_operations = {
 	.queryfs = smb2_queryfs,
 	.mand_lock = smb2_mand_lock,
 	.mand_unlock_range = smb2_unlock_range,
+	.push_mand_locks = smb2_push_mandatory_locks,
 };
 
 struct smb_version_values smb21_values = {
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index ab19152b092b..8b4d3712255b 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -86,6 +86,7 @@ extern int smb2_open_file(const unsigned int xid, struct cifs_tcon *tcon,
 extern void smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock);
 extern int smb2_unlock_range(struct cifsFileInfo *cfile,
 			     struct file_lock *flock, const unsigned int xid);
+extern int smb2_push_mandatory_locks(struct cifsFileInfo *cfile);
 
 /*
  * SMB2 Worker functions - most of protocol specific implementation details
-- 
cgit 


From 1b4b55a1d9404f51aacf1ff19887eb911484057d Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <pshilovsky@etersoft.ru>
Date: Wed, 19 Sep 2012 06:22:44 -0700
Subject: CIFS: Turn lock mutex into rw semaphore

and allow several processes to walk through the lock list and read
can_cache_brlcks value if they are not going to modify them.

Signed-off-by: Pavel Shilovsky <pshilovsky@etersoft.ru>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/cifsfs.c   |  2 +-
 fs/cifs/cifsglob.h |  2 +-
 fs/cifs/file.c     | 60 ++++++++++++++++++++++++++++--------------------------
 fs/cifs/smb2file.c | 15 +++++++-------
 4 files changed, 41 insertions(+), 38 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index e958d9438505..4cd68c77ce39 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -937,7 +937,7 @@ cifs_init_once(void *inode)
 	struct cifsInodeInfo *cifsi = inode;
 
 	inode_init_once(&cifsi->vfs_inode);
-	mutex_init(&cifsi->lock_mutex);
+	init_rwsem(&cifsi->lock_sem);
 }
 
 static int
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index d3c72713fec4..e2492e1cdb85 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -1000,7 +1000,7 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file);
 struct cifsInodeInfo {
 	bool can_cache_brlcks;
 	struct list_head llist;	/* locks helb by this inode */
-	struct mutex lock_mutex;	/* protect the fields above */
+	struct rw_semaphore lock_sem;	/* protect the fields above */
 	/* BB add in lists for dirty pages i.e. write caching info for oplock */
 	struct list_head openFileList;
 	__u32 cifsAttrs; /* e.g. DOS archive bit, sparse, compressed, system */
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index e2a8e4456275..1adff75dfffc 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -260,9 +260,9 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
 	INIT_LIST_HEAD(&fdlocks->locks);
 	fdlocks->cfile = cfile;
 	cfile->llist = fdlocks;
-	mutex_lock(&cinode->lock_mutex);
+	down_write(&cinode->lock_sem);
 	list_add(&fdlocks->llist, &cinode->llist);
-	mutex_unlock(&cinode->lock_mutex);
+	up_write(&cinode->lock_sem);
 
 	cfile->count = 1;
 	cfile->pid = current->tgid;
@@ -351,7 +351,7 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
 	 * Delete any outstanding lock records. We'll lose them when the file
 	 * is closed anyway.
 	 */
-	mutex_lock(&cifsi->lock_mutex);
+	down_write(&cifsi->lock_sem);
 	list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
 		list_del(&li->llist);
 		cifs_del_lock_waiters(li);
@@ -359,7 +359,7 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
 	}
 	list_del(&cifs_file->llist->llist);
 	kfree(cifs_file->llist);
-	mutex_unlock(&cifsi->lock_mutex);
+	up_write(&cifsi->lock_sem);
 
 	cifs_put_tlink(cifs_file->tlink);
 	dput(cifs_file->dentry);
@@ -762,7 +762,7 @@ cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
 	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
 	bool exist;
 
-	mutex_lock(&cinode->lock_mutex);
+	down_read(&cinode->lock_sem);
 
 	exist = cifs_find_lock_conflict(cfile, offset, length, type,
 					&conf_lock);
@@ -779,7 +779,7 @@ cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
 	else
 		flock->fl_type = F_UNLCK;
 
-	mutex_unlock(&cinode->lock_mutex);
+	up_read(&cinode->lock_sem);
 	return rc;
 }
 
@@ -787,9 +787,9 @@ static void
 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
 {
 	struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
-	mutex_lock(&cinode->lock_mutex);
+	down_write(&cinode->lock_sem);
 	list_add_tail(&lock->llist, &cfile->llist->locks);
-	mutex_unlock(&cinode->lock_mutex);
+	up_write(&cinode->lock_sem);
 }
 
 /*
@@ -809,13 +809,13 @@ cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
 
 try_again:
 	exist = false;
-	mutex_lock(&cinode->lock_mutex);
+	down_write(&cinode->lock_sem);
 
 	exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
 					lock->type, &conf_lock);
 	if (!exist && cinode->can_cache_brlcks) {
 		list_add_tail(&lock->llist, &cfile->llist->locks);
-		mutex_unlock(&cinode->lock_mutex);
+		up_write(&cinode->lock_sem);
 		return rc;
 	}
 
@@ -825,17 +825,17 @@ try_again:
 		rc = -EACCES;
 	else {
 		list_add_tail(&lock->blist, &conf_lock->blist);
-		mutex_unlock(&cinode->lock_mutex);
+		up_write(&cinode->lock_sem);
 		rc = wait_event_interruptible(lock->block_q,
 					(lock->blist.prev == &lock->blist) &&
 					(lock->blist.next == &lock->blist));
 		if (!rc)
 			goto try_again;
-		mutex_lock(&cinode->lock_mutex);
+		down_write(&cinode->lock_sem);
 		list_del_init(&lock->blist);
 	}
 
-	mutex_unlock(&cinode->lock_mutex);
+	up_write(&cinode->lock_sem);
 	return rc;
 }
 
@@ -856,7 +856,7 @@ cifs_posix_lock_test(struct file *file, struct file_lock *flock)
 	if ((flock->fl_flags & FL_POSIX) == 0)
 		return 1;
 
-	mutex_lock(&cinode->lock_mutex);
+	down_read(&cinode->lock_sem);
 	posix_test_lock(file, flock);
 
 	if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
@@ -864,7 +864,7 @@ cifs_posix_lock_test(struct file *file, struct file_lock *flock)
 		rc = 1;
 	}
 
-	mutex_unlock(&cinode->lock_mutex);
+	up_read(&cinode->lock_sem);
 	return rc;
 }
 
@@ -884,14 +884,14 @@ cifs_posix_lock_set(struct file *file, struct file_lock *flock)
 		return rc;
 
 try_again:
-	mutex_lock(&cinode->lock_mutex);
+	down_write(&cinode->lock_sem);
 	if (!cinode->can_cache_brlcks) {
-		mutex_unlock(&cinode->lock_mutex);
+		up_write(&cinode->lock_sem);
 		return rc;
 	}
 
 	rc = posix_lock_file(file, flock, NULL);
-	mutex_unlock(&cinode->lock_mutex);
+	up_write(&cinode->lock_sem);
 	if (rc == FILE_LOCK_DEFERRED) {
 		rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
 		if (!rc)
@@ -918,9 +918,10 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
 	xid = get_xid();
 	tcon = tlink_tcon(cfile->tlink);
 
-	mutex_lock(&cinode->lock_mutex);
+	/* we are going to update can_cache_brlcks here - need a write access */
+	down_write(&cinode->lock_sem);
 	if (!cinode->can_cache_brlcks) {
-		mutex_unlock(&cinode->lock_mutex);
+		up_write(&cinode->lock_sem);
 		free_xid(xid);
 		return rc;
 	}
@@ -931,7 +932,7 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
 	 */
 	max_buf = tcon->ses->server->maxBuf;
 	if (!max_buf) {
-		mutex_unlock(&cinode->lock_mutex);
+		up_write(&cinode->lock_sem);
 		free_xid(xid);
 		return -EINVAL;
 	}
@@ -940,7 +941,7 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
 						sizeof(LOCKING_ANDX_RANGE);
 	buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
 	if (!buf) {
-		mutex_unlock(&cinode->lock_mutex);
+		up_write(&cinode->lock_sem);
 		free_xid(xid);
 		return -ENOMEM;
 	}
@@ -978,7 +979,7 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
 	}
 
 	cinode->can_cache_brlcks = false;
-	mutex_unlock(&cinode->lock_mutex);
+	up_write(&cinode->lock_sem);
 
 	kfree(buf);
 	free_xid(xid);
@@ -1013,9 +1014,10 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile)
 
 	xid = get_xid();
 
-	mutex_lock(&cinode->lock_mutex);
+	/* we are going to update can_cache_brlcks here - need a write access */
+	down_write(&cinode->lock_sem);
 	if (!cinode->can_cache_brlcks) {
-		mutex_unlock(&cinode->lock_mutex);
+		up_write(&cinode->lock_sem);
 		free_xid(xid);
 		return rc;
 	}
@@ -1031,7 +1033,7 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile)
 
 	/*
 	 * Allocating count locks is enough because no FL_POSIX locks can be
-	 * added to the list while we are holding cinode->lock_mutex that
+	 * added to the list while we are holding cinode->lock_sem that
 	 * protects locking operations of this inode.
 	 */
 	for (; i < count; i++) {
@@ -1086,7 +1088,7 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile)
 
 out:
 	cinode->can_cache_brlcks = false;
-	mutex_unlock(&cinode->lock_mutex);
+	up_write(&cinode->lock_sem);
 
 	free_xid(xid);
 	return rc;
@@ -1278,7 +1280,7 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
 	if (!buf)
 		return -ENOMEM;
 
-	mutex_lock(&cinode->lock_mutex);
+	down_write(&cinode->lock_sem);
 	for (i = 0; i < 2; i++) {
 		cur = buf;
 		num = 0;
@@ -1348,7 +1350,7 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
 		}
 	}
 
-	mutex_unlock(&cinode->lock_mutex);
+	up_write(&cinode->lock_sem);
 	kfree(buf);
 	return rc;
 }
diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c
index 181e13d9f9db..0ddd617ffa1a 100644
--- a/fs/cifs/smb2file.c
+++ b/fs/cifs/smb2file.c
@@ -135,7 +135,7 @@ smb2_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
 
 	cur = buf;
 
-	mutex_lock(&cinode->lock_mutex);
+	down_write(&cinode->lock_sem);
 	list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
 		if (flock->fl_start > li->offset ||
 		    (flock->fl_start + length) <
@@ -196,7 +196,7 @@ smb2_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
 		} else
 			cifs_free_llist(&tmp_llist);
 	}
-	mutex_unlock(&cinode->lock_mutex);
+	up_write(&cinode->lock_sem);
 
 	kfree(buf);
 	return rc;
@@ -253,9 +253,10 @@ smb2_push_mandatory_locks(struct cifsFileInfo *cfile)
 	struct cifs_fid_locks *fdlocks;
 
 	xid = get_xid();
-	mutex_lock(&cinode->lock_mutex);
+	/* we are going to update can_cache_brlcks here - need a write access */
+	down_write(&cinode->lock_sem);
 	if (!cinode->can_cache_brlcks) {
-		mutex_unlock(&cinode->lock_mutex);
+		up_write(&cinode->lock_sem);
 		free_xid(xid);
 		return rc;
 	}
@@ -266,7 +267,7 @@ smb2_push_mandatory_locks(struct cifsFileInfo *cfile)
 	 */
 	max_buf = tlink_tcon(cfile->tlink)->ses->server->maxBuf;
 	if (!max_buf) {
-		mutex_unlock(&cinode->lock_mutex);
+		up_write(&cinode->lock_sem);
 		free_xid(xid);
 		return -EINVAL;
 	}
@@ -274,7 +275,7 @@ smb2_push_mandatory_locks(struct cifsFileInfo *cfile)
 	max_num = max_buf / sizeof(struct smb2_lock_element);
 	buf = kzalloc(max_num * sizeof(struct smb2_lock_element), GFP_KERNEL);
 	if (!buf) {
-		mutex_unlock(&cinode->lock_mutex);
+		up_write(&cinode->lock_sem);
 		free_xid(xid);
 		return -ENOMEM;
 	}
@@ -288,7 +289,7 @@ smb2_push_mandatory_locks(struct cifsFileInfo *cfile)
 	cinode->can_cache_brlcks = false;
 	kfree(buf);
 
-	mutex_unlock(&cinode->lock_mutex);
+	up_write(&cinode->lock_sem);
 	free_xid(xid);
 	return rc;
 }
-- 
cgit 


From 579f9053236c796d718162c37c72bb3bd32d008c Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <pshilovsky@etersoft.ru>
Date: Wed, 19 Sep 2012 06:22:44 -0700
Subject: CIFS: Check for mandatory brlocks on read/write

Currently CIFS code accept read/write ops on mandatory locked area
when two processes use the same file descriptor - it's wrong.
Fix this by serializing io and brlock operations on the inode.

Signed-off-by: Pavel Shilovsky <pshilovsky@etersoft.ru>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/cifsproto.h |   4 ++
 fs/cifs/file.c      | 121 ++++++++++++++++++++++++++++++++++++++++++----------
 2 files changed, 102 insertions(+), 23 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 15e38dc389fc..c758ee7b0307 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -180,6 +180,10 @@ extern struct smb_vol *cifs_get_volume_info(char *mount_data,
 extern int cifs_mount(struct cifs_sb_info *, struct smb_vol *);
 extern void cifs_umount(struct cifs_sb_info *);
 extern void cifs_mark_open_files_invalid(struct cifs_tcon *tcon);
+extern bool cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset,
+				    __u64 length, __u8 type,
+				    struct cifsLockInfo **conf_lock,
+				    bool rw_check);
 
 #if IS_ENABLED(CONFIG_CIFS_DFS_UPCALL)
 extern void cifs_dfs_release_automount_timer(void);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 1adff75dfffc..2e2e4f9aeb63 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -707,7 +707,7 @@ cifs_del_lock_waiters(struct cifsLockInfo *lock)
 static bool
 cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
 			    __u64 length, __u8 type, struct cifsFileInfo *cfile,
-			    struct cifsLockInfo **conf_lock)
+			    struct cifsLockInfo **conf_lock, bool rw_check)
 {
 	struct cifsLockInfo *li;
 	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
@@ -717,19 +717,24 @@ cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
 		if (offset + length <= li->offset ||
 		    offset >= li->offset + li->length)
 			continue;
+		if (rw_check && server->ops->compare_fids(cfile, cur_cfile) &&
+		    current->tgid == li->pid)
+			continue;
 		if ((type & server->vals->shared_lock_type) &&
 		    ((server->ops->compare_fids(cfile, cur_cfile) &&
 		     current->tgid == li->pid) || type == li->type))
 			continue;
-		*conf_lock = li;
+		if (conf_lock)
+			*conf_lock = li;
 		return true;
 	}
 	return false;
 }
 
-static bool
+bool
 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
-			__u8 type, struct cifsLockInfo **conf_lock)
+			__u8 type, struct cifsLockInfo **conf_lock,
+			bool rw_check)
 {
 	bool rc = false;
 	struct cifs_fid_locks *cur;
@@ -737,7 +742,7 @@ cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
 
 	list_for_each_entry(cur, &cinode->llist, llist) {
 		rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
-						 cfile, conf_lock);
+						 cfile, conf_lock, rw_check);
 		if (rc)
 			break;
 	}
@@ -765,7 +770,7 @@ cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
 	down_read(&cinode->lock_sem);
 
 	exist = cifs_find_lock_conflict(cfile, offset, length, type,
-					&conf_lock);
+					&conf_lock, false);
 	if (exist) {
 		flock->fl_start = conf_lock->offset;
 		flock->fl_end = conf_lock->offset + conf_lock->length - 1;
@@ -812,7 +817,7 @@ try_again:
 	down_write(&cinode->lock_sem);
 
 	exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
-					lock->type, &conf_lock);
+					lock->type, &conf_lock, false);
 	if (!exist && cinode->can_cache_brlcks) {
 		list_add_tail(&lock->llist, &cfile->llist->locks);
 		up_write(&cinode->lock_sem);
@@ -2394,15 +2399,58 @@ ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
 	return written;
 }
 
-ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
-			   unsigned long nr_segs, loff_t pos)
+static ssize_t
+cifs_writev(struct kiocb *iocb, const struct iovec *iov,
+	    unsigned long nr_segs, loff_t pos)
 {
-	struct inode *inode;
+	struct file *file = iocb->ki_filp;
+	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
+	struct inode *inode = file->f_mapping->host;
+	struct cifsInodeInfo *cinode = CIFS_I(inode);
+	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
+	ssize_t rc = -EACCES;
 
-	inode = iocb->ki_filp->f_path.dentry->d_inode;
+	BUG_ON(iocb->ki_pos != pos);
 
-	if (CIFS_I(inode)->clientCanCacheAll)
-		return generic_file_aio_write(iocb, iov, nr_segs, pos);
+	sb_start_write(inode->i_sb);
+
+	/*
+	 * We need to hold the sem to be sure nobody modifies lock list
+	 * with a brlock that prevents writing.
+	 */
+	down_read(&cinode->lock_sem);
+	if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs),
+				     server->vals->exclusive_lock_type, NULL,
+				     true)) {
+		mutex_lock(&inode->i_mutex);
+		rc = __generic_file_aio_write(iocb, iov, nr_segs,
+					       &iocb->ki_pos);
+		mutex_unlock(&inode->i_mutex);
+	}
+
+	if (rc > 0 || rc == -EIOCBQUEUED) {
+		ssize_t err;
+
+		err = generic_write_sync(file, pos, rc);
+		if (err < 0 && rc > 0)
+			rc = err;
+	}
+
+	up_read(&cinode->lock_sem);
+	sb_end_write(inode->i_sb);
+	return rc;
+}
+
+ssize_t
+cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
+		   unsigned long nr_segs, loff_t pos)
+{
+	struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
+	struct cifsInodeInfo *cinode = CIFS_I(inode);
+	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
+						iocb->ki_filp->private_data;
+	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
 
 	/*
 	 * In strict cache mode we need to write the data to the server exactly
@@ -2411,7 +2459,15 @@ ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
 	 * not on the region from pos to ppos+len-1.
 	 */
 
-	return cifs_user_writev(iocb, iov, nr_segs, pos);
+	if (!cinode->clientCanCacheAll)
+		return cifs_user_writev(iocb, iov, nr_segs, pos);
+
+	if (cap_unix(tcon->ses) &&
+	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
+	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
+		return generic_file_aio_write(iocb, iov, nr_segs, pos);
+
+	return cifs_writev(iocb, iov, nr_segs, pos);
 }
 
 static struct cifs_readdata *
@@ -2746,15 +2802,17 @@ ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
 	return read;
 }
 
-ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
-			  unsigned long nr_segs, loff_t pos)
+ssize_t
+cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
+		  unsigned long nr_segs, loff_t pos)
 {
-	struct inode *inode;
-
-	inode = iocb->ki_filp->f_path.dentry->d_inode;
-
-	if (CIFS_I(inode)->clientCanCacheRead)
-		return generic_file_aio_read(iocb, iov, nr_segs, pos);
+	struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
+	struct cifsInodeInfo *cinode = CIFS_I(inode);
+	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
+						iocb->ki_filp->private_data;
+	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
+	int rc = -EACCES;
 
 	/*
 	 * In strict cache mode we need to read from the server all the time
@@ -2764,8 +2822,25 @@ ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
 	 * on pages affected by this read but not on the region from pos to
 	 * pos+len-1.
 	 */
+	if (!cinode->clientCanCacheRead)
+		return cifs_user_readv(iocb, iov, nr_segs, pos);
 
-	return cifs_user_readv(iocb, iov, nr_segs, pos);
+	if (cap_unix(tcon->ses) &&
+	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
+	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
+		return generic_file_aio_read(iocb, iov, nr_segs, pos);
+
+	/*
+	 * We need to hold the sem to be sure nobody modifies lock list
+	 * with a brlock that prevents reading.
+	 */
+	down_read(&cinode->lock_sem);
+	if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs),
+				     tcon->ses->server->vals->shared_lock_type,
+				     NULL, true))
+		rc = generic_file_aio_read(iocb, iov, nr_segs, pos);
+	up_read(&cinode->lock_sem);
+	return rc;
 }
 
 static ssize_t
-- 
cgit 


From b8c32dbb0deb287a5fcb78251e4eae6c7275760d Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <pshilovsky@samba.org>
Date: Wed, 19 Sep 2012 06:22:44 -0700
Subject: CIFS: Request SMB2.1 leases

if server supports them and we need oplocks.

Signed-off-by: Pavel Shilovsky <piastryyy@gmail.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/cifsfs.c   |  28 ++++++++---
 fs/cifs/cifsglob.h |  10 ++++
 fs/cifs/dir.c      |  13 ++++-
 fs/cifs/file.c     |  21 +++++---
 fs/cifs/smb2file.c |   9 +++-
 fs/cifs/smb2ops.c  |  21 ++++++++
 fs/cifs/smb2pdu.c  | 138 +++++++++++++++++++++++++++++++++++++++++++++++++----
 fs/cifs/smb2pdu.h  |  43 ++++++++++++++++-
 8 files changed, 256 insertions(+), 27 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 4cd68c77ce39..28ac048d54ea 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -36,6 +36,7 @@
 #include <linux/kthread.h>
 #include <linux/freezer.h>
 #include <linux/namei.h>
+#include <linux/random.h>
 #include <net/ipv6.h>
 #include "cifsfs.h"
 #include "cifspdu.h"
@@ -88,6 +89,10 @@ extern mempool_t *cifs_mid_poolp;
 
 struct workqueue_struct	*cifsiod_wq;
 
+#ifdef CONFIG_CIFS_SMB2
+__u8 cifs_client_guid[SMB2_CLIENT_GUID_SIZE];
+#endif
+
 static int
 cifs_read_super(struct super_block *sb)
 {
@@ -218,9 +223,10 @@ cifs_alloc_inode(struct super_block *sb)
 		return NULL;
 	cifs_inode->cifsAttrs = 0x20;	/* default */
 	cifs_inode->time = 0;
-	/* Until the file is open and we have gotten oplock
-	info back from the server, can not assume caching of
-	file data or metadata */
+	/*
+	 * Until the file is open and we have gotten oplock info back from the
+	 * server, can not assume caching of file data or metadata.
+	 */
 	cifs_set_oplock_level(cifs_inode, 0);
 	cifs_inode->delete_pending = false;
 	cifs_inode->invalid_mapping = false;
@@ -228,10 +234,14 @@ cifs_alloc_inode(struct super_block *sb)
 	cifs_inode->server_eof = 0;
 	cifs_inode->uniqueid = 0;
 	cifs_inode->createtime = 0;
-
-	/* Can not set i_flags here - they get immediately overwritten
-	   to zero by the VFS */
-/*	cifs_inode->vfs_inode.i_flags = S_NOATIME | S_NOCMTIME;*/
+#ifdef CONFIG_CIFS_SMB2
+	get_random_bytes(cifs_inode->lease_key, SMB2_LEASE_KEY_SIZE);
+#endif
+	/*
+	 * Can not set i_flags here - they get immediately overwritten to zero
+	 * by the VFS.
+	 */
+	/* cifs_inode->vfs_inode.i_flags = S_NOATIME | S_NOCMTIME; */
 	INIT_LIST_HEAD(&cifs_inode->openFileList);
 	INIT_LIST_HEAD(&cifs_inode->llist);
 	return &cifs_inode->vfs_inode;
@@ -1107,6 +1117,10 @@ init_cifs(void)
 	spin_lock_init(&cifs_file_list_lock);
 	spin_lock_init(&GlobalMid_Lock);
 
+#ifdef CONFIG_CIFS_SMB2
+	get_random_bytes(cifs_client_guid, SMB2_CLIENT_GUID_SIZE);
+#endif
+
 	if (cifs_max_pending < 2) {
 		cifs_max_pending = 2;
 		cFYI(1, "cifs_max_pending set to min of 2");
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index e2492e1cdb85..b6ec142028e8 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -361,6 +361,12 @@ struct smb_version_operations {
 				 const unsigned int);
 	/* push brlocks from the cache to the server */
 	int (*push_mand_locks)(struct cifsFileInfo *);
+	/* get lease key of the inode */
+	void (*get_lease_key)(struct inode *, struct cifs_fid *fid);
+	/* set lease key of the inode */
+	void (*set_lease_key)(struct inode *, struct cifs_fid *fid);
+	/* generate new lease key */
+	void (*new_lease_key)(struct cifs_fid *fid);
 };
 
 struct smb_version_values {
@@ -895,6 +901,7 @@ struct cifs_fid {
 #ifdef CONFIG_CIFS_SMB2
 	__u64 persistent_fid;	/* persist file id for smb2 */
 	__u64 volatile_fid;	/* volatile file id for smb2 */
+	__u8 lease_key[SMB2_LEASE_KEY_SIZE];	/* lease key for smb2 */
 #endif
 };
 
@@ -1012,6 +1019,9 @@ struct cifsInodeInfo {
 	u64  server_eof;		/* current file size on server -- protected by i_lock */
 	u64  uniqueid;			/* server inode number */
 	u64  createtime;		/* creation time on server */
+#ifdef CONFIG_CIFS_SMB2
+	__u8 lease_key[SMB2_LEASE_KEY_SIZE];	/* lease key for this inode */
+#endif
 #ifdef CONFIG_CIFS_FSCACHE
 	struct fscache_cookie *fscache;
 #endif
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index b99a1670dad4..4f2147c5adb6 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -340,6 +340,8 @@ cifs_create_get_file_info:
 		rc = cifs_get_inode_info(&newinode, full_path, buf, inode->i_sb,
 					 xid, &fid->netfid);
 		if (newinode) {
+			if (server->ops->set_lease_key)
+				server->ops->set_lease_key(newinode, fid);
 			if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)
 				newinode->i_mode = mode;
 			if ((*oplock & CIFS_CREATE_ACTION) &&
@@ -418,6 +420,9 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
 	tcon = tlink_tcon(tlink);
 	server = tcon->ses->server;
 
+	if (server->ops->new_lease_key)
+		server->ops->new_lease_key(&fid);
+
 	rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode,
 			    &oplock, &fid, opened);
 
@@ -473,10 +478,14 @@ int cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode,
 	if (IS_ERR(tlink))
 		goto out_free_xid;
 
-	rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode,
-			    &oplock, &fid, &created);
 	tcon = tlink_tcon(tlink);
 	server = tcon->ses->server;
+
+	if (server->ops->new_lease_key)
+		server->ops->new_lease_key(&fid);
+
+	rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode,
+			    &oplock, &fid, &created);
 	if (!rc && server->ops->close)
 		server->ops->close(xid, tcon, &fid);
 
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 2e2e4f9aeb63..ccad858d2d67 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -177,8 +177,9 @@ cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
 	int disposition;
 	int create_options = CREATE_NOT_DIR;
 	FILE_ALL_INFO *buf;
+	struct TCP_Server_Info *server = tcon->ses->server;
 
-	if (!tcon->ses->server->ops->open)
+	if (!server->ops->open)
 		return -ENOSYS;
 
 	desired_access = cifs_convert_flags(f_flags);
@@ -218,9 +219,9 @@ cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
 	if (backup_cred(cifs_sb))
 		create_options |= CREATE_OPEN_BACKUP_INTENT;
 
-	rc = tcon->ses->server->ops->open(xid, tcon, full_path, disposition,
-					  desired_access, create_options, fid,
-					  oplock, buf, cifs_sb);
+	rc = server->ops->open(xid, tcon, full_path, disposition,
+			       desired_access, create_options, fid, oplock, buf,
+			       cifs_sb);
 
 	if (rc)
 		goto out;
@@ -372,6 +373,7 @@ int cifs_open(struct inode *inode, struct file *file)
 	unsigned int xid;
 	__u32 oplock;
 	struct cifs_sb_info *cifs_sb;
+	struct TCP_Server_Info *server;
 	struct cifs_tcon *tcon;
 	struct tcon_link *tlink;
 	struct cifsFileInfo *cfile = NULL;
@@ -388,6 +390,7 @@ int cifs_open(struct inode *inode, struct file *file)
 		return PTR_ERR(tlink);
 	}
 	tcon = tlink_tcon(tlink);
+	server = tcon->ses->server;
 
 	full_path = build_path_from_dentry(file->f_path.dentry);
 	if (full_path == NULL) {
@@ -432,6 +435,9 @@ int cifs_open(struct inode *inode, struct file *file)
 	}
 
 	if (!posix_open_ok) {
+		if (server->ops->get_lease_key)
+			server->ops->get_lease_key(inode, &fid);
+
 		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
 				  file->f_flags, &oplock, &fid, xid);
 		if (rc)
@@ -440,8 +446,8 @@ int cifs_open(struct inode *inode, struct file *file)
 
 	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
 	if (cfile == NULL) {
-		if (tcon->ses->server->ops->close)
-			tcon->ses->server->ops->close(xid, tcon, &fid);
+		if (server->ops->close)
+			server->ops->close(xid, tcon, &fid);
 		rc = -ENOMEM;
 		goto out;
 	}
@@ -567,6 +573,9 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
 	if (backup_cred(cifs_sb))
 		create_options |= CREATE_OPEN_BACKUP_INTENT;
 
+	if (server->ops->get_lease_key)
+		server->ops->get_lease_key(inode, &fid);
+
 	/*
 	 * Can not refresh inode by passing in file_info buf to be returned by
 	 * CIFSSMBOpen and then calling get_inode_info with returned buf since
diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c
index 0ddd617ffa1a..78fb2050e0d6 100644
--- a/fs/cifs/smb2file.c
+++ b/fs/cifs/smb2file.c
@@ -63,6 +63,7 @@ smb2_open_file(const unsigned int xid, struct cifs_tcon *tcon, const char *path,
 	int rc;
 	__le16 *smb2_path;
 	struct smb2_file_all_info *smb2_data = NULL;
+	__u8 smb2_oplock[17];
 
 	smb2_path = cifs_convert_path_to_utf16(path, cifs_sb);
 	if (smb2_path == NULL) {
@@ -78,11 +79,14 @@ smb2_open_file(const unsigned int xid, struct cifs_tcon *tcon, const char *path,
 	}
 
 	desired_access |= FILE_READ_ATTRIBUTES;
-	*oplock = SMB2_OPLOCK_LEVEL_EXCLUSIVE;
+	*smb2_oplock = SMB2_OPLOCK_LEVEL_EXCLUSIVE;
+
+	if (tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_LEASING)
+		memcpy(smb2_oplock + 1, fid->lease_key, SMB2_LEASE_KEY_SIZE);
 
 	rc = SMB2_open(xid, tcon, smb2_path, &fid->persistent_fid,
 		       &fid->volatile_fid, desired_access, disposition,
-		       0, 0, (__u8 *)oplock, smb2_data);
+		       0, 0, smb2_oplock, smb2_data);
 	if (rc)
 		goto out;
 
@@ -99,6 +103,7 @@ smb2_open_file(const unsigned int xid, struct cifs_tcon *tcon, const char *path,
 		move_smb2_info_to_cifs(buf, smb2_data);
 	}
 
+	*oplock = *smb2_oplock;
 out:
 	kfree(smb2_data);
 	kfree(smb2_path);
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 0808b238219b..360d9079af49 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -555,6 +555,24 @@ smb2_mand_lock(const unsigned int xid, struct cifsFileInfo *cfile, __u64 offset,
 			 current->tgid, length, offset, type, wait);
 }
 
+static void
+smb2_get_lease_key(struct inode *inode, struct cifs_fid *fid)
+{
+	memcpy(fid->lease_key, CIFS_I(inode)->lease_key, SMB2_LEASE_KEY_SIZE);
+}
+
+static void
+smb2_set_lease_key(struct inode *inode, struct cifs_fid *fid)
+{
+	memcpy(CIFS_I(inode)->lease_key, fid->lease_key, SMB2_LEASE_KEY_SIZE);
+}
+
+static void
+smb2_new_lease_key(struct cifs_fid *fid)
+{
+	get_random_bytes(fid->lease_key, SMB2_LEASE_KEY_SIZE);
+}
+
 struct smb_version_operations smb21_operations = {
 	.compare_fids = smb2_compare_fids,
 	.setup_request = smb2_setup_request,
@@ -616,6 +634,9 @@ struct smb_version_operations smb21_operations = {
 	.mand_lock = smb2_mand_lock,
 	.mand_unlock_range = smb2_unlock_range,
 	.push_mand_locks = smb2_push_mandatory_locks,
+	.get_lease_key = smb2_get_lease_key,
+	.set_lease_key = smb2_set_lease_key,
+	.new_lease_key = smb2_new_lease_key,
 };
 
 struct smb_version_values smb21_values = {
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index d3e1cfca3379..89d2824587b2 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -304,7 +304,7 @@ free_rsp_buf(int resp_buftype, void *rsp)
 		cifs_buf_release(rsp);
 }
 
-#define SMB2_NUM_PROT 1
+#define SMB2_NUM_PROT 2
 
 #define SMB2_PROT   0
 #define SMB21_PROT  1
@@ -393,6 +393,8 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
 
 	req->Capabilities = cpu_to_le32(SMB2_GLOBAL_CAP_DFS);
 
+	memcpy(req->ClientGUID, cifs_client_guid, SMB2_CLIENT_GUID_SIZE);
+
 	iov[0].iov_base = (char *)req;
 	/* 4 for rfc1002 length field */
 	iov[0].iov_len = get_rfc1002_length(req) + 4;
@@ -868,6 +870,83 @@ SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon)
 	return rc;
 }
 
+static struct create_lease *
+create_lease_buf(u8 *lease_key, u8 oplock)
+{
+	struct create_lease *buf;
+
+	buf = kmalloc(sizeof(struct create_lease), GFP_KERNEL);
+	if (!buf)
+		return NULL;
+
+	memset(buf, 0, sizeof(struct create_lease));
+
+	buf->lcontext.LeaseKeyLow = cpu_to_le64(*((u64 *)lease_key));
+	buf->lcontext.LeaseKeyHigh = cpu_to_le64(*((u64 *)(lease_key + 8)));
+	if (oplock == SMB2_OPLOCK_LEVEL_EXCLUSIVE)
+		buf->lcontext.LeaseState = SMB2_LEASE_WRITE_CACHING |
+					   SMB2_LEASE_READ_CACHING;
+	else if (oplock == SMB2_OPLOCK_LEVEL_II)
+		buf->lcontext.LeaseState = SMB2_LEASE_READ_CACHING;
+	else if (oplock == SMB2_OPLOCK_LEVEL_BATCH)
+		buf->lcontext.LeaseState = SMB2_LEASE_HANDLE_CACHING |
+					   SMB2_LEASE_READ_CACHING |
+					   SMB2_LEASE_WRITE_CACHING;
+
+	buf->ccontext.DataOffset = cpu_to_le16(offsetof
+					(struct create_lease, lcontext));
+	buf->ccontext.DataLength = cpu_to_le32(sizeof(struct lease_context));
+	buf->ccontext.NameOffset = cpu_to_le16(offsetof
+				(struct create_lease, Name));
+	buf->ccontext.NameLength = cpu_to_le16(4);
+	buf->Name[0] = 'R';
+	buf->Name[1] = 'q';
+	buf->Name[2] = 'L';
+	buf->Name[3] = 's';
+	return buf;
+}
+
+static __u8
+parse_lease_state(struct smb2_create_rsp *rsp)
+{
+	char *data_offset;
+	struct create_lease *lc;
+	__u8 oplock = 0;
+	bool found = false;
+
+	data_offset = (char *)rsp;
+	data_offset += 4 + le32_to_cpu(rsp->CreateContextsOffset);
+	lc = (struct create_lease *)data_offset;
+	do {
+		char *name = le16_to_cpu(lc->ccontext.NameOffset) + (char *)lc;
+		if (le16_to_cpu(lc->ccontext.NameLength) != 4 ||
+		    strncmp(name, "RqLs", 4)) {
+			lc = (struct create_lease *)((char *)lc
+					+ le32_to_cpu(lc->ccontext.Next));
+			continue;
+		}
+		if (lc->lcontext.LeaseFlags & SMB2_LEASE_FLAG_BREAK_IN_PROGRESS)
+			return SMB2_OPLOCK_LEVEL_NOCHANGE;
+		found = true;
+		break;
+	} while (le32_to_cpu(lc->ccontext.Next) != 0);
+
+	if (!found)
+		return oplock;
+
+	if (le32_to_cpu(lc->lcontext.LeaseState) & SMB2_LEASE_WRITE_CACHING) {
+		if (le32_to_cpu(lc->lcontext.LeaseState) &
+						SMB2_LEASE_HANDLE_CACHING)
+			oplock = SMB2_OPLOCK_LEVEL_BATCH;
+		else
+			oplock = SMB2_OPLOCK_LEVEL_EXCLUSIVE;
+	} else if (le32_to_cpu(lc->lcontext.LeaseState) &
+						SMB2_LEASE_READ_CACHING)
+		oplock = SMB2_OPLOCK_LEVEL_II;
+
+	return oplock;
+}
+
 int
 SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path,
 	  u64 *persistent_fid, u64 *volatile_fid, __u32 desired_access,
@@ -878,9 +957,11 @@ SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path,
 	struct smb2_create_rsp *rsp;
 	struct TCP_Server_Info *server;
 	struct cifs_ses *ses = tcon->ses;
-	struct kvec iov[2];
+	struct kvec iov[3];
 	int resp_buftype;
 	int uni_path_len;
+	__le16 *copy_path = NULL;
+	int copy_size;
 	int rc = 0;
 	int num_iovecs = 2;
 
@@ -895,10 +976,6 @@ SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path,
 	if (rc)
 		return rc;
 
-	if (server->oplocks)
-		req->RequestedOplockLevel = *oplock;
-	else
-		req->RequestedOplockLevel = SMB2_OPLOCK_LEVEL_NONE;
 	req->ImpersonationLevel = IL_IMPERSONATION;
 	req->DesiredAccess = cpu_to_le32(desired_access);
 	/* File attributes ignored on open (used in create though) */
@@ -908,7 +985,7 @@ SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path,
 	req->CreateOptions = cpu_to_le32(create_options);
 	uni_path_len = (2 * UniStrnlen((wchar_t *)path, PATH_MAX)) + 2;
 	req->NameOffset = cpu_to_le16(sizeof(struct smb2_create_req)
-			- 1 /* pad */ - 4 /* do not count rfc1001 len field */);
+			- 8 /* pad */ - 4 /* do not count rfc1001 len field */);
 
 	iov[0].iov_base = (char *)req;
 	/* 4 for rfc1002 length field */
@@ -919,6 +996,20 @@ SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path,
 		req->NameLength = cpu_to_le16(uni_path_len - 2);
 		/* -1 since last byte is buf[0] which is sent below (path) */
 		iov[0].iov_len--;
+		if (uni_path_len % 8 != 0) {
+			copy_size = uni_path_len / 8 * 8;
+			if (copy_size < uni_path_len)
+				copy_size += 8;
+
+			copy_path = kzalloc(copy_size, GFP_KERNEL);
+			if (!copy_path)
+				return -ENOMEM;
+			memcpy((char *)copy_path, (const char *)path,
+				uni_path_len);
+			uni_path_len = copy_size;
+			path = copy_path;
+		}
+
 		iov[1].iov_len = uni_path_len;
 		iov[1].iov_base = path;
 		/*
@@ -927,10 +1018,37 @@ SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path,
 		 */
 		inc_rfc1001_len(req, uni_path_len - 1);
 	} else {
+		iov[0].iov_len += 7;
+		req->hdr.smb2_buf_length = cpu_to_be32(be32_to_cpu(
+				req->hdr.smb2_buf_length) + 8 - 1);
 		num_iovecs = 1;
 		req->NameLength = 0;
 	}
 
+	if (!server->oplocks)
+		*oplock = SMB2_OPLOCK_LEVEL_NONE;
+
+	if (!(tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_LEASING) ||
+	    *oplock == SMB2_OPLOCK_LEVEL_NONE)
+		req->RequestedOplockLevel = *oplock;
+	else {
+		iov[num_iovecs].iov_base = create_lease_buf(oplock+1, *oplock);
+		if (iov[num_iovecs].iov_base == NULL) {
+			cifs_small_buf_release(req);
+			kfree(copy_path);
+			return -ENOMEM;
+		}
+		iov[num_iovecs].iov_len = sizeof(struct create_lease);
+		req->RequestedOplockLevel = SMB2_OPLOCK_LEVEL_LEASE;
+		req->CreateContextsOffset = cpu_to_le32(
+			sizeof(struct smb2_create_req) - 4 - 8 +
+			iov[num_iovecs-1].iov_len);
+		req->CreateContextsLength = cpu_to_le32(
+			sizeof(struct create_lease));
+		inc_rfc1001_len(&req->hdr, sizeof(struct create_lease));
+		num_iovecs++;
+	}
+
 	rc = SendReceive2(xid, ses, iov, num_iovecs, &resp_buftype, 0);
 	rsp = (struct smb2_create_rsp *)iov[0].iov_base;
 
@@ -955,8 +1073,12 @@ SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path,
 		buf->DeletePending = 0;
 	}
 
-	*oplock = rsp->OplockLevel;
+	if (rsp->OplockLevel == SMB2_OPLOCK_LEVEL_LEASE)
+		*oplock = parse_lease_state(rsp);
+	else
+		*oplock = rsp->OplockLevel;
 creat_exit:
+	kfree(copy_path);
 	free_rsp_buf(resp_buftype, rsp);
 	return rc;
 }
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index 889ee5e193d9..e818a5cc5bd8 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -150,6 +150,10 @@ struct smb2_err_rsp {
 	__u8   ErrorData[1];  /* variable length */
 } __packed;
 
+#define SMB2_CLIENT_GUID_SIZE 16
+
+extern __u8 cifs_client_guid[SMB2_CLIENT_GUID_SIZE];
+
 struct smb2_negotiate_req {
 	struct smb2_hdr hdr;
 	__le16 StructureSize; /* Must be 36 */
@@ -157,7 +161,7 @@ struct smb2_negotiate_req {
 	__le16 SecurityMode;
 	__le16 Reserved;	/* MBZ */
 	__le32 Capabilities;
-	__u8   ClientGUID[16];	/* MBZ */
+	__u8   ClientGUID[SMB2_CLIENT_GUID_SIZE];
 	__le64 ClientStartTime;	/* MBZ */
 	__le16 Dialects[2]; /* variable length */
 } __packed;
@@ -307,6 +311,8 @@ struct smb2_tree_disconnect_rsp {
 #define SMB2_OPLOCK_LEVEL_EXCLUSIVE	0x08
 #define SMB2_OPLOCK_LEVEL_BATCH		0x09
 #define SMB2_OPLOCK_LEVEL_LEASE		0xFF
+/* Non-spec internal type */
+#define SMB2_OPLOCK_LEVEL_NOCHANGE	0x99
 
 /* Desired Access Flags */
 #define FILE_READ_DATA_LE		cpu_to_le32(0x00000001)
@@ -404,7 +410,7 @@ struct smb2_create_req {
 	__le16 NameLength;
 	__le32 CreateContextsOffset;
 	__le32 CreateContextsLength;
-	__u8   Buffer[1];
+	__u8   Buffer[8];
 } __packed;
 
 struct smb2_create_rsp {
@@ -428,6 +434,39 @@ struct smb2_create_rsp {
 	__u8   Buffer[1];
 } __packed;
 
+struct create_context {
+	__le32 Next;
+	__le16 NameOffset;
+	__le16 NameLength;
+	__le16 Reserved;
+	__le16 DataOffset;
+	__le32 DataLength;
+	__u8 Buffer[0];
+} __packed;
+
+#define SMB2_LEASE_NONE			__constant_cpu_to_le32(0x00)
+#define SMB2_LEASE_READ_CACHING		__constant_cpu_to_le32(0x01)
+#define SMB2_LEASE_HANDLE_CACHING	__constant_cpu_to_le32(0x02)
+#define SMB2_LEASE_WRITE_CACHING	__constant_cpu_to_le32(0x04)
+
+#define SMB2_LEASE_FLAG_BREAK_IN_PROGRESS __constant_cpu_to_le32(0x02)
+
+#define SMB2_LEASE_KEY_SIZE 16
+
+struct lease_context {
+	__le64 LeaseKeyLow;
+	__le64 LeaseKeyHigh;
+	__le32 LeaseState;
+	__le32 LeaseFlags;
+	__le64 LeaseDuration;
+} __packed;
+
+struct create_lease {
+	struct create_context ccontext;
+	__u8   Name[8];
+	struct lease_context lcontext;
+} __packed;
+
 /* Currently defined values for close flags */
 #define SMB2_CLOSE_FLAG_POSTQUERY_ATTRIB	cpu_to_le16(0x0001)
 struct smb2_close_req {
-- 
cgit 


From 25078105fbe14e7b3270391eaa11514bec787a52 Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <piastry@etersoft.ru>
Date: Wed, 19 Sep 2012 06:22:45 -0700
Subject: CIFS: Fix cache coherency for read oplock case

When we have a file opened with read oplock and we are writing a data
to this file, we need to store the data in the cache and then send to
the server to ensure that the next read operation will get a coherent
data.

Also mark it as CONFIG_CIFS_SMB2 because it's more suitable for SMB2
code but can fix some CIFS problems too (when server delays sending
an oplock break after a write request). We can drop this ifdefs
dependence in future.

Signed-off-by: Pavel Shilovsky <piastry@etersoft.ru>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/file.c | 27 +++++++++++++++++++++++----
 1 file changed, 23 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index ccad858d2d67..e93e3d2c69e6 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2461,11 +2461,30 @@ cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
 						iocb->ki_filp->private_data;
 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
 
+#ifdef CONFIG_CIFS_SMB2
 	/*
-	 * In strict cache mode we need to write the data to the server exactly
-	 * from the pos to pos+len-1 rather than flush all affected pages
-	 * because it may cause a error with mandatory locks on these pages but
-	 * not on the region from pos to ppos+len-1.
+	 * If we have an oplock for read and want to write a data to the file
+	 * we need to store it in the page cache and then push it to the server
+	 * to be sure the next read will get a valid data.
+	 */
+	if (!cinode->clientCanCacheAll && cinode->clientCanCacheRead) {
+		ssize_t written;
+		int rc;
+
+		written = generic_file_aio_write(iocb, iov, nr_segs, pos);
+		rc = filemap_fdatawrite(inode->i_mapping);
+		if (rc)
+			return (ssize_t)rc;
+
+		return written;
+	}
+#endif
+
+	/*
+	 * For non-oplocked files in strict cache mode we need to write the data
+	 * to the server exactly from the pos to pos+len-1 rather than flush all
+	 * affected pages because it may cause a error with mandatory locks on
+	 * these pages but not on the region from pos to ppos+len-1.
 	 */
 
 	if (!cinode->clientCanCacheAll)
-- 
cgit 


From 0822f51426b51bd599b3a7e972b14aacaa045a92 Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <pshilovsky@samba.org>
Date: Wed, 19 Sep 2012 06:22:45 -0700
Subject: CIFS: Add SMB2.1 lease break support

Signed-off-by: Pavel Shilovsky <pshilovsky@samba.org>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/smb2file.c  |  2 ++
 fs/cifs/smb2misc.c  | 90 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 fs/cifs/smb2ops.c   |  4 +++
 fs/cifs/smb2pdu.c   | 46 +++++++++++++++++++--------
 fs/cifs/smb2pdu.h   | 25 +++++++++++++++
 fs/cifs/smb2proto.h |  4 +++
 6 files changed, 157 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c
index 78fb2050e0d6..a93eec30a50d 100644
--- a/fs/cifs/smb2file.c
+++ b/fs/cifs/smb2file.c
@@ -38,6 +38,8 @@ void
 smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock)
 {
 	oplock &= 0xFF;
+	if (oplock == SMB2_OPLOCK_LEVEL_NOCHANGE)
+		return;
 	if (oplock == SMB2_OPLOCK_LEVEL_EXCLUSIVE) {
 		cinode->clientCanCacheAll = true;
 		cinode->clientCanCacheRead = true;
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index 01479a3fee8d..3a7f8bd5127d 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -148,6 +148,13 @@ smb2_check_message(char *buf, unsigned int length)
 			cERROR(1, "Illegal response size %u for command %d",
 				   le16_to_cpu(pdu->StructureSize2), command);
 			return 1;
+		} else if (command == SMB2_OPLOCK_BREAK_HE && (hdr->Status == 0)
+			   && (le16_to_cpu(pdu->StructureSize2) != 44)
+			   && (le16_to_cpu(pdu->StructureSize2) != 36)) {
+			/* special case for SMB2.1 lease break message */
+			cERROR(1, "Illegal response size %d for oplock break",
+				   le16_to_cpu(pdu->StructureSize2));
+			return 1;
 		}
 	}
 
@@ -360,6 +367,84 @@ cifs_convert_path_to_utf16(const char *from, struct cifs_sb_info *cifs_sb)
 	return to;
 }
 
+__le32
+smb2_get_lease_state(struct cifsInodeInfo *cinode)
+{
+	if (cinode->clientCanCacheAll)
+		return SMB2_LEASE_WRITE_CACHING | SMB2_LEASE_READ_CACHING;
+	else if (cinode->clientCanCacheRead)
+		return SMB2_LEASE_READ_CACHING;
+	return 0;
+}
+
+__u8 smb2_map_lease_to_oplock(__le32 lease_state)
+{
+	if (lease_state & SMB2_LEASE_WRITE_CACHING) {
+		if (lease_state & SMB2_LEASE_HANDLE_CACHING)
+			return SMB2_OPLOCK_LEVEL_BATCH;
+		else
+			return SMB2_OPLOCK_LEVEL_EXCLUSIVE;
+	} else if (lease_state & SMB2_LEASE_READ_CACHING)
+		return SMB2_OPLOCK_LEVEL_II;
+	return 0;
+}
+
+static bool
+smb2_is_valid_lease_break(char *buffer, struct TCP_Server_Info *server)
+{
+	struct smb2_lease_break *rsp = (struct smb2_lease_break *)buffer;
+	struct list_head *tmp, *tmp1, *tmp2;
+	struct cifs_ses *ses;
+	struct cifs_tcon *tcon;
+	struct cifsInodeInfo *cinode;
+	struct cifsFileInfo *cfile;
+
+	cFYI(1, "Checking for lease break");
+
+	/* look up tcon based on tid & uid */
+	spin_lock(&cifs_tcp_ses_lock);
+	list_for_each(tmp, &server->smb_ses_list) {
+		ses = list_entry(tmp, struct cifs_ses, smb_ses_list);
+		list_for_each(tmp1, &ses->tcon_list) {
+			tcon = list_entry(tmp1, struct cifs_tcon, tcon_list);
+
+			cifs_stats_inc(&tcon->stats.cifs_stats.num_oplock_brks);
+			spin_lock(&cifs_file_list_lock);
+			list_for_each(tmp2, &tcon->openFileList) {
+				cfile = list_entry(tmp2, struct cifsFileInfo,
+						     tlist);
+				cinode = CIFS_I(cfile->dentry->d_inode);
+
+				if (memcmp(cinode->lease_key, rsp->LeaseKey,
+					   SMB2_LEASE_KEY_SIZE))
+					continue;
+
+				cFYI(1, "lease key match, lease break 0x%d",
+				     le32_to_cpu(rsp->NewLeaseState));
+
+				smb2_set_oplock_level(cinode,
+				  smb2_map_lease_to_oplock(rsp->NewLeaseState));
+
+				if (rsp->Flags &
+				    SMB2_NOTIFY_BREAK_LEASE_FLAG_ACK_REQUIRED)
+					cfile->oplock_break_cancelled = false;
+				else
+					cfile->oplock_break_cancelled = true;
+
+				queue_work(cifsiod_wq, &cfile->oplock_break);
+
+				spin_unlock(&cifs_file_list_lock);
+				spin_unlock(&cifs_tcp_ses_lock);
+				return true;
+			}
+			spin_unlock(&cifs_file_list_lock);
+		}
+	}
+	spin_unlock(&cifs_tcp_ses_lock);
+	cFYI(1, "Can not process lease break - no lease matched");
+	return false;
+}
+
 bool
 smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
 {
@@ -377,7 +462,10 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
 
 	if (le16_to_cpu(rsp->StructureSize) !=
 				smb2_rsp_struct_sizes[SMB2_OPLOCK_BREAK_HE]) {
-		return false;
+		if (le16_to_cpu(rsp->StructureSize) == 44)
+			return smb2_is_valid_lease_break(buffer, server);
+		else
+			return false;
 	}
 
 	cFYI(1, "oplock level 0x%d", rsp->OplockLevel);
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 360d9079af49..630156f98cc7 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -513,6 +513,10 @@ static int
 smb2_oplock_response(struct cifs_tcon *tcon, struct cifs_fid *fid,
 		     struct cifsInodeInfo *cinode)
 {
+	if (tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_LEASING)
+		return SMB2_lease_break(0, tcon, cinode->lease_key,
+					smb2_get_lease_state(cinode));
+
 	return SMB2_oplock_break(0, tcon, fid->persistent_fid,
 				 fid->volatile_fid,
 				 cinode->clientCanCacheRead ? 1 : 0);
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 89d2824587b2..1572abefb378 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -911,7 +911,6 @@ parse_lease_state(struct smb2_create_rsp *rsp)
 {
 	char *data_offset;
 	struct create_lease *lc;
-	__u8 oplock = 0;
 	bool found = false;
 
 	data_offset = (char *)rsp;
@@ -932,19 +931,9 @@ parse_lease_state(struct smb2_create_rsp *rsp)
 	} while (le32_to_cpu(lc->ccontext.Next) != 0);
 
 	if (!found)
-		return oplock;
-
-	if (le32_to_cpu(lc->lcontext.LeaseState) & SMB2_LEASE_WRITE_CACHING) {
-		if (le32_to_cpu(lc->lcontext.LeaseState) &
-						SMB2_LEASE_HANDLE_CACHING)
-			oplock = SMB2_OPLOCK_LEVEL_BATCH;
-		else
-			oplock = SMB2_OPLOCK_LEVEL_EXCLUSIVE;
-	} else if (le32_to_cpu(lc->lcontext.LeaseState) &
-						SMB2_LEASE_READ_CACHING)
-		oplock = SMB2_OPLOCK_LEVEL_II;
+		return 0;
 
-	return oplock;
+	return smb2_map_lease_to_oplock(lc->lcontext.LeaseState);
 }
 
 int
@@ -2228,3 +2217,34 @@ SMB2_lock(const unsigned int xid, struct cifs_tcon *tcon,
 
 	return smb2_lockv(xid, tcon, persist_fid, volatile_fid, pid, 1, &lock);
 }
+
+int
+SMB2_lease_break(const unsigned int xid, struct cifs_tcon *tcon,
+		 __u8 *lease_key, const __le32 lease_state)
+{
+	int rc;
+	struct smb2_lease_ack *req = NULL;
+
+	cFYI(1, "SMB2_lease_break");
+	rc = small_smb2_init(SMB2_OPLOCK_BREAK, tcon, (void **) &req);
+
+	if (rc)
+		return rc;
+
+	req->hdr.CreditRequest = cpu_to_le16(1);
+	req->StructureSize = cpu_to_le16(36);
+	inc_rfc1001_len(req, 12);
+
+	memcpy(req->LeaseKey, lease_key, 16);
+	req->LeaseState = lease_state;
+
+	rc = SendReceiveNoRsp(xid, tcon->ses, (char *) req, CIFS_OBREAK_OP);
+	/* SMB2 buffer freed by function above */
+
+	if (rc) {
+		cifs_stats_fail_inc(tcon, SMB2_OPLOCK_BREAK_HE);
+		cFYI(1, "Send error in Lease Break = %d", rc);
+	}
+
+	return rc;
+}
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index e818a5cc5bd8..da099225b1a9 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -693,6 +693,31 @@ struct smb2_oplock_break {
 	__u64  VolatileFid;
 } __packed;
 
+#define SMB2_NOTIFY_BREAK_LEASE_FLAG_ACK_REQUIRED cpu_to_le32(0x01)
+
+struct smb2_lease_break {
+	struct smb2_hdr hdr;
+	__le16 StructureSize; /* Must be 44 */
+	__le16 Reserved;
+	__le32 Flags;
+	__u8   LeaseKey[16];
+	__le32 CurrentLeaseState;
+	__le32 NewLeaseState;
+	__le32 BreakReason;
+	__le32 AccessMaskHint;
+	__le32 ShareMaskHint;
+} __packed;
+
+struct smb2_lease_ack {
+	struct smb2_hdr hdr;
+	__le16 StructureSize; /* Must be 36 */
+	__le16 Reserved;
+	__le32 Flags;
+	__u8   LeaseKey[16];
+	__le32 LeaseState;
+	__le64 LeaseDuration;
+} __packed;
+
 /*
  *	PDU infolevel structure definitions
  *	BB consider moving to a different header
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index 8b4d3712255b..7d25f8b14f93 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -48,6 +48,8 @@ extern struct mid_q_entry *smb2_setup_request(struct cifs_ses *ses,
 extern struct mid_q_entry *smb2_setup_async_request(
 			struct TCP_Server_Info *server, struct smb_rqst *rqst);
 extern void smb2_echo_request(struct work_struct *work);
+extern __le32 smb2_get_lease_state(struct cifsInodeInfo *cinode);
+extern __u8 smb2_map_lease_to_oplock(__le32 lease_state);
 extern bool smb2_is_valid_oplock_break(char *buffer,
 				       struct TCP_Server_Info *srv);
 
@@ -151,5 +153,7 @@ extern int smb2_lockv(const unsigned int xid, struct cifs_tcon *tcon,
 		      const __u64 persist_fid, const __u64 volatile_fid,
 		      const __u32 pid, const __u32 num_lock,
 		      struct smb2_lock_element *buf);
+extern int SMB2_lease_break(const unsigned int xid, struct cifs_tcon *tcon,
+			    __u8 *lease_key, const __le32 lease_state);
 
 #endif			/* _SMB2PROTO_H */
-- 
cgit 


From 233839b1df65a24c8b67b748fe7b18d86d0ad6d7 Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <pshilovsky@etersoft.ru>
Date: Wed, 19 Sep 2012 06:22:45 -0700
Subject: CIFS: Fix fast lease break after open problem

Now we walk though cifsFileInfo's list for every incoming lease
break and look for an equivalent there. That approach misses lease
breaks that come just after an open response - we don't have time
to populate new cifsFileInfo structure to the list. Fix this by
adding new list of pending opens and look for a lease there if we
didn't find it in the list of cifsFileInfo structures.

Signed-off-by: Pavel Shilovsky <pshilovsky@etersoft.ru>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/cifsglob.h  | 12 +++++++++
 fs/cifs/cifsproto.h |  7 +++++
 fs/cifs/connect.c   |  1 +
 fs/cifs/dir.c       |  9 ++++++-
 fs/cifs/file.c      | 35 ++++++++++++++++++++++---
 fs/cifs/misc.c      | 30 ++++++++++++++++++++++
 fs/cifs/smb2misc.c  | 74 +++++++++++++++++++++++++++++++++++++++++++++++++----
 7 files changed, 158 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index b6ec142028e8..a39e5b7fc844 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -715,6 +715,7 @@ struct cifs_ses {
 	__u16 session_flags;
 #endif /* CONFIG_CIFS_SMB2 */
 };
+
 /* no more than one of the following three session flags may be set */
 #define CIFS_SES_NT4 1
 #define CIFS_SES_OS2 2
@@ -821,6 +822,7 @@ struct cifs_tcon {
 	u64 resource_id;		/* server resource id */
 	struct fscache_cookie *fscache;	/* cookie for share */
 #endif
+	struct list_head pending_opens;	/* list of incomplete opens */
 	/* BB add field for back pointer to sb struct(s)? */
 };
 
@@ -863,6 +865,15 @@ cifs_get_tlink(struct tcon_link *tlink)
 /* This function is always expected to succeed */
 extern struct cifs_tcon *cifs_sb_master_tcon(struct cifs_sb_info *cifs_sb);
 
+#define CIFS_OPLOCK_NO_CHANGE 0xfe
+
+struct cifs_pending_open {
+	struct list_head olist;
+	struct tcon_link *tlink;
+	__u8 lease_key[16];
+	__u32 oplock;
+};
+
 /*
  * This info hangs off the cifsFileInfo structure, pointed to by llist.
  * This is used to track byte stream locks on the file
@@ -903,6 +914,7 @@ struct cifs_fid {
 	__u64 volatile_fid;	/* volatile file id for smb2 */
 	__u8 lease_key[SMB2_LEASE_KEY_SIZE];	/* lease key for smb2 */
 #endif
+	struct cifs_pending_open *pending_open;
 };
 
 struct cifs_fid_locks {
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index c758ee7b0307..09ea6321c55a 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -184,6 +184,13 @@ extern bool cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset,
 				    __u64 length, __u8 type,
 				    struct cifsLockInfo **conf_lock,
 				    bool rw_check);
+extern void cifs_add_pending_open(struct cifs_fid *fid,
+				  struct tcon_link *tlink,
+				  struct cifs_pending_open *open);
+extern void cifs_add_pending_open_locked(struct cifs_fid *fid,
+					 struct tcon_link *tlink,
+					 struct cifs_pending_open *open);
+extern void cifs_del_pending_open(struct cifs_pending_open *open);
 
 #if IS_ENABLED(CONFIG_CIFS_DFS_UPCALL)
 extern void cifs_dfs_release_automount_timer(void);
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 443e39633107..59c595e8a1b0 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -2645,6 +2645,7 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
 	tcon->retry = volume_info->retry;
 	tcon->nocase = volume_info->nocase;
 	tcon->local_lease = volume_info->local_lease;
+	INIT_LIST_HEAD(&tcon->pending_opens);
 
 	spin_lock(&cifs_tcp_ses_lock);
 	list_add(&tcon->tcon_list, &ses->tcon_list);
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 4f2147c5adb6..7c0a81283645 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -382,6 +382,7 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
 	struct cifs_tcon *tcon;
 	struct TCP_Server_Info *server;
 	struct cifs_fid fid;
+	struct cifs_pending_open open;
 	__u32 oplock;
 	struct cifsFileInfo *file_info;
 
@@ -423,16 +424,21 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
 	if (server->ops->new_lease_key)
 		server->ops->new_lease_key(&fid);
 
+	cifs_add_pending_open(&fid, tlink, &open);
+
 	rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode,
 			    &oplock, &fid, opened);
 
-	if (rc)
+	if (rc) {
+		cifs_del_pending_open(&open);
 		goto out;
+	}
 
 	rc = finish_open(file, direntry, generic_file_open, opened);
 	if (rc) {
 		if (server->ops->close)
 			server->ops->close(xid, tcon, &fid);
+		cifs_del_pending_open(&open);
 		goto out;
 	}
 
@@ -440,6 +446,7 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
 	if (file_info == NULL) {
 		if (server->ops->close)
 			server->ops->close(xid, tcon, &fid);
+		cifs_del_pending_open(&open);
 		rc = -ENOMEM;
 	}
 
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index e93e3d2c69e6..88e9c74e2cac 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -247,6 +247,7 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
 	struct cifsInodeInfo *cinode = CIFS_I(inode);
 	struct cifsFileInfo *cfile;
 	struct cifs_fid_locks *fdlocks;
+	struct cifs_tcon *tcon = tlink_tcon(tlink);
 
 	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
 	if (cfile == NULL)
@@ -274,10 +275,15 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
 	cfile->tlink = cifs_get_tlink(tlink);
 	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
 	mutex_init(&cfile->fh_mutex);
-	tlink_tcon(tlink)->ses->server->ops->set_fid(cfile, fid, oplock);
 
 	spin_lock(&cifs_file_list_lock);
-	list_add(&cfile->tlist, &(tlink_tcon(tlink)->openFileList));
+	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE)
+		oplock = fid->pending_open->oplock;
+	list_del(&fid->pending_open->olist);
+
+	tlink_tcon(tlink)->ses->server->ops->set_fid(cfile, fid, oplock);
+
+	list_add(&cfile->tlist, &tcon->openFileList);
 	/* if readable file instance put first in list*/
 	if (file->f_mode & FMODE_READ)
 		list_add(&cfile->flist, &cinode->openFileList);
@@ -307,9 +313,12 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
 {
 	struct inode *inode = cifs_file->dentry->d_inode;
 	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
+	struct TCP_Server_Info *server = tcon->ses->server;
 	struct cifsInodeInfo *cifsi = CIFS_I(inode);
 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
 	struct cifsLockInfo *li, *tmp;
+	struct cifs_fid fid;
+	struct cifs_pending_open open;
 
 	spin_lock(&cifs_file_list_lock);
 	if (--cifs_file->count > 0) {
@@ -317,6 +326,12 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
 		return;
 	}
 
+	if (server->ops->get_lease_key)
+		server->ops->get_lease_key(inode, &fid);
+
+	/* store open in pending opens to make sure we don't miss lease break */
+	cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
+
 	/* remove it from the lists */
 	list_del(&cifs_file->flist);
 	list_del(&cifs_file->tlist);
@@ -348,6 +363,8 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
 		free_xid(xid);
 	}
 
+	cifs_del_pending_open(&open);
+
 	/*
 	 * Delete any outstanding lock records. We'll lose them when the file
 	 * is closed anyway.
@@ -368,6 +385,7 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
 }
 
 int cifs_open(struct inode *inode, struct file *file)
+
 {
 	int rc = -EACCES;
 	unsigned int xid;
@@ -380,6 +398,7 @@ int cifs_open(struct inode *inode, struct file *file)
 	char *full_path = NULL;
 	bool posix_open_ok = false;
 	struct cifs_fid fid;
+	struct cifs_pending_open open;
 
 	xid = get_xid();
 
@@ -401,7 +420,7 @@ int cifs_open(struct inode *inode, struct file *file)
 	cFYI(1, "inode = 0x%p file flags are 0x%x for %s",
 		 inode, file->f_flags, full_path);
 
-	if (tcon->ses->server->oplocks)
+	if (server->oplocks)
 		oplock = REQ_OPLOCK;
 	else
 		oplock = 0;
@@ -434,20 +453,28 @@ int cifs_open(struct inode *inode, struct file *file)
 		 */
 	}
 
+	if (server->ops->get_lease_key)
+		server->ops->get_lease_key(inode, &fid);
+
+	cifs_add_pending_open(&fid, tlink, &open);
+
 	if (!posix_open_ok) {
 		if (server->ops->get_lease_key)
 			server->ops->get_lease_key(inode, &fid);
 
 		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
 				  file->f_flags, &oplock, &fid, xid);
-		if (rc)
+		if (rc) {
+			cifs_del_pending_open(&open);
 			goto out;
+		}
 	}
 
 	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
 	if (cfile == NULL) {
 		if (server->ops->close)
 			server->ops->close(xid, tcon, &fid);
+		cifs_del_pending_open(&open);
 		rc = -ENOMEM;
 		goto out;
 	}
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index a921b0712eff..3a00c0d0cead 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -579,3 +579,33 @@ backup_cred(struct cifs_sb_info *cifs_sb)
 
 	return false;
 }
+
+void
+cifs_del_pending_open(struct cifs_pending_open *open)
+{
+	spin_lock(&cifs_file_list_lock);
+	list_del(&open->olist);
+	spin_unlock(&cifs_file_list_lock);
+}
+
+void
+cifs_add_pending_open_locked(struct cifs_fid *fid, struct tcon_link *tlink,
+			     struct cifs_pending_open *open)
+{
+#ifdef CONFIG_CIFS_SMB2
+	memcpy(open->lease_key, fid->lease_key, SMB2_LEASE_KEY_SIZE);
+#endif
+	open->oplock = CIFS_OPLOCK_NO_CHANGE;
+	open->tlink = tlink;
+	fid->pending_open = open;
+	list_add_tail(&open->olist, &tlink_tcon(tlink)->pending_opens);
+}
+
+void
+cifs_add_pending_open(struct cifs_fid *fid, struct tcon_link *tlink,
+		      struct cifs_pending_open *open)
+{
+	spin_lock(&cifs_file_list_lock);
+	cifs_add_pending_open_locked(fid, tlink, open);
+	spin_unlock(&cifs_file_list_lock);
+}
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index 3a7f8bd5127d..cd31715f03f4 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -389,6 +389,27 @@ __u8 smb2_map_lease_to_oplock(__le32 lease_state)
 	return 0;
 }
 
+struct smb2_lease_break_work {
+	struct work_struct lease_break;
+	struct tcon_link *tlink;
+	__u8 lease_key[16];
+	__le32 lease_state;
+};
+
+static void
+cifs_ses_oplock_break(struct work_struct *work)
+{
+	struct smb2_lease_break_work *lw = container_of(work,
+				struct smb2_lease_break_work, lease_break);
+	int rc;
+
+	rc = SMB2_lease_break(0, tlink_tcon(lw->tlink), lw->lease_key,
+			      lw->lease_state);
+	cFYI(1, "Lease release rc %d", rc);
+	cifs_put_tlink(lw->tlink);
+	kfree(lw);
+}
+
 static bool
 smb2_is_valid_lease_break(char *buffer, struct TCP_Server_Info *server)
 {
@@ -398,6 +419,19 @@ smb2_is_valid_lease_break(char *buffer, struct TCP_Server_Info *server)
 	struct cifs_tcon *tcon;
 	struct cifsInodeInfo *cinode;
 	struct cifsFileInfo *cfile;
+	struct cifs_pending_open *open;
+	struct smb2_lease_break_work *lw;
+	bool found;
+	int ack_req = rsp->Flags & SMB2_NOTIFY_BREAK_LEASE_FLAG_ACK_REQUIRED;
+
+	lw = kmalloc(sizeof(struct smb2_lease_break_work), GFP_KERNEL);
+	if (!lw) {
+		cERROR(1, "Memory allocation failed during lease break check");
+		return false;
+	}
+
+	INIT_WORK(&lw->lease_break, cifs_ses_oplock_break);
+	lw->lease_state = rsp->NewLeaseState;
 
 	cFYI(1, "Checking for lease break");
 
@@ -405,28 +439,29 @@ smb2_is_valid_lease_break(char *buffer, struct TCP_Server_Info *server)
 	spin_lock(&cifs_tcp_ses_lock);
 	list_for_each(tmp, &server->smb_ses_list) {
 		ses = list_entry(tmp, struct cifs_ses, smb_ses_list);
+
+		spin_lock(&cifs_file_list_lock);
 		list_for_each(tmp1, &ses->tcon_list) {
 			tcon = list_entry(tmp1, struct cifs_tcon, tcon_list);
 
 			cifs_stats_inc(&tcon->stats.cifs_stats.num_oplock_brks);
-			spin_lock(&cifs_file_list_lock);
 			list_for_each(tmp2, &tcon->openFileList) {
 				cfile = list_entry(tmp2, struct cifsFileInfo,
-						     tlist);
+						   tlist);
 				cinode = CIFS_I(cfile->dentry->d_inode);
 
 				if (memcmp(cinode->lease_key, rsp->LeaseKey,
 					   SMB2_LEASE_KEY_SIZE))
 					continue;
 
+				cFYI(1, "found in the open list");
 				cFYI(1, "lease key match, lease break 0x%d",
 				     le32_to_cpu(rsp->NewLeaseState));
 
 				smb2_set_oplock_level(cinode,
 				  smb2_map_lease_to_oplock(rsp->NewLeaseState));
 
-				if (rsp->Flags &
-				    SMB2_NOTIFY_BREAK_LEASE_FLAG_ACK_REQUIRED)
+				if (ack_req)
 					cfile->oplock_break_cancelled = false;
 				else
 					cfile->oplock_break_cancelled = true;
@@ -437,10 +472,39 @@ smb2_is_valid_lease_break(char *buffer, struct TCP_Server_Info *server)
 				spin_unlock(&cifs_tcp_ses_lock);
 				return true;
 			}
-			spin_unlock(&cifs_file_list_lock);
+
+			found = false;
+			list_for_each_entry(open, &tcon->pending_opens, olist) {
+				if (memcmp(open->lease_key, rsp->LeaseKey,
+					   SMB2_LEASE_KEY_SIZE))
+					continue;
+
+				if (!found && ack_req) {
+					found = true;
+					memcpy(lw->lease_key, open->lease_key,
+					       SMB2_LEASE_KEY_SIZE);
+					lw->tlink = cifs_get_tlink(open->tlink);
+					queue_work(cifsiod_wq,
+						   &lw->lease_break);
+				}
+
+				cFYI(1, "found in the pending open list");
+				cFYI(1, "lease key match, lease break 0x%d",
+				     le32_to_cpu(rsp->NewLeaseState));
+
+				open->oplock =
+				  smb2_map_lease_to_oplock(rsp->NewLeaseState);
+			}
+			if (found) {
+				spin_unlock(&cifs_file_list_lock);
+				spin_unlock(&cifs_tcp_ses_lock);
+				return true;
+			}
 		}
+		spin_unlock(&cifs_file_list_lock);
 	}
 	spin_unlock(&cifs_tcp_ses_lock);
+	kfree(lw);
 	cFYI(1, "Can not process lease break - no lease matched");
 	return false;
 }
-- 
cgit 


From 101b92d9590a645d6fb643654b3a92556203b745 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Wed, 19 Sep 2012 06:22:45 -0700
Subject: cifs: cleanups for cifs_mkdir_qinfo

Rename inode pointers for better clarity. Move the d_instantiate call to
the end of the function to prevent other tasks from seeing it before
we've finished constructing it. Since we should have exclusive access to
the inode at this point, remove the spinlock around i_nlink update.

Reviewed-by: Pavel Shilovsky <piastry@etersoft.ru>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/inode.c | 51 ++++++++++++++++++++++++---------------------------
 1 file changed, 24 insertions(+), 27 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index e74e1bceb416..3d155875f446 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1177,34 +1177,33 @@ unlink_out:
 }
 
 static int
-cifs_mkdir_qinfo(struct inode *inode, struct dentry *dentry, umode_t mode,
+cifs_mkdir_qinfo(struct inode *parent, struct dentry *dentry, umode_t mode,
 		 const char *full_path, struct cifs_sb_info *cifs_sb,
 		 struct cifs_tcon *tcon, const unsigned int xid)
 {
 	int rc = 0;
-	struct inode *newinode = NULL;
+	struct inode *inode = NULL;
 
 	if (tcon->unix_ext)
-		rc = cifs_get_inode_info_unix(&newinode, full_path, inode->i_sb,
+		rc = cifs_get_inode_info_unix(&inode, full_path, parent->i_sb,
 					      xid);
 	else
-		rc = cifs_get_inode_info(&newinode, full_path, NULL,
-					 inode->i_sb, xid, NULL);
+		rc = cifs_get_inode_info(&inode, full_path, NULL, parent->i_sb,
+					 xid, NULL);
+
 	if (rc)
 		return rc;
 
-	d_instantiate(dentry, newinode);
 	/*
 	 * setting nlink not necessary except in cases where we failed to get it
-	 * from the server or was set bogus
+	 * from the server or was set bogus. Also, since this is a brand new
+	 * inode, no need to grab the i_lock before setting the i_nlink.
 	 */
-	spin_lock(&dentry->d_inode->i_lock);
-	if ((dentry->d_inode) && (dentry->d_inode->i_nlink < 2))
-		set_nlink(dentry->d_inode, 2);
-	spin_unlock(&dentry->d_inode->i_lock);
+	if (inode->i_nlink < 2)
+		set_nlink(inode, 2);
 	mode &= ~current_umask();
 	/* must turn on setgid bit if parent dir has it */
-	if (inode->i_mode & S_ISGID)
+	if (parent->i_mode & S_ISGID)
 		mode |= S_ISGID;
 
 	if (tcon->unix_ext) {
@@ -1217,8 +1216,8 @@ cifs_mkdir_qinfo(struct inode *inode, struct dentry *dentry, umode_t mode,
 		};
 		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
 			args.uid = (__u64)current_fsuid();
-			if (inode->i_mode & S_ISGID)
-				args.gid = (__u64)inode->i_gid;
+			if (parent->i_mode & S_ISGID)
+				args.gid = (__u64)parent->i_gid;
 			else
 				args.gid = (__u64)current_fsgid();
 		} else {
@@ -1233,22 +1232,20 @@ cifs_mkdir_qinfo(struct inode *inode, struct dentry *dentry, umode_t mode,
 		struct TCP_Server_Info *server = tcon->ses->server;
 		if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) &&
 		    (mode & S_IWUGO) == 0 && server->ops->mkdir_setinfo)
-			server->ops->mkdir_setinfo(newinode, full_path, cifs_sb,
+			server->ops->mkdir_setinfo(inode, full_path, cifs_sb,
 						   tcon, xid);
-		if (dentry->d_inode) {
-			if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)
-				dentry->d_inode->i_mode = (mode | S_IFDIR);
-
-			if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
-				dentry->d_inode->i_uid = current_fsuid();
-				if (inode->i_mode & S_ISGID)
-					dentry->d_inode->i_gid = inode->i_gid;
-				else
-					dentry->d_inode->i_gid =
-								current_fsgid();
-			}
+		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)
+			inode->i_mode = (mode | S_IFDIR);
+
+		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
+			inode->i_uid = current_fsuid();
+			if (inode->i_mode & S_ISGID)
+				inode->i_gid = parent->i_gid;
+			else
+				inode->i_gid = current_fsgid();
 		}
 	}
+	d_instantiate(dentry, inode);
 	return rc;
 }
 
-- 
cgit 


From ecdb69e2cc80cca77d6afcc0aca244b72cc5ee68 Mon Sep 17 00:00:00 2001
From: Sachin Prabhu <sprabhu@redhat.com>
Date: Wed, 19 Sep 2012 06:22:45 -0700
Subject: cifs: Mangle string used for unc in /proc/mounts

The string for "unc=" in /proc/mounts needs to be escaped. The current
behaviour can create problems in cases when mounting a share starting
with a number.

example:
>mount -t cifs -o username=test,password=x vm140-31:/17000-test /mnt
>mount -o remount,password=x /mnt
mount error: could not resolve address for vm140-31x00-test: Unknown
error

The sub-string "\170" which is part of the unc for the mount above in
/proc/mounts is interpreted as character'x' in the case above. Escaping
the string fixes the problem.

Signed-off-by: Sachin Prabhu <sprabhu@redhat.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/cifsfs.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 28ac048d54ea..a41044a31083 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -350,7 +350,8 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
 	cifs_show_security(s, tcon->ses->server);
 	cifs_show_cache_flavor(s, cifs_sb);
 
-	seq_printf(s, ",unc=%s", tcon->treeName);
+	seq_printf(s, ",unc=");
+	seq_escape(s, tcon->treeName, " \t\n\\");
 
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)
 		seq_printf(s, ",multiuser");
-- 
cgit 


From 3d6d854a13844223b603fd7a16a4a4a4afd62c72 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Wed, 19 Sep 2012 06:22:46 -0700
Subject: cifs: add FL_CLOSE to fl_flags mask in cifs_read_flock

FL_CLOSE is quite common when you close a file on which you hold a
lock. The spurious "Unknown lock flags" message in cFYI is
confusing in this case.

Reported-by: Alexander Bokovoy <abokovoy@redhat.com>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Reviewed-by: Pavel Shilovsky <piastry@etersoft.ru>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/file.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 88e9c74e2cac..075f7cfd1da5 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1173,7 +1173,8 @@ cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
 	if (flock->fl_flags & FL_LEASE)
 		cFYI(1, "Lease on file - not implemented yet");
 	if (flock->fl_flags &
-	    (~(FL_POSIX | FL_FLOCK | FL_SLEEP | FL_ACCESS | FL_LEASE)))
+	    (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
+	       FL_ACCESS | FL_LEASE | FL_CLOSE)))
 		cFYI(1, "Unknown lock flags 0x%x", flock->fl_flags);
 
 	*type = server->vals->large_lock_type;
-- 
cgit 


From 5efeb0970794933f86af70a0fa8282e9255a1ca2 Mon Sep 17 00:00:00 2001
From: Steve French <smfrench@gmail.com>
Date: Wed, 19 Sep 2012 06:22:46 -0700
Subject: Update cifs version number

With SMB2 support, update from version 1.79 to 2.0 to make
it easier for users to recognize which version has SMB2 support.

Signed-off-by: Steve French <sfrench@us.ibm.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
---
 fs/cifs/cifsfs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 1c49c5a9b27a..7163419cecd9 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -128,5 +128,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
 extern const struct export_operations cifs_export_ops;
 #endif /* CONFIG_CIFS_NFSD_EXPORT */
 
-#define CIFS_VERSION   "1.78"
+#define CIFS_VERSION   "2.0"
 #endif				/* _CIFSFS_H */
-- 
cgit 


From ba02e89915afcfc9a071a86e5cae32f77c7d353a Mon Sep 17 00:00:00 2001
From: Steve French <smfrench@gmail.com>
Date: Wed, 19 Sep 2012 06:22:46 -0700
Subject: MARK SMB2 support EXPERIMENTAL

Now that the merge of the remaining pieces needed for
SMB2 (SMB2.1 dialect) are in, and most test cases pass,
we can consider SMB2.1 EXPERIMENTAL rather than "BROKEN."

Reviewed-by: Jeff Layton <jlayton@redhat.com>
Reviewed-by: Pavel Shilovsky <piastry@etersoft.ru>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 802930179c2b..253033ec0904 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -162,7 +162,7 @@ config CIFS_NFSD_EXPORT
 
 config CIFS_SMB2
 	bool "SMB2 network file system support (EXPERIMENTAL)"
-	depends on EXPERIMENTAL && INET && BROKEN
+	depends on EXPERIMENTAL && INET
 	select NLS
 	select KEYS
 	select FSCACHE
-- 
cgit 


From 12e8a20824677fbc24e921d7aebfda6a47cc25b1 Mon Sep 17 00:00:00 2001
From: Steve French <smfrench@gmail.com>
Date: Wed, 19 Sep 2012 09:19:39 -0700
Subject: Trivial endian fixes

Some trivial endian fixes for the SMB2 code. One
warning remains which I asked Pavel to look at.

Reviewed-by: Pavel Shilovsky <piastry@etersoft.ru>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/smb2misc.c | 5 +++--
 fs/cifs/smb2ops.c  | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index cd31715f03f4..7b1c5e3287fb 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -422,7 +422,8 @@ smb2_is_valid_lease_break(char *buffer, struct TCP_Server_Info *server)
 	struct cifs_pending_open *open;
 	struct smb2_lease_break_work *lw;
 	bool found;
-	int ack_req = rsp->Flags & SMB2_NOTIFY_BREAK_LEASE_FLAG_ACK_REQUIRED;
+	int ack_req = le32_to_cpu(rsp->Flags &
+				  SMB2_NOTIFY_BREAK_LEASE_FLAG_ACK_REQUIRED);
 
 	lw = kmalloc(sizeof(struct smb2_lease_break_work), GFP_KERNEL);
 	if (!lw) {
@@ -524,7 +525,7 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
 	if (rsp->hdr.Command != SMB2_OPLOCK_BREAK)
 		return false;
 
-	if (le16_to_cpu(rsp->StructureSize) !=
+	if (rsp->StructureSize !=
 				smb2_rsp_struct_sizes[SMB2_OPLOCK_BREAK_HE]) {
 		if (le16_to_cpu(rsp->StructureSize) == 44)
 			return smb2_is_valid_lease_break(buffer, server);
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 630156f98cc7..2183bb343edd 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -496,7 +496,7 @@ smb2_is_status_pending(char *buf, struct TCP_Server_Info *server, int length)
 {
 	struct smb2_hdr *hdr = (struct smb2_hdr *)buf;
 
-	if (le32_to_cpu(hdr->Status) != STATUS_PENDING)
+	if (hdr->Status != STATUS_PENDING)
 		return false;
 
 	if (!length) {
-- 
cgit 


From e4e3703555b6eb1afa0be86a45cd3c8bccb6cb08 Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <piastry@etersoft.ru>
Date: Wed, 19 Sep 2012 12:36:52 +0400
Subject: CIFS: Fix endian conversion of IndexNumber

by making it __le64 rather than __u64 in FILE_AL_INFO structure.

Signed-off-by: Pavel Shilovsky <piastry@etersoft.ru>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/cifspdu.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index 3fb03e2c8e86..b9d59a948a2c 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -2210,7 +2210,7 @@ typedef struct { /* data block encoding of response to level 263 QPathInfo */
 	__u8 DeletePending;
 	__u8 Directory;
 	__u16 Pad2;
-	__u64 IndexNumber;
+	__le64 IndexNumber;
 	__le32 EASize;
 	__le32 AccessFlags;
 	__u64 IndexNumber1;
-- 
cgit 


From e5d04887196ee30423c79e52043d418e04012954 Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <piastry@etersoft.ru>
Date: Wed, 19 Sep 2012 16:03:26 +0400
Subject: CIFS: Fix possible memory leaks in SMB2 code

and add missed increments of failed async read and write requests.

Signed-off-by: Pavel Shilovsky <piastry@etersoft.ru>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/smb2pdu.c | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 1572abefb378..a7db95f4760c 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1218,13 +1218,13 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon,
 	iov[0].iov_len = get_rfc1002_length(req) + 4;
 
 	rc = SendReceive2(xid, ses, iov, 1, &resp_buftype, 0);
+	rsp = (struct smb2_query_info_rsp *)iov[0].iov_base;
+
 	if (rc) {
 		cifs_stats_fail_inc(tcon, SMB2_QUERY_INFO_HE);
 		goto qinf_exit;
 	}
 
-	rsp = (struct smb2_query_info_rsp *)iov[0].iov_base;
-
 	rc = validate_and_copy_buf(le16_to_cpu(rsp->OutputBufferOffset),
 				   le32_to_cpu(rsp->OutputBufferLength),
 				   &rsp->hdr, min_len, data);
@@ -1485,8 +1485,10 @@ smb2_async_readv(struct cifs_readdata *rdata)
 	rc = cifs_call_async(io_parms.tcon->ses->server, &rqst,
 			     cifs_readv_receive, smb2_readv_callback,
 			     rdata, 0);
-	if (rc)
+	if (rc) {
 		kref_put(&rdata->refcount, cifs_readdata_release);
+		cifs_stats_fail_inc(io_parms.tcon, SMB2_READ_HE);
+	}
 
 	cifs_small_buf_release(buf);
 	return rc;
@@ -1643,8 +1645,10 @@ smb2_async_writev(struct cifs_writedata *wdata)
 	rc = cifs_call_async(tcon->ses->server, &rqst, NULL,
 				smb2_writev_callback, wdata, 0);
 
-	if (rc)
+	if (rc) {
 		kref_put(&wdata->refcount, cifs_writedata_release);
+		cifs_stats_fail_inc(tcon, SMB2_WRITE_HE);
+	}
 
 async_writev_out:
 	cifs_small_buf_release(req);
@@ -1700,15 +1704,15 @@ SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms,
 
 	rc = SendReceive2(xid, io_parms->tcon->ses, iov, n_vec + 1,
 			  &resp_buftype, 0);
+	rsp = (struct smb2_write_rsp *)iov[0].iov_base;
 
 	if (rc) {
 		cifs_stats_fail_inc(io_parms->tcon, SMB2_WRITE_HE);
 		cERROR(1, "Send error in write = %d", rc);
-	} else {
-		rsp = (struct smb2_write_rsp *)iov[0].iov_base;
+	} else
 		*nbytes = le32_to_cpu(rsp->DataLength);
-		free_rsp_buf(resp_buftype, rsp);
-	}
+
+	free_rsp_buf(resp_buftype, rsp);
 	return rc;
 }
 
@@ -1828,11 +1832,12 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
 	inc_rfc1001_len(req, len - 1 /* Buffer */);
 
 	rc = SendReceive2(xid, ses, iov, 2, &resp_buftype, 0);
+	rsp = (struct smb2_query_directory_rsp *)iov[0].iov_base;
+
 	if (rc) {
 		cifs_stats_fail_inc(tcon, SMB2_QUERY_DIRECTORY_HE);
 		goto qdir_exit;
 	}
-	rsp = (struct smb2_query_directory_rsp *)iov[0].iov_base;
 
 	rc = validate_buf(le16_to_cpu(rsp->OutputBufferOffset),
 			  le32_to_cpu(rsp->OutputBufferLength), &rsp->hdr,
-- 
cgit 


From 52b0c3427e5baf14f1ffb92cf8ae542160ec6a07 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Wed, 19 Sep 2012 15:20:27 -0700
Subject: cifs: remove support for CIFS_IOC_CHECKUMOUNT ioctl

...as promised for 3.7.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/ioctl.c | 19 -------------------
 1 file changed, 19 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index 5b3481bd3d96..fd5009d56f9f 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -28,8 +28,6 @@
 #include "cifs_debug.h"
 #include "cifsfs.h"
 
-#define CIFS_IOC_CHECKUMOUNT _IO(0xCF, 2)
-
 long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
 {
 	struct inode *inode = filep->f_dentry->d_inode;
@@ -51,23 +49,6 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
 	cifs_sb = CIFS_SB(inode->i_sb);
 
 	switch (command) {
-		static bool warned = false;
-		case CIFS_IOC_CHECKUMOUNT:
-			if (!warned) {
-				warned = true;
-				cERROR(1, "the CIFS_IOC_CHECKMOUNT ioctl will "
-					  "be deprecated in 3.7. Please "
-					  "migrate away from the use of "
-					  "umount.cifs");
-			}
-			cFYI(1, "User unmount attempted");
-			if (cifs_sb->mnt_uid == current_uid())
-				rc = 0;
-			else {
-				rc = -EACCES;
-				cFYI(1, "uids do not match");
-			}
-			break;
 #ifdef CONFIG_CIFS_POSIX
 		case FS_IOC_GETFLAGS:
 			if (pSMBFile == NULL)
-- 
cgit 


From 1b359204901e182b27aa9da432095cbe2cfd2512 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Wed, 19 Sep 2012 15:20:27 -0700
Subject: cifs: remove support for deprecated "forcedirectio" and "strictcache"
 mount options

...and make the default cache=strict as promised for 3.7.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/connect.c | 37 ++++---------------------------------
 1 file changed, 4 insertions(+), 33 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 59c595e8a1b0..a792282f02f7 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -82,8 +82,7 @@ enum {
 	Opt_serverino, Opt_noserverino,
 	Opt_rwpidforward, Opt_cifsacl, Opt_nocifsacl,
 	Opt_acl, Opt_noacl, Opt_locallease,
-	Opt_sign, Opt_seal, Opt_direct,
-	Opt_strictcache, Opt_noac,
+	Opt_sign, Opt_seal, Opt_noac,
 	Opt_fsc, Opt_mfsymlinks,
 	Opt_multiuser, Opt_sloppy,
 
@@ -160,10 +159,6 @@ static const match_table_t cifs_mount_option_tokens = {
 	{ Opt_locallease, "locallease" },
 	{ Opt_sign, "sign" },
 	{ Opt_seal, "seal" },
-	{ Opt_direct, "direct" },
-	{ Opt_direct, "directio" },
-	{ Opt_direct, "forcedirectio" },
-	{ Opt_strictcache, "strictcache" },
 	{ Opt_noac, "noac" },
 	{ Opt_fsc, "fsc" },
 	{ Opt_mfsymlinks, "mfsymlinks" },
@@ -1105,8 +1100,6 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
 	char *string = NULL;
 	char *tmp_end, *value;
 	char delim;
-	bool cache_specified = false;
-	static bool cache_warned = false;
 
 	separator[0] = ',';
 	separator[1] = 0;
@@ -1138,6 +1131,9 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
 	/* default to using server inode numbers where available */
 	vol->server_ino = 1;
 
+	/* default is to use strict cifs caching semantics */
+	vol->strict_io = true;
+
 	vol->actimeo = CIFS_DEF_ACTIMEO;
 
 	/* FIXME: add autonegotiation -- for now, SMB1 is default */
@@ -1321,22 +1317,6 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
 			 */
 			vol->seal = 1;
 			break;
-		case Opt_direct:
-			cache_specified = true;
-			vol->direct_io = true;
-			vol->strict_io = false;
-			cERROR(1, "The \"directio\" option will be removed in "
-				  "3.7. Please switch to the \"cache=none\" "
-				  "option.");
-			break;
-		case Opt_strictcache:
-			cache_specified = true;
-			vol->direct_io = false;
-			vol->strict_io = true;
-			cERROR(1, "The \"strictcache\" option will be removed "
-				"in 3.7. Please switch to the \"cache=strict\" "
-				"option.");
-			break;
 		case Opt_noac:
 			printk(KERN_WARNING "CIFS: Mount option noac not "
 				"supported. Instead set "
@@ -1771,7 +1751,6 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
 				goto cifs_parse_mount_err;
 			break;
 		case Opt_cache:
-			cache_specified = true;
 			string = match_strdup(args);
 			if (string == NULL)
 				goto out_nomem;
@@ -1822,14 +1801,6 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
 		printk(KERN_NOTICE "CIFS: ignoring forcegid mount option "
 				   "specified with no gid= option.\n");
 
-	/* FIXME: remove this block in 3.7 */
-	if (!cache_specified && !cache_warned) {
-		cache_warned = true;
-		printk(KERN_NOTICE "CIFS: no cache= option specified, using "
-				   "\"cache=loose\". This default will change "
-				   "to \"cache=strict\" in 3.7.\n");
-	}
-
 	kfree(mountdata_copy);
 	return 0;
 
-- 
cgit 


From 142e5460a66edbfe881474eb422e86ff61c4bfc7 Mon Sep 17 00:00:00 2001
From: Jaeden Amero <jaeden.amero@ni.com>
Date: Mon, 24 Sep 2012 10:39:45 -0500
Subject: compat_ioctl: Avoid using undefined RS-485 IOCTLs

Wrap the use of TIOCSRS485 and TIOCGRS485 in #ifdef so that we avoid
adding undefined IOCTLs to the ioctl pointer list as compatible
ioctls.

This change was motivated by a build error on a MIPS build.

tree:   git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty.git
tty-next
head:   ac57e7f38ea6fe7358cd0b7a2f2d21aef5ab70cd
commit: 84c3b84860440a9e3a3666c14112f41311b8f623 [10/16] compat_ioctl:
  Add RS-485 IOCTLs to the list
config: mips-fuloong2e_defconfig

All related error/warning messages:

fs/compat_ioctl.c:869:1: error: 'TIOCSRS485' undeclared here (not in a
  function)
fs/compat_ioctl.c:870:1: error: 'TIOCGRS485' undeclared here (not in a
  function)

vim +869 fs/compat_ioctl.c
863 COMPATIBLE_IOCTL(TIOCSPGRP)
864 COMPATIBLE_IOCTL(TIOCGPGRP)
865 COMPATIBLE_IOCTL(TIOCGPTN)
866 COMPATIBLE_IOCTL(TIOCSPTLCK)
867 COMPATIBLE_IOCTL(TIOCSERGETLSR)
868 COMPATIBLE_IOCTL(TIOCSIG)
> 869 COMPATIBLE_IOCTL(TIOCSRS485)
870 COMPATIBLE_IOCTL(TIOCGRS485)
871 #ifdef TCGETS2
872 COMPATIBLE_IOCTL(TCGETS2)

Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Jaeden Amero <jaeden.amero@ni.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/compat_ioctl.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'fs')

diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 85dfebfe6820..59f8db4a39a7 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -866,8 +866,12 @@ COMPATIBLE_IOCTL(TIOCGPTN)
 COMPATIBLE_IOCTL(TIOCSPTLCK)
 COMPATIBLE_IOCTL(TIOCSERGETLSR)
 COMPATIBLE_IOCTL(TIOCSIG)
+#ifdef TIOCSRS485
 COMPATIBLE_IOCTL(TIOCSRS485)
+#endif
+#ifdef TIOCGRS485
 COMPATIBLE_IOCTL(TIOCGRS485)
+#endif
 #ifdef TCGETS2
 COMPATIBLE_IOCTL(TCGETS2)
 COMPATIBLE_IOCTL(TCSETS2)
-- 
cgit 


From 4f2b86aba87a2654a1258ffe09c22ce70ab69d60 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Mon, 24 Sep 2012 09:33:40 -0400
Subject: cifs: change DOS/NT/POSIX mapping of ERRnoresource

ERRnoresource is an ERRSRV level (aka server-side) error and means "No
resources currently available for request". Currently that maps to POSIX
-ENOBUFS. No NT errors map to it currently.

NT_STATUS_INSUFFICIENT_RESOURCES and NT_STATUS_INSUFF_SERVER_RESOURCES
are also similar in meaning. Currently the client maps those to
ERRnomem, which maps to -ENOMEM in POSIX.

All of these mappings seem to be quite wrong to me and are confusing for
users. All of the above errors indicate problems on the server, not the
client. Reporting -ENOMEM or -ENOBUFS implies that the client is running
out of resources.

This patch changes those mappings. The NT_* errors are changed to map to
the SRV level ERRnoresource. That error is in turn changed to return
-EREMOTEIO which is the only POSIX error I could find that conveys that
something went wrong on the server. While we're at it, change the SMB2
equivalent error to return the same.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Acked-by: Suresh Jayaraman <sjayaraman@suse.com>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/netmisc.c      | 6 +++---
 fs/cifs/smb2maperror.c | 3 ++-
 2 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index e7bab3be5cf9..d5ce9e26696c 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -110,7 +110,7 @@ static const struct smb_to_posix_error mapping_table_ERRSRV[] = {
 	{ERRnoroom, -ENOSPC},
 	{ERRrmuns, -EUSERS},
 	{ERRtimeout, -ETIME},
-	{ERRnoresource, -ENOBUFS},
+	{ERRnoresource, -EREMOTEIO},
 	{ERRtoomanyuids, -EUSERS},
 	{ERRbaduid, -EACCES},
 	{ERRusempx, -EIO},
@@ -412,7 +412,7 @@ static const struct {
 	 from NT_STATUS_INSUFFICIENT_RESOURCES to
 	 NT_STATUS_INSUFF_SERVER_RESOURCES during the session setup } */
 	{
-	ERRDOS, ERRnomem, NT_STATUS_INSUFFICIENT_RESOURCES}, {
+	ERRDOS, ERRnoresource, NT_STATUS_INSUFFICIENT_RESOURCES}, {
 	ERRDOS, ERRbadpath, NT_STATUS_DFS_EXIT_PATH_FOUND}, {
 	ERRDOS, 23, NT_STATUS_DEVICE_DATA_ERROR}, {
 	ERRHRD, ERRgeneral, NT_STATUS_DEVICE_NOT_CONNECTED}, {
@@ -682,7 +682,7 @@ static const struct {
 	ERRHRD, ERRgeneral, NT_STATUS_NO_USER_SESSION_KEY}, {
 	ERRDOS, 59, NT_STATUS_USER_SESSION_DELETED}, {
 	ERRHRD, ERRgeneral, NT_STATUS_RESOURCE_LANG_NOT_FOUND}, {
-	ERRDOS, ERRnomem, NT_STATUS_INSUFF_SERVER_RESOURCES}, {
+	ERRDOS, ERRnoresource, NT_STATUS_INSUFF_SERVER_RESOURCES}, {
 	ERRHRD, ERRgeneral, NT_STATUS_INVALID_BUFFER_SIZE}, {
 	ERRHRD, ERRgeneral, NT_STATUS_INVALID_ADDRESS_COMPONENT}, {
 	ERRHRD, ERRgeneral, NT_STATUS_INVALID_ADDRESS_WILDCARD}, {
diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c
index eaf5466d4041..494c912c76fe 100644
--- a/fs/cifs/smb2maperror.c
+++ b/fs/cifs/smb2maperror.c
@@ -453,7 +453,8 @@ static const struct status_to_posix_error smb2_error_map_table[] = {
 	{STATUS_FILE_INVALID, -EIO, "STATUS_FILE_INVALID"},
 	{STATUS_ALLOTTED_SPACE_EXCEEDED, -EIO,
 	"STATUS_ALLOTTED_SPACE_EXCEEDED"},
-	{STATUS_INSUFFICIENT_RESOURCES, -EIO, "STATUS_INSUFFICIENT_RESOURCES"},
+	{STATUS_INSUFFICIENT_RESOURCES, -EREMOTEIO,
+				"STATUS_INSUFFICIENT_RESOURCES"},
 	{STATUS_DFS_EXIT_PATH_FOUND, -EIO, "STATUS_DFS_EXIT_PATH_FOUND"},
 	{STATUS_DEVICE_DATA_ERROR, -EIO, "STATUS_DEVICE_DATA_ERROR"},
 	{STATUS_DEVICE_NOT_CONNECTED, -EIO, "STATUS_DEVICE_NOT_CONNECTED"},
-- 
cgit 


From c3a58fecdd1934a8538ada9073107625f5151687 Mon Sep 17 00:00:00 2001
From: Carlos Maiolino <cmaiolino@redhat.com>
Date: Fri, 17 Aug 2012 18:19:38 -0300
Subject: Make inode64 a remountable option

Actually, there is no reason about why a user must umount and mount a
XFS filesystem to enable 'inode64' option. So, this patch makes this a
remountable option.

Signed-off-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
---
 fs/xfs/xfs_super.c | 30 +++++++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 19e2380fb867..c416a01fcb14 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -120,12 +120,13 @@ mempool_t *xfs_ioend_pool;
  * in the future, too.
  */
 enum {
-	Opt_barrier, Opt_nobarrier, Opt_err
+	Opt_barrier, Opt_nobarrier, Opt_inode64, Opt_err
 };
 
 static const match_table_t tokens = {
 	{Opt_barrier, "barrier"},
 	{Opt_nobarrier, "nobarrier"},
+	{Opt_inode64, "inode64"},
 	{Opt_err, NULL}
 };
 
@@ -1031,6 +1032,30 @@ xfs_restore_resvblks(struct xfs_mount *mp)
 	xfs_reserve_blocks(mp, &resblks, NULL);
 }
 
+STATIC void
+xfs_set_inode64(struct xfs_mount *mp)
+{
+	int i = 0;
+
+	for (i = 0; i < mp->m_sb.sb_agcount; i++) {
+		struct xfs_perag	*pag;
+
+		pag = xfs_perag_get(mp, i);
+		pag->pagi_inodeok = 1;
+		pag->pagf_metadata = 0;
+		xfs_perag_put(pag);
+	}
+
+	/* There is no need for lock protection on m_flags,
+	 * the rw_semaphore of the VFS superblock is locked
+	 * during mount/umount/remount operations, so this is
+	 * enough to avoid concurency on the m_flags field
+	 */
+	mp->m_flags &= ~(XFS_MOUNT_32BITINODES |
+			 XFS_MOUNT_SMALL_INUMS);
+	mp->m_maxagi = i;
+}
+
 STATIC int
 xfs_fs_remount(
 	struct super_block	*sb,
@@ -1056,6 +1081,9 @@ xfs_fs_remount(
 		case Opt_nobarrier:
 			mp->m_flags &= ~XFS_MOUNT_BARRIER;
 			break;
+		case Opt_inode64:
+			xfs_set_inode64(mp);
+			break;
 		default:
 			/*
 			 * Logically we would return an error here to prevent
-- 
cgit 


From 8aea3ff411b2ce8fe7b46644298ed243a920eb24 Mon Sep 17 00:00:00 2001
From: Carlos Maiolino <cmaiolino@redhat.com>
Date: Thu, 20 Sep 2012 10:32:36 -0300
Subject: xfs: Fix m_agirotor reset during AG selection

xfs_ialloc_next_ag() currently resets m_agirotor when it is equal to
m_maxagi:

         if (++mp->m_agirotor == mp->m_maxagi)
	         mp->m_agirotor = 0;

But, if for some reason mp->m_maxagi changes to a lower value than
current m_agirotor, this condition will never be true, causing
m_agirotor to exceed the maximum allowed value (m_maxagi).

This implies mainly during lookups for xfs_perag structs in its radix
tree, since the agno value used for the lookup is based on m_agirotor.
An out-of-range m_agirotor may cause a lookup failure which in case will
return NULL.

As an example, the value of m_maxagi is decreased during
inode64->inode32 remount process, case where I've found this problem.

Signed-off-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
---
 fs/xfs/xfs_ialloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 5aceb3f8ecd6..445bf1aef31c 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -431,7 +431,7 @@ xfs_ialloc_next_ag(
 
 	spin_lock(&mp->m_agirotor_lock);
 	agno = mp->m_agirotor;
-	if (++mp->m_agirotor == mp->m_maxagi)
+	if (++mp->m_agirotor >= mp->m_maxagi)
 		mp->m_agirotor = 0;
 	spin_unlock(&mp->m_agirotor_lock);
 
-- 
cgit 


From 08bf540412ed82a15cb9068249ad49b410a7b082 Mon Sep 17 00:00:00 2001
From: Carlos Maiolino <cmaiolino@redhat.com>
Date: Thu, 20 Sep 2012 10:32:37 -0300
Subject: xfs: make inode64 as the default allocation mode

since 64-bit inodes can be accessed while using inode32, and these can
also be used on 32-bit kernels, there is no reason to still keep inode32
as the default mount option.  If the filesystem cannot handle 64bit
inode numbers (i.e CONFIG_LBDAF is not enabled and BITS_PER_LONG == 32),
XFS_MOUNT_SMALL_INUMS will still be set by default, so inode64 is not an
unconditional default value.

Signed-off-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
---
 fs/xfs/xfs_super.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'fs')

diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index c416a01fcb14..996257d36fd1 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -88,6 +88,8 @@ mempool_t *xfs_ioend_pool;
 					 * unwritten extent conversion */
 #define MNTOPT_NOBARRIER "nobarrier"	/* .. disable */
 #define MNTOPT_64BITINODE   "inode64"	/* inodes can be allocated anywhere */
+#define MNTOPT_32BITINODE   "inode32"	/* inode allocation limited to
+					 * XFS_MAXINUMBER_32 */
 #define MNTOPT_IKEEP	"ikeep"		/* do not free empty inode clusters */
 #define MNTOPT_NOIKEEP	"noikeep"	/* free empty inode clusters */
 #define MNTOPT_LARGEIO	   "largeio"	/* report large I/O sizes in stat() */
@@ -198,7 +200,9 @@ xfs_parseargs(
 	 */
 	mp->m_flags |= XFS_MOUNT_BARRIER;
 	mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
+#if !XFS_BIG_INUMS
 	mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
+#endif
 
 	/*
 	 * These can be overridden by the mount option parsing.
@@ -295,6 +299,8 @@ xfs_parseargs(
 				return EINVAL;
 			}
 			dswidth = simple_strtoul(value, &eov, 10);
+		} else if (!strcmp(this_char, MNTOPT_32BITINODE)) {
+			mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
 		} else if (!strcmp(this_char, MNTOPT_64BITINODE)) {
 			mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
 #if !XFS_BIG_INUMS
@@ -493,6 +499,7 @@ xfs_showargs(
 		{ XFS_MOUNT_FILESTREAMS,	"," MNTOPT_FILESTREAM },
 		{ XFS_MOUNT_GRPID,		"," MNTOPT_GRPID },
 		{ XFS_MOUNT_DISCARD,		"," MNTOPT_DISCARD },
+		{ XFS_MOUNT_SMALL_INUMS,	"," MNTOPT_32BITINODE },
 		{ 0, NULL }
 	};
 	static struct proc_xfs_info xfs_info_unset[] = {
-- 
cgit 


From 2d2194f61fddab3a9731b6e7a7ae3a4a19dd810c Mon Sep 17 00:00:00 2001
From: Carlos Maiolino <cmaiolino@redhat.com>
Date: Thu, 20 Sep 2012 10:32:38 -0300
Subject: xfs: reduce code duplication handling inode32/64 options

Add xfs_set_inode32() to be used to enable inode32 allocation mode. this
will reduce the amount of duplicated code needed to mount/remount a
filesystem with inode32 option.  This patch also changes
xfs_set_inode64() to return the maximum AG number that inodes can be
allocated instead of set mp->m_maxagi by itself, so that the behaviour
is the same as xfs_set_inode32().  This simplifies code that calls these
functions and needs to know the maximum AG that inodes can be allocated
in.

Signed-off-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
---
 fs/xfs/xfs_mount.c | 43 +++-----------------------
 fs/xfs/xfs_super.c | 89 +++++++++++++++++++++++++++++++++++++++---------------
 fs/xfs/xfs_super.h |  2 ++
 3 files changed, 72 insertions(+), 62 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 29c2f83d4147..b2bd3a0e6376 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -440,7 +440,7 @@ xfs_initialize_perag(
 	xfs_agnumber_t	agcount,
 	xfs_agnumber_t	*maxagi)
 {
-	xfs_agnumber_t	index, max_metadata;
+	xfs_agnumber_t	index;
 	xfs_agnumber_t	first_initialised = 0;
 	xfs_perag_t	*pag;
 	xfs_agino_t	agino;
@@ -500,43 +500,10 @@ xfs_initialize_perag(
 	else
 		mp->m_flags &= ~XFS_MOUNT_32BITINODES;
 
-	if (mp->m_flags & XFS_MOUNT_32BITINODES) {
-		/*
-		 * Calculate how much should be reserved for inodes to meet
-		 * the max inode percentage.
-		 */
-		if (mp->m_maxicount) {
-			__uint64_t	icount;
-
-			icount = sbp->sb_dblocks * sbp->sb_imax_pct;
-			do_div(icount, 100);
-			icount += sbp->sb_agblocks - 1;
-			do_div(icount, sbp->sb_agblocks);
-			max_metadata = icount;
-		} else {
-			max_metadata = agcount;
-		}
-
-		for (index = 0; index < agcount; index++) {
-			ino = XFS_AGINO_TO_INO(mp, index, agino);
-			if (ino > XFS_MAXINUMBER_32) {
-				index++;
-				break;
-			}
-
-			pag = xfs_perag_get(mp, index);
-			pag->pagi_inodeok = 1;
-			if (index < max_metadata)
-				pag->pagf_metadata = 1;
-			xfs_perag_put(pag);
-		}
-	} else {
-		for (index = 0; index < agcount; index++) {
-			pag = xfs_perag_get(mp, index);
-			pag->pagi_inodeok = 1;
-			xfs_perag_put(pag);
-		}
-	}
+	if (mp->m_flags & XFS_MOUNT_32BITINODES)
+		index = xfs_set_inode32(mp);
+	else
+		index = xfs_set_inode64(mp);
 
 	if (maxagi)
 		*maxagi = index;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 996257d36fd1..d6619d685531 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -599,6 +599,71 @@ xfs_max_file_offset(
 	return (((__uint64_t)pagefactor) << bitshift) - 1;
 }
 
+xfs_agnumber_t
+xfs_set_inode32(struct xfs_mount *mp)
+{
+	xfs_agnumber_t	index = 0;
+	xfs_sb_t	*sbp = &mp->m_sb;
+	xfs_agnumber_t	max_metadata;
+	xfs_agino_t	agino =	XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks -1, 0);
+	xfs_ino_t	ino = XFS_AGINO_TO_INO(mp, sbp->sb_agcount -1, agino);
+	xfs_perag_t	*pag;
+
+	/* Calculate how much should be reserved for inodes to meet
+	 * the max inode percentage.
+	 */
+	if (mp->m_maxicount) {
+		__uint64_t	icount;
+
+		icount = sbp->sb_dblocks * sbp->sb_imax_pct;
+		do_div(icount, 100);
+		icount += sbp->sb_agblocks - 1;
+		do_div(icount, sbp->sb_agblocks);
+		max_metadata = icount;
+	} else {
+		max_metadata = sbp->sb_agcount;
+	}
+
+	for (index = 0; index < sbp->sb_agcount; index++) {
+		ino = XFS_AGINO_TO_INO(mp, index, agino);
+		if (ino > XFS_MAXINUMBER_32) {
+			index++;
+			break;
+		}
+
+		pag = xfs_perag_get(mp, index);
+		pag->pagi_inodeok = 1;
+		if (index < max_metadata)
+			pag->pagf_metadata = 1;
+		xfs_perag_put(pag);
+	}
+	return index;
+}
+
+xfs_agnumber_t
+xfs_set_inode64(struct xfs_mount *mp)
+{
+	xfs_agnumber_t index = 0;
+
+	for (index = 0; index < mp->m_sb.sb_agcount; index++) {
+		struct xfs_perag	*pag;
+
+		pag = xfs_perag_get(mp, index);
+		pag->pagi_inodeok = 1;
+		pag->pagf_metadata = 0;
+		xfs_perag_put(pag);
+	}
+
+	/* There is no need for lock protection on m_flags,
+	 * the rw_semaphore of the VFS superblock is locked
+	 * during mount/umount/remount operations, so this is
+	 * enough to avoid concurency on the m_flags field
+	 */
+	mp->m_flags &= ~(XFS_MOUNT_32BITINODES |
+			 XFS_MOUNT_SMALL_INUMS);
+	return index;
+}
+
 STATIC int
 xfs_blkdev_get(
 	xfs_mount_t		*mp,
@@ -1039,30 +1104,6 @@ xfs_restore_resvblks(struct xfs_mount *mp)
 	xfs_reserve_blocks(mp, &resblks, NULL);
 }
 
-STATIC void
-xfs_set_inode64(struct xfs_mount *mp)
-{
-	int i = 0;
-
-	for (i = 0; i < mp->m_sb.sb_agcount; i++) {
-		struct xfs_perag	*pag;
-
-		pag = xfs_perag_get(mp, i);
-		pag->pagi_inodeok = 1;
-		pag->pagf_metadata = 0;
-		xfs_perag_put(pag);
-	}
-
-	/* There is no need for lock protection on m_flags,
-	 * the rw_semaphore of the VFS superblock is locked
-	 * during mount/umount/remount operations, so this is
-	 * enough to avoid concurency on the m_flags field
-	 */
-	mp->m_flags &= ~(XFS_MOUNT_32BITINODES |
-			 XFS_MOUNT_SMALL_INUMS);
-	mp->m_maxagi = i;
-}
-
 STATIC int
 xfs_fs_remount(
 	struct super_block	*sb,
diff --git a/fs/xfs/xfs_super.h b/fs/xfs/xfs_super.h
index 09b0c26b2245..9de4a920ba05 100644
--- a/fs/xfs/xfs_super.h
+++ b/fs/xfs/xfs_super.h
@@ -75,6 +75,8 @@ struct block_device;
 extern __uint64_t xfs_max_file_offset(unsigned int);
 
 extern void xfs_blkdev_issue_flush(struct xfs_buftarg *);
+extern xfs_agnumber_t xfs_set_inode32(struct xfs_mount *);
+extern xfs_agnumber_t xfs_set_inode64(struct xfs_mount *);
 
 extern const struct export_operations xfs_export_operations;
 extern const struct xattr_handler *xfs_xattr_handlers[];
-- 
cgit 


From 4c0837224c677db35cd85b04a77504c496cadb66 Mon Sep 17 00:00:00 2001
From: Carlos Maiolino <cmaiolino@redhat.com>
Date: Thu, 20 Sep 2012 10:32:39 -0300
Subject: xfs: Fix mp->m_maxagi update during inode64 remount

With the changes made on xfs_set_inode64(), to make it behave as
xfs_set_inode32() (now leaving to the caller the responsibility to update
mp->m_maxagi), we use the return value of xfs_set_inode64() to update
mp->m_maxagi during remount.

Signed-off-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
---
 fs/xfs/xfs_super.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index d6619d685531..aeb03f9a8967 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1130,7 +1130,7 @@ xfs_fs_remount(
 			mp->m_flags &= ~XFS_MOUNT_BARRIER;
 			break;
 		case Opt_inode64:
-			xfs_set_inode64(mp);
+			mp->m_maxagi = xfs_set_inode64(mp);
 			break;
 		default:
 			/*
-- 
cgit 


From 4056c1d08d2a7c50ae7414db7c1783ba45b4835d Mon Sep 17 00:00:00 2001
From: Carlos Maiolino <cmaiolino@redhat.com>
Date: Thu, 20 Sep 2012 10:32:40 -0300
Subject: xfs: add inode64->inode32 transition into xfs_set_inode32()

To make inode32 a remountable option, xfs_set_inode32() should be able
to make a transition from inode64 option, disabling inode allocation on
higher AGs.

Signed-off-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
---
 fs/xfs/xfs_super.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index aeb03f9a8967..168d4984ce89 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -603,6 +603,7 @@ xfs_agnumber_t
 xfs_set_inode32(struct xfs_mount *mp)
 {
 	xfs_agnumber_t	index = 0;
+	xfs_agnumber_t	maxagi = 0;
 	xfs_sb_t	*sbp = &mp->m_sb;
 	xfs_agnumber_t	max_metadata;
 	xfs_agino_t	agino =	XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks -1, 0);
@@ -626,18 +627,26 @@ xfs_set_inode32(struct xfs_mount *mp)
 
 	for (index = 0; index < sbp->sb_agcount; index++) {
 		ino = XFS_AGINO_TO_INO(mp, index, agino);
+
 		if (ino > XFS_MAXINUMBER_32) {
-			index++;
-			break;
+			pag = xfs_perag_get(mp, index);
+			pag->pagi_inodeok = 0;
+			pag->pagf_metadata = 0;
+			xfs_perag_put(pag);
+			continue;
 		}
 
 		pag = xfs_perag_get(mp, index);
 		pag->pagi_inodeok = 1;
+		maxagi++;
 		if (index < max_metadata)
 			pag->pagf_metadata = 1;
 		xfs_perag_put(pag);
 	}
-	return index;
+	mp->m_flags |= (XFS_MOUNT_32BITINODES |
+			XFS_MOUNT_SMALL_INUMS);
+
+	return maxagi;
 }
 
 xfs_agnumber_t
-- 
cgit 


From 2ea0392983a82f7dc3055568ae0f2558724d119b Mon Sep 17 00:00:00 2001
From: Carlos Maiolino <cmaiolino@redhat.com>
Date: Thu, 20 Sep 2012 10:32:41 -0300
Subject: xfs: Make inode32 a remountable option

As inode64 is the default option now, and was also made remountable
previously, inode32 can also be remounted on-the-fly when it is needed.

Signed-off-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
---
 fs/xfs/xfs_super.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 168d4984ce89..d93f2c7364cc 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -122,13 +122,18 @@ mempool_t *xfs_ioend_pool;
  * in the future, too.
  */
 enum {
-	Opt_barrier, Opt_nobarrier, Opt_inode64, Opt_err
+	Opt_barrier,
+	Opt_nobarrier,
+	Opt_inode64,
+	Opt_inode32,
+	Opt_err
 };
 
 static const match_table_t tokens = {
 	{Opt_barrier, "barrier"},
 	{Opt_nobarrier, "nobarrier"},
 	{Opt_inode64, "inode64"},
+	{Opt_inode32, "inode32"},
 	{Opt_err, NULL}
 };
 
@@ -1141,6 +1146,9 @@ xfs_fs_remount(
 		case Opt_inode64:
 			mp->m_maxagi = xfs_set_inode64(mp);
 			break;
+		case Opt_inode32:
+			mp->m_maxagi = xfs_set_inode32(mp);
+			break;
 		default:
 			/*
 			 * Logically we would return an error here to prevent
-- 
cgit 


From 1a7bd2265fc57f29400d57f66275cc5918e30aa6 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 12 Aug 2012 17:18:05 -0400
Subject: make get_unused_fd_flags() a function

... and get_unused_fd() a macro around it

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/file.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/file.c b/fs/file.c
index ba3f6053025c..5b46e9970a7a 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -477,8 +477,8 @@ out:
 	return error;
 }
 
-int get_unused_fd(void)
+int get_unused_fd_flags(unsigned flags)
 {
-	return alloc_fd(0, 0);
+	return alloc_fd(0, flags);
 }
-EXPORT_SYMBOL(get_unused_fd);
+EXPORT_SYMBOL(get_unused_fd_flags);
-- 
cgit 


From c921b40d6201f7ec7b1edf7ea9a844f93e1a27f4 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 12 Aug 2012 18:04:37 -0400
Subject: autofs4: don't open-code fd_install()

The only difference between autofs_dev_ioctl_fd_install() and
fd_install() is __set_close_on_exec() done by the latter.  Just
use get_unused_fd_flags(O_CLOEXEC) to allocate the descriptor
and be done with that...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/autofs4/dev-ioctl.c | 18 ++----------------
 1 file changed, 2 insertions(+), 16 deletions(-)

(limited to 'fs')

diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index abf645c1703b..a16214109d31 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -221,20 +221,6 @@ static int test_by_type(struct path *path, void *p)
 	return ino && ino->sbi->type & *(unsigned *)p;
 }
 
-static void autofs_dev_ioctl_fd_install(unsigned int fd, struct file *file)
-{
-	struct files_struct *files = current->files;
-	struct fdtable *fdt;
-
-	spin_lock(&files->file_lock);
-	fdt = files_fdtable(files);
-	BUG_ON(fdt->fd[fd] != NULL);
-	rcu_assign_pointer(fdt->fd[fd], file);
-	__set_close_on_exec(fd, fdt);
-	spin_unlock(&files->file_lock);
-}
-
-
 /*
  * Open a file descriptor on the autofs mount point corresponding
  * to the given path and device number (aka. new_encode_dev(sb->s_dev)).
@@ -243,7 +229,7 @@ static int autofs_dev_ioctl_open_mountpoint(const char *name, dev_t devid)
 {
 	int err, fd;
 
-	fd = get_unused_fd();
+	fd = get_unused_fd_flags(O_CLOEXEC);
 	if (likely(fd >= 0)) {
 		struct file *filp;
 		struct path path;
@@ -264,7 +250,7 @@ static int autofs_dev_ioctl_open_mountpoint(const char *name, dev_t devid)
 			goto out;
 		}
 
-		autofs_dev_ioctl_fd_install(fd, filp);
+		fd_install(fd, filp);
 	}
 
 	return fd;
-- 
cgit 


From 5b249b1b07c42c61d0c53b8ab4fad026e39a9be9 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 19 Aug 2012 12:17:29 -0400
Subject: pipe(2) - race-free error recovery

don't mess with sys_close() if copy_to_user() fails; just postpone
fd_install() until we know it hasn't.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/pipe.c | 31 ++++++++++++++++++++++---------
 1 file changed, 22 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/pipe.c b/fs/pipe.c
index 8d85d7068c1e..bd3479db4b62 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1064,9 +1064,8 @@ err_inode:
 	return err;
 }
 
-int do_pipe_flags(int *fd, int flags)
+static int __do_pipe_flags(int *fd, struct file **files, int flags)
 {
-	struct file *files[2];
 	int error;
 	int fdw, fdr;
 
@@ -1088,11 +1087,8 @@ int do_pipe_flags(int *fd, int flags)
 	fdw = error;
 
 	audit_fd_pair(fdr, fdw);
-	fd_install(fdr, files[0]);
-	fd_install(fdw, files[1]);
 	fd[0] = fdr;
 	fd[1] = fdw;
-
 	return 0;
 
  err_fdr:
@@ -1103,21 +1099,38 @@ int do_pipe_flags(int *fd, int flags)
 	return error;
 }
 
+int do_pipe_flags(int *fd, int flags)
+{
+	struct file *files[2];
+	int error = __do_pipe_flags(fd, files, flags);
+	if (!error) {
+		fd_install(fd[0], files[0]);
+		fd_install(fd[1], files[1]);
+	}
+	return error;
+}
+
 /*
  * sys_pipe() is the normal C calling standard for creating
  * a pipe. It's not the way Unix traditionally does this, though.
  */
 SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags)
 {
+	struct file *files[2];
 	int fd[2];
 	int error;
 
-	error = do_pipe_flags(fd, flags);
+	error = __do_pipe_flags(fd, files, flags);
 	if (!error) {
-		if (copy_to_user(fildes, fd, sizeof(fd))) {
-			sys_close(fd[0]);
-			sys_close(fd[1]);
+		if (unlikely(copy_to_user(fildes, fd, sizeof(fd)))) {
+			fput(files[0]);
+			fput(files[1]);
+			put_unused_fd(fd[0]);
+			put_unused_fd(fd[1]);
 			error = -EFAULT;
+		} else {
+			fd_install(fd[0], files[0]);
+			fd_install(fd[1], files[1]);
 		}
 	}
 	return error;
-- 
cgit 


From 352e3b249284235e00745f3e71fc348b913e5deb Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 19 Aug 2012 12:30:45 -0400
Subject: fanotify: sanitize failure exits in copy_event_to_user()

* do copy_to_user() before prepare_for_access_response(); that kills
the need in remove_access_response().
* don't do fd_install() until we are past the last possible failure
exit.  Don't use sys_close() on cleanup side - just put_unused_fd()
and fput().  Less racy that way...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/notify/fanotify/fanotify_user.c | 59 +++++++++++++-------------------------
 1 file changed, 20 insertions(+), 39 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index d43803669739..ea48693940f1 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -58,7 +58,9 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
 	return fsnotify_remove_notify_event(group);
 }
 
-static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event)
+static int create_fd(struct fsnotify_group *group,
+			struct fsnotify_event *event,
+			struct file **file)
 {
 	int client_fd;
 	struct file *new_file;
@@ -98,7 +100,7 @@ static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event)
 		put_unused_fd(client_fd);
 		client_fd = PTR_ERR(new_file);
 	} else {
-		fd_install(client_fd, new_file);
+		*file = new_file;
 	}
 
 	return client_fd;
@@ -106,13 +108,15 @@ static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event)
 
 static int fill_event_metadata(struct fsnotify_group *group,
 				   struct fanotify_event_metadata *metadata,
-				   struct fsnotify_event *event)
+				   struct fsnotify_event *event,
+				   struct file **file)
 {
 	int ret = 0;
 
 	pr_debug("%s: group=%p metadata=%p event=%p\n", __func__,
 		 group, metadata, event);
 
+	*file = NULL;
 	metadata->event_len = FAN_EVENT_METADATA_LEN;
 	metadata->metadata_len = FAN_EVENT_METADATA_LEN;
 	metadata->vers = FANOTIFY_METADATA_VERSION;
@@ -121,7 +125,7 @@ static int fill_event_metadata(struct fsnotify_group *group,
 	if (unlikely(event->mask & FAN_Q_OVERFLOW))
 		metadata->fd = FAN_NOFD;
 	else {
-		metadata->fd = create_fd(group, event);
+		metadata->fd = create_fd(group, event, file);
 		if (metadata->fd < 0)
 			ret = metadata->fd;
 	}
@@ -220,25 +224,6 @@ static int prepare_for_access_response(struct fsnotify_group *group,
 	return 0;
 }
 
-static void remove_access_response(struct fsnotify_group *group,
-				   struct fsnotify_event *event,
-				   __s32 fd)
-{
-	struct fanotify_response_event *re;
-
-	if (!(event->mask & FAN_ALL_PERM_EVENTS))
-		return;
-
-	re = dequeue_re(group, fd);
-	if (!re)
-		return;
-
-	BUG_ON(re->event != event);
-
-	kmem_cache_free(fanotify_response_event_cache, re);
-
-	return;
-}
 #else
 static int prepare_for_access_response(struct fsnotify_group *group,
 				       struct fsnotify_event *event,
@@ -247,12 +232,6 @@ static int prepare_for_access_response(struct fsnotify_group *group,
 	return 0;
 }
 
-static void remove_access_response(struct fsnotify_group *group,
-				   struct fsnotify_event *event,
-				   __s32 fd)
-{
-	return;
-}
 #endif
 
 static ssize_t copy_event_to_user(struct fsnotify_group *group,
@@ -260,31 +239,33 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
 				  char __user *buf)
 {
 	struct fanotify_event_metadata fanotify_event_metadata;
+	struct file *f;
 	int fd, ret;
 
 	pr_debug("%s: group=%p event=%p\n", __func__, group, event);
 
-	ret = fill_event_metadata(group, &fanotify_event_metadata, event);
+	ret = fill_event_metadata(group, &fanotify_event_metadata, event, &f);
 	if (ret < 0)
 		goto out;
 
 	fd = fanotify_event_metadata.fd;
-	ret = prepare_for_access_response(group, event, fd);
-	if (ret)
-		goto out_close_fd;
-
 	ret = -EFAULT;
 	if (copy_to_user(buf, &fanotify_event_metadata,
 			 fanotify_event_metadata.event_len))
-		goto out_kill_access_response;
+		goto out_close_fd;
+
+	ret = prepare_for_access_response(group, event, fd);
+	if (ret)
+		goto out_close_fd;
 
+	fd_install(fd, f);
 	return fanotify_event_metadata.event_len;
 
-out_kill_access_response:
-	remove_access_response(group, event, fd);
 out_close_fd:
-	if (fd != FAN_NOFD)
-		sys_close(fd);
+	if (fd != FAN_NOFD) {
+		put_unused_fd(fd);
+		fput(f);
+	}
 out:
 #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
 	if (event->mask & FAN_ALL_PERM_EVENTS) {
-- 
cgit 


From f33ff9927f42045116d738ee47ff7bc59f739bd7 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 12 Aug 2012 16:17:59 -0400
Subject: take rlimit check to callers of expand_files()

... except for one in android, where the check is different
and already done in caller.  No need to recalculate rlimit
many times in alloc_fd() either.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/fcntl.c |  3 +++
 fs/file.c  | 16 +++++++++-------
 2 files changed, 12 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/fcntl.c b/fs/fcntl.c
index 887b5ba8c9b5..08e6af5c1b1f 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -64,6 +64,9 @@ SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags)
 	if (unlikely(oldfd == newfd))
 		return -EINVAL;
 
+	if (newfd >= rlimit(RLIMIT_NOFILE))
+		return -EMFILE;
+
 	spin_lock(&files->file_lock);
 	err = expand_files(files, newfd);
 	file = fcheck(oldfd);
diff --git a/fs/file.c b/fs/file.c
index 5b46e9970a7a..08922af4a62c 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -251,13 +251,6 @@ int expand_files(struct files_struct *files, int nr)
 
 	fdt = files_fdtable(files);
 
-	/*
-	 * N.B. For clone tasks sharing a files structure, this test
-	 * will limit the total number of files that can be opened.
-	 */
-	if (nr >= rlimit(RLIMIT_NOFILE))
-		return -EMFILE;
-
 	/* Do we need to expand? */
 	if (nr < fdt->max_fds)
 		return 0;
@@ -431,6 +424,7 @@ int alloc_fd(unsigned start, unsigned flags)
 {
 	struct files_struct *files = current->files;
 	unsigned int fd;
+	unsigned end = rlimit(RLIMIT_NOFILE);
 	int error;
 	struct fdtable *fdt;
 
@@ -444,6 +438,14 @@ repeat:
 	if (fd < fdt->max_fds)
 		fd = find_next_zero_bit(fdt->open_fds, fdt->max_fds, fd);
 
+	/*
+	 * N.B. For clone tasks sharing a files structure, this test
+	 * will limit the total number of files that can be opened.
+	 */
+	error = -EMFILE;
+	if (fd >= end)
+		goto out;
+
 	error = expand_files(files, fd);
 	if (error < 0)
 		goto out;
-- 
cgit 


From dcfadfa4ec5a12404a99ad6426871a6b03a62b37 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 12 Aug 2012 17:27:30 -0400
Subject: new helper: __alloc_fd()

Essentially, alloc_fd() in a files_struct we own a reference to.
Most of the time wanting to use it is a sign of lousy API
design (such as android/binder).  It's *not* a general-purpose
interface; better that than open-coding its guts, but again,
playing with other process' descriptor table is a sign of bad
design.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/file.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/file.c b/fs/file.c
index 08922af4a62c..a3a0705f8f51 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -420,11 +420,10 @@ struct files_struct init_files = {
 /*
  * allocate a file descriptor, mark it busy.
  */
-int alloc_fd(unsigned start, unsigned flags)
+int __alloc_fd(struct files_struct *files,
+	       unsigned start, unsigned end, unsigned flags)
 {
-	struct files_struct *files = current->files;
 	unsigned int fd;
-	unsigned end = rlimit(RLIMIT_NOFILE);
 	int error;
 	struct fdtable *fdt;
 
@@ -479,8 +478,13 @@ out:
 	return error;
 }
 
+int alloc_fd(unsigned start, unsigned flags)
+{
+	return __alloc_fd(current->files, start, rlimit(RLIMIT_NOFILE), flags);
+}
+
 int get_unused_fd_flags(unsigned flags)
 {
-	return alloc_fd(0, flags);
+	return __alloc_fd(current->files, 0, rlimit(RLIMIT_NOFILE), flags);
 }
 EXPORT_SYMBOL(get_unused_fd_flags);
-- 
cgit 


From 7cf4dc3c8dbfdfde163d4636f621cf99a1f63bfb Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 15 Aug 2012 19:56:12 -0400
Subject: move files_struct-related bits from kernel/exit.c to fs/file.c

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/file.c | 100 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 99 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/file.c b/fs/file.c
index a3a0705f8f51..0be423cadb26 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -84,7 +84,7 @@ static void free_fdtable_work(struct work_struct *work)
 	}
 }
 
-void free_fdtable_rcu(struct rcu_head *rcu)
+static void free_fdtable_rcu(struct rcu_head *rcu)
 {
 	struct fdtable *fdt = container_of(rcu, struct fdtable, rcu);
 	struct fdtable_defer *fddef;
@@ -116,6 +116,11 @@ void free_fdtable_rcu(struct rcu_head *rcu)
 	}
 }
 
+static inline void free_fdtable(struct fdtable *fdt)
+{
+	call_rcu(&fdt->rcu, free_fdtable_rcu);
+}
+
 /*
  * Expand the fdset in the files_struct.  Called with the files spinlock
  * held for write.
@@ -388,6 +393,99 @@ out:
 	return NULL;
 }
 
+static void close_files(struct files_struct * files)
+{
+	int i, j;
+	struct fdtable *fdt;
+
+	j = 0;
+
+	/*
+	 * It is safe to dereference the fd table without RCU or
+	 * ->file_lock because this is the last reference to the
+	 * files structure.  But use RCU to shut RCU-lockdep up.
+	 */
+	rcu_read_lock();
+	fdt = files_fdtable(files);
+	rcu_read_unlock();
+	for (;;) {
+		unsigned long set;
+		i = j * BITS_PER_LONG;
+		if (i >= fdt->max_fds)
+			break;
+		set = fdt->open_fds[j++];
+		while (set) {
+			if (set & 1) {
+				struct file * file = xchg(&fdt->fd[i], NULL);
+				if (file) {
+					filp_close(file, files);
+					cond_resched();
+				}
+			}
+			i++;
+			set >>= 1;
+		}
+	}
+}
+
+struct files_struct *get_files_struct(struct task_struct *task)
+{
+	struct files_struct *files;
+
+	task_lock(task);
+	files = task->files;
+	if (files)
+		atomic_inc(&files->count);
+	task_unlock(task);
+
+	return files;
+}
+
+void put_files_struct(struct files_struct *files)
+{
+	struct fdtable *fdt;
+
+	if (atomic_dec_and_test(&files->count)) {
+		close_files(files);
+		/*
+		 * Free the fd and fdset arrays if we expanded them.
+		 * If the fdtable was embedded, pass files for freeing
+		 * at the end of the RCU grace period. Otherwise,
+		 * you can free files immediately.
+		 */
+		rcu_read_lock();
+		fdt = files_fdtable(files);
+		if (fdt != &files->fdtab)
+			kmem_cache_free(files_cachep, files);
+		free_fdtable(fdt);
+		rcu_read_unlock();
+	}
+}
+
+void reset_files_struct(struct files_struct *files)
+{
+	struct task_struct *tsk = current;
+	struct files_struct *old;
+
+	old = tsk->files;
+	task_lock(tsk);
+	tsk->files = files;
+	task_unlock(tsk);
+	put_files_struct(old);
+}
+
+void exit_files(struct task_struct *tsk)
+{
+	struct files_struct * files = tsk->files;
+
+	if (files) {
+		task_lock(tsk);
+		tsk->files = NULL;
+		task_unlock(tsk);
+		put_files_struct(files);
+	}
+}
+
 static void __devinit fdtable_defer_list_init(int cpu)
 {
 	struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu);
-- 
cgit 


From b9e02af0ae0783894abb576fbab45ec29aa8e7fc Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 15 Aug 2012 20:00:58 -0400
Subject: don't bother with call_rcu() in put_files_struct()

At that point nobody can see us anyway; everything that
looks at files_fdtable(files) is separated from the
guts of put_files_struct(files) - either since files is
current->files or because we fetched it under task_lock()
and hadn't dropped that yet, or because we'd bumped
files->count while holding task_lock()...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/file.c | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/file.c b/fs/file.c
index 0be423cadb26..533fa5d56a5f 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -447,18 +447,14 @@ void put_files_struct(struct files_struct *files)
 
 	if (atomic_dec_and_test(&files->count)) {
 		close_files(files);
-		/*
-		 * Free the fd and fdset arrays if we expanded them.
-		 * If the fdtable was embedded, pass files for freeing
-		 * at the end of the RCU grace period. Otherwise,
-		 * you can free files immediately.
-		 */
+		/* not really needed, since nobody can see us */
 		rcu_read_lock();
 		fdt = files_fdtable(files);
-		if (fdt != &files->fdtab)
-			kmem_cache_free(files_cachep, files);
-		free_fdtable(fdt);
 		rcu_read_unlock();
+		/* free the arrays if they are not embedded */
+		if (fdt != &files->fdtab)
+			__free_fdtable(fdt);
+		kmem_cache_free(files_cachep, files);
 	}
 }
 
-- 
cgit 


From 1983e781da2f7f77906f4ccc2c3dc279cd61d1ff Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 15 Aug 2012 20:06:36 -0400
Subject: trim free_fdtable_rcu()

embedded case isn't hit anymore

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/file.c | 17 ++---------------
 1 file changed, 2 insertions(+), 15 deletions(-)

(limited to 'fs')

diff --git a/fs/file.c b/fs/file.c
index 533fa5d56a5f..4ce4a0fcf320 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -90,16 +90,8 @@ static void free_fdtable_rcu(struct rcu_head *rcu)
 	struct fdtable_defer *fddef;
 
 	BUG_ON(!fdt);
+	BUG_ON(fdt->max_fds <= NR_OPEN_DEFAULT);
 
-	if (fdt->max_fds <= NR_OPEN_DEFAULT) {
-		/*
-		 * This fdtable is embedded in the files structure and that
-		 * structure itself is getting destroyed.
-		 */
-		kmem_cache_free(files_cachep,
-				container_of(fdt, struct files_struct, fdtab));
-		return;
-	}
 	if (!is_vmalloc_addr(fdt->fd) && !is_vmalloc_addr(fdt->open_fds)) {
 		kfree(fdt->fd);
 		kfree(fdt->open_fds);
@@ -116,11 +108,6 @@ static void free_fdtable_rcu(struct rcu_head *rcu)
 	}
 }
 
-static inline void free_fdtable(struct fdtable *fdt)
-{
-	call_rcu(&fdt->rcu, free_fdtable_rcu);
-}
-
 /*
  * Expand the fdset in the files_struct.  Called with the files spinlock
  * held for write.
@@ -234,7 +221,7 @@ static int expand_fdtable(struct files_struct *files, int nr)
 		copy_fdtable(new_fdt, cur_fdt);
 		rcu_assign_pointer(files->fdt, new_fdt);
 		if (cur_fdt->max_fds > NR_OPEN_DEFAULT)
-			free_fdtable(cur_fdt);
+			call_rcu(&cur_fdt->rcu, free_fdtable_rcu);
 	} else {
 		/* Somebody else expanded, so undo our attempt */
 		__free_fdtable(new_fdt);
-- 
cgit 


From 56007cae94f349387c088e738c7dcb6bc513063b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 15 Aug 2012 21:03:26 -0400
Subject: move put_unused_fd() and fd_install() to fs/file.c

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/file.c | 44 ++++++++++++++++++++++++++++++++++++++++++++
 fs/open.c | 44 --------------------------------------------
 2 files changed, 44 insertions(+), 44 deletions(-)

(limited to 'fs')

diff --git a/fs/file.c b/fs/file.c
index 4ce4a0fcf320..78cf88f2a0e8 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -569,3 +569,47 @@ int get_unused_fd_flags(unsigned flags)
 	return __alloc_fd(current->files, 0, rlimit(RLIMIT_NOFILE), flags);
 }
 EXPORT_SYMBOL(get_unused_fd_flags);
+
+static void __put_unused_fd(struct files_struct *files, unsigned int fd)
+{
+	struct fdtable *fdt = files_fdtable(files);
+	__clear_open_fd(fd, fdt);
+	if (fd < files->next_fd)
+		files->next_fd = fd;
+}
+
+void put_unused_fd(unsigned int fd)
+{
+	struct files_struct *files = current->files;
+	spin_lock(&files->file_lock);
+	__put_unused_fd(files, fd);
+	spin_unlock(&files->file_lock);
+}
+
+EXPORT_SYMBOL(put_unused_fd);
+
+/*
+ * Install a file pointer in the fd array.
+ *
+ * The VFS is full of places where we drop the files lock between
+ * setting the open_fds bitmap and installing the file in the file
+ * array.  At any such point, we are vulnerable to a dup2() race
+ * installing a file in the array before us.  We need to detect this and
+ * fput() the struct file we are about to overwrite in this case.
+ *
+ * It should never happen - if we allow dup2() do it, _really_ bad things
+ * will follow.
+ */
+
+void fd_install(unsigned int fd, struct file *file)
+{
+	struct files_struct *files = current->files;
+	struct fdtable *fdt;
+	spin_lock(&files->file_lock);
+	fdt = files_fdtable(files);
+	BUG_ON(fdt->fd[fd] != NULL);
+	rcu_assign_pointer(fdt->fd[fd], file);
+	spin_unlock(&files->file_lock);
+}
+
+EXPORT_SYMBOL(fd_install);
diff --git a/fs/open.c b/fs/open.c
index e1f2cdb91a4d..c525bd0e65b6 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -803,50 +803,6 @@ struct file *dentry_open(const struct path *path, int flags,
 }
 EXPORT_SYMBOL(dentry_open);
 
-static void __put_unused_fd(struct files_struct *files, unsigned int fd)
-{
-	struct fdtable *fdt = files_fdtable(files);
-	__clear_open_fd(fd, fdt);
-	if (fd < files->next_fd)
-		files->next_fd = fd;
-}
-
-void put_unused_fd(unsigned int fd)
-{
-	struct files_struct *files = current->files;
-	spin_lock(&files->file_lock);
-	__put_unused_fd(files, fd);
-	spin_unlock(&files->file_lock);
-}
-
-EXPORT_SYMBOL(put_unused_fd);
-
-/*
- * Install a file pointer in the fd array.
- *
- * The VFS is full of places where we drop the files lock between
- * setting the open_fds bitmap and installing the file in the file
- * array.  At any such point, we are vulnerable to a dup2() race
- * installing a file in the array before us.  We need to detect this and
- * fput() the struct file we are about to overwrite in this case.
- *
- * It should never happen - if we allow dup2() do it, _really_ bad things
- * will follow.
- */
-
-void fd_install(unsigned int fd, struct file *file)
-{
-	struct files_struct *files = current->files;
-	struct fdtable *fdt;
-	spin_lock(&files->file_lock);
-	fdt = files_fdtable(files);
-	BUG_ON(fdt->fd[fd] != NULL);
-	rcu_assign_pointer(fdt->fd[fd], file);
-	spin_unlock(&files->file_lock);
-}
-
-EXPORT_SYMBOL(fd_install);
-
 static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op)
 {
 	int lookup_flags = 0;
-- 
cgit 


From f869e8a7f753e3fd43d6483e796774776f645edb Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 15 Aug 2012 21:06:33 -0400
Subject: expose a low-level variant of fd_install() for binder

Similar situation to that of __alloc_fd(); do not use unless you
really have to.  You should not touch any descriptor table other
than your own; it's a sure sign of a really bad API design.

As with __alloc_fd(), you *must* use a first-class reference to
struct files_struct; something obtained by get_files_struct(some task)
(let alone direct task->files) will not do.  It must be either
current->files, or obtained by get_files_struct(current) by the
owner of that sucker and given to you.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/file.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/file.c b/fs/file.c
index 78cf88f2a0e8..0d1bf0515111 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -599,11 +599,18 @@ EXPORT_SYMBOL(put_unused_fd);
  *
  * It should never happen - if we allow dup2() do it, _really_ bad things
  * will follow.
+ *
+ * NOTE: __fd_install() variant is really, really low-level; don't
+ * use it unless you are forced to by truly lousy API shoved down
+ * your throat.  'files' *MUST* be either current->files or obtained
+ * by get_files_struct(current) done by whoever had given it to you,
+ * or really bad things will happen.  Normally you want to use
+ * fd_install() instead.
  */
 
-void fd_install(unsigned int fd, struct file *file)
+void __fd_install(struct files_struct *files, unsigned int fd,
+		struct file *file)
 {
-	struct files_struct *files = current->files;
 	struct fdtable *fdt;
 	spin_lock(&files->file_lock);
 	fdt = files_fdtable(files);
@@ -612,4 +619,9 @@ void fd_install(unsigned int fd, struct file *file)
 	spin_unlock(&files->file_lock);
 }
 
+void fd_install(unsigned int fd, struct file *file)
+{
+	__fd_install(current->files, fd, file);
+}
+
 EXPORT_SYMBOL(fd_install);
-- 
cgit 


From 0ee8cdfe6af052deb56dccd54838a1eb32fb4ca2 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 15 Aug 2012 21:12:10 -0400
Subject: take fget() and friends to fs/file.c

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/file.c       | 106 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/file_table.c | 106 --------------------------------------------------------
 2 files changed, 106 insertions(+), 106 deletions(-)

(limited to 'fs')

diff --git a/fs/file.c b/fs/file.c
index 0d1bf0515111..6eef55ce30c9 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -625,3 +625,109 @@ void fd_install(unsigned int fd, struct file *file)
 }
 
 EXPORT_SYMBOL(fd_install);
+
+struct file *fget(unsigned int fd)
+{
+	struct file *file;
+	struct files_struct *files = current->files;
+
+	rcu_read_lock();
+	file = fcheck_files(files, fd);
+	if (file) {
+		/* File object ref couldn't be taken */
+		if (file->f_mode & FMODE_PATH ||
+		    !atomic_long_inc_not_zero(&file->f_count))
+			file = NULL;
+	}
+	rcu_read_unlock();
+
+	return file;
+}
+
+EXPORT_SYMBOL(fget);
+
+struct file *fget_raw(unsigned int fd)
+{
+	struct file *file;
+	struct files_struct *files = current->files;
+
+	rcu_read_lock();
+	file = fcheck_files(files, fd);
+	if (file) {
+		/* File object ref couldn't be taken */
+		if (!atomic_long_inc_not_zero(&file->f_count))
+			file = NULL;
+	}
+	rcu_read_unlock();
+
+	return file;
+}
+
+EXPORT_SYMBOL(fget_raw);
+
+/*
+ * Lightweight file lookup - no refcnt increment if fd table isn't shared.
+ *
+ * You can use this instead of fget if you satisfy all of the following
+ * conditions:
+ * 1) You must call fput_light before exiting the syscall and returning control
+ *    to userspace (i.e. you cannot remember the returned struct file * after
+ *    returning to userspace).
+ * 2) You must not call filp_close on the returned struct file * in between
+ *    calls to fget_light and fput_light.
+ * 3) You must not clone the current task in between the calls to fget_light
+ *    and fput_light.
+ *
+ * The fput_needed flag returned by fget_light should be passed to the
+ * corresponding fput_light.
+ */
+struct file *fget_light(unsigned int fd, int *fput_needed)
+{
+	struct file *file;
+	struct files_struct *files = current->files;
+
+	*fput_needed = 0;
+	if (atomic_read(&files->count) == 1) {
+		file = fcheck_files(files, fd);
+		if (file && (file->f_mode & FMODE_PATH))
+			file = NULL;
+	} else {
+		rcu_read_lock();
+		file = fcheck_files(files, fd);
+		if (file) {
+			if (!(file->f_mode & FMODE_PATH) &&
+			    atomic_long_inc_not_zero(&file->f_count))
+				*fput_needed = 1;
+			else
+				/* Didn't get the reference, someone's freed */
+				file = NULL;
+		}
+		rcu_read_unlock();
+	}
+
+	return file;
+}
+
+struct file *fget_raw_light(unsigned int fd, int *fput_needed)
+{
+	struct file *file;
+	struct files_struct *files = current->files;
+
+	*fput_needed = 0;
+	if (atomic_read(&files->count) == 1) {
+		file = fcheck_files(files, fd);
+	} else {
+		rcu_read_lock();
+		file = fcheck_files(files, fd);
+		if (file) {
+			if (atomic_long_inc_not_zero(&file->f_count))
+				*fput_needed = 1;
+			else
+				/* Didn't get the reference, someone's freed */
+				file = NULL;
+		}
+		rcu_read_unlock();
+	}
+
+	return file;
+}
diff --git a/fs/file_table.c b/fs/file_table.c
index 701985e4ccda..c6780163bf3e 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -339,112 +339,6 @@ void __fput_sync(struct file *file)
 
 EXPORT_SYMBOL(fput);
 
-struct file *fget(unsigned int fd)
-{
-	struct file *file;
-	struct files_struct *files = current->files;
-
-	rcu_read_lock();
-	file = fcheck_files(files, fd);
-	if (file) {
-		/* File object ref couldn't be taken */
-		if (file->f_mode & FMODE_PATH ||
-		    !atomic_long_inc_not_zero(&file->f_count))
-			file = NULL;
-	}
-	rcu_read_unlock();
-
-	return file;
-}
-
-EXPORT_SYMBOL(fget);
-
-struct file *fget_raw(unsigned int fd)
-{
-	struct file *file;
-	struct files_struct *files = current->files;
-
-	rcu_read_lock();
-	file = fcheck_files(files, fd);
-	if (file) {
-		/* File object ref couldn't be taken */
-		if (!atomic_long_inc_not_zero(&file->f_count))
-			file = NULL;
-	}
-	rcu_read_unlock();
-
-	return file;
-}
-
-EXPORT_SYMBOL(fget_raw);
-
-/*
- * Lightweight file lookup - no refcnt increment if fd table isn't shared.
- *
- * You can use this instead of fget if you satisfy all of the following
- * conditions:
- * 1) You must call fput_light before exiting the syscall and returning control
- *    to userspace (i.e. you cannot remember the returned struct file * after
- *    returning to userspace).
- * 2) You must not call filp_close on the returned struct file * in between
- *    calls to fget_light and fput_light.
- * 3) You must not clone the current task in between the calls to fget_light
- *    and fput_light.
- *
- * The fput_needed flag returned by fget_light should be passed to the
- * corresponding fput_light.
- */
-struct file *fget_light(unsigned int fd, int *fput_needed)
-{
-	struct file *file;
-	struct files_struct *files = current->files;
-
-	*fput_needed = 0;
-	if (atomic_read(&files->count) == 1) {
-		file = fcheck_files(files, fd);
-		if (file && (file->f_mode & FMODE_PATH))
-			file = NULL;
-	} else {
-		rcu_read_lock();
-		file = fcheck_files(files, fd);
-		if (file) {
-			if (!(file->f_mode & FMODE_PATH) &&
-			    atomic_long_inc_not_zero(&file->f_count))
-				*fput_needed = 1;
-			else
-				/* Didn't get the reference, someone's freed */
-				file = NULL;
-		}
-		rcu_read_unlock();
-	}
-
-	return file;
-}
-
-struct file *fget_raw_light(unsigned int fd, int *fput_needed)
-{
-	struct file *file;
-	struct files_struct *files = current->files;
-
-	*fput_needed = 0;
-	if (atomic_read(&files->count) == 1) {
-		file = fcheck_files(files, fd);
-	} else {
-		rcu_read_lock();
-		file = fcheck_files(files, fd);
-		if (file) {
-			if (atomic_long_inc_not_zero(&file->f_count))
-				*fput_needed = 1;
-			else
-				/* Didn't get the reference, someone's freed */
-				file = NULL;
-		}
-		rcu_read_unlock();
-	}
-
-	return file;
-}
-
 void put_filp(struct file *file)
 {
 	if (atomic_long_dec_and_test(&file->f_count)) {
-- 
cgit 


From 483ce1d4b8c3b82bc9c9a1dd9dbc44f50b3aaf5a Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 19 Aug 2012 12:04:24 -0400
Subject: take descriptor-related part of close() to file.c

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/file.c | 26 ++++++++++++++++++++++++++
 fs/open.c | 22 +---------------------
 2 files changed, 27 insertions(+), 21 deletions(-)

(limited to 'fs')

diff --git a/fs/file.c b/fs/file.c
index 6eef55ce30c9..fd4694e688ab 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -626,6 +626,32 @@ void fd_install(unsigned int fd, struct file *file)
 
 EXPORT_SYMBOL(fd_install);
 
+/*
+ * The same warnings as for __alloc_fd()/__fd_install() apply here...
+ */
+int __close_fd(struct files_struct *files, unsigned fd)
+{
+	struct file *file;
+	struct fdtable *fdt;
+
+	spin_lock(&files->file_lock);
+	fdt = files_fdtable(files);
+	if (fd >= fdt->max_fds)
+		goto out_unlock;
+	file = fdt->fd[fd];
+	if (!file)
+		goto out_unlock;
+	rcu_assign_pointer(fdt->fd[fd], NULL);
+	__clear_close_on_exec(fd, fdt);
+	__put_unused_fd(files, fd);
+	spin_unlock(&files->file_lock);
+	return filp_close(file, files);
+
+out_unlock:
+	spin_unlock(&files->file_lock);
+	return -EBADF;
+}
+
 struct file *fget(unsigned int fd)
 {
 	struct file *file;
diff --git a/fs/open.c b/fs/open.c
index c525bd0e65b6..30760017deed 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -994,23 +994,7 @@ EXPORT_SYMBOL(filp_close);
  */
 SYSCALL_DEFINE1(close, unsigned int, fd)
 {
-	struct file * filp;
-	struct files_struct *files = current->files;
-	struct fdtable *fdt;
-	int retval;
-
-	spin_lock(&files->file_lock);
-	fdt = files_fdtable(files);
-	if (fd >= fdt->max_fds)
-		goto out_unlock;
-	filp = fdt->fd[fd];
-	if (!filp)
-		goto out_unlock;
-	rcu_assign_pointer(fdt->fd[fd], NULL);
-	__clear_close_on_exec(fd, fdt);
-	__put_unused_fd(files, fd);
-	spin_unlock(&files->file_lock);
-	retval = filp_close(filp, files);
+	int retval = __close_fd(current->files, fd);
 
 	/* can't restart close syscall because file table entry was cleared */
 	if (unlikely(retval == -ERESTARTSYS ||
@@ -1020,10 +1004,6 @@ SYSCALL_DEFINE1(close, unsigned int, fd)
 		retval = -EINTR;
 
 	return retval;
-
-out_unlock:
-	spin_unlock(&files->file_lock);
-	return -EBADF;
 }
 EXPORT_SYMBOL(sys_close);
 
-- 
cgit 


From 6a6d27de340c89c5323565b49f7851362619925d Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 21 Aug 2012 09:56:33 -0400
Subject: take close-on-exec logics to fs/file.c, clean it up a bit

... and add cond_resched() there, while we are at it.  We can
get large latencies as is...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/exec.c | 41 ++++++-----------------------------------
 fs/file.c | 37 +++++++++++++++++++++++++++++++++++++
 2 files changed, 43 insertions(+), 35 deletions(-)

(limited to 'fs')

diff --git a/fs/exec.c b/fs/exec.c
index 574cf4de4ec3..f2b6af585d4a 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1006,40 +1006,6 @@ no_thread_group:
 	return 0;
 }
 
-/*
- * These functions flushes out all traces of the currently running executable
- * so that a new one can be started
- */
-static void flush_old_files(struct files_struct * files)
-{
-	long j = -1;
-	struct fdtable *fdt;
-
-	spin_lock(&files->file_lock);
-	for (;;) {
-		unsigned long set, i;
-
-		j++;
-		i = j * BITS_PER_LONG;
-		fdt = files_fdtable(files);
-		if (i >= fdt->max_fds)
-			break;
-		set = fdt->close_on_exec[j];
-		if (!set)
-			continue;
-		fdt->close_on_exec[j] = 0;
-		spin_unlock(&files->file_lock);
-		for ( ; set ; i++,set >>= 1) {
-			if (set & 1) {
-				sys_close(i);
-			}
-		}
-		spin_lock(&files->file_lock);
-
-	}
-	spin_unlock(&files->file_lock);
-}
-
 char *get_task_comm(char *buf, struct task_struct *tsk)
 {
 	/* buf must be at least sizeof(tsk->comm) in size */
@@ -1050,6 +1016,11 @@ char *get_task_comm(char *buf, struct task_struct *tsk)
 }
 EXPORT_SYMBOL_GPL(get_task_comm);
 
+/*
+ * These functions flushes out all traces of the currently running executable
+ * so that a new one can be started
+ */
+
 void set_task_comm(struct task_struct *tsk, char *buf)
 {
 	task_lock(tsk);
@@ -1171,7 +1142,7 @@ void setup_new_exec(struct linux_binprm * bprm)
 	current->self_exec_id++;
 			
 	flush_signal_handlers(current, 0);
-	flush_old_files(current->files);
+	do_close_on_exec(current->files);
 }
 EXPORT_SYMBOL(setup_new_exec);
 
diff --git a/fs/file.c b/fs/file.c
index fd4694e688ab..92197dd9fdc8 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -652,6 +652,43 @@ out_unlock:
 	return -EBADF;
 }
 
+void do_close_on_exec(struct files_struct *files)
+{
+	unsigned i;
+	struct fdtable *fdt;
+
+	/* exec unshares first */
+	BUG_ON(atomic_read(&files->count) != 1);
+	spin_lock(&files->file_lock);
+	for (i = 0; ; i++) {
+		unsigned long set;
+		unsigned fd = i * BITS_PER_LONG;
+		fdt = files_fdtable(files);
+		if (fd >= fdt->max_fds)
+			break;
+		set = fdt->close_on_exec[i];
+		if (!set)
+			continue;
+		fdt->close_on_exec[i] = 0;
+		for ( ; set ; fd++, set >>= 1) {
+			struct file *file;
+			if (!(set & 1))
+				continue;
+			file = fdt->fd[fd];
+			if (!file)
+				continue;
+			rcu_assign_pointer(fdt->fd[fd], NULL);
+			__put_unused_fd(files, fd);
+			spin_unlock(&files->file_lock);
+			filp_close(file, files);
+			cond_resched();
+			spin_lock(&files->file_lock);
+		}
+
+	}
+	spin_unlock(&files->file_lock);
+}
+
 struct file *fget(unsigned int fd)
 {
 	struct file *file;
-- 
cgit 


From fe17f22d7fd0e344ef6447238f799bb49f670c6f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 21 Aug 2012 11:48:11 -0400
Subject: take purely descriptor-related stuff from fcntl.c to file.c

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/fcntl.c | 131 ++----------------------------------------------------------
 fs/file.c  | 132 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 135 insertions(+), 128 deletions(-)

(limited to 'fs')

diff --git a/fs/fcntl.c b/fs/fcntl.c
index 08e6af5c1b1f..40a5bfb72cca 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -26,127 +26,6 @@
 #include <asm/siginfo.h>
 #include <asm/uaccess.h>
 
-void set_close_on_exec(unsigned int fd, int flag)
-{
-	struct files_struct *files = current->files;
-	struct fdtable *fdt;
-	spin_lock(&files->file_lock);
-	fdt = files_fdtable(files);
-	if (flag)
-		__set_close_on_exec(fd, fdt);
-	else
-		__clear_close_on_exec(fd, fdt);
-	spin_unlock(&files->file_lock);
-}
-
-static bool get_close_on_exec(unsigned int fd)
-{
-	struct files_struct *files = current->files;
-	struct fdtable *fdt;
-	bool res;
-	rcu_read_lock();
-	fdt = files_fdtable(files);
-	res = close_on_exec(fd, fdt);
-	rcu_read_unlock();
-	return res;
-}
-
-SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags)
-{
-	int err = -EBADF;
-	struct file * file, *tofree;
-	struct files_struct * files = current->files;
-	struct fdtable *fdt;
-
-	if ((flags & ~O_CLOEXEC) != 0)
-		return -EINVAL;
-
-	if (unlikely(oldfd == newfd))
-		return -EINVAL;
-
-	if (newfd >= rlimit(RLIMIT_NOFILE))
-		return -EMFILE;
-
-	spin_lock(&files->file_lock);
-	err = expand_files(files, newfd);
-	file = fcheck(oldfd);
-	if (unlikely(!file))
-		goto Ebadf;
-	if (unlikely(err < 0)) {
-		if (err == -EMFILE)
-			goto Ebadf;
-		goto out_unlock;
-	}
-	/*
-	 * We need to detect attempts to do dup2() over allocated but still
-	 * not finished descriptor.  NB: OpenBSD avoids that at the price of
-	 * extra work in their equivalent of fget() - they insert struct
-	 * file immediately after grabbing descriptor, mark it larval if
-	 * more work (e.g. actual opening) is needed and make sure that
-	 * fget() treats larval files as absent.  Potentially interesting,
-	 * but while extra work in fget() is trivial, locking implications
-	 * and amount of surgery on open()-related paths in VFS are not.
-	 * FreeBSD fails with -EBADF in the same situation, NetBSD "solution"
-	 * deadlocks in rather amusing ways, AFAICS.  All of that is out of
-	 * scope of POSIX or SUS, since neither considers shared descriptor
-	 * tables and this condition does not arise without those.
-	 */
-	err = -EBUSY;
-	fdt = files_fdtable(files);
-	tofree = fdt->fd[newfd];
-	if (!tofree && fd_is_open(newfd, fdt))
-		goto out_unlock;
-	get_file(file);
-	rcu_assign_pointer(fdt->fd[newfd], file);
-	__set_open_fd(newfd, fdt);
-	if (flags & O_CLOEXEC)
-		__set_close_on_exec(newfd, fdt);
-	else
-		__clear_close_on_exec(newfd, fdt);
-	spin_unlock(&files->file_lock);
-
-	if (tofree)
-		filp_close(tofree, files);
-
-	return newfd;
-
-Ebadf:
-	err = -EBADF;
-out_unlock:
-	spin_unlock(&files->file_lock);
-	return err;
-}
-
-SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd)
-{
-	if (unlikely(newfd == oldfd)) { /* corner case */
-		struct files_struct *files = current->files;
-		int retval = oldfd;
-
-		rcu_read_lock();
-		if (!fcheck_files(files, oldfd))
-			retval = -EBADF;
-		rcu_read_unlock();
-		return retval;
-	}
-	return sys_dup3(oldfd, newfd, 0);
-}
-
-SYSCALL_DEFINE1(dup, unsigned int, fildes)
-{
-	int ret = -EBADF;
-	struct file *file = fget_raw(fildes);
-
-	if (file) {
-		ret = get_unused_fd();
-		if (ret >= 0)
-			fd_install(ret, file);
-		else
-			fput(file);
-	}
-	return ret;
-}
-
 #define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME)
 
 static int setfl(int fd, struct file * filp, unsigned long arg)
@@ -376,14 +255,10 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
 
 	switch (cmd) {
 	case F_DUPFD:
+		err = f_dupfd(arg, filp, 0);
+		break;
 	case F_DUPFD_CLOEXEC:
-		if (arg >= rlimit(RLIMIT_NOFILE))
-			break;
-		err = alloc_fd(arg, cmd == F_DUPFD_CLOEXEC ? O_CLOEXEC : 0);
-		if (err >= 0) {
-			get_file(filp);
-			fd_install(err, filp);
-		}
+		err = f_dupfd(arg, filp, FD_CLOEXEC);
 		break;
 	case F_GETFD:
 		err = get_close_on_exec(fd) ? FD_CLOEXEC : 0;
diff --git a/fs/file.c b/fs/file.c
index 92197dd9fdc8..7f29544755d0 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -6,6 +6,7 @@
  *  Manage the dynamic fd arrays in the process files_struct.
  */
 
+#include <linux/syscalls.h>
 #include <linux/export.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
@@ -794,3 +795,134 @@ struct file *fget_raw_light(unsigned int fd, int *fput_needed)
 
 	return file;
 }
+
+void set_close_on_exec(unsigned int fd, int flag)
+{
+	struct files_struct *files = current->files;
+	struct fdtable *fdt;
+	spin_lock(&files->file_lock);
+	fdt = files_fdtable(files);
+	if (flag)
+		__set_close_on_exec(fd, fdt);
+	else
+		__clear_close_on_exec(fd, fdt);
+	spin_unlock(&files->file_lock);
+}
+
+bool get_close_on_exec(unsigned int fd)
+{
+	struct files_struct *files = current->files;
+	struct fdtable *fdt;
+	bool res;
+	rcu_read_lock();
+	fdt = files_fdtable(files);
+	res = close_on_exec(fd, fdt);
+	rcu_read_unlock();
+	return res;
+}
+
+SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags)
+{
+	int err = -EBADF;
+	struct file * file, *tofree;
+	struct files_struct * files = current->files;
+	struct fdtable *fdt;
+
+	if ((flags & ~O_CLOEXEC) != 0)
+		return -EINVAL;
+
+	if (newfd >= rlimit(RLIMIT_NOFILE))
+		return -EMFILE;
+
+	spin_lock(&files->file_lock);
+	err = expand_files(files, newfd);
+	file = fcheck(oldfd);
+	if (unlikely(!file))
+		goto Ebadf;
+	if (unlikely(err < 0)) {
+		if (err == -EMFILE)
+			goto Ebadf;
+		goto out_unlock;
+	}
+	/*
+	 * We need to detect attempts to do dup2() over allocated but still
+	 * not finished descriptor.  NB: OpenBSD avoids that at the price of
+	 * extra work in their equivalent of fget() - they insert struct
+	 * file immediately after grabbing descriptor, mark it larval if
+	 * more work (e.g. actual opening) is needed and make sure that
+	 * fget() treats larval files as absent.  Potentially interesting,
+	 * but while extra work in fget() is trivial, locking implications
+	 * and amount of surgery on open()-related paths in VFS are not.
+	 * FreeBSD fails with -EBADF in the same situation, NetBSD "solution"
+	 * deadlocks in rather amusing ways, AFAICS.  All of that is out of
+	 * scope of POSIX or SUS, since neither considers shared descriptor
+	 * tables and this condition does not arise without those.
+	 */
+	err = -EBUSY;
+	fdt = files_fdtable(files);
+	tofree = fdt->fd[newfd];
+	if (!tofree && fd_is_open(newfd, fdt))
+		goto out_unlock;
+	get_file(file);
+	rcu_assign_pointer(fdt->fd[newfd], file);
+	__set_open_fd(newfd, fdt);
+	if (flags & O_CLOEXEC)
+		__set_close_on_exec(newfd, fdt);
+	else
+		__clear_close_on_exec(newfd, fdt);
+	spin_unlock(&files->file_lock);
+
+	if (tofree)
+		filp_close(tofree, files);
+
+	return newfd;
+
+Ebadf:
+	err = -EBADF;
+out_unlock:
+	spin_unlock(&files->file_lock);
+	return err;
+}
+
+SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd)
+{
+	if (unlikely(newfd == oldfd)) { /* corner case */
+		struct files_struct *files = current->files;
+		int retval = oldfd;
+
+		rcu_read_lock();
+		if (!fcheck_files(files, oldfd))
+			retval = -EBADF;
+		rcu_read_unlock();
+		return retval;
+	}
+	return sys_dup3(oldfd, newfd, 0);
+}
+
+SYSCALL_DEFINE1(dup, unsigned int, fildes)
+{
+	int ret = -EBADF;
+	struct file *file = fget_raw(fildes);
+
+	if (file) {
+		ret = get_unused_fd();
+		if (ret >= 0)
+			fd_install(ret, file);
+		else
+			fput(file);
+	}
+	return ret;
+}
+
+int f_dupfd(unsigned int from, struct file *file, unsigned flags)
+{
+	int err;
+	if (from >= rlimit(RLIMIT_NOFILE))
+		return -EINVAL;
+	err = alloc_fd(from, flags);
+	if (err >= 0) {
+		get_file(file);
+		fd_install(err, file);
+	}
+	return err;
+}
-- 
cgit 


From 8280d16172243702ed43432f826ca6130edb4086 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 21 Aug 2012 12:11:46 -0400
Subject: new helper: replace_fd()

analog of dup2(), except that it takes struct file * as source.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/exec.c | 11 +-------
 fs/file.c | 91 +++++++++++++++++++++++++++++++++++++++++++--------------------
 2 files changed, 63 insertions(+), 39 deletions(-)

(limited to 'fs')

diff --git a/fs/exec.c b/fs/exec.c
index f2b6af585d4a..3fc74681cc6c 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -2041,23 +2041,14 @@ static void wait_for_dump_helpers(struct file *file)
 static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
 {
 	struct file *files[2];
-	struct fdtable *fdt;
 	struct coredump_params *cp = (struct coredump_params *)info->data;
-	struct files_struct *cf = current->files;
 	int err = create_pipe_files(files, 0);
 	if (err)
 		return err;
 
 	cp->file = files[1];
 
-	sys_close(0);
-	fd_install(0, files[0]);
-	spin_lock(&cf->file_lock);
-	fdt = files_fdtable(cf);
-	__set_open_fd(0, fdt);
-	__clear_close_on_exec(0, fdt);
-	spin_unlock(&cf->file_lock);
-
+	replace_fd(0, files[0], 0);
 	/* and disallow core files too */
 	current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1};
 
diff --git a/fs/file.c b/fs/file.c
index 7f29544755d0..a7bbe0324478 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -821,29 +821,12 @@ bool get_close_on_exec(unsigned int fd)
 	return res;
 }
 
-SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags)
+static int do_dup2(struct files_struct *files,
+	struct file *file, unsigned fd, unsigned flags)
 {
-	int err = -EBADF;
-	struct file * file, *tofree;
-	struct files_struct * files = current->files;
+	struct file *tofree;
 	struct fdtable *fdt;
 
-	if ((flags & ~O_CLOEXEC) != 0)
-		return -EINVAL;
-
-	if (newfd >= rlimit(RLIMIT_NOFILE))
-		return -EMFILE;
-
-	spin_lock(&files->file_lock);
-	err = expand_files(files, newfd);
-	file = fcheck(oldfd);
-	if (unlikely(!file))
-		goto Ebadf;
-	if (unlikely(err < 0)) {
-		if (err == -EMFILE)
-			goto Ebadf;
-		goto out_unlock;
-	}
 	/*
 	 * We need to detect attempts to do dup2() over allocated but still
 	 * not finished descriptor.  NB: OpenBSD avoids that at the price of
@@ -858,24 +841,74 @@ SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags)
 	 * scope of POSIX or SUS, since neither considers shared descriptor
 	 * tables and this condition does not arise without those.
 	 */
-	err = -EBUSY;
 	fdt = files_fdtable(files);
-	tofree = fdt->fd[newfd];
-	if (!tofree && fd_is_open(newfd, fdt))
-		goto out_unlock;
+	tofree = fdt->fd[fd];
+	if (!tofree && fd_is_open(fd, fdt))
+		goto Ebusy;
 	get_file(file);
-	rcu_assign_pointer(fdt->fd[newfd], file);
-	__set_open_fd(newfd, fdt);
+	rcu_assign_pointer(fdt->fd[fd], file);
+	__set_open_fd(fd, fdt);
 	if (flags & O_CLOEXEC)
-		__set_close_on_exec(newfd, fdt);
+		__set_close_on_exec(fd, fdt);
 	else
-		__clear_close_on_exec(newfd, fdt);
+		__clear_close_on_exec(fd, fdt);
 	spin_unlock(&files->file_lock);
 
 	if (tofree)
 		filp_close(tofree, files);
 
-	return newfd;
+	return fd;
+
+Ebusy:
+	spin_unlock(&files->file_lock);
+	return -EBUSY;
+}
+
+int replace_fd(unsigned fd, struct file *file, unsigned flags)
+{
+	int err;
+	struct files_struct *files = current->files;
+
+	if (!file)
+		return __close_fd(files, fd);
+
+	if (fd >= rlimit(RLIMIT_NOFILE))
+		return -EMFILE;
+
+	spin_lock(&files->file_lock);
+	err = expand_files(files, fd);
+	if (unlikely(err < 0))
+		goto out_unlock;
+	return do_dup2(files, file, fd, flags);
+
+out_unlock:
+	spin_unlock(&files->file_lock);
+	return err;
+}
+
+SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags)
+{
+	int err = -EBADF;
+	struct file *file;
+	struct files_struct *files = current->files;
+
+	if ((flags & ~O_CLOEXEC) != 0)
+		return -EINVAL;
+
+	if (newfd >= rlimit(RLIMIT_NOFILE))
+		return -EMFILE;
+
+	spin_lock(&files->file_lock);
+	err = expand_files(files, newfd);
+	file = fcheck(oldfd);
+	if (unlikely(!file))
+		goto Ebadf;
+	if (unlikely(err < 0)) {
+		if (err == -EMFILE)
+			goto Ebadf;
+		goto out_unlock;
+	}
+	return do_dup2(files, file, newfd, flags);
 
 Ebadf:
 	err = -EBADF;
-- 
cgit 


From b8318b01a8f7f760ae3ecae052ccc7fc123d9508 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 21 Aug 2012 20:09:42 -0400
Subject: take __{set,clear}_{open_fd,close_on_exec}() into fs/file.c

nobody uses those outside anymore.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/file.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'fs')

diff --git a/fs/file.c b/fs/file.c
index a7bbe0324478..40ddef9fe041 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -256,6 +256,26 @@ int expand_files(struct files_struct *files, int nr)
 	return expand_fdtable(files, nr);
 }
 
+static inline void __set_close_on_exec(int fd, struct fdtable *fdt)
+{
+	__set_bit(fd, fdt->close_on_exec);
+}
+
+static inline void __clear_close_on_exec(int fd, struct fdtable *fdt)
+{
+	__clear_bit(fd, fdt->close_on_exec);
+}
+
+static inline void __set_open_fd(int fd, struct fdtable *fdt)
+{
+	__set_bit(fd, fdt->open_fds);
+}
+
+static inline void __clear_open_fd(int fd, struct fdtable *fdt)
+{
+	__clear_bit(fd, fdt->open_fds);
+}
+
 static int count_open_files(struct fdtable *fdt)
 {
 	int size = fdt->max_fds;
-- 
cgit 


From ad47bd7252bf402fe7dba92f5240b5ed16832ae7 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 21 Aug 2012 20:11:34 -0400
Subject: make expand_files() and alloc_fd() static

no callers outside of fs/file.c left

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/file.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/file.c b/fs/file.c
index 40ddef9fe041..967bd0dadbe5 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -238,7 +238,7 @@ static int expand_fdtable(struct files_struct *files, int nr)
  * expanded and execution may have blocked.
  * The files->file_lock should be held on entry, and will be held on exit.
  */
-int expand_files(struct files_struct *files, int nr)
+static int expand_files(struct files_struct *files, int nr)
 {
 	struct fdtable *fdt;
 
@@ -580,7 +580,7 @@ out:
 	return error;
 }
 
-int alloc_fd(unsigned start, unsigned flags)
+static int alloc_fd(unsigned start, unsigned flags)
 {
 	return __alloc_fd(current->files, start, rlimit(RLIMIT_NOFILE), flags);
 }
-- 
cgit 


From c3c073f808b22dfae15ef8412b6f7b998644139a Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 21 Aug 2012 22:32:06 -0400
Subject: new helper: iterate_fd()

iterates through the opened files in given descriptor table,
calling a supplied function; we stop once non-zero is returned.
Callback gets struct file *, descriptor number and const void *
argument passed to iterator.  It is called with files->file_lock
held, so it is not allowed to block.

tty_io, netprio_cgroup and selinux flush_unauthorized_files()
converted to its use.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/file.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'fs')

diff --git a/fs/file.c b/fs/file.c
index 967bd0dadbe5..e6e418122587 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -979,3 +979,24 @@ int f_dupfd(unsigned int from, struct file *file, unsigned flags)
 	}
 	return err;
 }
+
+int iterate_fd(struct files_struct *files, unsigned n,
+		int (*f)(const void *, struct file *, unsigned),
+		const void *p)
+{
+	struct fdtable *fdt;
+	struct file *file;
+	int res = 0;
+	if (!files)
+		return 0;
+	spin_lock(&files->file_lock);
+	fdt = files_fdtable(files);
+	while (!res && n < fdt->max_fds) {
+		file = rcu_dereference_check_fdtable(files, fdt->fd[n++]);
+		if (file)
+			res = f(p, file, n);
+	}
+	spin_unlock(&files->file_lock);
+	return res;
+}
+EXPORT_SYMBOL(iterate_fd);
-- 
cgit 


From 179e037fc1370288188cb1f90b81156d75a3cb2d Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 21 Aug 2012 22:43:47 -0400
Subject: do_coredump(): make sure that descriptor table isn't shared

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/exec.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'fs')

diff --git a/fs/exec.c b/fs/exec.c
index 3fc74681cc6c..beb05a95e4a3 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -2066,6 +2066,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 	int retval = 0;
 	int flag = 0;
 	int ispipe;
+	struct files_struct *displaced;
 	bool need_nonrelative = false;
 	static atomic_t core_dump_count = ATOMIC_INIT(0);
 	struct coredump_params cprm = {
@@ -2219,6 +2220,12 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 			goto close_fail;
 	}
 
+	/* get us an unshared descriptor table; almost always a no-op */
+	retval = unshare_files(&displaced);
+	if (retval)
+		goto close_fail;
+	if (displaced)
+		put_files_struct(displaced);
 	retval = binfmt->core_dump(&cprm);
 	if (retval)
 		current->signal->group_exit_code |= 0x80;
-- 
cgit 


From 864bdb3b6cbd9911222543fef1cfe36f88183f44 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 22 Aug 2012 18:42:10 -0400
Subject: new helper: daemonize_descriptors()

descriptor-related parts of daemonize, done right.  As the
result we simplify the locking rules for ->files - we
hold task_lock in *all* cases when we modify ->files.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/file.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'fs')

diff --git a/fs/file.c b/fs/file.c
index e6e418122587..15750b80e3ce 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -519,6 +519,12 @@ struct files_struct init_files = {
 	.file_lock	= __SPIN_LOCK_UNLOCKED(init_task.file_lock),
 };
 
+void daemonize_descriptors(void)
+{
+	atomic_inc(&init_files.count);
+	reset_files_struct(&init_files);
+}
+
 /*
  * allocate a file descriptor, mark it busy.
  */
-- 
cgit 


From faf60af17f8da87e1c87a6be527344791025ce9e Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@openvz.org>
Date: Thu, 23 Aug 2012 14:43:24 +0400
Subject: procfs: Move /proc/pid/fd[info] handling code to fd.[ch]

This patch prepares the ground for further extension of
/proc/pid/fd[info] handling code by moving fdinfo handling
code into fs/proc/fd.c.

I think such move makes both fs/proc/base.c and fs/proc/fd.c
easier to read.

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Acked-by: Pavel Emelyanov <xemul@parallels.com>
CC: Al Viro <viro@ZenIV.linux.org.uk>
CC: Alexey Dobriyan <adobriyan@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: James Bottomley <jbottomley@parallels.com>
CC: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
CC: Alexey Dobriyan <adobriyan@gmail.com>
CC: Matthew Helsley <matt.helsley@gmail.com>
CC: "J. Bruce Fields" <bfields@fieldses.org>
CC: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/proc/Makefile   |   2 +-
 fs/proc/base.c     | 388 +----------------------------------------------------
 fs/proc/fd.c       | 351 ++++++++++++++++++++++++++++++++++++++++++++++++
 fs/proc/fd.h       |  14 ++
 fs/proc/internal.h |  48 +++++++
 5 files changed, 416 insertions(+), 387 deletions(-)
 create mode 100644 fs/proc/fd.c
 create mode 100644 fs/proc/fd.h

(limited to 'fs')

diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index c1c729335924..99349efbbc2b 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -8,7 +8,7 @@ proc-y			:= nommu.o task_nommu.o
 proc-$(CONFIG_MMU)	:= mmu.o task_mmu.o
 
 proc-y       += inode.o root.o base.o generic.o array.o \
-		proc_tty.o
+		proc_tty.o fd.o
 proc-y	+= cmdline.o
 proc-y	+= consoles.o
 proc-y	+= cpuinfo.o
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 1b6c84cbdb73..b55c3bb298e3 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -90,6 +90,7 @@
 #endif
 #include <trace/events/oom.h>
 #include "internal.h"
+#include "fd.h"
 
 /* NOTE:
  *	Implementing inode permission operations in /proc is almost
@@ -136,8 +137,6 @@ struct pid_entry {
 		NULL, &proc_single_file_operations,	\
 		{ .proc_show = show } )
 
-static int proc_fd_permission(struct inode *inode, int mask);
-
 /*
  * Count the number of hardlinks for the pid_entry table, excluding the .
  * and .. links.
@@ -1492,7 +1491,7 @@ out:
 	return error;
 }
 
-static const struct inode_operations proc_pid_link_inode_operations = {
+const struct inode_operations proc_pid_link_inode_operations = {
 	.readlink	= proc_pid_readlink,
 	.follow_link	= proc_pid_follow_link,
 	.setattr	= proc_setattr,
@@ -1501,21 +1500,6 @@ static const struct inode_operations proc_pid_link_inode_operations = {
 
 /* building an inode */
 
-static int task_dumpable(struct task_struct *task)
-{
-	int dumpable = 0;
-	struct mm_struct *mm;
-
-	task_lock(task);
-	mm = task->mm;
-	if (mm)
-		dumpable = get_dumpable(mm);
-	task_unlock(task);
-	if(dumpable == 1)
-		return 1;
-	return 0;
-}
-
 struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task)
 {
 	struct inode * inode;
@@ -1641,15 +1625,6 @@ int pid_revalidate(struct dentry *dentry, unsigned int flags)
 	return 0;
 }
 
-static int pid_delete_dentry(const struct dentry * dentry)
-{
-	/* Is the task we represent dead?
-	 * If so, then don't put the dentry on the lru list,
-	 * kill it immediately.
-	 */
-	return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first;
-}
-
 const struct dentry_operations pid_dentry_operations =
 {
 	.d_revalidate	= pid_revalidate,
@@ -1712,289 +1687,6 @@ end_instantiate:
 	return filldir(dirent, name, len, filp->f_pos, ino, type);
 }
 
-static unsigned name_to_int(struct dentry *dentry)
-{
-	const char *name = dentry->d_name.name;
-	int len = dentry->d_name.len;
-	unsigned n = 0;
-
-	if (len > 1 && *name == '0')
-		goto out;
-	while (len-- > 0) {
-		unsigned c = *name++ - '0';
-		if (c > 9)
-			goto out;
-		if (n >= (~0U-9)/10)
-			goto out;
-		n *= 10;
-		n += c;
-	}
-	return n;
-out:
-	return ~0U;
-}
-
-#define PROC_FDINFO_MAX 64
-
-static int proc_fd_info(struct inode *inode, struct path *path, char *info)
-{
-	struct task_struct *task = get_proc_task(inode);
-	struct files_struct *files = NULL;
-	struct file *file;
-	int fd = proc_fd(inode);
-
-	if (task) {
-		files = get_files_struct(task);
-		put_task_struct(task);
-	}
-	if (files) {
-		/*
-		 * We are not taking a ref to the file structure, so we must
-		 * hold ->file_lock.
-		 */
-		spin_lock(&files->file_lock);
-		file = fcheck_files(files, fd);
-		if (file) {
-			unsigned int f_flags;
-			struct fdtable *fdt;
-
-			fdt = files_fdtable(files);
-			f_flags = file->f_flags & ~O_CLOEXEC;
-			if (close_on_exec(fd, fdt))
-				f_flags |= O_CLOEXEC;
-
-			if (path) {
-				*path = file->f_path;
-				path_get(&file->f_path);
-			}
-			if (info)
-				snprintf(info, PROC_FDINFO_MAX,
-					 "pos:\t%lli\n"
-					 "flags:\t0%o\n",
-					 (long long) file->f_pos,
-					 f_flags);
-			spin_unlock(&files->file_lock);
-			put_files_struct(files);
-			return 0;
-		}
-		spin_unlock(&files->file_lock);
-		put_files_struct(files);
-	}
-	return -ENOENT;
-}
-
-static int proc_fd_link(struct dentry *dentry, struct path *path)
-{
-	return proc_fd_info(dentry->d_inode, path, NULL);
-}
-
-static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags)
-{
-	struct inode *inode;
-	struct task_struct *task;
-	int fd;
-	struct files_struct *files;
-	const struct cred *cred;
-
-	if (flags & LOOKUP_RCU)
-		return -ECHILD;
-
-	inode = dentry->d_inode;
-	task = get_proc_task(inode);
-	fd = proc_fd(inode);
-
-	if (task) {
-		files = get_files_struct(task);
-		if (files) {
-			struct file *file;
-			rcu_read_lock();
-			file = fcheck_files(files, fd);
-			if (file) {
-				unsigned f_mode = file->f_mode;
-
-				rcu_read_unlock();
-				put_files_struct(files);
-
-				if (task_dumpable(task)) {
-					rcu_read_lock();
-					cred = __task_cred(task);
-					inode->i_uid = cred->euid;
-					inode->i_gid = cred->egid;
-					rcu_read_unlock();
-				} else {
-					inode->i_uid = GLOBAL_ROOT_UID;
-					inode->i_gid = GLOBAL_ROOT_GID;
-				}
-
-				if (S_ISLNK(inode->i_mode)) {
-					unsigned i_mode = S_IFLNK;
-					if (f_mode & FMODE_READ)
-						i_mode |= S_IRUSR | S_IXUSR;
-					if (f_mode & FMODE_WRITE)
-						i_mode |= S_IWUSR | S_IXUSR;
-					inode->i_mode = i_mode;
-				}
-
-				security_task_to_inode(task, inode);
-				put_task_struct(task);
-				return 1;
-			}
-			rcu_read_unlock();
-			put_files_struct(files);
-		}
-		put_task_struct(task);
-	}
-	d_drop(dentry);
-	return 0;
-}
-
-static const struct dentry_operations tid_fd_dentry_operations =
-{
-	.d_revalidate	= tid_fd_revalidate,
-	.d_delete	= pid_delete_dentry,
-};
-
-static struct dentry *proc_fd_instantiate(struct inode *dir,
-	struct dentry *dentry, struct task_struct *task, const void *ptr)
-{
-	unsigned fd = (unsigned long)ptr;
- 	struct inode *inode;
- 	struct proc_inode *ei;
-	struct dentry *error = ERR_PTR(-ENOENT);
-
-	inode = proc_pid_make_inode(dir->i_sb, task);
-	if (!inode)
-		goto out;
-	ei = PROC_I(inode);
-	ei->fd = fd;
-
-	inode->i_mode = S_IFLNK;
-	inode->i_op = &proc_pid_link_inode_operations;
-	inode->i_size = 64;
-	ei->op.proc_get_link = proc_fd_link;
-	d_set_d_op(dentry, &tid_fd_dentry_operations);
-	d_add(dentry, inode);
-	/* Close the race of the process dying before we return the dentry */
-	if (tid_fd_revalidate(dentry, 0))
-		error = NULL;
-
- out:
-	return error;
-}
-
-static struct dentry *proc_lookupfd_common(struct inode *dir,
-					   struct dentry *dentry,
-					   instantiate_t instantiate)
-{
-	struct task_struct *task = get_proc_task(dir);
-	unsigned fd = name_to_int(dentry);
-	struct dentry *result = ERR_PTR(-ENOENT);
-
-	if (!task)
-		goto out_no_task;
-	if (fd == ~0U)
-		goto out;
-
-	result = instantiate(dir, dentry, task, (void *)(unsigned long)fd);
-out:
-	put_task_struct(task);
-out_no_task:
-	return result;
-}
-
-static int proc_readfd_common(struct file * filp, void * dirent,
-			      filldir_t filldir, instantiate_t instantiate)
-{
-	struct dentry *dentry = filp->f_path.dentry;
-	struct inode *inode = dentry->d_inode;
-	struct task_struct *p = get_proc_task(inode);
-	unsigned int fd, ino;
-	int retval;
-	struct files_struct * files;
-
-	retval = -ENOENT;
-	if (!p)
-		goto out_no_task;
-	retval = 0;
-
-	fd = filp->f_pos;
-	switch (fd) {
-		case 0:
-			if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0)
-				goto out;
-			filp->f_pos++;
-		case 1:
-			ino = parent_ino(dentry);
-			if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
-				goto out;
-			filp->f_pos++;
-		default:
-			files = get_files_struct(p);
-			if (!files)
-				goto out;
-			rcu_read_lock();
-			for (fd = filp->f_pos-2;
-			     fd < files_fdtable(files)->max_fds;
-			     fd++, filp->f_pos++) {
-				char name[PROC_NUMBUF];
-				int len;
-				int rv;
-
-				if (!fcheck_files(files, fd))
-					continue;
-				rcu_read_unlock();
-
-				len = snprintf(name, sizeof(name), "%d", fd);
-				rv = proc_fill_cache(filp, dirent, filldir,
-						     name, len, instantiate, p,
-						     (void *)(unsigned long)fd);
-				if (rv < 0)
-					goto out_fd_loop;
-				rcu_read_lock();
-			}
-			rcu_read_unlock();
-out_fd_loop:
-			put_files_struct(files);
-	}
-out:
-	put_task_struct(p);
-out_no_task:
-	return retval;
-}
-
-static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry,
-				    unsigned int flags)
-{
-	return proc_lookupfd_common(dir, dentry, proc_fd_instantiate);
-}
-
-static int proc_readfd(struct file *filp, void *dirent, filldir_t filldir)
-{
-	return proc_readfd_common(filp, dirent, filldir, proc_fd_instantiate);
-}
-
-static ssize_t proc_fdinfo_read(struct file *file, char __user *buf,
-				      size_t len, loff_t *ppos)
-{
-	char tmp[PROC_FDINFO_MAX];
-	int err = proc_fd_info(file->f_path.dentry->d_inode, NULL, tmp);
-	if (!err)
-		err = simple_read_from_buffer(buf, len, ppos, tmp, strlen(tmp));
-	return err;
-}
-
-static const struct file_operations proc_fdinfo_file_operations = {
-	.open           = nonseekable_open,
-	.read		= proc_fdinfo_read,
-	.llseek		= no_llseek,
-};
-
-static const struct file_operations proc_fd_operations = {
-	.read		= generic_read_dir,
-	.readdir	= proc_readfd,
-	.llseek		= default_llseek,
-};
-
 #ifdef CONFIG_CHECKPOINT_RESTORE
 
 /*
@@ -2337,82 +2029,6 @@ static const struct file_operations proc_map_files_operations = {
 
 #endif /* CONFIG_CHECKPOINT_RESTORE */
 
-/*
- * /proc/pid/fd needs a special permission handler so that a process can still
- * access /proc/self/fd after it has executed a setuid().
- */
-static int proc_fd_permission(struct inode *inode, int mask)
-{
-	int rv = generic_permission(inode, mask);
-	if (rv == 0)
-		return 0;
-	if (task_pid(current) == proc_pid(inode))
-		rv = 0;
-	return rv;
-}
-
-/*
- * proc directories can do almost nothing..
- */
-static const struct inode_operations proc_fd_inode_operations = {
-	.lookup		= proc_lookupfd,
-	.permission	= proc_fd_permission,
-	.setattr	= proc_setattr,
-};
-
-static struct dentry *proc_fdinfo_instantiate(struct inode *dir,
-	struct dentry *dentry, struct task_struct *task, const void *ptr)
-{
-	unsigned fd = (unsigned long)ptr;
- 	struct inode *inode;
- 	struct proc_inode *ei;
-	struct dentry *error = ERR_PTR(-ENOENT);
-
-	inode = proc_pid_make_inode(dir->i_sb, task);
-	if (!inode)
-		goto out;
-	ei = PROC_I(inode);
-	ei->fd = fd;
-	inode->i_mode = S_IFREG | S_IRUSR;
-	inode->i_fop = &proc_fdinfo_file_operations;
-	d_set_d_op(dentry, &tid_fd_dentry_operations);
-	d_add(dentry, inode);
-	/* Close the race of the process dying before we return the dentry */
-	if (tid_fd_revalidate(dentry, 0))
-		error = NULL;
-
- out:
-	return error;
-}
-
-static struct dentry *proc_lookupfdinfo(struct inode *dir,
-					struct dentry *dentry,
-					unsigned int flags)
-{
-	return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate);
-}
-
-static int proc_readfdinfo(struct file *filp, void *dirent, filldir_t filldir)
-{
-	return proc_readfd_common(filp, dirent, filldir,
-				  proc_fdinfo_instantiate);
-}
-
-static const struct file_operations proc_fdinfo_operations = {
-	.read		= generic_read_dir,
-	.readdir	= proc_readfdinfo,
-	.llseek		= default_llseek,
-};
-
-/*
- * proc directories can do almost nothing..
- */
-static const struct inode_operations proc_fdinfo_inode_operations = {
-	.lookup		= proc_lookupfdinfo,
-	.setattr	= proc_setattr,
-};
-
-
 static struct dentry *proc_pident_instantiate(struct inode *dir,
 	struct dentry *dentry, struct task_struct *task, const void *ptr)
 {
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
new file mode 100644
index 000000000000..1b0f932b4bd9
--- /dev/null
+++ b/fs/proc/fd.c
@@ -0,0 +1,351 @@
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/dcache.h>
+#include <linux/path.h>
+#include <linux/fdtable.h>
+#include <linux/namei.h>
+#include <linux/pid.h>
+#include <linux/security.h>
+
+#include <linux/proc_fs.h>
+
+#include "internal.h"
+#include "fd.h"
+
+#define PROC_FDINFO_MAX 64
+
+static int proc_fd_info(struct inode *inode, struct path *path, char *info)
+{
+	struct task_struct *task = get_proc_task(inode);
+	struct files_struct *files = NULL;
+	int fd = proc_fd(inode);
+	struct file *file;
+
+	if (task) {
+		files = get_files_struct(task);
+		put_task_struct(task);
+	}
+	if (files) {
+		/*
+		 * We are not taking a ref to the file structure, so we must
+		 * hold ->file_lock.
+		 */
+		spin_lock(&files->file_lock);
+		file = fcheck_files(files, fd);
+		if (file) {
+			unsigned int f_flags;
+			struct fdtable *fdt;
+
+			fdt = files_fdtable(files);
+			f_flags = file->f_flags & ~O_CLOEXEC;
+			if (close_on_exec(fd, fdt))
+				f_flags |= O_CLOEXEC;
+
+			if (path) {
+				*path = file->f_path;
+				path_get(&file->f_path);
+			}
+			if (info)
+				snprintf(info, PROC_FDINFO_MAX,
+					 "pos:\t%lli\n"
+					 "flags:\t0%o\n",
+					 (long long) file->f_pos,
+					 f_flags);
+			spin_unlock(&files->file_lock);
+			put_files_struct(files);
+			return 0;
+		}
+		spin_unlock(&files->file_lock);
+		put_files_struct(files);
+	}
+	return -ENOENT;
+}
+
+static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags)
+{
+	struct files_struct *files;
+	struct task_struct *task;
+	const struct cred *cred;
+	struct inode *inode;
+	int fd;
+
+	if (flags & LOOKUP_RCU)
+		return -ECHILD;
+
+	inode = dentry->d_inode;
+	task = get_proc_task(inode);
+	fd = proc_fd(inode);
+
+	if (task) {
+		files = get_files_struct(task);
+		if (files) {
+			struct file *file;
+
+			rcu_read_lock();
+			file = fcheck_files(files, fd);
+			if (file) {
+				unsigned f_mode = file->f_mode;
+
+				rcu_read_unlock();
+				put_files_struct(files);
+
+				if (task_dumpable(task)) {
+					rcu_read_lock();
+					cred = __task_cred(task);
+					inode->i_uid = cred->euid;
+					inode->i_gid = cred->egid;
+					rcu_read_unlock();
+				} else {
+					inode->i_uid = GLOBAL_ROOT_UID;
+					inode->i_gid = GLOBAL_ROOT_GID;
+				}
+
+				if (S_ISLNK(inode->i_mode)) {
+					unsigned i_mode = S_IFLNK;
+					if (f_mode & FMODE_READ)
+						i_mode |= S_IRUSR | S_IXUSR;
+					if (f_mode & FMODE_WRITE)
+						i_mode |= S_IWUSR | S_IXUSR;
+					inode->i_mode = i_mode;
+				}
+
+				security_task_to_inode(task, inode);
+				put_task_struct(task);
+				return 1;
+			}
+			rcu_read_unlock();
+			put_files_struct(files);
+		}
+		put_task_struct(task);
+	}
+
+	d_drop(dentry);
+	return 0;
+}
+
+static const struct dentry_operations tid_fd_dentry_operations = {
+	.d_revalidate	= tid_fd_revalidate,
+	.d_delete	= pid_delete_dentry,
+};
+
+static int proc_fd_link(struct dentry *dentry, struct path *path)
+{
+	return proc_fd_info(dentry->d_inode, path, NULL);
+}
+
+static struct dentry *
+proc_fd_instantiate(struct inode *dir, struct dentry *dentry,
+		    struct task_struct *task, const void *ptr)
+{
+	struct dentry *error = ERR_PTR(-ENOENT);
+	unsigned fd = (unsigned long)ptr;
+	struct proc_inode *ei;
+	struct inode *inode;
+
+	inode = proc_pid_make_inode(dir->i_sb, task);
+	if (!inode)
+		goto out;
+
+	ei = PROC_I(inode);
+	ei->fd = fd;
+
+	inode->i_mode = S_IFLNK;
+	inode->i_op = &proc_pid_link_inode_operations;
+	inode->i_size = 64;
+
+	ei->op.proc_get_link = proc_fd_link;
+
+	d_set_d_op(dentry, &tid_fd_dentry_operations);
+	d_add(dentry, inode);
+
+	/* Close the race of the process dying before we return the dentry */
+	if (tid_fd_revalidate(dentry, 0))
+		error = NULL;
+ out:
+	return error;
+}
+
+static struct dentry *proc_lookupfd_common(struct inode *dir,
+					   struct dentry *dentry,
+					   instantiate_t instantiate)
+{
+	struct task_struct *task = get_proc_task(dir);
+	struct dentry *result = ERR_PTR(-ENOENT);
+	unsigned fd = name_to_int(dentry);
+
+	if (!task)
+		goto out_no_task;
+	if (fd == ~0U)
+		goto out;
+
+	result = instantiate(dir, dentry, task, (void *)(unsigned long)fd);
+out:
+	put_task_struct(task);
+out_no_task:
+	return result;
+}
+
+static int proc_readfd_common(struct file * filp, void * dirent,
+			      filldir_t filldir, instantiate_t instantiate)
+{
+	struct dentry *dentry = filp->f_path.dentry;
+	struct inode *inode = dentry->d_inode;
+	struct task_struct *p = get_proc_task(inode);
+	struct files_struct *files;
+	unsigned int fd, ino;
+	int retval;
+
+	retval = -ENOENT;
+	if (!p)
+		goto out_no_task;
+	retval = 0;
+
+	fd = filp->f_pos;
+	switch (fd) {
+		case 0:
+			if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0)
+				goto out;
+			filp->f_pos++;
+		case 1:
+			ino = parent_ino(dentry);
+			if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
+				goto out;
+			filp->f_pos++;
+		default:
+			files = get_files_struct(p);
+			if (!files)
+				goto out;
+			rcu_read_lock();
+			for (fd = filp->f_pos - 2;
+			     fd < files_fdtable(files)->max_fds;
+			     fd++, filp->f_pos++) {
+				char name[PROC_NUMBUF];
+				int len;
+				int rv;
+
+				if (!fcheck_files(files, fd))
+					continue;
+				rcu_read_unlock();
+
+				len = snprintf(name, sizeof(name), "%d", fd);
+				rv = proc_fill_cache(filp, dirent, filldir,
+						     name, len, instantiate, p,
+						     (void *)(unsigned long)fd);
+				if (rv < 0)
+					goto out_fd_loop;
+				rcu_read_lock();
+			}
+			rcu_read_unlock();
+out_fd_loop:
+			put_files_struct(files);
+	}
+out:
+	put_task_struct(p);
+out_no_task:
+	return retval;
+}
+
+static ssize_t proc_fdinfo_read(struct file *file, char __user *buf,
+				size_t len, loff_t *ppos)
+{
+	char tmp[PROC_FDINFO_MAX];
+	int err = proc_fd_info(file->f_path.dentry->d_inode, NULL, tmp);
+	if (!err)
+		err = simple_read_from_buffer(buf, len, ppos, tmp, strlen(tmp));
+	return err;
+}
+
+static const struct file_operations proc_fdinfo_file_operations = {
+	.open           = nonseekable_open,
+	.read		= proc_fdinfo_read,
+	.llseek		= no_llseek,
+};
+
+static int proc_readfd(struct file *filp, void *dirent, filldir_t filldir)
+{
+	return proc_readfd_common(filp, dirent, filldir, proc_fd_instantiate);
+}
+
+const struct file_operations proc_fd_operations = {
+	.read		= generic_read_dir,
+	.readdir	= proc_readfd,
+	.llseek		= default_llseek,
+};
+
+static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry,
+				    unsigned int flags)
+{
+	return proc_lookupfd_common(dir, dentry, proc_fd_instantiate);
+}
+
+/*
+ * /proc/pid/fd needs a special permission handler so that a process can still
+ * access /proc/self/fd after it has executed a setuid().
+ */
+int proc_fd_permission(struct inode *inode, int mask)
+{
+	int rv = generic_permission(inode, mask);
+	if (rv == 0)
+		return 0;
+	if (task_pid(current) == proc_pid(inode))
+		rv = 0;
+	return rv;
+}
+
+const struct inode_operations proc_fd_inode_operations = {
+	.lookup		= proc_lookupfd,
+	.permission	= proc_fd_permission,
+	.setattr	= proc_setattr,
+};
+
+static struct dentry *
+proc_fdinfo_instantiate(struct inode *dir, struct dentry *dentry,
+			struct task_struct *task, const void *ptr)
+{
+	struct dentry *error = ERR_PTR(-ENOENT);
+	unsigned fd = (unsigned long)ptr;
+	struct proc_inode *ei;
+	struct inode *inode;
+
+	inode = proc_pid_make_inode(dir->i_sb, task);
+	if (!inode)
+		goto out;
+
+	ei = PROC_I(inode);
+	ei->fd = fd;
+
+	inode->i_mode = S_IFREG | S_IRUSR;
+	inode->i_fop = &proc_fdinfo_file_operations;
+
+	d_set_d_op(dentry, &tid_fd_dentry_operations);
+	d_add(dentry, inode);
+
+	/* Close the race of the process dying before we return the dentry */
+	if (tid_fd_revalidate(dentry, 0))
+		error = NULL;
+ out:
+	return error;
+}
+
+static struct dentry *
+proc_lookupfdinfo(struct inode *dir, struct dentry *dentry, unsigned int flags)
+{
+	return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate);
+}
+
+static int proc_readfdinfo(struct file *filp, void *dirent, filldir_t filldir)
+{
+	return proc_readfd_common(filp, dirent, filldir,
+				  proc_fdinfo_instantiate);
+}
+
+const struct inode_operations proc_fdinfo_inode_operations = {
+	.lookup		= proc_lookupfdinfo,
+	.setattr	= proc_setattr,
+};
+
+const struct file_operations proc_fdinfo_operations = {
+	.read		= generic_read_dir,
+	.readdir	= proc_readfdinfo,
+	.llseek		= default_llseek,
+};
diff --git a/fs/proc/fd.h b/fs/proc/fd.h
new file mode 100644
index 000000000000..cbb1d47deda8
--- /dev/null
+++ b/fs/proc/fd.h
@@ -0,0 +1,14 @@
+#ifndef __PROCFS_FD_H__
+#define __PROCFS_FD_H__
+
+#include <linux/fs.h>
+
+extern const struct file_operations proc_fd_operations;
+extern const struct inode_operations proc_fd_inode_operations;
+
+extern const struct file_operations proc_fdinfo_operations;
+extern const struct inode_operations proc_fdinfo_inode_operations;
+
+extern int proc_fd_permission(struct inode *inode, int mask);
+
+#endif /* __PROCFS_FD_H__ */
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index e1167a1c9126..67925a7bd8cb 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -9,6 +9,7 @@
  * 2 of the License, or (at your option) any later version.
  */
 
+#include <linux/sched.h>
 #include <linux/proc_fs.h>
 struct  ctl_table_header;
 
@@ -65,6 +66,7 @@ extern const struct file_operations proc_clear_refs_operations;
 extern const struct file_operations proc_pagemap_operations;
 extern const struct file_operations proc_net_operations;
 extern const struct inode_operations proc_net_inode_operations;
+extern const struct inode_operations proc_pid_link_inode_operations;
 
 struct proc_maps_private {
 	struct pid *pid;
@@ -91,6 +93,52 @@ static inline int proc_fd(struct inode *inode)
 	return PROC_I(inode)->fd;
 }
 
+static inline int task_dumpable(struct task_struct *task)
+{
+	int dumpable = 0;
+	struct mm_struct *mm;
+
+	task_lock(task);
+	mm = task->mm;
+	if (mm)
+		dumpable = get_dumpable(mm);
+	task_unlock(task);
+	if(dumpable == 1)
+		return 1;
+	return 0;
+}
+
+static inline int pid_delete_dentry(const struct dentry * dentry)
+{
+	/* Is the task we represent dead?
+	 * If so, then don't put the dentry on the lru list,
+	 * kill it immediately.
+	 */
+	return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first;
+}
+
+static inline unsigned name_to_int(struct dentry *dentry)
+{
+	const char *name = dentry->d_name.name;
+	int len = dentry->d_name.len;
+	unsigned n = 0;
+
+	if (len > 1 && *name == '0')
+		goto out;
+	while (len-- > 0) {
+		unsigned c = *name++ - '0';
+		if (c > 9)
+			goto out;
+		if (n >= (~0U-9)/10)
+			goto out;
+		n *= 10;
+		n += c;
+	}
+	return n;
+out:
+	return ~0U;
+}
+
 struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *ino,
 		struct dentry *dentry);
 int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
-- 
cgit 


From ddd3e0771bc7b869c550687c204e21f0155d5496 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@openvz.org>
Date: Sun, 26 Aug 2012 18:28:20 +0400
Subject: procfs: Convert /proc/pid/fdinfo/ handling routines to seq-file v2

This patch converts /proc/pid/fdinfo/ handling routines to seq-file which
is needed to extend seq operations and plug in auxiliary fdinfo provides
from subsystems like eventfd/eventpoll/fsnotify.

Note the proc_fd_link no longer call for proc_fd_info, simply because
the guts of proc_fd_info() got merged into ->show() of that seq_file

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/proc/fd.c | 112 ++++++++++++++++++++++++++++++++++-------------------------
 1 file changed, 64 insertions(+), 48 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index 1b0f932b4bd9..9cef449c0f76 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -6,61 +6,68 @@
 #include <linux/namei.h>
 #include <linux/pid.h>
 #include <linux/security.h>
+#include <linux/file.h>
+#include <linux/seq_file.h>
 
 #include <linux/proc_fs.h>
 
 #include "internal.h"
 #include "fd.h"
 
-#define PROC_FDINFO_MAX 64
-
-static int proc_fd_info(struct inode *inode, struct path *path, char *info)
+static int seq_show(struct seq_file *m, void *v)
 {
-	struct task_struct *task = get_proc_task(inode);
 	struct files_struct *files = NULL;
-	int fd = proc_fd(inode);
-	struct file *file;
+	int f_flags = 0, ret = -ENOENT;
+	struct file *file = NULL;
+	struct task_struct *task;
+
+	task = get_proc_task(m->private);
+	if (!task)
+		return -ENOENT;
+
+	files = get_files_struct(task);
+	put_task_struct(task);
 
-	if (task) {
-		files = get_files_struct(task);
-		put_task_struct(task);
-	}
 	if (files) {
-		/*
-		 * We are not taking a ref to the file structure, so we must
-		 * hold ->file_lock.
-		 */
+		int fd = proc_fd(m->private);
+
 		spin_lock(&files->file_lock);
 		file = fcheck_files(files, fd);
 		if (file) {
-			unsigned int f_flags;
-			struct fdtable *fdt;
+			struct fdtable *fdt = files_fdtable(files);
 
-			fdt = files_fdtable(files);
 			f_flags = file->f_flags & ~O_CLOEXEC;
 			if (close_on_exec(fd, fdt))
 				f_flags |= O_CLOEXEC;
 
-			if (path) {
-				*path = file->f_path;
-				path_get(&file->f_path);
-			}
-			if (info)
-				snprintf(info, PROC_FDINFO_MAX,
-					 "pos:\t%lli\n"
-					 "flags:\t0%o\n",
-					 (long long) file->f_pos,
-					 f_flags);
-			spin_unlock(&files->file_lock);
-			put_files_struct(files);
-			return 0;
+			get_file(file);
+			ret = 0;
 		}
 		spin_unlock(&files->file_lock);
 		put_files_struct(files);
 	}
-	return -ENOENT;
+
+	if (!ret) {
+                seq_printf(m, "pos:\t%lli\nflags:\t0%o\n",
+			   (long long)file->f_pos, f_flags);
+		fput(file);
+	}
+
+	return ret;
 }
 
+static int seq_fdinfo_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, seq_show, inode);
+}
+
+static const struct file_operations proc_fdinfo_file_operations = {
+	.open		= seq_fdinfo_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
 static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags)
 {
 	struct files_struct *files;
@@ -130,7 +137,32 @@ static const struct dentry_operations tid_fd_dentry_operations = {
 
 static int proc_fd_link(struct dentry *dentry, struct path *path)
 {
-	return proc_fd_info(dentry->d_inode, path, NULL);
+	struct files_struct *files = NULL;
+	struct task_struct *task;
+	int ret = -ENOENT;
+
+	task = get_proc_task(dentry->d_inode);
+	if (task) {
+		files = get_files_struct(task);
+		put_task_struct(task);
+	}
+
+	if (files) {
+		int fd = proc_fd(dentry->d_inode);
+		struct file *fd_file;
+
+		spin_lock(&files->file_lock);
+		fd_file = fcheck_files(files, fd);
+		if (fd_file) {
+			*path = fd_file->f_path;
+			path_get(&fd_file->f_path);
+			ret = 0;
+		}
+		spin_unlock(&files->file_lock);
+		put_files_struct(files);
+	}
+
+	return ret;
 }
 
 static struct dentry *
@@ -245,22 +277,6 @@ out_no_task:
 	return retval;
 }
 
-static ssize_t proc_fdinfo_read(struct file *file, char __user *buf,
-				size_t len, loff_t *ppos)
-{
-	char tmp[PROC_FDINFO_MAX];
-	int err = proc_fd_info(file->f_path.dentry->d_inode, NULL, tmp);
-	if (!err)
-		err = simple_read_from_buffer(buf, len, ppos, tmp, strlen(tmp));
-	return err;
-}
-
-static const struct file_operations proc_fdinfo_file_operations = {
-	.open           = nonseekable_open,
-	.read		= proc_fdinfo_read,
-	.llseek		= no_llseek,
-};
-
 static int proc_readfd(struct file *filp, void *dirent, filldir_t filldir)
 {
 	return proc_readfd_common(filp, dirent, filldir, proc_fd_instantiate);
-- 
cgit 


From c6f3d81115989e274c42a852222b80d2e14ced6f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 26 Aug 2012 11:01:04 -0400
Subject: don't leak O_CLOEXEC into ->f_flags

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/open.c    | 2 +-
 fs/proc/fd.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/open.c b/fs/open.c
index 30760017deed..03028d0e7487 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -814,7 +814,7 @@ static inline int build_open_flags(int flags, umode_t mode, struct open_flags *o
 		op->mode = 0;
 
 	/* Must never be set by userspace */
-	flags &= ~FMODE_NONOTIFY;
+	flags &= ~FMODE_NONOTIFY & ~O_CLOEXEC;
 
 	/*
 	 * O_SYNC is implemented as __O_SYNC|O_DSYNC.  As many places only
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index 9cef449c0f76..f28a875f8779 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -36,7 +36,7 @@ static int seq_show(struct seq_file *m, void *v)
 		if (file) {
 			struct fdtable *fdt = files_fdtable(files);
 
-			f_flags = file->f_flags & ~O_CLOEXEC;
+			f_flags = file->f_flags;
 			if (close_on_exec(fd, fdt))
 				f_flags |= O_CLOEXEC;
 
-- 
cgit 


From f6d2ac5ca79d1fd468dbc77e488aec06e86f3035 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 26 Aug 2012 12:55:54 -0400
Subject: namei.c: fix BS comment

get_write_access() is needed for nfsd, not binfmt_aout (the latter
has no business doing anything of that kind, of course)

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index dd1ed1b8e98e..c5b85b3523c0 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3971,7 +3971,7 @@ EXPORT_SYMBOL(user_path_at);
 EXPORT_SYMBOL(follow_down_one);
 EXPORT_SYMBOL(follow_down);
 EXPORT_SYMBOL(follow_up);
-EXPORT_SYMBOL(get_write_access); /* binfmt_aout */
+EXPORT_SYMBOL(get_write_access); /* nfsd */
 EXPORT_SYMBOL(getname);
 EXPORT_SYMBOL(lock_rename);
 EXPORT_SYMBOL(lookup_one_len);
-- 
cgit 


From bf2965d5b5950d09e934ea5d961d79d0ed1fae7e Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 26 Aug 2012 20:13:36 -0400
Subject: switch ftruncate(2) to fget_light

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/open.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/open.c b/fs/open.c
index 03028d0e7487..9f61d7269d39 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -132,16 +132,16 @@ SYSCALL_DEFINE2(truncate, const char __user *, path, long, length)
 
 static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
 {
-	struct inode * inode;
+	struct inode *inode;
 	struct dentry *dentry;
-	struct file * file;
-	int error;
+	struct file *file;
+	int error, fput_needed;
 
 	error = -EINVAL;
 	if (length < 0)
 		goto out;
 	error = -EBADF;
-	file = fget(fd);
+	file = fget_light(fd, &fput_needed);
 	if (!file)
 		goto out;
 
@@ -172,7 +172,7 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
 		error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file);
 	sb_end_write(inode->i_sb);
 out_putf:
-	fput(file);
+	fput_light(file, fput_needed);
 out:
 	return error;
 }
-- 
cgit 


From 6b48c5b2079af1f81d8f249ae07a988d8c45b32f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 26 Aug 2012 20:15:40 -0400
Subject: switch fallocate(2) to fget_light()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/open.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/open.c b/fs/open.c
index 9f61d7269d39..da6d3f1ac243 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -277,12 +277,12 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len)
 {
 	struct file *file;
-	int error = -EBADF;
+	int error = -EBADF, fput_needed;
 
-	file = fget(fd);
+	file = fget_light(fd, &fput_needed);
 	if (file) {
 		error = do_fallocate(file, mode, offset, len);
-		fput(file);
+		fput_light(file, fput_needed);
 	}
 
 	return error;
-- 
cgit 


From d6483b7a78438bc333560d11b69e6a6a6cf55940 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 26 Aug 2012 20:22:10 -0400
Subject: switch fchmod(2) to fget_light()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/open.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/open.c b/fs/open.c
index da6d3f1ac243..3c741eae6b99 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -582,23 +582,21 @@ SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group
 
 SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group)
 {
-	struct file * file;
-	int error = -EBADF;
-	struct dentry * dentry;
+	struct file *file;
+	int error = -EBADF, fput_needed;
 
-	file = fget(fd);
+	file = fget_light(fd, &fput_needed);
 	if (!file)
 		goto out;
 
 	error = mnt_want_write_file(file);
 	if (error)
 		goto out_fput;
-	dentry = file->f_path.dentry;
-	audit_inode(NULL, dentry);
+	audit_inode(NULL, file->f_path.dentry);
 	error = chown_common(&file->f_path, user, group);
 	mnt_drop_write_file(file);
 out_fput:
-	fput(file);
+	fput_light(file, fput_needed);
 out:
 	return error;
 }
-- 
cgit 


From 399c9b862f853f5c33e5a3b1f9a3c2507bdd526b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 26 Aug 2012 21:00:03 -0400
Subject: ext4: close struct file leak on EXT4_IOC_MOVE_EXT

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ext4/ioctl.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 7f7dad787603..a0ee682dc394 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -258,7 +258,8 @@ group_extend_out:
 			       EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
 			ext4_msg(sb, KERN_ERR,
 				 "Online defrag not supported with bigalloc");
-			return -EOPNOTSUPP;
+			err = -EOPNOTSUPP;
+			goto mext_out;
 		}
 
 		err = mnt_want_write_file(filp);
-- 
cgit 


From 4557c669ef9801d96cf663331cdd1dcb8fa9c2f1 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 28 Aug 2012 10:19:41 -0400
Subject: export fget_light

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/file.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/file.c b/fs/file.c
index 15750b80e3ce..0f1bda4bebfa 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -797,6 +797,7 @@ struct file *fget_light(unsigned int fd, int *fput_needed)
 
 	return file;
 }
+EXPORT_SYMBOL(fget_light);
 
 struct file *fget_raw_light(unsigned int fd, int *fput_needed)
 {
-- 
cgit 


From 6bdf2954016ef7c1f4d4fa07a338ee197d9c3506 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 26 Aug 2012 21:01:46 -0400
Subject: switch EXT4_IOC_MOVE_EXT to fget_light()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ext4/ioctl.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index a0ee682dc394..39646f224514 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -234,7 +234,7 @@ group_extend_out:
 	case EXT4_IOC_MOVE_EXT: {
 		struct move_extent me;
 		struct file *donor_filp;
-		int err;
+		int err, fput_needed;
 
 		if (!(filp->f_mode & FMODE_READ) ||
 		    !(filp->f_mode & FMODE_WRITE))
@@ -245,7 +245,7 @@ group_extend_out:
 			return -EFAULT;
 		me.moved_len = 0;
 
-		donor_filp = fget(me.donor_fd);
+		donor_filp = fget_light(me.donor_fd, &fput_needed);
 		if (!donor_filp)
 			return -EBADF;
 
@@ -274,7 +274,7 @@ group_extend_out:
 				 &me, sizeof(me)))
 			err = -EFAULT;
 mext_out:
-		fput(donor_filp);
+		fput_light(donor_filp, fput_needed);
 		return err;
 	}
 
-- 
cgit 


From ecd188159efa112770d5f8a4a62f8d3586784f48 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 26 Aug 2012 21:20:24 -0400
Subject: switch btrfs_ioctl_snap_create_transid() to fget_light()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/btrfs/ioctl.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 9df50fa8a078..3c88abb0e265 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1422,7 +1422,8 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
 				     NULL, transid, readonly, inherit);
 	} else {
 		struct inode *src_inode;
-		src_file = fget(fd);
+		int fput_needed;
+		src_file = fget_light(fd, &fput_needed);
 		if (!src_file) {
 			ret = -EINVAL;
 			goto out_drop_write;
@@ -1433,13 +1434,12 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
 			printk(KERN_INFO "btrfs: Snapshot src from "
 			       "another FS\n");
 			ret = -EINVAL;
-			fput(src_file);
-			goto out_drop_write;
+		} else {
+			ret = btrfs_mksubvol(&file->f_path, name, namelen,
+					     BTRFS_I(src_inode)->root,
+					     transid, readonly, inherit);
 		}
-		ret = btrfs_mksubvol(&file->f_path, name, namelen,
-				     BTRFS_I(src_inode)->root,
-				     transid, readonly, inherit);
-		fput(src_file);
+		fput_light(src_file, fput_needed);
 	}
 out_drop_write:
 	mnt_drop_write_file(file);
-- 
cgit 


From 5e196a9cf557dbc2bf808824c6c4998150e18d06 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 26 Aug 2012 21:27:40 -0400
Subject: switch epoll_wait(2) to fget_light()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/eventpoll.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index eedec84c1809..567ae72e155d 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1809,7 +1809,7 @@ error_return:
 SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events,
 		int, maxevents, int, timeout)
 {
-	int error;
+	int error, fput_needed;
 	struct file *file;
 	struct eventpoll *ep;
 
@@ -1825,7 +1825,7 @@ SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events,
 
 	/* Get the "struct file *" for the eventpoll file */
 	error = -EBADF;
-	file = fget(epfd);
+	file = fget_light(epfd, &fput_needed);
 	if (!file)
 		goto error_return;
 
@@ -1847,7 +1847,7 @@ SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events,
 	error = ep_poll(ep, events, maxevents, timeout);
 
 error_fput:
-	fput(file);
+	fput_light(file, fput_needed);
 error_return:
 
 	return error;
-- 
cgit 


From 4109633f4c4dcdaedf0d85ae74dba334760c577b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 26 Aug 2012 21:32:02 -0400
Subject: switch timerfd_[sg]ettime(2) to fget_light()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/timerfd.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/timerfd.c b/fs/timerfd.c
index dffeb3795af1..dd91e9420c9e 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -234,15 +234,15 @@ static const struct file_operations timerfd_fops = {
 	.llseek		= noop_llseek,
 };
 
-static struct file *timerfd_fget(int fd)
+static struct file *timerfd_fget(int fd, int *fput_needed)
 {
 	struct file *file;
 
-	file = fget(fd);
+	file = fget_light(fd, fput_needed);
 	if (!file)
 		return ERR_PTR(-EBADF);
 	if (file->f_op != &timerfd_fops) {
-		fput(file);
+		fput_light(file, *fput_needed);
 		return ERR_PTR(-EINVAL);
 	}
 
@@ -287,7 +287,7 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
 	struct file *file;
 	struct timerfd_ctx *ctx;
 	struct itimerspec ktmr, kotmr;
-	int ret;
+	int ret, fput_needed;
 
 	if (copy_from_user(&ktmr, utmr, sizeof(ktmr)))
 		return -EFAULT;
@@ -297,7 +297,7 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
 	    !timespec_valid(&ktmr.it_interval))
 		return -EINVAL;
 
-	file = timerfd_fget(ufd);
+	file = timerfd_fget(ufd, &fput_needed);
 	if (IS_ERR(file))
 		return PTR_ERR(file);
 	ctx = file->private_data;
@@ -334,7 +334,7 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
 	ret = timerfd_setup(ctx, flags, &ktmr);
 
 	spin_unlock_irq(&ctx->wqh.lock);
-	fput(file);
+	fput_light(file, fput_needed);
 	if (otmr && copy_to_user(otmr, &kotmr, sizeof(kotmr)))
 		return -EFAULT;
 
@@ -346,8 +346,9 @@ SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr)
 	struct file *file;
 	struct timerfd_ctx *ctx;
 	struct itimerspec kotmr;
+	int fput_needed;
 
-	file = timerfd_fget(ufd);
+	file = timerfd_fget(ufd, &fput_needed);
 	if (IS_ERR(file))
 		return PTR_ERR(file);
 	ctx = file->private_data;
@@ -362,7 +363,7 @@ SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr)
 	kotmr.it_value = ktime_to_timespec(timerfd_get_remaining(ctx));
 	kotmr.it_interval = ktime_to_timespec(ctx->tintv);
 	spin_unlock_irq(&ctx->wqh.lock);
-	fput(file);
+	fput_light(file, fput_needed);
 
 	return copy_to_user(otmr, &kotmr, sizeof(kotmr)) ? -EFAULT: 0;
 }
-- 
cgit 


From 8319aa9127a1282b24c3ece473a058d246f35b0d Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 27 Aug 2012 03:18:55 -0400
Subject: switch btrfs_ioctl_clone() to fget_light()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/btrfs/ioctl.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 3c88abb0e265..3494f2f44167 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2350,7 +2350,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
 	struct btrfs_key key;
 	u32 nritems;
 	int slot;
-	int ret;
+	int ret, fput_needed;
 	u64 len = olen;
 	u64 bs = root->fs_info->sb->s_blocksize;
 	u64 hint_byte;
@@ -2376,7 +2376,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
 	if (ret)
 		return ret;
 
-	src_file = fget(srcfd);
+	src_file = fget_light(srcfd, &fput_needed);
 	if (!src_file) {
 		ret = -EBADF;
 		goto out_drop_write;
@@ -2724,7 +2724,7 @@ out_unlock:
 	vfree(buf);
 	btrfs_free_path(path);
 out_fput:
-	fput(src_file);
+	fput_light(src_file, fput_needed);
 out_drop_write:
 	mnt_drop_write_file(file);
 	return ret;
-- 
cgit 


From 78f7d75e5dd9aa1027e90d0b71d394603933c2ed Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 27 Aug 2012 12:54:13 -0400
Subject: switch coda get_device_index() to fget_light()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/coda/inode.c | 32 +++++++++++++++-----------------
 1 file changed, 15 insertions(+), 17 deletions(-)

(limited to 'fs')

diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index f1813120d753..bd2313d106e5 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -109,41 +109,39 @@ static int get_device_index(struct coda_mount_data *data)
 {
 	struct file *file;
 	struct inode *inode;
-	int idx;
+	int idx, fput_needed;
 
-	if(data == NULL) {
+	if (data == NULL) {
 		printk("coda_read_super: Bad mount data\n");
 		return -1;
 	}
 
-	if(data->version != CODA_MOUNT_VERSION) {
+	if (data->version != CODA_MOUNT_VERSION) {
 		printk("coda_read_super: Bad mount version\n");
 		return -1;
 	}
 
-	file = fget(data->fd);
-	inode = NULL;
-	if(file)
-		inode = file->f_path.dentry->d_inode;
-	
-	if(!inode || !S_ISCHR(inode->i_mode) ||
-	   imajor(inode) != CODA_PSDEV_MAJOR) {
-		if(file)
-			fput(file);
-
-		printk("coda_read_super: Bad file\n");
-		return -1;
+	file = fget_light(data->fd, &fput_needed);
+	if (!file)
+		goto Ebadf;
+	inode = file->f_path.dentry->d_inode;
+	if (!S_ISCHR(inode->i_mode) || imajor(inode) != CODA_PSDEV_MAJOR) {
+		fput_light(file, fput_needed);
+		goto Ebadf;
 	}
 
 	idx = iminor(inode);
-	fput(file);
+	fput_light(file, fput_needed);
 
-	if(idx < 0 || idx >= MAX_CODADEVS) {
+	if (idx < 0 || idx >= MAX_CODADEVS) {
 		printk("coda_read_super: Bad minor number\n");
 		return -1;
 	}
 
 	return idx;
+Ebadf:
+	printk("coda_read_super: Bad file\n");
+	return -1;
 }
 
 static int coda_fill_super(struct super_block *sb, void *data, int silent)
-- 
cgit 


From 1ea65c96077f9bb5c0e5e224a4da751d269c5f94 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 27 Aug 2012 12:57:12 -0400
Subject: switch xfs_swapext() to fget_light()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/xfs/xfs_dfrag.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index e00de08dc8ac..e6cdf224d7ea 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -49,10 +49,10 @@ xfs_swapext(
 {
 	xfs_inode_t     *ip, *tip;
 	struct file	*file, *tmp_file;
-	int		error = 0;
+	int		error = 0, fput_needed, fput_needed_tmp;
 
 	/* Pull information for the target fd */
-	file = fget((int)sxp->sx_fdtarget);
+	file = fget_light((int)sxp->sx_fdtarget, &fput_needed);
 	if (!file) {
 		error = XFS_ERROR(EINVAL);
 		goto out;
@@ -65,7 +65,7 @@ xfs_swapext(
 		goto out_put_file;
 	}
 
-	tmp_file = fget((int)sxp->sx_fdtmp);
+	tmp_file = fget_light((int)sxp->sx_fdtmp, &fput_needed_tmp);
 	if (!tmp_file) {
 		error = XFS_ERROR(EINVAL);
 		goto out_put_file;
@@ -105,9 +105,9 @@ xfs_swapext(
 	error = xfs_swap_extents(ip, tip, sxp);
 
  out_put_tmp_file:
-	fput(tmp_file);
+	fput_light(tmp_file, fput_needed_tmp);
  out_put_file:
-	fput(file);
+	fput_light(file, fput_needed);
  out:
 	return error;
 }
-- 
cgit 


From 64e09fa2e1fef1696a8685c7aad7e0d3dd24ce71 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 27 Aug 2012 12:59:52 -0400
Subject: switch xfs_find_handle() to fget_light()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/xfs/xfs_ioctl.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 0e0232c3b6d9..21483eac402d 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -72,11 +72,11 @@ xfs_find_handle(
 	struct inode		*inode;
 	struct file		*file = NULL;
 	struct path		path;
-	int			error;
+	int			error, fput_needed;
 	struct xfs_inode	*ip;
 
 	if (cmd == XFS_IOC_FD_TO_HANDLE) {
-		file = fget(hreq->fd);
+		file = fget_light(hreq->fd, &fput_needed);
 		if (!file)
 			return -EBADF;
 		inode = file->f_path.dentry->d_inode;
@@ -134,7 +134,7 @@ xfs_find_handle(
 
  out_put:
 	if (cmd == XFS_IOC_FD_TO_HANDLE)
-		fput(file);
+		fput_light(file, fput_needed);
 	else
 		path_put(&path);
 	return error;
-- 
cgit 


From cb0942b81249798e15c3f04eee2946ef543e8115 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 27 Aug 2012 14:48:26 -0400
Subject: make get_file() return its argument

simplifies a bunch of callers...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/autofs4/waitq.c  | 3 +--
 fs/fuse/dev.c       | 3 +--
 fs/nfsd/nfs4state.c | 3 +--
 fs/proc/base.c      | 3 +--
 fs/select.c         | 3 +--
 5 files changed, 5 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index da8876d38a7b..dce436e595c1 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -175,8 +175,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
 		return;
 	}
 
-	pipe = sbi->pipe;
-	get_file(pipe);
+	pipe = get_file(sbi->pipe);
 
 	mutex_unlock(&sbi->wq_mutex);
 
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index f4246cfc8d87..8c23fa7a91e6 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -148,8 +148,7 @@ static struct fuse_req *get_reserved_req(struct fuse_conn *fc,
 		if (ff->reserved_req) {
 			req = ff->reserved_req;
 			ff->reserved_req = NULL;
-			get_file(file);
-			req->stolen_file = file;
+			req->stolen_file = get_file(file);
 		}
 		spin_unlock(&fc->lock);
 	} while (!req);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index cc894eda385a..48a1bad37334 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2837,8 +2837,7 @@ static int nfs4_setlease(struct nfs4_delegation *dp, int flag)
 		return -ENOMEM;
 	}
 	fp->fi_lease = fl;
-	fp->fi_deleg_file = fl->fl_file;
-	get_file(fp->fi_deleg_file);
+	fp->fi_deleg_file = get_file(fl->fl_file);
 	atomic_set(&fp->fi_delegees, 1);
 	list_add(&dp->dl_perfile, &fp->fi_delegations);
 	return 0;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index b55c3bb298e3..f1e8438d21b5 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1979,8 +1979,7 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir)
 				if (++pos <= filp->f_pos)
 					continue;
 
-				get_file(vma->vm_file);
-				info.file = vma->vm_file;
+				info.file = get_file(vma->vm_file);
 				info.len = snprintf(info.name,
 						sizeof(info.name), "%lx-%lx",
 						vma->vm_start, vma->vm_end);
diff --git a/fs/select.c b/fs/select.c
index db14c781335e..ffdd16d6e691 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -220,8 +220,7 @@ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
 	struct poll_table_entry *entry = poll_get_entry(pwq);
 	if (!entry)
 		return;
-	get_file(filp);
-	entry->filp = filp;
+	entry->filp = get_file(filp);
 	entry->wait_address = wait_address;
 	entry->key = p->_key;
 	init_waitqueue_func_entry(&entry->wait, pollwake);
-- 
cgit 


From 7b540d0646ce122f0ba4520412be91e530719742 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 27 Aug 2012 14:55:26 -0400
Subject: proc_map_files_readdir(): don't bother with grabbing files

all we need is their ->f_mode, so just collect _that_

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/proc/base.c | 28 +++++++++-------------------
 1 file changed, 9 insertions(+), 19 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index f1e8438d21b5..df18db61d6d8 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1805,7 +1805,7 @@ out:
 }
 
 struct map_files_info {
-	struct file	*file;
+	fmode_t		mode;
 	unsigned long	len;
 	unsigned char	name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */
 };
@@ -1814,13 +1814,10 @@ static struct dentry *
 proc_map_files_instantiate(struct inode *dir, struct dentry *dentry,
 			   struct task_struct *task, const void *ptr)
 {
-	const struct file *file = ptr;
+	fmode_t mode = (fmode_t)(unsigned long)ptr;
 	struct proc_inode *ei;
 	struct inode *inode;
 
-	if (!file)
-		return ERR_PTR(-ENOENT);
-
 	inode = proc_pid_make_inode(dir->i_sb, task);
 	if (!inode)
 		return ERR_PTR(-ENOENT);
@@ -1832,9 +1829,9 @@ proc_map_files_instantiate(struct inode *dir, struct dentry *dentry,
 	inode->i_size = 64;
 	inode->i_mode = S_IFLNK;
 
-	if (file->f_mode & FMODE_READ)
+	if (mode & FMODE_READ)
 		inode->i_mode |= S_IRUSR;
-	if (file->f_mode & FMODE_WRITE)
+	if (mode & FMODE_WRITE)
 		inode->i_mode |= S_IWUSR;
 
 	d_set_d_op(dentry, &tid_map_files_dentry_operations);
@@ -1878,7 +1875,8 @@ static struct dentry *proc_map_files_lookup(struct inode *dir,
 	if (!vma)
 		goto out_no_vma;
 
-	result = proc_map_files_instantiate(dir, dentry, task, vma->vm_file);
+	result = proc_map_files_instantiate(dir, dentry, task,
+			(void *)(unsigned long)vma->vm_file->f_mode);
 
 out_no_vma:
 	up_read(&mm->mmap_sem);
@@ -1979,7 +1977,7 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir)
 				if (++pos <= filp->f_pos)
 					continue;
 
-				info.file = get_file(vma->vm_file);
+				info.mode = vma->vm_file->f_mode;
 				info.len = snprintf(info.name,
 						sizeof(info.name), "%lx-%lx",
 						vma->vm_start, vma->vm_end);
@@ -1994,19 +1992,11 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir)
 			ret = proc_fill_cache(filp, dirent, filldir,
 					      p->name, p->len,
 					      proc_map_files_instantiate,
-					      task, p->file);
+					      task,
+					      (void *)(unsigned long)p->mode);
 			if (ret)
 				break;
 			filp->f_pos++;
-			fput(p->file);
-		}
-		for (; i < nr_files; i++) {
-			/*
-			 * In case of error don't forget
-			 * to put rest of file refs.
-			 */
-			p = flex_array_get(fa, i);
-			fput(p->file);
 		}
 		if (fa)
 			flex_array_free(fa);
-- 
cgit 


From 2a117354b7bdfe13750a64307755e75436fb157a Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 27 Aug 2012 17:55:17 -0400
Subject: switch o2hb_region_dev_write() to fget_light()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ocfs2/cluster/heartbeat.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index a4e855e3690e..61c28ae266f5 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -1750,6 +1750,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
 	struct inode *inode = NULL;
 	ssize_t ret = -EINVAL;
 	int live_threshold;
+	int fput_needed;
 
 	if (reg->hr_bdev)
 		goto out;
@@ -1766,7 +1767,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
 	if (fd < 0 || fd >= INT_MAX)
 		goto out;
 
-	filp = fget(fd);
+	filp = fget_light(fd, &fput_needed);
 	if (filp == NULL)
 		goto out;
 
@@ -1884,7 +1885,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
 
 out:
 	if (filp)
-		fput(filp);
+		fput_light(filp, fput_needed);
 	if (inode)
 		iput(inode);
 	if (ret < 0) {
-- 
cgit 


From 2903ff019b346ab8d36ebbf54853c3aaf6590608 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 28 Aug 2012 12:52:22 -0400
Subject: switch simple cases of fget_light to fdget

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/btrfs/ioctl.c                   |  26 +++---
 fs/coda/inode.c                    |  14 +--
 fs/compat.c                        |  90 +++++++++----------
 fs/compat_ioctl.c                  |  27 +++---
 fs/eventpoll.c                     |  25 +++---
 fs/ext4/ioctl.c                    |  14 +--
 fs/fcntl.c                         |  32 +++----
 fs/fhandle.c                       |  17 ++--
 fs/ioctl.c                         |  25 ++----
 fs/locks.c                         |  20 ++---
 fs/namei.c                         |  39 ++++----
 fs/notify/fanotify/fanotify_user.c |  28 +++---
 fs/notify/inotify/inotify_user.c   |  28 +++---
 fs/ocfs2/cluster/heartbeat.c       |  39 ++++----
 fs/open.c                          |  64 +++++++-------
 fs/read_write.c                    | 176 ++++++++++++++++---------------------
 fs/readdir.c                       |  36 ++++----
 fs/select.c                        |  28 +++---
 fs/signalfd.c                      |  13 ++-
 fs/splice.c                        |  69 +++++++--------
 fs/stat.c                          |  10 +--
 fs/statfs.c                        |   9 +-
 fs/sync.c                          |  33 +++----
 fs/timerfd.c                       |  48 +++++-----
 fs/utimes.c                        |  11 ++-
 fs/xattr.c                         |  52 +++++------
 fs/xfs/xfs_dfrag.c                 |  36 ++++----
 fs/xfs/xfs_ioctl.c                 |  12 +--
 28 files changed, 459 insertions(+), 562 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 3494f2f44167..0a4f0c8bc58f 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1397,7 +1397,6 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
 				u64 *transid, bool readonly,
 				struct btrfs_qgroup_inherit **inherit)
 {
-	struct file *src_file;
 	int namelen;
 	int ret = 0;
 
@@ -1421,15 +1420,14 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
 		ret = btrfs_mksubvol(&file->f_path, name, namelen,
 				     NULL, transid, readonly, inherit);
 	} else {
+		struct fd src = fdget(fd);
 		struct inode *src_inode;
-		int fput_needed;
-		src_file = fget_light(fd, &fput_needed);
-		if (!src_file) {
+		if (!src.file) {
 			ret = -EINVAL;
 			goto out_drop_write;
 		}
 
-		src_inode = src_file->f_path.dentry->d_inode;
+		src_inode = src.file->f_path.dentry->d_inode;
 		if (src_inode->i_sb != file->f_path.dentry->d_inode->i_sb) {
 			printk(KERN_INFO "btrfs: Snapshot src from "
 			       "another FS\n");
@@ -1439,7 +1437,7 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
 					     BTRFS_I(src_inode)->root,
 					     transid, readonly, inherit);
 		}
-		fput_light(src_file, fput_needed);
+		fdput(src);
 	}
 out_drop_write:
 	mnt_drop_write_file(file);
@@ -2341,7 +2339,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
 {
 	struct inode *inode = fdentry(file)->d_inode;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
-	struct file *src_file;
+	struct fd src_file;
 	struct inode *src;
 	struct btrfs_trans_handle *trans;
 	struct btrfs_path *path;
@@ -2350,7 +2348,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
 	struct btrfs_key key;
 	u32 nritems;
 	int slot;
-	int ret, fput_needed;
+	int ret;
 	u64 len = olen;
 	u64 bs = root->fs_info->sb->s_blocksize;
 	u64 hint_byte;
@@ -2376,24 +2374,24 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
 	if (ret)
 		return ret;
 
-	src_file = fget_light(srcfd, &fput_needed);
-	if (!src_file) {
+	src_file = fdget(srcfd);
+	if (!src_file.file) {
 		ret = -EBADF;
 		goto out_drop_write;
 	}
 
 	ret = -EXDEV;
-	if (src_file->f_path.mnt != file->f_path.mnt)
+	if (src_file.file->f_path.mnt != file->f_path.mnt)
 		goto out_fput;
 
-	src = src_file->f_dentry->d_inode;
+	src = src_file.file->f_dentry->d_inode;
 
 	ret = -EINVAL;
 	if (src == inode)
 		goto out_fput;
 
 	/* the src must be open for reading */
-	if (!(src_file->f_mode & FMODE_READ))
+	if (!(src_file.file->f_mode & FMODE_READ))
 		goto out_fput;
 
 	/* don't make the dst file partly checksummed */
@@ -2724,7 +2722,7 @@ out_unlock:
 	vfree(buf);
 	btrfs_free_path(path);
 out_fput:
-	fput_light(src_file, fput_needed);
+	fdput(src_file);
 out_drop_write:
 	mnt_drop_write_file(file);
 	return ret;
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index bd2313d106e5..d315c6c5891a 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -107,9 +107,9 @@ static const struct super_operations coda_super_operations =
 
 static int get_device_index(struct coda_mount_data *data)
 {
-	struct file *file;
+	struct fd f;
 	struct inode *inode;
-	int idx, fput_needed;
+	int idx;
 
 	if (data == NULL) {
 		printk("coda_read_super: Bad mount data\n");
@@ -121,17 +121,17 @@ static int get_device_index(struct coda_mount_data *data)
 		return -1;
 	}
 
-	file = fget_light(data->fd, &fput_needed);
-	if (!file)
+	f = fdget(data->fd);
+	if (!f.file)
 		goto Ebadf;
-	inode = file->f_path.dentry->d_inode;
+	inode = f.file->f_path.dentry->d_inode;
 	if (!S_ISCHR(inode->i_mode) || imajor(inode) != CODA_PSDEV_MAJOR) {
-		fput_light(file, fput_needed);
+		fdput(f);
 		goto Ebadf;
 	}
 
 	idx = iminor(inode);
-	fput_light(file, fput_needed);
+	fdput(f);
 
 	if (idx < 0 || idx >= MAX_CODADEVS) {
 		printk("coda_read_super: Bad minor number\n");
diff --git a/fs/compat.c b/fs/compat.c
index 1bdb350ea5d3..d72d51e19025 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -870,22 +870,20 @@ asmlinkage long compat_sys_old_readdir(unsigned int fd,
 	struct compat_old_linux_dirent __user *dirent, unsigned int count)
 {
 	int error;
-	struct file *file;
-	int fput_needed;
+	struct fd f = fdget(fd);
 	struct compat_readdir_callback buf;
 
-	file = fget_light(fd, &fput_needed);
-	if (!file)
+	if (!f.file)
 		return -EBADF;
 
 	buf.result = 0;
 	buf.dirent = dirent;
 
-	error = vfs_readdir(file, compat_fillonedir, &buf);
+	error = vfs_readdir(f.file, compat_fillonedir, &buf);
 	if (buf.result)
 		error = buf.result;
 
-	fput_light(file, fput_needed);
+	fdput(f);
 	return error;
 }
 
@@ -949,17 +947,16 @@ efault:
 asmlinkage long compat_sys_getdents(unsigned int fd,
 		struct compat_linux_dirent __user *dirent, unsigned int count)
 {
-	struct file * file;
+	struct fd f;
 	struct compat_linux_dirent __user * lastdirent;
 	struct compat_getdents_callback buf;
-	int fput_needed;
 	int error;
 
 	if (!access_ok(VERIFY_WRITE, dirent, count))
 		return -EFAULT;
 
-	file = fget_light(fd, &fput_needed);
-	if (!file)
+	f = fdget(fd);
+	if (!f.file)
 		return -EBADF;
 
 	buf.current_dir = dirent;
@@ -967,17 +964,17 @@ asmlinkage long compat_sys_getdents(unsigned int fd,
 	buf.count = count;
 	buf.error = 0;
 
-	error = vfs_readdir(file, compat_filldir, &buf);
+	error = vfs_readdir(f.file, compat_filldir, &buf);
 	if (error >= 0)
 		error = buf.error;
 	lastdirent = buf.previous;
 	if (lastdirent) {
-		if (put_user(file->f_pos, &lastdirent->d_off))
+		if (put_user(f.file->f_pos, &lastdirent->d_off))
 			error = -EFAULT;
 		else
 			error = count - buf.count;
 	}
-	fput_light(file, fput_needed);
+	fdput(f);
 	return error;
 }
 
@@ -1035,17 +1032,16 @@ efault:
 asmlinkage long compat_sys_getdents64(unsigned int fd,
 		struct linux_dirent64 __user * dirent, unsigned int count)
 {
-	struct file * file;
+	struct fd f;
 	struct linux_dirent64 __user * lastdirent;
 	struct compat_getdents_callback64 buf;
-	int fput_needed;
 	int error;
 
 	if (!access_ok(VERIFY_WRITE, dirent, count))
 		return -EFAULT;
 
-	file = fget_light(fd, &fput_needed);
-	if (!file)
+	f = fdget(fd);
+	if (!f.file)
 		return -EBADF;
 
 	buf.current_dir = dirent;
@@ -1053,18 +1049,18 @@ asmlinkage long compat_sys_getdents64(unsigned int fd,
 	buf.count = count;
 	buf.error = 0;
 
-	error = vfs_readdir(file, compat_filldir64, &buf);
+	error = vfs_readdir(f.file, compat_filldir64, &buf);
 	if (error >= 0)
 		error = buf.error;
 	lastdirent = buf.previous;
 	if (lastdirent) {
-		typeof(lastdirent->d_off) d_off = file->f_pos;
+		typeof(lastdirent->d_off) d_off = f.file->f_pos;
 		if (__put_user_unaligned(d_off, &lastdirent->d_off))
 			error = -EFAULT;
 		else
 			error = count - buf.count;
 	}
-	fput_light(file, fput_needed);
+	fdput(f);
 	return error;
 }
 #endif /* ! __ARCH_OMIT_COMPAT_SYS_GETDENTS64 */
@@ -1152,18 +1148,16 @@ asmlinkage ssize_t
 compat_sys_readv(unsigned long fd, const struct compat_iovec __user *vec,
 		 unsigned long vlen)
 {
-	struct file *file;
-	int fput_needed;
+	struct fd f = fdget(fd);
 	ssize_t ret;
 	loff_t pos;
 
-	file = fget_light(fd, &fput_needed);
-	if (!file)
+	if (!f.file)
 		return -EBADF;
-	pos = file->f_pos;
-	ret = compat_readv(file, vec, vlen, &pos);
-	file->f_pos = pos;
-	fput_light(file, fput_needed);
+	pos = f.file->f_pos;
+	ret = compat_readv(f.file, vec, vlen, &pos);
+	f.file->f_pos = pos;
+	fdput(f);
 	return ret;
 }
 
@@ -1171,19 +1165,18 @@ asmlinkage ssize_t
 compat_sys_preadv64(unsigned long fd, const struct compat_iovec __user *vec,
 		    unsigned long vlen, loff_t pos)
 {
-	struct file *file;
-	int fput_needed;
+	struct fd f;
 	ssize_t ret;
 
 	if (pos < 0)
 		return -EINVAL;
-	file = fget_light(fd, &fput_needed);
-	if (!file)
+	f = fdget(fd);
+	if (!f.file)
 		return -EBADF;
 	ret = -ESPIPE;
-	if (file->f_mode & FMODE_PREAD)
-		ret = compat_readv(file, vec, vlen, &pos);
-	fput_light(file, fput_needed);
+	if (f.file->f_mode & FMODE_PREAD)
+		ret = compat_readv(f.file, vec, vlen, &pos);
+	fdput(f);
 	return ret;
 }
 
@@ -1221,18 +1214,16 @@ asmlinkage ssize_t
 compat_sys_writev(unsigned long fd, const struct compat_iovec __user *vec,
 		  unsigned long vlen)
 {
-	struct file *file;
-	int fput_needed;
+	struct fd f = fdget(fd);
 	ssize_t ret;
 	loff_t pos;
 
-	file = fget_light(fd, &fput_needed);
-	if (!file)
+	if (!f.file)
 		return -EBADF;
-	pos = file->f_pos;
-	ret = compat_writev(file, vec, vlen, &pos);
-	file->f_pos = pos;
-	fput_light(file, fput_needed);
+	pos = f.file->f_pos;
+	ret = compat_writev(f.file, vec, vlen, &pos);
+	f.file->f_pos = pos;
+	fdput(f);
 	return ret;
 }
 
@@ -1240,19 +1231,18 @@ asmlinkage ssize_t
 compat_sys_pwritev64(unsigned long fd, const struct compat_iovec __user *vec,
 		     unsigned long vlen, loff_t pos)
 {
-	struct file *file;
-	int fput_needed;
+	struct fd f;
 	ssize_t ret;
 
 	if (pos < 0)
 		return -EINVAL;
-	file = fget_light(fd, &fput_needed);
-	if (!file)
+	f = fdget(fd);
+	if (!f.file)
 		return -EBADF;
 	ret = -ESPIPE;
-	if (file->f_mode & FMODE_PWRITE)
-		ret = compat_writev(file, vec, vlen, &pos);
-	fput_light(file, fput_needed);
+	if (f.file->f_mode & FMODE_PWRITE)
+		ret = compat_writev(f.file, vec, vlen, &pos);
+	fdput(f);
 	return ret;
 }
 
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index debdfe0fc809..48f1987bec34 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1531,16 +1531,13 @@ static int compat_ioctl_check_table(unsigned int xcmd)
 asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
 				unsigned long arg)
 {
-	struct file *filp;
+	struct fd f = fdget(fd);
 	int error = -EBADF;
-	int fput_needed;
-
-	filp = fget_light(fd, &fput_needed);
-	if (!filp)
+	if (!f.file)
 		goto out;
 
 	/* RED-PEN how should LSM module know it's handling 32bit? */
-	error = security_file_ioctl(filp, cmd, arg);
+	error = security_file_ioctl(f.file, cmd, arg);
 	if (error)
 		goto out_fput;
 
@@ -1560,30 +1557,30 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
 #if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
 	case FS_IOC_RESVSP_32:
 	case FS_IOC_RESVSP64_32:
-		error = compat_ioctl_preallocate(filp, compat_ptr(arg));
+		error = compat_ioctl_preallocate(f.file, compat_ptr(arg));
 		goto out_fput;
 #else
 	case FS_IOC_RESVSP:
 	case FS_IOC_RESVSP64:
-		error = ioctl_preallocate(filp, compat_ptr(arg));
+		error = ioctl_preallocate(f.file, compat_ptr(arg));
 		goto out_fput;
 #endif
 
 	case FIBMAP:
 	case FIGETBSZ:
 	case FIONREAD:
-		if (S_ISREG(filp->f_path.dentry->d_inode->i_mode))
+		if (S_ISREG(f.file->f_path.dentry->d_inode->i_mode))
 			break;
 		/*FALL THROUGH*/
 
 	default:
-		if (filp->f_op && filp->f_op->compat_ioctl) {
-			error = filp->f_op->compat_ioctl(filp, cmd, arg);
+		if (f.file->f_op && f.file->f_op->compat_ioctl) {
+			error = f.file->f_op->compat_ioctl(f.file, cmd, arg);
 			if (error != -ENOIOCTLCMD)
 				goto out_fput;
 		}
 
-		if (!filp->f_op || !filp->f_op->unlocked_ioctl)
+		if (!f.file->f_op || !f.file->f_op->unlocked_ioctl)
 			goto do_ioctl;
 		break;
 	}
@@ -1591,7 +1588,7 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
 	if (compat_ioctl_check_table(XFORM(cmd)))
 		goto found_handler;
 
-	error = do_ioctl_trans(fd, cmd, arg, filp);
+	error = do_ioctl_trans(fd, cmd, arg, f.file);
 	if (error == -ENOIOCTLCMD)
 		error = -ENOTTY;
 
@@ -1600,9 +1597,9 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
  found_handler:
 	arg = (unsigned long)compat_ptr(arg);
  do_ioctl:
-	error = do_vfs_ioctl(filp, fd, cmd, arg);
+	error = do_vfs_ioctl(f.file, fd, cmd, arg);
  out_fput:
-	fput_light(filp, fput_needed);
+	fdput(f);
  out:
 	return error;
 }
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 567ae72e155d..cd96649bfe62 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1809,8 +1809,8 @@ error_return:
 SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events,
 		int, maxevents, int, timeout)
 {
-	int error, fput_needed;
-	struct file *file;
+	int error;
+	struct fd f;
 	struct eventpoll *ep;
 
 	/* The maximum number of event must be greater than zero */
@@ -1818,38 +1818,33 @@ SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events,
 		return -EINVAL;
 
 	/* Verify that the area passed by the user is writeable */
-	if (!access_ok(VERIFY_WRITE, events, maxevents * sizeof(struct epoll_event))) {
-		error = -EFAULT;
-		goto error_return;
-	}
+	if (!access_ok(VERIFY_WRITE, events, maxevents * sizeof(struct epoll_event)))
+		return -EFAULT;
 
 	/* Get the "struct file *" for the eventpoll file */
-	error = -EBADF;
-	file = fget_light(epfd, &fput_needed);
-	if (!file)
-		goto error_return;
+	f = fdget(epfd);
+	if (!f.file)
+		return -EBADF;
 
 	/*
 	 * We have to check that the file structure underneath the fd
 	 * the user passed to us _is_ an eventpoll file.
 	 */
 	error = -EINVAL;
-	if (!is_file_epoll(file))
+	if (!is_file_epoll(f.file))
 		goto error_fput;
 
 	/*
 	 * At this point it is safe to assume that the "private_data" contains
 	 * our own data structure.
 	 */
-	ep = file->private_data;
+	ep = f.file->private_data;
 
 	/* Time to fish for events ... */
 	error = ep_poll(ep, events, maxevents, timeout);
 
 error_fput:
-	fput_light(file, fput_needed);
-error_return:
-
+	fdput(f);
 	return error;
 }
 
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 39646f224514..5439d6a56e99 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -233,8 +233,8 @@ group_extend_out:
 
 	case EXT4_IOC_MOVE_EXT: {
 		struct move_extent me;
-		struct file *donor_filp;
-		int err, fput_needed;
+		struct fd donor;
+		int err;
 
 		if (!(filp->f_mode & FMODE_READ) ||
 		    !(filp->f_mode & FMODE_WRITE))
@@ -245,11 +245,11 @@ group_extend_out:
 			return -EFAULT;
 		me.moved_len = 0;
 
-		donor_filp = fget_light(me.donor_fd, &fput_needed);
-		if (!donor_filp)
+		donor = fdget(me.donor_fd);
+		if (!donor.file)
 			return -EBADF;
 
-		if (!(donor_filp->f_mode & FMODE_WRITE)) {
+		if (!(donor.file->f_mode & FMODE_WRITE)) {
 			err = -EBADF;
 			goto mext_out;
 		}
@@ -266,7 +266,7 @@ group_extend_out:
 		if (err)
 			goto mext_out;
 
-		err = ext4_move_extents(filp, donor_filp, me.orig_start,
+		err = ext4_move_extents(filp, donor.file, me.orig_start,
 					me.donor_start, me.len, &me.moved_len);
 		mnt_drop_write_file(filp);
 
@@ -274,7 +274,7 @@ group_extend_out:
 				 &me, sizeof(me)))
 			err = -EFAULT;
 mext_out:
-		fput_light(donor_filp, fput_needed);
+		fdput(donor);
 		return err;
 	}
 
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 40a5bfb72cca..91af39a33af7 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -348,25 +348,23 @@ static int check_fcntl_cmd(unsigned cmd)
 
 SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
 {	
-	struct file *filp;
-	int fput_needed;
+	struct fd f = fdget_raw(fd);
 	long err = -EBADF;
 
-	filp = fget_raw_light(fd, &fput_needed);
-	if (!filp)
+	if (!f.file)
 		goto out;
 
-	if (unlikely(filp->f_mode & FMODE_PATH)) {
+	if (unlikely(f.file->f_mode & FMODE_PATH)) {
 		if (!check_fcntl_cmd(cmd))
 			goto out1;
 	}
 
-	err = security_file_fcntl(filp, cmd, arg);
+	err = security_file_fcntl(f.file, cmd, arg);
 	if (!err)
-		err = do_fcntl(fd, cmd, arg, filp);
+		err = do_fcntl(fd, cmd, arg, f.file);
 
 out1:
- 	fput_light(filp, fput_needed);
+ 	fdput(f);
 out:
 	return err;
 }
@@ -375,38 +373,36 @@ out:
 SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
 		unsigned long, arg)
 {	
-	struct file * filp;
+	struct fd f = fdget_raw(fd);
 	long err = -EBADF;
-	int fput_needed;
 
-	filp = fget_raw_light(fd, &fput_needed);
-	if (!filp)
+	if (!f.file)
 		goto out;
 
-	if (unlikely(filp->f_mode & FMODE_PATH)) {
+	if (unlikely(f.file->f_mode & FMODE_PATH)) {
 		if (!check_fcntl_cmd(cmd))
 			goto out1;
 	}
 
-	err = security_file_fcntl(filp, cmd, arg);
+	err = security_file_fcntl(f.file, cmd, arg);
 	if (err)
 		goto out1;
 	
 	switch (cmd) {
 		case F_GETLK64:
-			err = fcntl_getlk64(filp, (struct flock64 __user *) arg);
+			err = fcntl_getlk64(f.file, (struct flock64 __user *) arg);
 			break;
 		case F_SETLK64:
 		case F_SETLKW64:
-			err = fcntl_setlk64(fd, filp, cmd,
+			err = fcntl_setlk64(fd, f.file, cmd,
 					(struct flock64 __user *) arg);
 			break;
 		default:
-			err = do_fcntl(fd, cmd, arg, filp);
+			err = do_fcntl(fd, cmd, arg, f.file);
 			break;
 	}
 out1:
-	fput_light(filp, fput_needed);
+	fdput(f);
 out:
 	return err;
 }
diff --git a/fs/fhandle.c b/fs/fhandle.c
index a48e4a139be1..f775bfdd6e4a 100644
--- a/fs/fhandle.c
+++ b/fs/fhandle.c
@@ -113,24 +113,21 @@ SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name,
 
 static struct vfsmount *get_vfsmount_from_fd(int fd)
 {
-	struct path path;
+	struct vfsmount *mnt;
 
 	if (fd == AT_FDCWD) {
 		struct fs_struct *fs = current->fs;
 		spin_lock(&fs->lock);
-		path = fs->pwd;
-		mntget(path.mnt);
+		mnt = mntget(fs->pwd.mnt);
 		spin_unlock(&fs->lock);
 	} else {
-		int fput_needed;
-		struct file *file = fget_light(fd, &fput_needed);
-		if (!file)
+		struct fd f = fdget(fd);
+		if (!f.file)
 			return ERR_PTR(-EBADF);
-		path = file->f_path;
-		mntget(path.mnt);
-		fput_light(file, fput_needed);
+		mnt = mntget(f.file->f_path.mnt);
+		fdput(f);
 	}
-	return path.mnt;
+	return mnt;
 }
 
 static int vfs_dentry_acceptable(void *context, struct dentry *dentry)
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 29167bebe874..3bdad6d1f268 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -603,21 +603,14 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
 
 SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
 {
-	struct file *filp;
-	int error = -EBADF;
-	int fput_needed;
-
-	filp = fget_light(fd, &fput_needed);
-	if (!filp)
-		goto out;
-
-	error = security_file_ioctl(filp, cmd, arg);
-	if (error)
-		goto out_fput;
-
-	error = do_vfs_ioctl(filp, fd, cmd, arg);
- out_fput:
-	fput_light(filp, fput_needed);
- out:
+	int error;
+	struct fd f = fdget(fd);
+
+	if (!f.file)
+		return -EBADF;
+	error = security_file_ioctl(f.file, cmd, arg);
+	if (!error)
+		error = do_vfs_ioctl(f.file, fd, cmd, arg);
+	fdput(f);
 	return error;
 }
diff --git a/fs/locks.c b/fs/locks.c
index 7e81bfc75164..abc7dc6c490b 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1625,15 +1625,13 @@ EXPORT_SYMBOL(flock_lock_file_wait);
  */
 SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
 {
-	struct file *filp;
-	int fput_needed;
+	struct fd f = fdget(fd);
 	struct file_lock *lock;
 	int can_sleep, unlock;
 	int error;
 
 	error = -EBADF;
-	filp = fget_light(fd, &fput_needed);
-	if (!filp)
+	if (!f.file)
 		goto out;
 
 	can_sleep = !(cmd & LOCK_NB);
@@ -1641,31 +1639,31 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
 	unlock = (cmd == LOCK_UN);
 
 	if (!unlock && !(cmd & LOCK_MAND) &&
-	    !(filp->f_mode & (FMODE_READ|FMODE_WRITE)))
+	    !(f.file->f_mode & (FMODE_READ|FMODE_WRITE)))
 		goto out_putf;
 
-	error = flock_make_lock(filp, &lock, cmd);
+	error = flock_make_lock(f.file, &lock, cmd);
 	if (error)
 		goto out_putf;
 	if (can_sleep)
 		lock->fl_flags |= FL_SLEEP;
 
-	error = security_file_lock(filp, lock->fl_type);
+	error = security_file_lock(f.file, lock->fl_type);
 	if (error)
 		goto out_free;
 
-	if (filp->f_op && filp->f_op->flock)
-		error = filp->f_op->flock(filp,
+	if (f.file->f_op && f.file->f_op->flock)
+		error = f.file->f_op->flock(f.file,
 					  (can_sleep) ? F_SETLKW : F_SETLK,
 					  lock);
 	else
-		error = flock_lock_file_wait(filp, lock);
+		error = flock_lock_file_wait(f.file, lock);
 
  out_free:
 	locks_free_lock(lock);
 
  out_putf:
-	fput_light(filp, fput_needed);
+	fdput(f);
  out:
 	return error;
 }
diff --git a/fs/namei.c b/fs/namei.c
index c5b85b3523c0..e1c7072c7afa 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1797,8 +1797,6 @@ static int path_init(int dfd, const char *name, unsigned int flags,
 		     struct nameidata *nd, struct file **fp)
 {
 	int retval = 0;
-	int fput_needed;
-	struct file *file;
 
 	nd->last_type = LAST_ROOT; /* if there are only slashes... */
 	nd->flags = flags | LOOKUP_JUMPED;
@@ -1850,44 +1848,41 @@ static int path_init(int dfd, const char *name, unsigned int flags,
 			get_fs_pwd(current->fs, &nd->path);
 		}
 	} else {
+		struct fd f = fdget_raw(dfd);
 		struct dentry *dentry;
 
-		file = fget_raw_light(dfd, &fput_needed);
-		retval = -EBADF;
-		if (!file)
-			goto out_fail;
+		if (!f.file)
+			return -EBADF;
 
-		dentry = file->f_path.dentry;
+		dentry = f.file->f_path.dentry;
 
 		if (*name) {
-			retval = -ENOTDIR;
-			if (!S_ISDIR(dentry->d_inode->i_mode))
-				goto fput_fail;
+			if (!S_ISDIR(dentry->d_inode->i_mode)) {
+				fdput(f);
+				return -ENOTDIR;
+			}
 
 			retval = inode_permission(dentry->d_inode, MAY_EXEC);
-			if (retval)
-				goto fput_fail;
+			if (retval) {
+				fdput(f);
+				return retval;
+			}
 		}
 
-		nd->path = file->f_path;
+		nd->path = f.file->f_path;
 		if (flags & LOOKUP_RCU) {
-			if (fput_needed)
-				*fp = file;
+			if (f.need_put)
+				*fp = f.file;
 			nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
 			lock_rcu_walk();
 		} else {
-			path_get(&file->f_path);
-			fput_light(file, fput_needed);
+			path_get(&nd->path);
+			fdput(f);
 		}
 	}
 
 	nd->inode = nd->path.dentry->d_inode;
 	return 0;
-
-fput_fail:
-	fput_light(file, fput_needed);
-out_fail:
-	return retval;
 }
 
 static inline int lookup_last(struct nameidata *nd, struct path *path)
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index ea48693940f1..721d692fa8d4 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -451,24 +451,22 @@ static int fanotify_find_path(int dfd, const char __user *filename,
 		 dfd, filename, flags);
 
 	if (filename == NULL) {
-		struct file *file;
-		int fput_needed;
+		struct fd f = fdget(dfd);
 
 		ret = -EBADF;
-		file = fget_light(dfd, &fput_needed);
-		if (!file)
+		if (!f.file)
 			goto out;
 
 		ret = -ENOTDIR;
 		if ((flags & FAN_MARK_ONLYDIR) &&
-		    !(S_ISDIR(file->f_path.dentry->d_inode->i_mode))) {
-			fput_light(file, fput_needed);
+		    !(S_ISDIR(f.file->f_path.dentry->d_inode->i_mode))) {
+			fdput(f);
 			goto out;
 		}
 
-		*path = file->f_path;
+		*path = f.file->f_path;
 		path_get(path);
-		fput_light(file, fput_needed);
+		fdput(f);
 	} else {
 		unsigned int lookup_flags = 0;
 
@@ -748,9 +746,9 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags,
 	struct inode *inode = NULL;
 	struct vfsmount *mnt = NULL;
 	struct fsnotify_group *group;
-	struct file *filp;
+	struct fd f;
 	struct path path;
-	int ret, fput_needed;
+	int ret;
 
 	pr_debug("%s: fanotify_fd=%d flags=%x dfd=%d pathname=%p mask=%llx\n",
 		 __func__, fanotify_fd, flags, dfd, pathname, mask);
@@ -784,15 +782,15 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags,
 #endif
 		return -EINVAL;
 
-	filp = fget_light(fanotify_fd, &fput_needed);
-	if (unlikely(!filp))
+	f = fdget(fanotify_fd);
+	if (unlikely(!f.file))
 		return -EBADF;
 
 	/* verify that this is indeed an fanotify instance */
 	ret = -EINVAL;
-	if (unlikely(filp->f_op != &fanotify_fops))
+	if (unlikely(f.file->f_op != &fanotify_fops))
 		goto fput_and_out;
-	group = filp->private_data;
+	group = f.file->private_data;
 
 	/*
 	 * group->priority == FS_PRIO_0 == FAN_CLASS_NOTIF.  These are not
@@ -839,7 +837,7 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags,
 
 	path_put(&path);
 fput_and_out:
-	fput_light(filp, fput_needed);
+	fdput(f);
 	return ret;
 }
 
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 8445fbc8985c..c311dda054a3 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -757,16 +757,16 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname,
 	struct fsnotify_group *group;
 	struct inode *inode;
 	struct path path;
-	struct file *filp;
-	int ret, fput_needed;
+	struct fd f;
+	int ret;
 	unsigned flags = 0;
 
-	filp = fget_light(fd, &fput_needed);
-	if (unlikely(!filp))
+	f = fdget(fd);
+	if (unlikely(!f.file))
 		return -EBADF;
 
 	/* verify that this is indeed an inotify instance */
-	if (unlikely(filp->f_op != &inotify_fops)) {
+	if (unlikely(f.file->f_op != &inotify_fops)) {
 		ret = -EINVAL;
 		goto fput_and_out;
 	}
@@ -782,13 +782,13 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname,
 
 	/* inode held in place by reference to path; group by fget on fd */
 	inode = path.dentry->d_inode;
-	group = filp->private_data;
+	group = f.file->private_data;
 
 	/* create/update an inode mark */
 	ret = inotify_update_watch(group, inode, mask);
 	path_put(&path);
 fput_and_out:
-	fput_light(filp, fput_needed);
+	fdput(f);
 	return ret;
 }
 
@@ -796,19 +796,19 @@ SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd)
 {
 	struct fsnotify_group *group;
 	struct inotify_inode_mark *i_mark;
-	struct file *filp;
-	int ret = 0, fput_needed;
+	struct fd f;
+	int ret = 0;
 
-	filp = fget_light(fd, &fput_needed);
-	if (unlikely(!filp))
+	f = fdget(fd);
+	if (unlikely(!f.file))
 		return -EBADF;
 
 	/* verify that this is indeed an inotify instance */
 	ret = -EINVAL;
-	if (unlikely(filp->f_op != &inotify_fops))
+	if (unlikely(f.file->f_op != &inotify_fops))
 		goto out;
 
-	group = filp->private_data;
+	group = f.file->private_data;
 
 	ret = -EINVAL;
 	i_mark = inotify_idr_find(group, wd);
@@ -823,7 +823,7 @@ SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd)
 	fsnotify_put_mark(&i_mark->fsn_mark);
 
 out:
-	fput_light(filp, fput_needed);
+	fdput(f);
 	return ret;
 }
 
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 61c28ae266f5..f7c648d7d6bf 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -1746,11 +1746,10 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
 	long fd;
 	int sectsize;
 	char *p = (char *)page;
-	struct file *filp = NULL;
-	struct inode *inode = NULL;
+	struct fd f;
+	struct inode *inode;
 	ssize_t ret = -EINVAL;
 	int live_threshold;
-	int fput_needed;
 
 	if (reg->hr_bdev)
 		goto out;
@@ -1767,26 +1766,26 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
 	if (fd < 0 || fd >= INT_MAX)
 		goto out;
 
-	filp = fget_light(fd, &fput_needed);
-	if (filp == NULL)
+	f = fdget(fd);
+	if (f.file == NULL)
 		goto out;
 
 	if (reg->hr_blocks == 0 || reg->hr_start_block == 0 ||
 	    reg->hr_block_bytes == 0)
-		goto out;
+		goto out2;
 
-	inode = igrab(filp->f_mapping->host);
+	inode = igrab(f.file->f_mapping->host);
 	if (inode == NULL)
-		goto out;
+		goto out2;
 
 	if (!S_ISBLK(inode->i_mode))
-		goto out;
+		goto out3;
 
-	reg->hr_bdev = I_BDEV(filp->f_mapping->host);
+	reg->hr_bdev = I_BDEV(f.file->f_mapping->host);
 	ret = blkdev_get(reg->hr_bdev, FMODE_WRITE | FMODE_READ, NULL);
 	if (ret) {
 		reg->hr_bdev = NULL;
-		goto out;
+		goto out3;
 	}
 	inode = NULL;
 
@@ -1798,7 +1797,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
 		     "blocksize %u incorrect for device, expected %d",
 		     reg->hr_block_bytes, sectsize);
 		ret = -EINVAL;
-		goto out;
+		goto out3;
 	}
 
 	o2hb_init_region_params(reg);
@@ -1812,13 +1811,13 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
 	ret = o2hb_map_slot_data(reg);
 	if (ret) {
 		mlog_errno(ret);
-		goto out;
+		goto out3;
 	}
 
 	ret = o2hb_populate_slot_data(reg);
 	if (ret) {
 		mlog_errno(ret);
-		goto out;
+		goto out3;
 	}
 
 	INIT_DELAYED_WORK(&reg->hr_write_timeout_work, o2hb_write_timeout);
@@ -1848,7 +1847,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
 	if (IS_ERR(hb_task)) {
 		ret = PTR_ERR(hb_task);
 		mlog_errno(ret);
-		goto out;
+		goto out3;
 	}
 
 	spin_lock(&o2hb_live_lock);
@@ -1864,7 +1863,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
 
 	if (reg->hr_aborted_start) {
 		ret = -EIO;
-		goto out;
+		goto out3;
 	}
 
 	/* Ok, we were woken.  Make sure it wasn't by drop_item() */
@@ -1883,11 +1882,11 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
 		printk(KERN_NOTICE "o2hb: Heartbeat started on region %s (%s)\n",
 		       config_item_name(&reg->hr_item), reg->hr_dev_name);
 
+out3:
+	iput(inode);
+out2:
+	fdput(f);
 out:
-	if (filp)
-		fput_light(filp, fput_needed);
-	if (inode)
-		iput(inode);
 	if (ret < 0) {
 		if (reg->hr_bdev) {
 			blkdev_put(reg->hr_bdev, FMODE_READ|FMODE_WRITE);
diff --git a/fs/open.c b/fs/open.c
index 3c741eae6b99..85603262d8db 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -134,25 +134,25 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
 {
 	struct inode *inode;
 	struct dentry *dentry;
-	struct file *file;
-	int error, fput_needed;
+	struct fd f;
+	int error;
 
 	error = -EINVAL;
 	if (length < 0)
 		goto out;
 	error = -EBADF;
-	file = fget_light(fd, &fput_needed);
-	if (!file)
+	f = fdget(fd);
+	if (!f.file)
 		goto out;
 
 	/* explicitly opened as large or we are on 64-bit box */
-	if (file->f_flags & O_LARGEFILE)
+	if (f.file->f_flags & O_LARGEFILE)
 		small = 0;
 
-	dentry = file->f_path.dentry;
+	dentry = f.file->f_path.dentry;
 	inode = dentry->d_inode;
 	error = -EINVAL;
-	if (!S_ISREG(inode->i_mode) || !(file->f_mode & FMODE_WRITE))
+	if (!S_ISREG(inode->i_mode) || !(f.file->f_mode & FMODE_WRITE))
 		goto out_putf;
 
 	error = -EINVAL;
@@ -165,14 +165,14 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
 		goto out_putf;
 
 	sb_start_write(inode->i_sb);
-	error = locks_verify_truncate(inode, file, length);
+	error = locks_verify_truncate(inode, f.file, length);
 	if (!error)
-		error = security_path_truncate(&file->f_path);
+		error = security_path_truncate(&f.file->f_path);
 	if (!error)
-		error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file);
+		error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, f.file);
 	sb_end_write(inode->i_sb);
 out_putf:
-	fput_light(file, fput_needed);
+	fdput(f);
 out:
 	return error;
 }
@@ -276,15 +276,13 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 
 SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len)
 {
-	struct file *file;
-	int error = -EBADF, fput_needed;
+	struct fd f = fdget(fd);
+	int error = -EBADF;
 
-	file = fget_light(fd, &fput_needed);
-	if (file) {
-		error = do_fallocate(file, mode, offset, len);
-		fput_light(file, fput_needed);
+	if (f.file) {
+		error = do_fallocate(f.file, mode, offset, len);
+		fdput(f);
 	}
-
 	return error;
 }
 
@@ -400,16 +398,15 @@ out:
 
 SYSCALL_DEFINE1(fchdir, unsigned int, fd)
 {
-	struct file *file;
+	struct fd f = fdget_raw(fd);
 	struct inode *inode;
-	int error, fput_needed;
+	int error = -EBADF;
 
 	error = -EBADF;
-	file = fget_raw_light(fd, &fput_needed);
-	if (!file)
+	if (!f.file)
 		goto out;
 
-	inode = file->f_path.dentry->d_inode;
+	inode = f.file->f_path.dentry->d_inode;
 
 	error = -ENOTDIR;
 	if (!S_ISDIR(inode->i_mode))
@@ -417,9 +414,9 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd)
 
 	error = inode_permission(inode, MAY_EXEC | MAY_CHDIR);
 	if (!error)
-		set_fs_pwd(current->fs, &file->f_path);
+		set_fs_pwd(current->fs, &f.file->f_path);
 out_putf:
-	fput_light(file, fput_needed);
+	fdput(f);
 out:
 	return error;
 }
@@ -582,21 +579,20 @@ SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group
 
 SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group)
 {
-	struct file *file;
-	int error = -EBADF, fput_needed;
+	struct fd f = fdget(fd);
+	int error = -EBADF;
 
-	file = fget_light(fd, &fput_needed);
-	if (!file)
+	if (!f.file)
 		goto out;
 
-	error = mnt_want_write_file(file);
+	error = mnt_want_write_file(f.file);
 	if (error)
 		goto out_fput;
-	audit_inode(NULL, file->f_path.dentry);
-	error = chown_common(&file->f_path, user, group);
-	mnt_drop_write_file(file);
+	audit_inode(NULL, f.file->f_path.dentry);
+	error = chown_common(&f.file->f_path, user, group);
+	mnt_drop_write_file(f.file);
 out_fput:
-	fput_light(file, fput_needed);
+	fdput(f);
 out:
 	return error;
 }
diff --git a/fs/read_write.c b/fs/read_write.c
index 1adfb691e4f1..28b38279a219 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -232,23 +232,18 @@ EXPORT_SYMBOL(vfs_llseek);
 SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, origin)
 {
 	off_t retval;
-	struct file * file;
-	int fput_needed;
-
-	retval = -EBADF;
-	file = fget_light(fd, &fput_needed);
-	if (!file)
-		goto bad;
+	struct fd f = fdget(fd);
+	if (!f.file)
+		return -EBADF;
 
 	retval = -EINVAL;
 	if (origin <= SEEK_MAX) {
-		loff_t res = vfs_llseek(file, offset, origin);
+		loff_t res = vfs_llseek(f.file, offset, origin);
 		retval = res;
 		if (res != (loff_t)retval)
 			retval = -EOVERFLOW;	/* LFS: should only happen on 32 bit platforms */
 	}
-	fput_light(file, fput_needed);
-bad:
+	fdput(f);
 	return retval;
 }
 
@@ -258,20 +253,17 @@ SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
 		unsigned int, origin)
 {
 	int retval;
-	struct file * file;
+	struct fd f = fdget(fd);
 	loff_t offset;
-	int fput_needed;
 
-	retval = -EBADF;
-	file = fget_light(fd, &fput_needed);
-	if (!file)
-		goto bad;
+	if (!f.file)
+		return -EBADF;
 
 	retval = -EINVAL;
 	if (origin > SEEK_MAX)
 		goto out_putf;
 
-	offset = vfs_llseek(file, ((loff_t) offset_high << 32) | offset_low,
+	offset = vfs_llseek(f.file, ((loff_t) offset_high << 32) | offset_low,
 			origin);
 
 	retval = (int)offset;
@@ -281,8 +273,7 @@ SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
 			retval = 0;
 	}
 out_putf:
-	fput_light(file, fput_needed);
-bad:
+	fdput(f);
 	return retval;
 }
 #endif
@@ -461,34 +452,29 @@ static inline void file_pos_write(struct file *file, loff_t pos)
 
 SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
 {
-	struct file *file;
+	struct fd f = fdget(fd);
 	ssize_t ret = -EBADF;
-	int fput_needed;
 
-	file = fget_light(fd, &fput_needed);
-	if (file) {
-		loff_t pos = file_pos_read(file);
-		ret = vfs_read(file, buf, count, &pos);
-		file_pos_write(file, pos);
-		fput_light(file, fput_needed);
+	if (f.file) {
+		loff_t pos = file_pos_read(f.file);
+		ret = vfs_read(f.file, buf, count, &pos);
+		file_pos_write(f.file, pos);
+		fdput(f);
 	}
-
 	return ret;
 }
 
 SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
 		size_t, count)
 {
-	struct file *file;
+	struct fd f = fdget(fd);
 	ssize_t ret = -EBADF;
-	int fput_needed;
 
-	file = fget_light(fd, &fput_needed);
-	if (file) {
-		loff_t pos = file_pos_read(file);
-		ret = vfs_write(file, buf, count, &pos);
-		file_pos_write(file, pos);
-		fput_light(file, fput_needed);
+	if (f.file) {
+		loff_t pos = file_pos_read(f.file);
+		ret = vfs_write(f.file, buf, count, &pos);
+		file_pos_write(f.file, pos);
+		fdput(f);
 	}
 
 	return ret;
@@ -497,19 +483,18 @@ SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
 SYSCALL_DEFINE(pread64)(unsigned int fd, char __user *buf,
 			size_t count, loff_t pos)
 {
-	struct file *file;
+	struct fd f;
 	ssize_t ret = -EBADF;
-	int fput_needed;
 
 	if (pos < 0)
 		return -EINVAL;
 
-	file = fget_light(fd, &fput_needed);
-	if (file) {
+	f = fdget(fd);
+	if (f.file) {
 		ret = -ESPIPE;
-		if (file->f_mode & FMODE_PREAD)
-			ret = vfs_read(file, buf, count, &pos);
-		fput_light(file, fput_needed);
+		if (f.file->f_mode & FMODE_PREAD)
+			ret = vfs_read(f.file, buf, count, &pos);
+		fdput(f);
 	}
 
 	return ret;
@@ -526,19 +511,18 @@ SYSCALL_ALIAS(sys_pread64, SyS_pread64);
 SYSCALL_DEFINE(pwrite64)(unsigned int fd, const char __user *buf,
 			 size_t count, loff_t pos)
 {
-	struct file *file;
+	struct fd f;
 	ssize_t ret = -EBADF;
-	int fput_needed;
 
 	if (pos < 0)
 		return -EINVAL;
 
-	file = fget_light(fd, &fput_needed);
-	if (file) {
+	f = fdget(fd);
+	if (f.file) {
 		ret = -ESPIPE;
-		if (file->f_mode & FMODE_PWRITE)  
-			ret = vfs_write(file, buf, count, &pos);
-		fput_light(file, fput_needed);
+		if (f.file->f_mode & FMODE_PWRITE)  
+			ret = vfs_write(f.file, buf, count, &pos);
+		fdput(f);
 	}
 
 	return ret;
@@ -789,16 +773,14 @@ EXPORT_SYMBOL(vfs_writev);
 SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
 		unsigned long, vlen)
 {
-	struct file *file;
+	struct fd f = fdget(fd);
 	ssize_t ret = -EBADF;
-	int fput_needed;
 
-	file = fget_light(fd, &fput_needed);
-	if (file) {
-		loff_t pos = file_pos_read(file);
-		ret = vfs_readv(file, vec, vlen, &pos);
-		file_pos_write(file, pos);
-		fput_light(file, fput_needed);
+	if (f.file) {
+		loff_t pos = file_pos_read(f.file);
+		ret = vfs_readv(f.file, vec, vlen, &pos);
+		file_pos_write(f.file, pos);
+		fdput(f);
 	}
 
 	if (ret > 0)
@@ -810,16 +792,14 @@ SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
 SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
 		unsigned long, vlen)
 {
-	struct file *file;
+	struct fd f = fdget(fd);
 	ssize_t ret = -EBADF;
-	int fput_needed;
 
-	file = fget_light(fd, &fput_needed);
-	if (file) {
-		loff_t pos = file_pos_read(file);
-		ret = vfs_writev(file, vec, vlen, &pos);
-		file_pos_write(file, pos);
-		fput_light(file, fput_needed);
+	if (f.file) {
+		loff_t pos = file_pos_read(f.file);
+		ret = vfs_writev(f.file, vec, vlen, &pos);
+		file_pos_write(f.file, pos);
+		fdput(f);
 	}
 
 	if (ret > 0)
@@ -838,19 +818,18 @@ SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
 		unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
 {
 	loff_t pos = pos_from_hilo(pos_h, pos_l);
-	struct file *file;
+	struct fd f;
 	ssize_t ret = -EBADF;
-	int fput_needed;
 
 	if (pos < 0)
 		return -EINVAL;
 
-	file = fget_light(fd, &fput_needed);
-	if (file) {
+	f = fdget(fd);
+	if (f.file) {
 		ret = -ESPIPE;
-		if (file->f_mode & FMODE_PREAD)
-			ret = vfs_readv(file, vec, vlen, &pos);
-		fput_light(file, fput_needed);
+		if (f.file->f_mode & FMODE_PREAD)
+			ret = vfs_readv(f.file, vec, vlen, &pos);
+		fdput(f);
 	}
 
 	if (ret > 0)
@@ -863,19 +842,18 @@ SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
 		unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
 {
 	loff_t pos = pos_from_hilo(pos_h, pos_l);
-	struct file *file;
+	struct fd f;
 	ssize_t ret = -EBADF;
-	int fput_needed;
 
 	if (pos < 0)
 		return -EINVAL;
 
-	file = fget_light(fd, &fput_needed);
-	if (file) {
+	f = fdget(fd);
+	if (f.file) {
 		ret = -ESPIPE;
-		if (file->f_mode & FMODE_PWRITE)
-			ret = vfs_writev(file, vec, vlen, &pos);
-		fput_light(file, fput_needed);
+		if (f.file->f_mode & FMODE_PWRITE)
+			ret = vfs_writev(f.file, vec, vlen, &pos);
+		fdput(f);
 	}
 
 	if (ret > 0)
@@ -887,28 +865,28 @@ SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
 static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
 			   size_t count, loff_t max)
 {
-	struct file * in_file, * out_file;
-	struct inode * in_inode, * out_inode;
+	struct fd in, out;
+	struct inode *in_inode, *out_inode;
 	loff_t pos;
 	ssize_t retval;
-	int fput_needed_in, fput_needed_out, fl;
+	int fl;
 
 	/*
 	 * Get input file, and verify that it is ok..
 	 */
 	retval = -EBADF;
-	in_file = fget_light(in_fd, &fput_needed_in);
-	if (!in_file)
+	in = fdget(in_fd);
+	if (!in.file)
 		goto out;
-	if (!(in_file->f_mode & FMODE_READ))
+	if (!(in.file->f_mode & FMODE_READ))
 		goto fput_in;
 	retval = -ESPIPE;
 	if (!ppos)
-		ppos = &in_file->f_pos;
+		ppos = &in.file->f_pos;
 	else
-		if (!(in_file->f_mode & FMODE_PREAD))
+		if (!(in.file->f_mode & FMODE_PREAD))
 			goto fput_in;
-	retval = rw_verify_area(READ, in_file, ppos, count);
+	retval = rw_verify_area(READ, in.file, ppos, count);
 	if (retval < 0)
 		goto fput_in;
 	count = retval;
@@ -917,15 +895,15 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
 	 * Get output file, and verify that it is ok..
 	 */
 	retval = -EBADF;
-	out_file = fget_light(out_fd, &fput_needed_out);
-	if (!out_file)
+	out = fdget(out_fd);
+	if (!out.file)
 		goto fput_in;
-	if (!(out_file->f_mode & FMODE_WRITE))
+	if (!(out.file->f_mode & FMODE_WRITE))
 		goto fput_out;
 	retval = -EINVAL;
-	in_inode = in_file->f_path.dentry->d_inode;
-	out_inode = out_file->f_path.dentry->d_inode;
-	retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count);
+	in_inode = in.file->f_path.dentry->d_inode;
+	out_inode = out.file->f_path.dentry->d_inode;
+	retval = rw_verify_area(WRITE, out.file, &out.file->f_pos, count);
 	if (retval < 0)
 		goto fput_out;
 	count = retval;
@@ -949,10 +927,10 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
 	 * and the application is arguably buggy if it doesn't expect
 	 * EAGAIN on a non-blocking file descriptor.
 	 */
-	if (in_file->f_flags & O_NONBLOCK)
+	if (in.file->f_flags & O_NONBLOCK)
 		fl = SPLICE_F_NONBLOCK;
 #endif
-	retval = do_splice_direct(in_file, ppos, out_file, count, fl);
+	retval = do_splice_direct(in.file, ppos, out.file, count, fl);
 
 	if (retval > 0) {
 		add_rchar(current, retval);
@@ -965,9 +943,9 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
 		retval = -EOVERFLOW;
 
 fput_out:
-	fput_light(out_file, fput_needed_out);
+	fdput(out);
 fput_in:
-	fput_light(in_file, fput_needed_in);
+	fdput(in);
 out:
 	return retval;
 }
diff --git a/fs/readdir.c b/fs/readdir.c
index 39e3370d79cf..5e69ef533b77 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -106,22 +106,20 @@ SYSCALL_DEFINE3(old_readdir, unsigned int, fd,
 		struct old_linux_dirent __user *, dirent, unsigned int, count)
 {
 	int error;
-	struct file * file;
+	struct fd f = fdget(fd);
 	struct readdir_callback buf;
-	int fput_needed;
 
-	file = fget_light(fd, &fput_needed);
-	if (!file)
+	if (!f.file)
 		return -EBADF;
 
 	buf.result = 0;
 	buf.dirent = dirent;
 
-	error = vfs_readdir(file, fillonedir, &buf);
+	error = vfs_readdir(f.file, fillonedir, &buf);
 	if (buf.result)
 		error = buf.result;
 
-	fput_light(file, fput_needed);
+	fdput(f);
 	return error;
 }
 
@@ -191,17 +189,16 @@ efault:
 SYSCALL_DEFINE3(getdents, unsigned int, fd,
 		struct linux_dirent __user *, dirent, unsigned int, count)
 {
-	struct file * file;
+	struct fd f;
 	struct linux_dirent __user * lastdirent;
 	struct getdents_callback buf;
-	int fput_needed;
 	int error;
 
 	if (!access_ok(VERIFY_WRITE, dirent, count))
 		return -EFAULT;
 
-	file = fget_light(fd, &fput_needed);
-	if (!file)
+	f = fdget(fd);
+	if (!f.file)
 		return -EBADF;
 
 	buf.current_dir = dirent;
@@ -209,17 +206,17 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd,
 	buf.count = count;
 	buf.error = 0;
 
-	error = vfs_readdir(file, filldir, &buf);
+	error = vfs_readdir(f.file, filldir, &buf);
 	if (error >= 0)
 		error = buf.error;
 	lastdirent = buf.previous;
 	if (lastdirent) {
-		if (put_user(file->f_pos, &lastdirent->d_off))
+		if (put_user(f.file->f_pos, &lastdirent->d_off))
 			error = -EFAULT;
 		else
 			error = count - buf.count;
 	}
-	fput_light(file, fput_needed);
+	fdput(f);
 	return error;
 }
 
@@ -272,17 +269,16 @@ efault:
 SYSCALL_DEFINE3(getdents64, unsigned int, fd,
 		struct linux_dirent64 __user *, dirent, unsigned int, count)
 {
-	struct file * file;
+	struct fd f;
 	struct linux_dirent64 __user * lastdirent;
 	struct getdents_callback64 buf;
-	int fput_needed;
 	int error;
 
 	if (!access_ok(VERIFY_WRITE, dirent, count))
 		return -EFAULT;
 
-	file = fget_light(fd, &fput_needed);
-	if (!file)
+	f = fdget(fd);
+	if (!f.file)
 		return -EBADF;
 
 	buf.current_dir = dirent;
@@ -290,17 +286,17 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd,
 	buf.count = count;
 	buf.error = 0;
 
-	error = vfs_readdir(file, filldir64, &buf);
+	error = vfs_readdir(f.file, filldir64, &buf);
 	if (error >= 0)
 		error = buf.error;
 	lastdirent = buf.previous;
 	if (lastdirent) {
-		typeof(lastdirent->d_off) d_off = file->f_pos;
+		typeof(lastdirent->d_off) d_off = f.file->f_pos;
 		if (__put_user(d_off, &lastdirent->d_off))
 			error = -EFAULT;
 		else
 			error = count - buf.count;
 	}
-	fput_light(file, fput_needed);
+	fdput(f);
 	return error;
 }
diff --git a/fs/select.c b/fs/select.c
index ffdd16d6e691..2ef72d965036 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -428,8 +428,6 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
 		for (i = 0; i < n; ++rinp, ++routp, ++rexp) {
 			unsigned long in, out, ex, all_bits, bit = 1, mask, j;
 			unsigned long res_in = 0, res_out = 0, res_ex = 0;
-			const struct file_operations *f_op = NULL;
-			struct file *file = NULL;
 
 			in = *inp++; out = *outp++; ex = *exp++;
 			all_bits = in | out | ex;
@@ -439,20 +437,21 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
 			}
 
 			for (j = 0; j < BITS_PER_LONG; ++j, ++i, bit <<= 1) {
-				int fput_needed;
+				struct fd f;
 				if (i >= n)
 					break;
 				if (!(bit & all_bits))
 					continue;
-				file = fget_light(i, &fput_needed);
-				if (file) {
-					f_op = file->f_op;
+				f = fdget(i);
+				if (f.file) {
+					const struct file_operations *f_op;
+					f_op = f.file->f_op;
 					mask = DEFAULT_POLLMASK;
 					if (f_op && f_op->poll) {
 						wait_key_set(wait, in, out, bit);
-						mask = (*f_op->poll)(file, wait);
+						mask = (*f_op->poll)(f.file, wait);
 					}
-					fput_light(file, fput_needed);
+					fdput(f);
 					if ((mask & POLLIN_SET) && (in & bit)) {
 						res_in |= bit;
 						retval++;
@@ -725,20 +724,17 @@ static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait)
 	mask = 0;
 	fd = pollfd->fd;
 	if (fd >= 0) {
-		int fput_needed;
-		struct file * file;
-
-		file = fget_light(fd, &fput_needed);
+		struct fd f = fdget(fd);
 		mask = POLLNVAL;
-		if (file != NULL) {
+		if (f.file) {
 			mask = DEFAULT_POLLMASK;
-			if (file->f_op && file->f_op->poll) {
+			if (f.file->f_op && f.file->f_op->poll) {
 				pwait->_key = pollfd->events|POLLERR|POLLHUP;
-				mask = file->f_op->poll(file, pwait);
+				mask = f.file->f_op->poll(f.file, pwait);
 			}
 			/* Mask out unneeded events. */
 			mask &= pollfd->events | POLLERR | POLLHUP;
-			fput_light(file, fput_needed);
+			fdput(f);
 		}
 	}
 	pollfd->revents = mask;
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 9f35a37173de..8bee4e570911 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -269,13 +269,12 @@ SYSCALL_DEFINE4(signalfd4, int, ufd, sigset_t __user *, user_mask,
 		if (ufd < 0)
 			kfree(ctx);
 	} else {
-		int fput_needed;
-		struct file *file = fget_light(ufd, &fput_needed);
-		if (!file)
+		struct fd f = fdget(ufd);
+		if (!f.file)
 			return -EBADF;
-		ctx = file->private_data;
-		if (file->f_op != &signalfd_fops) {
-			fput_light(file, fput_needed);
+		ctx = f.file->private_data;
+		if (f.file->f_op != &signalfd_fops) {
+			fdput(f);
 			return -EINVAL;
 		}
 		spin_lock_irq(&current->sighand->siglock);
@@ -283,7 +282,7 @@ SYSCALL_DEFINE4(signalfd4, int, ufd, sigset_t __user *, user_mask,
 		spin_unlock_irq(&current->sighand->siglock);
 
 		wake_up(&current->sighand->signalfd_wqh);
-		fput_light(file, fput_needed);
+		fdput(f);
 	}
 
 	return ufd;
diff --git a/fs/splice.c b/fs/splice.c
index 41514dd89462..13e5b4776e7a 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1666,9 +1666,8 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov,
 SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, iov,
 		unsigned long, nr_segs, unsigned int, flags)
 {
-	struct file *file;
+	struct fd f;
 	long error;
-	int fput;
 
 	if (unlikely(nr_segs > UIO_MAXIOV))
 		return -EINVAL;
@@ -1676,14 +1675,14 @@ SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, iov,
 		return 0;
 
 	error = -EBADF;
-	file = fget_light(fd, &fput);
-	if (file) {
-		if (file->f_mode & FMODE_WRITE)
-			error = vmsplice_to_pipe(file, iov, nr_segs, flags);
-		else if (file->f_mode & FMODE_READ)
-			error = vmsplice_to_user(file, iov, nr_segs, flags);
-
-		fput_light(file, fput);
+	f = fdget(fd);
+	if (f.file) {
+		if (f.file->f_mode & FMODE_WRITE)
+			error = vmsplice_to_pipe(f.file, iov, nr_segs, flags);
+		else if (f.file->f_mode & FMODE_READ)
+			error = vmsplice_to_user(f.file, iov, nr_segs, flags);
+
+		fdput(f);
 	}
 
 	return error;
@@ -1693,30 +1692,27 @@ SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in,
 		int, fd_out, loff_t __user *, off_out,
 		size_t, len, unsigned int, flags)
 {
+	struct fd in, out;
 	long error;
-	struct file *in, *out;
-	int fput_in, fput_out;
 
 	if (unlikely(!len))
 		return 0;
 
 	error = -EBADF;
-	in = fget_light(fd_in, &fput_in);
-	if (in) {
-		if (in->f_mode & FMODE_READ) {
-			out = fget_light(fd_out, &fput_out);
-			if (out) {
-				if (out->f_mode & FMODE_WRITE)
-					error = do_splice(in, off_in,
-							  out, off_out,
+	in = fdget(fd_in);
+	if (in.file) {
+		if (in.file->f_mode & FMODE_READ) {
+			out = fdget(fd_out);
+			if (out.file) {
+				if (out.file->f_mode & FMODE_WRITE)
+					error = do_splice(in.file, off_in,
+							  out.file, off_out,
 							  len, flags);
-				fput_light(out, fput_out);
+				fdput(out);
 			}
 		}
-
-		fput_light(in, fput_in);
+		fdput(in);
 	}
-
 	return error;
 }
 
@@ -2027,26 +2023,25 @@ static long do_tee(struct file *in, struct file *out, size_t len,
 
 SYSCALL_DEFINE4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags)
 {
-	struct file *in;
-	int error, fput_in;
+	struct fd in;
+	int error;
 
 	if (unlikely(!len))
 		return 0;
 
 	error = -EBADF;
-	in = fget_light(fdin, &fput_in);
-	if (in) {
-		if (in->f_mode & FMODE_READ) {
-			int fput_out;
-			struct file *out = fget_light(fdout, &fput_out);
-
-			if (out) {
-				if (out->f_mode & FMODE_WRITE)
-					error = do_tee(in, out, len, flags);
-				fput_light(out, fput_out);
+	in = fdget(fdin);
+	if (in.file) {
+		if (in.file->f_mode & FMODE_READ) {
+			struct fd out = fdget(fdout);
+			if (out.file) {
+				if (out.file->f_mode & FMODE_WRITE)
+					error = do_tee(in.file, out.file,
+							len, flags);
+				fdput(out);
 			}
 		}
- 		fput_light(in, fput_in);
+ 		fdput(in);
  	}
 
 	return error;
diff --git a/fs/stat.c b/fs/stat.c
index 40780229a032..ee18fa122ae0 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -57,13 +57,13 @@ EXPORT_SYMBOL(vfs_getattr);
 
 int vfs_fstat(unsigned int fd, struct kstat *stat)
 {
-	int fput_needed;
-	struct file *f = fget_raw_light(fd, &fput_needed);
+	struct fd f = fdget_raw(fd);
 	int error = -EBADF;
 
-	if (f) {
-		error = vfs_getattr(f->f_path.mnt, f->f_path.dentry, stat);
-		fput_light(f, fput_needed);
+	if (f.file) {
+		error = vfs_getattr(f.file->f_path.mnt, f.file->f_path.dentry,
+				    stat);
+		fdput(f);
 	}
 	return error;
 }
diff --git a/fs/statfs.c b/fs/statfs.c
index 95ad5c0e586c..f8e832e6f0a2 100644
--- a/fs/statfs.c
+++ b/fs/statfs.c
@@ -87,12 +87,11 @@ int user_statfs(const char __user *pathname, struct kstatfs *st)
 
 int fd_statfs(int fd, struct kstatfs *st)
 {
-	int fput_needed;
-	struct file *file = fget_light(fd, &fput_needed);
+	struct fd f = fdget(fd);
 	int error = -EBADF;
-	if (file) {
-		error = vfs_statfs(&file->f_path, st);
-		fput_light(file, fput_needed);
+	if (f.file) {
+		error = vfs_statfs(&f.file->f_path, st);
+		fdput(f);
 	}
 	return error;
 }
diff --git a/fs/sync.c b/fs/sync.c
index eb8722dc556f..14eefeb44636 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -148,21 +148,19 @@ void emergency_sync(void)
  */
 SYSCALL_DEFINE1(syncfs, int, fd)
 {
-	struct file *file;
+	struct fd f = fdget(fd);
 	struct super_block *sb;
 	int ret;
-	int fput_needed;
 
-	file = fget_light(fd, &fput_needed);
-	if (!file)
+	if (!f.file)
 		return -EBADF;
-	sb = file->f_dentry->d_sb;
+	sb = f.file->f_dentry->d_sb;
 
 	down_read(&sb->s_umount);
 	ret = sync_filesystem(sb);
 	up_read(&sb->s_umount);
 
-	fput_light(file, fput_needed);
+	fdput(f);
 	return ret;
 }
 
@@ -201,14 +199,12 @@ EXPORT_SYMBOL(vfs_fsync);
 
 static int do_fsync(unsigned int fd, int datasync)
 {
-	struct file *file;
+	struct fd f = fdget(fd);
 	int ret = -EBADF;
-	int fput_needed;
 
-	file = fget_light(fd, &fput_needed);
-	if (file) {
-		ret = vfs_fsync(file, datasync);
-		fput_light(file, fput_needed);
+	if (f.file) {
+		ret = vfs_fsync(f.file, datasync);
+		fdput(f);
 	}
 	return ret;
 }
@@ -291,10 +287,9 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes,
 				unsigned int flags)
 {
 	int ret;
-	struct file *file;
+	struct fd f;
 	struct address_space *mapping;
 	loff_t endbyte;			/* inclusive */
-	int fput_needed;
 	umode_t i_mode;
 
 	ret = -EINVAL;
@@ -333,17 +328,17 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes,
 		endbyte--;		/* inclusive */
 
 	ret = -EBADF;
-	file = fget_light(fd, &fput_needed);
-	if (!file)
+	f = fdget(fd);
+	if (!f.file)
 		goto out;
 
-	i_mode = file->f_path.dentry->d_inode->i_mode;
+	i_mode = f.file->f_path.dentry->d_inode->i_mode;
 	ret = -ESPIPE;
 	if (!S_ISREG(i_mode) && !S_ISBLK(i_mode) && !S_ISDIR(i_mode) &&
 			!S_ISLNK(i_mode))
 		goto out_put;
 
-	mapping = file->f_mapping;
+	mapping = f.file->f_mapping;
 	if (!mapping) {
 		ret = -EINVAL;
 		goto out_put;
@@ -366,7 +361,7 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes,
 		ret = filemap_fdatawait_range(mapping, offset, endbyte);
 
 out_put:
-	fput_light(file, fput_needed);
+	fdput(f);
 out:
 	return ret;
 }
diff --git a/fs/timerfd.c b/fs/timerfd.c
index dd91e9420c9e..d03822bbf190 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -234,19 +234,17 @@ static const struct file_operations timerfd_fops = {
 	.llseek		= noop_llseek,
 };
 
-static struct file *timerfd_fget(int fd, int *fput_needed)
+static int timerfd_fget(int fd, struct fd *p)
 {
-	struct file *file;
-
-	file = fget_light(fd, fput_needed);
-	if (!file)
-		return ERR_PTR(-EBADF);
-	if (file->f_op != &timerfd_fops) {
-		fput_light(file, *fput_needed);
-		return ERR_PTR(-EINVAL);
+	struct fd f = fdget(fd);
+	if (!f.file)
+		return -EBADF;
+	if (f.file->f_op != &timerfd_fops) {
+		fdput(f);
+		return -EINVAL;
 	}
-
-	return file;
+	*p = f;
+	return 0;
 }
 
 SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
@@ -284,10 +282,10 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
 		const struct itimerspec __user *, utmr,
 		struct itimerspec __user *, otmr)
 {
-	struct file *file;
+	struct fd f;
 	struct timerfd_ctx *ctx;
 	struct itimerspec ktmr, kotmr;
-	int ret, fput_needed;
+	int ret;
 
 	if (copy_from_user(&ktmr, utmr, sizeof(ktmr)))
 		return -EFAULT;
@@ -297,10 +295,10 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
 	    !timespec_valid(&ktmr.it_interval))
 		return -EINVAL;
 
-	file = timerfd_fget(ufd, &fput_needed);
-	if (IS_ERR(file))
-		return PTR_ERR(file);
-	ctx = file->private_data;
+	ret = timerfd_fget(ufd, &f);
+	if (ret)
+		return ret;
+	ctx = f.file->private_data;
 
 	timerfd_setup_cancel(ctx, flags);
 
@@ -334,7 +332,7 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
 	ret = timerfd_setup(ctx, flags, &ktmr);
 
 	spin_unlock_irq(&ctx->wqh.lock);
-	fput_light(file, fput_needed);
+	fdput(f);
 	if (otmr && copy_to_user(otmr, &kotmr, sizeof(kotmr)))
 		return -EFAULT;
 
@@ -343,15 +341,13 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
 
 SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr)
 {
-	struct file *file;
+	struct fd f;
 	struct timerfd_ctx *ctx;
 	struct itimerspec kotmr;
-	int fput_needed;
-
-	file = timerfd_fget(ufd, &fput_needed);
-	if (IS_ERR(file))
-		return PTR_ERR(file);
-	ctx = file->private_data;
+	int ret = timerfd_fget(ufd, &f);
+	if (ret)
+		return ret;
+	ctx = f.file->private_data;
 
 	spin_lock_irq(&ctx->wqh.lock);
 	if (ctx->expired && ctx->tintv.tv64) {
@@ -363,7 +359,7 @@ SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr)
 	kotmr.it_value = ktime_to_timespec(timerfd_get_remaining(ctx));
 	kotmr.it_interval = ktime_to_timespec(ctx->tintv);
 	spin_unlock_irq(&ctx->wqh.lock);
-	fput_light(file, fput_needed);
+	fdput(f);
 
 	return copy_to_user(otmr, &kotmr, sizeof(kotmr)) ? -EFAULT: 0;
 }
diff --git a/fs/utimes.c b/fs/utimes.c
index fa4dbe451e27..bb0696a41735 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -140,19 +140,18 @@ long do_utimes(int dfd, const char __user *filename, struct timespec *times,
 		goto out;
 
 	if (filename == NULL && dfd != AT_FDCWD) {
-		int fput_needed;
-		struct file *file;
+		struct fd f;
 
 		if (flags & AT_SYMLINK_NOFOLLOW)
 			goto out;
 
-		file = fget_light(dfd, &fput_needed);
+		f = fdget(dfd);
 		error = -EBADF;
-		if (!file)
+		if (!f.file)
 			goto out;
 
-		error = utimes_common(&file->f_path, times);
-		fput_light(file, fput_needed);
+		error = utimes_common(&f.file->f_path, times);
+		fdput(f);
 	} else {
 		struct path path;
 		int lookup_flags = 0;
diff --git a/fs/xattr.c b/fs/xattr.c
index 4d45b7189e7e..14a7e2544fe3 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -399,22 +399,20 @@ SYSCALL_DEFINE5(lsetxattr, const char __user *, pathname,
 SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name,
 		const void __user *,value, size_t, size, int, flags)
 {
-	int fput_needed;
-	struct file *f;
+	struct fd f = fdget(fd);
 	struct dentry *dentry;
 	int error = -EBADF;
 
-	f = fget_light(fd, &fput_needed);
-	if (!f)
+	if (!f.file)
 		return error;
-	dentry = f->f_path.dentry;
+	dentry = f.file->f_path.dentry;
 	audit_inode(NULL, dentry);
-	error = mnt_want_write_file(f);
+	error = mnt_want_write_file(f.file);
 	if (!error) {
 		error = setxattr(dentry, name, value, size, flags);
-		mnt_drop_write_file(f);
+		mnt_drop_write_file(f.file);
 	}
-	fput_light(f, fput_needed);
+	fdput(f);
 	return error;
 }
 
@@ -495,16 +493,14 @@ SYSCALL_DEFINE4(lgetxattr, const char __user *, pathname,
 SYSCALL_DEFINE4(fgetxattr, int, fd, const char __user *, name,
 		void __user *, value, size_t, size)
 {
-	int fput_needed;
-	struct file *f;
+	struct fd f = fdget(fd);
 	ssize_t error = -EBADF;
 
-	f = fget_light(fd, &fput_needed);
-	if (!f)
+	if (!f.file)
 		return error;
-	audit_inode(NULL, f->f_path.dentry);
-	error = getxattr(f->f_path.dentry, name, value, size);
-	fput_light(f, fput_needed);
+	audit_inode(NULL, f.file->f_path.dentry);
+	error = getxattr(f.file->f_path.dentry, name, value, size);
+	fdput(f);
 	return error;
 }
 
@@ -576,16 +572,14 @@ SYSCALL_DEFINE3(llistxattr, const char __user *, pathname, char __user *, list,
 
 SYSCALL_DEFINE3(flistxattr, int, fd, char __user *, list, size_t, size)
 {
-	int fput_needed;
-	struct file *f;
+	struct fd f = fdget(fd);
 	ssize_t error = -EBADF;
 
-	f = fget_light(fd, &fput_needed);
-	if (!f)
+	if (!f.file)
 		return error;
-	audit_inode(NULL, f->f_path.dentry);
-	error = listxattr(f->f_path.dentry, list, size);
-	fput_light(f, fput_needed);
+	audit_inode(NULL, f.file->f_path.dentry);
+	error = listxattr(f.file->f_path.dentry, list, size);
+	fdput(f);
 	return error;
 }
 
@@ -645,22 +639,20 @@ SYSCALL_DEFINE2(lremovexattr, const char __user *, pathname,
 
 SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name)
 {
-	int fput_needed;
-	struct file *f;
+	struct fd f = fdget(fd);
 	struct dentry *dentry;
 	int error = -EBADF;
 
-	f = fget_light(fd, &fput_needed);
-	if (!f)
+	if (!f.file)
 		return error;
-	dentry = f->f_path.dentry;
+	dentry = f.file->f_path.dentry;
 	audit_inode(NULL, dentry);
-	error = mnt_want_write_file(f);
+	error = mnt_want_write_file(f.file);
 	if (!error) {
 		error = removexattr(dentry, name);
-		mnt_drop_write_file(f);
+		mnt_drop_write_file(f.file);
 	}
-	fput_light(f, fput_needed);
+	fdput(f);
 	return error;
 }
 
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index e6cdf224d7ea..b9b8646e62db 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -48,44 +48,44 @@ xfs_swapext(
 	xfs_swapext_t	*sxp)
 {
 	xfs_inode_t     *ip, *tip;
-	struct file	*file, *tmp_file;
-	int		error = 0, fput_needed, fput_needed_tmp;
+	struct fd	f, tmp;
+	int		error = 0;
 
 	/* Pull information for the target fd */
-	file = fget_light((int)sxp->sx_fdtarget, &fput_needed);
-	if (!file) {
+	f = fdget((int)sxp->sx_fdtarget);
+	if (!f.file) {
 		error = XFS_ERROR(EINVAL);
 		goto out;
 	}
 
-	if (!(file->f_mode & FMODE_WRITE) ||
-	    !(file->f_mode & FMODE_READ) ||
-	    (file->f_flags & O_APPEND)) {
+	if (!(f.file->f_mode & FMODE_WRITE) ||
+	    !(f.file->f_mode & FMODE_READ) ||
+	    (f.file->f_flags & O_APPEND)) {
 		error = XFS_ERROR(EBADF);
 		goto out_put_file;
 	}
 
-	tmp_file = fget_light((int)sxp->sx_fdtmp, &fput_needed_tmp);
-	if (!tmp_file) {
+	tmp = fdget((int)sxp->sx_fdtmp);
+	if (!tmp.file) {
 		error = XFS_ERROR(EINVAL);
 		goto out_put_file;
 	}
 
-	if (!(tmp_file->f_mode & FMODE_WRITE) ||
-	    !(tmp_file->f_mode & FMODE_READ) ||
-	    (tmp_file->f_flags & O_APPEND)) {
+	if (!(tmp.file->f_mode & FMODE_WRITE) ||
+	    !(tmp.file->f_mode & FMODE_READ) ||
+	    (tmp.file->f_flags & O_APPEND)) {
 		error = XFS_ERROR(EBADF);
 		goto out_put_tmp_file;
 	}
 
-	if (IS_SWAPFILE(file->f_path.dentry->d_inode) ||
-	    IS_SWAPFILE(tmp_file->f_path.dentry->d_inode)) {
+	if (IS_SWAPFILE(f.file->f_path.dentry->d_inode) ||
+	    IS_SWAPFILE(tmp.file->f_path.dentry->d_inode)) {
 		error = XFS_ERROR(EINVAL);
 		goto out_put_tmp_file;
 	}
 
-	ip = XFS_I(file->f_path.dentry->d_inode);
-	tip = XFS_I(tmp_file->f_path.dentry->d_inode);
+	ip = XFS_I(f.file->f_path.dentry->d_inode);
+	tip = XFS_I(tmp.file->f_path.dentry->d_inode);
 
 	if (ip->i_mount != tip->i_mount) {
 		error = XFS_ERROR(EINVAL);
@@ -105,9 +105,9 @@ xfs_swapext(
 	error = xfs_swap_extents(ip, tip, sxp);
 
  out_put_tmp_file:
-	fput_light(tmp_file, fput_needed_tmp);
+	fdput(tmp);
  out_put_file:
-	fput_light(file, fput_needed);
+	fdput(f);
  out:
 	return error;
 }
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 21483eac402d..8305f2ac6773 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -70,16 +70,16 @@ xfs_find_handle(
 	int			hsize;
 	xfs_handle_t		handle;
 	struct inode		*inode;
-	struct file		*file = NULL;
+	struct fd		f;
 	struct path		path;
-	int			error, fput_needed;
+	int			error;
 	struct xfs_inode	*ip;
 
 	if (cmd == XFS_IOC_FD_TO_HANDLE) {
-		file = fget_light(hreq->fd, &fput_needed);
-		if (!file)
+		f = fdget(hreq->fd);
+		if (!f.file)
 			return -EBADF;
-		inode = file->f_path.dentry->d_inode;
+		inode = f.file->f_path.dentry->d_inode;
 	} else {
 		error = user_lpath((const char __user *)hreq->path, &path);
 		if (error)
@@ -134,7 +134,7 @@ xfs_find_handle(
 
  out_put:
 	if (cmd == XFS_IOC_FD_TO_HANDLE)
-		fput_light(file, fput_needed);
+		fdput(f);
 	else
 		path_put(&path);
 	return error;
-- 
cgit 


From 1fe0c0230a7c2d5f4061e681a3f3be9512446d23 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Wed, 19 Sep 2012 15:49:51 +0100
Subject: vfs: delete surplus inode NULL check

Each iteration of d_delete we reload inode from dentry->d_inode and
then call S_ISDIR(inode-i_mode), so inode cannot possibly be NULL
shortly afterwards unless something went horribly wrong.

Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/dcache.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/dcache.c b/fs/dcache.c
index 16521a9f2038..fbee67b92651 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2109,7 +2109,7 @@ again:
 	inode = dentry->d_inode;
 	isdir = S_ISDIR(inode->i_mode);
 	if (dentry->d_count == 1) {
-		if (inode && !spin_trylock(&inode->i_lock)) {
+		if (!spin_trylock(&inode->i_lock)) {
 			spin_unlock(&dentry->d_lock);
 			cpu_relax();
 			goto again;
-- 
cgit 


From 2744c171dbaa0b1ec7639e7d0ff817fba9461a38 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 26 Sep 2012 21:41:05 -0400
Subject: ceph: don't abuse d_delete() on failure exits

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ceph/inode.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 4b5762ef7c2b..ba95eea201bf 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1104,7 +1104,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
 				pr_err("fill_trace bad get_inode "
 				       "%llx.%llx\n", vino.ino, vino.snap);
 				err = PTR_ERR(in);
-				d_delete(dn);
+				d_drop(dn);
 				goto done;
 			}
 			dn = splice_dentry(dn, in, &have_lease, true);
@@ -1277,7 +1277,7 @@ retry_lookup:
 			in = ceph_get_inode(parent->d_sb, vino);
 			if (IS_ERR(in)) {
 				dout("new_inode badness\n");
-				d_delete(dn);
+				d_drop(dn);
 				dput(dn);
 				err = PTR_ERR(in);
 				goto out;
-- 
cgit 


From 63784dd02b2ac85e867be9d6a6f5536c4ff738be Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 26 Sep 2012 21:43:05 -0400
Subject: fcntl: fix misannotations

__user * != * __user...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/fcntl.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/fcntl.c b/fs/fcntl.c
index 91af39a33af7..8f704291d4ed 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -149,7 +149,7 @@ pid_t f_getown(struct file *filp)
 
 static int f_setown_ex(struct file *filp, unsigned long arg)
 {
-	struct f_owner_ex * __user owner_p = (void * __user)arg;
+	struct f_owner_ex __user *owner_p = (void __user *)arg;
 	struct f_owner_ex owner;
 	struct pid *pid;
 	int type;
@@ -189,7 +189,7 @@ static int f_setown_ex(struct file *filp, unsigned long arg)
 
 static int f_getown_ex(struct file *filp, unsigned long arg)
 {
-	struct f_owner_ex * __user owner_p = (void * __user)arg;
+	struct f_owner_ex __user *owner_p = (void __user *)arg;
 	struct f_owner_ex owner;
 	int ret = 0;
 
@@ -227,7 +227,7 @@ static int f_getown_ex(struct file *filp, unsigned long arg)
 static int f_getowner_uids(struct file *filp, unsigned long arg)
 {
 	struct user_namespace *user_ns = current_user_ns();
-	uid_t * __user dst = (void * __user)arg;
+	uid_t __user *dst = (void __user *)arg;
 	uid_t src[2];
 	int err;
 
-- 
cgit 


From f34f9d186df35e5c39163444c43b4fc6255e39c5 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Wed, 26 Sep 2012 11:34:50 +1000
Subject: coredump: prevent double-free on an error path in core dumper

In !CORE_DUMP_USE_REGSET case, if elf_note_info_init fails to allocate
memory for info->fields, it frees already allocated stuff and returns
error to its caller, fill_note_info.  Which in turn returns error to its
caller, elf_core_dump.  Which jumps to cleanup label and calls
free_note_info, which will happily try to free all info->fields again.
BOOM.

This is the fix.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Cc: Venu Byravarasu <vbyravarasu@nvidia.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/binfmt_elf.c | 19 ++++---------------
 1 file changed, 4 insertions(+), 15 deletions(-)

(limited to 'fs')

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 1b52956afe33..0225fddf49b7 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1696,30 +1696,19 @@ static int elf_note_info_init(struct elf_note_info *info)
 		return 0;
 	info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
 	if (!info->psinfo)
-		goto notes_free;
+		return 0;
 	info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
 	if (!info->prstatus)
-		goto psinfo_free;
+		return 0;
 	info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
 	if (!info->fpu)
-		goto prstatus_free;
+		return 0;
 #ifdef ELF_CORE_COPY_XFPREGS
 	info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
 	if (!info->xfpu)
-		goto fpu_free;
+		return 0;
 #endif
 	return 1;
-#ifdef ELF_CORE_COPY_XFPREGS
- fpu_free:
-	kfree(info->fpu);
-#endif
- prstatus_free:
-	kfree(info->prstatus);
- psinfo_free:
-	kfree(info->psinfo);
- notes_free:
-	kfree(info->notes);
-	return 0;
 }
 
 static int fill_note_info(struct elfhdr *elf, int phdrs,
-- 
cgit 


From 760ad0cac198356c1148cad7531c1a6138322493 Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <piastry@etersoft.ru>
Date: Tue, 25 Sep 2012 11:00:07 +0400
Subject: CIFS: Make ops->close return void

Signed-off-by: Pavel Shilovsky <piastry@etersoft.ru>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/cifsglob.h | 3 ++-
 fs/cifs/file.c     | 5 ++---
 fs/cifs/smb1ops.c  | 4 ++--
 fs/cifs/smb2ops.c  | 4 ++--
 4 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index a39e5b7fc844..f6f40635abca 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -316,7 +316,8 @@ struct smb_version_operations {
 	/* set fid protocol-specific info */
 	void (*set_fid)(struct cifsFileInfo *, struct cifs_fid *, __u32);
 	/* close a file */
-	int (*close)(const unsigned int, struct cifs_tcon *, struct cifs_fid *);
+	void (*close)(const unsigned int, struct cifs_tcon *,
+		      struct cifs_fid *);
 	/* send a flush request to the server */
 	int (*flush)(const unsigned int, struct cifs_tcon *, struct cifs_fid *);
 	/* async read from the server */
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 075f7cfd1da5..7d7bbdc4c8e7 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -355,12 +355,11 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
 	if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
 		struct TCP_Server_Info *server = tcon->ses->server;
 		unsigned int xid;
-		int rc = -ENOSYS;
 
 		xid = get_xid();
 		if (server->ops->close)
-			rc = server->ops->close(xid, tcon, &cifs_file->fid);
-		free_xid(xid);
+			server->ops->close(xid, tcon, &cifs_file->fid);
+		_free_xid(xid);
 	}
 
 	cifs_del_pending_open(&open);
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index 5fb0fe5f72b6..42dccbb57c40 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -720,11 +720,11 @@ cifs_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock)
 	cinode->can_cache_brlcks = cinode->clientCanCacheAll;
 }
 
-static int
+static void
 cifs_close_file(const unsigned int xid, struct cifs_tcon *tcon,
 		struct cifs_fid *fid)
 {
-	return CIFSSMBClose(xid, tcon, fid->netfid);
+	CIFSSMBClose(xid, tcon, fid->netfid);
 }
 
 static int
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 2183bb343edd..1570cbea4a49 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -374,11 +374,11 @@ smb2_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock)
 	cinode->can_cache_brlcks = cinode->clientCanCacheAll;
 }
 
-static int
+static void
 smb2_close_file(const unsigned int xid, struct cifs_tcon *tcon,
 		struct cifs_fid *fid)
 {
-	return SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid);
+	SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid);
 }
 
 static int
-- 
cgit 


From 4ca3a99ca4bf8f5dcfc4fef4f2b1d8322bb60ad9 Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <piastry@etersoft.ru>
Date: Tue, 25 Sep 2012 11:00:09 +0400
Subject: CIFS: Fix possible freed pointer dereference in SMB2_sess_setup

and remove redundant (rsp == NULL) checks after SendReceive2.

Signed-off-by: Pavel Shilovsky <piastry@etersoft.ru>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/smb2pdu.c | 35 +++--------------------------------
 1 file changed, 3 insertions(+), 32 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index a7db95f4760c..5ad88b4b9990 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -409,11 +409,6 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
 	if (rc != 0)
 		goto neg_exit;
 
-	if (rsp == NULL) {
-		rc = -EIO;
-		goto neg_exit;
-	}
-
 	cFYI(1, "mode 0x%x", rsp->SecurityMode);
 
 	if (rsp->DialectRevision == smb2protocols[SMB21_PROT].name)
@@ -637,13 +632,14 @@ ssetup_ntlmssp_authenticate:
 
 	kfree(security_blob);
 	rsp = (struct smb2_sess_setup_rsp *)iov[0].iov_base;
-	if (rsp->hdr.Status == STATUS_MORE_PROCESSING_REQUIRED) {
+	if (resp_buftype != CIFS_NO_BUFFER &&
+	    rsp->hdr.Status == STATUS_MORE_PROCESSING_REQUIRED) {
 		if (phase != NtLmNegotiate) {
 			cERROR(1, "Unexpected more processing error");
 			goto ssetup_exit;
 		}
 		if (offsetof(struct smb2_sess_setup_rsp, Buffer) - 4 !=
-			le16_to_cpu(rsp->SecurityBufferOffset)) {
+				le16_to_cpu(rsp->SecurityBufferOffset)) {
 			cERROR(1, "Invalid security buffer offset %d",
 				  le16_to_cpu(rsp->SecurityBufferOffset));
 			rc = -EIO;
@@ -669,11 +665,6 @@ ssetup_ntlmssp_authenticate:
 	if (rc != 0)
 		goto ssetup_exit;
 
-	if (rsp == NULL) {
-		rc = -EIO;
-		goto ssetup_exit;
-	}
-
 	ses->session_flags = le16_to_cpu(rsp->SessionFlags);
 ssetup_exit:
 	free_rsp_buf(resp_buftype, rsp);
@@ -793,11 +784,6 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
 		goto tcon_error_exit;
 	}
 
-	if (rsp == NULL) {
-		rc = -EIO;
-		goto tcon_exit;
-	}
-
 	if (tcon == NULL) {
 		ses->ipc_tid = rsp->hdr.TreeId;
 		goto tcon_exit;
@@ -1046,10 +1032,6 @@ SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path,
 		goto creat_exit;
 	}
 
-	if (rsp == NULL) {
-		rc = -EIO;
-		goto creat_exit;
-	}
 	*persistent_fid = rsp->PersistentFileId;
 	*volatile_fid = rsp->VolatileFileId;
 
@@ -1111,11 +1093,6 @@ SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
 		goto close_exit;
 	}
 
-	if (rsp == NULL) {
-		rc = -EIO;
-		goto close_exit;
-	}
-
 	/* BB FIXME - decode close response, update inode for caching */
 
 close_exit:
@@ -1950,12 +1927,6 @@ send_set_info(const unsigned int xid, struct cifs_tcon *tcon,
 		cifs_stats_fail_inc(tcon, SMB2_SET_INFO_HE);
 		goto out;
 	}
-
-	if (rsp == NULL) {
-		rc = -EIO;
-		goto out;
-	}
-
 out:
 	free_rsp_buf(resp_buftype, rsp);
 	kfree(iov);
-- 
cgit 


From f065fd099fc475333fc7a55677a7f64764445d55 Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <piastry@etersoft.ru>
Date: Tue, 25 Sep 2012 11:00:08 +0400
Subject: CIFS: Fix possible freed pointer dereference in CIFS_SessSetup

Signed-off-by: Pavel Shilovsky <piastry@etersoft.ru>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/sess.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 382c06d01b38..76809f4d3428 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -876,7 +876,8 @@ ssetup_ntlmssp_authenticate:
 	pSMB = (SESSION_SETUP_ANDX *)iov[0].iov_base;
 	smb_buf = (struct smb_hdr *)iov[0].iov_base;
 
-	if ((type == RawNTLMSSP) && (smb_buf->Status.CifsError ==
+	if ((type == RawNTLMSSP) && (resp_buf_type != CIFS_NO_BUFFER) &&
+	    (smb_buf->Status.CifsError ==
 			cpu_to_le32(NT_STATUS_MORE_PROCESSING_REQUIRED))) {
 		if (phase != NtLmNegotiate) {
 			cERROR(1, "Unexpected more processing error");
-- 
cgit 


From c052e2b423f3eabe9f3f32e60744afa5cf26f6b9 Mon Sep 17 00:00:00 2001
From: Shirish Pargaonkar <shirishpargaonkar@gmail.com>
Date: Fri, 28 Sep 2012 12:21:14 -0500
Subject: cifs: obtain file access during backup intent lookup (resend)

Rebased and resending the patch.

Path based queries can fail for lack of access, especially during lookup
during open.
open itself would actually succeed becasue of back up intent bit
but queries (either path or file handle based) do not have a means to
specifiy backup intent bit.
So query the file info during lookup using
 trans2 / findfirst / file_id_full_dir_info
to obtain file info as well as file_id/inode value.

Signed-off-by: Shirish Pargaonkar <shirishpargaonkar@gmail.com>
Acked-by: Jeff Layton <jlayton@samba.org>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/cifsproto.h |  6 +++--
 fs/cifs/cifssmb.c   | 43 +++++++++++++++++++++--------------
 fs/cifs/inode.c     | 64 +++++++++++++++++++++++++++++++++++++++--------------
 fs/cifs/readdir.c   |  2 +-
 fs/cifs/smb1ops.c   |  6 ++---
 5 files changed, 80 insertions(+), 41 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 09ea6321c55a..5144e9fbeb8c 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -140,6 +140,8 @@ void cifs_fill_uniqueid(struct super_block *sb, struct cifs_fattr *fattr);
 extern void cifs_unix_basic_to_fattr(struct cifs_fattr *fattr,
 				     FILE_UNIX_BASIC_INFO *info,
 				     struct cifs_sb_info *cifs_sb);
+extern void cifs_dir_info_to_fattr(struct cifs_fattr *, FILE_DIRECTORY_INFO *,
+					struct cifs_sb_info *);
 extern void cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr);
 extern struct inode *cifs_iget(struct super_block *sb,
 			       struct cifs_fattr *fattr);
@@ -216,10 +218,10 @@ extern int CIFSTCon(const unsigned int xid, struct cifs_ses *ses,
 		    const struct nls_table *);
 
 extern int CIFSFindFirst(const unsigned int xid, struct cifs_tcon *tcon,
-		const char *searchName, const struct nls_table *nls_codepage,
+		const char *searchName, struct cifs_sb_info *cifs_sb,
 		__u16 *searchHandle, __u16 search_flags,
 		struct cifs_search_info *psrch_inf,
-		int map, const char dirsep);
+		bool msearch);
 
 extern int CIFSFindNext(const unsigned int xid, struct cifs_tcon *tcon,
 		__u16 searchHandle, __u16 search_flags,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 88bbb3ef95b3..76d0d2998850 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -4214,10 +4214,9 @@ UnixQPathInfoRetry:
 /* xid, tcon, searchName and codepage are input parms, rest are returned */
 int
 CIFSFindFirst(const unsigned int xid, struct cifs_tcon *tcon,
-	      const char *searchName,
-	      const struct nls_table *nls_codepage,
+	      const char *searchName, struct cifs_sb_info *cifs_sb,
 	      __u16 *pnetfid, __u16 search_flags,
-	      struct cifs_search_info *psrch_inf, int remap, const char dirsep)
+	      struct cifs_search_info *psrch_inf, bool msearch)
 {
 /* level 257 SMB_ */
 	TRANSACTION2_FFIRST_REQ *pSMB = NULL;
@@ -4225,8 +4224,9 @@ CIFSFindFirst(const unsigned int xid, struct cifs_tcon *tcon,
 	T2_FFIRST_RSP_PARMS *parms;
 	int rc = 0;
 	int bytes_returned = 0;
-	int name_len;
+	int name_len, remap;
 	__u16 params, byte_count;
+	struct nls_table *nls_codepage;
 
 	cFYI(1, "In FindFirst for %s", searchName);
 
@@ -4236,6 +4236,9 @@ findFirstRetry:
 	if (rc)
 		return rc;
 
+	nls_codepage = cifs_sb->local_nls;
+	remap = cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR;
+
 	if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) {
 		name_len =
 		    cifsConvertToUTF16((__le16 *) pSMB->FileName, searchName,
@@ -4244,24 +4247,29 @@ findFirstRetry:
 		it got remapped to 0xF03A as if it were part of the
 		directory name instead of a wildcard */
 		name_len *= 2;
-		pSMB->FileName[name_len] = dirsep;
-		pSMB->FileName[name_len+1] = 0;
-		pSMB->FileName[name_len+2] = '*';
-		pSMB->FileName[name_len+3] = 0;
-		name_len += 4; /* now the trailing null */
-		pSMB->FileName[name_len] = 0; /* null terminate just in case */
-		pSMB->FileName[name_len+1] = 0;
-		name_len += 2;
+		if (msearch) {
+			pSMB->FileName[name_len] = CIFS_DIR_SEP(cifs_sb);
+			pSMB->FileName[name_len+1] = 0;
+			pSMB->FileName[name_len+2] = '*';
+			pSMB->FileName[name_len+3] = 0;
+			name_len += 4; /* now the trailing null */
+			/* null terminate just in case */
+			pSMB->FileName[name_len] = 0;
+			pSMB->FileName[name_len+1] = 0;
+			name_len += 2;
+		}
 	} else {	/* BB add check for overrun of SMB buf BB */
 		name_len = strnlen(searchName, PATH_MAX);
 /* BB fix here and in unicode clause above ie
 		if (name_len > buffersize-header)
 			free buffer exit; BB */
 		strncpy(pSMB->FileName, searchName, name_len);
-		pSMB->FileName[name_len] = dirsep;
-		pSMB->FileName[name_len+1] = '*';
-		pSMB->FileName[name_len+2] = 0;
-		name_len += 3;
+		if (msearch) {
+			pSMB->FileName[name_len] = CIFS_DIR_SEP(cifs_sb);
+			pSMB->FileName[name_len+1] = '*';
+			pSMB->FileName[name_len+2] = 0;
+			name_len += 3;
+		}
 	}
 
 	params = 12 + name_len /* includes null */ ;
@@ -4349,7 +4357,8 @@ findFirstRetry:
 			psrch_inf->last_entry = psrch_inf->srch_entries_start +
 							lnoff;
 
-			*pnetfid = parms->SearchHandle;
+			if (pnetfid)
+				*pnetfid = parms->SearchHandle;
 		} else {
 			cifs_buf_release(pSMB);
 		}
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 3d155875f446..afdff79651f1 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -607,7 +607,9 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
 		    FILE_ALL_INFO *data, struct super_block *sb, int xid,
 		    const __u16 *fid)
 {
-	int rc = 0, tmprc;
+	bool validinum = false;
+	__u16 srchflgs;
+	int rc = 0, tmprc = ENOSYS;
 	struct cifs_tcon *tcon;
 	struct TCP_Server_Info *server;
 	struct tcon_link *tlink;
@@ -615,6 +617,7 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
 	char *buf = NULL;
 	bool adjust_tz = false;
 	struct cifs_fattr fattr;
+	struct cifs_search_info *srchinf = NULL;
 
 	tlink = cifs_sb_tlink(cifs_sb);
 	if (IS_ERR(tlink))
@@ -653,9 +656,38 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
 	} else if (rc == -EREMOTE) {
 		cifs_create_dfs_fattr(&fattr, sb);
 		rc = 0;
-	} else {
+	} else if (rc == -EACCES && backup_cred(cifs_sb)) {
+			srchinf = kzalloc(sizeof(struct cifs_search_info),
+						GFP_KERNEL);
+			if (srchinf == NULL) {
+				rc = -ENOMEM;
+				goto cgii_exit;
+			}
+
+			srchinf->endOfSearch = false;
+			srchinf->info_level = SMB_FIND_FILE_ID_FULL_DIR_INFO;
+
+			srchflgs = CIFS_SEARCH_CLOSE_ALWAYS |
+					CIFS_SEARCH_CLOSE_AT_END |
+					CIFS_SEARCH_BACKUP_SEARCH;
+
+			rc = CIFSFindFirst(xid, tcon, full_path,
+				cifs_sb, NULL, srchflgs, srchinf, false);
+			if (!rc) {
+				data =
+				(FILE_ALL_INFO *)srchinf->srch_entries_start;
+
+				cifs_dir_info_to_fattr(&fattr,
+				(FILE_DIRECTORY_INFO *)data, cifs_sb);
+				fattr.cf_uniqueid = le64_to_cpu(
+				((SEARCH_ID_FULL_DIR_INFO *)data)->UniqueId);
+				validinum = true;
+
+				cifs_buf_release(srchinf->ntwrk_buf_start);
+			}
+			kfree(srchinf);
+	} else
 		goto cgii_exit;
-	}
 
 	/*
 	 * If an inode wasn't passed in, then get the inode number
@@ -666,23 +698,21 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
 	 */
 	if (*inode == NULL) {
 		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) {
-			if (server->ops->get_srv_inum)
-				tmprc = server->ops->get_srv_inum(xid, tcon,
-					cifs_sb, full_path, &fattr.cf_uniqueid,
-					data);
-			else
-				tmprc = -ENOSYS;
-			if (tmprc || !fattr.cf_uniqueid) {
-				cFYI(1, "GetSrvInodeNum rc %d", tmprc);
-				fattr.cf_uniqueid = iunique(sb, ROOT_I);
-				cifs_autodisable_serverino(cifs_sb);
+			if (validinum == false) {
+				if (server->ops->get_srv_inum)
+					tmprc = server->ops->get_srv_inum(xid,
+						tcon, cifs_sb, full_path,
+						&fattr.cf_uniqueid, data);
+				if (tmprc) {
+					cFYI(1, "GetSrvInodeNum rc %d", tmprc);
+					fattr.cf_uniqueid = iunique(sb, ROOT_I);
+					cifs_autodisable_serverino(cifs_sb);
+				}
 			}
-		} else {
+		} else
 			fattr.cf_uniqueid = iunique(sb, ROOT_I);
-		}
-	} else {
+	} else
 		fattr.cf_uniqueid = CIFS_I(*inode)->uniqueid;
-	}
 
 	/* query for SFU type info if supported and needed */
 	if (fattr.cf_cifsattrs & ATTR_SYSTEM &&
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index b0f4a428398d..f9b5d3d6cf33 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -151,7 +151,7 @@ cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb)
 	}
 }
 
-static void
+void
 cifs_dir_info_to_fattr(struct cifs_fattr *fattr, FILE_DIRECTORY_INFO *info,
 		       struct cifs_sb_info *cifs_sb)
 {
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index 42dccbb57c40..56cc4be87807 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -837,10 +837,8 @@ cifs_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
 		     struct cifs_fid *fid, __u16 search_flags,
 		     struct cifs_search_info *srch_inf)
 {
-	return CIFSFindFirst(xid, tcon, path, cifs_sb->local_nls,
-			     &fid->netfid, search_flags, srch_inf,
-			     cifs_sb->mnt_cifs_flags &
-			     CIFS_MOUNT_MAP_SPECIAL_CHR, CIFS_DIR_SEP(cifs_sb));
+	return CIFSFindFirst(xid, tcon, path, cifs_sb,
+			     &fid->netfid, search_flags, srch_inf, true);
 }
 
 static int
-- 
cgit 


From e4aa25e7801163df058f62c617b859e9d3d4b148 Mon Sep 17 00:00:00 2001
From: Steve French <smfrench@gmail.com>
Date: Mon, 1 Oct 2012 12:26:22 -0500
Subject: [CIFS] Fix SMB2 negotiation support to select only one dialect (based
 on vers=)

Based on whether the user (on mount command) chooses:

vers=3.0 (for smb3.0 support)
vers=2.1 (for smb2.1 support)
or (with subsequent patch, which will allow SMB2 support)
vers=2.0 (for original smb2.02 dialect support)

send only one dialect at a time during negotiate (we
had been sending a list).

Reviewed-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/cifsglob.h |  9 +++++++++
 fs/cifs/connect.c  |  5 +++++
 fs/cifs/smb2ops.c  | 19 +++++++++++++++++++
 fs/cifs/smb2pdu.c  | 40 ++++++++++++----------------------------
 fs/cifs/smb2pdu.h  | 12 +++++++++++-
 5 files changed, 56 insertions(+), 29 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index f6f40635abca..f5af2527fc69 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -179,6 +179,7 @@ struct smb_rqst {
 enum smb_version {
 	Smb_1 = 1,
 	Smb_21,
+	Smb_30,
 };
 
 struct mid_q_entry;
@@ -372,6 +373,8 @@ struct smb_version_operations {
 
 struct smb_version_values {
 	char		*version_string;
+	__u16		protocol_id;
+	__u32		req_capabilities;
 	__u32		large_lock_type;
 	__u32		exclusive_lock_type;
 	__u32		shared_lock_type;
@@ -1496,7 +1499,13 @@ extern mempool_t *cifs_mid_poolp;
 #define SMB1_VERSION_STRING	"1.0"
 extern struct smb_version_operations smb1_operations;
 extern struct smb_version_values smb1_values;
+#define SMB20_VERSION_STRING	"2.0"
+/*extern struct smb_version_operations smb20_operations; */ /* not needed yet */
+extern struct smb_version_values smb20_values;
 #define SMB21_VERSION_STRING	"2.1"
 extern struct smb_version_operations smb21_operations;
 extern struct smb_version_values smb21_values;
+#define SMB30_VERSION_STRING	"3.0"
+/*extern struct smb_version_operations smb30_operations; */ /* not needed yet */
+extern struct smb_version_values smb30_values;
 #endif	/* _CIFS_GLOB_H */
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index a792282f02f7..2fdbe08a7a23 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -272,6 +272,7 @@ static const match_table_t cifs_cacheflavor_tokens = {
 static const match_table_t cifs_smb_version_tokens = {
 	{ Smb_1, SMB1_VERSION_STRING },
 	{ Smb_21, SMB21_VERSION_STRING },
+	{ Smb_30, SMB30_VERSION_STRING },
 };
 
 static int ip_connect(struct TCP_Server_Info *server);
@@ -1074,6 +1075,10 @@ cifs_parse_smb_version(char *value, struct smb_vol *vol)
 		vol->ops = &smb21_operations;
 		vol->vals = &smb21_values;
 		break;
+	case Smb_30:
+		vol->ops = &smb21_operations; /* currently identical with 2.1 */
+		vol->vals = &smb30_values;
+		break;
 #endif
 	default:
 		cERROR(1, "Unknown vers= option specified: %s", value);
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 1570cbea4a49..4d9dbe0b7385 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -645,6 +645,25 @@ struct smb_version_operations smb21_operations = {
 
 struct smb_version_values smb21_values = {
 	.version_string = SMB21_VERSION_STRING,
+	.protocol_id = SMB21_PROT_ID,
+	.req_capabilities = 0, /* MBZ on negotiate req until SMB3 dialect */
+	.large_lock_type = 0,
+	.exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK,
+	.shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
+	.unlock_lock_type = SMB2_LOCKFLAG_UNLOCK,
+	.header_size = sizeof(struct smb2_hdr),
+	.max_header_size = MAX_SMB2_HDR_SIZE,
+	.read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
+	.lock_cmd = SMB2_LOCK,
+	.cap_unix = 0,
+	.cap_nt_find = SMB2_NT_FIND,
+	.cap_large_files = SMB2_LARGE_FILES,
+};
+
+struct smb_version_values smb30_values = {
+	.version_string = SMB30_VERSION_STRING,
+	.protocol_id = SMB30_PROT_ID,
+	.req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU,
 	.large_lock_type = 0,
 	.exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK,
 	.shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 5ad88b4b9990..cf33622cdac8 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1,7 +1,7 @@
 /*
  *   fs/cifs/smb2pdu.c
  *
- *   Copyright (C) International Business Machines  Corp., 2009, 2011
+ *   Copyright (C) International Business Machines  Corp., 2009, 2012
  *                 Etersoft, 2012
  *   Author(s): Steve French (sfrench@us.ibm.com)
  *              Pavel Shilovsky (pshilovsky@samba.org) 2012
@@ -304,24 +304,6 @@ free_rsp_buf(int resp_buftype, void *rsp)
 		cifs_buf_release(rsp);
 }
 
-#define SMB2_NUM_PROT 2
-
-#define SMB2_PROT   0
-#define SMB21_PROT  1
-#define BAD_PROT 0xFFFF
-
-#define SMB2_PROT_ID  0x0202
-#define SMB21_PROT_ID 0x0210
-#define BAD_PROT_ID   0xFFFF
-
-static struct {
-	int index;
-	__le16 name;
-} smb2protocols[] = {
-	{SMB2_PROT,  cpu_to_le16(SMB2_PROT_ID)},
-	{SMB21_PROT, cpu_to_le16(SMB21_PROT_ID)},
-	{BAD_PROT,   cpu_to_le16(BAD_PROT_ID)}
-};
 
 /*
  *
@@ -348,7 +330,6 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
 	int resp_buftype;
 	struct TCP_Server_Info *server;
 	unsigned int sec_flags;
-	u16 i;
 	u16 temp = 0;
 	int blob_offset, blob_length;
 	char *security_blob;
@@ -377,11 +358,10 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
 
 	req->hdr.SessionId = 0;
 
-	for (i = 0; i < SMB2_NUM_PROT; i++)
-		req->Dialects[i] = smb2protocols[i].name;
+	req->Dialects[0] = cpu_to_le16(ses->server->vals->protocol_id);
 
-	req->DialectCount = cpu_to_le16(i);
-	inc_rfc1001_len(req, i * 2);
+	req->DialectCount = cpu_to_le16(1); /* One vers= at a time for now */
+	inc_rfc1001_len(req, 2);
 
 	/* only one of SMB2 signing flags may be set in SMB2 request */
 	if ((sec_flags & CIFSSEC_MUST_SIGN) == CIFSSEC_MUST_SIGN)
@@ -391,7 +371,7 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
 
 	req->SecurityMode = cpu_to_le16(temp);
 
-	req->Capabilities = cpu_to_le32(SMB2_GLOBAL_CAP_DFS);
+	req->Capabilities = cpu_to_le32(ses->server->vals->req_capabilities);
 
 	memcpy(req->ClientGUID, cifs_client_guid, SMB2_CLIENT_GUID_SIZE);
 
@@ -411,10 +391,14 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
 
 	cFYI(1, "mode 0x%x", rsp->SecurityMode);
 
-	if (rsp->DialectRevision == smb2protocols[SMB21_PROT].name)
+	/* BB we may eventually want to match the negotiated vs. requested
+	   dialect, even though we are only requesting one at a time */
+	if (rsp->DialectRevision == cpu_to_le16(SMB20_PROT_ID))
+		cFYI(1, "negotiated smb2.0 dialect");
+	else if (rsp->DialectRevision == cpu_to_le16(SMB21_PROT_ID))
 		cFYI(1, "negotiated smb2.1 dialect");
-	else if (rsp->DialectRevision == smb2protocols[SMB2_PROT].name)
-		cFYI(1, "negotiated smb2 dialect");
+	else if (rsp->DialectRevision == cpu_to_le16(SMB30_PROT_ID))
+		cFYI(1, "negotiated smb3.0 dialect");
 	else {
 		cERROR(1, "Illegal dialect returned by server %d",
 			   le16_to_cpu(rsp->DialectRevision));
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index da099225b1a9..4cb4ced258cb 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -163,9 +163,15 @@ struct smb2_negotiate_req {
 	__le32 Capabilities;
 	__u8   ClientGUID[SMB2_CLIENT_GUID_SIZE];
 	__le64 ClientStartTime;	/* MBZ */
-	__le16 Dialects[2]; /* variable length */
+	__le16 Dialects[1]; /* One dialect (vers=) at a time for now */
 } __packed;
 
+/* Dialects */
+#define SMB20_PROT_ID 0x0202
+#define SMB21_PROT_ID 0x0210
+#define SMB30_PROT_ID 0x0300
+#define BAD_PROT_ID   0xFFFF
+
 /* SecurityMode flags */
 #define	SMB2_NEGOTIATE_SIGNING_ENABLED	0x0001
 #define SMB2_NEGOTIATE_SIGNING_REQUIRED	0x0002
@@ -173,6 +179,10 @@ struct smb2_negotiate_req {
 #define SMB2_GLOBAL_CAP_DFS		0x00000001
 #define SMB2_GLOBAL_CAP_LEASING		0x00000002 /* Resp only New to SMB2.1 */
 #define SMB2_GLOBAL_CAP_LARGE_MTU	0X00000004 /* Resp only New to SMB2.1 */
+#define SMB2_GLOBAL_CAP_MULTI_CHANNEL	0x00000008 /* New to SMB3 */
+#define SMB2_GLOBAL_CAP_PERSISTENT_HANDLES 0x00000010 /* New to SMB3 */
+#define SMB2_GLOBAL_CAP_DIRECTORY_LEASING  0x00000020 /* New to SMB3 */
+#define SMB2_GLOBAL_CAP_ENCRYPTION	0x00000040 /* New to SMB3 */
 /* Internal types */
 #define SMB2_NT_FIND			0x00100000
 #define SMB2_LARGE_FILES		0x00200000
-- 
cgit 


From 1d4ab9077681b7cce60ff46e3a42fe2dafa0b83d Mon Sep 17 00:00:00 2001
From: Steve French <smfrench@gmail.com>
Date: Mon, 1 Oct 2012 12:48:03 -0500
Subject: [CIFS] Fix indentation of fs/cifs/Kconfig entries

make menuconfig for cifs shows multiple entries toward
the end of the list with the incorrect indentation
(probably a bug in Kconfig parsing of items
that are dependant on the module (cifs=m instead of
just CONFIG_CIFS).  This patch fixes the indentation
of all but the last entry (CIFS_ACL) which I don't
know how to fix. It also clarifies wording in
two places

Reviewed-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/Kconfig | 37 +++++++++++++++++++------------------
 1 file changed, 19 insertions(+), 18 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 253033ec0904..2075ddfffa73 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -15,8 +15,8 @@ config CIFS
 	  (CIFS) protocol which is the successor to the Server Message Block
 	  (SMB) protocol, the native file sharing mechanism for most early
 	  PC operating systems.  The CIFS protocol is fully supported by
-	  file servers such as Windows 2000 (including Windows 2003, NT 4
-	  and Windows XP) as well by Samba (which provides excellent CIFS
+	  file servers such as Windows 2000 (including Windows 2003, Windows 2008,
+	  NT 4 and Windows XP) as well by Samba (which provides excellent CIFS
 	  server support for Linux and many other operating systems). Limited
 	  support for OS/2 and Windows ME and similar servers is provided as
 	  well.
@@ -115,6 +115,13 @@ config CIFS_POSIX
 	  (such as Samba 3.10 and later) which can negotiate
 	  CIFS POSIX ACL support.  If unsure, say N.
 
+config CIFS_ACL
+	  bool "Provide CIFS ACL support"
+	  depends on CIFS_XATTR && KEYS
+	  help
+	    Allows fetching CIFS/NTFS ACL from the server.  The DACL blob
+	    is handed over to the application/caller.
+
 config CIFS_DEBUG2
 	bool "Enable additional CIFS debugging routines"
 	depends on CIFS
@@ -139,21 +146,6 @@ config CIFS_DFS_UPCALL
 	    IP addresses) which is needed for implicit mounts of DFS junction
 	    points. If unsure, say N.
 
-config CIFS_FSCACHE
-	  bool "Provide CIFS client caching support"
-	  depends on CIFS=m && FSCACHE || CIFS=y && FSCACHE=y
-	  help
-	    Makes CIFS FS-Cache capable. Say Y here if you want your CIFS data
-	    to be cached locally on disk through the general filesystem cache
-	    manager. If unsure, say N.
-
-config CIFS_ACL
-	  bool "Provide CIFS ACL support"
-	  depends on CIFS_XATTR && KEYS
-	  help
-	    Allows to fetch CIFS/NTFS ACL from the server.  The DACL blob
-	    is handed over to the application/caller.
-
 config CIFS_NFSD_EXPORT
 	  bool "Allow nfsd to export CIFS file system (EXPERIMENTAL)"
 	  depends on CIFS && EXPERIMENTAL && BROKEN
@@ -162,7 +154,7 @@ config CIFS_NFSD_EXPORT
 
 config CIFS_SMB2
 	bool "SMB2 network file system support (EXPERIMENTAL)"
-	depends on EXPERIMENTAL && INET
+	depends on CIFS && EXPERIMENTAL && INET
 	select NLS
 	select KEYS
 	select FSCACHE
@@ -179,3 +171,12 @@ config CIFS_SMB2
 	  (compared to cifs) due to protocol improvements.
 
 	  Unless you are a developer or tester, say N.
+
+config CIFS_FSCACHE
+	  bool "Provide CIFS client caching support"
+	  depends on CIFS=m && FSCACHE || CIFS=y && FSCACHE=y
+	  help
+	    Makes CIFS FS-Cache capable. Say Y here if you want your CIFS data
+	    to be cached locally on disk through the general filesystem cache
+	    manager. If unsure, say N.
+
-- 
cgit 


From 10c28d937e2cca577c2d804106b50dd0562fb062 Mon Sep 17 00:00:00 2001
From: Alex Kelly <alex.page.kelly@gmail.com>
Date: Wed, 26 Sep 2012 21:52:08 -0400
Subject: coredump: move core dump functionality into its own file

This prepares for making core dump functionality optional.

The variable "suid_dumpable" and associated functions are left in fs/exec.c
because they're used elsewhere, such as in ptrace.

Signed-off-by: Alex Kelly <alex.page.kelly@gmail.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Acked-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/Makefile   |   2 +-
 fs/coredump.c | 686 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/exec.c     | 645 +-----------------------------------------------------
 3 files changed, 688 insertions(+), 645 deletions(-)
 create mode 100644 fs/coredump.c

(limited to 'fs')

diff --git a/fs/Makefile b/fs/Makefile
index 2fb977934673..8938f8250320 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -11,7 +11,7 @@ obj-y :=	open.o read_write.o file_table.o super.o \
 		attr.o bad_inode.o file.o filesystems.o namespace.o \
 		seq_file.o xattr.o libfs.o fs-writeback.o \
 		pnode.o drop_caches.o splice.o sync.o utimes.o \
-		stack.o fs_struct.o statfs.o
+		stack.o fs_struct.o statfs.o coredump.o
 
 ifeq ($(CONFIG_BLOCK),y)
 obj-y +=	buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o
diff --git a/fs/coredump.c b/fs/coredump.c
new file mode 100644
index 000000000000..f045bbad6822
--- /dev/null
+++ b/fs/coredump.c
@@ -0,0 +1,686 @@
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/fdtable.h>
+#include <linux/mm.h>
+#include <linux/stat.h>
+#include <linux/fcntl.h>
+#include <linux/swap.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/pagemap.h>
+#include <linux/perf_event.h>
+#include <linux/highmem.h>
+#include <linux/spinlock.h>
+#include <linux/key.h>
+#include <linux/personality.h>
+#include <linux/binfmts.h>
+#include <linux/utsname.h>
+#include <linux/pid_namespace.h>
+#include <linux/module.h>
+#include <linux/namei.h>
+#include <linux/mount.h>
+#include <linux/security.h>
+#include <linux/syscalls.h>
+#include <linux/tsacct_kern.h>
+#include <linux/cn_proc.h>
+#include <linux/audit.h>
+#include <linux/tracehook.h>
+#include <linux/kmod.h>
+#include <linux/fsnotify.h>
+#include <linux/fs_struct.h>
+#include <linux/pipe_fs_i.h>
+#include <linux/oom.h>
+#include <linux/compat.h>
+
+#include <asm/uaccess.h>
+#include <asm/mmu_context.h>
+#include <asm/tlb.h>
+#include <asm/exec.h>
+
+#include <trace/events/task.h>
+#include "internal.h"
+
+#include <trace/events/sched.h>
+
+int core_uses_pid;
+char core_pattern[CORENAME_MAX_SIZE] = "core";
+unsigned int core_pipe_limit;
+
+struct core_name {
+	char *corename;
+	int used, size;
+};
+static atomic_t call_count = ATOMIC_INIT(1);
+
+/* The maximal length of core_pattern is also specified in sysctl.c */
+
+static int expand_corename(struct core_name *cn)
+{
+	char *old_corename = cn->corename;
+
+	cn->size = CORENAME_MAX_SIZE * atomic_inc_return(&call_count);
+	cn->corename = krealloc(old_corename, cn->size, GFP_KERNEL);
+
+	if (!cn->corename) {
+		kfree(old_corename);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static int cn_printf(struct core_name *cn, const char *fmt, ...)
+{
+	char *cur;
+	int need;
+	int ret;
+	va_list arg;
+
+	va_start(arg, fmt);
+	need = vsnprintf(NULL, 0, fmt, arg);
+	va_end(arg);
+
+	if (likely(need < cn->size - cn->used - 1))
+		goto out_printf;
+
+	ret = expand_corename(cn);
+	if (ret)
+		goto expand_fail;
+
+out_printf:
+	cur = cn->corename + cn->used;
+	va_start(arg, fmt);
+	vsnprintf(cur, need + 1, fmt, arg);
+	va_end(arg);
+	cn->used += need;
+	return 0;
+
+expand_fail:
+	return ret;
+}
+
+static void cn_escape(char *str)
+{
+	for (; *str; str++)
+		if (*str == '/')
+			*str = '!';
+}
+
+static int cn_print_exe_file(struct core_name *cn)
+{
+	struct file *exe_file;
+	char *pathbuf, *path;
+	int ret;
+
+	exe_file = get_mm_exe_file(current->mm);
+	if (!exe_file) {
+		char *commstart = cn->corename + cn->used;
+		ret = cn_printf(cn, "%s (path unknown)", current->comm);
+		cn_escape(commstart);
+		return ret;
+	}
+
+	pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY);
+	if (!pathbuf) {
+		ret = -ENOMEM;
+		goto put_exe_file;
+	}
+
+	path = d_path(&exe_file->f_path, pathbuf, PATH_MAX);
+	if (IS_ERR(path)) {
+		ret = PTR_ERR(path);
+		goto free_buf;
+	}
+
+	cn_escape(path);
+
+	ret = cn_printf(cn, "%s", path);
+
+free_buf:
+	kfree(pathbuf);
+put_exe_file:
+	fput(exe_file);
+	return ret;
+}
+
+/* format_corename will inspect the pattern parameter, and output a
+ * name into corename, which must have space for at least
+ * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
+ */
+static int format_corename(struct core_name *cn, long signr)
+{
+	const struct cred *cred = current_cred();
+	const char *pat_ptr = core_pattern;
+	int ispipe = (*pat_ptr == '|');
+	int pid_in_pattern = 0;
+	int err = 0;
+
+	cn->size = CORENAME_MAX_SIZE * atomic_read(&call_count);
+	cn->corename = kmalloc(cn->size, GFP_KERNEL);
+	cn->used = 0;
+
+	if (!cn->corename)
+		return -ENOMEM;
+
+	/* Repeat as long as we have more pattern to process and more output
+	   space */
+	while (*pat_ptr) {
+		if (*pat_ptr != '%') {
+			if (*pat_ptr == 0)
+				goto out;
+			err = cn_printf(cn, "%c", *pat_ptr++);
+		} else {
+			switch (*++pat_ptr) {
+			/* single % at the end, drop that */
+			case 0:
+				goto out;
+			/* Double percent, output one percent */
+			case '%':
+				err = cn_printf(cn, "%c", '%');
+				break;
+			/* pid */
+			case 'p':
+				pid_in_pattern = 1;
+				err = cn_printf(cn, "%d",
+					      task_tgid_vnr(current));
+				break;
+			/* uid */
+			case 'u':
+				err = cn_printf(cn, "%d", cred->uid);
+				break;
+			/* gid */
+			case 'g':
+				err = cn_printf(cn, "%d", cred->gid);
+				break;
+			/* signal that caused the coredump */
+			case 's':
+				err = cn_printf(cn, "%ld", signr);
+				break;
+			/* UNIX time of coredump */
+			case 't': {
+				struct timeval tv;
+				do_gettimeofday(&tv);
+				err = cn_printf(cn, "%lu", tv.tv_sec);
+				break;
+			}
+			/* hostname */
+			case 'h': {
+				char *namestart = cn->corename + cn->used;
+				down_read(&uts_sem);
+				err = cn_printf(cn, "%s",
+					      utsname()->nodename);
+				up_read(&uts_sem);
+				cn_escape(namestart);
+				break;
+			}
+			/* executable */
+			case 'e': {
+				char *commstart = cn->corename + cn->used;
+				err = cn_printf(cn, "%s", current->comm);
+				cn_escape(commstart);
+				break;
+			}
+			case 'E':
+				err = cn_print_exe_file(cn);
+				break;
+			/* core limit size */
+			case 'c':
+				err = cn_printf(cn, "%lu",
+					      rlimit(RLIMIT_CORE));
+				break;
+			default:
+				break;
+			}
+			++pat_ptr;
+		}
+
+		if (err)
+			return err;
+	}
+
+	/* Backward compatibility with core_uses_pid:
+	 *
+	 * If core_pattern does not include a %p (as is the default)
+	 * and core_uses_pid is set, then .%pid will be appended to
+	 * the filename. Do not do this for piped commands. */
+	if (!ispipe && !pid_in_pattern && core_uses_pid) {
+		err = cn_printf(cn, ".%d", task_tgid_vnr(current));
+		if (err)
+			return err;
+	}
+out:
+	return ispipe;
+}
+
+static int zap_process(struct task_struct *start, int exit_code)
+{
+	struct task_struct *t;
+	int nr = 0;
+
+	start->signal->flags = SIGNAL_GROUP_EXIT;
+	start->signal->group_exit_code = exit_code;
+	start->signal->group_stop_count = 0;
+
+	t = start;
+	do {
+		task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
+		if (t != current && t->mm) {
+			sigaddset(&t->pending.signal, SIGKILL);
+			signal_wake_up(t, 1);
+			nr++;
+		}
+	} while_each_thread(start, t);
+
+	return nr;
+}
+
+static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
+				struct core_state *core_state, int exit_code)
+{
+	struct task_struct *g, *p;
+	unsigned long flags;
+	int nr = -EAGAIN;
+
+	spin_lock_irq(&tsk->sighand->siglock);
+	if (!signal_group_exit(tsk->signal)) {
+		mm->core_state = core_state;
+		nr = zap_process(tsk, exit_code);
+	}
+	spin_unlock_irq(&tsk->sighand->siglock);
+	if (unlikely(nr < 0))
+		return nr;
+
+	if (atomic_read(&mm->mm_users) == nr + 1)
+		goto done;
+	/*
+	 * We should find and kill all tasks which use this mm, and we should
+	 * count them correctly into ->nr_threads. We don't take tasklist
+	 * lock, but this is safe wrt:
+	 *
+	 * fork:
+	 *	None of sub-threads can fork after zap_process(leader). All
+	 *	processes which were created before this point should be
+	 *	visible to zap_threads() because copy_process() adds the new
+	 *	process to the tail of init_task.tasks list, and lock/unlock
+	 *	of ->siglock provides a memory barrier.
+	 *
+	 * do_exit:
+	 *	The caller holds mm->mmap_sem. This means that the task which
+	 *	uses this mm can't pass exit_mm(), so it can't exit or clear
+	 *	its ->mm.
+	 *
+	 * de_thread:
+	 *	It does list_replace_rcu(&leader->tasks, &current->tasks),
+	 *	we must see either old or new leader, this does not matter.
+	 *	However, it can change p->sighand, so lock_task_sighand(p)
+	 *	must be used. Since p->mm != NULL and we hold ->mmap_sem
+	 *	it can't fail.
+	 *
+	 *	Note also that "g" can be the old leader with ->mm == NULL
+	 *	and already unhashed and thus removed from ->thread_group.
+	 *	This is OK, __unhash_process()->list_del_rcu() does not
+	 *	clear the ->next pointer, we will find the new leader via
+	 *	next_thread().
+	 */
+	rcu_read_lock();
+	for_each_process(g) {
+		if (g == tsk->group_leader)
+			continue;
+		if (g->flags & PF_KTHREAD)
+			continue;
+		p = g;
+		do {
+			if (p->mm) {
+				if (unlikely(p->mm == mm)) {
+					lock_task_sighand(p, &flags);
+					nr += zap_process(p, exit_code);
+					unlock_task_sighand(p, &flags);
+				}
+				break;
+			}
+		} while_each_thread(g, p);
+	}
+	rcu_read_unlock();
+done:
+	atomic_set(&core_state->nr_threads, nr);
+	return nr;
+}
+
+static int coredump_wait(int exit_code, struct core_state *core_state)
+{
+	struct task_struct *tsk = current;
+	struct mm_struct *mm = tsk->mm;
+	int core_waiters = -EBUSY;
+
+	init_completion(&core_state->startup);
+	core_state->dumper.task = tsk;
+	core_state->dumper.next = NULL;
+
+	down_write(&mm->mmap_sem);
+	if (!mm->core_state)
+		core_waiters = zap_threads(tsk, mm, core_state, exit_code);
+	up_write(&mm->mmap_sem);
+
+	if (core_waiters > 0) {
+		struct core_thread *ptr;
+
+		wait_for_completion(&core_state->startup);
+		/*
+		 * Wait for all the threads to become inactive, so that
+		 * all the thread context (extended register state, like
+		 * fpu etc) gets copied to the memory.
+		 */
+		ptr = core_state->dumper.next;
+		while (ptr != NULL) {
+			wait_task_inactive(ptr->task, 0);
+			ptr = ptr->next;
+		}
+	}
+
+	return core_waiters;
+}
+
+static void coredump_finish(struct mm_struct *mm)
+{
+	struct core_thread *curr, *next;
+	struct task_struct *task;
+
+	next = mm->core_state->dumper.next;
+	while ((curr = next) != NULL) {
+		next = curr->next;
+		task = curr->task;
+		/*
+		 * see exit_mm(), curr->task must not see
+		 * ->task == NULL before we read ->next.
+		 */
+		smp_mb();
+		curr->task = NULL;
+		wake_up_process(task);
+	}
+
+	mm->core_state = NULL;
+}
+
+static void wait_for_dump_helpers(struct file *file)
+{
+	struct pipe_inode_info *pipe;
+
+	pipe = file->f_path.dentry->d_inode->i_pipe;
+
+	pipe_lock(pipe);
+	pipe->readers++;
+	pipe->writers--;
+
+	while ((pipe->readers > 1) && (!signal_pending(current))) {
+		wake_up_interruptible_sync(&pipe->wait);
+		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
+		pipe_wait(pipe);
+	}
+
+	pipe->readers--;
+	pipe->writers++;
+	pipe_unlock(pipe);
+
+}
+
+/*
+ * umh_pipe_setup
+ * helper function to customize the process used
+ * to collect the core in userspace.  Specifically
+ * it sets up a pipe and installs it as fd 0 (stdin)
+ * for the process.  Returns 0 on success, or
+ * PTR_ERR on failure.
+ * Note that it also sets the core limit to 1.  This
+ * is a special value that we use to trap recursive
+ * core dumps
+ */
+static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
+{
+	struct file *files[2];
+	struct coredump_params *cp = (struct coredump_params *)info->data;
+	int err = create_pipe_files(files, 0);
+	if (err)
+		return err;
+
+	cp->file = files[1];
+
+	replace_fd(0, files[0], 0);
+	/* and disallow core files too */
+	current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1};
+
+	return 0;
+}
+
+void do_coredump(long signr, int exit_code, struct pt_regs *regs)
+{
+	struct core_state core_state;
+	struct core_name cn;
+	struct mm_struct *mm = current->mm;
+	struct linux_binfmt * binfmt;
+	const struct cred *old_cred;
+	struct cred *cred;
+	int retval = 0;
+	int flag = 0;
+	int ispipe;
+	struct files_struct *displaced;
+	bool need_nonrelative = false;
+	static atomic_t core_dump_count = ATOMIC_INIT(0);
+	struct coredump_params cprm = {
+		.signr = signr,
+		.regs = regs,
+		.limit = rlimit(RLIMIT_CORE),
+		/*
+		 * We must use the same mm->flags while dumping core to avoid
+		 * inconsistency of bit flags, since this flag is not protected
+		 * by any locks.
+		 */
+		.mm_flags = mm->flags,
+	};
+
+	audit_core_dumps(signr);
+
+	binfmt = mm->binfmt;
+	if (!binfmt || !binfmt->core_dump)
+		goto fail;
+	if (!__get_dumpable(cprm.mm_flags))
+		goto fail;
+
+	cred = prepare_creds();
+	if (!cred)
+		goto fail;
+	/*
+	 * We cannot trust fsuid as being the "true" uid of the process
+	 * nor do we know its entire history. We only know it was tainted
+	 * so we dump it as root in mode 2, and only into a controlled
+	 * environment (pipe handler or fully qualified path).
+	 */
+	if (__get_dumpable(cprm.mm_flags) == SUID_DUMPABLE_SAFE) {
+		/* Setuid core dump mode */
+		flag = O_EXCL;		/* Stop rewrite attacks */
+		cred->fsuid = GLOBAL_ROOT_UID;	/* Dump root private */
+		need_nonrelative = true;
+	}
+
+	retval = coredump_wait(exit_code, &core_state);
+	if (retval < 0)
+		goto fail_creds;
+
+	old_cred = override_creds(cred);
+
+	/*
+	 * Clear any false indication of pending signals that might
+	 * be seen by the filesystem code called to write the core file.
+	 */
+	clear_thread_flag(TIF_SIGPENDING);
+
+	ispipe = format_corename(&cn, signr);
+
+ 	if (ispipe) {
+		int dump_count;
+		char **helper_argv;
+
+		if (ispipe < 0) {
+			printk(KERN_WARNING "format_corename failed\n");
+			printk(KERN_WARNING "Aborting core\n");
+			goto fail_corename;
+		}
+
+		if (cprm.limit == 1) {
+			/* See umh_pipe_setup() which sets RLIMIT_CORE = 1.
+			 *
+			 * Normally core limits are irrelevant to pipes, since
+			 * we're not writing to the file system, but we use
+			 * cprm.limit of 1 here as a speacial value, this is a
+			 * consistent way to catch recursive crashes.
+			 * We can still crash if the core_pattern binary sets
+			 * RLIM_CORE = !1, but it runs as root, and can do
+			 * lots of stupid things.
+			 *
+			 * Note that we use task_tgid_vnr here to grab the pid
+			 * of the process group leader.  That way we get the
+			 * right pid if a thread in a multi-threaded
+			 * core_pattern process dies.
+			 */
+			printk(KERN_WARNING
+				"Process %d(%s) has RLIMIT_CORE set to 1\n",
+				task_tgid_vnr(current), current->comm);
+			printk(KERN_WARNING "Aborting core\n");
+			goto fail_unlock;
+		}
+		cprm.limit = RLIM_INFINITY;
+
+		dump_count = atomic_inc_return(&core_dump_count);
+		if (core_pipe_limit && (core_pipe_limit < dump_count)) {
+			printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n",
+			       task_tgid_vnr(current), current->comm);
+			printk(KERN_WARNING "Skipping core dump\n");
+			goto fail_dropcount;
+		}
+
+		helper_argv = argv_split(GFP_KERNEL, cn.corename+1, NULL);
+		if (!helper_argv) {
+			printk(KERN_WARNING "%s failed to allocate memory\n",
+			       __func__);
+			goto fail_dropcount;
+		}
+
+		retval = call_usermodehelper_fns(helper_argv[0], helper_argv,
+					NULL, UMH_WAIT_EXEC, umh_pipe_setup,
+					NULL, &cprm);
+		argv_free(helper_argv);
+		if (retval) {
+ 			printk(KERN_INFO "Core dump to %s pipe failed\n",
+			       cn.corename);
+			goto close_fail;
+ 		}
+	} else {
+		struct inode *inode;
+
+		if (cprm.limit < binfmt->min_coredump)
+			goto fail_unlock;
+
+		if (need_nonrelative && cn.corename[0] != '/') {
+			printk(KERN_WARNING "Pid %d(%s) can only dump core "\
+				"to fully qualified path!\n",
+				task_tgid_vnr(current), current->comm);
+			printk(KERN_WARNING "Skipping core dump\n");
+			goto fail_unlock;
+		}
+
+		cprm.file = filp_open(cn.corename,
+				 O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
+				 0600);
+		if (IS_ERR(cprm.file))
+			goto fail_unlock;
+
+		inode = cprm.file->f_path.dentry->d_inode;
+		if (inode->i_nlink > 1)
+			goto close_fail;
+		if (d_unhashed(cprm.file->f_path.dentry))
+			goto close_fail;
+		/*
+		 * AK: actually i see no reason to not allow this for named
+		 * pipes etc, but keep the previous behaviour for now.
+		 */
+		if (!S_ISREG(inode->i_mode))
+			goto close_fail;
+		/*
+		 * Dont allow local users get cute and trick others to coredump
+		 * into their pre-created files.
+		 */
+		if (!uid_eq(inode->i_uid, current_fsuid()))
+			goto close_fail;
+		if (!cprm.file->f_op || !cprm.file->f_op->write)
+			goto close_fail;
+		if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file))
+			goto close_fail;
+	}
+
+	/* get us an unshared descriptor table; almost always a no-op */
+	retval = unshare_files(&displaced);
+	if (retval)
+		goto close_fail;
+	if (displaced)
+		put_files_struct(displaced);
+	retval = binfmt->core_dump(&cprm);
+	if (retval)
+		current->signal->group_exit_code |= 0x80;
+
+	if (ispipe && core_pipe_limit)
+		wait_for_dump_helpers(cprm.file);
+close_fail:
+	if (cprm.file)
+		filp_close(cprm.file, NULL);
+fail_dropcount:
+	if (ispipe)
+		atomic_dec(&core_dump_count);
+fail_unlock:
+	kfree(cn.corename);
+fail_corename:
+	coredump_finish(mm);
+	revert_creds(old_cred);
+fail_creds:
+	put_cred(cred);
+fail:
+	return;
+}
+
+/*
+ * Core dumping helper functions.  These are the only things you should
+ * do on a core-file: use only these functions to write out all the
+ * necessary info.
+ */
+int dump_write(struct file *file, const void *addr, int nr)
+{
+	return access_ok(VERIFY_READ, addr, nr) && file->f_op->write(file, addr, nr, &file->f_pos) == nr;
+}
+EXPORT_SYMBOL(dump_write);
+
+int dump_seek(struct file *file, loff_t off)
+{
+	int ret = 1;
+
+	if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
+		if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
+			return 0;
+	} else {
+		char *buf = (char *)get_zeroed_page(GFP_KERNEL);
+
+		if (!buf)
+			return 0;
+		while (off > 0) {
+			unsigned long n = off;
+
+			if (n > PAGE_SIZE)
+				n = PAGE_SIZE;
+			if (!dump_write(file, buf, n)) {
+				ret = 0;
+				break;
+			}
+			off -= n;
+		}
+		free_page((unsigned long)buf);
+	}
+	return ret;
+}
+EXPORT_SYMBOL(dump_seek);
diff --git a/fs/exec.c b/fs/exec.c
index beb05a95e4a3..48fb26ef8a1b 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -66,19 +66,8 @@
 
 #include <trace/events/sched.h>
 
-int core_uses_pid;
-char core_pattern[CORENAME_MAX_SIZE] = "core";
-unsigned int core_pipe_limit;
 int suid_dumpable = 0;
 
-struct core_name {
-	char *corename;
-	int used, size;
-};
-static atomic_t call_count = ATOMIC_INIT(1);
-
-/* The maximal length of core_pattern is also specified in sysctl.c */
-
 static LIST_HEAD(formats);
 static DEFINE_RWLOCK(binfmt_lock);
 
@@ -1603,353 +1592,6 @@ void set_binfmt(struct linux_binfmt *new)
 
 EXPORT_SYMBOL(set_binfmt);
 
-static int expand_corename(struct core_name *cn)
-{
-	char *old_corename = cn->corename;
-
-	cn->size = CORENAME_MAX_SIZE * atomic_inc_return(&call_count);
-	cn->corename = krealloc(old_corename, cn->size, GFP_KERNEL);
-
-	if (!cn->corename) {
-		kfree(old_corename);
-		return -ENOMEM;
-	}
-
-	return 0;
-}
-
-static int cn_printf(struct core_name *cn, const char *fmt, ...)
-{
-	char *cur;
-	int need;
-	int ret;
-	va_list arg;
-
-	va_start(arg, fmt);
-	need = vsnprintf(NULL, 0, fmt, arg);
-	va_end(arg);
-
-	if (likely(need < cn->size - cn->used - 1))
-		goto out_printf;
-
-	ret = expand_corename(cn);
-	if (ret)
-		goto expand_fail;
-
-out_printf:
-	cur = cn->corename + cn->used;
-	va_start(arg, fmt);
-	vsnprintf(cur, need + 1, fmt, arg);
-	va_end(arg);
-	cn->used += need;
-	return 0;
-
-expand_fail:
-	return ret;
-}
-
-static void cn_escape(char *str)
-{
-	for (; *str; str++)
-		if (*str == '/')
-			*str = '!';
-}
-
-static int cn_print_exe_file(struct core_name *cn)
-{
-	struct file *exe_file;
-	char *pathbuf, *path;
-	int ret;
-
-	exe_file = get_mm_exe_file(current->mm);
-	if (!exe_file) {
-		char *commstart = cn->corename + cn->used;
-		ret = cn_printf(cn, "%s (path unknown)", current->comm);
-		cn_escape(commstart);
-		return ret;
-	}
-
-	pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY);
-	if (!pathbuf) {
-		ret = -ENOMEM;
-		goto put_exe_file;
-	}
-
-	path = d_path(&exe_file->f_path, pathbuf, PATH_MAX);
-	if (IS_ERR(path)) {
-		ret = PTR_ERR(path);
-		goto free_buf;
-	}
-
-	cn_escape(path);
-
-	ret = cn_printf(cn, "%s", path);
-
-free_buf:
-	kfree(pathbuf);
-put_exe_file:
-	fput(exe_file);
-	return ret;
-}
-
-/* format_corename will inspect the pattern parameter, and output a
- * name into corename, which must have space for at least
- * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
- */
-static int format_corename(struct core_name *cn, long signr)
-{
-	const struct cred *cred = current_cred();
-	const char *pat_ptr = core_pattern;
-	int ispipe = (*pat_ptr == '|');
-	int pid_in_pattern = 0;
-	int err = 0;
-
-	cn->size = CORENAME_MAX_SIZE * atomic_read(&call_count);
-	cn->corename = kmalloc(cn->size, GFP_KERNEL);
-	cn->used = 0;
-
-	if (!cn->corename)
-		return -ENOMEM;
-
-	/* Repeat as long as we have more pattern to process and more output
-	   space */
-	while (*pat_ptr) {
-		if (*pat_ptr != '%') {
-			if (*pat_ptr == 0)
-				goto out;
-			err = cn_printf(cn, "%c", *pat_ptr++);
-		} else {
-			switch (*++pat_ptr) {
-			/* single % at the end, drop that */
-			case 0:
-				goto out;
-			/* Double percent, output one percent */
-			case '%':
-				err = cn_printf(cn, "%c", '%');
-				break;
-			/* pid */
-			case 'p':
-				pid_in_pattern = 1;
-				err = cn_printf(cn, "%d",
-					      task_tgid_vnr(current));
-				break;
-			/* uid */
-			case 'u':
-				err = cn_printf(cn, "%d", cred->uid);
-				break;
-			/* gid */
-			case 'g':
-				err = cn_printf(cn, "%d", cred->gid);
-				break;
-			/* signal that caused the coredump */
-			case 's':
-				err = cn_printf(cn, "%ld", signr);
-				break;
-			/* UNIX time of coredump */
-			case 't': {
-				struct timeval tv;
-				do_gettimeofday(&tv);
-				err = cn_printf(cn, "%lu", tv.tv_sec);
-				break;
-			}
-			/* hostname */
-			case 'h': {
-				char *namestart = cn->corename + cn->used;
-				down_read(&uts_sem);
-				err = cn_printf(cn, "%s",
-					      utsname()->nodename);
-				up_read(&uts_sem);
-				cn_escape(namestart);
-				break;
-			}
-			/* executable */
-			case 'e': {
-				char *commstart = cn->corename + cn->used;
-				err = cn_printf(cn, "%s", current->comm);
-				cn_escape(commstart);
-				break;
-			}
-			case 'E':
-				err = cn_print_exe_file(cn);
-				break;
-			/* core limit size */
-			case 'c':
-				err = cn_printf(cn, "%lu",
-					      rlimit(RLIMIT_CORE));
-				break;
-			default:
-				break;
-			}
-			++pat_ptr;
-		}
-
-		if (err)
-			return err;
-	}
-
-	/* Backward compatibility with core_uses_pid:
-	 *
-	 * If core_pattern does not include a %p (as is the default)
-	 * and core_uses_pid is set, then .%pid will be appended to
-	 * the filename. Do not do this for piped commands. */
-	if (!ispipe && !pid_in_pattern && core_uses_pid) {
-		err = cn_printf(cn, ".%d", task_tgid_vnr(current));
-		if (err)
-			return err;
-	}
-out:
-	return ispipe;
-}
-
-static int zap_process(struct task_struct *start, int exit_code)
-{
-	struct task_struct *t;
-	int nr = 0;
-
-	start->signal->flags = SIGNAL_GROUP_EXIT;
-	start->signal->group_exit_code = exit_code;
-	start->signal->group_stop_count = 0;
-
-	t = start;
-	do {
-		task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
-		if (t != current && t->mm) {
-			sigaddset(&t->pending.signal, SIGKILL);
-			signal_wake_up(t, 1);
-			nr++;
-		}
-	} while_each_thread(start, t);
-
-	return nr;
-}
-
-static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
-				struct core_state *core_state, int exit_code)
-{
-	struct task_struct *g, *p;
-	unsigned long flags;
-	int nr = -EAGAIN;
-
-	spin_lock_irq(&tsk->sighand->siglock);
-	if (!signal_group_exit(tsk->signal)) {
-		mm->core_state = core_state;
-		nr = zap_process(tsk, exit_code);
-	}
-	spin_unlock_irq(&tsk->sighand->siglock);
-	if (unlikely(nr < 0))
-		return nr;
-
-	if (atomic_read(&mm->mm_users) == nr + 1)
-		goto done;
-	/*
-	 * We should find and kill all tasks which use this mm, and we should
-	 * count them correctly into ->nr_threads. We don't take tasklist
-	 * lock, but this is safe wrt:
-	 *
-	 * fork:
-	 *	None of sub-threads can fork after zap_process(leader). All
-	 *	processes which were created before this point should be
-	 *	visible to zap_threads() because copy_process() adds the new
-	 *	process to the tail of init_task.tasks list, and lock/unlock
-	 *	of ->siglock provides a memory barrier.
-	 *
-	 * do_exit:
-	 *	The caller holds mm->mmap_sem. This means that the task which
-	 *	uses this mm can't pass exit_mm(), so it can't exit or clear
-	 *	its ->mm.
-	 *
-	 * de_thread:
-	 *	It does list_replace_rcu(&leader->tasks, &current->tasks),
-	 *	we must see either old or new leader, this does not matter.
-	 *	However, it can change p->sighand, so lock_task_sighand(p)
-	 *	must be used. Since p->mm != NULL and we hold ->mmap_sem
-	 *	it can't fail.
-	 *
-	 *	Note also that "g" can be the old leader with ->mm == NULL
-	 *	and already unhashed and thus removed from ->thread_group.
-	 *	This is OK, __unhash_process()->list_del_rcu() does not
-	 *	clear the ->next pointer, we will find the new leader via
-	 *	next_thread().
-	 */
-	rcu_read_lock();
-	for_each_process(g) {
-		if (g == tsk->group_leader)
-			continue;
-		if (g->flags & PF_KTHREAD)
-			continue;
-		p = g;
-		do {
-			if (p->mm) {
-				if (unlikely(p->mm == mm)) {
-					lock_task_sighand(p, &flags);
-					nr += zap_process(p, exit_code);
-					unlock_task_sighand(p, &flags);
-				}
-				break;
-			}
-		} while_each_thread(g, p);
-	}
-	rcu_read_unlock();
-done:
-	atomic_set(&core_state->nr_threads, nr);
-	return nr;
-}
-
-static int coredump_wait(int exit_code, struct core_state *core_state)
-{
-	struct task_struct *tsk = current;
-	struct mm_struct *mm = tsk->mm;
-	int core_waiters = -EBUSY;
-
-	init_completion(&core_state->startup);
-	core_state->dumper.task = tsk;
-	core_state->dumper.next = NULL;
-
-	down_write(&mm->mmap_sem);
-	if (!mm->core_state)
-		core_waiters = zap_threads(tsk, mm, core_state, exit_code);
-	up_write(&mm->mmap_sem);
-
-	if (core_waiters > 0) {
-		struct core_thread *ptr;
-
-		wait_for_completion(&core_state->startup);
-		/*
-		 * Wait for all the threads to become inactive, so that
-		 * all the thread context (extended register state, like
-		 * fpu etc) gets copied to the memory.
-		 */
-		ptr = core_state->dumper.next;
-		while (ptr != NULL) {
-			wait_task_inactive(ptr->task, 0);
-			ptr = ptr->next;
-		}
-	}
-
-	return core_waiters;
-}
-
-static void coredump_finish(struct mm_struct *mm)
-{
-	struct core_thread *curr, *next;
-	struct task_struct *task;
-
-	next = mm->core_state->dumper.next;
-	while ((curr = next) != NULL) {
-		next = curr->next;
-		task = curr->task;
-		/*
-		 * see exit_mm(), curr->task must not see
-		 * ->task == NULL before we read ->next.
-		 */
-		smp_mb();
-		curr->task = NULL;
-		wake_up_process(task);
-	}
-
-	mm->core_state = NULL;
-}
-
 /*
  * set_dumpable converts traditional three-value dumpable to two flags and
  * stores them into mm->flags.  It modifies lower two bits of mm->flags, but
@@ -1991,7 +1633,7 @@ void set_dumpable(struct mm_struct *mm, int value)
 	}
 }
 
-static int __get_dumpable(unsigned long mm_flags)
+int __get_dumpable(unsigned long mm_flags)
 {
 	int ret;
 
@@ -2003,288 +1645,3 @@ int get_dumpable(struct mm_struct *mm)
 {
 	return __get_dumpable(mm->flags);
 }
-
-static void wait_for_dump_helpers(struct file *file)
-{
-	struct pipe_inode_info *pipe;
-
-	pipe = file->f_path.dentry->d_inode->i_pipe;
-
-	pipe_lock(pipe);
-	pipe->readers++;
-	pipe->writers--;
-
-	while ((pipe->readers > 1) && (!signal_pending(current))) {
-		wake_up_interruptible_sync(&pipe->wait);
-		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
-		pipe_wait(pipe);
-	}
-
-	pipe->readers--;
-	pipe->writers++;
-	pipe_unlock(pipe);
-
-}
-
-
-/*
- * umh_pipe_setup
- * helper function to customize the process used
- * to collect the core in userspace.  Specifically
- * it sets up a pipe and installs it as fd 0 (stdin)
- * for the process.  Returns 0 on success, or
- * PTR_ERR on failure.
- * Note that it also sets the core limit to 1.  This
- * is a special value that we use to trap recursive
- * core dumps
- */
-static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
-{
-	struct file *files[2];
-	struct coredump_params *cp = (struct coredump_params *)info->data;
-	int err = create_pipe_files(files, 0);
-	if (err)
-		return err;
-
-	cp->file = files[1];
-
-	replace_fd(0, files[0], 0);
-	/* and disallow core files too */
-	current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1};
-
-	return 0;
-}
-
-void do_coredump(long signr, int exit_code, struct pt_regs *regs)
-{
-	struct core_state core_state;
-	struct core_name cn;
-	struct mm_struct *mm = current->mm;
-	struct linux_binfmt * binfmt;
-	const struct cred *old_cred;
-	struct cred *cred;
-	int retval = 0;
-	int flag = 0;
-	int ispipe;
-	struct files_struct *displaced;
-	bool need_nonrelative = false;
-	static atomic_t core_dump_count = ATOMIC_INIT(0);
-	struct coredump_params cprm = {
-		.signr = signr,
-		.regs = regs,
-		.limit = rlimit(RLIMIT_CORE),
-		/*
-		 * We must use the same mm->flags while dumping core to avoid
-		 * inconsistency of bit flags, since this flag is not protected
-		 * by any locks.
-		 */
-		.mm_flags = mm->flags,
-	};
-
-	audit_core_dumps(signr);
-
-	binfmt = mm->binfmt;
-	if (!binfmt || !binfmt->core_dump)
-		goto fail;
-	if (!__get_dumpable(cprm.mm_flags))
-		goto fail;
-
-	cred = prepare_creds();
-	if (!cred)
-		goto fail;
-	/*
-	 * We cannot trust fsuid as being the "true" uid of the process
-	 * nor do we know its entire history. We only know it was tainted
-	 * so we dump it as root in mode 2, and only into a controlled
-	 * environment (pipe handler or fully qualified path).
-	 */
-	if (__get_dumpable(cprm.mm_flags) == SUID_DUMPABLE_SAFE) {
-		/* Setuid core dump mode */
-		flag = O_EXCL;		/* Stop rewrite attacks */
-		cred->fsuid = GLOBAL_ROOT_UID;	/* Dump root private */
-		need_nonrelative = true;
-	}
-
-	retval = coredump_wait(exit_code, &core_state);
-	if (retval < 0)
-		goto fail_creds;
-
-	old_cred = override_creds(cred);
-
-	/*
-	 * Clear any false indication of pending signals that might
-	 * be seen by the filesystem code called to write the core file.
-	 */
-	clear_thread_flag(TIF_SIGPENDING);
-
-	ispipe = format_corename(&cn, signr);
-
- 	if (ispipe) {
-		int dump_count;
-		char **helper_argv;
-
-		if (ispipe < 0) {
-			printk(KERN_WARNING "format_corename failed\n");
-			printk(KERN_WARNING "Aborting core\n");
-			goto fail_corename;
-		}
-
-		if (cprm.limit == 1) {
-			/* See umh_pipe_setup() which sets RLIMIT_CORE = 1.
-			 *
-			 * Normally core limits are irrelevant to pipes, since
-			 * we're not writing to the file system, but we use
-			 * cprm.limit of 1 here as a speacial value, this is a
-			 * consistent way to catch recursive crashes.
-			 * We can still crash if the core_pattern binary sets
-			 * RLIM_CORE = !1, but it runs as root, and can do
-			 * lots of stupid things.
-			 *
-			 * Note that we use task_tgid_vnr here to grab the pid
-			 * of the process group leader.  That way we get the
-			 * right pid if a thread in a multi-threaded
-			 * core_pattern process dies.
-			 */
-			printk(KERN_WARNING
-				"Process %d(%s) has RLIMIT_CORE set to 1\n",
-				task_tgid_vnr(current), current->comm);
-			printk(KERN_WARNING "Aborting core\n");
-			goto fail_unlock;
-		}
-		cprm.limit = RLIM_INFINITY;
-
-		dump_count = atomic_inc_return(&core_dump_count);
-		if (core_pipe_limit && (core_pipe_limit < dump_count)) {
-			printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n",
-			       task_tgid_vnr(current), current->comm);
-			printk(KERN_WARNING "Skipping core dump\n");
-			goto fail_dropcount;
-		}
-
-		helper_argv = argv_split(GFP_KERNEL, cn.corename+1, NULL);
-		if (!helper_argv) {
-			printk(KERN_WARNING "%s failed to allocate memory\n",
-			       __func__);
-			goto fail_dropcount;
-		}
-
-		retval = call_usermodehelper_fns(helper_argv[0], helper_argv,
-					NULL, UMH_WAIT_EXEC, umh_pipe_setup,
-					NULL, &cprm);
-		argv_free(helper_argv);
-		if (retval) {
- 			printk(KERN_INFO "Core dump to %s pipe failed\n",
-			       cn.corename);
-			goto close_fail;
- 		}
-	} else {
-		struct inode *inode;
-
-		if (cprm.limit < binfmt->min_coredump)
-			goto fail_unlock;
-
-		if (need_nonrelative && cn.corename[0] != '/') {
-			printk(KERN_WARNING "Pid %d(%s) can only dump core "\
-				"to fully qualified path!\n",
-				task_tgid_vnr(current), current->comm);
-			printk(KERN_WARNING "Skipping core dump\n");
-			goto fail_unlock;
-		}
-
-		cprm.file = filp_open(cn.corename,
-				 O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
-				 0600);
-		if (IS_ERR(cprm.file))
-			goto fail_unlock;
-
-		inode = cprm.file->f_path.dentry->d_inode;
-		if (inode->i_nlink > 1)
-			goto close_fail;
-		if (d_unhashed(cprm.file->f_path.dentry))
-			goto close_fail;
-		/*
-		 * AK: actually i see no reason to not allow this for named
-		 * pipes etc, but keep the previous behaviour for now.
-		 */
-		if (!S_ISREG(inode->i_mode))
-			goto close_fail;
-		/*
-		 * Dont allow local users get cute and trick others to coredump
-		 * into their pre-created files.
-		 */
-		if (!uid_eq(inode->i_uid, current_fsuid()))
-			goto close_fail;
-		if (!cprm.file->f_op || !cprm.file->f_op->write)
-			goto close_fail;
-		if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file))
-			goto close_fail;
-	}
-
-	/* get us an unshared descriptor table; almost always a no-op */
-	retval = unshare_files(&displaced);
-	if (retval)
-		goto close_fail;
-	if (displaced)
-		put_files_struct(displaced);
-	retval = binfmt->core_dump(&cprm);
-	if (retval)
-		current->signal->group_exit_code |= 0x80;
-
-	if (ispipe && core_pipe_limit)
-		wait_for_dump_helpers(cprm.file);
-close_fail:
-	if (cprm.file)
-		filp_close(cprm.file, NULL);
-fail_dropcount:
-	if (ispipe)
-		atomic_dec(&core_dump_count);
-fail_unlock:
-	kfree(cn.corename);
-fail_corename:
-	coredump_finish(mm);
-	revert_creds(old_cred);
-fail_creds:
-	put_cred(cred);
-fail:
-	return;
-}
-
-/*
- * Core dumping helper functions.  These are the only things you should
- * do on a core-file: use only these functions to write out all the
- * necessary info.
- */
-int dump_write(struct file *file, const void *addr, int nr)
-{
-	return access_ok(VERIFY_READ, addr, nr) && file->f_op->write(file, addr, nr, &file->f_pos) == nr;
-}
-EXPORT_SYMBOL(dump_write);
-
-int dump_seek(struct file *file, loff_t off)
-{
-	int ret = 1;
-
-	if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
-		if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
-			return 0;
-	} else {
-		char *buf = (char *)get_zeroed_page(GFP_KERNEL);
-
-		if (!buf)
-			return 0;
-		while (off > 0) {
-			unsigned long n = off;
-
-			if (n > PAGE_SIZE)
-				n = PAGE_SIZE;
-			if (!dump_write(file, buf, n)) {
-				ret = 0;
-				break;
-			}
-			off -= n;
-		}
-		free_page((unsigned long)buf);
-	}
-	return ret;
-}
-EXPORT_SYMBOL(dump_seek);
-- 
cgit 


From 99621b44aa194eab594e1f17217231c02b519211 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 29 Aug 2012 16:31:33 -0400
Subject: btrfs: reada_extent doesn't need kref for refcount

All increments and decrements are under the same spinlock - have to be,
since they need to protect the radix_tree it's found in.  Just use
int, no need to wank with kref...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/btrfs/reada.c | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index 48a4882d8ad5..a955669519a2 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -68,7 +68,7 @@ struct reada_extent {
 	u32			blocksize;
 	int			err;
 	struct list_head	extctl;
-	struct kref		refcnt;
+	int 			refcnt;
 	spinlock_t		lock;
 	struct reada_zone	*zones[BTRFS_MAX_MIRRORS];
 	int			nzones;
@@ -126,7 +126,7 @@ static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
 	spin_lock(&fs_info->reada_lock);
 	re = radix_tree_lookup(&fs_info->reada_tree, index);
 	if (re)
-		kref_get(&re->refcnt);
+		re->refcnt++;
 	spin_unlock(&fs_info->reada_lock);
 
 	if (!re)
@@ -336,7 +336,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
 	spin_lock(&fs_info->reada_lock);
 	re = radix_tree_lookup(&fs_info->reada_tree, index);
 	if (re)
-		kref_get(&re->refcnt);
+		re->refcnt++;
 	spin_unlock(&fs_info->reada_lock);
 
 	if (re)
@@ -352,7 +352,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
 	re->top = *top;
 	INIT_LIST_HEAD(&re->extctl);
 	spin_lock_init(&re->lock);
-	kref_init(&re->refcnt);
+	re->refcnt = 1;
 
 	/*
 	 * map block
@@ -398,7 +398,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
 	if (ret == -EEXIST) {
 		re_exist = radix_tree_lookup(&fs_info->reada_tree, index);
 		BUG_ON(!re_exist);
-		kref_get(&re_exist->refcnt);
+		re_exist->refcnt++;
 		spin_unlock(&fs_info->reada_lock);
 		goto error;
 	}
@@ -465,10 +465,6 @@ error:
 	return re_exist;
 }
 
-static void reada_kref_dummy(struct kref *kr)
-{
-}
-
 static void reada_extent_put(struct btrfs_fs_info *fs_info,
 			     struct reada_extent *re)
 {
@@ -476,7 +472,7 @@ static void reada_extent_put(struct btrfs_fs_info *fs_info,
 	unsigned long index = re->logical >> PAGE_CACHE_SHIFT;
 
 	spin_lock(&fs_info->reada_lock);
-	if (!kref_put(&re->refcnt, reada_kref_dummy)) {
+	if (--re->refcnt) {
 		spin_unlock(&fs_info->reada_lock);
 		return;
 	}
@@ -671,7 +667,7 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info,
 		return 0;
 	}
 	dev->reada_next = re->logical + re->blocksize;
-	kref_get(&re->refcnt);
+	re->refcnt++;
 
 	spin_unlock(&fs_info->reada_lock);
 
-- 
cgit 


From 8c0a85377048b64c880e76ec7368904fe46d0b94 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Wed, 26 Sep 2012 11:33:07 +1000
Subject: fs: push rcu_barrier() from deactivate_locked_super() to filesystems

There's no reason to call rcu_barrier() on every
deactivate_locked_super().  We only need to make sure that all delayed rcu
free inodes are flushed before we destroy related cache.

Removing rcu_barrier() from deactivate_locked_super() affects some fast
paths.  E.g.  on my machine exit_group() of a last process in IPC
namespace takes 0.07538s.  rcu_barrier() takes 0.05188s of that time.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/9p/v9fs.c             | 5 +++++
 fs/adfs/super.c          | 5 +++++
 fs/affs/super.c          | 5 +++++
 fs/afs/super.c           | 5 +++++
 fs/befs/linuxvfs.c       | 5 +++++
 fs/bfs/inode.c           | 5 +++++
 fs/btrfs/extent_io.c     | 6 ++++++
 fs/btrfs/inode.c         | 5 +++++
 fs/ceph/super.c          | 5 +++++
 fs/cifs/cifsfs.c         | 5 +++++
 fs/coda/inode.c          | 5 +++++
 fs/ecryptfs/main.c       | 6 ++++++
 fs/efs/super.c           | 5 +++++
 fs/exofs/super.c         | 5 +++++
 fs/ext2/super.c          | 5 +++++
 fs/ext3/super.c          | 5 +++++
 fs/ext4/super.c          | 5 +++++
 fs/fat/inode.c           | 5 +++++
 fs/freevxfs/vxfs_super.c | 5 +++++
 fs/fuse/inode.c          | 6 ++++++
 fs/hfs/super.c           | 6 ++++++
 fs/hfsplus/super.c       | 6 ++++++
 fs/hpfs/super.c          | 5 +++++
 fs/hugetlbfs/inode.c     | 5 +++++
 fs/isofs/inode.c         | 5 +++++
 fs/jffs2/super.c         | 6 ++++++
 fs/jfs/super.c           | 6 ++++++
 fs/logfs/inode.c         | 5 +++++
 fs/minix/inode.c         | 5 +++++
 fs/ncpfs/inode.c         | 5 +++++
 fs/nfs/inode.c           | 5 +++++
 fs/nilfs2/super.c        | 6 ++++++
 fs/ntfs/super.c          | 6 ++++++
 fs/ocfs2/dlmfs/dlmfs.c   | 5 +++++
 fs/ocfs2/super.c         | 5 +++++
 fs/openpromfs/inode.c    | 5 +++++
 fs/qnx4/inode.c          | 5 +++++
 fs/qnx6/inode.c          | 5 +++++
 fs/reiserfs/super.c      | 5 +++++
 fs/romfs/super.c         | 5 +++++
 fs/squashfs/super.c      | 5 +++++
 fs/super.c               | 6 ------
 fs/sysv/inode.c          | 5 +++++
 fs/ubifs/super.c         | 6 ++++++
 fs/udf/super.c           | 5 +++++
 fs/ufs/super.c           | 5 +++++
 fs/xfs/xfs_super.c       | 5 +++++
 47 files changed, 240 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index b85efa773949..392c5dac1981 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -560,6 +560,11 @@ static int v9fs_init_inode_cache(void)
  */
 static void v9fs_destroy_inode_cache(void)
 {
+	/*
+	 * Make sure all delayed rcu free inodes are flushed before we
+	 * destroy cache.
+	 */
+	rcu_barrier();
 	kmem_cache_destroy(v9fs_inode_cache);
 }
 
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index bdaec92353c2..c830c857c663 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -275,6 +275,11 @@ static int init_inodecache(void)
 
 static void destroy_inodecache(void)
 {
+	/*
+	 * Make sure all delayed rcu free inodes are flushed before we
+	 * destroy cache.
+	 */
+	rcu_barrier();
 	kmem_cache_destroy(adfs_inode_cachep);
 }
 
diff --git a/fs/affs/super.c b/fs/affs/super.c
index c70f1e5fc024..2f57053bf26c 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -147,6 +147,11 @@ static int init_inodecache(void)
 
 static void destroy_inodecache(void)
 {
+	/*
+	 * Make sure all delayed rcu free inodes are flushed before we
+	 * destroy cache.
+	 */
+	rcu_barrier();
 	kmem_cache_destroy(affs_inode_cachep);
 }
 
diff --git a/fs/afs/super.c b/fs/afs/super.c
index df8c6047c2a1..43165009428d 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -123,6 +123,11 @@ void __exit afs_fs_exit(void)
 		BUG();
 	}
 
+	/*
+	 * Make sure all delayed rcu free inodes are flushed before we
+	 * destroy cache.
+	 */
+	rcu_barrier();
 	kmem_cache_destroy(afs_inode_cachep);
 	_leave("");
 }
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index cf7f3c67c8b7..962b4f8f7994 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -454,6 +454,11 @@ befs_init_inodecache(void)
 static void
 befs_destroy_inodecache(void)
 {
+	/*
+	 * Make sure all delayed rcu free inodes are flushed before we
+	 * destroy cache.
+	 */
+	rcu_barrier();
 	kmem_cache_destroy(befs_inode_cachep);
 }
 
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 9870417c26e7..d5fc598d6e4a 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -280,6 +280,11 @@ static int init_inodecache(void)
 
 static void destroy_inodecache(void)
 {
+	/*
+	 * Make sure all delayed rcu free inodes are flushed before we
+	 * destroy cache.
+	 */
+	rcu_barrier();
 	kmem_cache_destroy(bfs_inode_cachep);
 }
 
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 4c878476bb91..b08ea4717e9d 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -107,6 +107,12 @@ void extent_io_exit(void)
 		list_del(&eb->leak_list);
 		kmem_cache_free(extent_buffer_cache, eb);
 	}
+
+	/*
+	 * Make sure all delayed rcu free are flushed before we
+	 * destroy caches.
+	 */
+	rcu_barrier();
 	if (extent_state_cache)
 		kmem_cache_destroy(extent_state_cache);
 	if (extent_buffer_cache)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index ec154f954646..cf03a91d806f 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7076,6 +7076,11 @@ static void init_once(void *foo)
 
 void btrfs_destroy_cachep(void)
 {
+	/*
+	 * Make sure all delayed rcu free inodes are flushed before we
+	 * destroy cache.
+	 */
+	rcu_barrier();
 	if (btrfs_inode_cachep)
 		kmem_cache_destroy(btrfs_inode_cachep);
 	if (btrfs_trans_handle_cachep)
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index b982239f38f9..3a42d9326378 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -603,6 +603,11 @@ bad_cap:
 
 static void destroy_caches(void)
 {
+	/*
+	 * Make sure all delayed rcu free inodes are flushed before we
+	 * destroy cache.
+	 */
+	rcu_barrier();
 	kmem_cache_destroy(ceph_inode_cachep);
 	kmem_cache_destroy(ceph_cap_cachep);
 	kmem_cache_destroy(ceph_dentry_cachep);
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index db8a404a51dd..d4ce77a02327 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -977,6 +977,11 @@ cifs_init_inodecache(void)
 static void
 cifs_destroy_inodecache(void)
 {
+	/*
+	 * Make sure all delayed rcu free inodes are flushed before we
+	 * destroy cache.
+	 */
+	rcu_barrier();
 	kmem_cache_destroy(cifs_inode_cachep);
 }
 
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index d315c6c5891a..be2aa4909487 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -85,6 +85,11 @@ int coda_init_inodecache(void)
 
 void coda_destroy_inodecache(void)
 {
+	/*
+	 * Make sure all delayed rcu free inodes are flushed before we
+	 * destroy cache.
+	 */
+	rcu_barrier();
 	kmem_cache_destroy(coda_inode_cachep);
 }
 
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 9b627c15010a..34fcde765d24 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -710,6 +710,12 @@ static void ecryptfs_free_kmem_caches(void)
 {
 	int i;
 
+	/*
+	 * Make sure all delayed rcu free inodes are flushed before we
+	 * destroy cache.
+	 */
+	rcu_barrier();
+
 	for (i = 0; i < ARRAY_SIZE(ecryptfs_cache_infos); i++) {
 		struct ecryptfs_cache_info *info;
 
diff --git a/fs/efs/super.c b/fs/efs/super.c
index e755ec746c69..2002431ef9a0 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -96,6 +96,11 @@ static int init_inodecache(void)
 
 static void destroy_inodecache(void)
 {
+	/*
+	 * Make sure all delayed rcu free inodes are flushed before we
+	 * destroy cache.
+	 */
+	rcu_barrier();
 	kmem_cache_destroy(efs_inode_cachep);
 }
 
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index dde41a75c7c8..59e3bbfac0b1 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -206,6 +206,11 @@ static int init_inodecache(void)
  */
 static void destroy_inodecache(void)
 {
+	/*
+	 * Make sure all delayed rcu free inodes are flushed before we
+	 * destroy cache.
+	 */
+	rcu_barrier();
 	kmem_cache_destroy(exofs_inode_cachep);
 }
 
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index af74d9e27b71..6c205d0c565b 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -206,6 +206,11 @@ static int init_inodecache(void)
 
 static void destroy_inodecache(void)
 {
+	/*
+	 * Make sure all delayed rcu free inodes are flushed before we
+	 * destroy cache.
+	 */
+	rcu_barrier();
 	kmem_cache_destroy(ext2_inode_cachep);
 }
 
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 8c892e93d8e7..8d41c8889eee 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -532,6 +532,11 @@ static int init_inodecache(void)
 
 static void destroy_inodecache(void)
 {
+	/*
+	 * Make sure all delayed rcu free inodes are flushed before we
+	 * destroy cache.
+	 */
+	rcu_barrier();
 	kmem_cache_destroy(ext3_inode_cachep);
 }
 
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index c6e0cb3d1f4a..455b7d8c6d62 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1019,6 +1019,11 @@ static int init_inodecache(void)
 
 static void destroy_inodecache(void)
 {
+	/*
+	 * Make sure all delayed rcu free inodes are flushed before we
+	 * destroy cache.
+	 */
+	rcu_barrier();
 	kmem_cache_destroy(ext4_inode_cachep);
 }
 
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 05e897fe9866..fd8e47cd898b 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -521,6 +521,11 @@ static int __init fat_init_inodecache(void)
 
 static void __exit fat_destroy_inodecache(void)
 {
+	/*
+	 * Make sure all delayed rcu free inodes are flushed before we
+	 * destroy cache.
+	 */
+	rcu_barrier();
 	kmem_cache_destroy(fat_inode_cachep);
 }
 
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c
index d4fabd26084e..fed2c8afb3a9 100644
--- a/fs/freevxfs/vxfs_super.c
+++ b/fs/freevxfs/vxfs_super.c
@@ -279,6 +279,11 @@ static void __exit
 vxfs_cleanup(void)
 {
 	unregister_filesystem(&vxfs_fs_type);
+	/*
+	 * Make sure all delayed rcu free inodes are flushed before we
+	 * destroy cache.
+	 */
+	rcu_barrier();
 	kmem_cache_destroy(vxfs_inode_cachep);
 }
 
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index fca222dabe3c..f0eda124cffb 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -1197,6 +1197,12 @@ static void fuse_fs_cleanup(void)
 {
 	unregister_filesystem(&fuse_fs_type);
 	unregister_fuseblk();
+
+	/*
+	 * Make sure all delayed rcu free inodes are flushed before we
+	 * destroy cache.
+	 */
+	rcu_barrier();
 	kmem_cache_destroy(fuse_inode_cachep);
 }
 
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 4eb873e0c07b..941d7a8c2197 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -482,6 +482,12 @@ static int __init init_hfs_fs(void)
 static void __exit exit_hfs_fs(void)
 {
 	unregister_filesystem(&hfs_fs_type);
+
+	/*
+	 * Make sure all delayed rcu free inodes are flushed before we
+	 * destroy cache.
+	 */
+	rcu_barrier();
 	kmem_cache_destroy(hfs_inode_cachep);
 }
 
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index fdafb2d71654..811a84d2d964 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -635,6 +635,12 @@ static int __init init_hfsplus_fs(void)
 static void __exit exit_hfsplus_fs(void)
 {
 	unregister_filesystem(&hfsplus_fs_type);
+
+	/*
+	 * Make sure all delayed rcu free inodes are flushed before we
+	 * destroy cache.
+	 */
+	rcu_barrier();
 	kmem_cache_destroy(hfsplus_inode_cachep);
 }
 
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index 706a12c083ea..3cb1da56eb73 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -210,6 +210,11 @@ static int init_inodecache(void)
 
 static void destroy_inodecache(void)
 {
+	/*
+	 * Make sure all delayed rcu free inodes are flushed before we
+	 * destroy cache.
+	 */
+	rcu_barrier();
 	kmem_cache_destroy(hpfs_inode_cachep);
 }
 
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 8349a899912e..c4b85d064e6b 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -1042,6 +1042,11 @@ static int __init init_hugetlbfs_fs(void)
 
 static void __exit exit_hugetlbfs_fs(void)
 {
+	/*
+	 * Make sure all delayed rcu free inodes are flushed before we
+	 * destroy cache.
+	 */
+	rcu_barrier();
 	kmem_cache_destroy(hugetlbfs_inode_cachep);
 	kern_unmount(hugetlbfs_vfsmount);
 	unregister_filesystem(&hugetlbfs_fs_type);
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 29037c365ba4..f94cde4527e8 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -114,6 +114,11 @@ static int init_inodecache(void)
 
 static void destroy_inodecache(void)
 {
+	/*
+	 * Make sure all delayed rcu free inodes are flushed before we
+	 * destroy cache.
+	 */
+	rcu_barrier();
 	kmem_cache_destroy(isofs_inode_cachep);
 }
 
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 61ea41389f90..ff487954cd96 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -418,6 +418,12 @@ static void __exit exit_jffs2_fs(void)
 	unregister_filesystem(&jffs2_fs_type);
 	jffs2_destroy_slab_caches();
 	jffs2_compressors_exit();
+
+	/*
+	 * Make sure all delayed rcu free inodes are flushed before we
+	 * destroy cache.
+	 */
+	rcu_barrier();
 	kmem_cache_destroy(jffs2_inode_cachep);
 }
 
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index c55c7452d285..3735347fd5f6 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -903,6 +903,12 @@ static void __exit exit_jfs_fs(void)
 	jfs_proc_clean();
 #endif
 	unregister_filesystem(&jfs_fs_type);
+
+	/*
+	 * Make sure all delayed rcu free inodes are flushed before we
+	 * destroy cache.
+	 */
+	rcu_barrier();
 	kmem_cache_destroy(jfs_inode_cachep);
 }
 
diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c
index 6984562738d3..121bba2cf6f2 100644
--- a/fs/logfs/inode.c
+++ b/fs/logfs/inode.c
@@ -417,5 +417,10 @@ int logfs_init_inode_cache(void)
 
 void logfs_destroy_inode_cache(void)
 {
+	/*
+	 * Make sure all delayed rcu free inodes are flushed before we
+	 * destroy cache.
+	 */
+	rcu_barrier();
 	kmem_cache_destroy(logfs_inode_cache);
 }
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 2a503ad020d5..dc8d3629c20a 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -100,6 +100,11 @@ static int init_inodecache(void)
 
 static void destroy_inodecache(void)
 {
+	/*
+	 * Make sure all delayed rcu free inodes are flushed before we
+	 * destroy cache.
+	 */
+	rcu_barrier();
 	kmem_cache_destroy(minix_inode_cachep);
 }
 
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 333df07ae3bd..0c62c55b25d7 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -89,6 +89,11 @@ static int init_inodecache(void)
 
 static void destroy_inodecache(void)
 {
+	/*
+	 * Make sure all delayed rcu free inodes are flushed before we
+	 * destroy cache.
+	 */
+	rcu_barrier();
 	kmem_cache_destroy(ncp_inode_cachep);
 }
 
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 9b47610338f5..e4c716d374a8 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1571,6 +1571,11 @@ static int __init nfs_init_inodecache(void)
 
 static void nfs_destroy_inodecache(void)
 {
+	/*
+	 * Make sure all delayed rcu free inodes are flushed before we
+	 * destroy cache.
+	 */
+	rcu_barrier();
 	kmem_cache_destroy(nfs_inode_cachep);
 }
 
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 6a10812711c1..3c991dc84f2f 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1382,6 +1382,12 @@ static void nilfs_segbuf_init_once(void *obj)
 
 static void nilfs_destroy_cachep(void)
 {
+	/*
+	 * Make sure all delayed rcu free inodes are flushed before we
+	 * destroy cache.
+	 */
+	rcu_barrier();
+
 	if (nilfs_inode_cachep)
 		kmem_cache_destroy(nilfs_inode_cachep);
 	if (nilfs_transaction_cachep)
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 2bc149d6a784..fe08d4afa106 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -3168,6 +3168,12 @@ static void __exit exit_ntfs_fs(void)
 	ntfs_debug("Unregistering NTFS driver.");
 
 	unregister_filesystem(&ntfs_fs_type);
+
+	/*
+	 * Make sure all delayed rcu free inodes are flushed before we
+	 * destroy cache.
+	 */
+	rcu_barrier();
 	kmem_cache_destroy(ntfs_big_inode_cache);
 	kmem_cache_destroy(ntfs_inode_cache);
 	kmem_cache_destroy(ntfs_name_cache);
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index 83b6f98e0665..16b712d260d4 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -691,6 +691,11 @@ static void __exit exit_dlmfs_fs(void)
 	flush_workqueue(user_dlm_worker);
 	destroy_workqueue(user_dlm_worker);
 
+	/*
+	 * Make sure all delayed rcu free inodes are flushed before we
+	 * destroy cache.
+	 */
+	rcu_barrier();
 	kmem_cache_destroy(dlmfs_inode_cache);
 
 	bdi_destroy(&dlmfs_backing_dev_info);
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 68f4541c2db9..0e91ec22a940 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1818,6 +1818,11 @@ static int ocfs2_initialize_mem_caches(void)
 
 static void ocfs2_free_mem_caches(void)
 {
+	/*
+	 * Make sure all delayed rcu free inodes are flushed before we
+	 * destroy cache.
+	 */
+	rcu_barrier();
 	if (ocfs2_inode_cachep)
 		kmem_cache_destroy(ocfs2_inode_cachep);
 	ocfs2_inode_cachep = NULL;
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index 4a3477949bca..2ad080faca34 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -463,6 +463,11 @@ static int __init init_openprom_fs(void)
 static void __exit exit_openprom_fs(void)
 {
 	unregister_filesystem(&openprom_fs_type);
+	/*
+	 * Make sure all delayed rcu free inodes are flushed before we
+	 * destroy cache.
+	 */
+	rcu_barrier();
 	kmem_cache_destroy(op_inode_cachep);
 }
 
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 552e994e3aa1..9534b4f76579 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -391,6 +391,11 @@ static int init_inodecache(void)
 
 static void destroy_inodecache(void)
 {
+	/*
+	 * Make sure all delayed rcu free inodes are flushed before we
+	 * destroy cache.
+	 */
+	rcu_barrier();
 	kmem_cache_destroy(qnx4_inode_cachep);
 }
 
diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c
index 2049c814bda4..1b37fff7b5ff 100644
--- a/fs/qnx6/inode.c
+++ b/fs/qnx6/inode.c
@@ -651,6 +651,11 @@ static int init_inodecache(void)
 
 static void destroy_inodecache(void)
 {
+	/*
+	 * Make sure all delayed rcu free inodes are flushed before we
+	 * destroy cache.
+	 */
+	rcu_barrier();
 	kmem_cache_destroy(qnx6_inode_cachep);
 }
 
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 7a37dabf5a96..1078ae179993 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -608,6 +608,11 @@ static int init_inodecache(void)
 
 static void destroy_inodecache(void)
 {
+	/*
+	 * Make sure all delayed rcu free inodes are flushed before we
+	 * destroy cache.
+	 */
+	rcu_barrier();
 	kmem_cache_destroy(reiserfs_inode_cachep);
 }
 
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index 77c5f2173983..fd7c5f60b46b 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -648,6 +648,11 @@ error_register:
 static void __exit exit_romfs_fs(void)
 {
 	unregister_filesystem(&romfs_fs_type);
+	/*
+	 * Make sure all delayed rcu free inodes are flushed before we
+	 * destroy cache.
+	 */
+	rcu_barrier();
 	kmem_cache_destroy(romfs_inode_cachep);
 }
 
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 29cd014ed3a1..260e3928d4f5 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -425,6 +425,11 @@ static int __init init_inodecache(void)
 
 static void destroy_inodecache(void)
 {
+	/*
+	 * Make sure all delayed rcu free inodes are flushed before we
+	 * destroy cache.
+	 */
+	rcu_barrier();
 	kmem_cache_destroy(squashfs_inode_cachep);
 }
 
diff --git a/fs/super.c b/fs/super.c
index 0902cfa6a12e..5fdf7ff32c4e 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -307,12 +307,6 @@ void deactivate_locked_super(struct super_block *s)
 
 		/* caches are now gone, we can safely kill the shrinker now */
 		unregister_shrinker(&s->s_shrink);
-
-		/*
-		 * We need to call rcu_barrier so all the delayed rcu free
-		 * inodes are flushed before we release the fs module.
-		 */
-		rcu_barrier();
 		put_filesystem(fs);
 		put_super(s);
 	} else {
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index 80e1e2b18df1..0d0c50bd3321 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -360,5 +360,10 @@ int __init sysv_init_icache(void)
 
 void sysv_destroy_icache(void)
 {
+	/*
+	 * Make sure all delayed rcu free inodes are flushed before we
+	 * destroy cache.
+	 */
+	rcu_barrier();
 	kmem_cache_destroy(sysv_inode_cachep);
 }
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 71a197f0f93d..36e09ca9130b 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -2298,6 +2298,12 @@ static void __exit ubifs_exit(void)
 	dbg_debugfs_exit();
 	ubifs_compressors_exit();
 	unregister_shrinker(&ubifs_shrinker_info);
+
+	/*
+	 * Make sure all delayed rcu free inodes are flushed before we
+	 * destroy cache.
+	 */
+	rcu_barrier();
 	kmem_cache_destroy(ubifs_inode_slab);
 	unregister_filesystem(&ubifs_fs_type);
 }
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 18fc038a438d..b8d27642ab06 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -171,6 +171,11 @@ static int init_inodecache(void)
 
 static void destroy_inodecache(void)
 {
+	/*
+	 * Make sure all delayed rcu free inodes are flushed before we
+	 * destroy cache.
+	 */
+	rcu_barrier();
 	kmem_cache_destroy(udf_inode_cachep);
 }
 
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 444927e5706b..f7cfecfe1cab 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1466,6 +1466,11 @@ static int init_inodecache(void)
 
 static void destroy_inodecache(void)
 {
+	/*
+	 * Make sure all delayed rcu free inodes are flushed before we
+	 * destroy cache.
+	 */
+	rcu_barrier();
 	kmem_cache_destroy(ufs_inode_cachep);
 }
 
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 19e2380fb867..83d36e473d2f 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1506,6 +1506,11 @@ xfs_init_zones(void)
 STATIC void
 xfs_destroy_zones(void)
 {
+	/*
+	 * Make sure all delayed rcu free are flushed before we
+	 * destroy caches.
+	 */
+	rcu_barrier();
 	kmem_zone_destroy(xfs_ili_zone);
 	kmem_zone_destroy(xfs_inode_zone);
 	kmem_zone_destroy(xfs_efi_zone);
-- 
cgit 


From 8f9c0119d7ba94c3ad13876acc240d7f12b6d8e1 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Wed, 19 Sep 2012 12:01:52 +0100
Subject: compat: fs: Generic compat_sys_sendfile implementation

This function is used by sparc, powerpc and arm64 for compat support.
The patch adds a generic implementation which calls do_sendfile()
directly and avoids set_fs().

The sparc architecture has wrappers for the sign extensions while
powerpc relies on the compiler to do the this. The patch adds wrappers
for powerpc to handle the u32->int type conversion.

compat_sys_sendfile64() can be replaced by a sys_sendfile() call since
compat_loff_t has the same size as off_t on a 64-bit system.

On powerpc, the patch also changes the 64-bit sendfile call from
sys_sendile64 to sys_sendfile.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: David S. Miller <davem@davemloft.net>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/compat.c     | 22 ++++++++++++++++++++++
 fs/read_write.c |  4 ++--
 fs/read_write.h |  2 ++
 3 files changed, 26 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/compat.c b/fs/compat.c
index d72d51e19025..b7a24d0ca30d 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1792,3 +1792,25 @@ compat_sys_open_by_handle_at(int mountdirfd,
 	return do_handle_open(mountdirfd, handle, flags);
 }
 #endif
+
+#ifdef __ARCH_WANT_COMPAT_SYS_SENDFILE
+asmlinkage long compat_sys_sendfile(int out_fd, int in_fd,
+				    compat_off_t __user *offset, compat_size_t count)
+{
+	loff_t pos;
+	off_t off;
+	ssize_t ret;
+
+	if (offset) {
+		if (unlikely(get_user(off, offset)))
+			return -EFAULT;
+		pos = off;
+		ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);
+		if (unlikely(put_user(pos, offset)))
+			return -EFAULT;
+		return ret;
+	}
+
+	return do_sendfile(out_fd, in_fd, NULL, count, 0);
+}
+#endif /* __ARCH_WANT_COMPAT_SYS_SENDFILE */
diff --git a/fs/read_write.c b/fs/read_write.c
index 28b38279a219..d06534857e9e 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -862,8 +862,8 @@ SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
 	return ret;
 }
 
-static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
-			   size_t count, loff_t max)
+ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, size_t count,
+		    loff_t max)
 {
 	struct fd in, out;
 	struct inode *in_inode, *out_inode;
diff --git a/fs/read_write.h b/fs/read_write.h
index d07b954c6e0c..d3e00ef67420 100644
--- a/fs/read_write.h
+++ b/fs/read_write.h
@@ -12,3 +12,5 @@ ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
 		unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn);
 ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov,
 		unsigned long nr_segs, loff_t *ppos, io_fn_t fn);
+ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, size_t count,
+		    loff_t max);
-- 
cgit