190 files changed, 10666 insertions, 8358 deletions
diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs.h b/drivers/staging/lustre/include/linux/libcfs/libcfs.h
index 3f6447c65042..3b92d38d37e2 100644
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs.h
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs.h
@@ -138,8 +138,8 @@ struct lnet_debugfs_symlink_def {
 void lustre_insert_debugfs(struct ctl_table *table,
 			   const struct lnet_debugfs_symlink_def *symlinks);
 int lprocfs_call_handler(void *data, int write, loff_t *ppos,
-			  void __user *buffer, size_t *lenp,
-			  int (*handler)(void *data, int write,
-			  loff_t pos, void __user *buffer, int len));
+			 void __user *buffer, size_t *lenp,
+			 int (*handler)(void *data, int write, loff_t pos,
+					void __user *buffer, int len));
 
 #endif /* _LIBCFS_H */
diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_debug.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_debug.h
index 25adab19fd86..b7bd6e8ab33f 100644
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs_debug.h
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_debug.h
@@ -247,19 +247,19 @@ do {									\
 #define LCONSOLE_EMERG(format, ...) CDEBUG(D_CONSOLE | D_EMERG, format, ## __VA_ARGS__)
 
 int libcfs_debug_msg(struct libcfs_debug_msg_data *msgdata,
-			    const char *format1, ...)
+		     const char *format1, ...)
 	__printf(2, 3);
 
 int libcfs_debug_vmsg2(struct libcfs_debug_msg_data *msgdata,
-			      const char *format1,
-			      va_list args, const char *format2, ...)
+		       const char *format1,
+		       va_list args, const char *format2, ...)
 	__printf(4, 5);
 
 /* other external symbols that tracefile provides: */
 int cfs_trace_copyin_string(char *knl_buffer, int knl_buffer_nob,
-		const char __user *usr_buffer, int usr_buffer_nob);
+			    const char __user *usr_buffer, int usr_buffer_nob);
 int cfs_trace_copyout_string(char __user *usr_buffer, int usr_buffer_nob,
-		const char *knl_buffer, char *append);
+			     const char *knl_buffer, char *append);
 
 #define LIBCFS_DEBUG_FILE_PATH_DEFAULT "/tmp/lustre-log"
 
diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_fail.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_fail.h
index d3f9a6020ee3..bdbbe934584c 100644
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs_fail.h
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_fail.h
@@ -143,6 +143,9 @@ static inline int cfs_fail_timeout_set(__u32 id, __u32 value, int ms, int set)
 #define CFS_FAIL_TIMEOUT_ORSET(id, value, secs) \
 	cfs_fail_timeout_set(id, value, secs * 1000, CFS_FAIL_LOC_ORSET)
 
+#define CFS_FAIL_TIMEOUT_RESET(id, value, secs) \
+	cfs_fail_timeout_set(id, value, secs * 1000, CFS_FAIL_LOC_RESET)
+
 #define CFS_FAIL_TIMEOUT_MS_ORSET(id, value, ms) \
 	cfs_fail_timeout_set(id, value, ms, CFS_FAIL_LOC_ORSET)
 
diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h
index 4daa3823f60a..e0e1a5d0949d 100644
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h
@@ -310,13 +310,13 @@ do {							    \
 
 #define MKSTR(ptr) ((ptr)) ? (ptr) : ""
 
-static inline int cfs_size_round4(int val)
+static inline size_t cfs_size_round4(int val)
 {
 	return (val + 3) & (~0x3);
 }
 
 #ifndef HAVE_CFS_SIZE_ROUND
-static inline int cfs_size_round(int val)
+static inline size_t cfs_size_round(int val)
 {
 	return (val + 7) & (~0x7);
 }
@@ -324,17 +324,17 @@ static inline int cfs_size_round(int val)
 #define HAVE_CFS_SIZE_ROUND
 #endif
 
-static inline int cfs_size_round16(int val)
+static inline size_t cfs_size_round16(int val)
 {
 	return (val + 0xf) & (~0xf);
 }
 
-static inline int cfs_size_round32(int val)
+static inline size_t cfs_size_round32(int val)
 {
 	return (val + 0x1f) & (~0x1f);
 }
 
-static inline int cfs_size_round0(int val)
+static inline size_t cfs_size_round0(int val)
 {
 	if (!val)
 		return 0;
@@ -343,7 +343,7 @@ static inline int cfs_size_round0(int val)
 
 static inline size_t cfs_round_strlen(char *fset)
 {
-	return (size_t)cfs_size_round((int)strlen(fset) + 1);
+	return cfs_size_round((int)strlen(fset) + 1);
 }
 
 #define LOGL(var, len, ptr)				       \
@@ -360,13 +360,4 @@ do {							    \
 	ptr += cfs_size_round(len);			     \
 } while (0)
 
-#define LOGL0(var, len, ptr)			      \
-do {						    \
-	if (!len)				       \
-		break;				  \
-	memcpy((char *)ptr, (const char *)var, len);    \
-	*((char *)(ptr) + len) = 0;		     \
-	ptr += cfs_size_round(len + 1);		 \
-} while (0)
-
 #endif
diff --git a/drivers/staging/lustre/include/linux/lnet/lib-lnet.h b/drivers/staging/lustre/include/linux/lnet/lib-lnet.h
index 513a8225f888..a59c5e99cbd3 100644
--- a/drivers/staging/lustre/include/linux/lnet/lib-lnet.h
+++ b/drivers/staging/lustre/include/linux/lnet/lib-lnet.h
@@ -605,73 +605,20 @@ void lnet_counters_reset(void);
 
 unsigned int lnet_iov_nob(unsigned int niov, struct kvec *iov);
 int lnet_extract_iov(int dst_niov, struct kvec *dst,
-		     int src_niov, struct kvec *src,
+		     int src_niov, const struct kvec *src,
 		      unsigned int offset, unsigned int len);
 
 unsigned int lnet_kiov_nob(unsigned int niov, lnet_kiov_t *iov);
 int lnet_extract_kiov(int dst_niov, lnet_kiov_t *dst,
-		      int src_niov, lnet_kiov_t *src,
+		      int src_niov, const lnet_kiov_t *src,
 		      unsigned int offset, unsigned int len);
 
-void lnet_copy_iov2iov(unsigned int ndiov, struct kvec *diov,
-		       unsigned int doffset,
-			unsigned int nsiov, struct kvec *siov,
+void lnet_copy_iov2iter(struct iov_iter *to,
+			unsigned int nsiov, const struct kvec *siov,
 			unsigned int soffset, unsigned int nob);
-void lnet_copy_kiov2iov(unsigned int niov, struct kvec *iov,
-			unsigned int iovoffset,
-			 unsigned int nkiov, lnet_kiov_t *kiov,
+void lnet_copy_kiov2iter(struct iov_iter *to,
+			 unsigned int nkiov, const lnet_kiov_t *kiov,
 			 unsigned int kiovoffset, unsigned int nob);
-void lnet_copy_iov2kiov(unsigned int nkiov, lnet_kiov_t *kiov,
-			unsigned int kiovoffset,
-			 unsigned int niov, struct kvec *iov,
-			 unsigned int iovoffset, unsigned int nob);
-void lnet_copy_kiov2kiov(unsigned int ndkiov, lnet_kiov_t *dkiov,
-			 unsigned int doffset,
-			  unsigned int nskiov, lnet_kiov_t *skiov,
-			  unsigned int soffset, unsigned int nob);
-
-static inline void
-lnet_copy_iov2flat(int dlen, void *dest, unsigned int doffset,
-		   unsigned int nsiov, struct kvec *siov, unsigned int soffset,
-		   unsigned int nob)
-{
-	struct kvec diov = {/*.iov_base = */ dest, /*.iov_len = */ dlen};
-
-	lnet_copy_iov2iov(1, &diov, doffset,
-			  nsiov, siov, soffset, nob);
-}
-
-static inline void
-lnet_copy_kiov2flat(int dlen, void *dest, unsigned int doffset,
-		    unsigned int nsiov, lnet_kiov_t *skiov,
-		    unsigned int soffset, unsigned int nob)
-{
-	struct kvec diov = {/* .iov_base = */ dest, /* .iov_len = */ dlen};
-
-	lnet_copy_kiov2iov(1, &diov, doffset,
-			   nsiov, skiov, soffset, nob);
-}
-
-static inline void
-lnet_copy_flat2iov(unsigned int ndiov, struct kvec *diov, unsigned int doffset,
-		   int slen, void *src, unsigned int soffset, unsigned int nob)
-{
-	struct kvec siov = {/*.iov_base = */ src, /*.iov_len = */slen};
-
-	lnet_copy_iov2iov(ndiov, diov, doffset,
-			  1, &siov, soffset, nob);
-}
-
-static inline void
-lnet_copy_flat2kiov(unsigned int ndiov, lnet_kiov_t *dkiov,
-		    unsigned int doffset, int slen, void *src,
-		    unsigned int soffset, unsigned int nob)
-{
-	struct kvec siov = {/* .iov_base = */ src, /* .iov_len = */ slen};
-
-	lnet_copy_iov2kiov(ndiov, dkiov, doffset,
-			   1, &siov, soffset, nob);
-}
 
 void lnet_me_unlink(lnet_me_t *me);
 
diff --git a/drivers/staging/lustre/include/linux/lnet/lib-types.h b/drivers/staging/lustre/include/linux/lnet/lib-types.h
index 7967b013cbae..b84a5bb9186c 100644
--- a/drivers/staging/lustre/include/linux/lnet/lib-types.h
+++ b/drivers/staging/lustre/include/linux/lnet/lib-types.h
@@ -220,10 +220,7 @@ typedef struct lnet_lnd {
 	 * credit if the LND does flow control.
 	 */
 	int (*lnd_recv)(struct lnet_ni *ni, void *private, lnet_msg_t *msg,
-			int delayed, unsigned int niov,
-			struct kvec *iov, lnet_kiov_t *kiov,
-			unsigned int offset, unsigned int mlen,
-			unsigned int rlen);
+			int delayed, struct iov_iter *to, unsigned int rlen);
 
 	/*
 	 * lnet_parse() has had to delay processing of this message
@@ -278,6 +275,8 @@ typedef struct lnet_ni {
 	struct lnet_ioctl_config_lnd_tunables *ni_lnd_tunables;
 	/* equivalent interfaces to use */
 	char			 *ni_interfaces[LNET_MAX_INTERFACES];
+	/* original net namespace */
+	struct net		 *ni_net_ns;
 } lnet_ni_t;
 
 #define LNET_PROTO_PING_MATCHBITS	0x8000000000000000LL
diff --git a/drivers/staging/lustre/include/linux/lnet/types.h b/drivers/staging/lustre/include/linux/lnet/types.h
index e098b6c086e1..f8be0e2f7bf7 100644
--- a/drivers/staging/lustre/include/linux/lnet/types.h
+++ b/drivers/staging/lustre/include/linux/lnet/types.h
@@ -503,21 +503,7 @@ typedef struct {
 /* NB lustre portals uses struct iovec internally! */
 typedef struct iovec lnet_md_iovec_t;
 
-/**
- * A page-based fragment of a MD.
- */
-typedef struct {
-	/** Pointer to the page where the fragment resides */
-	struct page	*kiov_page;
-	/** Length in bytes of the fragment */
-	unsigned int	 kiov_len;
-	/**
-	 * Starting offset of the fragment within the page. Note that the
-	 * end of the fragment must not pass the end of the page; i.e.,
-	 * kiov_len + kiov_offset <= PAGE_SIZE.
-	 */
-	unsigned int	 kiov_offset;
-} lnet_kiov_t;
+typedef struct bio_vec lnet_kiov_t;
 /** @} lnet_md */
 
 /** \addtogroup lnet_eq
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c
index 4f5978b3767b..c7a5d49e487f 100644
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c
@@ -128,6 +128,7 @@ static int kiblnd_msgtype2size(int type)
 static int kiblnd_unpack_rd(struct kib_msg *msg, int flip)
 {
 	struct kib_rdma_desc *rd;
+	int msg_size;
 	int nob;
 	int n;
 	int i;
@@ -146,12 +147,6 @@ static int kiblnd_unpack_rd(struct kib_msg *msg, int flip)
 
 	n = rd->rd_nfrags;
 
-	if (n <= 0 || n > IBLND_MAX_RDMA_FRAGS) {
-		CERROR("Bad nfrags: %d, should be 0 < n <= %d\n",
-		       n, IBLND_MAX_RDMA_FRAGS);
-		return 1;
-	}
-
 	nob = offsetof(struct kib_msg, ibm_u) +
 	      kiblnd_rd_msg_size(rd, msg->ibm_type, n);
 
@@ -161,6 +156,13 @@ static int kiblnd_unpack_rd(struct kib_msg *msg, int flip)
 		return 1;
 	}
 
+	msg_size = kiblnd_rd_size(rd);
+	if (msg_size <= 0 || msg_size > LNET_MAX_PAYLOAD) {
+		CERROR("Bad msg_size: %d, should be 0 < n <= %d\n",
+		       msg_size, LNET_MAX_PAYLOAD);
+		return 1;
+	}
+
 	if (!flip)
 		return 0;
 
@@ -618,7 +620,7 @@ static int kiblnd_get_completion_vector(struct kib_conn *conn, int cpt)
 }
 
 struct kib_conn *kiblnd_create_conn(struct kib_peer *peer, struct rdma_cm_id *cmid,
-			       int state, int version)
+				    int state, int version)
 {
 	/*
 	 * CAVEAT EMPTOR:
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h
index 078a0c3e8845..14576977200f 100644
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h
@@ -113,8 +113,9 @@ extern struct kib_tunables  kiblnd_tunables;
 #define IBLND_OOB_CAPABLE(v)       ((v) != IBLND_MSG_VERSION_1)
 #define IBLND_OOB_MSGS(v)	   (IBLND_OOB_CAPABLE(v) ? 2 : 0)
 
-#define IBLND_MSG_SIZE		(4 << 10)	 /* max size of queued messages (inc hdr) */
-#define IBLND_MAX_RDMA_FRAGS	 LNET_MAX_IOV	   /* max # of fragments supported */
+#define IBLND_FRAG_SHIFT	(PAGE_SHIFT - 12)	/* frag size on wire is in 4K units */
+#define IBLND_MSG_SIZE		(4 << 10)		/* max size of queued messages (inc hdr) */
+#define IBLND_MAX_RDMA_FRAGS	(LNET_MAX_PAYLOAD >> 12)/* max # of fragments supported in 4K size */
 
 /************************/
 /* derived constants... */
@@ -133,8 +134,8 @@ extern struct kib_tunables  kiblnd_tunables;
 /* WRs and CQEs (per connection) */
 #define IBLND_RECV_WRS(c)	IBLND_RX_MSGS(c)
 #define IBLND_SEND_WRS(c)	\
-	((c->ibc_max_frags + 1) * kiblnd_concurrent_sends(c->ibc_version, \
-							  c->ibc_peer->ibp_ni))
+	(((c->ibc_max_frags + 1) << IBLND_FRAG_SHIFT) * \
+	  kiblnd_concurrent_sends(c->ibc_version, c->ibc_peer->ibp_ni))
 #define IBLND_CQ_ENTRIES(c)	(IBLND_RECV_WRS(c) + IBLND_SEND_WRS(c))
 
 struct kib_hca_dev;
@@ -582,6 +583,8 @@ struct kib_peer {
 	unsigned short		ibp_connecting;
 	/* reconnect this peer later */
 	unsigned short		ibp_reconnecting:1;
+	/* counter of how many times we triggered a conn race */
+	unsigned char		ibp_races;
 	/* # consecutive reconnection attempts to this peer */
 	unsigned int		ibp_reconnected;
 	/* errno on closing this peer */
@@ -607,14 +610,14 @@ kiblnd_cfg_rdma_frags(struct lnet_ni *ni)
 
 	tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
 	mod = tunables->lnd_map_on_demand;
-	return mod ? mod : IBLND_MAX_RDMA_FRAGS;
+	return mod ? mod : IBLND_MAX_RDMA_FRAGS >> IBLND_FRAG_SHIFT;
 }
 
 static inline int
 kiblnd_rdma_frags(int version, struct lnet_ni *ni)
 {
 	return version == IBLND_MSG_VERSION_1 ?
-			  IBLND_MAX_RDMA_FRAGS :
+			  (IBLND_MAX_RDMA_FRAGS >> IBLND_FRAG_SHIFT) :
 			  kiblnd_cfg_rdma_frags(ni);
 }
 
@@ -1034,5 +1037,4 @@ int  kiblnd_post_rx(struct kib_rx *rx, int credit);
 
 int  kiblnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg);
 int  kiblnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed,
-		 unsigned int niov, struct kvec *iov, lnet_kiov_t *kiov,
-		 unsigned int offset, unsigned int mlen, unsigned int rlen);
+		 struct iov_iter *to, unsigned int rlen);
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
index 596a697b9d39..b27de8888149 100644
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
@@ -36,16 +36,19 @@
 
 #include "o2iblnd.h"
 
+#define MAX_CONN_RACES_BEFORE_ABORT 20
+
 static void kiblnd_peer_alive(struct kib_peer *peer);
 static void kiblnd_peer_connect_failed(struct kib_peer *peer, int active, int error);
-static void kiblnd_check_sends(struct kib_conn *conn);
 static void kiblnd_init_tx_msg(lnet_ni_t *ni, struct kib_tx *tx,
-				int type, int body_nob);
+			       int type, int body_nob);
 static int kiblnd_init_rdma(struct kib_conn *conn, struct kib_tx *tx, int type,
-			     int resid, struct kib_rdma_desc *dstrd, __u64 dstcookie);
+			    int resid, struct kib_rdma_desc *dstrd,
+			    __u64 dstcookie);
 static void kiblnd_queue_tx_locked(struct kib_tx *tx, struct kib_conn *conn);
 static void kiblnd_queue_tx(struct kib_tx *tx, struct kib_conn *conn);
 static void kiblnd_unmap_tx(lnet_ni_t *ni, struct kib_tx *tx);
+static void kiblnd_check_sends_locked(struct kib_conn *conn);
 
 static void
 kiblnd_tx_done(lnet_ni_t *ni, struct kib_tx *tx)
@@ -211,9 +214,9 @@ kiblnd_post_rx(struct kib_rx *rx, int credit)
 		conn->ibc_outstanding_credits++;
 	else
 		conn->ibc_reserved_credits++;
+	kiblnd_check_sends_locked(conn);
 	spin_unlock(&conn->ibc_lock);
 
-	kiblnd_check_sends(conn);
 out:
 	kiblnd_conn_decref(conn);
 	return rc;
@@ -344,8 +347,8 @@ kiblnd_handle_rx(struct kib_rx *rx)
 		    !IBLND_OOB_CAPABLE(conn->ibc_version)) /* v1 only */
 			conn->ibc_outstanding_credits++;
 
+		kiblnd_check_sends_locked(conn);
 		spin_unlock(&conn->ibc_lock);
-		kiblnd_check_sends(conn);
 	}
 
 	switch (msg->ibm_type) {
@@ -648,7 +651,7 @@ static int kiblnd_map_tx(lnet_ni_t *ni, struct kib_tx *tx, struct kib_rdma_desc
 
 static int
 kiblnd_setup_rd_iov(lnet_ni_t *ni, struct kib_tx *tx, struct kib_rdma_desc *rd,
-		    unsigned int niov, struct kvec *iov, int offset, int nob)
+		    unsigned int niov, const struct kvec *iov, int offset, int nob)
 {
 	struct kib_net *net = ni->ni_data;
 	struct page *page;
@@ -705,7 +708,7 @@ kiblnd_setup_rd_iov(lnet_ni_t *ni, struct kib_tx *tx, struct kib_rdma_desc *rd,
 
 static int
 kiblnd_setup_rd_kiov(lnet_ni_t *ni, struct kib_tx *tx, struct kib_rdma_desc *rd,
-		     int nkiov, lnet_kiov_t *kiov, int offset, int nob)
+		     int nkiov, const lnet_kiov_t *kiov, int offset, int nob)
 {
 	struct kib_net *net = ni->ni_data;
 	struct scatterlist *sg;
@@ -717,8 +720,8 @@ kiblnd_setup_rd_kiov(lnet_ni_t *ni, struct kib_tx *tx, struct kib_rdma_desc *rd,
 	LASSERT(nkiov > 0);
 	LASSERT(net);
 
-	while (offset >= kiov->kiov_len) {
-		offset -= kiov->kiov_len;
+	while (offset >= kiov->bv_len) {
+		offset -= kiov->bv_len;
 		nkiov--;
 		kiov++;
 		LASSERT(nkiov > 0);
@@ -728,10 +731,10 @@ kiblnd_setup_rd_kiov(lnet_ni_t *ni, struct kib_tx *tx, struct kib_rdma_desc *rd,
 	do {
 		LASSERT(nkiov > 0);
 
-		fragnob = min((int)(kiov->kiov_len - offset), nob);
+		fragnob = min((int)(kiov->bv_len - offset), nob);
 
-		sg_set_page(sg, kiov->kiov_page, fragnob,
-			    kiov->kiov_offset + offset);
+		sg_set_page(sg, kiov->bv_page, fragnob,
+			    kiov->bv_offset + offset);
 		sg = sg_next(sg);
 		if (!sg) {
 			CERROR("lacking enough sg entries to map tx\n");
@@ -761,7 +764,6 @@ kiblnd_post_tx_locked(struct kib_conn *conn, struct kib_tx *tx, int credit)
 	LASSERT(tx->tx_queued);
 	/* We rely on this for QP sizing */
 	LASSERT(tx->tx_nwrq > 0);
-	LASSERT(tx->tx_nwrq <= 1 + conn->ibc_max_frags);
 
 	LASSERT(!credit || credit == 1);
 	LASSERT(conn->ibc_outstanding_credits >= 0);
@@ -800,7 +802,7 @@ kiblnd_post_tx_locked(struct kib_conn *conn, struct kib_tx *tx, int credit)
 	      conn->ibc_noops_posted == IBLND_OOB_MSGS(ver)))) {
 		/*
 		 * OK to drop when posted enough NOOPs, since
-		 * kiblnd_check_sends will queue NOOP again when
+		 * kiblnd_check_sends_locked will queue NOOP again when
 		 * posted NOOPs complete
 		 */
 		spin_unlock(&conn->ibc_lock);
@@ -905,7 +907,7 @@ kiblnd_post_tx_locked(struct kib_conn *conn, struct kib_tx *tx, int credit)
 }
 
 static void
-kiblnd_check_sends(struct kib_conn *conn)
+kiblnd_check_sends_locked(struct kib_conn *conn)
 {
 	int ver = conn->ibc_version;
 	lnet_ni_t *ni = conn->ibc_peer->ibp_ni;
@@ -918,8 +920,6 @@ kiblnd_check_sends(struct kib_conn *conn)
 		return;
 	}
 
-	spin_lock(&conn->ibc_lock);
-
 	LASSERT(conn->ibc_nsends_posted <= kiblnd_concurrent_sends(ver, ni));
 	LASSERT(!IBLND_OOB_CAPABLE(ver) ||
 		conn->ibc_noops_posted <= IBLND_OOB_MSGS(ver));
@@ -969,8 +969,6 @@ kiblnd_check_sends(struct kib_conn *conn)
 		if (kiblnd_post_tx_locked(conn, tx, credit))
 			break;
 	}
-
-	spin_unlock(&conn->ibc_lock);
 }
 
 static void
@@ -1016,16 +1014,11 @@ kiblnd_tx_complete(struct kib_tx *tx, int status)
 	if (idle)
 		list_del(&tx->tx_list);
 
-	kiblnd_conn_addref(conn);	       /* 1 ref for me.... */
-
+	kiblnd_check_sends_locked(conn);
 	spin_unlock(&conn->ibc_lock);
 
 	if (idle)
 		kiblnd_tx_done(conn->ibc_peer->ibp_ni, tx);
-
-	kiblnd_check_sends(conn);
-
-	kiblnd_conn_decref(conn);	       /* ...until here */
 }
 
 static void
@@ -1078,6 +1071,15 @@ kiblnd_init_rdma(struct kib_conn *conn, struct kib_tx *tx, int type,
 	LASSERT(type == IBLND_MSG_GET_DONE ||
 		type == IBLND_MSG_PUT_DONE);
 
+	if (kiblnd_rd_size(srcrd) > conn->ibc_max_frags << PAGE_SHIFT) {
+		CERROR("RDMA is too large for peer %s (%d), src size: %d dst size: %d\n",
+		       libcfs_nid2str(conn->ibc_peer->ibp_nid),
+		       conn->ibc_max_frags << PAGE_SHIFT,
+		       kiblnd_rd_size(srcrd), kiblnd_rd_size(dstrd));
+		rc = -EMSGSIZE;
+		goto too_big;
+	}
+
 	while (resid > 0) {
 		if (srcidx >= srcrd->rd_nfrags) {
 			CERROR("Src buffer exhausted: %d frags\n", srcidx);
@@ -1091,10 +1093,10 @@ kiblnd_init_rdma(struct kib_conn *conn, struct kib_tx *tx, int type,
 			break;
 		}
 
-		if (tx->tx_nwrq >= conn->ibc_max_frags) {
+		if (tx->tx_nwrq >= IBLND_MAX_RDMA_FRAGS) {
 			CERROR("RDMA has too many fragments for peer %s (%d), src idx/frags: %d/%d dst idx/frags: %d/%d\n",
 			       libcfs_nid2str(conn->ibc_peer->ibp_nid),
-			       conn->ibc_max_frags,
+			       IBLND_MAX_RDMA_FRAGS,
 			       srcidx, srcrd->rd_nfrags,
 			       dstidx, dstrd->rd_nfrags);
 			rc = -EMSGSIZE;
@@ -1132,7 +1134,7 @@ kiblnd_init_rdma(struct kib_conn *conn, struct kib_tx *tx, int type,
 		wrq++;
 		sge++;
 	}
-
+too_big:
 	if (rc < 0)			     /* no RDMA if completing with failure */
 		tx->tx_nwrq = 0;
 
@@ -1204,9 +1206,8 @@ kiblnd_queue_tx(struct kib_tx *tx, struct kib_conn *conn)
 {
 	spin_lock(&conn->ibc_lock);
 	kiblnd_queue_tx_locked(tx, conn);
+	kiblnd_check_sends_locked(conn);
 	spin_unlock(&conn->ibc_lock);
-
-	kiblnd_check_sends(conn);
 }
 
 static int kiblnd_resolve_addr(struct rdma_cm_id *cmid,
@@ -1499,6 +1500,7 @@ kiblnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg)
 	lnet_kiov_t *payload_kiov = lntmsg->msg_kiov;
 	unsigned int payload_offset = lntmsg->msg_offset;
 	unsigned int payload_nob = lntmsg->msg_len;
+	struct iov_iter from;
 	struct kib_msg *ibmsg;
 	struct kib_rdma_desc  *rd;
 	struct kib_tx *tx;
@@ -1518,6 +1520,17 @@ kiblnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg)
 	/* payload is either all vaddrs or all pages */
 	LASSERT(!(payload_kiov && payload_iov));
 
+	if (payload_kiov)
+		iov_iter_bvec(&from, ITER_BVEC | WRITE,
+			      payload_kiov, payload_niov,
+			      payload_nob + payload_offset);
+	else
+		iov_iter_kvec(&from, ITER_KVEC | WRITE,
+			      payload_iov, payload_niov,
+			      payload_nob + payload_offset);
+
+	iov_iter_advance(&from, payload_offset);
+
 	switch (type) {
 	default:
 		LBUG();
@@ -1637,17 +1650,8 @@ kiblnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg)
 	ibmsg = tx->tx_msg;
 	ibmsg->ibm_u.immediate.ibim_hdr = *hdr;
 
-	if (payload_kiov)
-		lnet_copy_kiov2flat(IBLND_MSG_SIZE, ibmsg,
-				    offsetof(struct kib_msg, ibm_u.immediate.ibim_payload),
-				    payload_niov, payload_kiov,
-				    payload_offset, payload_nob);
-	else
-		lnet_copy_iov2flat(IBLND_MSG_SIZE, ibmsg,
-				   offsetof(struct kib_msg, ibm_u.immediate.ibim_payload),
-				   payload_niov, payload_iov,
-				   payload_offset, payload_nob);
-
+	copy_from_iter(&ibmsg->ibm_u.immediate.ibim_payload, IBLND_MSG_SIZE,
+		       &from);
 	nob = offsetof(struct kib_immediate_msg, ibim_payload[payload_nob]);
 	kiblnd_init_tx_msg(ni, tx, IBLND_MSG_IMMEDIATE, nob);
 
@@ -1719,8 +1723,7 @@ kiblnd_reply(lnet_ni_t *ni, struct kib_rx *rx, lnet_msg_t *lntmsg)
 
 int
 kiblnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed,
-	    unsigned int niov, struct kvec *iov, lnet_kiov_t *kiov,
-	    unsigned int offset, unsigned int mlen, unsigned int rlen)
+	    struct iov_iter *to, unsigned int rlen)
 {
 	struct kib_rx *rx = private;
 	struct kib_msg *rxmsg = rx->rx_msg;
@@ -1730,10 +1733,9 @@ kiblnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed,
 	int post_credit = IBLND_POSTRX_PEER_CREDIT;
 	int rc = 0;
 
-	LASSERT(mlen <= rlen);
+	LASSERT(iov_iter_count(to) <= rlen);
 	LASSERT(!in_interrupt());
 	/* Either all pages or all vaddrs */
-	LASSERT(!(kiov && iov));
 
 	switch (rxmsg->ibm_type) {
 	default:
@@ -1749,16 +1751,8 @@ kiblnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed,
 			break;
 		}
 
-		if (kiov)
-			lnet_copy_flat2kiov(niov, kiov, offset,
-					    IBLND_MSG_SIZE, rxmsg,
-					    offsetof(struct kib_msg, ibm_u.immediate.ibim_payload),
-					    mlen);
-		else
-			lnet_copy_flat2iov(niov, iov, offset,
-					   IBLND_MSG_SIZE, rxmsg,
-					   offsetof(struct kib_msg, ibm_u.immediate.ibim_payload),
-					   mlen);
+		copy_to_iter(&rxmsg->ibm_u.immediate.ibim_payload,
+			     IBLND_MSG_SIZE, to);
 		lnet_finalize(ni, lntmsg, 0);
 		break;
 
@@ -1766,7 +1760,7 @@ kiblnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed,
 		struct kib_msg	*txmsg;
 		struct kib_rdma_desc *rd;
 
-		if (!mlen) {
+		if (!iov_iter_count(to)) {
 			lnet_finalize(ni, lntmsg, 0);
 			kiblnd_send_completion(rx->rx_conn, IBLND_MSG_PUT_NAK, 0,
 					       rxmsg->ibm_u.putreq.ibprm_cookie);
@@ -1784,12 +1778,16 @@ kiblnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed,
 
 		txmsg = tx->tx_msg;
 		rd = &txmsg->ibm_u.putack.ibpam_rd;
-		if (!kiov)
+		if (!(to->type & ITER_BVEC))
 			rc = kiblnd_setup_rd_iov(ni, tx, rd,
-						 niov, iov, offset, mlen);
+						 to->nr_segs, to->kvec,
+						 to->iov_offset,
+						 iov_iter_count(to));
 		else
 			rc = kiblnd_setup_rd_kiov(ni, tx, rd,
-						  niov, kiov, offset, mlen);
+						  to->nr_segs, to->bvec,
+						  to->iov_offset,
+						  iov_iter_count(to));
 		if (rc) {
 			CERROR("Can't setup PUT sink for %s: %d\n",
 			       libcfs_nid2str(conn->ibc_peer->ibp_nid), rc);
@@ -2183,14 +2181,11 @@ kiblnd_connreq_done(struct kib_conn *conn, int status)
 		return;
 	}
 
-	/**
-	 * refcount taken by cmid is not reliable after I released the glock
-	 * because this connection is visible to other threads now, another
-	 * thread can find and close this connection right after I released
-	 * the glock, if kiblnd_cm_callback for RDMA_CM_EVENT_DISCONNECTED is
-	 * called, it can release the connection refcount taken by cmid.
-	 * It means the connection could be destroyed before I finish my
-	 * operations on it.
+	/*
+	 * +1 ref for myself, this connection is visible to other threads
+	 * now, refcount of peer:ibp_conns can be released by connection
+	 * close from either a different thread, or the calling of
+	 * kiblnd_check_sends_locked() below. See bz21911 for details.
 	 */
 	kiblnd_conn_addref(conn);
 	write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
@@ -2202,10 +2197,9 @@ kiblnd_connreq_done(struct kib_conn *conn, int status)
 
 		kiblnd_queue_tx_locked(tx, conn);
 	}
+	kiblnd_check_sends_locked(conn);
 	spin_unlock(&conn->ibc_lock);
 
-	kiblnd_check_sends(conn);
-
 	/* schedule blocked rxs */
 	kiblnd_handle_early_rxs(conn);
 
@@ -2240,6 +2234,7 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
 	struct kib_rej rej;
 	int version = IBLND_MSG_VERSION;
 	unsigned long flags;
+	int max_frags;
 	int rc;
 	struct sockaddr_in *peer_addr;
 
@@ -2346,22 +2341,20 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
 		goto failed;
 	}
 
-	if (reqmsg->ibm_u.connparams.ibcp_max_frags >
-	    kiblnd_rdma_frags(version, ni)) {
-		CWARN("Can't accept conn from %s (version %x): max_frags %d too large (%d wanted)\n",
-		      libcfs_nid2str(nid), version,
-		      reqmsg->ibm_u.connparams.ibcp_max_frags,
+	max_frags = reqmsg->ibm_u.connparams.ibcp_max_frags >> IBLND_FRAG_SHIFT;
+	if (max_frags > kiblnd_rdma_frags(version, ni)) {
+		CWARN("Can't accept conn from %s (version %x): max message size %d is too large (%d wanted)\n",
+		      libcfs_nid2str(nid), version, max_frags,
 		      kiblnd_rdma_frags(version, ni));
 
 		if (version >= IBLND_MSG_VERSION)
 			rej.ibr_why = IBLND_REJECT_RDMA_FRAGS;
 
 		goto failed;
-	} else if (reqmsg->ibm_u.connparams.ibcp_max_frags <
-		   kiblnd_rdma_frags(version, ni) && !net->ibn_fmr_ps) {
-		CWARN("Can't accept conn from %s (version %x): max_frags %d incompatible without FMR pool (%d wanted)\n",
-		      libcfs_nid2str(nid), version,
-		      reqmsg->ibm_u.connparams.ibcp_max_frags,
+	} else if (max_frags < kiblnd_rdma_frags(version, ni) &&
+		   !net->ibn_fmr_ps) {
+		CWARN("Can't accept conn from %s (version %x): max message size %d incompatible without FMR pool (%d wanted)\n",
+		      libcfs_nid2str(nid), version, max_frags,
 		      kiblnd_rdma_frags(version, ni));
 
 		if (version == IBLND_MSG_VERSION)
@@ -2387,7 +2380,7 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
 	}
 
 	/* We have validated the peer's parameters so use those */
-	peer->ibp_max_frags = reqmsg->ibm_u.connparams.ibcp_max_frags;
+	peer->ibp_max_frags = max_frags;
 	peer->ibp_queue_depth = reqmsg->ibm_u.connparams.ibcp_queue_depth;
 
 	write_lock_irqsave(g_lock, flags);
@@ -2419,23 +2412,37 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
 			goto failed;
 		}
 
-		/* tie-break connection race in favour of the higher NID */
+		/*
+		 * Tie-break connection race in favour of the higher NID.
+		 * If we keep running into a race condition multiple times,
+		 * we have to assume that the connection attempt with the
+		 * higher NID is stuck in a connecting state and will never
+		 * recover.  As such, we pass through this if-block and let
+		 * the lower NID connection win so we can move forward.
+		 */
 		if (peer2->ibp_connecting &&
-		    nid < ni->ni_nid) {
+		    nid < ni->ni_nid && peer2->ibp_races <
+		    MAX_CONN_RACES_BEFORE_ABORT) {
+			peer2->ibp_races++;
 			write_unlock_irqrestore(g_lock, flags);
 
-			CWARN("Conn race %s\n", libcfs_nid2str(peer2->ibp_nid));
+			CDEBUG(D_NET, "Conn race %s\n",
+			       libcfs_nid2str(peer2->ibp_nid));
 
 			kiblnd_peer_decref(peer);
 			rej.ibr_why = IBLND_REJECT_CONN_RACE;
 			goto failed;
 		}
-
+		if (peer2->ibp_races >= MAX_CONN_RACES_BEFORE_ABORT)
+			CNETERR("Conn race %s: unresolved after %d attempts, letting lower NID win\n",
+				libcfs_nid2str(peer2->ibp_nid),
+				MAX_CONN_RACES_BEFORE_ABORT);
 		/**
 		 * passive connection is allowed even this peer is waiting for
 		 * reconnection.
 		 */
 		peer2->ibp_reconnecting = 0;
+		peer2->ibp_races = 0;
 		peer2->ibp_accepting++;
 		kiblnd_peer_addref(peer2);
 
@@ -2494,7 +2501,7 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
 	kiblnd_init_msg(ackmsg, IBLND_MSG_CONNACK,
 			sizeof(ackmsg->ibm_u.connparams));
 	ackmsg->ibm_u.connparams.ibcp_queue_depth = conn->ibc_queue_depth;
-	ackmsg->ibm_u.connparams.ibcp_max_frags = conn->ibc_max_frags;
+	ackmsg->ibm_u.connparams.ibcp_max_frags = conn->ibc_max_frags << IBLND_FRAG_SHIFT;
 	ackmsg->ibm_u.connparams.ibcp_max_msg_size = IBLND_MSG_SIZE;
 
 	kiblnd_pack_msg(ni, ackmsg, version, 0, nid, reqmsg->ibm_srcstamp);
@@ -2526,9 +2533,9 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
 
  failed:
 	if (ni) {
-		lnet_ni_decref(ni);
 		rej.ibr_cp.ibcp_queue_depth = kiblnd_msg_queue_size(version, ni);
 		rej.ibr_cp.ibcp_max_frags = kiblnd_rdma_frags(version, ni);
+		lnet_ni_decref(ni);
 	}
 
 	rej.ibr_version             = version;
@@ -2556,7 +2563,7 @@ kiblnd_check_reconnect(struct kib_conn *conn, int version,
 
 	if (cp) {
 		msg_size = cp->ibcp_max_msg_size;
-		frag_num = cp->ibcp_max_frags;
+		frag_num	= cp->ibcp_max_frags << IBLND_FRAG_SHIFT;
 		queue_dep = cp->ibcp_queue_depth;
 	}
 
@@ -2821,11 +2828,11 @@ kiblnd_check_connreply(struct kib_conn *conn, void *priv, int priv_nob)
 		goto failed;
 	}
 
-	if (msg->ibm_u.connparams.ibcp_max_frags >
+	if ((msg->ibm_u.connparams.ibcp_max_frags >> IBLND_FRAG_SHIFT) >
 	    conn->ibc_max_frags) {
 		CERROR("%s has incompatible max_frags %d (<=%d wanted)\n",
 		       libcfs_nid2str(peer->ibp_nid),
-		       msg->ibm_u.connparams.ibcp_max_frags,
+		       msg->ibm_u.connparams.ibcp_max_frags >> IBLND_FRAG_SHIFT,
 		       conn->ibc_max_frags);
 		rc = -EPROTO;
 		goto failed;
@@ -2859,7 +2866,7 @@ kiblnd_check_connreply(struct kib_conn *conn, void *priv, int priv_nob)
 	conn->ibc_credits = msg->ibm_u.connparams.ibcp_queue_depth;
 	conn->ibc_reserved_credits = msg->ibm_u.connparams.ibcp_queue_depth;
 	conn->ibc_queue_depth = msg->ibm_u.connparams.ibcp_queue_depth;
-	conn->ibc_max_frags = msg->ibm_u.connparams.ibcp_max_frags;
+	conn->ibc_max_frags = msg->ibm_u.connparams.ibcp_max_frags >> IBLND_FRAG_SHIFT;
 	LASSERT(conn->ibc_credits + conn->ibc_reserved_credits +
 		IBLND_OOB_MSGS(ver) <= IBLND_RX_MSGS(conn));
 
@@ -2916,7 +2923,7 @@ kiblnd_active_connect(struct rdma_cm_id *cmid)
 	memset(msg, 0, sizeof(*msg));
 	kiblnd_init_msg(msg, IBLND_MSG_CONNREQ, sizeof(msg->ibm_u.connparams));
 	msg->ibm_u.connparams.ibcp_queue_depth = conn->ibc_queue_depth;
-	msg->ibm_u.connparams.ibcp_max_frags = conn->ibc_max_frags;
+	msg->ibm_u.connparams.ibcp_max_frags = conn->ibc_max_frags << IBLND_FRAG_SHIFT;
 	msg->ibm_u.connparams.ibcp_max_msg_size = IBLND_MSG_SIZE;
 
 	kiblnd_pack_msg(peer->ibp_ni, msg, version,
@@ -3233,7 +3240,11 @@ kiblnd_check_conns(int idx)
 	 */
 	list_for_each_entry_safe(conn, temp, &checksends, ibc_connd_list) {
 		list_del(&conn->ibc_connd_list);
-		kiblnd_check_sends(conn);
+
+		spin_lock(&conn->ibc_lock);
+		kiblnd_check_sends_locked(conn);
+		spin_unlock(&conn->ibc_lock);
+
 		kiblnd_conn_decref(conn);
 	}
 }
@@ -3419,6 +3430,12 @@ kiblnd_qp_event(struct ib_event *event, void *arg)
 	case IB_EVENT_COMM_EST:
 		CDEBUG(D_NET, "%s established\n",
 		       libcfs_nid2str(conn->ibc_peer->ibp_nid));
+		/*
+		 * We received a packet but connection isn't established
+		 * probably handshake packet was lost, so free to
+		 * force make connection established
+		 */
+		rdma_notify(conn->ibc_cmid, IB_EVENT_COMM_EST);
 		return;
 
 	default:
diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c
index 07ec540946cd..cbc9a9c5385f 100644
--- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c
+++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c
@@ -1468,11 +1468,6 @@ ksocknal_close_conn_locked(struct ksock_conn *conn, int error)
 
 		conn->ksnc_route = NULL;
 
-#if 0	   /* irrelevant with only eager routes */
-		/* make route least favourite */
-		list_del(&route->ksnr_list);
-		list_add_tail(&route->ksnr_list, &peer->ksnp_routes);
-#endif
 		ksocknal_route_decref(route);     /* drop conn's ref on route */
 	}
 
diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h
index a56632b4ee37..e6ca0cf52691 100644
--- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h
+++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h
@@ -86,8 +86,6 @@ struct ksock_sched {				/* per scheduler state */
 	int                     kss_nconns;     /* # connections assigned to
 						 * this scheduler */
 	struct ksock_sched_info *kss_info;	/* owner of it */
-	struct page             *kss_rx_scratch_pgs[LNET_MAX_IOV];
-	struct kvec             kss_scratch_iov[LNET_MAX_IOV];
 };
 
 struct ksock_sched_info {
@@ -616,9 +614,7 @@ void ksocknal_shutdown(lnet_ni_t *ni);
 int ksocknal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg);
 int ksocknal_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg);
 int ksocknal_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg,
-		  int delayed, unsigned int niov,
-		  struct kvec *iov, lnet_kiov_t *kiov,
-		  unsigned int offset, unsigned int mlen, unsigned int rlen);
+		  int delayed, struct iov_iter *to, unsigned int rlen);
 int ksocknal_accept(lnet_ni_t *ni, struct socket *sock);
 
 int ksocknal_add_peer(lnet_ni_t *ni, lnet_process_id_t id, __u32 ip, int port);
@@ -635,7 +631,7 @@ int  ksocknal_close_peer_conns_locked(struct ksock_peer *peer,
 int ksocknal_close_conn_and_siblings(struct ksock_conn *conn, int why);
 int ksocknal_close_matching_conns(lnet_process_id_t id, __u32 ipaddr);
 struct ksock_conn *ksocknal_find_conn_locked(struct ksock_peer *peer,
-					struct ksock_tx *tx, int nonblk);
+					     struct ksock_tx *tx, int nonblk);
 
 int  ksocknal_launch_packet(lnet_ni_t *ni, struct ksock_tx *tx,
 			    lnet_process_id_t id);
diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c
index 303576d815c6..c1c6f604e6ad 100644
--- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c
+++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c
@@ -35,8 +35,8 @@ ksocknal_alloc_tx(int type, int size)
 		spin_lock(&ksocknal_data.ksnd_tx_lock);
 
 		if (!list_empty(&ksocknal_data.ksnd_idle_noop_txs)) {
-			tx = list_entry(ksocknal_data.ksnd_idle_noop_txs. \
-					    next, struct ksock_tx, tx_list);
+			tx = list_entry(ksocknal_data.ksnd_idle_noop_txs.next,
+					struct ksock_tx, tx_list);
 			LASSERT(tx->tx_desc_size == size);
 			list_del(&tx->tx_list);
 		}
@@ -164,13 +164,13 @@ ksocknal_send_kiov(struct ksock_conn *conn, struct ksock_tx *tx)
 	do {
 		LASSERT(tx->tx_nkiov > 0);
 
-		if (nob < (int)kiov->kiov_len) {
-			kiov->kiov_offset += nob;
-			kiov->kiov_len -= nob;
+		if (nob < (int)kiov->bv_len) {
+			kiov->bv_offset += nob;
+			kiov->bv_len -= nob;
 			return rc;
 		}
 
-		nob -= (int)kiov->kiov_len;
+		nob -= (int)kiov->bv_len;
 		tx->tx_kiov = ++kiov;
 		tx->tx_nkiov--;
 	} while (nob);
@@ -326,13 +326,13 @@ ksocknal_recv_kiov(struct ksock_conn *conn)
 	do {
 		LASSERT(conn->ksnc_rx_nkiov > 0);
 
-		if (nob < (int)kiov->kiov_len) {
-			kiov->kiov_offset += nob;
-			kiov->kiov_len -= nob;
+		if (nob < (int)kiov->bv_len) {
+			kiov->bv_offset += nob;
+			kiov->bv_len -= nob;
 			return -EAGAIN;
 		}
 
-		nob -= kiov->kiov_len;
+		nob -= kiov->bv_len;
 		conn->ksnc_rx_kiov = ++kiov;
 		conn->ksnc_rx_nkiov--;
 	} while (nob);
@@ -1325,39 +1325,36 @@ ksocknal_process_receive(struct ksock_conn *conn)
 
 int
 ksocknal_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg, int delayed,
-	      unsigned int niov, struct kvec *iov, lnet_kiov_t *kiov,
-	      unsigned int offset, unsigned int mlen, unsigned int rlen)
+	      struct iov_iter *to, unsigned int rlen)
 {
 	struct ksock_conn *conn = private;
 	struct ksock_sched *sched = conn->ksnc_scheduler;
 
-	LASSERT(mlen <= rlen);
-	LASSERT(niov <= LNET_MAX_IOV);
+	LASSERT(iov_iter_count(to) <= rlen);
+	LASSERT(to->nr_segs <= LNET_MAX_IOV);
 
 	conn->ksnc_cookie = msg;
-	conn->ksnc_rx_nob_wanted = mlen;
+	conn->ksnc_rx_nob_wanted = iov_iter_count(to);
 	conn->ksnc_rx_nob_left = rlen;
 
-	if (!mlen || iov) {
+	if (to->type & ITER_KVEC) {
 		conn->ksnc_rx_nkiov = 0;
 		conn->ksnc_rx_kiov = NULL;
 		conn->ksnc_rx_iov = conn->ksnc_rx_iov_space.iov;
 		conn->ksnc_rx_niov =
 			lnet_extract_iov(LNET_MAX_IOV, conn->ksnc_rx_iov,
-					 niov, iov, offset, mlen);
+					 to->nr_segs, to->kvec,
+					 to->iov_offset, iov_iter_count(to));
 	} else {
 		conn->ksnc_rx_niov = 0;
 		conn->ksnc_rx_iov = NULL;
 		conn->ksnc_rx_kiov = conn->ksnc_rx_iov_space.kiov;
 		conn->ksnc_rx_nkiov =
 			lnet_extract_kiov(LNET_MAX_IOV, conn->ksnc_rx_kiov,
-					  niov, kiov, offset, mlen);
+					 to->nr_segs, to->bvec,
+					 to->iov_offset, iov_iter_count(to));
 	}
 
-	LASSERT(mlen ==
-		lnet_iov_nob(conn->ksnc_rx_niov, conn->ksnc_rx_iov) +
-		lnet_kiov_nob(conn->ksnc_rx_nkiov, conn->ksnc_rx_kiov));
-
 	LASSERT(conn->ksnc_rx_scheduled);
 
 	spin_lock_bh(&sched->kss_lock);
@@ -2008,13 +2005,6 @@ ksocknal_connect(struct ksock_route *route)
 		list_splice_init(&peer->ksnp_tx_queue, &zombies);
 	}
 
-#if 0	   /* irrelevant with only eager routes */
-	if (!route->ksnr_deleted) {
-		/* make this route least-favourite for re-selection */
-		list_del(&route->ksnr_list);
-		list_add_tail(&route->ksnr_list, &peer->ksnp_routes);
-	}
-#endif
 	write_unlock_bh(&ksocknal_data.ksnd_global_lock);
 
 	ksocknal_peer_failed(peer);
diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c
index 6a17757fce1e..6c95e989ca12 100644
--- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c
+++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c
@@ -73,9 +73,9 @@ ksocknal_lib_zc_capable(struct ksock_conn *conn)
 int
 ksocknal_lib_send_iov(struct ksock_conn *conn, struct ksock_tx *tx)
 {
+	struct msghdr msg = {.msg_flags = MSG_DONTWAIT};
 	struct socket *sock = conn->ksnc_sock;
-	int nob;
-	int rc;
+	int nob, i;
 
 	if (*ksocknal_tunables.ksnd_enable_csum	&& /* checksum enabled */
 	    conn->ksnc_proto == &ksocknal_protocol_v2x && /* V2.x connection  */
@@ -83,34 +83,16 @@ ksocknal_lib_send_iov(struct ksock_conn *conn, struct ksock_tx *tx)
 	    !tx->tx_msg.ksm_csum)		     /* not checksummed  */
 		ksocknal_lib_csum_tx(tx);
 
-	/*
-	 * NB we can't trust socket ops to either consume our iovs
-	 * or leave them alone.
-	 */
-	{
-#if SOCKNAL_SINGLE_FRAG_TX
-		struct kvec scratch;
-		struct kvec *scratchiov = &scratch;
-		unsigned int niov = 1;
-#else
-		struct kvec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
-		unsigned int niov = tx->tx_niov;
-#endif
-		struct msghdr msg = {.msg_flags = MSG_DONTWAIT};
-		int i;
+	for (nob = i = 0; i < tx->tx_niov; i++)
+		nob += tx->tx_iov[i].iov_len;
 
-		for (nob = i = 0; i < niov; i++) {
-			scratchiov[i] = tx->tx_iov[i];
-			nob += scratchiov[i].iov_len;
-		}
+	if (!list_empty(&conn->ksnc_tx_queue) ||
+	    nob < tx->tx_resid)
+		msg.msg_flags |= MSG_MORE;
 
-		if (!list_empty(&conn->ksnc_tx_queue) ||
-		    nob < tx->tx_resid)
-			msg.msg_flags |= MSG_MORE;
-
-		rc = kernel_sendmsg(sock, &msg, scratchiov, niov, nob);
-	}
-	return rc;
+	iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC,
+		      tx->tx_iov, tx->tx_niov, nob);
+	return sock_sendmsg(sock, &msg);
 }
 
 int
@@ -124,20 +106,16 @@ ksocknal_lib_send_kiov(struct ksock_conn *conn, struct ksock_tx *tx)
 	/* Not NOOP message */
 	LASSERT(tx->tx_lnetmsg);
 
-	/*
-	 * NB we can't trust socket ops to either consume our iovs
-	 * or leave them alone.
-	 */
 	if (tx->tx_msg.ksm_zc_cookies[0]) {
 		/* Zero copy is enabled */
 		struct sock *sk = sock->sk;
-		struct page *page = kiov->kiov_page;
-		int offset = kiov->kiov_offset;
-		int fragsize = kiov->kiov_len;
+		struct page *page = kiov->bv_page;
+		int offset = kiov->bv_offset;
+		int fragsize = kiov->bv_len;
 		int msgflg = MSG_DONTWAIT;
 
 		CDEBUG(D_NET, "page %p + offset %x for %d\n",
-		       page, offset, kiov->kiov_len);
+		       page, offset, kiov->bv_len);
 
 		if (!list_empty(&conn->ksnc_tx_queue) ||
 		    fragsize < tx->tx_resid)
@@ -150,34 +128,19 @@ ksocknal_lib_send_kiov(struct ksock_conn *conn, struct ksock_tx *tx)
 			rc = tcp_sendpage(sk, page, offset, fragsize, msgflg);
 		}
 	} else {
-#if SOCKNAL_SINGLE_FRAG_TX || !SOCKNAL_RISK_KMAP_DEADLOCK
-		struct kvec scratch;
-		struct kvec *scratchiov = &scratch;
-		unsigned int niov = 1;
-#else
-#ifdef CONFIG_HIGHMEM
-#warning "XXX risk of kmap deadlock on multiple frags..."
-#endif
-		struct kvec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
-		unsigned int niov = tx->tx_nkiov;
-#endif
 		struct msghdr msg = {.msg_flags = MSG_DONTWAIT};
 		int i;
 
-		for (nob = i = 0; i < niov; i++) {
-			scratchiov[i].iov_base = kmap(kiov[i].kiov_page) +
-						 kiov[i].kiov_offset;
-			nob += scratchiov[i].iov_len = kiov[i].kiov_len;
-		}
+		for (nob = i = 0; i < tx->tx_nkiov; i++)
+			nob += kiov[i].bv_len;
 
 		if (!list_empty(&conn->ksnc_tx_queue) ||
 		    nob < tx->tx_resid)
 			msg.msg_flags |= MSG_MORE;
 
-		rc = kernel_sendmsg(sock, &msg, (struct kvec *)scratchiov, niov, nob);
-
-		for (i = 0; i < niov; i++)
-			kunmap(kiov[i].kiov_page);
+		iov_iter_bvec(&msg.msg_iter, WRITE | ITER_BVEC,
+			      kiov, tx->tx_nkiov, nob);
+		rc = sock_sendmsg(sock, &msg);
 	}
 	return rc;
 }
@@ -201,14 +164,7 @@ ksocknal_lib_eager_ack(struct ksock_conn *conn)
 int
 ksocknal_lib_recv_iov(struct ksock_conn *conn)
 {
-#if SOCKNAL_SINGLE_FRAG_RX
-	struct kvec scratch;
-	struct kvec *scratchiov = &scratch;
-	unsigned int niov = 1;
-#else
-	struct kvec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
 	unsigned int niov = conn->ksnc_rx_niov;
-#endif
 	struct kvec *iov = conn->ksnc_rx_iov;
 	struct msghdr msg = {
 		.msg_flags = 0
@@ -220,20 +176,15 @@ ksocknal_lib_recv_iov(struct ksock_conn *conn)
 	int sum;
 	__u32 saved_csum;
 
-	/*
-	 * NB we can't trust socket ops to either consume our iovs
-	 * or leave them alone.
-	 */
 	LASSERT(niov > 0);
 
-	for (nob = i = 0; i < niov; i++) {
-		scratchiov[i] = iov[i];
-		nob += scratchiov[i].iov_len;
-	}
+	for (nob = i = 0; i < niov; i++)
+		nob += iov[i].iov_len;
+
 	LASSERT(nob <= conn->ksnc_rx_nob_wanted);
 
-	rc = kernel_recvmsg(conn->ksnc_sock, &msg, scratchiov, niov, nob,
-			    MSG_DONTWAIT);
+	iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, iov, niov, nob);
+	rc = sock_recvmsg(conn->ksnc_sock, &msg, MSG_DONTWAIT);
 
 	saved_csum = 0;
 	if (conn->ksnc_proto == &ksocknal_protocol_v2x) {
@@ -259,67 +210,10 @@ ksocknal_lib_recv_iov(struct ksock_conn *conn)
 	return rc;
 }
 
-static void
-ksocknal_lib_kiov_vunmap(void *addr)
-{
-	if (!addr)
-		return;
-
-	vunmap(addr);
-}
-
-static void *
-ksocknal_lib_kiov_vmap(lnet_kiov_t *kiov, int niov,
-		       struct kvec *iov, struct page **pages)
-{
-	void *addr;
-	int nob;
-	int i;
-
-	if (!*ksocknal_tunables.ksnd_zc_recv || !pages)
-		return NULL;
-
-	LASSERT(niov <= LNET_MAX_IOV);
-
-	if (niov < 2 ||
-	    niov < *ksocknal_tunables.ksnd_zc_recv_min_nfrags)
-		return NULL;
-
-	for (nob = i = 0; i < niov; i++) {
-		if ((kiov[i].kiov_offset && i > 0) ||
-		    (kiov[i].kiov_offset + kiov[i].kiov_len != PAGE_SIZE && i < niov - 1))
-			return NULL;
-
-		pages[i] = kiov[i].kiov_page;
-		nob += kiov[i].kiov_len;
-	}
-
-	addr = vmap(pages, niov, VM_MAP, PAGE_KERNEL);
-	if (!addr)
-		return NULL;
-
-	iov->iov_base = addr + kiov[0].kiov_offset;
-	iov->iov_len = nob;
-
-	return addr;
-}
-
 int
 ksocknal_lib_recv_kiov(struct ksock_conn *conn)
 {
-#if SOCKNAL_SINGLE_FRAG_RX || !SOCKNAL_RISK_KMAP_DEADLOCK
-	struct kvec scratch;
-	struct kvec *scratchiov = &scratch;
-	struct page **pages = NULL;
-	unsigned int niov = 1;
-#else
-#ifdef CONFIG_HIGHMEM
-#warning "XXX risk of kmap deadlock on multiple frags..."
-#endif
-	struct kvec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
-	struct page **pages = conn->ksnc_scheduler->kss_rx_scratch_pgs;
 	unsigned int niov = conn->ksnc_rx_nkiov;
-#endif
 	lnet_kiov_t   *kiov = conn->ksnc_rx_kiov;
 	struct msghdr msg = {
 		.msg_flags = 0
@@ -328,63 +222,32 @@ ksocknal_lib_recv_kiov(struct ksock_conn *conn)
 	int i;
 	int rc;
 	void *base;
-	void *addr;
 	int sum;
 	int fragnob;
-	int n;
-
-	/*
-	 * NB we can't trust socket ops to either consume our iovs
-	 * or leave them alone.
-	 */
-	addr = ksocknal_lib_kiov_vmap(kiov, niov, scratchiov, pages);
-	if (addr) {
-		nob = scratchiov[0].iov_len;
-		n = 1;
 
-	} else {
-		for (nob = i = 0; i < niov; i++) {
-			nob += scratchiov[i].iov_len = kiov[i].kiov_len;
-			scratchiov[i].iov_base = kmap(kiov[i].kiov_page) +
-						 kiov[i].kiov_offset;
-		}
-		n = niov;
-	}
+	for (nob = i = 0; i < niov; i++)
+		nob += kiov[i].bv_len;
 
 	LASSERT(nob <= conn->ksnc_rx_nob_wanted);
 
-	rc = kernel_recvmsg(conn->ksnc_sock, &msg, (struct kvec *)scratchiov,
-			    n, nob, MSG_DONTWAIT);
+	iov_iter_bvec(&msg.msg_iter, READ | ITER_BVEC, kiov, niov, nob);
+	rc = sock_recvmsg(conn->ksnc_sock, &msg, MSG_DONTWAIT);
 
 	if (conn->ksnc_msg.ksm_csum) {
 		for (i = 0, sum = rc; sum > 0; i++, sum -= fragnob) {
 			LASSERT(i < niov);
 
-			/*
-			 * Dang! have to kmap again because I have nowhere to
-			 * stash the mapped address.  But by doing it while the
-			 * page is still mapped, the kernel just bumps the map
-			 * count and returns me the address it stashed.
-			 */
-			base = kmap(kiov[i].kiov_page) + kiov[i].kiov_offset;
-			fragnob = kiov[i].kiov_len;
+			base = kmap(kiov[i].bv_page) + kiov[i].bv_offset;
+			fragnob = kiov[i].bv_len;
 			if (fragnob > sum)
 				fragnob = sum;
 
 			conn->ksnc_rx_csum = ksocknal_csum(conn->ksnc_rx_csum,
 							   base, fragnob);
 
-			kunmap(kiov[i].kiov_page);
+			kunmap(kiov[i].bv_page);
 		}
 	}
-
-	if (addr) {
-		ksocknal_lib_kiov_vunmap(addr);
-	} else {
-		for (i = 0; i < niov; i++)
-			kunmap(kiov[i].kiov_page);
-	}
-
 	return rc;
 }
 
@@ -406,12 +269,12 @@ ksocknal_lib_csum_tx(struct ksock_tx *tx)
 
 	if (tx->tx_kiov) {
 		for (i = 0; i < tx->tx_nkiov; i++) {
-			base = kmap(tx->tx_kiov[i].kiov_page) +
-			       tx->tx_kiov[i].kiov_offset;
+			base = kmap(tx->tx_kiov[i].bv_page) +
+			       tx->tx_kiov[i].bv_offset;
 
-			csum = ksocknal_csum(csum, base, tx->tx_kiov[i].kiov_len);
+			csum = ksocknal_csum(csum, base, tx->tx_kiov[i].bv_len);
 
-			kunmap(tx->tx_kiov[i].kiov_page);
+			kunmap(tx->tx_kiov[i].bv_page);
 		}
 	} else {
 		for (i = 1; i < tx->tx_niov; i++)
diff --git a/drivers/staging/lustre/lnet/libcfs/debug.c b/drivers/staging/lustre/lnet/libcfs/debug.c
index 42b15a769183..23b36b890964 100644
--- a/drivers/staging/lustre/lnet/libcfs/debug.c
+++ b/drivers/staging/lustre/lnet/libcfs/debug.c
@@ -328,15 +328,20 @@ libcfs_debug_str2mask(int *mask, const char *str, int is_subsys)
  */
 void libcfs_debug_dumplog_internal(void *arg)
 {
+	static time64_t last_dump_time;
+	time64_t current_time;
 	void *journal_info;
 
 	journal_info = current->journal_info;
 	current->journal_info = NULL;
+	current_time = ktime_get_real_seconds();
 
-	if (strncmp(libcfs_debug_file_path_arr, "NONE", 4) != 0) {
+	if (strncmp(libcfs_debug_file_path_arr, "NONE", 4) &&
+	    current_time > last_dump_time) {
+		last_dump_time = current_time;
 		snprintf(debug_file_name, sizeof(debug_file_name) - 1,
 			 "%s.%lld.%ld", libcfs_debug_file_path_arr,
-			 (s64)ktime_get_real_seconds(), (long_ptr_t)arg);
+			 (s64)current_time, (long_ptr_t)arg);
 		pr_alert("LustreError: dumping log to %s\n", debug_file_name);
 		cfs_tracefile_dump_all_pages(debug_file_name);
 		libcfs_run_debug_log_upcall(debug_file_name);
diff --git a/drivers/staging/lustre/lnet/libcfs/fail.c b/drivers/staging/lustre/lnet/libcfs/fail.c
index 9288ee08d1f7..e4b1a0a86eae 100644
--- a/drivers/staging/lustre/lnet/libcfs/fail.c
+++ b/drivers/staging/lustre/lnet/libcfs/fail.c
@@ -90,8 +90,10 @@ int __cfs_fail_check_set(__u32 id, __u32 value, int set)
 		}
 	}
 
-	if ((set == CFS_FAIL_LOC_ORSET || set == CFS_FAIL_LOC_RESET) &&
-	    (value & CFS_FAIL_ONCE))
+	/* Take into account the current call for FAIL_ONCE for ORSET only,
+	 * as RESET is a new fail_loc, it does not change the current call
+	 */
+	if ((set == CFS_FAIL_LOC_ORSET) && (value & CFS_FAIL_ONCE))
 		set_bit(CFS_FAIL_ONCE_BIT, &cfs_fail_loc);
 	/* Lost race to set CFS_FAILED_BIT. */
 	if (test_and_set_bit(CFS_FAILED_BIT, &cfs_fail_loc)) {
diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_string.c b/drivers/staging/lustre/lnet/libcfs/libcfs_string.c
index fc697cdfcdaf..56a614d7713b 100644
--- a/drivers/staging/lustre/lnet/libcfs/libcfs_string.c
+++ b/drivers/staging/lustre/lnet/libcfs/libcfs_string.c
@@ -229,8 +229,6 @@ cfs_str2num_check(char *str, int nob, unsigned *num,
 	char *endp, cache;
 	int rc;
 
-	str = cfs_trimwhite(str);
-
 	/**
 	 * kstrouint can only handle strings composed
 	 * of only numbers. We need to scan the string
diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
index b52518c54efe..e8b1a61420de 100644
--- a/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-cpu.c
@@ -74,6 +74,17 @@ struct cfs_cpt_data {
 
 static struct cfs_cpt_data	cpt_data;
 
+static void
+cfs_node_to_cpumask(int node, cpumask_t *mask)
+{
+	const cpumask_t *tmp = cpumask_of_node(node);
+
+	if (tmp)
+		cpumask_copy(mask, tmp);
+	else
+		cpumask_clear(mask);
+}
+
 void
 cfs_cpt_table_free(struct cfs_cpt_table *cptab)
 {
@@ -403,7 +414,7 @@ cfs_cpt_set_node(struct cfs_cpt_table *cptab, int cpt, int node)
 	mutex_lock(&cpt_data.cpt_mutex);
 
 	mask = cpt_data.cpt_cpumask;
-	cpumask_copy(mask, cpumask_of_node(node));
+	cfs_node_to_cpumask(node, mask);
 
 	rc = cfs_cpt_set_cpumask(cptab, cpt, mask);
 
@@ -427,7 +438,7 @@ cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int cpt, int node)
 	mutex_lock(&cpt_data.cpt_mutex);
 
 	mask = cpt_data.cpt_cpumask;
-	cpumask_copy(mask, cpumask_of_node(node));
+	cfs_node_to_cpumask(node, mask);
 
 	cfs_cpt_unset_cpumask(cptab, cpt, mask);
 
@@ -749,7 +760,7 @@ cfs_cpt_table_create(int ncpt)
 	}
 
 	for_each_online_node(i) {
-		cpumask_copy(mask, cpumask_of_node(i));
+		cfs_node_to_cpumask(i, mask);
 
 		while (!cpumask_empty(mask)) {
 			struct cfs_cpu_partition *part;
diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.c
index 5c0116ade909..7f56d2c9dd00 100644
--- a/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.c
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-crypto.c
@@ -95,8 +95,8 @@ static int cfs_crypto_hash_alloc(enum cfs_crypto_hash_alg hash_alg,
 		err = crypto_ahash_setkey(tfm, key, key_len);
 	else if ((*type)->cht_key != 0)
 		err = crypto_ahash_setkey(tfm,
-					 (unsigned char *)&((*type)->cht_key),
-					 (*type)->cht_size);
+					  (unsigned char *)&((*type)->cht_key),
+					  (*type)->cht_size);
 
 	if (err != 0) {
 		ahash_request_free(*req);
diff --git a/drivers/staging/lustre/lnet/lnet/api-ni.c b/drivers/staging/lustre/lnet/lnet/api-ni.c
index 346db892f275..4daf828198c3 100644
--- a/drivers/staging/lustre/lnet/lnet/api-ni.c
+++ b/drivers/staging/lustre/lnet/lnet/api-ni.c
@@ -1286,6 +1286,25 @@ lnet_startup_lndni(struct lnet_ni *ni, struct lnet_ioctl_config_data *conf)
 		       sizeof(*ni->ni_lnd_tunables));
 	}
 
+	/*
+	 * If given some LND tunable parameters, parse those now to
+	 * override the values in the NI structure.
+	 */
+	if (conf) {
+		if (conf->cfg_config_u.cfg_net.net_peer_rtr_credits >= 0)
+			ni->ni_peerrtrcredits =
+				conf->cfg_config_u.cfg_net.net_peer_rtr_credits;
+		if (conf->cfg_config_u.cfg_net.net_peer_timeout >= 0)
+			ni->ni_peertimeout =
+				conf->cfg_config_u.cfg_net.net_peer_timeout;
+		if (conf->cfg_config_u.cfg_net.net_peer_tx_credits != -1)
+			ni->ni_peertxcredits =
+				conf->cfg_config_u.cfg_net.net_peer_tx_credits;
+		if (conf->cfg_config_u.cfg_net.net_max_tx_credits >= 0)
+			ni->ni_maxtxcredits =
+				conf->cfg_config_u.cfg_net.net_max_tx_credits;
+	}
+
 	rc = lnd->lnd_startup(ni);
 
 	mutex_unlock(&the_lnet.ln_lnd_mutex);
@@ -1299,33 +1318,6 @@ lnet_startup_lndni(struct lnet_ni *ni, struct lnet_ioctl_config_data *conf)
 		goto failed0;
 	}
 
-	/*
-	 * If given some LND tunable parameters, parse those now to
-	 * override the values in the NI structure.
-	 */
-	if (conf && conf->cfg_config_u.cfg_net.net_peer_rtr_credits >= 0) {
-		ni->ni_peerrtrcredits =
-			conf->cfg_config_u.cfg_net.net_peer_rtr_credits;
-	}
-	if (conf && conf->cfg_config_u.cfg_net.net_peer_timeout >= 0) {
-		ni->ni_peertimeout =
-			conf->cfg_config_u.cfg_net.net_peer_timeout;
-	}
-	/*
-	 * TODO
-	 * Note: For now, don't allow the user to change
-	 * peertxcredits as this number is used in the
-	 * IB LND to control queue depth.
-	 *
-	 * if (conf && conf->cfg_config_u.cfg_net.net_peer_tx_credits != -1)
-	 *	ni->ni_peertxcredits =
-	 *		conf->cfg_config_u.cfg_net.net_peer_tx_credits;
-	 */
-	if (conf && conf->cfg_config_u.cfg_net.net_max_tx_credits >= 0) {
-		ni->ni_maxtxcredits =
-			conf->cfg_config_u.cfg_net.net_max_tx_credits;
-	}
-
 	LASSERT(ni->ni_peertimeout <= 0 || lnd->lnd_query);
 
 	lnet_net_lock(LNET_LOCK_EX);
diff --git a/drivers/staging/lustre/lnet/lnet/config.c b/drivers/staging/lustre/lnet/lnet/config.c
index a72afdf68bb2..9e2183ff847e 100644
--- a/drivers/staging/lustre/lnet/lnet/config.c
+++ b/drivers/staging/lustre/lnet/lnet/config.c
@@ -31,6 +31,8 @@
  */
 
 #define DEBUG_SUBSYSTEM S_LNET
+#include <linux/nsproxy.h>
+#include <net/net_namespace.h>
 #include "../../include/linux/lnet/lib-lnet.h"
 
 struct lnet_text_buf {	    /* tmp struct for parsing routes */
@@ -110,6 +112,11 @@ lnet_ni_free(struct lnet_ni *ni)
 		LIBCFS_FREE(ni->ni_interfaces[i],
 			    strlen(ni->ni_interfaces[i]) + 1);
 	}
+
+	/* release reference to net namespace */
+	if (ni->ni_net_ns)
+		put_net(ni->ni_net_ns);
+
 	LIBCFS_FREE(ni, sizeof(*ni));
 }
 
@@ -171,6 +178,13 @@ lnet_ni_alloc(__u32 net, struct cfs_expr_list *el, struct list_head *nilist)
 
 	/* LND will fill in the address part of the NID */
 	ni->ni_nid = LNET_MKNID(net, 0);
+
+	/* Store net namespace in which current ni is being created */
+	if (current->nsproxy->net_ns)
+		ni->ni_net_ns = get_net(current->nsproxy->net_ns);
+	else
+		ni->ni_net_ns = NULL;
+
 	ni->ni_last_alive = ktime_get_real_seconds();
 	list_add_tail(&ni->ni_list, nilist);
 	return ni;
diff --git a/drivers/staging/lustre/lnet/lnet/lib-md.c b/drivers/staging/lustre/lnet/lnet/lib-md.c
index 1834bf7a27ef..eab53cd57296 100644
--- a/drivers/staging/lustre/lnet/lnet/lib-md.c
+++ b/drivers/staging/lustre/lnet/lnet/lib-md.c
@@ -134,11 +134,11 @@ lnet_md_build(lnet_libmd_t *lmd, lnet_md_t *umd, int unlink)
 
 		for (i = 0; i < (int)niov; i++) {
 			/* We take the page pointer on trust */
-			if (lmd->md_iov.kiov[i].kiov_offset +
-			    lmd->md_iov.kiov[i].kiov_len > PAGE_SIZE)
+			if (lmd->md_iov.kiov[i].bv_offset +
+			    lmd->md_iov.kiov[i].bv_len > PAGE_SIZE)
 				return -EINVAL; /* invalid length */
 
-			total_length += lmd->md_iov.kiov[i].kiov_len;
+			total_length += lmd->md_iov.kiov[i].bv_len;
 		}
 
 		lmd->md_length = total_length;
@@ -292,11 +292,12 @@ LNetMDAttach(lnet_handle_me_t meh, lnet_md_t umd,
 		return -ENOMEM;
 
 	rc = lnet_md_build(md, &umd, unlink);
+	if (rc)
+		goto out_free;
+
 	cpt = lnet_cpt_of_cookie(meh.cookie);
 
 	lnet_res_lock(cpt);
-	if (rc)
-		goto failed;
 
 	me = lnet_handle2me(&meh);
 	if (!me)
@@ -307,7 +308,7 @@ LNetMDAttach(lnet_handle_me_t meh, lnet_md_t umd,
 		rc = lnet_md_link(md, umd.eq_handle, cpt);
 
 	if (rc)
-		goto failed;
+		goto out_unlock;
 
 	/*
 	 * attach this MD to portal of ME and check if it matches any
@@ -324,10 +325,10 @@ LNetMDAttach(lnet_handle_me_t meh, lnet_md_t umd,
 
 	return 0;
 
- failed:
-	lnet_md_free(md);
-
+out_unlock:
 	lnet_res_unlock(cpt);
+out_free:
+	lnet_md_free(md);
 	return rc;
 }
 EXPORT_SYMBOL(LNetMDAttach);
@@ -370,24 +371,25 @@ LNetMDBind(lnet_md_t umd, lnet_unlink_t unlink, lnet_handle_md_t *handle)
 		return -ENOMEM;
 
 	rc = lnet_md_build(md, &umd, unlink);
+	if (rc)
+		goto out_free;
 
 	cpt = lnet_res_lock_current();
-	if (rc)
-		goto failed;
 
 	rc = lnet_md_link(md, umd.eq_handle, cpt);
 	if (rc)
-		goto failed;
+		goto out_unlock;
 
 	lnet_md2handle(handle, md);
 
 	lnet_res_unlock(cpt);
 	return 0;
 
- failed:
+out_unlock:
+	lnet_res_unlock(cpt);
+out_free:
 	lnet_md_free(md);
 
-	lnet_res_unlock(cpt);
 	return rc;
 }
 EXPORT_SYMBOL(LNetMDBind);
diff --git a/drivers/staging/lustre/lnet/lnet/lib-move.c b/drivers/staging/lustre/lnet/lnet/lib-move.c
index e6d3b801d87d..48e6f8f2392f 100644
--- a/drivers/staging/lustre/lnet/lnet/lib-move.c
+++ b/drivers/staging/lustre/lnet/lnet/lib-move.c
@@ -37,6 +37,8 @@
 #define DEBUG_SUBSYSTEM S_LNET
 
 #include "../../include/linux/lnet/lib-lnet.h"
+#include <linux/nsproxy.h>
+#include <net/net_namespace.h>
 
 static int local_nid_dist_zero = 1;
 module_param(local_nid_dist_zero, int, 0444);
@@ -166,25 +168,17 @@ lnet_iov_nob(unsigned int niov, struct kvec *iov)
 EXPORT_SYMBOL(lnet_iov_nob);
 
 void
-lnet_copy_iov2iov(unsigned int ndiov, struct kvec *diov, unsigned int doffset,
-		  unsigned int nsiov, struct kvec *siov, unsigned int soffset,
-		  unsigned int nob)
+lnet_copy_iov2iter(struct iov_iter *to,
+		   unsigned int nsiov, const struct kvec *siov,
+		   unsigned int soffset, unsigned int nob)
 {
 	/* NB diov, siov are READ-ONLY */
-	unsigned int this_nob;
+	const char *s;
+	size_t left;
 
 	if (!nob)
 		return;
 
-	/* skip complete frags before 'doffset' */
-	LASSERT(ndiov > 0);
-	while (doffset >= diov->iov_len) {
-		doffset -= diov->iov_len;
-		diov++;
-		ndiov--;
-		LASSERT(ndiov > 0);
-	}
-
 	/* skip complete frags before 'soffset' */
 	LASSERT(nsiov > 0);
 	while (soffset >= siov->iov_len) {
@@ -194,39 +188,68 @@ lnet_copy_iov2iov(unsigned int ndiov, struct kvec *diov, unsigned int doffset,
 		LASSERT(nsiov > 0);
 	}
 
+	s = (char *)siov->iov_base + soffset;
+	left = siov->iov_len - soffset;
 	do {
-		LASSERT(ndiov > 0);
+		size_t n, copy = left;
 		LASSERT(nsiov > 0);
-		this_nob = min(diov->iov_len - doffset,
-			       siov->iov_len - soffset);
-		this_nob = min(this_nob, nob);
 
-		memcpy((char *)diov->iov_base + doffset,
-		       (char *)siov->iov_base + soffset, this_nob);
-		nob -= this_nob;
+		if (copy > nob)
+			copy = nob;
+		n = copy_to_iter(s, copy, to);
+		if (n != copy)
+			return;
+		nob -= n;
 
-		if (diov->iov_len > doffset + this_nob) {
-			doffset += this_nob;
-		} else {
-			diov++;
-			ndiov--;
-			doffset = 0;
-		}
+		siov++;
+		s = (char *)siov->iov_base;
+		left = siov->iov_len;
+		nsiov--;
+	} while (nob > 0);
+}
+EXPORT_SYMBOL(lnet_copy_iov2iter);
 
-		if (siov->iov_len > soffset + this_nob) {
-			soffset += this_nob;
-		} else {
-			siov++;
-			nsiov--;
-			soffset = 0;
-		}
+void
+lnet_copy_kiov2iter(struct iov_iter *to,
+		    unsigned int nsiov, const lnet_kiov_t *siov,
+		    unsigned int soffset, unsigned int nob)
+{
+	if (!nob)
+		return;
+
+	LASSERT(!in_interrupt());
+
+	LASSERT(nsiov > 0);
+	while (soffset >= siov->bv_len) {
+		soffset -= siov->bv_len;
+		siov++;
+		nsiov--;
+		LASSERT(nsiov > 0);
+	}
+
+	do {
+		size_t copy = siov->bv_len - soffset, n;
+
+		LASSERT(nsiov > 0);
+
+		if (copy > nob)
+			copy = nob;
+		n = copy_page_to_iter(siov->bv_page,
+				      siov->bv_offset + soffset,
+				      copy, to);
+		if (n != copy)
+			return;
+		nob -= n;
+		siov++;
+		nsiov--;
+		soffset = 0;
 	} while (nob > 0);
 }
-EXPORT_SYMBOL(lnet_copy_iov2iov);
+EXPORT_SYMBOL(lnet_copy_kiov2iter);
 
 int
 lnet_extract_iov(int dst_niov, struct kvec *dst,
-		 int src_niov, struct kvec *src,
+		 int src_niov, const struct kvec *src,
 		 unsigned int offset, unsigned int len)
 {
 	/*
@@ -280,238 +303,15 @@ lnet_kiov_nob(unsigned int niov, lnet_kiov_t *kiov)
 
 	LASSERT(!niov || kiov);
 	while (niov-- > 0)
-		nob += (kiov++)->kiov_len;
+		nob += (kiov++)->bv_len;
 
 	return nob;
 }
 EXPORT_SYMBOL(lnet_kiov_nob);
 
-void
-lnet_copy_kiov2kiov(unsigned int ndiov, lnet_kiov_t *diov, unsigned int doffset,
-		    unsigned int nsiov, lnet_kiov_t *siov, unsigned int soffset,
-		    unsigned int nob)
-{
-	/* NB diov, siov are READ-ONLY */
-	unsigned int this_nob;
-	char *daddr = NULL;
-	char *saddr = NULL;
-
-	if (!nob)
-		return;
-
-	LASSERT(!in_interrupt());
-
-	LASSERT(ndiov > 0);
-	while (doffset >= diov->kiov_len) {
-		doffset -= diov->kiov_len;
-		diov++;
-		ndiov--;
-		LASSERT(ndiov > 0);
-	}
-
-	LASSERT(nsiov > 0);
-	while (soffset >= siov->kiov_len) {
-		soffset -= siov->kiov_len;
-		siov++;
-		nsiov--;
-		LASSERT(nsiov > 0);
-	}
-
-	do {
-		LASSERT(ndiov > 0);
-		LASSERT(nsiov > 0);
-		this_nob = min(diov->kiov_len - doffset,
-			       siov->kiov_len - soffset);
-		this_nob = min(this_nob, nob);
-
-		if (!daddr)
-			daddr = ((char *)kmap(diov->kiov_page)) +
-				diov->kiov_offset + doffset;
-		if (!saddr)
-			saddr = ((char *)kmap(siov->kiov_page)) +
-				siov->kiov_offset + soffset;
-
-		/*
-		 * Vanishing risk of kmap deadlock when mapping 2 pages.
-		 * However in practice at least one of the kiovs will be mapped
-		 * kernel pages and the map/unmap will be NOOPs
-		 */
-		memcpy(daddr, saddr, this_nob);
-		nob -= this_nob;
-
-		if (diov->kiov_len > doffset + this_nob) {
-			daddr += this_nob;
-			doffset += this_nob;
-		} else {
-			kunmap(diov->kiov_page);
-			daddr = NULL;
-			diov++;
-			ndiov--;
-			doffset = 0;
-		}
-
-		if (siov->kiov_len > soffset + this_nob) {
-			saddr += this_nob;
-			soffset += this_nob;
-		} else {
-			kunmap(siov->kiov_page);
-			saddr = NULL;
-			siov++;
-			nsiov--;
-			soffset = 0;
-		}
-	} while (nob > 0);
-
-	if (daddr)
-		kunmap(diov->kiov_page);
-	if (saddr)
-		kunmap(siov->kiov_page);
-}
-EXPORT_SYMBOL(lnet_copy_kiov2kiov);
-
-void
-lnet_copy_kiov2iov(unsigned int niov, struct kvec *iov, unsigned int iovoffset,
-		   unsigned int nkiov, lnet_kiov_t *kiov,
-		   unsigned int kiovoffset, unsigned int nob)
-{
-	/* NB iov, kiov are READ-ONLY */
-	unsigned int this_nob;
-	char *addr = NULL;
-
-	if (!nob)
-		return;
-
-	LASSERT(!in_interrupt());
-
-	LASSERT(niov > 0);
-	while (iovoffset >= iov->iov_len) {
-		iovoffset -= iov->iov_len;
-		iov++;
-		niov--;
-		LASSERT(niov > 0);
-	}
-
-	LASSERT(nkiov > 0);
-	while (kiovoffset >= kiov->kiov_len) {
-		kiovoffset -= kiov->kiov_len;
-		kiov++;
-		nkiov--;
-		LASSERT(nkiov > 0);
-	}
-
-	do {
-		LASSERT(niov > 0);
-		LASSERT(nkiov > 0);
-		this_nob = min(iov->iov_len - iovoffset,
-			       (__kernel_size_t)kiov->kiov_len - kiovoffset);
-		this_nob = min(this_nob, nob);
-
-		if (!addr)
-			addr = ((char *)kmap(kiov->kiov_page)) +
-				kiov->kiov_offset + kiovoffset;
-
-		memcpy((char *)iov->iov_base + iovoffset, addr, this_nob);
-		nob -= this_nob;
-
-		if (iov->iov_len > iovoffset + this_nob) {
-			iovoffset += this_nob;
-		} else {
-			iov++;
-			niov--;
-			iovoffset = 0;
-		}
-
-		if (kiov->kiov_len > kiovoffset + this_nob) {
-			addr += this_nob;
-			kiovoffset += this_nob;
-		} else {
-			kunmap(kiov->kiov_page);
-			addr = NULL;
-			kiov++;
-			nkiov--;
-			kiovoffset = 0;
-		}
-
-	} while (nob > 0);
-
-	if (addr)
-		kunmap(kiov->kiov_page);
-}
-EXPORT_SYMBOL(lnet_copy_kiov2iov);
-
-void
-lnet_copy_iov2kiov(unsigned int nkiov, lnet_kiov_t *kiov,
-		   unsigned int kiovoffset, unsigned int niov,
-		   struct kvec *iov, unsigned int iovoffset,
-		   unsigned int nob)
-{
-	/* NB kiov, iov are READ-ONLY */
-	unsigned int this_nob;
-	char *addr = NULL;
-
-	if (!nob)
-		return;
-
-	LASSERT(!in_interrupt());
-
-	LASSERT(nkiov > 0);
-	while (kiovoffset >= kiov->kiov_len) {
-		kiovoffset -= kiov->kiov_len;
-		kiov++;
-		nkiov--;
-		LASSERT(nkiov > 0);
-	}
-
-	LASSERT(niov > 0);
-	while (iovoffset >= iov->iov_len) {
-		iovoffset -= iov->iov_len;
-		iov++;
-		niov--;
-		LASSERT(niov > 0);
-	}
-
-	do {
-		LASSERT(nkiov > 0);
-		LASSERT(niov > 0);
-		this_nob = min((__kernel_size_t)kiov->kiov_len - kiovoffset,
-			       iov->iov_len - iovoffset);
-		this_nob = min(this_nob, nob);
-
-		if (!addr)
-			addr = ((char *)kmap(kiov->kiov_page)) +
-				kiov->kiov_offset + kiovoffset;
-
-		memcpy(addr, (char *)iov->iov_base + iovoffset, this_nob);
-		nob -= this_nob;
-
-		if (kiov->kiov_len > kiovoffset + this_nob) {
-			addr += this_nob;
-			kiovoffset += this_nob;
-		} else {
-			kunmap(kiov->kiov_page);
-			addr = NULL;
-			kiov++;
-			nkiov--;
-			kiovoffset = 0;
-		}
-
-		if (iov->iov_len > iovoffset + this_nob) {
-			iovoffset += this_nob;
-		} else {
-			iov++;
-			niov--;
-			iovoffset = 0;
-		}
-	} while (nob > 0);
-
-	if (addr)
-		kunmap(kiov->kiov_page);
-}
-EXPORT_SYMBOL(lnet_copy_iov2kiov);
-
 int
 lnet_extract_kiov(int dst_niov, lnet_kiov_t *dst,
-		  int src_niov, lnet_kiov_t *src,
+		  int src_niov, const lnet_kiov_t *src,
 		  unsigned int offset, unsigned int len)
 {
 	/*
@@ -526,8 +326,8 @@ lnet_extract_kiov(int dst_niov, lnet_kiov_t *dst,
 		return 0;		     /* no frags */
 
 	LASSERT(src_niov > 0);
-	while (offset >= src->kiov_len) {      /* skip initial frags */
-		offset -= src->kiov_len;
+	while (offset >= src->bv_len) {      /* skip initial frags */
+		offset -= src->bv_len;
 		src_niov--;
 		src++;
 		LASSERT(src_niov > 0);
@@ -538,19 +338,19 @@ lnet_extract_kiov(int dst_niov, lnet_kiov_t *dst,
 		LASSERT(src_niov > 0);
 		LASSERT((int)niov <= dst_niov);
 
-		frag_len = src->kiov_len - offset;
-		dst->kiov_page = src->kiov_page;
-		dst->kiov_offset = src->kiov_offset + offset;
+		frag_len = src->bv_len - offset;
+		dst->bv_page = src->bv_page;
+		dst->bv_offset = src->bv_offset + offset;
 
 		if (len <= frag_len) {
-			dst->kiov_len = len;
-			LASSERT(dst->kiov_offset + dst->kiov_len
+			dst->bv_len = len;
+			LASSERT(dst->bv_offset + dst->bv_len
 					<= PAGE_SIZE);
 			return niov;
 		}
 
-		dst->kiov_len = frag_len;
-		LASSERT(dst->kiov_offset + dst->kiov_len <= PAGE_SIZE);
+		dst->bv_len = frag_len;
+		LASSERT(dst->bv_offset + dst->bv_len <= PAGE_SIZE);
 
 		len -= frag_len;
 		dst++;
@@ -569,6 +369,7 @@ lnet_ni_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg, int delayed,
 	unsigned int niov = 0;
 	struct kvec *iov = NULL;
 	lnet_kiov_t *kiov = NULL;
+	struct iov_iter to;
 	int rc;
 
 	LASSERT(!in_interrupt());
@@ -594,8 +395,14 @@ lnet_ni_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg, int delayed,
 		}
 	}
 
-	rc = ni->ni_lnd->lnd_recv(ni, private, msg, delayed,
-				  niov, iov, kiov, offset, mlen, rlen);
+	if (iov) {
+		iov_iter_kvec(&to, ITER_KVEC | READ, iov, niov, mlen + offset);
+		iov_iter_advance(&to, offset);
+	} else {
+		iov_iter_bvec(&to, ITER_BVEC | READ, kiov, niov, mlen + offset);
+		iov_iter_advance(&to, offset);
+	}
+	rc = ni->ni_lnd->lnd_recv(ni, private, msg, delayed, &to, rlen);
 	if (rc < 0)
 		lnet_finalize(ni, msg, rc);
 }
@@ -2002,6 +1809,9 @@ lnet_parse(lnet_ni_t *ni, lnet_hdr_t *hdr, lnet_nid_t from_nid,
 		       libcfs_nid2str(from_nid), libcfs_nid2str(src_nid),
 		       lnet_msgtyp2str(type), rc);
 		lnet_msg_free(msg);
+		if (rc == -ESHUTDOWN)
+			/* We are shutting down. Don't do anything more */
+			return 0;
 		goto drop;
 	}
 
@@ -2512,6 +2322,15 @@ LNetDist(lnet_nid_t dstnid, lnet_nid_t *srcnidp, __u32 *orderp)
 		}
 
 		if (LNET_NIDNET(ni->ni_nid) == dstnet) {
+			/*
+			 * Check if ni was originally created in
+			 * current net namespace.
+			 * If not, assign order above 0xffff0000,
+			 * to make this ni not a priority.
+			 */
+			if (!net_eq(ni->ni_net_ns, current->nsproxy->net_ns))
+				order += 0xffff0000;
+
 			if (srcnidp)
 				*srcnidp = ni->ni_nid;
 			if (orderp)
diff --git a/drivers/staging/lustre/lnet/lnet/lib-msg.c b/drivers/staging/lustre/lnet/lnet/lib-msg.c
index 910e106e221d..0897e588bd54 100644
--- a/drivers/staging/lustre/lnet/lnet/lib-msg.c
+++ b/drivers/staging/lustre/lnet/lnet/lib-msg.c
@@ -449,23 +449,7 @@ lnet_finalize(lnet_ni_t *ni, lnet_msg_t *msg, int status)
 
 	if (!msg)
 		return;
-#if 0
-	CDEBUG(D_WARNING, "%s msg->%s Flags:%s%s%s%s%s%s%s%s%s%s%s txp %s rxp %s\n",
-	       lnet_msgtyp2str(msg->msg_type), libcfs_id2str(msg->msg_target),
-	       msg->msg_target_is_router ? "t" : "",
-	       msg->msg_routing ? "X" : "",
-	       msg->msg_ack ? "A" : "",
-	       msg->msg_sending ? "S" : "",
-	       msg->msg_receiving ? "R" : "",
-	       msg->msg_delayed ? "d" : "",
-	       msg->msg_txcredit ? "C" : "",
-	       msg->msg_peertxcredit ? "c" : "",
-	       msg->msg_rtrcredit ? "F" : "",
-	       msg->msg_peerrtrcredit ? "f" : "",
-	       msg->msg_onactivelist ? "!" : "",
-	       !msg->msg_txpeer ? "<none>" : libcfs_nid2str(msg->msg_txpeer->lp_nid),
-	       !msg->msg_rxpeer ? "<none>" : libcfs_nid2str(msg->msg_rxpeer->lp_nid));
-#endif
+
 	msg->msg_ev.status = status;
 
 	if (msg->msg_md) {
diff --git a/drivers/staging/lustre/lnet/lnet/lib-socket.c b/drivers/staging/lustre/lnet/lnet/lib-socket.c
index 891fd59401d7..4e6dd5149b4f 100644
--- a/drivers/staging/lustre/lnet/lnet/lib-socket.c
+++ b/drivers/staging/lustre/lnet/lnet/lib-socket.c
@@ -265,21 +265,17 @@ lnet_sock_write(struct socket *sock, void *buffer, int nob, int timeout)
 	long jiffies_left = timeout * msecs_to_jiffies(MSEC_PER_SEC);
 	unsigned long then;
 	struct timeval tv;
+	struct kvec  iov = { .iov_base = buffer, .iov_len  = nob };
+	struct msghdr msg = {NULL,};
 
 	LASSERT(nob > 0);
 	/*
 	 * Caller may pass a zero timeout if she thinks the socket buffer is
 	 * empty enough to take the whole message immediately
 	 */
+	iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, &iov, 1, nob);
 	for (;;) {
-		struct kvec  iov = {
-			.iov_base = buffer,
-			.iov_len  = nob
-		};
-		struct msghdr msg = {
-			.msg_flags      = !timeout ? MSG_DONTWAIT : 0
-		};
-
+		msg.msg_flags = !timeout ? MSG_DONTWAIT : 0;
 		if (timeout) {
 			/* Set send timeout to remaining time */
 			jiffies_to_timeval(jiffies_left, &tv);
@@ -296,9 +292,6 @@ lnet_sock_write(struct socket *sock, void *buffer, int nob, int timeout)
 		rc = kernel_sendmsg(sock, &msg, &iov, 1, nob);
 		jiffies_left -= jiffies - then;
 
-		if (rc == nob)
-			return 0;
-
 		if (rc < 0)
 			return rc;
 
@@ -307,11 +300,11 @@ lnet_sock_write(struct socket *sock, void *buffer, int nob, int timeout)
 			return -ECONNABORTED;
 		}
 
+		if (!msg_data_left(&msg))
+			break;
+
 		if (jiffies_left <= 0)
 			return -EAGAIN;
-
-		buffer = ((char *)buffer) + rc;
-		nob -= rc;
 	}
 	return 0;
 }
diff --git a/drivers/staging/lustre/lnet/lnet/lo.c b/drivers/staging/lustre/lnet/lnet/lo.c
index 08402712a452..cb213b8f51cf 100644
--- a/drivers/staging/lustre/lnet/lnet/lo.c
+++ b/drivers/staging/lustre/lnet/lnet/lo.c
@@ -42,36 +42,23 @@ lolnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg)
 
 static int
 lolnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg,
-	   int delayed, unsigned int niov,
-	   struct kvec *iov, lnet_kiov_t *kiov,
-	   unsigned int offset, unsigned int mlen, unsigned int rlen)
+	   int delayed, struct iov_iter *to, unsigned int rlen)
 {
 	lnet_msg_t *sendmsg = private;
 
 	if (lntmsg) {		   /* not discarding */
-		if (sendmsg->msg_iov) {
-			if (iov)
-				lnet_copy_iov2iov(niov, iov, offset,
-						  sendmsg->msg_niov,
-						  sendmsg->msg_iov,
-						  sendmsg->msg_offset, mlen);
-			else
-				lnet_copy_iov2kiov(niov, kiov, offset,
-						   sendmsg->msg_niov,
-						   sendmsg->msg_iov,
-						   sendmsg->msg_offset, mlen);
-		} else {
-			if (iov)
-				lnet_copy_kiov2iov(niov, iov, offset,
-						   sendmsg->msg_niov,
-						   sendmsg->msg_kiov,
-						   sendmsg->msg_offset, mlen);
-			else
-				lnet_copy_kiov2kiov(niov, kiov, offset,
-						    sendmsg->msg_niov,
-						    sendmsg->msg_kiov,
-						    sendmsg->msg_offset, mlen);
-		}
+		if (sendmsg->msg_iov)
+			lnet_copy_iov2iter(to,
+					   sendmsg->msg_niov,
+					   sendmsg->msg_iov,
+					   sendmsg->msg_offset,
+					   iov_iter_count(to));
+		else
+			lnet_copy_kiov2iter(to,
+					    sendmsg->msg_niov,
+					    sendmsg->msg_kiov,
+					    sendmsg->msg_offset,
+					    iov_iter_count(to));
 
 		lnet_finalize(ni, lntmsg, 0);
 	}
diff --git a/drivers/staging/lustre/lnet/lnet/router.c b/drivers/staging/lustre/lnet/lnet/router.c
index 063543233035..063ad55ec950 100644
--- a/drivers/staging/lustre/lnet/lnet/router.c
+++ b/drivers/staging/lustre/lnet/lnet/router.c
@@ -1307,7 +1307,7 @@ lnet_destroy_rtrbuf(lnet_rtrbuf_t *rb, int npages)
 	int sz = offsetof(lnet_rtrbuf_t, rb_kiov[npages]);
 
 	while (--npages >= 0)
-		__free_page(rb->rb_kiov[npages].kiov_page);
+		__free_page(rb->rb_kiov[npages].bv_page);
 
 	LIBCFS_FREE(rb, sz);
 }
@@ -1333,15 +1333,15 @@ lnet_new_rtrbuf(lnet_rtrbufpool_t *rbp, int cpt)
 				GFP_KERNEL | __GFP_ZERO, 0);
 		if (!page) {
 			while (--i >= 0)
-				__free_page(rb->rb_kiov[i].kiov_page);
+				__free_page(rb->rb_kiov[i].bv_page);
 
 			LIBCFS_FREE(rb, sz);
 			return NULL;
 		}
 
-		rb->rb_kiov[i].kiov_len = PAGE_SIZE;
-		rb->rb_kiov[i].kiov_offset = 0;
-		rb->rb_kiov[i].kiov_page = page;
+		rb->rb_kiov[i].bv_len = PAGE_SIZE;
+		rb->rb_kiov[i].bv_offset = 0;
+		rb->rb_kiov[i].bv_page = page;
 	}
 
 	return rb;
@@ -1693,7 +1693,7 @@ lnet_rtrpools_adjust(int tiny, int small, int large)
 int
 lnet_rtrpools_enable(void)
 {
-	int rc;
+	int rc = 0;
 
 	if (the_lnet.ln_routing)
 		return 0;
@@ -1706,9 +1706,9 @@ lnet_rtrpools_enable(void)
 		 * if we are just configuring this for the first
 		 * time.
 		 */
-		return lnet_rtrpools_alloc(1);
-
-	rc = lnet_rtrpools_adjust_helper(0, 0, 0);
+		rc = lnet_rtrpools_alloc(1);
+	else
+		rc = lnet_rtrpools_adjust_helper(0, 0, 0);
 	if (rc)
 		return rc;
 
@@ -1718,7 +1718,7 @@ lnet_rtrpools_enable(void)
 	the_lnet.ln_ping_info->pi_features &= ~LNET_PING_FEAT_RTE_DISABLED;
 	lnet_net_unlock(LNET_LOCK_EX);
 
-	return 0;
+	return rc;
 }
 
 void
diff --git a/drivers/staging/lustre/lnet/selftest/brw_test.c b/drivers/staging/lustre/lnet/selftest/brw_test.c
index 13d0454e7fcb..b20c5d394e3b 100644
--- a/drivers/staging/lustre/lnet/selftest/brw_test.c
+++ b/drivers/staging/lustre/lnet/selftest/brw_test.c
@@ -226,7 +226,7 @@ brw_fill_bulk(struct srpc_bulk *bk, int pattern, __u64 magic)
 	struct page *pg;
 
 	for (i = 0; i < bk->bk_niov; i++) {
-		pg = bk->bk_iovs[i].kiov_page;
+		pg = bk->bk_iovs[i].bv_page;
 		brw_fill_page(pg, pattern, magic);
 	}
 }
@@ -238,7 +238,7 @@ brw_check_bulk(struct srpc_bulk *bk, int pattern, __u64 magic)
 	struct page *pg;
 
 	for (i = 0; i < bk->bk_niov; i++) {
-		pg = bk->bk_iovs[i].kiov_page;
+		pg = bk->bk_iovs[i].bv_page;
 		if (brw_check_page(pg, pattern, magic)) {
 			CERROR("Bulk page %p (%d/%d) is corrupted!\n",
 			       pg, i, bk->bk_niov);
diff --git a/drivers/staging/lustre/lnet/selftest/conrpc.c b/drivers/staging/lustre/lnet/selftest/conrpc.c
index 1be3cad727ae..55afb53b0743 100644
--- a/drivers/staging/lustre/lnet/selftest/conrpc.c
+++ b/drivers/staging/lustre/lnet/selftest/conrpc.c
@@ -152,10 +152,10 @@ lstcon_rpc_put(struct lstcon_rpc *crpc)
 	LASSERT(list_empty(&crpc->crp_link));
 
 	for (i = 0; i < bulk->bk_niov; i++) {
-		if (!bulk->bk_iovs[i].kiov_page)
+		if (!bulk->bk_iovs[i].bv_page)
 			continue;
 
-		__free_page(bulk->bk_iovs[i].kiov_page);
+		__free_page(bulk->bk_iovs[i].bv_page);
 	}
 
 	srpc_client_rpc_decref(crpc->crp_rpc);
@@ -705,7 +705,7 @@ lstcon_next_id(int idx, int nkiov, lnet_kiov_t *kiov)
 
 	LASSERT(i < nkiov);
 
-	pid = (lnet_process_id_packed_t *)page_address(kiov[i].kiov_page);
+	pid = (lnet_process_id_packed_t *)page_address(kiov[i].bv_page);
 
 	return &pid[idx % SFW_ID_PER_PAGE];
 }
@@ -849,12 +849,11 @@ lstcon_testrpc_prep(struct lstcon_node *nd, int transop, unsigned feats,
 			      min_t(int, nob, PAGE_SIZE);
 			nob -= len;
 
-			bulk->bk_iovs[i].kiov_offset = 0;
-			bulk->bk_iovs[i].kiov_len = len;
-			bulk->bk_iovs[i].kiov_page =
-				alloc_page(GFP_KERNEL);
+			bulk->bk_iovs[i].bv_offset = 0;
+			bulk->bk_iovs[i].bv_len = len;
+			bulk->bk_iovs[i].bv_page = alloc_page(GFP_KERNEL);
 
-			if (!bulk->bk_iovs[i].kiov_page) {
+			if (!bulk->bk_iovs[i].bv_page) {
 				lstcon_rpc_put(*crpc);
 				return -ENOMEM;
 			}
diff --git a/drivers/staging/lustre/lnet/selftest/console.c b/drivers/staging/lustre/lnet/selftest/console.c
index 4c33621f06da..a0fcbf3bcc95 100644
--- a/drivers/staging/lustre/lnet/selftest/console.c
+++ b/drivers/staging/lustre/lnet/selftest/console.c
@@ -1993,8 +1993,6 @@ static void lstcon_init_acceptor_service(void)
 	lstcon_acceptor_service.sv_wi_total = SFW_FRWK_WI_MAX;
 }
 
-extern int lstcon_ioctl_entry(unsigned int cmd, struct libcfs_ioctl_hdr *hdr);
-
 static DECLARE_IOCTL_HANDLER(lstcon_ioctl_handler, lstcon_ioctl_entry);
 
 /* initialize console */
diff --git a/drivers/staging/lustre/lnet/selftest/console.h b/drivers/staging/lustre/lnet/selftest/console.h
index 78b147732615..78388a611c22 100644
--- a/drivers/staging/lustre/lnet/selftest/console.h
+++ b/drivers/staging/lustre/lnet/selftest/console.h
@@ -184,6 +184,7 @@ lstcon_id2hash(lnet_process_id_t id, struct list_head *hash)
 	return &hash[idx];
 }
 
+int lstcon_ioctl_entry(unsigned int cmd, struct libcfs_ioctl_hdr *hdr);
 int lstcon_console_init(void);
 int lstcon_console_fini(void);
 int lstcon_session_match(lst_sid_t sid);
diff --git a/drivers/staging/lustre/lnet/selftest/framework.c b/drivers/staging/lustre/lnet/selftest/framework.c
index c2f121f44d33..abbd6287b4bd 100644
--- a/drivers/staging/lustre/lnet/selftest/framework.c
+++ b/drivers/staging/lustre/lnet/selftest/framework.c
@@ -784,8 +784,8 @@ sfw_add_test_instance(struct sfw_batch *tsb, struct srpc_server_rpc *rpc)
 		lnet_process_id_packed_t id;
 		int j;
 
-		dests = page_address(bk->bk_iovs[i / SFW_ID_PER_PAGE].kiov_page);
-		LASSERT(dests);		/* my pages are within KVM always */
+		dests = page_address(bk->bk_iovs[i / SFW_ID_PER_PAGE].bv_page);
+		LASSERT(dests);  /* my pages are within KVM always */
 		id = dests[i % SFW_ID_PER_PAGE];
 		if (msg->msg_magic != SRPC_MSG_MAGIC)
 			sfw_unpack_id(id);
diff --git a/drivers/staging/lustre/lnet/selftest/rpc.c b/drivers/staging/lustre/lnet/selftest/rpc.c
index 3b26d6eb4240..f5619d8744ef 100644
--- a/drivers/staging/lustre/lnet/selftest/rpc.c
+++ b/drivers/staging/lustre/lnet/selftest/rpc.c
@@ -91,9 +91,9 @@ srpc_add_bulk_page(struct srpc_bulk *bk, struct page *pg, int i, int nob)
 	LASSERT(nob > 0);
 	LASSERT(i >= 0 && i < bk->bk_niov);
 
-	bk->bk_iovs[i].kiov_offset = 0;
-	bk->bk_iovs[i].kiov_page = pg;
-	bk->bk_iovs[i].kiov_len = nob;
+	bk->bk_iovs[i].bv_offset = 0;
+	bk->bk_iovs[i].bv_page = pg;
+	bk->bk_iovs[i].bv_len = nob;
 	return nob;
 }
 
@@ -106,7 +106,7 @@ srpc_free_bulk(struct srpc_bulk *bk)
 	LASSERT(bk);
 
 	for (i = 0; i < bk->bk_niov; i++) {
-		pg = bk->bk_iovs[i].kiov_page;
+		pg = bk->bk_iovs[i].bv_page;
 		if (!pg)
 			break;
 
diff --git a/drivers/staging/lustre/lustre/fid/fid_lib.c b/drivers/staging/lustre/lustre/fid/fid_lib.c
index 99ae7eb6720e..4e49cb356d64 100644
--- a/drivers/staging/lustre/lustre/fid/fid_lib.c
+++ b/drivers/staging/lustre/lustre/fid/fid_lib.c
@@ -63,14 +63,12 @@ const struct lu_seq_range LUSTRE_SEQ_SPACE_RANGE = {
 	FID_SEQ_NORMAL,
 	(__u64)~0ULL
 };
-EXPORT_SYMBOL(LUSTRE_SEQ_SPACE_RANGE);
 
 /* Zero range, used for init and other purposes. */
 const struct lu_seq_range LUSTRE_SEQ_ZERO_RANGE = {
 	0,
 	0
 };
-EXPORT_SYMBOL(LUSTRE_SEQ_ZERO_RANGE);
 
 /* Lustre Big Fs Lock fid. */
 const struct lu_fid LUSTRE_BFL_FID = { .f_seq = FID_SEQ_SPECIAL,
diff --git a/drivers/staging/lustre/lustre/fid/fid_request.c b/drivers/staging/lustre/lustre/fid/fid_request.c
index 454744d25956..edd72b926f81 100644
--- a/drivers/staging/lustre/lustre/fid/fid_request.c
+++ b/drivers/staging/lustre/lustre/fid/fid_request.c
@@ -125,19 +125,19 @@ static int seq_client_rpc(struct lu_client_seq *seq,
 
 	if (!range_is_sane(output)) {
 		CERROR("%s: Invalid range received from server: "
-		       DRANGE"\n", seq->lcs_name, PRANGE(output));
+		       DRANGE "\n", seq->lcs_name, PRANGE(output));
 		rc = -EINVAL;
 		goto out_req;
 	}
 
 	if (range_is_exhausted(output)) {
 		CERROR("%s: Range received from server is exhausted: "
-		       DRANGE"]\n", seq->lcs_name, PRANGE(output));
+		       DRANGE "]\n", seq->lcs_name, PRANGE(output));
 		rc = -EINVAL;
 		goto out_req;
 	}
 
-	CDEBUG_LIMIT(debug_mask, "%s: Allocated %s-sequence "DRANGE"]\n",
+	CDEBUG_LIMIT(debug_mask, "%s: Allocated %s-sequence " DRANGE "]\n",
 		     seq->lcs_name, opcname, PRANGE(output));
 
 out_req:
@@ -179,7 +179,7 @@ static int seq_client_alloc_seq(const struct lu_env *env,
 			       seq->lcs_name, rc);
 			return rc;
 		}
-		CDEBUG(D_INFO, "%s: New range - "DRANGE"\n",
+		CDEBUG(D_INFO, "%s: New range - " DRANGE "\n",
 		       seq->lcs_name, PRANGE(&seq->lcs_space));
 	} else {
 		rc = 0;
diff --git a/drivers/staging/lustre/lustre/fid/lproc_fid.c b/drivers/staging/lustre/lustre/fid/lproc_fid.c
index 81b7ca9ea2fd..3ed32d77f38b 100644
--- a/drivers/staging/lustre/lustre/fid/lproc_fid.c
+++ b/drivers/staging/lustre/lustre/fid/lproc_fid.c
@@ -105,7 +105,7 @@ ldebugfs_fid_space_seq_write(struct file *file,
 	rc = ldebugfs_fid_write_common(buffer, count, &seq->lcs_space);
 
 	if (rc == 0) {
-		CDEBUG(D_INFO, "%s: Space: "DRANGE"\n",
+		CDEBUG(D_INFO, "%s: Space: " DRANGE "\n",
 		       seq->lcs_name, PRANGE(&seq->lcs_space));
 	}
 
diff --git a/drivers/staging/lustre/lustre/fld/fld_internal.h b/drivers/staging/lustre/lustre/fld/fld_internal.h
index f0efe5b9fbec..08eaec735d6f 100644
--- a/drivers/staging/lustre/lustre/fld/fld_internal.h
+++ b/drivers/staging/lustre/lustre/fld/fld_internal.h
@@ -31,6 +31,25 @@
  *
  * lustre/fld/fld_internal.h
  *
+ * Subsystem Description:
+ * FLD is FID Location Database, which stores where (IE, on which MDT)
+ * FIDs are located.
+ * The database is basically a record file, each record consists of a FID
+ * sequence range, MDT/OST index, and flags. The FLD for the whole FS
+ * is only stored on the sequence controller(MDT0) right now, but each target
+ * also has its local FLD, which only stores the local sequence.
+ *
+ * The FLD subsystem usually has two tasks:
+ * 1. maintain the database, i.e. when the sequence controller allocates
+ * new sequence ranges to some nodes, it will call the FLD API to insert the
+ * location information <sequence_range, node_index> in FLDB.
+ *
+ * 2. Handle requests from other nodes, i.e. if client needs to know where
+ * the FID is located, if it can not find the information in the local cache,
+ * it will send a FLD lookup RPC to the FLD service, and the FLD service will
+ * look up the FLDB entry and return the location information to client.
+ *
+ *
  * Author: Yury Umanets <umka@clusterfs.com>
  * Author: Tom WangDi <wangdi@clusterfs.com>
  */
diff --git a/drivers/staging/lustre/lustre/fld/fld_request.c b/drivers/staging/lustre/lustre/fld/fld_request.c
index e59d626a1548..0de72b717ce5 100644
--- a/drivers/staging/lustre/lustre/fld/fld_request.c
+++ b/drivers/staging/lustre/lustre/fld/fld_request.c
@@ -53,57 +53,6 @@
 #include "../include/lustre_mdc.h"
 #include "fld_internal.h"
 
-/* TODO: these 3 functions are copies of flow-control code from mdc_lib.c
- * It should be common thing. The same about mdc RPC lock
- */
-static int fld_req_avail(struct client_obd *cli, struct mdc_cache_waiter *mcw)
-{
-	int rc;
-
-	spin_lock(&cli->cl_loi_list_lock);
-	rc = list_empty(&mcw->mcw_entry);
-	spin_unlock(&cli->cl_loi_list_lock);
-	return rc;
-};
-
-static void fld_enter_request(struct client_obd *cli)
-{
-	struct mdc_cache_waiter mcw;
-	struct l_wait_info lwi = { 0 };
-
-	spin_lock(&cli->cl_loi_list_lock);
-	if (cli->cl_r_in_flight >= cli->cl_max_rpcs_in_flight) {
-		list_add_tail(&mcw.mcw_entry, &cli->cl_cache_waiters);
-		init_waitqueue_head(&mcw.mcw_waitq);
-		spin_unlock(&cli->cl_loi_list_lock);
-		l_wait_event(mcw.mcw_waitq, fld_req_avail(cli, &mcw), &lwi);
-	} else {
-		cli->cl_r_in_flight++;
-		spin_unlock(&cli->cl_loi_list_lock);
-	}
-}
-
-static void fld_exit_request(struct client_obd *cli)
-{
-	struct list_head *l, *tmp;
-	struct mdc_cache_waiter *mcw;
-
-	spin_lock(&cli->cl_loi_list_lock);
-	cli->cl_r_in_flight--;
-	list_for_each_safe(l, tmp, &cli->cl_cache_waiters) {
-		if (cli->cl_r_in_flight >= cli->cl_max_rpcs_in_flight) {
-			/* No free request slots anymore */
-			break;
-		}
-
-		mcw = list_entry(l, struct mdc_cache_waiter, mcw_entry);
-		list_del_init(&mcw->mcw_entry);
-		cli->cl_r_in_flight++;
-		wake_up(&mcw->mcw_waitq);
-	}
-	spin_unlock(&cli->cl_loi_list_lock);
-}
-
 static int fld_rrb_hash(struct lu_client_fld *fld, u64 seq)
 {
 	LASSERT(fld->lcf_count > 0);
@@ -270,7 +219,6 @@ int fld_client_del_target(struct lu_client_fld *fld, __u64 idx)
 	spin_unlock(&fld->lcf_lock);
 	return -ENOENT;
 }
-EXPORT_SYMBOL(fld_client_del_target);
 
 static struct dentry *fld_debugfs_dir;
 
@@ -439,9 +387,9 @@ int fld_client_rpc(struct obd_export *exp,
 	req->rq_reply_portal = MDC_REPLY_PORTAL;
 	ptlrpc_at_set_req_timeout(req);
 
-	fld_enter_request(&exp->exp_obd->u.cli);
+	obd_get_request_slot(&exp->exp_obd->u.cli);
 	rc = ptlrpc_queue_wait(req);
-	fld_exit_request(&exp->exp_obd->u.cli);
+	obd_put_request_slot(&exp->exp_obd->u.cli);
 	if (rc)
 		goto out_req;
 
@@ -505,7 +453,6 @@ void fld_client_flush(struct lu_client_fld *fld)
 {
 	fld_cache_flush(fld->lcf_cache);
 }
-EXPORT_SYMBOL(fld_client_flush);
 
 static int __init fld_init(void)
 {
diff --git a/drivers/staging/lustre/lustre/include/cl_object.h b/drivers/staging/lustre/lustre/include/cl_object.h
index 3cd4a2577d90..89292c93dcd5 100644
--- a/drivers/staging/lustre/lustre/include/cl_object.h
+++ b/drivers/staging/lustre/lustre/include/cl_object.h
@@ -93,8 +93,8 @@
  * super-class definitions.
  */
 #include "lu_object.h"
+#include "lustre_compat.h"
 #include <linux/atomic.h>
-#include "linux/lustre_compat25.h"
 #include <linux/mutex.h>
 #include <linux/radix-tree.h>
 #include <linux/spinlock.h>
@@ -191,6 +191,9 @@ struct cl_attr {
 	 * Group identifier for quota purposes.
 	 */
 	gid_t  cat_gid;
+
+	/* nlink of the directory */
+	__u64  cat_nlink;
 };
 
 /**
@@ -320,7 +323,7 @@ struct cl_object_operations {
 	 *	 to be used instead of newly created.
 	 */
 	int  (*coo_page_init)(const struct lu_env *env, struct cl_object *obj,
-				struct cl_page *page, pgoff_t index);
+			      struct cl_page *page, pgoff_t index);
 	/**
 	 * Initialize lock slice for this layer. Called top-to-bottom through
 	 * every object layer when a new cl_lock is instantiated. Layer
@@ -366,8 +369,8 @@ struct cl_object_operations {
 	 * \return the same convention as for
 	 * cl_object_operations::coo_attr_get() is used.
 	 */
-	int (*coo_attr_set)(const struct lu_env *env, struct cl_object *obj,
-			    const struct cl_attr *attr, unsigned valid);
+	int (*coo_attr_update)(const struct lu_env *env, struct cl_object *obj,
+			       const struct cl_attr *attr, unsigned int valid);
 	/**
 	 * Update object configuration. Called top-to-bottom to modify object
 	 * configuration.
@@ -392,6 +395,11 @@ struct cl_object_operations {
 	 * mainly pages and locks.
 	 */
 	int (*coo_prune)(const struct lu_env *env, struct cl_object *obj);
+	/**
+	 * Object getstripe method.
+	 */
+	int (*coo_getstripe)(const struct lu_env *env, struct cl_object *obj,
+			     struct lov_user_md __user *lum);
 };
 
 /**
@@ -687,17 +695,6 @@ enum cl_page_type {
 };
 
 /**
- * Flags maintained for every cl_page.
- */
-enum cl_page_flags {
-	/**
-	 * Set when pagein completes. Used for debugging (read completes at
-	 * most once for a page).
-	 */
-	CPF_READ_COMPLETED = 1 << 0
-};
-
-/**
  * Fields are protected by the lock on struct page, except for atomics and
  * immutables.
  *
@@ -711,24 +708,19 @@ struct cl_page {
 	atomic_t	     cp_ref;
 	/** An object this page is a part of. Immutable after creation. */
 	struct cl_object	*cp_obj;
-	/** List of slices. Immutable after creation. */
-	struct list_head	       cp_layers;
 	/** vmpage */
 	struct page		*cp_vmpage;
+	/** Linkage of pages within group. Pages must be owned */
+	struct list_head	 cp_batch;
+	/** List of slices. Immutable after creation. */
+	struct list_head	 cp_layers;
+	/** Linkage of pages within cl_req. */
+	struct list_head         cp_flight;
 	/**
 	 * Page state. This field is const to avoid accidental update, it is
 	 * modified only internally within cl_page.c. Protected by a VM lock.
 	 */
 	const enum cl_page_state cp_state;
-	/** Linkage of pages within group. Protected by cl_page::cp_mutex. */
-	struct list_head		cp_batch;
-	/** Mutex serializing membership of a page in a batch. */
-	struct mutex		cp_mutex;
-	/** Linkage of pages within cl_req. */
-	struct list_head	       cp_flight;
-	/** Transfer error. */
-	int		      cp_error;
-
 	/**
 	 * Page type. Only CPT_TRANSIENT is used so far. Immutable after
 	 * creation.
@@ -741,10 +733,6 @@ struct cl_page {
 	 */
 	struct cl_io	    *cp_owner;
 	/**
-	 * Debug information, the task is owning the page.
-	 */
-	struct task_struct	*cp_task;
-	/**
 	 * Owning IO request in cl_page_state::CPS_PAGEOUT and
 	 * cl_page_state::CPS_PAGEIN states. This field is maintained only in
 	 * the top-level pages. Protected by a VM lock.
@@ -756,8 +744,6 @@ struct cl_page {
 	struct lu_ref_link       cp_obj_ref;
 	/** Link to a queue, for debugging. */
 	struct lu_ref_link       cp_queue_ref;
-	/** Per-page flags from enum cl_page_flags. Protected by a VM lock. */
-	unsigned                 cp_flags;
 	/** Assigned if doing a sync_io */
 	struct cl_sync_io       *cp_sync_io;
 };
@@ -1056,23 +1042,32 @@ do {									  \
 	}								     \
 } while (0)
 
-static inline int __page_in_use(const struct cl_page *page, int refc)
-{
-	if (page->cp_type == CPT_CACHEABLE)
-		++refc;
-	LASSERT(atomic_read(&page->cp_ref) > 0);
-	return (atomic_read(&page->cp_ref) > refc);
-}
-
-#define cl_page_in_use(pg)       __page_in_use(pg, 1)
-#define cl_page_in_use_noref(pg) __page_in_use(pg, 0)
-
 static inline struct page *cl_page_vmpage(struct cl_page *page)
 {
 	LASSERT(page->cp_vmpage);
 	return page->cp_vmpage;
 }
 
+/**
+ * Check if a cl_page is in use.
+ *
+ * Client cache holds a refcount, this refcount will be dropped when
+ * the page is taken out of cache, see vvp_page_delete().
+ */
+static inline bool __page_in_use(const struct cl_page *page, int refc)
+{
+	return (atomic_read(&page->cp_ref) > refc + 1);
+}
+
+/**
+ * Caller itself holds a refcount of cl_page.
+ */
+#define cl_page_in_use(pg)	 __page_in_use(pg, 1)
+/**
+ * Caller doesn't hold a refcount.
+ */
+#define cl_page_in_use_noref(pg) __page_in_use(pg, 0)
+
 /** @} cl_page */
 
 /** \addtogroup cl_lock cl_lock
@@ -1771,12 +1766,14 @@ struct cl_io {
 		struct cl_setattr_io {
 			struct ost_lvb   sa_attr;
 			unsigned int     sa_valid;
+			int		sa_stripe_index;
+			struct lu_fid  *sa_parent_fid;
 		} ci_setattr;
 		struct cl_fault_io {
 			/** page index within file. */
 			pgoff_t	 ft_index;
 			/** bytes valid byte on a faulted page. */
-			int	     ft_nob;
+			size_t	     ft_nob;
 			/** writable page? for nopage() only */
 			int	     ft_writable;
 			/** page of an executable? */
@@ -1909,7 +1906,7 @@ struct cl_req_attr {
 	/** Generic attributes for the server consumption. */
 	struct obdo	*cra_oa;
 	/** Jobid */
-	char		 cra_jobid[JOBSTATS_JOBID_SIZE];
+	char		 cra_jobid[LUSTRE_JOBID_SIZE];
 };
 
 /**
@@ -2176,14 +2173,16 @@ void cl_object_attr_lock(struct cl_object *o);
 void cl_object_attr_unlock(struct cl_object *o);
 int  cl_object_attr_get(const struct lu_env *env, struct cl_object *obj,
 			struct cl_attr *attr);
-int  cl_object_attr_set(const struct lu_env *env, struct cl_object *obj,
-			const struct cl_attr *attr, unsigned valid);
+int  cl_object_attr_update(const struct lu_env *env, struct cl_object *obj,
+			   const struct cl_attr *attr, unsigned int valid);
 int  cl_object_glimpse(const struct lu_env *env, struct cl_object *obj,
 		       struct ost_lvb *lvb);
 int  cl_conf_set(const struct lu_env *env, struct cl_object *obj,
 		 const struct cl_object_conf *conf);
 int cl_object_prune(const struct lu_env *env, struct cl_object *obj);
 void cl_object_kill(const struct lu_env *env, struct cl_object *obj);
+int  cl_object_getstripe(const struct lu_env *env, struct cl_object *obj,
+			 struct lov_user_md __user *lum);
 
 /**
  * Returns true, iff \a o0 and \a o1 are slices of the same object.
@@ -2197,6 +2196,7 @@ static inline void cl_object_page_init(struct cl_object *clob, int size)
 {
 	clob->co_slice_off = cl_object_header(clob)->coh_page_bufsize;
 	cl_object_header(clob)->coh_page_bufsize += cfs_size_round(size);
+	WARN_ON(cl_object_header(clob)->coh_page_bufsize > 512);
 }
 
 static inline void *cl_object_page_slice(struct cl_object *clob,
@@ -2263,6 +2263,8 @@ void cl_page_unassume(const struct lu_env *env,
 		      struct cl_io *io, struct cl_page *pg);
 void cl_page_disown(const struct lu_env *env,
 		    struct cl_io *io, struct cl_page *page);
+void cl_page_disown0(const struct lu_env *env,
+		     struct cl_io *io, struct cl_page *pg);
 int cl_page_is_owned(const struct cl_page *pg, const struct cl_io *io);
 
 /** @} ownership */
@@ -2304,7 +2306,7 @@ int cl_page_is_under_lock(const struct lu_env *env, struct cl_io *io,
 			  struct cl_page *page, pgoff_t *max_index);
 loff_t cl_offset(const struct cl_object *obj, pgoff_t idx);
 pgoff_t cl_index(const struct cl_object *obj, loff_t offset);
-int cl_page_size(const struct cl_object *obj);
+size_t cl_page_size(const struct cl_object *obj);
 int cl_pages_prune(const struct lu_env *env, struct cl_object *obj);
 
 void cl_lock_print(const struct lu_env *env, void *cookie,
@@ -2333,7 +2335,7 @@ struct cl_client_cache {
 	/**
 	 * # of LRU entries available
 	 */
-	atomic_t		ccc_lru_left;
+	atomic_long_t		ccc_lru_left;
 	/**
 	 * List of entities(OSCs) for this LRU cache
 	 */
@@ -2347,14 +2349,18 @@ struct cl_client_cache {
 	 */
 	spinlock_t		ccc_lru_lock;
 	/**
+	 * Set if unstable check is enabled
+	 */
+	unsigned int		ccc_unstable_check:1;
+	/**
 	 * # of unstable pages for this mount point
 	 */
-	atomic_t		ccc_unstable_nr;
+	atomic_long_t		ccc_unstable_nr;
 	/**
 	 * Waitq for awaiting unstable pages to reach zero.
 	 * Used at umounting time and signaled on BRW commit
 	 */
-        wait_queue_head_t	ccc_unstable_waitq;
+	wait_queue_head_t	ccc_unstable_waitq;
 
 };
 
diff --git a/drivers/staging/lustre/lustre/include/interval_tree.h b/drivers/staging/lustre/lustre/include/interval_tree.h
index 4a15228b5570..5d387d372547 100644
--- a/drivers/staging/lustre/lustre/include/interval_tree.h
+++ b/drivers/staging/lustre/lustre/include/interval_tree.h
@@ -63,6 +63,11 @@ static inline int interval_is_intree(struct interval_node *node)
 	return node->in_intree == 1;
 }
 
+static inline __u64 interval_low(struct interval_node *node)
+{
+	return node->in_extent.start;
+}
+
 static inline __u64 interval_high(struct interval_node *node)
 {
 	return node->in_extent.end;
@@ -77,8 +82,29 @@ static inline void interval_set(struct interval_node *node,
 	node->in_max_high = end;
 }
 
+/*
+ * Rules to write an interval callback.
+ *  - the callback returns INTERVAL_ITER_STOP when it thinks the iteration
+ *    should be stopped. It will then cause the iteration function to return
+ *    immediately with return value INTERVAL_ITER_STOP.
+ *  - callbacks for interval_iterate and interval_iterate_reverse: Every
+ *    nodes in the tree will be set to @node before the callback being called
+ *  - callback for interval_search: Only overlapped node will be set to @node
+ *    before the callback being called.
+ */
+typedef enum interval_iter (*interval_callback_t)(struct interval_node *node,
+						  void *args);
+
 struct interval_node *interval_insert(struct interval_node *node,
 				      struct interval_node **root);
 void interval_erase(struct interval_node *node, struct interval_node **root);
 
+/*
+ * Search the extents in the tree and call @func for each overlapped
+ * extents.
+ */
+enum interval_iter interval_search(struct interval_node *root,
+				   struct interval_node_extent *ex,
+				   interval_callback_t func, void *data);
+
 #endif
diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_lite.h b/drivers/staging/lustre/lustre/include/linux/lustre_lite.h
deleted file mode 100644
index d18e8a76bb25..000000000000
--- a/drivers/staging/lustre/lustre/include/linux/lustre_lite.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#ifndef _LINUX_LL_H
-#define _LINUX_LL_H
-
-#ifndef _LL_H
-#error Do not #include this file directly. #include <lustre_lite.h> instead
-#endif
-
-#include <linux/statfs.h>
-
-#include <linux/fs.h>
-#include <linux/dcache.h>
-
-#include "../obd_class.h"
-#include "../lustre_net.h"
-#include "../lustre_ha.h"
-
-#include <linux/rbtree.h>
-#include "../../include/linux/lustre_compat25.h"
-#include <linux/pagemap.h>
-
-/* lprocfs.c */
-enum {
-	 LPROC_LL_DIRTY_HITS = 0,
-	 LPROC_LL_DIRTY_MISSES,
-	 LPROC_LL_READ_BYTES,
-	 LPROC_LL_WRITE_BYTES,
-	 LPROC_LL_BRW_READ,
-	 LPROC_LL_BRW_WRITE,
-	 LPROC_LL_OSC_READ,
-	 LPROC_LL_OSC_WRITE,
-	 LPROC_LL_IOCTL,
-	 LPROC_LL_OPEN,
-	 LPROC_LL_RELEASE,
-	 LPROC_LL_MAP,
-	 LPROC_LL_LLSEEK,
-	 LPROC_LL_FSYNC,
-	 LPROC_LL_READDIR,
-	 LPROC_LL_SETATTR,
-	 LPROC_LL_TRUNC,
-	 LPROC_LL_FLOCK,
-	 LPROC_LL_GETATTR,
-	 LPROC_LL_CREATE,
-	 LPROC_LL_LINK,
-	 LPROC_LL_UNLINK,
-	 LPROC_LL_SYMLINK,
-	 LPROC_LL_MKDIR,
-	 LPROC_LL_RMDIR,
-	 LPROC_LL_MKNOD,
-	 LPROC_LL_RENAME,
-	 LPROC_LL_STAFS,
-	 LPROC_LL_ALLOC_INODE,
-	 LPROC_LL_SETXATTR,
-	 LPROC_LL_GETXATTR,
-	 LPROC_LL_GETXATTR_HITS,
-	 LPROC_LL_LISTXATTR,
-	 LPROC_LL_REMOVEXATTR,
-	 LPROC_LL_INODE_PERM,
-	 LPROC_LL_FILE_OPCODES
-};
-
-#endif
diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_user.h b/drivers/staging/lustre/lustre/include/linux/lustre_user.h
deleted file mode 100644
index e967950e8536..000000000000
--- a/drivers/staging/lustre/lustre/include/linux/lustre_user.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lustre/include/linux/lustre_user.h
- *
- * Lustre public user-space interface definitions.
- */
-
-#ifndef _LINUX_LUSTRE_USER_H
-#define _LINUX_LUSTRE_USER_H
-
-# include <linux/quota.h>
-
-/*
- * asm-x86_64/processor.h on some SLES 9 distros seems to use
- * kernel-only typedefs.  fortunately skipping it altogether is ok
- * (for now).
- */
-#define __ASM_X86_64_PROCESSOR_H
-
-#include <linux/string.h>
-
-/*
- * We need to always use 64bit version because the structure
- * is shared across entire cluster where 32bit and 64bit machines
- * are co-existing.
- */
-#if __BITS_PER_LONG != 64 || defined(__ARCH_WANT_STAT64)
-typedef struct stat64   lstat_t;
-#define lstat_f	 lstat64
-#else
-typedef struct stat     lstat_t;
-#define lstat_f	 lstat
-#endif
-
-#define HAVE_LOV_USER_MDS_DATA
-
-#endif /* _LUSTRE_USER_H */
diff --git a/drivers/staging/lustre/lustre/include/lprocfs_status.h b/drivers/staging/lustre/lustre/include/lprocfs_status.h
index d68e60e7fef7..cc0713ef8ae5 100644
--- a/drivers/staging/lustre/lustre/include/lprocfs_status.h
+++ b/drivers/staging/lustre/lustre/include/lprocfs_status.h
@@ -165,8 +165,10 @@ struct lprocfs_percpu {
 	struct lprocfs_counter lp_cntr[0];
 };
 
-#define LPROCFS_GET_NUM_CPU 0x0001
-#define LPROCFS_GET_SMP_ID  0x0002
+enum lprocfs_stats_lock_ops {
+	LPROCFS_GET_NUM_CPU	= 0x0001, /* number allocated per-CPU stats */
+	LPROCFS_GET_SMP_ID	= 0x0002, /* current stat to be updated */
+};
 
 enum lprocfs_stats_flags {
 	LPROCFS_STATS_FLAG_NONE     = 0x0000, /* per cpu counter */
@@ -363,82 +365,99 @@ static inline void s2dhms(struct dhms *ts, time64_t secs64)
 #define JOBSTATS_PROCNAME_UID		"procname_uid"
 #define JOBSTATS_NODELOCAL		"nodelocal"
 
+/* obd_config.c */
+void lustre_register_client_process_config(int (*cpc)(struct lustre_cfg *lcfg));
+
 int lprocfs_write_frac_helper(const char __user *buffer,
 			      unsigned long count, int *val, int mult);
 int lprocfs_read_frac_helper(char *buffer, unsigned long count,
 			     long val, int mult);
 int lprocfs_stats_alloc_one(struct lprocfs_stats *stats, unsigned int cpuid);
-/*
- * \return value
- *      < 0     : on error (only possible for opc as LPROCFS_GET_SMP_ID)
+
+/**
+ * Lock statistics structure for access, possibly only on this CPU.
+ *
+ * The statistics struct may be allocated with per-CPU structures for
+ * efficient concurrent update (usually only on server-wide stats), or
+ * as a single global struct (e.g. for per-client or per-job statistics),
+ * so the required locking depends on the type of structure allocated.
+ *
+ * For per-CPU statistics, pin the thread to the current cpuid so that
+ * will only access the statistics for that CPU.  If the stats structure
+ * for the current CPU has not been allocated (or previously freed),
+ * allocate it now.  The per-CPU statistics do not need locking since
+ * the thread is pinned to the CPU during update.
+ *
+ * For global statistics, lock the stats structure to prevent concurrent update.
+ *
+ * \param[in] stats	statistics structure to lock
+ * \param[in] opc	type of operation:
+ *			LPROCFS_GET_SMP_ID: "lock" and return current CPU index
+ *				for incrementing statistics for that CPU
+ *			LPROCFS_GET_NUM_CPU: "lock" and return number of used
+ *				CPU indices to iterate over all indices
+ * \param[out] flags	CPU interrupt saved state for IRQ-safe locking
+ *
+ * \retval cpuid of current thread or number of allocated structs
+ * \retval negative on error (only for opc LPROCFS_GET_SMP_ID + per-CPU stats)
  */
-static inline int lprocfs_stats_lock(struct lprocfs_stats *stats, int opc,
+static inline int lprocfs_stats_lock(struct lprocfs_stats *stats,
+				     enum lprocfs_stats_lock_ops opc,
 				     unsigned long *flags)
 {
-	int		rc = 0;
+	if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) {
+		if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE)
+			spin_lock_irqsave(&stats->ls_lock, *flags);
+		else
+			spin_lock(&stats->ls_lock);
+		return opc == LPROCFS_GET_NUM_CPU ? 1 : 0;
+	}
 
 	switch (opc) {
-	default:
-		LBUG();
+	case LPROCFS_GET_SMP_ID: {
+		unsigned int cpuid = get_cpu();
 
-	case LPROCFS_GET_SMP_ID:
-		if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) {
-			if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE)
-				spin_lock_irqsave(&stats->ls_lock, *flags);
-			else
-				spin_lock(&stats->ls_lock);
-			return 0;
-		} else {
-			unsigned int cpuid = get_cpu();
-
-			if (unlikely(!stats->ls_percpu[cpuid])) {
-				rc = lprocfs_stats_alloc_one(stats, cpuid);
-				if (rc < 0) {
-					put_cpu();
-					return rc;
-				}
+		if (unlikely(!stats->ls_percpu[cpuid])) {
+			int rc = lprocfs_stats_alloc_one(stats, cpuid);
+
+			if (rc < 0) {
+				put_cpu();
+				return rc;
 			}
-			return cpuid;
 		}
-
+		return cpuid;
+	}
 	case LPROCFS_GET_NUM_CPU:
-		if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) {
-			if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE)
-				spin_lock_irqsave(&stats->ls_lock, *flags);
-			else
-				spin_lock(&stats->ls_lock);
-			return 1;
-		}
 		return stats->ls_biggest_alloc_num;
+	default:
+		LBUG();
 	}
 }
 
-static inline void lprocfs_stats_unlock(struct lprocfs_stats *stats, int opc,
+/**
+ * Unlock statistics structure after access.
+ *
+ * Unlock the lock acquired via lprocfs_stats_lock() for global statistics,
+ * or unpin this thread from the current cpuid for per-CPU statistics.
+ *
+ * This function must be called using the same arguments as used when calling
+ * lprocfs_stats_lock() so that the correct operation can be performed.
+ *
+ * \param[in] stats	statistics structure to unlock
+ * \param[in] opc	type of operation (current cpuid or number of structs)
+ * \param[in] flags	CPU interrupt saved state for IRQ-safe locking
+ */
+static inline void lprocfs_stats_unlock(struct lprocfs_stats *stats,
+					enum lprocfs_stats_lock_ops opc,
 					unsigned long *flags)
 {
-	switch (opc) {
-	default:
-		LBUG();
-
-	case LPROCFS_GET_SMP_ID:
-		if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) {
-			if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE)
-				spin_unlock_irqrestore(&stats->ls_lock, *flags);
-			else
-				spin_unlock(&stats->ls_lock);
-		} else {
-			put_cpu();
-		}
-		return;
-
-	case LPROCFS_GET_NUM_CPU:
-		if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) {
-			if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE)
-				spin_unlock_irqrestore(&stats->ls_lock, *flags);
-			else
-				spin_unlock(&stats->ls_lock);
-		}
-		return;
+	if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) {
+		if (stats->ls_flags & LPROCFS_STATS_FLAG_IRQ_SAFE)
+			spin_unlock_irqrestore(&stats->ls_lock, *flags);
+		else
+			spin_unlock(&stats->ls_lock);
+	} else if (opc == LPROCFS_GET_SMP_ID) {
+		put_cpu();
 	}
 }
 
@@ -496,7 +515,7 @@ static inline __u64 lprocfs_stats_collector(struct lprocfs_stats *stats,
 					    int idx,
 					    enum lprocfs_fields_flags field)
 {
-	int	      i;
+	unsigned int i;
 	unsigned int  num_cpu;
 	unsigned long flags	= 0;
 	__u64	      ret	= 0;
@@ -681,6 +700,12 @@ static struct lustre_attr lustre_attr_##name = __ATTR(name, mode, show, store)
 
 extern const struct sysfs_ops lustre_sysfs_ops;
 
+struct root_squash_info;
+int lprocfs_wr_root_squash(const char *buffer, unsigned long count,
+			   struct root_squash_info *squash, char *name);
+int lprocfs_wr_nosquash_nids(const char *buffer, unsigned long count,
+			     struct root_squash_info *squash, char *name);
+
 /* all quota proc functions */
 int lprocfs_quota_rd_bunit(char *page, char **start,
 			   loff_t off, int count,
diff --git a/drivers/staging/lustre/lustre/include/lu_object.h b/drivers/staging/lustre/lustre/include/lu_object.h
index 6e25c1bb6aa3..260643ee0d48 100644
--- a/drivers/staging/lustre/lustre/include/lu_object.h
+++ b/drivers/staging/lustre/lustre/include/lu_object.h
@@ -327,7 +327,7 @@ struct lu_device_type {
 	/**
 	 * Number of existing device type instances.
 	 */
-	unsigned				ldt_device_nr;
+	atomic_t				ldt_device_nr;
 	/**
 	 * Linkage into a global list of all device types.
 	 *
@@ -602,7 +602,7 @@ struct lu_site {
 	/**
 	 * index of bucket on hash table while purging
 	 */
-	int		       ls_purge_start;
+	unsigned int		ls_purge_start;
 	/**
 	 * Top-level device for this stack.
 	 */
@@ -623,6 +623,11 @@ struct lu_site {
 	spinlock_t		ls_ld_lock;
 
 	/**
+	 * Lock to serialize site purge.
+	 */
+	struct mutex		ls_purge_mutex;
+
+	/**
 	 * lu_site stats
 	 */
 	struct lprocfs_stats	*ls_stats;
@@ -673,7 +678,6 @@ void lu_object_add(struct lu_object *before, struct lu_object *o);
 
 int  lu_device_type_init(struct lu_device_type *ldt);
 void lu_device_type_fini(struct lu_device_type *ldt);
-void lu_types_stop(void);
 
 /** @} ctors */
 
@@ -1025,7 +1029,8 @@ enum lu_context_tag {
 	/**
 	 * Contexts usable in cache shrinker thread.
 	 */
-	LCT_SHRINKER  = LCT_MD_THREAD|LCT_DT_THREAD|LCT_CL_THREAD|LCT_NOREF
+	LCT_SHRINKER  = LCT_MD_THREAD | LCT_DT_THREAD | LCT_CL_THREAD |
+			LCT_NOREF
 };
 
 /**
@@ -1264,12 +1269,28 @@ struct lu_name {
 };
 
 /**
+ * Validate names (path components)
+ *
+ * To be valid \a name must be non-empty, '\0' terminated of length \a
+ * name_len, and not contain '/'. The maximum length of a name (before
+ * say -ENAMETOOLONG will be returned) is really controlled by llite
+ * and the server. We only check for something insane coming from bad
+ * integer handling here.
+ */
+static inline bool lu_name_is_valid_2(const char *name, size_t name_len)
+{
+	return name && name_len > 0 && name_len < INT_MAX &&
+	       name[name_len] == '\0' && strlen(name) == name_len &&
+	       !memchr(name, '/', name_len);
+}
+
+/**
  * Common buffer structure to be passed around for various xattr_{s,g}et()
  * methods.
  */
 struct lu_buf {
 	void   *lb_buf;
-	ssize_t lb_len;
+	size_t	lb_len;
 };
 
 #define DLUBUF "(%p %zu)"
@@ -1298,5 +1319,12 @@ struct lu_kmem_descr {
 int  lu_kmem_init(struct lu_kmem_descr *caches);
 void lu_kmem_fini(struct lu_kmem_descr *caches);
 
+void lu_buf_free(struct lu_buf *buf);
+void lu_buf_alloc(struct lu_buf *buf, size_t size);
+void lu_buf_realloc(struct lu_buf *buf, size_t size);
+
+int lu_buf_check_and_grow(struct lu_buf *buf, size_t len);
+struct lu_buf *lu_buf_check_and_alloc(struct lu_buf *buf, size_t len);
+
 /** @} lu */
 #endif /* __LUSTRE_LU_OBJECT_H */
diff --git a/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h b/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h
index 051864c23b5b..72eaee95c6b8 100644
--- a/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h
+++ b/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h
@@ -93,6 +93,7 @@
 /* Defn's shared with user-space. */
 #include "lustre_user.h"
 #include "lustre_errno.h"
+#include "../lustre_ver.h"
 
 /*
  *  GENERAL STUFF
@@ -196,12 +197,12 @@ static inline unsigned fld_range_type(const struct lu_seq_range *range)
 	return range->lsr_flags & LU_SEQ_RANGE_MASK;
 }
 
-static inline int fld_range_is_ost(const struct lu_seq_range *range)
+static inline bool fld_range_is_ost(const struct lu_seq_range *range)
 {
 	return fld_range_type(range) == LU_SEQ_RANGE_OST;
 }
 
-static inline int fld_range_is_mdt(const struct lu_seq_range *range)
+static inline bool fld_range_is_mdt(const struct lu_seq_range *range)
 {
 	return fld_range_type(range) == LU_SEQ_RANGE_MDT;
 }
@@ -260,23 +261,23 @@ static inline void range_init(struct lu_seq_range *range)
  * check if given seq id \a s is within given range \a r
  */
 
-static inline int range_within(const struct lu_seq_range *range,
-			       __u64 s)
+static inline bool range_within(const struct lu_seq_range *range,
+				__u64 s)
 {
 	return s >= range->lsr_start && s < range->lsr_end;
 }
 
-static inline int range_is_sane(const struct lu_seq_range *range)
+static inline bool range_is_sane(const struct lu_seq_range *range)
 {
 	return (range->lsr_end >= range->lsr_start);
 }
 
-static inline int range_is_zero(const struct lu_seq_range *range)
+static inline bool range_is_zero(const struct lu_seq_range *range)
 {
 	return (range->lsr_start == 0 && range->lsr_end == 0);
 }
 
-static inline int range_is_exhausted(const struct lu_seq_range *range)
+static inline bool range_is_exhausted(const struct lu_seq_range *range)
 
 {
 	return range_space(range) == 0;
@@ -437,69 +438,69 @@ enum dot_lustre_oid {
 	FID_OID_DOT_LUSTRE_OBF = 2UL,
 };
 
-static inline int fid_seq_is_mdt0(__u64 seq)
+static inline bool fid_seq_is_mdt0(__u64 seq)
 {
 	return (seq == FID_SEQ_OST_MDT0);
 }
 
-static inline int fid_seq_is_mdt(const __u64 seq)
+static inline bool fid_seq_is_mdt(__u64 seq)
 {
 	return seq == FID_SEQ_OST_MDT0 || seq >= FID_SEQ_NORMAL;
 };
 
-static inline int fid_seq_is_echo(__u64 seq)
+static inline bool fid_seq_is_echo(__u64 seq)
 {
 	return (seq == FID_SEQ_ECHO);
 }
 
-static inline int fid_is_echo(const struct lu_fid *fid)
+static inline bool fid_is_echo(const struct lu_fid *fid)
 {
 	return fid_seq_is_echo(fid_seq(fid));
 }
 
-static inline int fid_seq_is_llog(__u64 seq)
+static inline bool fid_seq_is_llog(__u64 seq)
 {
 	return (seq == FID_SEQ_LLOG);
 }
 
-static inline int fid_is_llog(const struct lu_fid *fid)
+static inline bool fid_is_llog(const struct lu_fid *fid)
 {
 	/* file with OID == 0 is not llog but contains last oid */
 	return fid_seq_is_llog(fid_seq(fid)) && fid_oid(fid) > 0;
 }
 
-static inline int fid_seq_is_rsvd(const __u64 seq)
+static inline bool fid_seq_is_rsvd(__u64 seq)
 {
 	return (seq > FID_SEQ_OST_MDT0 && seq <= FID_SEQ_RSVD);
 };
 
-static inline int fid_seq_is_special(const __u64 seq)
+static inline bool fid_seq_is_special(__u64 seq)
 {
 	return seq == FID_SEQ_SPECIAL;
 };
 
-static inline int fid_seq_is_local_file(const __u64 seq)
+static inline bool fid_seq_is_local_file(__u64 seq)
 {
 	return seq == FID_SEQ_LOCAL_FILE ||
 	       seq == FID_SEQ_LOCAL_NAME;
 };
 
-static inline int fid_seq_is_root(const __u64 seq)
+static inline bool fid_seq_is_root(__u64 seq)
 {
 	return seq == FID_SEQ_ROOT;
 }
 
-static inline int fid_seq_is_dot(const __u64 seq)
+static inline bool fid_seq_is_dot(__u64 seq)
 {
 	return seq == FID_SEQ_DOT_LUSTRE;
 }
 
-static inline int fid_seq_is_default(const __u64 seq)
+static inline bool fid_seq_is_default(__u64 seq)
 {
 	return seq == FID_SEQ_LOV_DEFAULT;
 }
 
-static inline int fid_is_mdt0(const struct lu_fid *fid)
+static inline bool fid_is_mdt0(const struct lu_fid *fid)
 {
 	return fid_seq_is_mdt0(fid_seq(fid));
 }
@@ -516,12 +517,12 @@ static inline void lu_root_fid(struct lu_fid *fid)
  * \param fid the fid to be tested.
  * \return true if the fid is a igif; otherwise false.
  */
-static inline int fid_seq_is_igif(const __u64 seq)
+static inline bool fid_seq_is_igif(__u64 seq)
 {
 	return seq >= FID_SEQ_IGIF && seq <= FID_SEQ_IGIF_MAX;
 }
 
-static inline int fid_is_igif(const struct lu_fid *fid)
+static inline bool fid_is_igif(const struct lu_fid *fid)
 {
 	return fid_seq_is_igif(fid_seq(fid));
 }
@@ -531,27 +532,27 @@ static inline int fid_is_igif(const struct lu_fid *fid)
  * \param fid the fid to be tested.
  * \return true if the fid is a idif; otherwise false.
  */
-static inline int fid_seq_is_idif(const __u64 seq)
+static inline bool fid_seq_is_idif(__u64 seq)
 {
 	return seq >= FID_SEQ_IDIF && seq <= FID_SEQ_IDIF_MAX;
 }
 
-static inline int fid_is_idif(const struct lu_fid *fid)
+static inline bool fid_is_idif(const struct lu_fid *fid)
 {
 	return fid_seq_is_idif(fid_seq(fid));
 }
 
-static inline int fid_is_local_file(const struct lu_fid *fid)
+static inline bool fid_is_local_file(const struct lu_fid *fid)
 {
 	return fid_seq_is_local_file(fid_seq(fid));
 }
 
-static inline int fid_seq_is_norm(const __u64 seq)
+static inline bool fid_seq_is_norm(__u64 seq)
 {
 	return (seq >= FID_SEQ_NORMAL);
 }
 
-static inline int fid_is_norm(const struct lu_fid *fid)
+static inline bool fid_is_norm(const struct lu_fid *fid)
 {
 	return fid_seq_is_norm(fid_seq(fid));
 }
@@ -658,7 +659,7 @@ static inline void ostid_set_id(struct ost_id *oi, __u64 oid)
 		oi->oi_fid.f_oid = oid;
 		oi->oi_fid.f_ver = oid >> 48;
 	} else {
-		if (oid > OBIF_MAX_OID) {
+		if (oid >= OBIF_MAX_OID) {
 			CERROR("Bad %llu to set " DOSTID "\n", oid, POSTID(oi));
 			return;
 		}
@@ -683,7 +684,7 @@ static inline int fid_set_id(struct lu_fid *fid, __u64 oid)
 		fid->f_oid = oid;
 		fid->f_ver = oid >> 48;
 	} else {
-		if (oid > OBIF_MAX_OID) {
+		if (oid >= OBIF_MAX_OID) {
 			CERROR("Too large OID %#llx to set REG "DFID"\n",
 			       (unsigned long long)oid, PFID(fid));
 			return -EBADF;
@@ -769,7 +770,7 @@ static inline int fid_to_ostid(const struct lu_fid *fid, struct ost_id *ostid)
 }
 
 /* Check whether the fid is for LAST_ID */
-static inline int fid_is_last_id(const struct lu_fid *fid)
+static inline bool fid_is_last_id(const struct lu_fid *fid)
 {
 	return (fid_oid(fid) == 0);
 }
@@ -838,7 +839,7 @@ static inline void fid_be_to_cpu(struct lu_fid *dst, const struct lu_fid *src)
 	dst->f_ver = be32_to_cpu(fid_ver(src));
 }
 
-static inline int fid_is_sane(const struct lu_fid *fid)
+static inline bool fid_is_sane(const struct lu_fid *fid)
 {
 	return fid &&
 	       ((fid_seq(fid) >= FID_SEQ_START && fid_ver(fid) == 0) ||
@@ -846,15 +847,10 @@ static inline int fid_is_sane(const struct lu_fid *fid)
 		fid_seq_is_rsvd(fid_seq(fid)));
 }
 
-static inline int fid_is_zero(const struct lu_fid *fid)
-{
-	return fid_seq(fid) == 0 && fid_oid(fid) == 0;
-}
-
 void lustre_swab_lu_fid(struct lu_fid *fid);
 void lustre_swab_lu_seq_range(struct lu_seq_range *range);
 
-static inline int lu_fid_eq(const struct lu_fid *f0, const struct lu_fid *f1)
+static inline bool lu_fid_eq(const struct lu_fid *f0, const struct lu_fid *f1)
 {
 	return memcmp(f0, f1, sizeof(*f0)) == 0;
 }
@@ -1017,12 +1013,12 @@ static inline struct lu_dirent *lu_dirent_next(struct lu_dirent *ent)
 	return next;
 }
 
-static inline int lu_dirent_calc_size(int namelen, __u16 attr)
+static inline size_t lu_dirent_calc_size(size_t namelen, __u16 attr)
 {
-	int size;
+	size_t size;
 
 	if (attr & LUDA_TYPE) {
-		const unsigned align = sizeof(struct luda_type) - 1;
+		const size_t align = sizeof(struct luda_type) - 1;
 
 		size = (sizeof(struct lu_dirent) + namelen + align) & ~align;
 		size += sizeof(struct luda_type);
@@ -1033,15 +1029,6 @@ static inline int lu_dirent_calc_size(int namelen, __u16 attr)
 	return (size + 7) & ~7;
 }
 
-static inline int lu_dirent_size(struct lu_dirent *ent)
-{
-	if (le16_to_cpu(ent->lde_reclen) == 0) {
-		return lu_dirent_calc_size(le16_to_cpu(ent->lde_namelen),
-					   le32_to_cpu(ent->lde_attrs));
-	}
-	return le16_to_cpu(ent->lde_reclen);
-}
-
 #define MDS_DIR_END_OFF 0xfffffffffffffffeULL
 
 /**
@@ -1067,19 +1054,19 @@ struct lustre_handle {
 
 #define DEAD_HANDLE_MAGIC 0xdeadbeefcafebabeULL
 
-static inline int lustre_handle_is_used(struct lustre_handle *lh)
+static inline bool lustre_handle_is_used(const struct lustre_handle *lh)
 {
 	return lh->cookie != 0ull;
 }
 
-static inline int lustre_handle_equal(const struct lustre_handle *lh1,
-				      const struct lustre_handle *lh2)
+static inline bool lustre_handle_equal(const struct lustre_handle *lh1,
+				       const struct lustre_handle *lh2)
 {
 	return lh1->cookie == lh2->cookie;
 }
 
 static inline void lustre_handle_copy(struct lustre_handle *tgt,
-				      struct lustre_handle *src)
+				      const struct lustre_handle *src)
 {
 	tgt->cookie = src->cookie;
 }
@@ -1105,7 +1092,7 @@ struct lustre_msg_v2 {
 
 /* without gss, ptlrpc_body is put at the first buffer. */
 #define PTLRPC_NUM_VERSIONS     4
-#define JOBSTATS_JOBID_SIZE     32  /* 32 bytes string */
+
 struct ptlrpc_body_v3 {
 	struct lustre_handle pb_handle;
 	__u32 pb_type;
@@ -1127,7 +1114,7 @@ struct ptlrpc_body_v3 {
 	__u64 pb_pre_versions[PTLRPC_NUM_VERSIONS];
 	/* padding for future needs */
 	__u64 pb_padding[4];
-	char  pb_jobid[JOBSTATS_JOBID_SIZE];
+	char  pb_jobid[LUSTRE_JOBID_SIZE];
 };
 
 #define ptlrpc_body     ptlrpc_body_v3
@@ -1293,6 +1280,9 @@ void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb);
 #define OBD_CONNECT_OPEN_BY_FID	0x20000000000000ULL	/* open by fid won't pack
 							 * name in request
 							 */
+#define OBD_CONNECT_LFSCK	0x40000000000000ULL/* support online LFSCK */
+#define OBD_CONNECT_UNLINK_CLOSE 0x100000000000000ULL/* close file in unlink */
+#define OBD_CONNECT_DIR_STRIPE	 0x400000000000000ULL/* striped DNE dir */
 
 /* XXX README XXX:
  * Please DO NOT add flag values here before first ensuring that this same
@@ -1318,14 +1308,6 @@ void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb);
 #define CLIENT_CONNECT_MDT_REQD (OBD_CONNECT_IBITS | OBD_CONNECT_FID | \
 				 OBD_CONNECT_FULL20)
 
-#define OBD_OCD_VERSION(major, minor, patch, fix) (((major)<<24) + \
-						  ((minor)<<16) + \
-						  ((patch)<<8) + (fix))
-#define OBD_OCD_VERSION_MAJOR(version) ((int)((version)>>24)&255)
-#define OBD_OCD_VERSION_MINOR(version) ((int)((version)>>16)&255)
-#define OBD_OCD_VERSION_PATCH(version) ((int)((version)>>8)&255)
-#define OBD_OCD_VERSION_FIX(version)   ((int)(version)&255)
-
 /* This structure is used for both request and reply.
  *
  * If we eventually have separate connect data for different types, which we
@@ -1478,10 +1460,23 @@ enum obdo_flags {
 	OBD_FL_LOCAL_MASK   = 0xF0000000,
 };
 
-#define LOV_MAGIC_V1      0x0BD10BD0
-#define LOV_MAGIC	 LOV_MAGIC_V1
-#define LOV_MAGIC_JOIN_V1 0x0BD20BD0
-#define LOV_MAGIC_V3      0x0BD30BD0
+/*
+ * All LOV EA magics should have the same postfix, if some new version
+ * Lustre instroduces new LOV EA magic, then when down-grade to an old
+ * Lustre, even though the old version system does not recognizes such
+ * new magic, it still can distinguish the corrupted cases by checking
+ * the magic's postfix.
+ */
+#define LOV_MAGIC_MAGIC 0x0BD0
+#define LOV_MAGIC_MASK  0xFFFF
+
+#define LOV_MAGIC_V1		(0x0BD10000 | LOV_MAGIC_MAGIC)
+#define LOV_MAGIC_JOIN_V1	(0x0BD20000 | LOV_MAGIC_MAGIC)
+#define LOV_MAGIC_V3		(0x0BD30000 | LOV_MAGIC_MAGIC)
+#define LOV_MAGIC_MIGRATE	(0x0BD40000 | LOV_MAGIC_MAGIC)
+/* reserved for specifying OSTs */
+#define LOV_MAGIC_SPECIFIC	(0x0BD50000 | LOV_MAGIC_MAGIC)
+#define LOV_MAGIC		LOV_MAGIC_V1
 
 /*
  * magic for fully defined striping
@@ -1498,14 +1493,6 @@ enum obdo_flags {
 #define LOV_MAGIC_V1_DEF  0x0CD10BD0
 #define LOV_MAGIC_V3_DEF  0x0CD30BD0
 
-#define LOV_PATTERN_RAID0	0x001   /* stripes are used round-robin */
-#define LOV_PATTERN_RAID1	0x002   /* stripes are mirrors of each other */
-#define LOV_PATTERN_FIRST	0x100   /* first stripe is not in round-robin */
-#define LOV_PATTERN_CMOBD	0x200
-
-#define LOV_PATTERN_F_MASK	0xffff0000
-#define LOV_PATTERN_F_RELEASED	0x80000000 /* HSM released file */
-
 #define lov_pattern(pattern)		(pattern & ~LOV_PATTERN_F_MASK)
 #define lov_pattern_flags(pattern)	(pattern & LOV_PATTERN_F_MASK)
 
@@ -1569,25 +1556,25 @@ static inline void lmm_oi_set_id(struct ost_id *oi, __u64 oid)
 	oi->oi.oi_id = oid;
 }
 
-static inline __u64 lmm_oi_id(struct ost_id *oi)
+static inline __u64 lmm_oi_id(const struct ost_id *oi)
 {
 	return oi->oi.oi_id;
 }
 
-static inline __u64 lmm_oi_seq(struct ost_id *oi)
+static inline __u64 lmm_oi_seq(const struct ost_id *oi)
 {
 	return oi->oi.oi_seq;
 }
 
 static inline void lmm_oi_le_to_cpu(struct ost_id *dst_oi,
-				    struct ost_id *src_oi)
+				    const struct ost_id *src_oi)
 {
 	dst_oi->oi.oi_id = le64_to_cpu(src_oi->oi.oi_id);
 	dst_oi->oi.oi_seq = le64_to_cpu(src_oi->oi.oi_seq);
 }
 
 static inline void lmm_oi_cpu_to_le(struct ost_id *dst_oi,
-				    struct ost_id *src_oi)
+				    const struct ost_id *src_oi)
 {
 	dst_oi->oi.oi_id = cpu_to_le64(src_oi->oi.oi_id);
 	dst_oi->oi.oi_seq = cpu_to_le64(src_oi->oi.oi_seq);
@@ -1610,6 +1597,7 @@ static inline void lmm_oi_cpu_to_le(struct ost_id *dst_oi,
 #define XATTR_NAME_LOV	  "trusted.lov"
 #define XATTR_NAME_LMA	  "trusted.lma"
 #define XATTR_NAME_LMV	  "trusted.lmv"
+#define XATTR_NAME_DEFAULT_LMV	"trusted.dmv"
 #define XATTR_NAME_LINK	 "trusted.link"
 #define XATTR_NAME_FID	  "trusted.fid"
 #define XATTR_NAME_VERSION      "trusted.version"
@@ -1625,7 +1613,7 @@ struct lov_mds_md_v3 {	    /* LOV EA mds/wire data (little-endian) */
 	/* lmm_stripe_count used to be __u32 */
 	__u16 lmm_stripe_count;   /* num stripes in use for this object */
 	__u16 lmm_layout_gen;     /* layout generation number */
-	char  lmm_pool_name[LOV_MAXPOOLNAME]; /* must be 32bit aligned */
+	char  lmm_pool_name[LOV_MAXPOOLNAME + 1]; /* must be 32bit aligned */
 	struct lov_ost_data_v1 lmm_objects[0]; /* per-stripe data */
 };
 
@@ -1727,6 +1715,8 @@ lov_mds_md_max_stripe_count(size_t buf_size, __u32 lmm_magic)
 #define OBD_MD_FLDATAVERSION (0x0010000000000000ULL) /* iversion sum */
 #define OBD_MD_FLRELEASED    (0x0020000000000000ULL) /* file released */
 
+#define OBD_MD_DEFAULT_MEA   (0x0040000000000000ULL) /* default MEA */
+
 #define OBD_MD_FLGETATTR (OBD_MD_FLID    | OBD_MD_FLATIME | OBD_MD_FLMTIME | \
 			  OBD_MD_FLCTIME | OBD_MD_FLSIZE  | OBD_MD_FLBLKSZ | \
 			  OBD_MD_FLMODE  | OBD_MD_FLTYPE  | OBD_MD_FLUID   | \
@@ -1782,7 +1772,7 @@ void lustre_swab_obd_statfs(struct obd_statfs *os);
 				      * it to sync quickly
 				      */
 
-#define OBD_OBJECT_EOF 0xffffffffffffffffULL
+#define OBD_OBJECT_EOF	LUSTRE_EOF
 
 #define OST_MIN_PRECREATE 32
 #define OST_MAX_PRECREATE 20000
@@ -1806,9 +1796,9 @@ void lustre_swab_obd_ioobj(struct obd_ioobj *ioo);
 
 /* multiple of 8 bytes => can array */
 struct niobuf_remote {
-	__u64 offset;
-	__u32 len;
-	__u32 flags;
+	__u64	rnb_offset;
+	__u32	rnb_len;
+	__u32	rnb_flags;
 };
 
 void lustre_swab_niobuf_remote(struct niobuf_remote *nbr);
@@ -1878,12 +1868,6 @@ struct obd_quotactl {
 
 void lustre_swab_obd_quotactl(struct obd_quotactl *q);
 
-#define Q_QUOTACHECK	0x800100 /* deprecated as of 2.4 */
-#define Q_INITQUOTA	0x800101 /* deprecated as of 2.4  */
-#define Q_GETOINFO	0x800102 /* get obd quota info */
-#define Q_GETOQUOTA	0x800103 /* get obd quotas */
-#define Q_FINVALIDATE	0x800104 /* deprecated as of 2.4 */
-
 #define Q_COPY(out, in, member) (out)->member = (in)->member
 
 #define QCTL_COPY(out, in)		\
@@ -1946,8 +1930,8 @@ enum mds_cmd {
 	MDS_DISCONNECT		= 39,
 	MDS_GETSTATUS		= 40,
 	MDS_STATFS		= 41,
-	MDS_PIN			= 42,
-	MDS_UNPIN		= 43,
+	MDS_PIN			= 42, /* obsolete, never used in a release */
+	MDS_UNPIN		= 43, /* obsolete, never used in a release */
 	MDS_SYNC		= 44,
 	MDS_DONE_WRITING	= 45,
 	MDS_SET_INFO		= 46,
@@ -1956,7 +1940,7 @@ enum mds_cmd {
 	MDS_GETXATTR		= 49,
 	MDS_SETXATTR		= 50, /* obsolete, now it's MDS_REINT op */
 	MDS_WRITEPAGE		= 51,
-	MDS_IS_SUBDIR		= 52,
+	MDS_IS_SUBDIR		= 52, /* obsolete, never used in a release */
 	MDS_GET_INFO		= 53,
 	MDS_HSM_STATE_GET	= 54,
 	MDS_HSM_STATE_SET	= 55,
@@ -1984,7 +1968,7 @@ enum mdt_reint_cmd {
 	REINT_OPEN     = 6,
 	REINT_SETXATTR = 7,
 	REINT_RMENTRY  = 8,
-/*      REINT_WRITE    = 9, */
+	REINT_MIGRATE  = 9,
 	REINT_MAX
 };
 
@@ -2003,6 +1987,7 @@ void lustre_swab_generic_32s(__u32 *val);
 #define DISP_OPEN_LOCK       0x02000000
 #define DISP_OPEN_LEASE      0x04000000
 #define DISP_OPEN_STRIPE     0x08000000
+#define DISP_OPEN_DENY		0x10000000
 
 /* INODE LOCK PARTS */
 #define MDS_INODELOCK_LOOKUP 0x000001	/* For namespace, dentry etc, and also
@@ -2028,7 +2013,7 @@ void lustre_swab_generic_32s(__u32 *val);
 
 #define MDS_INODELOCK_MAXSHIFT 5
 /* This FULL lock is useful to take on unlink sort of operations */
-#define MDS_INODELOCK_FULL ((1<<(MDS_INODELOCK_MAXSHIFT+1))-1)
+#define MDS_INODELOCK_FULL ((1 << (MDS_INODELOCK_MAXSHIFT + 1)) - 1)
 
 /* NOTE: until Lustre 1.8.7/2.1.1 the fid_ver() was packed into name[2],
  * but was moved into name[1] along with the OID to avoid consuming the
@@ -2108,43 +2093,43 @@ enum md_transient_state {
 };
 
 struct mdt_body {
-	struct lu_fid  fid1;
-	struct lu_fid  fid2;
-	struct lustre_handle handle;
-	__u64	  valid;
-	__u64	  size;   /* Offset, in the case of MDS_READPAGE */
-	__s64	  mtime;
-	__s64	  atime;
-	__s64	  ctime;
-	__u64	  blocks; /* XID, in the case of MDS_READPAGE */
-	__u64	  ioepoch;
-	__u64	  t_state; /* transient file state defined in
-			    * enum md_transient_state
-			    * was "ino" until 2.4.0
-			    */
-	__u32	  fsuid;
-	__u32	  fsgid;
-	__u32	  capability;
-	__u32	  mode;
-	__u32	  uid;
-	__u32	  gid;
-	__u32	  flags; /* from vfs for pin/unpin, LUSTRE_BFLAG close */
-	__u32	  rdev;
-	__u32	  nlink; /* #bytes to read in the case of MDS_READPAGE */
-	__u32	  unused2; /* was "generation" until 2.4.0 */
-	__u32	  suppgid;
-	__u32	  eadatasize;
-	__u32	  aclsize;
-	__u32	  max_mdsize;
-	__u32	  max_cookiesize;
-	__u32	  uid_h; /* high 32-bits of uid, for FUID */
-	__u32	  gid_h; /* high 32-bits of gid, for FUID */
-	__u32	  padding_5; /* also fix lustre_swab_mdt_body */
-	__u64	  padding_6;
-	__u64	  padding_7;
-	__u64	  padding_8;
-	__u64	  padding_9;
-	__u64	  padding_10;
+	struct lu_fid mbo_fid1;
+	struct lu_fid mbo_fid2;
+	struct lustre_handle mbo_handle;
+	__u64	mbo_valid;
+	__u64	mbo_size;	/* Offset, in the case of MDS_READPAGE */
+	__s64	mbo_mtime;
+	__s64	mbo_atime;
+	__s64	mbo_ctime;
+	__u64	mbo_blocks;	/* XID, in the case of MDS_READPAGE */
+	__u64	mbo_ioepoch;
+	__u64	mbo_t_state;	/* transient file state defined in
+				 * enum md_transient_state
+				 * was "ino" until 2.4.0
+				 */
+	__u32	mbo_fsuid;
+	__u32	mbo_fsgid;
+	__u32	mbo_capability;
+	__u32	mbo_mode;
+	__u32	mbo_uid;
+	__u32	mbo_gid;
+	__u32	mbo_flags;
+	__u32	mbo_rdev;
+	__u32	mbo_nlink;	/* #bytes to read in the case of MDS_READPAGE */
+	__u32	mbo_unused2;	/* was "generation" until 2.4.0 */
+	__u32	mbo_suppgid;
+	__u32	mbo_eadatasize;
+	__u32	mbo_aclsize;
+	__u32	mbo_max_mdsize;
+	__u32	mbo_max_cookiesize;
+	__u32	mbo_uid_h;	/* high 32-bits of uid, for FUID */
+	__u32	mbo_gid_h;	/* high 32-bits of gid, for FUID */
+	__u32	mbo_padding_5;	/* also fix lustre_swab_mdt_body */
+	__u64	mbo_padding_6;
+	__u64	mbo_padding_7;
+	__u64	mbo_padding_8;
+	__u64	mbo_padding_9;
+	__u64	mbo_padding_10;
 }; /* 216 */
 
 void lustre_swab_mdt_body(struct mdt_body *b);
@@ -2263,6 +2248,11 @@ void lustre_swab_mdt_rec_setattr(struct mdt_rec_setattr *sa);
 					      */
 #define MDS_OPEN_RELEASE   02000000000000ULL /* Open the file for HSM release */
 
+#define MDS_OPEN_FL_INTERNAL (MDS_OPEN_HAS_EA | MDS_OPEN_HAS_OBJS |	\
+			      MDS_OPEN_OWNEROVERRIDE | MDS_OPEN_LOCK |	\
+			      MDS_OPEN_BY_FID | MDS_OPEN_LEASE |	\
+			      MDS_OPEN_RELEASE)
+
 enum mds_op_bias {
 	MDS_CHECK_SPLIT		= 1 << 0,
 	MDS_CROSS_REF		= 1 << 1,
@@ -2277,6 +2267,7 @@ enum mds_op_bias {
 	MDS_CREATE_VOLATILE	= 1 << 10,
 	MDS_OWNEROVERRIDE	= 1 << 11,
 	MDS_HSM_RELEASE		= 1 << 12,
+	MDS_RENAME_MIGRATE	= BIT(13),
 };
 
 /* instance of mdt_reint_rec */
@@ -2472,7 +2463,7 @@ struct lmv_desc {
 	__u32 ld_tgt_count;		/* how many MDS's */
 	__u32 ld_active_tgt_count;	 /* how many active */
 	__u32 ld_default_stripe_count;     /* how many objects are used */
-	__u32 ld_pattern;		  /* default MEA_MAGIC_* */
+	__u32 ld_pattern;		  /* default hash pattern */
 	__u64 ld_default_hash_size;
 	__u64 ld_padding_1;		/* also fix lustre_swab_lmv_desc */
 	__u32 ld_padding_2;		/* also fix lustre_swab_lmv_desc */
@@ -2482,23 +2473,129 @@ struct lmv_desc {
 	struct obd_uuid ld_uuid;
 };
 
-/* TODO: lmv_stripe_md should contain mds capabilities for all slave fids */
-struct lmv_stripe_md {
-	__u32	 mea_magic;
-	__u32	 mea_count;
-	__u32	 mea_master;
-	__u32	 mea_padding;
-	char	  mea_pool_name[LOV_MAXPOOLNAME];
-	struct lu_fid mea_ids[0];
+/* LMV layout EA, and it will be stored both in master and slave object */
+struct lmv_mds_md_v1 {
+	__u32 lmv_magic;
+	__u32 lmv_stripe_count;
+	__u32 lmv_master_mdt_index;	/* On master object, it is master
+					 * MDT index, on slave object, it
+					 * is stripe index of the slave obj
+					 */
+	__u32 lmv_hash_type;		/* dir stripe policy, i.e. indicate
+					 * which hash function to be used,
+					 * Note: only lower 16 bits is being
+					 * used for now. Higher 16 bits will
+					 * be used to mark the object status,
+					 * for example migrating or dead.
+					 */
+	__u32 lmv_layout_version;	/* Used for directory restriping */
+	__u32 lmv_padding1;
+	__u64 lmv_padding2;
+	__u64 lmv_padding3;
+	char lmv_pool_name[LOV_MAXPOOLNAME + 1];/* pool name */
+	struct lu_fid lmv_stripe_fids[0];	/* FIDs for each stripe */
 };
 
-#define MEA_MAGIC_LAST_CHAR      0xb2221ca1
-#define MEA_MAGIC_ALL_CHARS      0xb222a11c
-#define MEA_MAGIC_HASH_SEGMENT   0xb222a11b
+#define LMV_MAGIC_V1	 0x0CD20CD0	/* normal stripe lmv magic */
+#define LMV_MAGIC	 LMV_MAGIC_V1
+
+/* #define LMV_USER_MAGIC 0x0CD30CD0 */
+#define LMV_MAGIC_STRIPE 0x0CD40CD0	/* magic for dir sub_stripe */
+
+/*
+ *Right now only the lower part(0-16bits) of lmv_hash_type is being used,
+ * and the higher part will be the flag to indicate the status of object,
+ * for example the object is being migrated. And the hash function
+ * might be interpreted differently with different flags.
+ */
+#define LMV_HASH_TYPE_MASK		0x0000ffff
+
+#define LMV_HASH_FLAG_MIGRATION		0x80000000
+#define LMV_HASH_FLAG_DEAD		0x40000000
 
-#define MAX_HASH_SIZE_32	 0x7fffffffUL
-#define MAX_HASH_SIZE	    0x7fffffffffffffffULL
-#define MAX_HASH_HIGHEST_BIT     0x1000000000000000ULL
+/**
+ * The FNV-1a hash algorithm is as follows:
+ *     hash = FNV_offset_basis
+ *     for each octet_of_data to be hashed
+ *             hash = hash XOR octet_of_data
+ *             hash = hash × FNV_prime
+ *     return hash
+ * http://en.wikipedia.org/wiki/Fowler–Noll–Vo_hash_function#FNV-1a_hash
+ *
+ * http://www.isthe.com/chongo/tech/comp/fnv/index.html#FNV-reference-source
+ * FNV_prime is 2^40 + 2^8 + 0xb3 = 0x100000001b3ULL
+ **/
+#define LUSTRE_FNV_1A_64_PRIME		0x100000001b3ULL
+#define LUSTRE_FNV_1A_64_OFFSET_BIAS	0xcbf29ce484222325ULL
+static inline __u64 lustre_hash_fnv_1a_64(const void *buf, size_t size)
+{
+	__u64 hash = LUSTRE_FNV_1A_64_OFFSET_BIAS;
+	const unsigned char *p = buf;
+	size_t i;
+
+	for (i = 0; i < size; i++) {
+		hash ^= p[i];
+		hash *= LUSTRE_FNV_1A_64_PRIME;
+	}
+
+	return hash;
+}
+
+union lmv_mds_md {
+	__u32			lmv_magic;
+	struct lmv_mds_md_v1	lmv_md_v1;
+	struct lmv_user_md	lmv_user_md;
+};
+
+void lustre_swab_lmv_mds_md(union lmv_mds_md *lmm);
+
+static inline ssize_t lmv_mds_md_size(int stripe_count, unsigned int lmm_magic)
+{
+	ssize_t len = -EINVAL;
+
+	switch (lmm_magic) {
+	case LMV_MAGIC_V1: {
+		struct lmv_mds_md_v1 *lmm1;
+
+		len = sizeof(*lmm1);
+		len += stripe_count * sizeof(lmm1->lmv_stripe_fids[0]);
+		break; }
+	default:
+		break;
+	}
+	return len;
+}
+
+static inline int lmv_mds_md_stripe_count_get(const union lmv_mds_md *lmm)
+{
+	switch (le32_to_cpu(lmm->lmv_magic)) {
+	case LMV_MAGIC_V1:
+		return le32_to_cpu(lmm->lmv_md_v1.lmv_stripe_count);
+	case LMV_USER_MAGIC:
+		return le32_to_cpu(lmm->lmv_user_md.lum_stripe_count);
+	default:
+		return -EINVAL;
+	}
+}
+
+static inline int lmv_mds_md_stripe_count_set(union lmv_mds_md *lmm,
+					      unsigned int stripe_count)
+{
+	int rc = 0;
+
+	switch (le32_to_cpu(lmm->lmv_magic)) {
+	case LMV_MAGIC_V1:
+		lmm->lmv_md_v1.lmv_stripe_count = cpu_to_le32(stripe_count);
+		break;
+	case LMV_USER_MAGIC:
+		lmm->lmv_user_md.lum_stripe_count = cpu_to_le32(stripe_count);
+		break;
+	default:
+		rc = -EINVAL;
+		break;
+	}
+	return rc;
+}
 
 enum fld_rpc_opc {
 	FLD_QUERY	= 900,
@@ -2582,8 +2679,8 @@ struct ldlm_res_id {
 #define PLDLMRES(res)	(res)->lr_name.name[0], (res)->lr_name.name[1], \
 			(res)->lr_name.name[2], (res)->lr_name.name[3]
 
-static inline int ldlm_res_eq(const struct ldlm_res_id *res0,
-			      const struct ldlm_res_id *res1)
+static inline bool ldlm_res_eq(const struct ldlm_res_id *res0,
+			       const struct ldlm_res_id *res1)
 {
 	return !memcmp(res0, res1, sizeof(*res0));
 }
@@ -2620,17 +2717,15 @@ struct ldlm_extent {
 	__u64 gid;
 };
 
-#define LDLM_GID_ANY ((__u64)-1)
-
-static inline int ldlm_extent_overlap(struct ldlm_extent *ex1,
-				      struct ldlm_extent *ex2)
+static inline int ldlm_extent_overlap(const struct ldlm_extent *ex1,
+				      const struct ldlm_extent *ex2)
 {
 	return (ex1->start <= ex2->end) && (ex2->start <= ex1->end);
 }
 
 /* check if @ex1 contains @ex2 */
-static inline int ldlm_extent_contain(struct ldlm_extent *ex1,
-				      struct ldlm_extent *ex2)
+static inline int ldlm_extent_contain(const struct ldlm_extent *ex1,
+				      const struct ldlm_extent *ex2)
 {
 	return (ex1->start <= ex2->start) && (ex1->end >= ex2->end);
 }
@@ -2833,7 +2928,29 @@ enum obd_cmd {
 };
 #define OBD_FIRST_OPC OBD_PING
 
-/* catalog of log objects */
+/**
+ * llog contexts indices.
+ *
+ * There is compatibility problem with indexes below, they are not
+ * continuous and must keep their numbers for compatibility needs.
+ * See LU-5218 for details.
+ */
+enum llog_ctxt_id {
+	LLOG_CONFIG_ORIG_CTXT  =  0,
+	LLOG_CONFIG_REPL_CTXT = 1,
+	LLOG_MDS_OST_ORIG_CTXT = 2,
+	LLOG_MDS_OST_REPL_CTXT = 3, /* kept just to avoid re-assignment */
+	LLOG_SIZE_ORIG_CTXT = 4,
+	LLOG_SIZE_REPL_CTXT = 5,
+	LLOG_TEST_ORIG_CTXT = 8,
+	LLOG_TEST_REPL_CTXT = 9, /* kept just to avoid re-assignment */
+	LLOG_CHANGELOG_ORIG_CTXT = 12, /**< changelog generation on mdd */
+	LLOG_CHANGELOG_REPL_CTXT = 13, /**< changelog access on clients */
+	/* for multiple changelog consumers */
+	LLOG_CHANGELOG_USER_ORIG_CTXT = 14,
+	LLOG_AGENT_ORIG_CTXT = 15, /**< agent requests generation on cdt */
+	LLOG_MAX_CTXTS
+};
 
 /** Identifier for a single log object */
 struct llog_logid {
@@ -2939,7 +3056,7 @@ struct llog_setattr64_rec {
 	__u32			lsr_uid_h;
 	__u32			lsr_gid;
 	__u32			lsr_gid_h;
-	__u64			lsr_padding;
+	__u64			lsr_valid;
 	struct llog_rec_tail    lsr_tail;
 } __packed;
 
@@ -2963,15 +3080,9 @@ struct changelog_setinfo {
 
 /** changelog record */
 struct llog_changelog_rec {
-	struct llog_rec_hdr  cr_hdr;
-	struct changelog_rec cr;
-	struct llog_rec_tail cr_tail; /**< for_sizezof_only */
-} __packed;
-
-struct llog_changelog_ext_rec {
-	struct llog_rec_hdr      cr_hdr;
-	struct changelog_ext_rec cr;
-	struct llog_rec_tail     cr_tail; /**< for_sizezof_only */
+	struct llog_rec_hdr	cr_hdr;
+	struct changelog_rec	cr;		/**< Variable length field */
+	struct llog_rec_tail	cr_do_not_use;	/**< for_sizezof_only */
 } __packed;
 
 struct llog_changelog_user_rec {
@@ -2990,7 +3101,7 @@ enum agent_req_status {
 	ARS_SUCCEED,
 };
 
-static inline char *agent_req_status2name(enum agent_req_status ars)
+static inline const char *agent_req_status2name(const enum agent_req_status ars)
 {
 	switch (ars) {
 	case ARS_WAITING:
@@ -3056,6 +3167,9 @@ enum llog_flag {
 	LLOG_F_ZAP_WHEN_EMPTY	= 0x1,
 	LLOG_F_IS_CAT		= 0x2,
 	LLOG_F_IS_PLAIN		= 0x4,
+	LLOG_F_EXT_JOBID        = BIT(3),
+
+	LLOG_F_EXT_MASK = LLOG_F_EXT_JOBID,
 };
 
 struct llog_log_hdr {
@@ -3068,8 +3182,8 @@ struct llog_log_hdr {
 	__u32		   llh_cat_idx;
 	/* for a catalog the first plain slot is next to it */
 	struct obd_uuid	 llh_tgtuuid;
-	__u32		   llh_reserved[LLOG_HEADER_SIZE/sizeof(__u32) - 23];
-	__u32		   llh_bitmap[LLOG_BITMAP_BYTES/sizeof(__u32)];
+	__u32		   llh_reserved[LLOG_HEADER_SIZE / sizeof(__u32) - 23];
+	__u32		   llh_bitmap[LLOG_BITMAP_BYTES / sizeof(__u32)];
 	struct llog_rec_tail    llh_tail;
 } __packed;
 
@@ -3166,7 +3280,7 @@ struct obdo {
 #define o_cksum   o_nlink
 #define o_grant_used o_data_version
 
-static inline void lustre_set_wire_obdo(struct obd_connect_data *ocd,
+static inline void lustre_set_wire_obdo(const struct obd_connect_data *ocd,
 					struct obdo *wobdo,
 					const struct obdo *lobdo)
 {
@@ -3185,7 +3299,7 @@ static inline void lustre_set_wire_obdo(struct obd_connect_data *ocd,
 	}
 }
 
-static inline void lustre_get_wire_obdo(struct obd_connect_data *ocd,
+static inline void lustre_get_wire_obdo(const struct obd_connect_data *ocd,
 					struct obdo *lobdo,
 					const struct obdo *wobdo)
 {
@@ -3284,17 +3398,17 @@ void lustre_swab_lustre_capa(struct lustre_capa *c);
 
 /** lustre_capa::lc_opc */
 enum {
-	CAPA_OPC_BODY_WRITE   = 1<<0,  /**< write object data */
-	CAPA_OPC_BODY_READ    = 1<<1,  /**< read object data */
-	CAPA_OPC_INDEX_LOOKUP = 1<<2,  /**< lookup object fid */
-	CAPA_OPC_INDEX_INSERT = 1<<3,  /**< insert object fid */
-	CAPA_OPC_INDEX_DELETE = 1<<4,  /**< delete object fid */
-	CAPA_OPC_OSS_WRITE    = 1<<5,  /**< write oss object data */
-	CAPA_OPC_OSS_READ     = 1<<6,  /**< read oss object data */
-	CAPA_OPC_OSS_TRUNC    = 1<<7,  /**< truncate oss object */
-	CAPA_OPC_OSS_DESTROY  = 1<<8,  /**< destroy oss object */
-	CAPA_OPC_META_WRITE   = 1<<9,  /**< write object meta data */
-	CAPA_OPC_META_READ    = 1<<10, /**< read object meta data */
+	CAPA_OPC_BODY_WRITE   = 1 << 0,  /**< write object data */
+	CAPA_OPC_BODY_READ    = 1 << 1,  /**< read object data */
+	CAPA_OPC_INDEX_LOOKUP = 1 << 2,  /**< lookup object fid */
+	CAPA_OPC_INDEX_INSERT = 1 << 3,  /**< insert object fid */
+	CAPA_OPC_INDEX_DELETE = 1 << 4,  /**< delete object fid */
+	CAPA_OPC_OSS_WRITE    = 1 << 5,  /**< write oss object data */
+	CAPA_OPC_OSS_READ     = 1 << 6,  /**< read oss object data */
+	CAPA_OPC_OSS_TRUNC    = 1 << 7,  /**< truncate oss object */
+	CAPA_OPC_OSS_DESTROY  = 1 << 8,  /**< destroy oss object */
+	CAPA_OPC_META_WRITE   = 1 << 9,  /**< write object meta data */
+	CAPA_OPC_META_READ    = 1 << 10, /**< read object meta data */
 };
 
 #define CAPA_OPC_OSS_RW (CAPA_OPC_OSS_READ | CAPA_OPC_OSS_WRITE)
@@ -3346,6 +3460,14 @@ struct getinfo_fid2path {
 
 void lustre_swab_fid2path(struct getinfo_fid2path *gf);
 
+/** path2parent request/reply structures */
+struct getparent {
+	struct lu_fid	gp_fid;		/**< parent FID */
+	__u32		gp_linkno;	/**< hardlink number */
+	__u32		gp_name_size;	/**< size of the name field */
+	char		gp_name[0];	/**< zero-terminated link name */
+} __packed;
+
 enum {
 	LAYOUT_INTENT_ACCESS    = 0,
 	LAYOUT_INTENT_READ      = 1,
diff --git a/drivers/staging/lustre/lustre/include/lustre/lustre_ioctl.h b/drivers/staging/lustre/lustre/include/lustre/lustre_ioctl.h
new file mode 100644
index 000000000000..f3d7c94c3b50
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre/lustre_ioctl.h
@@ -0,0 +1,412 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.gnu.org/licenses/gpl-2.0.html
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2015, Intel Corporation.
+ */
+#ifndef LUSTRE_IOCTL_H_
+#define LUSTRE_IOCTL_H_
+
+#include <linux/types.h>
+#include "../../../include/linux/libcfs/libcfs.h"
+#include "lustre_idl.h"
+
+#ifdef __KERNEL__
+# include <linux/ioctl.h>
+# include <linux/string.h>
+# include "../obd_support.h"
+#else /* __KERNEL__ */
+# include <malloc.h>
+# include <string.h>
+#include <libcfs/util/ioctl.h>
+#endif /* !__KERNEL__ */
+
+#if !defined(__KERNEL__) && !defined(LUSTRE_UTILS)
+# error This file is for Lustre internal use only.
+#endif
+
+enum md_echo_cmd {
+	ECHO_MD_CREATE		= 1, /* Open/Create file on MDT */
+	ECHO_MD_MKDIR		= 2, /* Mkdir on MDT */
+	ECHO_MD_DESTROY		= 3, /* Unlink file on MDT */
+	ECHO_MD_RMDIR		= 4, /* Rmdir on MDT */
+	ECHO_MD_LOOKUP		= 5, /* Lookup on MDT */
+	ECHO_MD_GETATTR		= 6, /* Getattr on MDT */
+	ECHO_MD_SETATTR		= 7, /* Setattr on MDT */
+	ECHO_MD_ALLOC_FID	= 8, /* Get FIDs from MDT */
+};
+
+#define OBD_DEV_ID 1
+#define OBD_DEV_NAME "obd"
+#define OBD_DEV_PATH "/dev/" OBD_DEV_NAME
+#define OBD_DEV_MAJOR 10
+#define OBD_DEV_MINOR 241
+
+#define OBD_IOCTL_VERSION	0x00010004
+#define OBD_DEV_BY_DEVNAME	0xffffd0de
+#define OBD_MAX_IOCTL_BUFFER	CONFIG_LUSTRE_OBD_MAX_IOCTL_BUFFER
+
+struct obd_ioctl_data {
+	__u32		ioc_len;
+	__u32		ioc_version;
+
+	union {
+		__u64	ioc_cookie;
+		__u64	ioc_u64_1;
+	};
+	union {
+		__u32	ioc_conn1;
+		__u32	ioc_u32_1;
+	};
+	union {
+		__u32	ioc_conn2;
+		__u32	ioc_u32_2;
+	};
+
+	struct obdo	ioc_obdo1;
+	struct obdo	ioc_obdo2;
+
+	__u64		ioc_count;
+	__u64		ioc_offset;
+	__u32		ioc_dev;
+	__u32		ioc_command;
+
+	__u64		ioc_nid;
+	__u32		ioc_nal;
+	__u32		ioc_type;
+
+	/* buffers the kernel will treat as user pointers */
+	__u32		ioc_plen1;
+	char __user    *ioc_pbuf1;
+	__u32		ioc_plen2;
+	char __user    *ioc_pbuf2;
+
+	/* inline buffers for various arguments */
+	__u32		ioc_inllen1;
+	char	       *ioc_inlbuf1;
+	__u32		ioc_inllen2;
+	char	       *ioc_inlbuf2;
+	__u32		ioc_inllen3;
+	char	       *ioc_inlbuf3;
+	__u32		ioc_inllen4;
+	char	       *ioc_inlbuf4;
+
+	char		ioc_bulk[0];
+};
+
+struct obd_ioctl_hdr {
+	__u32		ioc_len;
+	__u32		ioc_version;
+};
+
+static inline __u32 obd_ioctl_packlen(struct obd_ioctl_data *data)
+{
+	__u32 len = cfs_size_round(sizeof(*data));
+
+	len += cfs_size_round(data->ioc_inllen1);
+	len += cfs_size_round(data->ioc_inllen2);
+	len += cfs_size_round(data->ioc_inllen3);
+	len += cfs_size_round(data->ioc_inllen4);
+
+	return len;
+}
+
+static inline int obd_ioctl_is_invalid(struct obd_ioctl_data *data)
+{
+	if (data->ioc_len > (1 << 30)) {
+		CERROR("OBD ioctl: ioc_len larger than 1<<30\n");
+		return 1;
+	}
+
+	if (data->ioc_inllen1 > (1 << 30)) {
+		CERROR("OBD ioctl: ioc_inllen1 larger than 1<<30\n");
+		return 1;
+	}
+
+	if (data->ioc_inllen2 > (1 << 30)) {
+		CERROR("OBD ioctl: ioc_inllen2 larger than 1<<30\n");
+		return 1;
+	}
+
+	if (data->ioc_inllen3 > (1 << 30)) {
+		CERROR("OBD ioctl: ioc_inllen3 larger than 1<<30\n");
+		return 1;
+	}
+
+	if (data->ioc_inllen4 > (1 << 30)) {
+		CERROR("OBD ioctl: ioc_inllen4 larger than 1<<30\n");
+		return 1;
+	}
+
+	if (data->ioc_inlbuf1 && !data->ioc_inllen1) {
+		CERROR("OBD ioctl: inlbuf1 pointer but 0 length\n");
+		return 1;
+	}
+
+	if (data->ioc_inlbuf2 && !data->ioc_inllen2) {
+		CERROR("OBD ioctl: inlbuf2 pointer but 0 length\n");
+		return 1;
+	}
+
+	if (data->ioc_inlbuf3 && !data->ioc_inllen3) {
+		CERROR("OBD ioctl: inlbuf3 pointer but 0 length\n");
+		return 1;
+	}
+
+	if (data->ioc_inlbuf4 && !data->ioc_inllen4) {
+		CERROR("OBD ioctl: inlbuf4 pointer but 0 length\n");
+		return 1;
+	}
+
+	if (data->ioc_pbuf1 && !data->ioc_plen1) {
+		CERROR("OBD ioctl: pbuf1 pointer but 0 length\n");
+		return 1;
+	}
+
+	if (data->ioc_pbuf2 && !data->ioc_plen2) {
+		CERROR("OBD ioctl: pbuf2 pointer but 0 length\n");
+		return 1;
+	}
+
+	if (!data->ioc_pbuf1 && data->ioc_plen1) {
+		CERROR("OBD ioctl: plen1 set but NULL pointer\n");
+		return 1;
+	}
+
+	if (!data->ioc_pbuf2 && data->ioc_plen2) {
+		CERROR("OBD ioctl: plen2 set but NULL pointer\n");
+		return 1;
+	}
+
+	if (obd_ioctl_packlen(data) > data->ioc_len) {
+		CERROR("OBD ioctl: packlen exceeds ioc_len (%d > %d)\n",
+		       obd_ioctl_packlen(data), data->ioc_len);
+		return 1;
+	}
+
+	return 0;
+}
+
+#ifdef __KERNEL__
+
+int obd_ioctl_getdata(char **buf, int *len, void __user *arg);
+int obd_ioctl_popdata(void __user *arg, void *data, int len);
+
+static inline void obd_ioctl_freedata(char *buf, size_t len)
+{
+	kvfree(buf);
+}
+
+#else /* __KERNEL__ */
+
+static inline int obd_ioctl_pack(struct obd_ioctl_data *data, char **pbuf,
+				 int max_len)
+{
+	char *ptr;
+	struct obd_ioctl_data *overlay;
+
+	data->ioc_len = obd_ioctl_packlen(data);
+	data->ioc_version = OBD_IOCTL_VERSION;
+
+	if (*pbuf && data->ioc_len > max_len) {
+		fprintf(stderr, "pbuf = %p, ioc_len = %u, max_len = %d\n",
+			*pbuf, data->ioc_len, max_len);
+		return -EINVAL;
+	}
+
+	if (!*pbuf)
+		*pbuf = malloc(data->ioc_len);
+
+	if (!*pbuf)
+		return -ENOMEM;
+
+	overlay = (struct obd_ioctl_data *)*pbuf;
+	memcpy(*pbuf, data, sizeof(*data));
+
+	ptr = overlay->ioc_bulk;
+	if (data->ioc_inlbuf1)
+		LOGL(data->ioc_inlbuf1, data->ioc_inllen1, ptr);
+
+	if (data->ioc_inlbuf2)
+		LOGL(data->ioc_inlbuf2, data->ioc_inllen2, ptr);
+
+	if (data->ioc_inlbuf3)
+		LOGL(data->ioc_inlbuf3, data->ioc_inllen3, ptr);
+
+	if (data->ioc_inlbuf4)
+		LOGL(data->ioc_inlbuf4, data->ioc_inllen4, ptr);
+
+	if (obd_ioctl_is_invalid(overlay)) {
+		fprintf(stderr, "invalid ioctl data: ioc_len = %u, max_len = %d\n",
+			data->ioc_len, max_len);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static inline int
+obd_ioctl_unpack(struct obd_ioctl_data *data, char *pbuf, int max_len)
+{
+	char *ptr;
+	struct obd_ioctl_data *overlay;
+
+	if (!pbuf)
+		return 1;
+
+	overlay = (struct obd_ioctl_data *)pbuf;
+
+	/* Preserve the caller's buffer pointers */
+	overlay->ioc_inlbuf1 = data->ioc_inlbuf1;
+	overlay->ioc_inlbuf2 = data->ioc_inlbuf2;
+	overlay->ioc_inlbuf3 = data->ioc_inlbuf3;
+	overlay->ioc_inlbuf4 = data->ioc_inlbuf4;
+
+	memcpy(data, pbuf, sizeof(*data));
+
+	ptr = overlay->ioc_bulk;
+	if (data->ioc_inlbuf1)
+		LOGU(data->ioc_inlbuf1, data->ioc_inllen1, ptr);
+
+	if (data->ioc_inlbuf2)
+		LOGU(data->ioc_inlbuf2, data->ioc_inllen2, ptr);
+
+	if (data->ioc_inlbuf3)
+		LOGU(data->ioc_inlbuf3, data->ioc_inllen3, ptr);
+
+	if (data->ioc_inlbuf4)
+		LOGU(data->ioc_inlbuf4, data->ioc_inllen4, ptr);
+
+	return 0;
+}
+
+#endif /* !__KERNEL__ */
+
+/*
+ * OBD_IOC_DATA_TYPE is only for compatibility reasons with older
+ * Linux Lustre user tools. New ioctls should NOT use this macro as
+ * the ioctl "size". Instead the ioctl should get a "size" argument
+ * which is the actual data type used by the ioctl, to ensure the
+ * ioctl interface is versioned correctly.
+ */
+#define OBD_IOC_DATA_TYPE	long
+
+/*	IOC_LDLM_TEST		_IOWR('f', 40, long) */
+/*	IOC_LDLM_DUMP		_IOWR('f', 41, long) */
+/*	IOC_LDLM_REGRESS_START	_IOWR('f', 42, long) */
+/*	IOC_LDLM_REGRESS_STOP	_IOWR('f', 43, long) */
+
+#define OBD_IOC_CREATE		_IOWR('f', 101, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_DESTROY		_IOW('f', 104, OBD_IOC_DATA_TYPE)
+/*	OBD_IOC_PREALLOCATE	_IOWR('f', 105, OBD_IOC_DATA_TYPE) */
+
+#define OBD_IOC_SETATTR		_IOW('f', 107, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_GETATTR		_IOWR('f', 108, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_READ		_IOWR('f', 109, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_WRITE		_IOWR('f', 110, OBD_IOC_DATA_TYPE)
+
+#define OBD_IOC_STATFS		_IOWR('f', 113, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_SYNC		_IOW('f', 114, OBD_IOC_DATA_TYPE)
+/*	OBD_IOC_READ2		_IOWR('f', 115, OBD_IOC_DATA_TYPE) */
+/*	OBD_IOC_FORMAT		_IOWR('f', 116, OBD_IOC_DATA_TYPE) */
+/*	OBD_IOC_PARTITION	_IOWR('f', 117, OBD_IOC_DATA_TYPE) */
+/*	OBD_IOC_COPY		_IOWR('f', 120, OBD_IOC_DATA_TYPE) */
+/*	OBD_IOC_MIGR		_IOWR('f', 121, OBD_IOC_DATA_TYPE) */
+/*	OBD_IOC_PUNCH		_IOWR('f', 122, OBD_IOC_DATA_TYPE) */
+
+/*	OBD_IOC_MODULE_DEBUG	_IOWR('f', 124, OBD_IOC_DATA_TYPE) */
+#define OBD_IOC_BRW_READ	_IOWR('f', 125, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_BRW_WRITE	_IOWR('f', 126, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_NAME2DEV	_IOWR('f', 127, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_UUID2DEV	_IOWR('f', 130, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_GETNAME		_IOWR('f', 131, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_GETMDNAME	_IOR('f', 131, char[MAX_OBD_NAME])
+#define OBD_IOC_GETDTNAME	OBD_IOC_GETNAME
+#define OBD_IOC_LOV_GET_CONFIG	_IOWR('f', 132, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_CLIENT_RECOVER	_IOW('f', 133, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_PING_TARGET	_IOW('f', 136, OBD_IOC_DATA_TYPE)
+
+/*	OBD_IOC_DEC_FS_USE_COUNT _IO('f', 139) */
+#define OBD_IOC_NO_TRANSNO	_IOW('f', 140, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_SET_READONLY	_IOW('f', 141, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_ABORT_RECOVERY	_IOR('f', 142, OBD_IOC_DATA_TYPE)
+/*	OBD_IOC_ROOT_SQUASH	_IOWR('f', 143, OBD_IOC_DATA_TYPE) */
+#define OBD_GET_VERSION		_IOWR('f', 144, OBD_IOC_DATA_TYPE)
+/*	OBD_IOC_GSS_SUPPORT	_IOWR('f', 145, OBD_IOC_DATA_TYPE) */
+/*	OBD_IOC_CLOSE_UUID	_IOWR('f', 147, OBD_IOC_DATA_TYPE) */
+#define OBD_IOC_CHANGELOG_SEND	_IOW('f', 148, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_GETDEVICE	_IOWR('f', 149, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_FID2PATH	_IOWR('f', 150, OBD_IOC_DATA_TYPE)
+/*	lustre/lustre_user.h	151-153 */
+/*	OBD_IOC_LOV_SETSTRIPE	154 LL_IOC_LOV_SETSTRIPE */
+/*	OBD_IOC_LOV_GETSTRIPE	155 LL_IOC_LOV_GETSTRIPE */
+/*	OBD_IOC_LOV_SETEA	156 LL_IOC_LOV_SETEA */
+/*	lustre/lustre_user.h	157-159 */
+#define	OBD_IOC_QUOTACHECK	_IOW('f', 160, int)
+#define	OBD_IOC_POLL_QUOTACHECK	_IOR('f', 161, struct if_quotacheck *)
+#define OBD_IOC_QUOTACTL	_IOWR('f', 162, struct if_quotactl)
+/*	lustre/lustre_user.h	163-176 */
+#define OBD_IOC_CHANGELOG_REG	_IOW('f', 177, struct obd_ioctl_data)
+#define OBD_IOC_CHANGELOG_DEREG	_IOW('f', 178, struct obd_ioctl_data)
+#define OBD_IOC_CHANGELOG_CLEAR	_IOW('f', 179, struct obd_ioctl_data)
+/*	OBD_IOC_RECORD		_IOWR('f', 180, OBD_IOC_DATA_TYPE) */
+/*	OBD_IOC_ENDRECORD	_IOWR('f', 181, OBD_IOC_DATA_TYPE) */
+/*	OBD_IOC_PARSE		_IOWR('f', 182, OBD_IOC_DATA_TYPE) */
+/*	OBD_IOC_DORECORD	_IOWR('f', 183, OBD_IOC_DATA_TYPE) */
+#define OBD_IOC_PROCESS_CFG	_IOWR('f', 184, OBD_IOC_DATA_TYPE)
+/*	OBD_IOC_DUMP_LOG	_IOWR('f', 185, OBD_IOC_DATA_TYPE) */
+/*	OBD_IOC_CLEAR_LOG	_IOWR('f', 186, OBD_IOC_DATA_TYPE) */
+#define OBD_IOC_PARAM		_IOW('f', 187, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_POOL		_IOWR('f', 188, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_REPLACE_NIDS	_IOWR('f', 189, OBD_IOC_DATA_TYPE)
+
+#define OBD_IOC_CATLOGLIST	_IOWR('f', 190, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_LLOG_INFO	_IOWR('f', 191, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_LLOG_PRINT	_IOWR('f', 192, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_LLOG_CANCEL	_IOWR('f', 193, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_LLOG_REMOVE	_IOWR('f', 194, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_LLOG_CHECK	_IOWR('f', 195, OBD_IOC_DATA_TYPE)
+/*	OBD_IOC_LLOG_CATINFO	_IOWR('f', 196, OBD_IOC_DATA_TYPE) */
+#define OBD_IOC_NODEMAP		_IOWR('f', 197, OBD_IOC_DATA_TYPE)
+
+/*	ECHO_IOC_GET_STRIPE	_IOWR('f', 200, OBD_IOC_DATA_TYPE) */
+/*	ECHO_IOC_SET_STRIPE	_IOWR('f', 201, OBD_IOC_DATA_TYPE) */
+/*	ECHO_IOC_ENQUEUE	_IOWR('f', 202, OBD_IOC_DATA_TYPE) */
+/*	ECHO_IOC_CANCEL		_IOWR('f', 203, OBD_IOC_DATA_TYPE) */
+
+#define OBD_IOC_GET_OBJ_VERSION	_IOR('f', 210, OBD_IOC_DATA_TYPE)
+
+/*	lustre/lustre_user.h	212-217 */
+#define OBD_IOC_GET_MNTOPT	_IOW('f', 220, mntopt_t)
+#define OBD_IOC_ECHO_MD		_IOR('f', 221, struct obd_ioctl_data)
+#define OBD_IOC_ECHO_ALLOC_SEQ	_IOWR('f', 222, struct obd_ioctl_data)
+#define OBD_IOC_START_LFSCK	_IOWR('f', 230, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_STOP_LFSCK	_IOW('f', 231, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_QUERY_LFSCK	_IOR('f', 232, struct obd_ioctl_data)
+/*	lustre/lustre_user.h	240-249 */
+/*	LIBCFS_IOC_DEBUG_MASK	250 */
+
+#define IOC_OSC_SET_ACTIVE	_IOWR('h', 21, void *)
+
+#endif /* LUSTRE_IOCTL_H_ */
diff --git a/drivers/staging/lustre/lustre/include/lustre/lustre_user.h b/drivers/staging/lustre/lustre/include/lustre/lustre_user.h
index ef6f38ff359e..6fc985571cba 100644
--- a/drivers/staging/lustre/lustre/include/lustre/lustre_user.h
+++ b/drivers/staging/lustre/lustre/include/lustre/lustre_user.h
@@ -42,8 +42,35 @@
  * @{
  */
 
+#ifdef __KERNEL__
+# include <linux/quota.h>
+# include <linux/string.h> /* snprintf() */
+# include <linux/version.h>
+#else /* !__KERNEL__ */
+# define NEED_QUOTA_DEFS
+# include <stdio.h> /* snprintf() */
+# include <string.h>
+# include <sys/quota.h>
+# include <sys/stat.h>
+#endif /* __KERNEL__ */
 #include "ll_fiemap.h"
-#include "../linux/lustre_user.h"
+
+/*
+ * We need to always use 64bit version because the structure
+ * is shared across entire cluster where 32bit and 64bit machines
+ * are co-existing.
+ */
+#if __BITS_PER_LONG != 64 || defined(__ARCH_WANT_STAT64)
+typedef struct stat64   lstat_t;
+#define lstat_f  lstat64
+#else
+typedef struct stat     lstat_t;
+#define lstat_f  lstat
+#endif
+
+#define HAVE_LOV_USER_MDS_DATA
+
+#define LUSTRE_EOF 0xffffffffffffffffULL
 
 /* for statfs() */
 #define LL_SUPER_MAGIC 0x0BD00BD0
@@ -117,6 +144,11 @@ struct lu_fid {
 	__u32 f_ver;
 };
 
+static inline bool fid_is_zero(const struct lu_fid *fid)
+{
+	return !fid->f_seq && !fid->f_oid;
+}
+
 struct filter_fid {
 	struct lu_fid	ff_parent;  /* ff_parent.f_ver == file stripe number */
 };
@@ -167,7 +199,7 @@ struct lustre_mdt_attrs {
  */
 struct ost_id {
 	union {
-		struct ostid {
+		struct {
 			__u64	oi_id;
 			__u64	oi_seq;
 		} oi;
@@ -188,26 +220,20 @@ struct ost_id {
  * *STRIPE* - set/get lov_user_md
  * *INFO    - set/get lov_user_mds_data
  */
-/* see <lustre_lib.h> for ioctl numberss 101-150 */
+/*	lustre_ioctl.h			101-150 */
 #define LL_IOC_GETFLAGS		 _IOR('f', 151, long)
 #define LL_IOC_SETFLAGS		 _IOW('f', 152, long)
 #define LL_IOC_CLRFLAGS		 _IOW('f', 153, long)
-/* LL_IOC_LOV_SETSTRIPE: See also OBD_IOC_LOV_SETSTRIPE */
 #define LL_IOC_LOV_SETSTRIPE	    _IOW('f', 154, long)
-/* LL_IOC_LOV_GETSTRIPE: See also OBD_IOC_LOV_GETSTRIPE */
 #define LL_IOC_LOV_GETSTRIPE	    _IOW('f', 155, long)
-/* LL_IOC_LOV_SETEA: See also OBD_IOC_LOV_SETEA */
 #define LL_IOC_LOV_SETEA		_IOW('f', 156, long)
-#define LL_IOC_RECREATE_OBJ	     _IOW('f', 157, long)
-#define LL_IOC_RECREATE_FID	     _IOW('f', 157, struct lu_fid)
+/*	LL_IOC_RECREATE_OBJ		157 obsolete */
+/*	LL_IOC_RECREATE_FID		158 obsolete */
 #define LL_IOC_GROUP_LOCK	       _IOW('f', 158, long)
 #define LL_IOC_GROUP_UNLOCK	     _IOW('f', 159, long)
-/* LL_IOC_QUOTACHECK: See also OBD_IOC_QUOTACHECK */
-#define LL_IOC_QUOTACHECK	       _IOW('f', 160, int)
-/* LL_IOC_POLL_QUOTACHECK: See also OBD_IOC_POLL_QUOTACHECK */
-#define LL_IOC_POLL_QUOTACHECK	  _IOR('f', 161, struct if_quotacheck *)
-/* LL_IOC_QUOTACTL: See also OBD_IOC_QUOTACTL */
-#define LL_IOC_QUOTACTL		 _IOWR('f', 162, struct if_quotactl)
+/* #define LL_IOC_QUOTACHECK		160 OBD_IOC_QUOTACHECK */
+/* #define LL_IOC_POLL_QUOTACHECK	161 OBD_IOC_POLL_QUOTACHECK */
+/* #define LL_IOC_QUOTACTL		162 OBD_IOC_QUOTACTL */
 #define IOC_OBD_STATFS		  _IOWR('f', 164, struct obd_statfs *)
 #define IOC_LOV_GETINFO		 _IOWR('f', 165, struct lov_user_mds_data *)
 #define LL_IOC_FLUSHCTX		 _IOW('f', 166, long)
@@ -221,8 +247,7 @@ struct ost_id {
 #define LL_IOC_GET_CONNECT_FLAGS	_IOWR('f', 174, __u64 *)
 #define LL_IOC_GET_MDTIDX	       _IOR('f', 175, int)
 
-/* see <lustre_lib.h> for ioctl numbers 177-210 */
-
+/*	lustre_ioctl.h			177-210 */
 #define LL_IOC_HSM_STATE_GET		_IOR('f', 211, struct hsm_user_state)
 #define LL_IOC_HSM_STATE_SET		_IOW('f', 212, struct hsm_state_set)
 #define LL_IOC_HSM_CT_START		_IOW('f', 213, struct lustre_kernelcomm)
@@ -242,6 +267,17 @@ struct ost_id {
 #define LL_IOC_SET_LEASE		_IOWR('f', 243, long)
 #define LL_IOC_GET_LEASE		_IO('f', 244)
 #define LL_IOC_HSM_IMPORT		_IOWR('f', 245, struct hsm_user_import)
+#define LL_IOC_LMV_SET_DEFAULT_STRIPE	_IOWR('f', 246, struct lmv_user_md)
+#define LL_IOC_MIGRATE			_IOR('f', 247, int)
+#define LL_IOC_FID2MDTIDX		_IOWR('f', 248, struct lu_fid)
+#define LL_IOC_GETPARENT		_IOWR('f', 249, struct getparent)
+
+/* Lease types for use as arg and return of LL_IOC_{GET,SET}_LEASE ioctl. */
+enum ll_lease_type {
+	LL_LEASE_RDLCK	= 0x1,
+	LL_LEASE_WRLCK	= 0x2,
+	LL_LEASE_UNLCK	= 0x4,
+};
 
 #define LL_STATFS_LMV	   1
 #define LL_STATFS_LOV	   2
@@ -253,10 +289,6 @@ struct ost_id {
 #define IOC_MDC_GETFILEINFO     _IOWR(IOC_MDC_TYPE, 22, struct lov_user_mds_data *)
 #define LL_IOC_MDC_GETINFO      _IOWR(IOC_MDC_TYPE, 23, struct lov_user_mds_data *)
 
-/* Keep these for backward compartability. */
-#define LL_IOC_OBD_STATFS       IOC_OBD_STATFS
-#define IOC_MDC_GETSTRIPE       IOC_MDC_GETFILESTRIPE
-
 #define MAX_OBD_NAME 128 /* If this changes, a NEW ioctl must be added */
 
 /* Define O_LOV_DELAY_CREATE to be a mask that is not useful for regular
@@ -273,20 +305,26 @@ struct ost_id {
 #define LL_FILE_LOCKLESS_IO     0x00000010 /* server-side locks with cio */
 #define LL_FILE_RMTACL	  0x00000020
 
-#define LOV_USER_MAGIC_V1 0x0BD10BD0
-#define LOV_USER_MAGIC    LOV_USER_MAGIC_V1
-#define LOV_USER_MAGIC_JOIN_V1 0x0BD20BD0
-#define LOV_USER_MAGIC_V3 0x0BD30BD0
+#define LOV_USER_MAGIC_V1	0x0BD10BD0
+#define LOV_USER_MAGIC		LOV_USER_MAGIC_V1
+#define LOV_USER_MAGIC_JOIN_V1	0x0BD20BD0
+#define LOV_USER_MAGIC_V3	0x0BD30BD0
+/* 0x0BD40BD0 is occupied by LOV_MAGIC_MIGRATE */
+#define LOV_USER_MAGIC_SPECIFIC	0x0BD50BD0	/* for specific OSTs */
+
+#define LMV_USER_MAGIC    0x0CD30CD0    /*default lmv magic*/
 
-#define LMV_MAGIC_V1      0x0CD10CD0    /*normal stripe lmv magic */
-#define LMV_USER_MAGIC    0x0CD20CD0    /*default lmv magic*/
+#define LOV_PATTERN_RAID0	0x001
+#define LOV_PATTERN_RAID1	0x002
+#define LOV_PATTERN_FIRST	0x100
+#define LOV_PATTERN_CMOBD	0x200
 
-#define LOV_PATTERN_RAID0 0x001
-#define LOV_PATTERN_RAID1 0x002
-#define LOV_PATTERN_FIRST 0x100
+#define LOV_PATTERN_F_MASK	0xffff0000
+#define LOV_PATTERN_F_HOLE	0x40000000 /* there is hole in LOV EA */
+#define LOV_PATTERN_F_RELEASED	0x80000000 /* HSM released file */
 
-#define LOV_MAXPOOLNAME 16
-#define LOV_POOLNAMEF "%.16s"
+#define LOV_MAXPOOLNAME 15
+#define LOV_POOLNAMEF "%.15s"
 
 #define LOV_MIN_STRIPE_BITS 16   /* maximum PAGE_SIZE (ia64), power of 2 */
 #define LOV_MIN_STRIPE_SIZE (1 << LOV_MIN_STRIPE_BITS)
@@ -344,18 +382,17 @@ struct lov_user_md_v3 {	   /* LOV EA user data (host-endian) */
 					   * used when reading
 					   */
 	};
-	char  lmm_pool_name[LOV_MAXPOOLNAME]; /* pool name */
+	char  lmm_pool_name[LOV_MAXPOOLNAME + 1];   /* pool name */
 	struct lov_user_ost_data_v1 lmm_objects[0]; /* per-stripe data */
 } __packed;
 
 static inline __u32 lov_user_md_size(__u16 stripes, __u32 lmm_magic)
 {
-	if (lmm_magic == LOV_USER_MAGIC_V3)
-		return sizeof(struct lov_user_md_v3) +
-				stripes * sizeof(struct lov_user_ost_data_v1);
-	else
+	if (lmm_magic == LOV_USER_MAGIC_V1)
 		return sizeof(struct lov_user_md_v1) +
 				stripes * sizeof(struct lov_user_ost_data_v1);
+	return sizeof(struct lov_user_md_v3) +
+	       stripes * sizeof(struct lov_user_ost_data_v1);
 }
 
 /* Compile with -D_LARGEFILE64_SOURCE or -D_GNU_SOURCE (or #define) to
@@ -374,19 +411,26 @@ struct lov_user_mds_data_v3 {
 } __packed;
 #endif
 
-/* keep this to be the same size as lov_user_ost_data_v1 */
 struct lmv_user_mds_data {
 	struct lu_fid	lum_fid;
 	__u32		lum_padding;
 	__u32		lum_mds;
 };
 
-/* lum_type */
-enum {
-	LMV_STRIPE_TYPE = 0,
-	LMV_DEFAULT_TYPE = 1,
+enum lmv_hash_type {
+	LMV_HASH_TYPE_UNKNOWN	= 0,	/* 0 is reserved for testing purpose */
+	LMV_HASH_TYPE_ALL_CHARS = 1,
+	LMV_HASH_TYPE_FNV_1A_64 = 2,
 };
 
+#define LMV_HASH_NAME_ALL_CHARS		"all_char"
+#define LMV_HASH_NAME_FNV_1A_64		"fnv_1a_64"
+
+/*
+ * Got this according to how get LOV_MAX_STRIPE_COUNT, see above,
+ * (max buffer size - lmv+rpc header) / sizeof(struct lmv_user_mds_data)
+ */
+#define LMV_MAX_STRIPE_COUNT 2000  /* ((12 * 4096 - 256) / 24) */
 #define lmv_user_md lmv_user_md_v1
 struct lmv_user_md_v1 {
 	__u32	lum_magic;	 /* must be the first field */
@@ -397,9 +441,9 @@ struct lmv_user_md_v1 {
 	__u32	lum_padding1;
 	__u32	lum_padding2;
 	__u32	lum_padding3;
-	char	lum_pool_name[LOV_MAXPOOLNAME];
+	char	lum_pool_name[LOV_MAXPOOLNAME + 1];
 	struct	lmv_user_mds_data  lum_objects[0];
-};
+} __packed;
 
 static inline int lmv_user_md_size(int stripes, int lmm_magic)
 {
@@ -407,6 +451,8 @@ static inline int lmv_user_md_size(int stripes, int lmm_magic)
 		      stripes * sizeof(struct lmv_user_mds_data);
 }
 
+void lustre_swab_lmv_user_md(struct lmv_user_md *lum);
+
 struct ll_recreate_obj {
 	__u64 lrc_id;
 	__u32 lrc_ost_idx;
@@ -498,6 +544,12 @@ static inline void obd_uuid2fsname(char *buf, char *uuid, int buflen)
 
 /********* Quotas **********/
 
+#define Q_QUOTACHECK   0x800100 /* deprecated as of 2.4 */
+#define Q_INITQUOTA    0x800101 /* deprecated as of 2.4  */
+#define Q_GETOINFO     0x800102 /* get obd quota info */
+#define Q_GETOQUOTA    0x800103 /* get obd quotas */
+#define Q_FINVALIDATE  0x800104 /* deprecated as of 2.4 */
+
 /* these must be explicitly translated into linux Q_* in ll_dir_ioctl */
 #define LUSTRE_Q_QUOTAON    0x800002     /* turn quotas on */
 #define LUSTRE_Q_QUOTAOFF   0x800003     /* turn quotas off */
@@ -648,11 +700,16 @@ static inline const char *changelog_type2str(int type)
 }
 
 /* per-record flags */
-#define CLF_VERSION     0x1000
-#define CLF_EXT_VERSION 0x2000
 #define CLF_FLAGSHIFT   12
 #define CLF_FLAGMASK    ((1U << CLF_FLAGSHIFT) - 1)
 #define CLF_VERMASK     (~CLF_FLAGMASK)
+enum changelog_rec_flags {
+	CLF_VERSION	= 0x1000,
+	CLF_RENAME	= 0x2000,
+	CLF_JOBID	= 0x4000,
+	CLF_SUPPORTED	= CLF_VERSION | CLF_RENAME | CLF_JOBID
+};
+
 /* Anything under the flagmask may be per-type (if desired) */
 /* Flags for unlink */
 #define CLF_UNLINK_LAST       0x0001 /* Unlink of last hardlink */
@@ -736,12 +793,35 @@ static inline void hsm_set_cl_error(int *flags, int error)
 	*flags |= (error << CLF_HSM_ERR_L);
 }
 
-#define CR_MAXSIZE cfs_size_round(2*NAME_MAX + 1 + \
-				  sizeof(struct changelog_ext_rec))
+enum changelog_send_flag {
+	/* Not yet implemented */
+	CHANGELOG_FLAG_FOLLOW	= BIT(0),
+	/*
+	 * Blocking IO makes sense in case of slow user parsing of the records,
+	 * but it also prevents us from cleaning up if the records are not
+	 * consumed.
+	 */
+	CHANGELOG_FLAG_BLOCK	= BIT(1),
+	/* Pack jobid into the changelog records if available. */
+	CHANGELOG_FLAG_JOBID	= BIT(2),
+};
+
+#define CR_MAXSIZE cfs_size_round(2 * NAME_MAX + 2 + \
+				  changelog_rec_offset(CLF_SUPPORTED))
+
+/* 31 usable bytes string + null terminator. */
+#define LUSTRE_JOBID_SIZE	32
 
+/*
+ * This is the minimal changelog record. It can contain extensions
+ * such as rename fields or process jobid. Its exact content is described
+ * by the cr_flags.
+ *
+ * Extensions are packed in the same order as their corresponding flags.
+ */
 struct changelog_rec {
 	__u16		 cr_namelen;
-	__u16		 cr_flags; /**< (flags&CLF_FLAGMASK)|CLF_VERSION */
+	__u16		 cr_flags; /**< \a changelog_rec_flags */
 	__u32		 cr_type;  /**< \a changelog_rec_type */
 	__u64		 cr_index; /**< changelog record number */
 	__u64		 cr_prev;  /**< last index for this target fid */
@@ -751,55 +831,138 @@ struct changelog_rec {
 		__u32	 cr_markerflags; /**< CL_MARK flags */
 	};
 	struct lu_fid	    cr_pfid;	/**< parent fid */
-	char		  cr_name[0];     /**< last element */
 } __packed;
 
-/* changelog_ext_rec is 2*sizeof(lu_fid) bigger than changelog_rec, to save
- * space, only rename uses changelog_ext_rec, while others use changelog_rec to
- * store records.
- */
-struct changelog_ext_rec {
-	__u16			cr_namelen;
-	__u16			cr_flags; /**< (flags & CLF_FLAGMASK) |
-					   *	CLF_EXT_VERSION
-					   */
-	__u32			cr_type;  /**< \a changelog_rec_type */
-	__u64			cr_index; /**< changelog record number */
-	__u64			cr_prev;  /**< last index for this target fid */
-	__u64			cr_time;
-	union {
-		struct lu_fid	cr_tfid;	/**< target fid */
-		__u32		cr_markerflags; /**< CL_MARK flags */
-	};
-	struct lu_fid		cr_pfid;	/**< target parent fid */
-	struct lu_fid		cr_sfid;	/**< source fid, or zero */
-	struct lu_fid		cr_spfid;     /**< source parent fid, or zero */
-	char			cr_name[0];     /**< last element */
-} __packed;
+/* Changelog extension for RENAME. */
+struct changelog_ext_rename {
+	struct lu_fid	cr_sfid;	/**< source fid, or zero */
+	struct lu_fid	cr_spfid;	/**< source parent fid, or zero */
+};
 
-#define CHANGELOG_REC_EXTENDED(rec) \
-	(((rec)->cr_flags & CLF_VERMASK) == CLF_EXT_VERSION)
+/* Changelog extension to include JOBID. */
+struct changelog_ext_jobid {
+	char	cr_jobid[LUSTRE_JOBID_SIZE];	/**< zero-terminated string. */
+};
+
+static inline size_t changelog_rec_offset(enum changelog_rec_flags crf)
+{
+	size_t size = sizeof(struct changelog_rec);
+
+	if (crf & CLF_RENAME)
+		size += sizeof(struct changelog_ext_rename);
+
+	if (crf & CLF_JOBID)
+		size += sizeof(struct changelog_ext_jobid);
 
-static inline int changelog_rec_size(struct changelog_rec *rec)
+	return size;
+}
+
+static inline size_t changelog_rec_size(struct changelog_rec *rec)
 {
-	return CHANGELOG_REC_EXTENDED(rec) ? sizeof(struct changelog_ext_rec) :
-					     sizeof(*rec);
+	return changelog_rec_offset(rec->cr_flags);
+}
+
+static inline size_t changelog_rec_varsize(struct changelog_rec *rec)
+{
+	return changelog_rec_size(rec) - sizeof(*rec) + rec->cr_namelen;
+}
+
+static inline
+struct changelog_ext_rename *changelog_rec_rename(struct changelog_rec *rec)
+{
+	enum changelog_rec_flags crf = rec->cr_flags & CLF_VERSION;
+
+	return (struct changelog_ext_rename *)((char *)rec +
+					       changelog_rec_offset(crf));
+}
+
+/* The jobid follows the rename extension, if present */
+static inline
+struct changelog_ext_jobid *changelog_rec_jobid(struct changelog_rec *rec)
+{
+	enum changelog_rec_flags crf = rec->cr_flags &
+				       (CLF_VERSION | CLF_RENAME);
+
+	return (struct changelog_ext_jobid *)((char *)rec +
+					      changelog_rec_offset(crf));
 }
 
+/* The name follows the rename and jobid extensions, if present */
 static inline char *changelog_rec_name(struct changelog_rec *rec)
 {
-	return CHANGELOG_REC_EXTENDED(rec) ?
-		((struct changelog_ext_rec *)rec)->cr_name : rec->cr_name;
+	return (char *)rec + changelog_rec_offset(rec->cr_flags &
+						  CLF_SUPPORTED);
 }
 
-static inline int changelog_rec_snamelen(struct changelog_ext_rec *rec)
+static inline size_t changelog_rec_snamelen(struct changelog_rec *rec)
 {
-	return rec->cr_namelen - strlen(rec->cr_name) - 1;
+	return rec->cr_namelen - strlen(changelog_rec_name(rec)) - 1;
 }
 
-static inline char *changelog_rec_sname(struct changelog_ext_rec *rec)
+static inline char *changelog_rec_sname(struct changelog_rec *rec)
 {
-	return rec->cr_name + strlen(rec->cr_name) + 1;
+	char *cr_name = changelog_rec_name(rec);
+
+	return cr_name + strlen(cr_name) + 1;
+}
+
+/**
+ * Remap a record to the desired format as specified by the crf flags.
+ * The record must be big enough to contain the final remapped version.
+ * Superfluous extension fields are removed and missing ones are added
+ * and zeroed. The flags of the record are updated accordingly.
+ *
+ * The jobid and rename extensions can be added to a record, to match the
+ * format an application expects, typically. In this case, the newly added
+ * fields will be zeroed.
+ * The Jobid field can be removed, to guarantee compatibility with older
+ * clients that don't expect this field in the records they process.
+ *
+ * The following assumptions are being made:
+ *	- CLF_RENAME will not be removed
+ *	- CLF_JOBID will not be added without CLF_RENAME being added too
+ *
+ * @param[in,out]  rec		The record to remap.
+ * @param[in]	   crf_wanted	Flags describing the desired extensions.
+ */
+static inline void changelog_remap_rec(struct changelog_rec *rec,
+				       enum changelog_rec_flags crf_wanted)
+{
+	char *jid_mov, *rnm_mov;
+
+	crf_wanted &= CLF_SUPPORTED;
+
+	if ((rec->cr_flags & CLF_SUPPORTED) == crf_wanted)
+		return;
+
+	/* First move the variable-length name field */
+	memmove((char *)rec + changelog_rec_offset(crf_wanted),
+		changelog_rec_name(rec), rec->cr_namelen);
+
+	/* Locations of jobid and rename extensions in the remapped record */
+	jid_mov = (char *)rec +
+		  changelog_rec_offset(crf_wanted & ~CLF_JOBID);
+	rnm_mov = (char *)rec +
+		  changelog_rec_offset(crf_wanted & ~(CLF_JOBID | CLF_RENAME));
+
+	/* Move the extension fields to the desired positions */
+	if ((crf_wanted & CLF_JOBID) && (rec->cr_flags & CLF_JOBID))
+		memmove(jid_mov, changelog_rec_jobid(rec),
+			sizeof(struct changelog_ext_jobid));
+
+	if ((crf_wanted & CLF_RENAME) && (rec->cr_flags & CLF_RENAME))
+		memmove(rnm_mov, changelog_rec_rename(rec),
+			sizeof(struct changelog_ext_rename));
+
+	/* Clear newly added fields */
+	if ((crf_wanted & CLF_JOBID) && !(rec->cr_flags & CLF_JOBID))
+		memset(jid_mov, 0, sizeof(struct changelog_ext_jobid));
+
+	if ((crf_wanted & CLF_RENAME) && !(rec->cr_flags & CLF_RENAME))
+		memset(rnm_mov, 0, sizeof(struct changelog_ext_rename));
+
+	/* Update the record's flags accordingly */
+	rec->cr_flags = (rec->cr_flags & CLF_FLAGMASK) | crf_wanted;
 }
 
 struct ioc_changelog {
@@ -978,7 +1141,7 @@ struct hsm_user_request {
 /** Return pointer to data field in a hsm user request */
 static inline void *hur_data(struct hsm_user_request *hur)
 {
-	return &(hur->hur_user_item[hur->hur_request.hr_itemcount]);
+	return &hur->hur_user_item[hur->hur_request.hr_itemcount];
 }
 
 /**
diff --git a/drivers/staging/lustre/lustre/include/lustre_cfg.h b/drivers/staging/lustre/lustre/include/lustre_cfg.h
index 95a0be13c0fb..8eb394e64b25 100644
--- a/drivers/staging/lustre/lustre/include/lustre_cfg.h
+++ b/drivers/staging/lustre/lustre/include/lustre_cfg.h
@@ -151,13 +151,11 @@ static inline void lustre_cfg_bufs_reset(struct lustre_cfg_bufs *bufs, char *nam
 		lustre_cfg_bufs_set_string(bufs, 0, name);
 }
 
-static inline void *lustre_cfg_buf(struct lustre_cfg *lcfg, int index)
+static inline void *lustre_cfg_buf(struct lustre_cfg *lcfg, __u32 index)
 {
-	int i;
-	int offset;
-	int bufcount;
-
-	LASSERT(index >= 0);
+	__u32 i;
+	size_t offset;
+	__u32 bufcount;
 
 	bufcount = lcfg->lcfg_bufcount;
 	if (index >= bufcount)
@@ -172,7 +170,7 @@ static inline void *lustre_cfg_buf(struct lustre_cfg *lcfg, int index)
 static inline void lustre_cfg_bufs_init(struct lustre_cfg_bufs *bufs,
 					struct lustre_cfg *lcfg)
 {
-	int i;
+	__u32 i;
 
 	bufs->lcfg_bufcount = lcfg->lcfg_bufcount;
 	for (i = 0; i < bufs->lcfg_bufcount; i++) {
@@ -181,7 +179,7 @@ static inline void lustre_cfg_bufs_init(struct lustre_cfg_bufs *bufs,
 	}
 }
 
-static inline char *lustre_cfg_string(struct lustre_cfg *lcfg, int index)
+static inline char *lustre_cfg_string(struct lustre_cfg *lcfg, __u32 index)
 {
 	char *s;
 
@@ -197,8 +195,8 @@ static inline char *lustre_cfg_string(struct lustre_cfg *lcfg, int index)
 	 * of data.  Try to use the padding first though.
 	 */
 	if (s[lcfg->lcfg_buflens[index] - 1] != '\0') {
-		int last = min((int)lcfg->lcfg_buflens[index],
-			       cfs_size_round(lcfg->lcfg_buflens[index]) - 1);
+		size_t last = min((size_t)lcfg->lcfg_buflens[index],
+				  cfs_size_round(lcfg->lcfg_buflens[index]) - 1);
 		char lost = s[last];
 
 		s[last] = '\0';
@@ -210,10 +208,10 @@ static inline char *lustre_cfg_string(struct lustre_cfg *lcfg, int index)
 	return s;
 }
 
-static inline int lustre_cfg_len(__u32 bufcount, __u32 *buflens)
+static inline __u32 lustre_cfg_len(__u32 bufcount, __u32 *buflens)
 {
-	int i;
-	int len;
+	__u32 i;
+	__u32 len;
 
 	len = LCFG_HDR_SIZE(bufcount);
 	for (i = 0; i < bufcount; i++)
@@ -254,7 +252,7 @@ static inline void lustre_cfg_free(struct lustre_cfg *lcfg)
 	return;
 }
 
-static inline int lustre_cfg_sanity_check(void *buf, int len)
+static inline int lustre_cfg_sanity_check(void *buf, size_t len)
 {
 	struct lustre_cfg *lcfg = (struct lustre_cfg *)buf;
 
diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h b/drivers/staging/lustre/lustre/include/lustre_compat.h
index 1eb64ec4bed4..567c438e93cb 100644
--- a/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h
+++ b/drivers/staging/lustre/lustre/include/lustre_compat.h
@@ -30,8 +30,8 @@
  * Lustre is a trademark of Sun Microsystems, Inc.
  */
 
-#ifndef _LINUX_COMPAT25_H
-#define _LINUX_COMPAT25_H
+#ifndef _LUSTRE_COMPAT_H
+#define _LUSTRE_COMPAT_H
 
 #include <linux/fs_struct.h>
 #include <linux/namei.h>
@@ -74,4 +74,4 @@
 # define ext2_find_next_zero_bit  find_next_zero_bit_le
 #endif
 
-#endif /* _COMPAT25_H */
+#endif /* _LUSTRE_COMPAT_H */
diff --git a/drivers/staging/lustre/lustre/include/lustre_dlm.h b/drivers/staging/lustre/lustre/include/lustre_dlm.h
index 60051a5cfe20..d03534432624 100644
--- a/drivers/staging/lustre/lustre/include/lustre_dlm.h
+++ b/drivers/staging/lustre/lustre/include/lustre_dlm.h
@@ -573,6 +573,11 @@ enum lvb_type {
 };
 
 /**
+ * LDLM_GID_ANY is used to match any group id in ldlm_lock_match().
+ */
+#define LDLM_GID_ANY	((__u64)-1)
+
+/**
  * LDLM lock structure
  *
  * Represents a single LDLM lock and its state in memory. Each lock is
@@ -968,6 +973,7 @@ struct ldlm_enqueue_info {
 	void *ei_cb_cp;  /** lock completion callback */
 	void *ei_cb_gl;  /** lock glimpse callback */
 	void *ei_cbdata; /** Data to be passed into callbacks. */
+	unsigned int ei_enq_slave:1; /* whether enqueue slave stripes */
 };
 
 extern struct obd_ops ldlm_obd_ops;
@@ -1281,16 +1287,6 @@ int ldlm_cli_cancel_list(struct list_head *head, int count,
 int intent_disposition(struct ldlm_reply *rep, int flag);
 void intent_set_disposition(struct ldlm_reply *rep, int flag);
 
-/* ioctls for trying requests */
-#define IOC_LDLM_TYPE		   'f'
-#define IOC_LDLM_MIN_NR		 40
-
-#define IOC_LDLM_TEST		   _IOWR('f', 40, long)
-#define IOC_LDLM_DUMP		   _IOWR('f', 41, long)
-#define IOC_LDLM_REGRESS_START	  _IOWR('f', 42, long)
-#define IOC_LDLM_REGRESS_STOP	   _IOWR('f', 43, long)
-#define IOC_LDLM_MAX_NR		 43
-
 /**
  * "Modes" of acquiring lock_res, necessary to tell lockdep that taking more
  * than one lock_res is dead-lock safe.
diff --git a/drivers/staging/lustre/lustre/include/lustre_dlm_flags.h b/drivers/staging/lustre/lustre/include/lustre_dlm_flags.h
index e7e0c21a9b40..a0f064d237c9 100644
--- a/drivers/staging/lustre/lustre/include/lustre_dlm_flags.h
+++ b/drivers/staging/lustre/lustre/include/lustre_dlm_flags.h
@@ -28,21 +28,6 @@
 /** l_flags bits marked as "all_flags" bits */
 #define LDLM_FL_ALL_FLAGS_MASK          0x00FFFFFFC08F932FULL
 
-/** l_flags bits marked as "ast" bits */
-#define LDLM_FL_AST_MASK                0x0000000080008000ULL
-
-/** l_flags bits marked as "blocked" bits */
-#define LDLM_FL_BLOCKED_MASK            0x000000000000000EULL
-
-/** l_flags bits marked as "gone" bits */
-#define LDLM_FL_GONE_MASK               0x0006004000000000ULL
-
-/** l_flags bits marked as "inherit" bits */
-#define LDLM_FL_INHERIT_MASK            0x0000000000800000ULL
-
-/** l_flags bits marked as "off_wire" bits */
-#define LDLM_FL_OFF_WIRE_MASK           0x00FFFFFF00000000ULL
-
 /** extent, mode, or resource changed */
 #define LDLM_FL_LOCK_CHANGED            0x0000000000000001ULL /* bit 0 */
 #define ldlm_is_lock_changed(_l)        LDLM_TEST_FLAG((_l), 1ULL <<  0)
@@ -372,6 +357,27 @@
 #define ldlm_set_excl(_l)               LDLM_SET_FLAG((_l), 1ULL << 55)
 #define ldlm_clear_excl(_l)             LDLM_CLEAR_FLAG((_l), 1ULL << 55)
 
+/** l_flags bits marked as "ast" bits */
+#define LDLM_FL_AST_MASK		(LDLM_FL_FLOCK_DEADLOCK		|\
+					 LDLM_FL_AST_DISCARD_DATA)
+
+/** l_flags bits marked as "blocked" bits */
+#define LDLM_FL_BLOCKED_MASK		(LDLM_FL_BLOCK_GRANTED		|\
+					 LDLM_FL_BLOCK_CONV		|\
+					 LDLM_FL_BLOCK_WAIT)
+
+/** l_flags bits marked as "gone" bits */
+#define LDLM_FL_GONE_MASK		(LDLM_FL_DESTROYED		|\
+					 LDLM_FL_FAILED)
+
+/** l_flags bits marked as "inherit" bits */
+/* Flags inherited from wire on enqueue/reply between client/server. */
+/* NO_TIMEOUT flag to force ldlm_lock_match() to wait with no timeout. */
+/* TEST_LOCK flag to not let TEST lock to be granted. */
+#define LDLM_FL_INHERIT_MASK		(LDLM_FL_CANCEL_ON_BLOCK	|\
+					 LDLM_FL_NO_TIMEOUT		|\
+					 LDLM_FL_TEST_LOCK)
+
 /** test for ldlm_lock flag bit set */
 #define LDLM_TEST_FLAG(_l, _b)        (((_l)->l_flags & (_b)) != 0)
 
diff --git a/drivers/staging/lustre/lustre/include/lustre_eacl.h b/drivers/staging/lustre/lustre/include/lustre_eacl.h
index d1039e1ff70d..1e71a8638186 100644
--- a/drivers/staging/lustre/lustre/include/lustre_eacl.h
+++ b/drivers/staging/lustre/lustre/include/lustre_eacl.h
@@ -46,6 +46,7 @@
 
 #ifdef CONFIG_FS_POSIX_ACL
 
+#include <linux/fs.h>
 #include <linux/posix_acl_xattr.h>
 
 typedef struct {
diff --git a/drivers/staging/lustre/lustre/include/lustre_fid.h b/drivers/staging/lustre/lustre/include/lustre_fid.h
index 743671a547ef..316780693193 100644
--- a/drivers/staging/lustre/lustre/include/lustre_fid.h
+++ b/drivers/staging/lustre/lustre/include/lustre_fid.h
@@ -229,6 +229,7 @@ enum local_oid {
 	MDD_LOV_OBJ_OSEQ	= 4121UL,
 	LFSCK_NAMESPACE_OID     = 4122UL,
 	REMOTE_PARENT_DIR_OID	= 4123UL,
+	SLAVE_LLOG_CATALOGS_OID	= 4124UL,
 };
 
 static inline void lu_local_obj_fid(struct lu_fid *fid, __u32 oid)
@@ -392,21 +393,19 @@ struct ldlm_namespace;
  * but was moved into name[1] along with the OID to avoid consuming the
  * renaming name[2,3] fields that need to be used for the quota identifier.
  */
-static inline struct ldlm_res_id *
+static inline void
 fid_build_reg_res_name(const struct lu_fid *fid, struct ldlm_res_id *res)
 {
 	memset(res, 0, sizeof(*res));
 	res->name[LUSTRE_RES_ID_SEQ_OFF] = fid_seq(fid);
 	res->name[LUSTRE_RES_ID_VER_OID_OFF] = fid_ver_oid(fid);
-
-	return res;
 }
 
 /*
  * Return true if resource is for object identified by FID.
  */
-static inline int fid_res_name_eq(const struct lu_fid *fid,
-				  const struct ldlm_res_id *res)
+static inline bool fid_res_name_eq(const struct lu_fid *fid,
+				   const struct ldlm_res_id *res)
 {
 	return res->name[LUSTRE_RES_ID_SEQ_OFF] == fid_seq(fid) &&
 	       res->name[LUSTRE_RES_ID_VER_OID_OFF] == fid_ver_oid(fid);
@@ -415,29 +414,25 @@ static inline int fid_res_name_eq(const struct lu_fid *fid,
 /*
  * Extract FID from LDLM resource. Reverse of fid_build_reg_res_name().
  */
-static inline struct lu_fid *
+static inline void
 fid_extract_from_res_name(struct lu_fid *fid, const struct ldlm_res_id *res)
 {
 	fid->f_seq = res->name[LUSTRE_RES_ID_SEQ_OFF];
 	fid->f_oid = (__u32)(res->name[LUSTRE_RES_ID_VER_OID_OFF]);
 	fid->f_ver = (__u32)(res->name[LUSTRE_RES_ID_VER_OID_OFF] >> 32);
 	LASSERT(fid_res_name_eq(fid, res));
-
-	return fid;
 }
 
 /*
  * Build (DLM) resource identifier from global quota FID and quota ID.
  */
-static inline struct ldlm_res_id *
+static inline void
 fid_build_quota_res_name(const struct lu_fid *glb_fid, union lquota_id *qid,
 			 struct ldlm_res_id *res)
 {
 	fid_build_reg_res_name(glb_fid, res);
 	res->name[LUSTRE_RES_ID_QUOTA_SEQ_OFF] = fid_seq(&qid->qid_fid);
 	res->name[LUSTRE_RES_ID_QUOTA_VER_OID_OFF] = fid_ver_oid(&qid->qid_fid);
-
-	return res;
 }
 
 /*
@@ -454,14 +449,12 @@ static inline void fid_extract_from_quota_res(struct lu_fid *glb_fid,
 		(__u32)(res->name[LUSTRE_RES_ID_QUOTA_VER_OID_OFF] >> 32);
 }
 
-static inline struct ldlm_res_id *
+static inline void
 fid_build_pdo_res_name(const struct lu_fid *fid, unsigned int hash,
 		       struct ldlm_res_id *res)
 {
 	fid_build_reg_res_name(fid, res);
 	res->name[LUSTRE_RES_ID_HSH_OFF] = hash;
-
-	return res;
 }
 
 /**
@@ -482,7 +475,7 @@ fid_build_pdo_res_name(const struct lu_fid *fid, unsigned int hash,
  *    res will be built from normal FID directly, i.e. res[0] = f_seq,
  *    res[1] = f_oid + f_ver.
  */
-static inline void ostid_build_res_name(struct ost_id *oi,
+static inline void ostid_build_res_name(const struct ost_id *oi,
 					struct ldlm_res_id *name)
 {
 	memset(name, 0, sizeof(*name));
@@ -497,8 +490,8 @@ static inline void ostid_build_res_name(struct ost_id *oi,
 /**
  * Return true if the resource is for the object identified by this id & group.
  */
-static inline int ostid_res_name_eq(struct ost_id *oi,
-				    struct ldlm_res_id *name)
+static inline int ostid_res_name_eq(const struct ost_id *oi,
+				    const struct ldlm_res_id *name)
 {
 	/* Note: it is just a trick here to save some effort, probably the
 	 * correct way would be turn them into the FID and compare
@@ -603,13 +596,14 @@ static inline __u32 fid_flatten32(const struct lu_fid *fid)
 	 * (from OID), or up to 128M inodes without collisions for new files.
 	 */
 	ino = ((seq & 0x000fffffULL) << 12) + ((seq >> 8) & 0xfffff000) +
-	       (seq >> (64 - (40-8)) & 0xffffff00) +
+	       (seq >> (64 - (40 - 8)) & 0xffffff00) +
 	       (fid_oid(fid) & 0xff000fff) + ((fid_oid(fid) & 0x00fff000) << 8);
 
 	return ino ? ino : fid_oid(fid);
 }
 
-static inline int lu_fid_diff(struct lu_fid *fid1, struct lu_fid *fid2)
+static inline int lu_fid_diff(const struct lu_fid *fid1,
+			      const struct lu_fid *fid2)
 {
 	LASSERTF(fid_seq(fid1) == fid_seq(fid2), "fid1:"DFID", fid2:"DFID"\n",
 		 PFID(fid1), PFID(fid2));
diff --git a/drivers/staging/lustre/lustre/include/lustre_handles.h b/drivers/staging/lustre/lustre/include/lustre_handles.h
index 1a63a6b9e116..e071bac9df57 100644
--- a/drivers/staging/lustre/lustre/include/lustre_handles.h
+++ b/drivers/staging/lustre/lustre/include/lustre_handles.h
@@ -66,6 +66,7 @@ struct portals_handle_ops {
 struct portals_handle {
 	struct list_head			h_link;
 	__u64				h_cookie;
+	const void			*h_owner;
 	struct portals_handle_ops	*h_ops;
 
 	/* newly added fields to handle the RCU issue. -jxiong */
@@ -75,15 +76,13 @@ struct portals_handle {
 	unsigned int			h_in:1;
 };
 
-#define RCU2HANDLE(rcu)    container_of(rcu, struct portals_handle, h_rcu)
-
 /* handles.c */
 
 /* Add a handle to the hash table */
 void class_handle_hash(struct portals_handle *,
 		       struct portals_handle_ops *ops);
 void class_handle_unhash(struct portals_handle *);
-void *class_handle2object(__u64 cookie);
+void *class_handle2object(__u64 cookie, const void *owner);
 void class_handle_free_cb(struct rcu_head *rcu);
 int class_handle_init(void);
 void class_handle_cleanup(void);
diff --git a/drivers/staging/lustre/lustre/include/lustre_import.h b/drivers/staging/lustre/lustre/include/lustre_import.h
index 4445be7a59dd..5461ba33d90c 100644
--- a/drivers/staging/lustre/lustre/include/lustre_import.h
+++ b/drivers/staging/lustre/lustre/include/lustre_import.h
@@ -285,8 +285,10 @@ struct obd_import {
 				  imp_resend_replay:1,
 				  /* disable normal recovery, for test only. */
 				  imp_no_pinger_recover:1,
+#if OBD_OCD_VERSION(3, 0, 53, 0) > LUSTRE_VERSION_CODE
 				  /* need IR MNE swab */
 				  imp_need_mne_swab:1,
+#endif
 				  /* import must be reconnected instead of
 				   * chosing new connection
 				   */
@@ -305,28 +307,6 @@ struct obd_import {
 	time64_t	     imp_last_reply_time;    /* for health check */
 };
 
-typedef void (*obd_import_callback)(struct obd_import *imp, void *closure,
-				    int event, void *event_arg, void *cb_data);
-
-/**
- * Structure for import observer.
- * It is possible to register "observer" on an import and every time
- * something happens to an import (like connect/evict/disconnect)
- * obderver will get its callback called with event type
- */
-struct obd_import_observer {
-	struct list_head	   oio_chain;
-	obd_import_callback  oio_cb;
-	void		*oio_cb_data;
-};
-
-void class_observe_import(struct obd_import *imp, obd_import_callback cb,
-			  void *cb_data);
-void class_unobserve_import(struct obd_import *imp, obd_import_callback cb,
-			    void *cb_data);
-void class_notify_import_observers(struct obd_import *imp, int event,
-				   void *event_arg);
-
 /* import.c */
 static inline unsigned int at_est2timeout(unsigned int val)
 {
diff --git a/drivers/staging/lustre/lustre/include/lustre_lib.h b/drivers/staging/lustre/lustre/include/lustre_lib.h
index 06958f217fc8..6b231913ba2e 100644
--- a/drivers/staging/lustre/lustre/include/lustre_lib.h
+++ b/drivers/staging/lustre/lustre/include/lustre_lib.h
@@ -51,7 +51,6 @@
 #include "lustre_cfg.h"
 
 /* target.c */
-struct kstatfs;
 struct ptlrpc_request;
 struct obd_export;
 struct lu_target;
@@ -74,325 +73,8 @@ int do_set_info_async(struct obd_import *imp,
 		      u32 vallen, void *val,
 		      struct ptlrpc_request_set *set);
 
-#define OBD_RECOVERY_MAX_TIME (obd_timeout * 18) /* b13079 */
-#define OBD_MAX_IOCTL_BUFFER CONFIG_LUSTRE_OBD_MAX_IOCTL_BUFFER
-
 void target_send_reply(struct ptlrpc_request *req, int rc, int fail_id);
 
-/* client.c */
-
-int client_sanobd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg);
-struct client_obd *client_conn2cli(struct lustre_handle *conn);
-
-struct md_open_data;
-struct obd_client_handle {
-	struct lustre_handle	 och_fh;
-	struct lu_fid		 och_fid;
-	struct md_open_data	*och_mod;
-	struct lustre_handle	 och_lease_handle; /* open lock for lease */
-	__u32			 och_magic;
-	fmode_t			 och_flags;
-};
-
-#define OBD_CLIENT_HANDLE_MAGIC 0xd15ea5ed
-
-/* statfs_pack.c */
-void statfs_unpack(struct kstatfs *sfs, struct obd_statfs *osfs);
-
-/*
- * For md echo client
- */
-enum md_echo_cmd {
-	ECHO_MD_CREATE       = 1, /* Open/Create file on MDT */
-	ECHO_MD_MKDIR	= 2, /* Mkdir on MDT */
-	ECHO_MD_DESTROY      = 3, /* Unlink file on MDT */
-	ECHO_MD_RMDIR	= 4, /* Rmdir on MDT */
-	ECHO_MD_LOOKUP       = 5, /* Lookup on MDT */
-	ECHO_MD_GETATTR      = 6, /* Getattr on MDT */
-	ECHO_MD_SETATTR      = 7, /* Setattr on MDT */
-	ECHO_MD_ALLOC_FID    = 8, /* Get FIDs from MDT */
-};
-
-/*
- *   OBD IOCTLS
- */
-#define OBD_IOCTL_VERSION 0x00010004
-
-struct obd_ioctl_data {
-	__u32 ioc_len;
-	__u32 ioc_version;
-
-	union {
-		__u64 ioc_cookie;
-		__u64 ioc_u64_1;
-	};
-	union {
-		__u32 ioc_conn1;
-		__u32 ioc_u32_1;
-	};
-	union {
-		__u32 ioc_conn2;
-		__u32 ioc_u32_2;
-	};
-
-	struct obdo ioc_obdo1;
-	struct obdo ioc_obdo2;
-
-	u64	 ioc_count;
-	u64	 ioc_offset;
-	__u32    ioc_dev;
-	__u32    ioc_command;
-
-	__u64 ioc_nid;
-	__u32 ioc_nal;
-	__u32 ioc_type;
-
-	/* buffers the kernel will treat as user pointers */
-	__u32  ioc_plen1;
-	void __user *ioc_pbuf1;
-	__u32  ioc_plen2;
-	void __user *ioc_pbuf2;
-
-	/* inline buffers for various arguments */
-	__u32  ioc_inllen1;
-	char  *ioc_inlbuf1;
-	__u32  ioc_inllen2;
-	char  *ioc_inlbuf2;
-	__u32  ioc_inllen3;
-	char  *ioc_inlbuf3;
-	__u32  ioc_inllen4;
-	char  *ioc_inlbuf4;
-
-	char    ioc_bulk[0];
-};
-
-struct obd_ioctl_hdr {
-	__u32 ioc_len;
-	__u32 ioc_version;
-};
-
-static inline int obd_ioctl_packlen(struct obd_ioctl_data *data)
-{
-	int len = cfs_size_round(sizeof(struct obd_ioctl_data));
-
-	len += cfs_size_round(data->ioc_inllen1);
-	len += cfs_size_round(data->ioc_inllen2);
-	len += cfs_size_round(data->ioc_inllen3);
-	len += cfs_size_round(data->ioc_inllen4);
-	return len;
-}
-
-static inline int obd_ioctl_is_invalid(struct obd_ioctl_data *data)
-{
-	if (data->ioc_len > OBD_MAX_IOCTL_BUFFER) {
-		CERROR("OBD ioctl: ioc_len larger than %d\n",
-		       OBD_MAX_IOCTL_BUFFER);
-		return 1;
-	}
-	if (data->ioc_inllen1 > OBD_MAX_IOCTL_BUFFER) {
-		CERROR("OBD ioctl: ioc_inllen1 larger than ioc_len\n");
-		return 1;
-	}
-	if (data->ioc_inllen2 > OBD_MAX_IOCTL_BUFFER) {
-		CERROR("OBD ioctl: ioc_inllen2 larger than ioc_len\n");
-		return 1;
-	}
-	if (data->ioc_inllen3 > OBD_MAX_IOCTL_BUFFER) {
-		CERROR("OBD ioctl: ioc_inllen3 larger than ioc_len\n");
-		return 1;
-	}
-	if (data->ioc_inllen4 > OBD_MAX_IOCTL_BUFFER) {
-		CERROR("OBD ioctl: ioc_inllen4 larger than ioc_len\n");
-		return 1;
-	}
-	if (data->ioc_inlbuf1 && !data->ioc_inllen1) {
-		CERROR("OBD ioctl: inlbuf1 pointer but 0 length\n");
-		return 1;
-	}
-	if (data->ioc_inlbuf2 && !data->ioc_inllen2) {
-		CERROR("OBD ioctl: inlbuf2 pointer but 0 length\n");
-		return 1;
-	}
-	if (data->ioc_inlbuf3 && !data->ioc_inllen3) {
-		CERROR("OBD ioctl: inlbuf3 pointer but 0 length\n");
-		return 1;
-	}
-	if (data->ioc_inlbuf4 && !data->ioc_inllen4) {
-		CERROR("OBD ioctl: inlbuf4 pointer but 0 length\n");
-		return 1;
-	}
-	if (data->ioc_pbuf1 && !data->ioc_plen1) {
-		CERROR("OBD ioctl: pbuf1 pointer but 0 length\n");
-		return 1;
-	}
-	if (data->ioc_pbuf2 && !data->ioc_plen2) {
-		CERROR("OBD ioctl: pbuf2 pointer but 0 length\n");
-		return 1;
-	}
-	if (data->ioc_plen1 && !data->ioc_pbuf1) {
-		CERROR("OBD ioctl: plen1 set but NULL pointer\n");
-		return 1;
-	}
-	if (data->ioc_plen2 && !data->ioc_pbuf2) {
-		CERROR("OBD ioctl: plen2 set but NULL pointer\n");
-		return 1;
-	}
-	if (obd_ioctl_packlen(data) > data->ioc_len) {
-		CERROR("OBD ioctl: packlen exceeds ioc_len (%d > %d)\n",
-		       obd_ioctl_packlen(data), data->ioc_len);
-		return 1;
-	}
-	return 0;
-}
-
-#include "obd_support.h"
-
-/* function defined in lustre/obdclass/<platform>/<platform>-module.c */
-int obd_ioctl_getdata(char **buf, int *len, void __user *arg);
-int obd_ioctl_popdata(void __user *arg, void *data, int len);
-
-static inline void obd_ioctl_freedata(char *buf, int len)
-{
-	kvfree(buf);
-	return;
-}
-
-/*
- * BSD ioctl description:
- * #define IOC_V1       _IOR(g, n1, long)
- * #define IOC_V2       _IOW(g, n2, long)
- *
- * ioctl(f, IOC_V1, arg);
- * arg will be treated as a long value,
- *
- * ioctl(f, IOC_V2, arg)
- * arg will be treated as a pointer, bsd will call
- * copyin(buf, arg, sizeof(long))
- *
- * To make BSD ioctl handles argument correctly and simplely,
- * we change _IOR to _IOWR so BSD will copyin obd_ioctl_data
- * for us. Does this change affect Linux?  (XXX Liang)
- */
-#define OBD_IOC_DATA_TYPE long
-
-#define OBD_IOC_CREATE		 _IOWR('f', 101, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_DESTROY		_IOW('f', 104, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_PREALLOCATE	    _IOWR('f', 105, OBD_IOC_DATA_TYPE)
-
-#define OBD_IOC_SETATTR		_IOW('f', 107, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_GETATTR		_IOWR ('f', 108, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_READ		   _IOWR('f', 109, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_WRITE		  _IOWR('f', 110, OBD_IOC_DATA_TYPE)
-
-#define OBD_IOC_STATFS		 _IOWR('f', 113, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_SYNC		   _IOW('f', 114, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_READ2		  _IOWR('f', 115, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_FORMAT		 _IOWR('f', 116, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_PARTITION	      _IOWR('f', 117, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_COPY		   _IOWR('f', 120, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_MIGR		   _IOWR('f', 121, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_PUNCH		  _IOWR('f', 122, OBD_IOC_DATA_TYPE)
-
-#define OBD_IOC_MODULE_DEBUG	   _IOWR('f', 124, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_BRW_READ	       _IOWR('f', 125, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_BRW_WRITE	      _IOWR('f', 126, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_NAME2DEV	       _IOWR('f', 127, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_UUID2DEV	       _IOWR('f', 130, OBD_IOC_DATA_TYPE)
-
-#define OBD_IOC_GETNAME		_IOWR('f', 131, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_GETMDNAME	      _IOR('f', 131, char[MAX_OBD_NAME])
-#define OBD_IOC_GETDTNAME	       OBD_IOC_GETNAME
-
-#define OBD_IOC_LOV_GET_CONFIG	 _IOWR('f', 132, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_CLIENT_RECOVER	 _IOW('f', 133, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_PING_TARGET	    _IOW('f', 136, OBD_IOC_DATA_TYPE)
-
-#define OBD_IOC_DEC_FS_USE_COUNT       _IO  ('f', 139)
-#define OBD_IOC_NO_TRANSNO	     _IOW('f', 140, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_SET_READONLY	   _IOW('f', 141, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_ABORT_RECOVERY	 _IOR('f', 142, OBD_IOC_DATA_TYPE)
-
-#define OBD_IOC_ROOT_SQUASH	    _IOWR('f', 143, OBD_IOC_DATA_TYPE)
-
-#define OBD_GET_VERSION		_IOWR ('f', 144, OBD_IOC_DATA_TYPE)
-
-#define OBD_IOC_GSS_SUPPORT	    _IOWR('f', 145, OBD_IOC_DATA_TYPE)
-
-#define OBD_IOC_CLOSE_UUID	     _IOWR ('f', 147, OBD_IOC_DATA_TYPE)
-
-#define OBD_IOC_CHANGELOG_SEND	 _IOW('f', 148, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_GETDEVICE	      _IOWR ('f', 149, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_FID2PATH	       _IOWR ('f', 150, OBD_IOC_DATA_TYPE)
-/* see also <lustre/lustre_user.h> for ioctls 151-153 */
-/* OBD_IOC_LOV_SETSTRIPE: See also LL_IOC_LOV_SETSTRIPE */
-#define OBD_IOC_LOV_SETSTRIPE	  _IOW('f', 154, OBD_IOC_DATA_TYPE)
-/* OBD_IOC_LOV_GETSTRIPE: See also LL_IOC_LOV_GETSTRIPE */
-#define OBD_IOC_LOV_GETSTRIPE	  _IOW('f', 155, OBD_IOC_DATA_TYPE)
-/* OBD_IOC_LOV_SETEA: See also LL_IOC_LOV_SETEA */
-#define OBD_IOC_LOV_SETEA	      _IOW('f', 156, OBD_IOC_DATA_TYPE)
-/* see <lustre/lustre_user.h> for ioctls 157-159 */
-/* OBD_IOC_QUOTACHECK: See also LL_IOC_QUOTACHECK */
-#define OBD_IOC_QUOTACHECK	     _IOW('f', 160, int)
-/* OBD_IOC_POLL_QUOTACHECK: See also LL_IOC_POLL_QUOTACHECK */
-#define OBD_IOC_POLL_QUOTACHECK	_IOR('f', 161, struct if_quotacheck *)
-/* OBD_IOC_QUOTACTL: See also LL_IOC_QUOTACTL */
-#define OBD_IOC_QUOTACTL	       _IOWR('f', 162, struct if_quotactl)
-/* see  also <lustre/lustre_user.h> for ioctls 163-176 */
-#define OBD_IOC_CHANGELOG_REG	  _IOW('f', 177, struct obd_ioctl_data)
-#define OBD_IOC_CHANGELOG_DEREG	_IOW('f', 178, struct obd_ioctl_data)
-#define OBD_IOC_CHANGELOG_CLEAR	_IOW('f', 179, struct obd_ioctl_data)
-#define OBD_IOC_RECORD		 _IOWR('f', 180, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_ENDRECORD	      _IOWR('f', 181, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_PARSE		  _IOWR('f', 182, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_DORECORD	       _IOWR('f', 183, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_PROCESS_CFG	    _IOWR('f', 184, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_DUMP_LOG	       _IOWR('f', 185, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_CLEAR_LOG	      _IOWR('f', 186, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_PARAM		  _IOW('f', 187, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_POOL		   _IOWR('f', 188, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_REPLACE_NIDS	   _IOWR('f', 189, OBD_IOC_DATA_TYPE)
-
-#define OBD_IOC_CATLOGLIST	     _IOWR('f', 190, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_LLOG_INFO	      _IOWR('f', 191, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_LLOG_PRINT	     _IOWR('f', 192, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_LLOG_CANCEL	    _IOWR('f', 193, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_LLOG_REMOVE	    _IOWR('f', 194, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_LLOG_CHECK	     _IOWR('f', 195, OBD_IOC_DATA_TYPE)
-/* OBD_IOC_LLOG_CATINFO is deprecated */
-#define OBD_IOC_LLOG_CATINFO	   _IOWR('f', 196, OBD_IOC_DATA_TYPE)
-
-/*	#define ECHO_IOC_GET_STRIPE    _IOWR('f', 200, OBD_IOC_DATA_TYPE) */
-/*	#define ECHO_IOC_SET_STRIPE    _IOWR('f', 201, OBD_IOC_DATA_TYPE) */
-/*	#define ECHO_IOC_ENQUEUE       _IOWR('f', 202, OBD_IOC_DATA_TYPE) */
-/*	#define ECHO_IOC_CANCEL        _IOWR('f', 203, OBD_IOC_DATA_TYPE) */
-
-#define OBD_IOC_GET_OBJ_VERSION	_IOR('f', 210, OBD_IOC_DATA_TYPE)
-
-/* <lustre/lustre_user.h> defines ioctl number 218-219 */
-#define OBD_IOC_GET_MNTOPT	     _IOW('f', 220, mntopt_t)
-
-#define OBD_IOC_ECHO_MD		_IOR('f', 221, struct obd_ioctl_data)
-#define OBD_IOC_ECHO_ALLOC_SEQ	 _IOWR('f', 222, struct obd_ioctl_data)
-
-#define OBD_IOC_START_LFSCK	       _IOWR('f', 230, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_STOP_LFSCK	       _IOW('f', 231, OBD_IOC_DATA_TYPE)
-#define OBD_IOC_PAUSE_LFSCK	       _IOW('f', 232, OBD_IOC_DATA_TYPE)
-
-/* XXX _IOWR('f', 250, long) has been defined in
- * libcfs/include/libcfs/libcfs_private.h for debug, don't use it
- */
-
-/* Until such time as we get_info the per-stripe maximum from the OST,
- * we define this to be 2T - 4k, which is the ext3 maxbytes.
- */
-#define LUSTRE_STRIPE_MAXBYTES 0x1fffffff000ULL
-
-/* Special values for remove LOV EA from disk */
-#define LOVEA_DELETE_VALUES(size, count, offset) (size == 0 && count == 0 && \
-						 offset == (typeof(offset))(-1))
-
-/* #define POISON_BULK 0 */
-
 /*
  * l_wait_event is a flexible sleeping function, permitting simple caller
  * configuration of interrupt and timeout sensitivity along with actions to
diff --git a/drivers/staging/lustre/lustre/include/lustre_linkea.h b/drivers/staging/lustre/lustre/include/lustre_linkea.h
new file mode 100644
index 000000000000..249e8bf4fa22
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_linkea.h
@@ -0,0 +1,79 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.gnu.org/licenses/gpl-2.0.html
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2013, 2014, Intel Corporation.
+ * Use is subject to license terms.
+ *
+ * Author: di wang <di.wang@intel.com>
+ */
+
+#define DEFAULT_LINKEA_SIZE	4096
+
+struct linkea_data {
+	/**
+	 * Buffer to keep link EA body.
+	 */
+	struct lu_buf		*ld_buf;
+	/**
+	 * The matched header, entry and its length in the EA
+	 */
+	struct link_ea_header	*ld_leh;
+	struct link_ea_entry	*ld_lee;
+	int			ld_reclen;
+};
+
+int linkea_data_new(struct linkea_data *ldata, struct lu_buf *buf);
+int linkea_init(struct linkea_data *ldata);
+void linkea_entry_unpack(const struct link_ea_entry *lee, int *reclen,
+			 struct lu_name *lname, struct lu_fid *pfid);
+int linkea_entry_pack(struct link_ea_entry *lee, const struct lu_name *lname,
+		      const struct lu_fid *pfid);
+int linkea_add_buf(struct linkea_data *ldata, const struct lu_name *lname,
+		   const struct lu_fid *pfid);
+void linkea_del_buf(struct linkea_data *ldata, const struct lu_name *lname);
+int linkea_links_find(struct linkea_data *ldata, const struct lu_name *lname,
+		      const struct lu_fid  *pfid);
+
+static inline void linkea_first_entry(struct linkea_data *ldata)
+{
+	LASSERT(ldata);
+	LASSERT(ldata->ld_leh);
+
+	if (ldata->ld_leh->leh_reccount == 0)
+		ldata->ld_lee = NULL;
+	else
+		ldata->ld_lee = (struct link_ea_entry *)(ldata->ld_leh + 1);
+}
+
+static inline void linkea_next_entry(struct linkea_data *ldata)
+{
+	LASSERT(ldata);
+	LASSERT(ldata->ld_leh);
+
+	if (ldata->ld_lee) {
+		ldata->ld_lee = (struct link_ea_entry *)((char *)ldata->ld_lee +
+							 ldata->ld_reclen);
+		if ((char *)ldata->ld_lee >= ((char *)ldata->ld_leh +
+					      ldata->ld_leh->leh_len))
+			ldata->ld_lee = NULL;
+	}
+}
diff --git a/drivers/staging/lustre/lustre/include/lustre_lite.h b/drivers/staging/lustre/lustre/include/lustre_lite.h
deleted file mode 100644
index b16897702559..000000000000
--- a/drivers/staging/lustre/lustre/include/lustre_lite.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.gnu.org/licenses/gpl-2.0.html
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2011, 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#ifndef _LL_H
-#define _LL_H
-
-/** \defgroup lite lite
- *
- * @{
- */
-
-#include "linux/lustre_lite.h"
-
-#include "obd_class.h"
-#include "lustre_net.h"
-#include "lustre_mds.h"
-#include "lustre_ha.h"
-
-/* 4UL * 1024 * 1024 */
-#define LL_MAX_BLKSIZE_BITS     (22)
-#define LL_MAX_BLKSIZE	  (1UL<<LL_MAX_BLKSIZE_BITS)
-
-/*
- * This is embedded into llite super-blocks to keep track of
- * connect flags (capabilities) supported by all imports given mount is
- * connected to.
- */
-struct lustre_client_ocd {
-	/*
-	 * This is conjunction of connect_flags across all imports (LOVs) this
-	 * mount is connected to. This field is updated by cl_ocd_update()
-	 * under ->lco_lock.
-	 */
-	__u64	      lco_flags;
-	struct mutex	   lco_lock;
-	struct obd_export *lco_md_exp;
-	struct obd_export *lco_dt_exp;
-};
-
-/*
- * Chain of hash overflow pages.
- */
-struct ll_dir_chain {
-	/* XXX something. Later */
-};
-
-static inline void ll_dir_chain_init(struct ll_dir_chain *chain)
-{
-}
-
-static inline void ll_dir_chain_fini(struct ll_dir_chain *chain)
-{
-}
-
-static inline unsigned long hash_x_index(__u64 hash, int hash64)
-{
-	if (BITS_PER_LONG == 32 && hash64)
-		hash >>= 32;
-	/* save hash 0 as index 0 because otherwise we'll save it at
-	 * page index end (~0UL) and it causes truncate_inode_pages_range()
-	 * to loop forever.
-	 */
-	return ~0UL - (hash + !hash);
-}
-
-/** @} lite */
-
-#endif
diff --git a/drivers/staging/lustre/lustre/include/lustre_lmv.h b/drivers/staging/lustre/lustre/include/lustre_lmv.h
new file mode 100644
index 000000000000..d7f7afa8dfa7
--- /dev/null
+++ b/drivers/staging/lustre/lustre/include/lustre_lmv.h
@@ -0,0 +1,184 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License version 2 for more details.  A copy is
+ * included in the COPYING file that accompanied this code.
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2013, Intel Corporation.
+ */
+/*
+ * lustre/include/lustre_lmv.h
+ *
+ * Lustre LMV structures and functions.
+ *
+ * Author: Di Wang <di.wang@intel.com>
+ */
+
+#ifndef _LUSTRE_LMV_H
+#define _LUSTRE_LMV_H
+#include "lustre/lustre_idl.h"
+
+struct lmv_oinfo {
+	struct lu_fid	lmo_fid;
+	u32		lmo_mds;
+	struct inode	*lmo_root;
+};
+
+struct lmv_stripe_md {
+	__u32	lsm_md_magic;
+	__u32	lsm_md_stripe_count;
+	__u32	lsm_md_master_mdt_index;
+	__u32	lsm_md_hash_type;
+	__u32	lsm_md_layout_version;
+	__u32	lsm_md_default_count;
+	__u32	lsm_md_default_index;
+	char	lsm_md_pool_name[LOV_MAXPOOLNAME + 1];
+	struct lmv_oinfo lsm_md_oinfo[0];
+};
+
+static inline bool
+lsm_md_eq(const struct lmv_stripe_md *lsm1, const struct lmv_stripe_md *lsm2)
+{
+	__u32 idx;
+
+	if (lsm1->lsm_md_magic != lsm2->lsm_md_magic ||
+	    lsm1->lsm_md_stripe_count != lsm2->lsm_md_stripe_count ||
+	    lsm1->lsm_md_master_mdt_index != lsm2->lsm_md_master_mdt_index ||
+	    lsm1->lsm_md_hash_type != lsm2->lsm_md_hash_type ||
+	    lsm1->lsm_md_layout_version != lsm2->lsm_md_layout_version ||
+	    !strcmp(lsm1->lsm_md_pool_name, lsm2->lsm_md_pool_name))
+		return false;
+
+	for (idx = 0; idx < lsm1->lsm_md_stripe_count; idx++) {
+		if (!lu_fid_eq(&lsm1->lsm_md_oinfo[idx].lmo_fid,
+			       &lsm2->lsm_md_oinfo[idx].lmo_fid))
+			return false;
+	}
+
+	return true;
+}
+
+union lmv_mds_md;
+
+int lmv_unpack_md(struct obd_export *exp, struct lmv_stripe_md **lsmp,
+		  const union lmv_mds_md *lmm, int stripe_count);
+
+static inline int lmv_alloc_memmd(struct lmv_stripe_md **lsmp, int stripe_count)
+{
+	return lmv_unpack_md(NULL, lsmp, NULL, stripe_count);
+}
+
+static inline void lmv_free_memmd(struct lmv_stripe_md *lsm)
+{
+	lmv_unpack_md(NULL, &lsm, NULL, 0);
+}
+
+static inline void lmv1_le_to_cpu(struct lmv_mds_md_v1 *lmv_dst,
+				  const struct lmv_mds_md_v1 *lmv_src)
+{
+	__u32 i;
+
+	lmv_dst->lmv_magic = le32_to_cpu(lmv_src->lmv_magic);
+	lmv_dst->lmv_stripe_count = le32_to_cpu(lmv_src->lmv_stripe_count);
+	lmv_dst->lmv_master_mdt_index =
+		le32_to_cpu(lmv_src->lmv_master_mdt_index);
+	lmv_dst->lmv_hash_type = le32_to_cpu(lmv_src->lmv_hash_type);
+	lmv_dst->lmv_layout_version = le32_to_cpu(lmv_src->lmv_layout_version);
+
+	for (i = 0; i < lmv_src->lmv_stripe_count; i++)
+		fid_le_to_cpu(&lmv_dst->lmv_stripe_fids[i],
+			      &lmv_src->lmv_stripe_fids[i]);
+}
+
+static inline void lmv_le_to_cpu(union lmv_mds_md *lmv_dst,
+				 const union lmv_mds_md *lmv_src)
+{
+	switch (le32_to_cpu(lmv_src->lmv_magic)) {
+	case LMV_MAGIC_V1:
+		lmv1_le_to_cpu(&lmv_dst->lmv_md_v1, &lmv_src->lmv_md_v1);
+		break;
+	default:
+		break;
+	}
+}
+
+/* This hash is only for testing purpose */
+static inline unsigned int
+lmv_hash_all_chars(unsigned int count, const char *name, int namelen)
+{
+	const unsigned char *p = (const unsigned char *)name;
+	unsigned int c = 0;
+
+	while (--namelen >= 0)
+		c += p[namelen];
+
+	c = c % count;
+
+	return c;
+}
+
+static inline unsigned int
+lmv_hash_fnv1a(unsigned int count, const char *name, int namelen)
+{
+	__u64 hash;
+
+	hash = lustre_hash_fnv_1a_64(name, namelen);
+
+	return do_div(hash, count);
+}
+
+static inline int lmv_name_to_stripe_index(__u32 lmv_hash_type,
+					   unsigned int stripe_count,
+					   const char *name, int namelen)
+{
+	__u32 hash_type = lmv_hash_type & LMV_HASH_TYPE_MASK;
+	int idx;
+
+	LASSERT(namelen > 0);
+	if (stripe_count <= 1)
+		return 0;
+
+	/* for migrating object, always start from 0 stripe */
+	if (lmv_hash_type & LMV_HASH_FLAG_MIGRATION)
+		return 0;
+
+	switch (hash_type) {
+	case LMV_HASH_TYPE_ALL_CHARS:
+		idx = lmv_hash_all_chars(stripe_count, name, namelen);
+		break;
+	case LMV_HASH_TYPE_FNV_1A_64:
+		idx = lmv_hash_fnv1a(stripe_count, name, namelen);
+		break;
+	default:
+		idx = -EBADFD;
+		break;
+	}
+	CDEBUG(D_INFO, "name %.*s hash_type %d idx %d\n", namelen, name,
+	       hash_type, idx);
+
+	return idx;
+}
+
+static inline bool lmv_is_known_hash_type(__u32 type)
+{
+	return (type & LMV_HASH_TYPE_MASK) == LMV_HASH_TYPE_FNV_1A_64 ||
+	       (type & LMV_HASH_TYPE_MASK) == LMV_HASH_TYPE_ALL_CHARS;
+}
+
+#endif
diff --git a/drivers/staging/lustre/lustre/include/lustre_log.h b/drivers/staging/lustre/lustre/include/lustre_log.h
index b96e02317bfc..995b266932e3 100644
--- a/drivers/staging/lustre/lustre/include/lustre_log.h
+++ b/drivers/staging/lustre/lustre/include/lustre_log.h
@@ -277,12 +277,11 @@ static inline void llog_ctxt_put(struct llog_ctxt *ctxt)
 	__llog_ctxt_put(NULL, ctxt);
 }
 
-static inline void llog_group_init(struct obd_llog_group *olg, int group)
+static inline void llog_group_init(struct obd_llog_group *olg)
 {
 	init_waitqueue_head(&olg->olg_waitq);
 	spin_lock_init(&olg->olg_lock);
 	mutex_init(&olg->olg_cat_processing);
-	olg->olg_seq = group;
 }
 
 static inline int llog_group_set_ctxt(struct obd_llog_group *olg,
diff --git a/drivers/staging/lustre/lustre/include/lustre_mdc.h b/drivers/staging/lustre/lustre/include/lustre_mdc.h
index fa62b95d351f..8fc2d3f2dfd6 100644
--- a/drivers/staging/lustre/lustre/include/lustre_mdc.h
+++ b/drivers/staging/lustre/lustre/include/lustre_mdc.h
@@ -96,7 +96,7 @@ static inline void mdc_get_rpc_lock(struct mdc_rpc_lock *lck,
 				    struct lookup_intent *it)
 {
 	if (it && (it->it_op == IT_GETATTR || it->it_op == IT_LOOKUP ||
-		   it->it_op == IT_LAYOUT))
+		   it->it_op == IT_LAYOUT || it->it_op == IT_READDIR))
 		return;
 
 	/* This would normally block until the existing request finishes.
@@ -136,7 +136,7 @@ static inline void mdc_put_rpc_lock(struct mdc_rpc_lock *lck,
 				    struct lookup_intent *it)
 {
 	if (it && (it->it_op == IT_GETATTR || it->it_op == IT_LOOKUP ||
-		   it->it_op == IT_LAYOUT))
+		   it->it_op == IT_LAYOUT || it->it_op == IT_READDIR))
 		return;
 
 	if (lck->rpcl_it == MDC_FAKE_RPCL_IT) { /* OBD_FAIL_MDC_RPCS_SEM */
@@ -156,34 +156,44 @@ static inline void mdc_put_rpc_lock(struct mdc_rpc_lock *lck,
 	mutex_unlock(&lck->rpcl_mutex);
 }
 
-/* Update the maximum observed easize and cookiesize.  The default easize
- * and cookiesize is initialized to the minimum value but allowed to grow
- * up to a single page in size if required to handle the common case.
+/**
+ * Update the maximum possible easize and cookiesize.
+ *
+ * The values are learned from ptlrpc replies sent by the MDT.  The
+ * default easize and cookiesize is initialized to the minimum value but
+ * allowed to grow up to a single page in size if required to handle the
+ * common case.
+ *
+ * \see client_obd::cl_default_mds_easize and
+ * client_obd::cl_default_mds_cookiesize
+ *
+ * \param[in] exp	export for MDC device
+ * \param[in] body	body of ptlrpc reply from MDT
+ *
  */
 static inline void mdc_update_max_ea_from_body(struct obd_export *exp,
 					       struct mdt_body *body)
 {
-	if (body->valid & OBD_MD_FLMODEASIZE) {
+	if (body->mbo_valid & OBD_MD_FLMODEASIZE) {
 		struct client_obd *cli = &exp->exp_obd->u.cli;
+		u32 def_cookiesize, def_easize;
 
-		if (cli->cl_max_mds_easize < body->max_mdsize) {
-			cli->cl_max_mds_easize = body->max_mdsize;
-			cli->cl_default_mds_easize =
-			    min_t(__u32, body->max_mdsize, PAGE_SIZE);
-		}
-		if (cli->cl_max_mds_cookiesize < body->max_cookiesize) {
-			cli->cl_max_mds_cookiesize = body->max_cookiesize;
-			cli->cl_default_mds_cookiesize =
-			    min_t(__u32, body->max_cookiesize, PAGE_SIZE);
-		}
+		if (cli->cl_max_mds_easize < body->mbo_max_mdsize)
+			cli->cl_max_mds_easize = body->mbo_max_mdsize;
+
+		def_easize = min_t(__u32, body->mbo_max_mdsize,
+				   OBD_MAX_DEFAULT_EA_SIZE);
+		cli->cl_default_mds_easize = def_easize;
+
+		if (cli->cl_max_mds_cookiesize < body->mbo_max_cookiesize)
+			cli->cl_max_mds_cookiesize = body->mbo_max_cookiesize;
+
+		def_cookiesize = min_t(__u32, body->mbo_max_cookiesize,
+				       OBD_MAX_DEFAULT_COOKIE_SIZE);
+		cli->cl_default_mds_cookiesize = def_cookiesize;
 	}
 }
 
-struct mdc_cache_waiter {
-	struct list_head	      mcw_entry;
-	wait_queue_head_t	     mcw_waitq;
-};
-
 /* mdc/mdc_locks.c */
 int it_open_error(int phase, struct lookup_intent *it);
 
diff --git a/drivers/staging/lustre/lustre/include/lustre_mds.h b/drivers/staging/lustre/lustre/include/lustre_mds.h
index 4104bd9bd5c4..23a7e4f78e9a 100644
--- a/drivers/staging/lustre/lustre/include/lustre_mds.h
+++ b/drivers/staging/lustre/lustre/include/lustre_mds.h
@@ -58,9 +58,6 @@ struct mds_group_info {
 #define MDD_OBD_NAME     "mdd_obd"
 #define MDD_OBD_UUID     "mdd_obd_uuid"
 
-/* these are local flags, used only on the client, private */
-#define M_CHECK_STALE	   0200000000
-
 /** @} mds */
 
 #endif
diff --git a/drivers/staging/lustre/lustre/include/lustre_net.h b/drivers/staging/lustre/lustre/include/lustre_net.h
index d5debd615fdf..e9aba99ee52a 100644
--- a/drivers/staging/lustre/lustre/include/lustre_net.h
+++ b/drivers/staging/lustre/lustre/include/lustre_net.h
@@ -261,7 +261,10 @@
 
 #define MDS_MAXREQSIZE		(5 * 1024)	/* >= 4736 */
 
-#define OST_MAXREQSIZE		(5 * 1024)
+/**
+ * FIEMAP request can be 4K+ for now
+ */
+#define OST_MAXREQSIZE		(16 * 1024)
 
 /* Macro to hide a typecast. */
 #define ptlrpc_req_async_args(req) ((void *)&req->rq_async_args)
@@ -570,13 +573,13 @@ struct ptlrpc_nrs_pol_ops {
 	 *
 	 * \param[in,out] policy The policy being initialized
 	 */
-	int	(*op_policy_init) (struct ptlrpc_nrs_policy *policy);
+	int	(*op_policy_init)(struct ptlrpc_nrs_policy *policy);
 	/**
 	 * Called during policy unregistration; this operation is optional.
 	 *
 	 * \param[in,out] policy The policy being unregistered/finalized
 	 */
-	void	(*op_policy_fini) (struct ptlrpc_nrs_policy *policy);
+	void	(*op_policy_fini)(struct ptlrpc_nrs_policy *policy);
 	/**
 	 * Called when activating a policy via lprocfs; policies allocate and
 	 * initialize their resources here; this operation is optional.
@@ -585,7 +588,7 @@ struct ptlrpc_nrs_pol_ops {
 	 *
 	 * \see nrs_policy_start_locked()
 	 */
-	int	(*op_policy_start) (struct ptlrpc_nrs_policy *policy);
+	int	(*op_policy_start)(struct ptlrpc_nrs_policy *policy);
 	/**
 	 * Called when deactivating a policy via lprocfs; policies deallocate
 	 * their resources here; this operation is optional
@@ -594,7 +597,7 @@ struct ptlrpc_nrs_pol_ops {
 	 *
 	 * \see nrs_policy_stop0()
 	 */
-	void	(*op_policy_stop) (struct ptlrpc_nrs_policy *policy);
+	void	(*op_policy_stop)(struct ptlrpc_nrs_policy *policy);
 	/**
 	 * Used for policy-specific operations; i.e. not generic ones like
 	 * \e PTLRPC_NRS_CTL_START and \e PTLRPC_NRS_CTL_GET_INFO; analogous
@@ -610,8 +613,8 @@ struct ptlrpc_nrs_pol_ops {
 	 *
 	 * \see ptlrpc_nrs_policy_control()
 	 */
-	int	(*op_policy_ctl) (struct ptlrpc_nrs_policy *policy,
-				  enum ptlrpc_nrs_ctl opc, void *arg);
+	int	(*op_policy_ctl)(struct ptlrpc_nrs_policy *policy,
+				 enum ptlrpc_nrs_ctl opc, void *arg);
 
 	/**
 	 * Called when obtaining references to the resources of the resource
@@ -648,11 +651,11 @@ struct ptlrpc_nrs_pol_ops {
 	 * \see ptlrpc_nrs_req_initialize()
 	 * \see ptlrpc_nrs_hpreq_add_nolock()
 	 */
-	int	(*op_res_get) (struct ptlrpc_nrs_policy *policy,
-			       struct ptlrpc_nrs_request *nrq,
-			       const struct ptlrpc_nrs_resource *parent,
-			       struct ptlrpc_nrs_resource **resp,
-			       bool moving_req);
+	int	(*op_res_get)(struct ptlrpc_nrs_policy *policy,
+			      struct ptlrpc_nrs_request *nrq,
+			      const struct ptlrpc_nrs_resource *parent,
+			      struct ptlrpc_nrs_resource **resp,
+			      bool moving_req);
 	/**
 	 * Called when releasing references taken for resources in the resource
 	 * hierarchy for the request; this operation is optional.
@@ -663,8 +666,8 @@ struct ptlrpc_nrs_pol_ops {
 	 * \see ptlrpc_nrs_req_finalize()
 	 * \see ptlrpc_nrs_hpreq_add_nolock()
 	 */
-	void	(*op_res_put) (struct ptlrpc_nrs_policy *policy,
-			       const struct ptlrpc_nrs_resource *res);
+	void	(*op_res_put)(struct ptlrpc_nrs_policy *policy,
+			      const struct ptlrpc_nrs_resource *res);
 
 	/**
 	 * Obtains a request for handling from the policy, and optionally
@@ -683,8 +686,8 @@ struct ptlrpc_nrs_pol_ops {
 	 * \see ptlrpc_nrs_req_get_nolock()
 	 */
 	struct ptlrpc_nrs_request *
-		(*op_req_get) (struct ptlrpc_nrs_policy *policy, bool peek,
-			       bool force);
+		(*op_req_get)(struct ptlrpc_nrs_policy *policy, bool peek,
+			      bool force);
 	/**
 	 * Called when attempting to add a request to a policy for later
 	 * handling; this operation is mandatory.
@@ -697,8 +700,8 @@ struct ptlrpc_nrs_pol_ops {
 	 *
 	 * \see ptlrpc_nrs_req_add_nolock()
 	 */
-	int	(*op_req_enqueue) (struct ptlrpc_nrs_policy *policy,
-				   struct ptlrpc_nrs_request *nrq);
+	int	(*op_req_enqueue)(struct ptlrpc_nrs_policy *policy,
+				  struct ptlrpc_nrs_request *nrq);
 	/**
 	 * Removes a request from the policy's set of pending requests. Normally
 	 * called after a request has been polled successfully from the policy
@@ -707,8 +710,8 @@ struct ptlrpc_nrs_pol_ops {
 	 * \param[in,out] policy The policy the request \a nrq belongs to
 	 * \param[in,out] nrq    The request to dequeue
 	 */
-	void	(*op_req_dequeue) (struct ptlrpc_nrs_policy *policy,
-				   struct ptlrpc_nrs_request *nrq);
+	void	(*op_req_dequeue)(struct ptlrpc_nrs_policy *policy,
+				  struct ptlrpc_nrs_request *nrq);
 	/**
 	 * Called after the request being carried out. Could be used for
 	 * job/resource control; this operation is optional.
@@ -721,8 +724,8 @@ struct ptlrpc_nrs_pol_ops {
 	 *
 	 * \see ptlrpc_nrs_req_stop_nolock()
 	 */
-	void	(*op_req_stop) (struct ptlrpc_nrs_policy *policy,
-				struct ptlrpc_nrs_request *nrq);
+	void	(*op_req_stop)(struct ptlrpc_nrs_policy *policy,
+			       struct ptlrpc_nrs_request *nrq);
 	/**
 	 * Registers the policy's lprocfs interface with a PTLRPC service.
 	 *
@@ -731,7 +734,7 @@ struct ptlrpc_nrs_pol_ops {
 	 * \retval 0	success
 	 * \retval != 0	error
 	 */
-	int	(*op_lprocfs_init) (struct ptlrpc_service *svc);
+	int	(*op_lprocfs_init)(struct ptlrpc_service *svc);
 	/**
 	 * Unegisters the policy's lprocfs interface with a PTLRPC service.
 	 *
@@ -743,7 +746,7 @@ struct ptlrpc_nrs_pol_ops {
 	 *
 	 * \param[in] svc The service
 	 */
-	void	(*op_lprocfs_fini) (struct ptlrpc_service *svc);
+	void	(*op_lprocfs_fini)(struct ptlrpc_service *svc);
 };
 
 /**
@@ -1628,7 +1631,7 @@ static inline bool ptlrpc_nrs_req_can_move(struct ptlrpc_request *req)
 /**
  * Returns 1 if request buffer at offset \a index was already swabbed
  */
-static inline int lustre_req_swabbed(struct ptlrpc_request *req, int index)
+static inline int lustre_req_swabbed(struct ptlrpc_request *req, size_t index)
 {
 	LASSERT(index < sizeof(req->rq_req_swab_mask) * 8);
 	return req->rq_req_swab_mask & (1 << index);
@@ -1637,7 +1640,7 @@ static inline int lustre_req_swabbed(struct ptlrpc_request *req, int index)
 /**
  * Returns 1 if request reply buffer at offset \a index was already swabbed
  */
-static inline int lustre_rep_swabbed(struct ptlrpc_request *req, int index)
+static inline int lustre_rep_swabbed(struct ptlrpc_request *req, size_t index)
 {
 	LASSERT(index < sizeof(req->rq_rep_swab_mask) * 8);
 	return req->rq_rep_swab_mask & (1 << index);
@@ -1662,7 +1665,8 @@ static inline int ptlrpc_rep_need_swab(struct ptlrpc_request *req)
 /**
  * Mark request buffer at offset \a index that it was already swabbed
  */
-static inline void lustre_set_req_swabbed(struct ptlrpc_request *req, int index)
+static inline void lustre_set_req_swabbed(struct ptlrpc_request *req,
+					  size_t index)
 {
 	LASSERT(index < sizeof(req->rq_req_swab_mask) * 8);
 	LASSERT((req->rq_req_swab_mask & (1 << index)) == 0);
@@ -1672,7 +1676,8 @@ static inline void lustre_set_req_swabbed(struct ptlrpc_request *req, int index)
 /**
  * Mark request reply buffer at offset \a index that it was already swabbed
  */
-static inline void lustre_set_rep_swabbed(struct ptlrpc_request *req, int index)
+static inline void lustre_set_rep_swabbed(struct ptlrpc_request *req,
+					  size_t index)
 {
 	LASSERT(index < sizeof(req->rq_rep_swab_mask) * 8);
 	LASSERT((req->rq_rep_swab_mask & (1 << index)) == 0);
@@ -2403,7 +2408,6 @@ int ptlrpc_send_reply(struct ptlrpc_request *req, int flags);
 int ptlrpc_reply(struct ptlrpc_request *req);
 int ptlrpc_send_error(struct ptlrpc_request *req, int difficult);
 int ptlrpc_error(struct ptlrpc_request *req);
-void ptlrpc_resend_req(struct ptlrpc_request *request);
 int ptlrpc_at_get_net_latency(struct ptlrpc_request *req);
 int ptl_send_rpc(struct ptlrpc_request *request, int noreply);
 int ptlrpc_register_rqbd(struct ptlrpc_request_buffer_desc *rqbd);
@@ -2423,23 +2427,17 @@ struct ptlrpc_connection *ptlrpc_uuid_to_connection(struct obd_uuid *uuid);
 
 int ptlrpc_queue_wait(struct ptlrpc_request *req);
 int ptlrpc_replay_req(struct ptlrpc_request *req);
-int ptlrpc_unregister_reply(struct ptlrpc_request *req, int async);
 void ptlrpc_abort_inflight(struct obd_import *imp);
 void ptlrpc_abort_set(struct ptlrpc_request_set *set);
 
 struct ptlrpc_request_set *ptlrpc_prep_set(void);
 struct ptlrpc_request_set *ptlrpc_prep_fcset(int max, set_producer_func func,
 					     void *arg);
-int ptlrpc_set_next_timeout(struct ptlrpc_request_set *);
 int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set);
 int ptlrpc_set_wait(struct ptlrpc_request_set *);
-int ptlrpc_expired_set(void *data);
-void ptlrpc_interrupted_set(void *data);
 void ptlrpc_mark_interrupted(struct ptlrpc_request *req);
 void ptlrpc_set_destroy(struct ptlrpc_request_set *);
 void ptlrpc_set_add_req(struct ptlrpc_request_set *, struct ptlrpc_request *);
-void ptlrpc_set_add_new_req(struct ptlrpcd_ctl *pc,
-			    struct ptlrpc_request *req);
 
 void ptlrpc_free_rq_pool(struct ptlrpc_request_pool *pool);
 int ptlrpc_add_rqs_to_pool(struct ptlrpc_request_pool *pool, int num_rq);
@@ -2611,9 +2609,9 @@ int ptlrpc_reconnect_import(struct obd_import *imp);
  * @{
  */
 int ptlrpc_buf_need_swab(struct ptlrpc_request *req, const int inout,
-			 int index);
+			 u32 index);
 void ptlrpc_buf_set_swabbed(struct ptlrpc_request *req, const int inout,
-			    int index);
+			    u32 index);
 int ptlrpc_unpack_rep_msg(struct ptlrpc_request *req, int len);
 int ptlrpc_unpack_req_msg(struct ptlrpc_request *req, int len);
 
@@ -2632,27 +2630,27 @@ int lustre_shrink_msg(struct lustre_msg *msg, int segment,
 		      unsigned int newlen, int move_data);
 void lustre_free_reply_state(struct ptlrpc_reply_state *rs);
 int __lustre_unpack_msg(struct lustre_msg *m, int len);
-int lustre_msg_hdr_size(__u32 magic, int count);
-int lustre_msg_size(__u32 magic, int count, __u32 *lengths);
-int lustre_msg_size_v2(int count, __u32 *lengths);
-int lustre_packed_msg_size(struct lustre_msg *msg);
-int lustre_msg_early_size(void);
-void *lustre_msg_buf_v2(struct lustre_msg_v2 *m, int n, int min_size);
-void *lustre_msg_buf(struct lustre_msg *m, int n, int minlen);
-int lustre_msg_buflen(struct lustre_msg *m, int n);
-int lustre_msg_bufcount(struct lustre_msg *m);
-char *lustre_msg_string(struct lustre_msg *m, int n, int max_len);
+u32 lustre_msg_hdr_size(__u32 magic, u32 count);
+u32 lustre_msg_size(__u32 magic, int count, __u32 *lengths);
+u32 lustre_msg_size_v2(int count, __u32 *lengths);
+u32 lustre_packed_msg_size(struct lustre_msg *msg);
+u32 lustre_msg_early_size(void);
+void *lustre_msg_buf_v2(struct lustre_msg_v2 *m, u32 n, u32 min_size);
+void *lustre_msg_buf(struct lustre_msg *m, u32 n, u32 minlen);
+u32 lustre_msg_buflen(struct lustre_msg *m, u32 n);
+u32 lustre_msg_bufcount(struct lustre_msg *m);
+char *lustre_msg_string(struct lustre_msg *m, u32 n, u32 max_len);
 __u32 lustre_msghdr_get_flags(struct lustre_msg *msg);
 void lustre_msghdr_set_flags(struct lustre_msg *msg, __u32 flags);
 __u32 lustre_msg_get_flags(struct lustre_msg *msg);
-void lustre_msg_add_flags(struct lustre_msg *msg, int flags);
-void lustre_msg_set_flags(struct lustre_msg *msg, int flags);
-void lustre_msg_clear_flags(struct lustre_msg *msg, int flags);
+void lustre_msg_add_flags(struct lustre_msg *msg, u32 flags);
+void lustre_msg_set_flags(struct lustre_msg *msg, u32 flags);
+void lustre_msg_clear_flags(struct lustre_msg *msg, u32 flags);
 __u32 lustre_msg_get_op_flags(struct lustre_msg *msg);
-void lustre_msg_add_op_flags(struct lustre_msg *msg, int flags);
+void lustre_msg_add_op_flags(struct lustre_msg *msg, u32 flags);
 struct lustre_handle *lustre_msg_get_handle(struct lustre_msg *msg);
 __u32 lustre_msg_get_type(struct lustre_msg *msg);
-void lustre_msg_add_version(struct lustre_msg *msg, int version);
+void lustre_msg_add_version(struct lustre_msg *msg, u32 version);
 __u32 lustre_msg_get_opc(struct lustre_msg *msg);
 __u64 lustre_msg_get_last_committed(struct lustre_msg *msg);
 __u64 *lustre_msg_get_versions(struct lustre_msg *msg);
diff --git a/drivers/staging/lustre/lustre/include/lustre_param.h b/drivers/staging/lustre/lustre/include/lustre_param.h
index 82aadd32c2b8..8061a04ee806 100644
--- a/drivers/staging/lustre/lustre/include/lustre_param.h
+++ b/drivers/staging/lustre/lustre/include/lustre_param.h
@@ -39,6 +39,9 @@
 #ifndef _LUSTRE_PARAM_H
 #define _LUSTRE_PARAM_H
 
+#include "../../include/linux/libcfs/libcfs.h"
+#include "../../include/linux/lnet/types.h"
+
 /** \defgroup param param
  *
  * @{
diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_patchless_compat.h b/drivers/staging/lustre/lustre/include/lustre_patchless_compat.h
index 5842cb18b49e..5842cb18b49e 100644
--- a/drivers/staging/lustre/lustre/include/linux/lustre_patchless_compat.h
+++ b/drivers/staging/lustre/lustre/include/lustre_patchless_compat.h
diff --git a/drivers/staging/lustre/lustre/include/lustre_req_layout.h b/drivers/staging/lustre/lustre/include/lustre_req_layout.h
index 544a43c862b9..a13558e53274 100644
--- a/drivers/staging/lustre/lustre/include/lustre_req_layout.h
+++ b/drivers/staging/lustre/lustre/include/lustre_req_layout.h
@@ -76,7 +76,8 @@ void req_capsule_init(struct req_capsule *pill, struct ptlrpc_request *req,
 void req_capsule_fini(struct req_capsule *pill);
 
 void req_capsule_set(struct req_capsule *pill, const struct req_format *fmt);
-int req_capsule_filled_sizes(struct req_capsule *pill, enum req_location loc);
+size_t req_capsule_filled_sizes(struct req_capsule *pill,
+				enum req_location loc);
 int  req_capsule_server_pack(struct req_capsule *pill);
 
 void *req_capsule_client_get(struct req_capsule *pill,
@@ -86,27 +87,27 @@ void *req_capsule_client_swab_get(struct req_capsule *pill,
 				  void *swabber);
 void *req_capsule_client_sized_get(struct req_capsule *pill,
 				   const struct req_msg_field *field,
-				   int len);
+				   u32 len);
 void *req_capsule_server_get(struct req_capsule *pill,
 			     const struct req_msg_field *field);
 void *req_capsule_server_sized_get(struct req_capsule *pill,
 				   const struct req_msg_field *field,
-				   int len);
+				   u32 len);
 void *req_capsule_server_swab_get(struct req_capsule *pill,
 				  const struct req_msg_field *field,
 				  void *swabber);
 void *req_capsule_server_sized_swab_get(struct req_capsule *pill,
 					const struct req_msg_field *field,
-					int len, void *swabber);
+					u32 len, void *swabber);
 
 void req_capsule_set_size(struct req_capsule *pill,
 			  const struct req_msg_field *field,
-			  enum req_location loc, int size);
-int req_capsule_get_size(const struct req_capsule *pill,
+			  enum req_location loc, u32 size);
+u32 req_capsule_get_size(const struct req_capsule *pill,
 			 const struct req_msg_field *field,
 			 enum req_location loc);
-int req_capsule_msg_size(struct req_capsule *pill, enum req_location loc);
-int req_capsule_fmt_size(__u32 magic, const struct req_format *fmt,
+u32 req_capsule_msg_size(struct req_capsule *pill, enum req_location loc);
+u32 req_capsule_fmt_size(__u32 magic, const struct req_format *fmt,
 			 enum req_location loc);
 void req_capsule_extend(struct req_capsule *pill, const struct req_format *fmt);
 
@@ -115,8 +116,7 @@ int req_capsule_has_field(const struct req_capsule *pill,
 			  enum req_location loc);
 void req_capsule_shrink(struct req_capsule *pill,
 			const struct req_msg_field *field,
-			unsigned int newlen,
-			enum req_location loc);
+			u32 newlen, enum req_location loc);
 int  req_layout_init(void);
 void req_layout_fini(void);
 
@@ -149,14 +149,11 @@ extern struct req_format RQF_MDS_GETATTR;
 extern struct req_format RQF_MDS_GETATTR_NAME;
 extern struct req_format RQF_MDS_CLOSE;
 extern struct req_format RQF_MDS_RELEASE_CLOSE;
-extern struct req_format RQF_MDS_PIN;
-extern struct req_format RQF_MDS_UNPIN;
 extern struct req_format RQF_MDS_CONNECT;
 extern struct req_format RQF_MDS_DISCONNECT;
 extern struct req_format RQF_MDS_GET_INFO;
 extern struct req_format RQF_MDS_READPAGE;
 extern struct req_format RQF_MDS_WRITEPAGE;
-extern struct req_format RQF_MDS_IS_SUBDIR;
 extern struct req_format RQF_MDS_DONE_WRITING;
 extern struct req_format RQF_MDS_REINT;
 extern struct req_format RQF_MDS_REINT_CREATE;
diff --git a/drivers/staging/lustre/lustre/include/lustre_ver.h b/drivers/staging/lustre/lustre/include/lustre_ver.h
index 64559a16f4de..19c9135e2273 100644
--- a/drivers/staging/lustre/lustre/include/lustre_ver.h
+++ b/drivers/staging/lustre/lustre/include/lustre_ver.h
@@ -2,14 +2,21 @@
 #define _LUSTRE_VER_H_
 
 #define LUSTRE_MAJOR 2
-#define LUSTRE_MINOR 4
-#define LUSTRE_PATCH 60
+#define LUSTRE_MINOR 6
+#define LUSTRE_PATCH 99
 #define LUSTRE_FIX 0
-#define LUSTRE_VERSION_STRING "2.4.60"
+#define LUSTRE_VERSION_STRING "2.6.99"
 
-#define LUSTRE_VERSION_CODE OBD_OCD_VERSION(LUSTRE_MAJOR, \
-					    LUSTRE_MINOR, LUSTRE_PATCH, \
-					    LUSTRE_FIX)
+#define OBD_OCD_VERSION(major, minor, patch, fix)			\
+	(((major) << 24) + ((minor) << 16) + ((patch) << 8) + (fix))
+
+#define OBD_OCD_VERSION_MAJOR(version)	((int)((version) >> 24) & 255)
+#define OBD_OCD_VERSION_MINOR(version)	((int)((version) >> 16) & 255)
+#define OBD_OCD_VERSION_PATCH(version)	((int)((version) >>  8) & 255)
+#define OBD_OCD_VERSION_FIX(version)	((int)((version) >>  0) & 255)
+
+#define LUSTRE_VERSION_CODE						\
+	OBD_OCD_VERSION(LUSTRE_MAJOR, LUSTRE_MINOR, LUSTRE_PATCH, LUSTRE_FIX)
 
 /*
  * If lustre version of client and servers it connects to differs by more
diff --git a/drivers/staging/lustre/lustre/include/obd.h b/drivers/staging/lustre/lustre/include/obd.h
index a1bc2c478ff9..f6fc4dd05bd6 100644
--- a/drivers/staging/lustre/lustre/include/obd.h
+++ b/drivers/staging/lustre/lustre/include/obd.h
@@ -35,21 +35,13 @@
 
 #include <linux/spinlock.h>
 
-#define IOC_OSC_TYPE	 'h'
-#define IOC_OSC_MIN_NR       20
-#define IOC_OSC_SET_ACTIVE   _IOWR(IOC_OSC_TYPE, 21, struct obd_device *)
-#define IOC_OSC_MAX_NR       50
-
-#define IOC_MDC_TYPE	 'i'
-#define IOC_MDC_MIN_NR       20
-#define IOC_MDC_MAX_NR       50
-
 #include "lustre/lustre_idl.h"
 #include "lustre_lib.h"
 #include "lu_ref.h"
 #include "lustre_export.h"
 #include "lustre_fid.h"
 #include "lustre_fld.h"
+#include "lustre_handles.h"
 #include "lustre_intent.h"
 
 #define MAX_OBD_DEVICES 8192
@@ -81,6 +73,13 @@ static inline void loi_init(struct lov_oinfo *loi)
 {
 }
 
+/*
+ * If we are unable to get the maximum object size from the OST in
+ * ocd_maxbytes using OBD_CONNECT_MAXBYTES, then we fall back to using
+ * the old maximum object size from ext3.
+ */
+#define LUSTRE_EXT3_STRIPE_MAXBYTES 0x1fffffff000ULL
+
 struct lov_stripe_md {
 	atomic_t     lsm_refc;
 	spinlock_t	lsm_lock;
@@ -89,31 +88,17 @@ struct lov_stripe_md {
 	/* maximum possible file size, might change as OSTs status changes,
 	 * e.g. disconnected, deactivated
 	 */
-	__u64	    lsm_maxbytes;
-	struct {
-		/* Public members. */
-		struct ost_id lw_object_oi; /* lov object id/seq */
-
-		/* LOV-private members start here -- only for use in lov/. */
-		__u32 lw_magic;
-		__u32 lw_stripe_size;      /* size of the stripe */
-		__u32 lw_pattern;	  /* striping pattern (RAID0, RAID1) */
-		__u16 lw_stripe_count;  /* number of objects being striped over */
-		__u16 lw_layout_gen;       /* generation of the layout */
-		char  lw_pool_name[LOV_MAXPOOLNAME]; /* pool name */
-	} lsm_wire;
-
+	__u64		lsm_maxbytes;
+	struct ost_id	lsm_oi;
+	__u32		lsm_magic;
+	__u32		lsm_stripe_size;
+	__u32		lsm_pattern;	/* striping pattern (RAID0, RAID1) */
+	__u16		lsm_stripe_count;
+	__u16		lsm_layout_gen;
+	char		lsm_pool_name[LOV_MAXPOOLNAME + 1];
 	struct lov_oinfo *lsm_oinfo[0];
 };
 
-#define lsm_oi		 lsm_wire.lw_object_oi
-#define lsm_magic	lsm_wire.lw_magic
-#define lsm_layout_gen   lsm_wire.lw_layout_gen
-#define lsm_stripe_size  lsm_wire.lw_stripe_size
-#define lsm_pattern      lsm_wire.lw_pattern
-#define lsm_stripe_count lsm_wire.lw_stripe_count
-#define lsm_pool_name    lsm_wire.lw_pool_name
-
 static inline bool lsm_is_released(struct lov_stripe_md *lsm)
 {
 	return !!(lsm->lsm_pattern & LOV_PATTERN_F_RELEASED);
@@ -177,31 +162,10 @@ struct obd_type {
 struct brw_page {
 	u64 off;
 	struct page *pg;
-	int count;
+	unsigned int count;
 	u32 flag;
 };
 
-/* llog contexts */
-enum llog_ctxt_id {
-	LLOG_CONFIG_ORIG_CTXT  =  0,
-	LLOG_CONFIG_REPL_CTXT,
-	LLOG_MDS_OST_ORIG_CTXT,
-	LLOG_MDS_OST_REPL_CTXT,
-	LLOG_SIZE_ORIG_CTXT,
-	LLOG_SIZE_REPL_CTXT,
-	LLOG_RD1_ORIG_CTXT,
-	LLOG_RD1_REPL_CTXT,
-	LLOG_TEST_ORIG_CTXT,
-	LLOG_TEST_REPL_CTXT,
-	LLOG_LOVEA_ORIG_CTXT,
-	LLOG_LOVEA_REPL_CTXT,
-	LLOG_CHANGELOG_ORIG_CTXT,	/**< changelog generation on mdd */
-	LLOG_CHANGELOG_REPL_CTXT,	/**< changelog access on clients */
-	LLOG_CHANGELOG_USER_ORIG_CTXT,	/**< for multiple changelog consumers */
-	LLOG_AGENT_ORIG_CTXT,		/**< agent requests generation on cdt */
-	LLOG_MAX_CTXTS
-};
-
 struct timeout_item {
 	enum timeout_event ti_event;
 	unsigned long	 ti_timeout;
@@ -211,11 +175,12 @@ struct timeout_item {
 	struct list_head	 ti_chain;
 };
 
-#define OSC_MAX_RIF_DEFAULT       8
-#define OSC_MAX_RIF_MAX	 256
-#define OSC_MAX_DIRTY_DEFAULT  (OSC_MAX_RIF_DEFAULT * 4)
-#define OSC_MAX_DIRTY_MB_MAX   2048     /* arbitrary, but < MAX_LONG bytes */
-#define OSC_DEFAULT_RESENDS      10
+#define OBD_MAX_RIF_DEFAULT	8
+#define OBD_MAX_RIF_MAX		512
+#define OSC_MAX_RIF_MAX		256
+#define OSC_MAX_DIRTY_DEFAULT	(OBD_MAX_RIF_DEFAULT * 4)
+#define OSC_MAX_DIRTY_MB_MAX	2048	/* arbitrary, but < MAX_LONG bytes */
+#define OSC_DEFAULT_RESENDS	10
 
 /* possible values for fo_sync_lock_cancel */
 enum {
@@ -225,40 +190,74 @@ enum {
 	NUM_SYNC_ON_CANCEL_STATES
 };
 
-#define MDC_MAX_RIF_DEFAULT       8
-#define MDC_MAX_RIF_MAX	 512
-
 enum obd_cl_sem_lock_class {
 	OBD_CLI_SEM_NORMAL,
 	OBD_CLI_SEM_MGC,
 	OBD_CLI_SEM_MDCOSC,
 };
 
+/*
+ * Limit reply buffer size for striping data to one x86_64 page. This
+ * value is chosen to fit the striping data for common use cases while
+ * staying well below the limit at which the buffer must be backed by
+ * vmalloc(). Excessive use of vmalloc() may cause spinlock contention
+ * on the MDS.
+ */
+#define OBD_MAX_DEFAULT_EA_SIZE		4096
+#define OBD_MAX_DEFAULT_COOKIE_SIZE	4096
+
 struct mdc_rpc_lock;
 struct obd_import;
 struct client_obd {
 	struct rw_semaphore  cl_sem;
 	struct obd_uuid	  cl_target_uuid;
 	struct obd_import       *cl_import; /* ptlrpc connection state */
-	int		      cl_conn_count;
-	/* max_mds_easize is purely a performance thing so we don't have to
-	 * call obd_size_diskmd() all the time.
+	size_t			 cl_conn_count;
+	/*
+	 * Cache maximum and default values for easize and cookiesize. This is
+	 * strictly a performance optimization to minimize calls to
+	 * obd_size_diskmd(). The default values are used to calculate the
+	 * initial size of a request buffer. The ptlrpc layer will resize the
+	 * buffer as needed to accommodate a larger reply from the
+	 * server. The default values should be small enough to avoid wasted
+	 * memory and excessive use of vmalloc(), yet large enough to avoid
+	 * reallocating the buffer in the common use case.
 	 */
-	int			 cl_default_mds_easize;
-	int			 cl_max_mds_easize;
-	int			 cl_default_mds_cookiesize;
-	int			 cl_max_mds_cookiesize;
+	/*
+	 * Default EA size for striping attributes. It is initialized at
+	 * mount-time based on the default stripe width of the filesystem,
+	 * then it tracks the largest observed EA size advertised by
+	 * the MDT, up to a maximum value of OBD_MAX_DEFAULT_EA_SIZE.
+	 */
+	u32			 cl_default_mds_easize;
+	/* Maximum possible EA size computed at mount-time based on
+	 * the number of OSTs in the filesystem. May be increased at
+	 * run-time if a larger observed size is advertised by the MDT.
+	 */
+	u32			 cl_max_mds_easize;
+	/* Default cookie size for llog cookies (see struct llog_cookie). It is
+	 * initialized to zero at mount-time, then it tracks the largest
+	 * observed cookie size advertised by the MDT, up to a maximum value of
+	 * OBD_MAX_DEFAULT_COOKIE_SIZE. Note that llog_cookies are not
+	 * used by clients communicating with MDS versions 2.4.0 and later.
+	 */
+	u32			 cl_default_mds_cookiesize;
+	/* Maximum possible cookie size computed at mount-time based on
+	 * the number of OSTs in the filesystem. May be increased at
+	 * run-time if a larger observed size is advertised by the MDT.
+	 */
+	u32			 cl_max_mds_cookiesize;
 
 	enum lustre_sec_part     cl_sp_me;
 	enum lustre_sec_part     cl_sp_to;
 	struct sptlrpc_flavor    cl_flvr_mgc;   /* fixed flavor of mgc->mgs */
 
 	/* the grant values are protected by loi_list_lock below */
-	long		     cl_dirty;	 /* all _dirty_ in bytes */
-	long		     cl_dirty_max;     /* allowed w/o rpc */
-	long		     cl_dirty_transit; /* dirty synchronous */
-	long		     cl_avail_grant;   /* bytes of credit for ost */
-	long		     cl_lost_grant;    /* lost credits (trunc) */
+	unsigned long		 cl_dirty_pages;	/* all _dirty_ in pahges */
+	unsigned long		 cl_dirty_max_pages;	/* allowed w/o rpc */
+	unsigned long		 cl_dirty_transit;	/* dirty synchronous */
+	unsigned long		 cl_avail_grant;	/* bytes of credit for ost */
+	unsigned long		 cl_lost_grant;		/* lost credits (trunc) */
 
 	/* since we allocate grant by blocks, we don't know how many grant will
 	 * be used to add a page into cache. As a solution, we reserve maximum
@@ -275,8 +274,7 @@ struct client_obd {
 	 * the extent size. A chunk is max(PAGE_SIZE, OST block size)
 	 */
 	int		  cl_chunkbits;
-	int		  cl_chunk;
-	int		  cl_extent_tax; /* extent overhead, by bytes */
+	unsigned int	  cl_extent_tax; /* extent overhead, by bytes */
 
 	/* keep track of objects that have lois that contain pages which
 	 * have been queued for async brw.  this lock also protects the
@@ -301,13 +299,13 @@ struct client_obd {
 	struct list_head	       cl_loi_hp_ready_list;
 	struct list_head	       cl_loi_write_list;
 	struct list_head	       cl_loi_read_list;
-	int		      cl_r_in_flight;
-	int		      cl_w_in_flight;
+	__u32			 cl_r_in_flight;
+	__u32			 cl_w_in_flight;
 	/* just a sum of the loi/lop pending numbers to be exported by sysfs */
 	atomic_t	     cl_pending_w_pages;
 	atomic_t	     cl_pending_r_pages;
 	__u32			 cl_max_pages_per_rpc;
-	int		      cl_max_rpcs_in_flight;
+	__u32			 cl_max_rpcs_in_flight;
 	struct obd_histogram     cl_read_rpc_hist;
 	struct obd_histogram     cl_write_rpc_hist;
 	struct obd_histogram     cl_read_page_hist;
@@ -318,13 +316,13 @@ struct client_obd {
 	/* lru for osc caching pages */
 	struct cl_client_cache	*cl_cache;
 	struct list_head	 cl_lru_osc; /* member of cl_cache->ccc_lru */
-	atomic_t		*cl_lru_left;
-	atomic_t		 cl_lru_busy;
+	atomic_long_t		*cl_lru_left;
+	atomic_long_t		 cl_lru_busy;
+	atomic_long_t		 cl_lru_in_list;
 	atomic_t		 cl_lru_shrinkers;
-	atomic_t		 cl_lru_in_list;
 	struct list_head	 cl_lru_list; /* lru page list */
 	spinlock_t		 cl_lru_list_lock; /* page list protector */
-	atomic_t		 cl_unstable_count;
+	atomic_long_t		 cl_unstable_count;
 
 	/* number of in flight destroy rpcs is limited to max_rpcs_in_flight */
 	atomic_t	     cl_destroy_in_flight;
@@ -350,7 +348,7 @@ struct client_obd {
 	/* used by quotacheck when the servers are older than 2.4 */
 	int		      cl_qchk_stat; /* quotacheck stat of the peer */
 #define CL_NOT_QUOTACHECKED 1   /* client->cl_qchk_stat init value */
-#if LUSTRE_VERSION_CODE >= OBD_OCD_VERSION(2, 7, 50, 0)
+#if OBD_OCD_VERSION(2, 7, 53, 0) < LUSTRE_VERSION_CODE
 #warning "please consider removing quotacheck compatibility code"
 #endif
 
@@ -431,7 +429,7 @@ struct lov_obd {
 struct lmv_tgt_desc {
 	struct obd_uuid		ltd_uuid;
 	struct obd_export	*ltd_exp;
-	int			ltd_idx;
+	u32			ltd_idx;
 	struct mutex		ltd_fid_mutex;
 	unsigned long		ltd_active:1; /* target up for requests */
 };
@@ -458,9 +456,8 @@ struct lmv_obd {
 	int			max_def_easize;
 	int			max_cookiesize;
 	int			max_def_cookiesize;
-	int			server_timeout;
 
-	int			tgts_size; /* size of tgts array */
+	u32			tgts_size; /* size of tgts array */
 	struct lmv_tgt_desc	**tgts;
 
 	struct obd_connect_data	conn_data;
@@ -470,12 +467,11 @@ struct lmv_obd {
 struct niobuf_local {
 	__u64		lnb_file_offset;
 	__u32		lnb_page_offset;
-	__u32		len;
-	__u32		flags;
-	struct page	*page;
-	struct dentry	*dentry;
-	int		lnb_grant_used;
-	int		rc;
+	__u32		lnb_len;
+	__u32		lnb_flags;
+	struct page	*lnb_page;
+	void		*lnb_data;
+	int		lnb_rc;
 };
 
 #define LUSTRE_FLD_NAME	 "fld"
@@ -517,7 +513,6 @@ struct niobuf_local {
 #define N_LOCAL_TEMP_PAGE 0x10000000
 
 struct obd_trans_info {
-	__u64		    oti_transno;
 	__u64		    oti_xid;
 	/* Only used on the server side for tracking acks. */
 	struct oti_req_ack_lock {
@@ -527,50 +522,11 @@ struct obd_trans_info {
 	void		    *oti_handle;
 	struct llog_cookie       oti_onecookie;
 	struct llog_cookie      *oti_logcookies;
-	int		      oti_numcookies;
-	/** synchronous write is needed */
-	unsigned long		 oti_sync_write:1;
 
-	/* initial thread handling transaction */
-	struct ptlrpc_thread *oti_thread;
-	__u32		    oti_conn_cnt;
 	/** VBR: versions */
 	__u64		    oti_pre_version;
-	/** JobID */
-	char		    *oti_jobid;
-
-	struct obd_uuid	 *oti_ost_uuid;
 };
 
-static inline void oti_alloc_cookies(struct obd_trans_info *oti,
-				     int num_cookies)
-{
-	if (!oti)
-		return;
-
-	if (num_cookies == 1)
-		oti->oti_logcookies = &oti->oti_onecookie;
-	else
-		oti->oti_logcookies = libcfs_kvzalloc(num_cookies * sizeof(oti->oti_onecookie),
-						      GFP_NOFS);
-
-	oti->oti_numcookies = num_cookies;
-}
-
-static inline void oti_free_cookies(struct obd_trans_info *oti)
-{
-	if (!oti || !oti->oti_logcookies)
-		return;
-
-	if (oti->oti_logcookies == &oti->oti_onecookie)
-		LASSERT(oti->oti_numcookies == 1);
-	else
-		kvfree(oti->oti_logcookies);
-
-	oti->oti_logcookies = NULL;
-	oti->oti_numcookies = 0;
-}
-
 /*
  * Events signalled through obd_notify() upcall-chain.
  */
@@ -616,7 +572,6 @@ struct target_recovery_data {
 };
 
 struct obd_llog_group {
-	int		olg_seq;
 	struct llog_ctxt  *olg_ctxts[LLOG_MAX_CTXTS];
 	wait_queue_head_t	olg_waitq;
 	spinlock_t	   olg_lock;
@@ -625,7 +580,6 @@ struct obd_llog_group {
 
 /* corresponds to one of the obd's */
 #define OBD_DEVICE_MAGIC	0XAB5CD6EF
-#define OBD_DEV_BY_DEVNAME      0xffffd0de
 
 struct lvfs_run_ctxt {
 	struct dt_device *dt;
@@ -653,7 +607,6 @@ struct obd_device {
 		      obd_starting:1,      /* started setup */
 		      obd_force:1,	 /* cleanup with > 0 obd refcount */
 		      obd_fail:1,	 /* cleanup with failover */
-		      obd_async_recov:1, /* allow asynchronous orphan cleanup */
 		      obd_no_conn:1,       /* deny new connections */
 		      obd_inactive:1,      /* device active/inactive
 					    * (for sysfs status only!!)
@@ -728,9 +681,6 @@ struct obd_device {
 	struct completion	obd_kobj_unregister;
 };
 
-#define OBD_LLOG_FL_SENDNOW     0x0001
-#define OBD_LLOG_FL_EXIT	0x0002
-
 enum obd_cleanup_stage {
 /* Special case hack for MDS LOVs */
 	OBD_CLEANUP_EARLY,
@@ -740,8 +690,6 @@ enum obd_cleanup_stage {
 
 /* get/set_info keys */
 #define KEY_ASYNC	       "async"
-#define KEY_BLOCKSIZE_BITS      "blocksize_bits"
-#define KEY_BLOCKSIZE	   "blocksize"
 #define KEY_CHANGELOG_CLEAR     "changelog_clear"
 #define KEY_FID2PATH	    "fid2path"
 #define KEY_CHECKSUM	    "checksum"
@@ -753,30 +701,22 @@ enum obd_cleanup_stage {
 #define KEY_GRANT_SHRINK	"grant_shrink"
 #define KEY_HSM_COPYTOOL_SEND   "hsm_send"
 #define KEY_INIT_RECOV_BACKUP   "init_recov_bk"
-#define KEY_INIT_RECOV	  "initial_recov"
 #define KEY_INTERMDS	    "inter_mds"
 #define KEY_LAST_ID	     "last_id"
 #define KEY_LAST_FID		"last_fid"
-#define KEY_LOCK_TO_STRIPE      "lock_to_stripe"
 #define KEY_LOVDESC	     "lovdesc"
-#define KEY_LOV_IDX	     "lov_idx"
 #define KEY_MAX_EASIZE		"max_easize"
 #define KEY_DEFAULT_EASIZE	"default_easize"
-#define KEY_MDS_CONN	    "mds_conn"
 #define KEY_MGSSEC	      "mgssec"
-#define KEY_NEXT_ID	     "next_id"
 #define KEY_READ_ONLY	   "read-only"
 #define KEY_REGISTER_TARGET     "register_target"
 #define KEY_SET_FS	      "set_fs"
 #define KEY_TGT_COUNT	   "tgt_count"
 /*      KEY_SET_INFO in lustre_idl.h */
 #define KEY_SPTLRPC_CONF	"sptlrpc_conf"
-#define KEY_CONNECT_FLAG	"connect_flags"
-#define KEY_SYNC_LOCK_CANCEL    "sync_lock_cancel"
 
 #define KEY_CACHE_SET		"cache_set"
 #define KEY_CACHE_LRU_SHRINK	"cache_lru_shrink"
-#define KEY_CHANGELOG_INDEX	"changelog_index"
 
 struct lu_context;
 
@@ -801,9 +741,11 @@ static inline int it_to_lock_mode(struct lookup_intent *it)
 	/* CREAT needs to be tested before open (both could be set) */
 	if (it->it_op & IT_CREAT)
 		return LCK_CW;
-	else if (it->it_op & (IT_READDIR | IT_GETATTR | IT_OPEN | IT_LOOKUP |
+	else if (it->it_op & (IT_GETATTR | IT_OPEN | IT_LOOKUP |
 			      IT_LAYOUT))
 		return LCK_CR;
+	else if (it->it_op & IT_READDIR)
+		return LCK_PR;
 	else if (it->it_op &  IT_GETXATTR)
 		return LCK_PR;
 	else if (it->it_op &  IT_SETXATTR)
@@ -813,6 +755,14 @@ static inline int it_to_lock_mode(struct lookup_intent *it)
 	return -EINVAL;
 }
 
+enum md_cli_flags {
+	CLI_SET_MEA	= BIT(0),
+	CLI_RM_ENTRY	= BIT(1),
+	CLI_HASH64	= BIT(2),
+	CLI_API32	= BIT(3),
+	CLI_MIGRATE	= BIT(4),
+};
+
 struct md_op_data {
 	struct lu_fid	   op_fid1; /* operation fid1 (usually parent) */
 	struct lu_fid	   op_fid2; /* operation fid2 (usually child) */
@@ -822,7 +772,7 @@ struct md_op_data {
 	struct lustre_handle    op_handle;
 	s64			op_mod_time;
 	const char	     *op_name;
-	int		     op_namelen;
+	size_t			op_namelen;
 	__u32		   op_mode;
 	struct lmv_stripe_md   *op_mea1;
 	struct lmv_stripe_md   *op_mea2;
@@ -831,6 +781,7 @@ struct md_op_data {
 	__u32		   op_fsgid;
 	cfs_cap_t	       op_cap;
 	void		   *op_data;
+	size_t			op_data_size;
 
 	/* iattr fields and blocks. */
 	struct iattr	    op_attr;
@@ -845,28 +796,29 @@ struct md_op_data {
 	/* Various operation flags. */
 	enum mds_op_bias        op_bias;
 
-	/* Operation type */
-	__u32		   op_opc;
-
 	/* Used by readdir */
 	__u64		   op_offset;
 
 	/* Used by readdir */
-	__u32		   op_npages;
+	__u32			op_max_pages;
 
 	/* used to transfer info between the stacks of MD client
 	 * see enum op_cli_flags
 	 */
-	__u32			op_cli_flags;
+	enum md_cli_flags	op_cli_flags;
 
 	/* File object data version for HSM release, on client */
 	__u64			op_data_version;
 	struct lustre_handle	op_lease_handle;
+
+	/* default stripe offset */
+	__u32			op_default_stripe_offset;
 };
 
-enum op_cli_flags {
-	CLI_SET_MEA	= 1 << 0,
-	CLI_RM_ENTRY	= 1 << 1,
+struct md_callback {
+	int (*md_blocking_ast)(struct ldlm_lock *lock,
+			       struct ldlm_lock_desc *desc,
+			       void *data, int flag);
 };
 
 struct md_enqueue_info;
@@ -879,8 +831,7 @@ struct md_enqueue_info {
 	struct inode	   *mi_dir;
 	int (*mi_cb)(struct ptlrpc_request *req,
 		     struct md_enqueue_info *minfo, int rc);
-	__u64		   mi_cbdata;
-	unsigned int	    mi_generation;
+	void			*mi_cbdata;
 };
 
 struct obd_ops {
@@ -894,8 +845,6 @@ struct obd_ops {
 			      __u32 keylen, void *key,
 			      __u32 vallen, void *val,
 			      struct ptlrpc_request_set *set);
-	int (*attach)(struct obd_device *dev, u32 len, void *data);
-	int (*detach)(struct obd_device *dev);
 	int (*setup)(struct obd_device *dev, struct lustre_cfg *cfg);
 	int (*precleanup)(struct obd_device *dev,
 			  enum obd_cleanup_stage cleanup_stage);
@@ -927,8 +876,8 @@ struct obd_ops {
 	int (*fid_fini)(struct obd_device *obd);
 
 	/* Allocate new fid according to passed @hint. */
-	int (*fid_alloc)(struct obd_export *exp, struct lu_fid *fid,
-			 struct md_op_data *op_data);
+	int (*fid_alloc)(const struct lu_env *env, struct obd_export *exp,
+			 struct lu_fid *fid, struct md_op_data *op_data);
 
 	/*
 	 * Object with @fid is getting deleted, we may want to do something
@@ -943,13 +892,10 @@ struct obd_ops {
 	int (*unpackmd)(struct obd_export *exp,
 			struct lov_stripe_md **mem_tgt,
 			struct lov_mds_md *disk_src, int disk_len);
-	int (*preallocate)(struct lustre_handle *, u32 *req, u64 *ids);
 	int (*create)(const struct lu_env *env, struct obd_export *exp,
-		      struct obdo *oa, struct lov_stripe_md **ea,
-		      struct obd_trans_info *oti);
+		      struct obdo *oa, struct obd_trans_info *oti);
 	int (*destroy)(const struct lu_env *env, struct obd_export *exp,
-		       struct obdo *oa, struct lov_stripe_md *ea,
-		       struct obd_trans_info *oti, struct obd_export *md_exp);
+		       struct obdo *oa, struct obd_trans_info *oti);
 	int (*setattr)(const struct lu_env *, struct obd_export *exp,
 		       struct obd_info *oinfo, struct obd_trans_info *oti);
 	int (*setattr_async)(struct obd_export *exp, struct obd_info *oinfo,
@@ -959,8 +905,6 @@ struct obd_ops {
 		       struct obd_info *oinfo);
 	int (*getattr_async)(struct obd_export *exp, struct obd_info *oinfo,
 			     struct ptlrpc_request_set *set);
-	int (*adjust_kms)(struct obd_export *exp, struct lov_stripe_md *lsm,
-			  u64 size, int shrink);
 	int (*preprw)(const struct lu_env *env, int cmd,
 		      struct obd_export *exp, struct obdo *oa, int objcount,
 		      struct obd_ioobj *obj, struct niobuf_remote *remote,
@@ -972,8 +916,6 @@ struct obd_ops {
 			struct niobuf_remote *remote, int pages,
 			struct niobuf_local *local,
 			struct obd_trans_info *oti, int rc);
-	int (*find_cbdata)(struct obd_export *, struct lov_stripe_md *,
-			   ldlm_iterator_t it, void *data);
 	int (*init_export)(struct obd_export *exp);
 	int (*destroy_export)(struct obd_export *exp);
 
@@ -1009,27 +951,11 @@ struct obd_ops {
 	 */
 };
 
-enum {
-	LUSTRE_OPC_MKDIR    = (1 << 0),
-	LUSTRE_OPC_SYMLINK  = (1 << 1),
-	LUSTRE_OPC_MKNOD    = (1 << 2),
-	LUSTRE_OPC_CREATE   = (1 << 3),
-	LUSTRE_OPC_ANY      = (1 << 4)
-};
-
 /* lmv structures */
-#define MEA_MAGIC_LAST_CHAR      0xb2221ca1
-#define MEA_MAGIC_ALL_CHARS      0xb222a11c
-#define MEA_MAGIC_HASH_SEGMENT   0xb222a11b
-
-#define MAX_HASH_SIZE_32	 0x7fffffffUL
-#define MAX_HASH_SIZE	    0x7fffffffffffffffULL
-#define MAX_HASH_HIGHEST_BIT     0x1000000000000000ULL
-
 struct lustre_md {
 	struct mdt_body	 *body;
 	struct lov_stripe_md    *lsm;
-	struct lmv_stripe_md    *mea;
+	struct lmv_stripe_md    *lmv;
 #ifdef CONFIG_FS_POSIX_ACL
 	struct posix_acl	*posix_acl;
 #endif
@@ -1044,48 +970,55 @@ struct md_open_data {
 	bool			  mod_is_create;
 };
 
+struct obd_client_handle {
+	struct lustre_handle	 och_fh;
+	struct lu_fid		 och_fid;
+	struct md_open_data	*och_mod;
+	struct lustre_handle	 och_lease_handle; /* open lock for lease */
+	__u32			 och_magic;
+	int			 och_flags;
+};
+
+#define OBD_CLIENT_HANDLE_MAGIC 0xd15ea5ed
+
 struct lookup_intent;
+struct cl_attr;
 
 struct md_ops {
 	int (*getstatus)(struct obd_export *, struct lu_fid *);
 	int (*null_inode)(struct obd_export *, const struct lu_fid *);
-	int (*find_cbdata)(struct obd_export *, const struct lu_fid *,
-			   ldlm_iterator_t, void *);
 	int (*close)(struct obd_export *, struct md_op_data *,
 		     struct md_open_data *, struct ptlrpc_request **);
 	int (*create)(struct obd_export *, struct md_op_data *,
-		      const void *, int, int, __u32, __u32, cfs_cap_t,
-		      __u64, struct ptlrpc_request **);
+		      const void *, size_t, umode_t, uid_t, gid_t,
+		      cfs_cap_t, __u64, struct ptlrpc_request **);
 	int (*done_writing)(struct obd_export *, struct md_op_data  *,
 			    struct md_open_data *);
 	int (*enqueue)(struct obd_export *, struct ldlm_enqueue_info *,
+		       const ldlm_policy_data_t *,
 		       struct lookup_intent *, struct md_op_data *,
-		       struct lustre_handle *, void *, int,
-		       struct ptlrpc_request **, __u64);
+		       struct lustre_handle *, __u64);
 	int (*getattr)(struct obd_export *, struct md_op_data *,
 		       struct ptlrpc_request **);
 	int (*getattr_name)(struct obd_export *, struct md_op_data *,
 			    struct ptlrpc_request **);
 	int (*intent_lock)(struct obd_export *, struct md_op_data *,
-			   void *, int, struct lookup_intent *, int,
+			   struct lookup_intent *,
 			   struct ptlrpc_request **,
 			   ldlm_blocking_callback, __u64);
 	int (*link)(struct obd_export *, struct md_op_data *,
 		    struct ptlrpc_request **);
 	int (*rename)(struct obd_export *, struct md_op_data *,
-		      const char *, int, const char *, int,
+		      const char *, size_t, const char *, size_t,
 		      struct ptlrpc_request **);
-	int (*is_subdir)(struct obd_export *, const struct lu_fid *,
-			 const struct lu_fid *,
-			   struct ptlrpc_request **);
 	int (*setattr)(struct obd_export *, struct md_op_data *, void *,
-		       int, void *, int, struct ptlrpc_request **,
+		       size_t, void *, size_t, struct ptlrpc_request **,
 			 struct md_open_data **mod);
 	int (*sync)(struct obd_export *, const struct lu_fid *,
 		    struct ptlrpc_request **);
-	int (*readpage)(struct obd_export *, struct md_op_data *,
-			struct page **, struct ptlrpc_request **);
-
+	int (*read_page)(struct obd_export *, struct md_op_data *,
+			 struct md_callback *cb_op, __u64 hash_offset,
+			 struct page **ppage);
 	int (*unlink)(struct obd_export *, struct md_op_data *,
 		      struct ptlrpc_request **);
 
@@ -1097,7 +1030,7 @@ struct md_ops {
 			u64, const char *, const char *, int, int, int,
 			struct ptlrpc_request **);
 
-	int (*init_ea_size)(struct obd_export *, int, int, int, int);
+	int (*init_ea_size)(struct obd_export *, u32, u32, u32, u32);
 
 	int (*get_lustre_md)(struct obd_export *, struct ptlrpc_request *,
 			     struct obd_export *, struct obd_export *,
@@ -1105,12 +1038,17 @@ struct md_ops {
 
 	int (*free_lustre_md)(struct obd_export *, struct lustre_md *);
 
+	int (*merge_attr)(struct obd_export *,
+			  const struct lmv_stripe_md *lsm,
+			  struct cl_attr *attr, ldlm_blocking_callback);
+
 	int (*set_open_replay_data)(struct obd_export *,
 				    struct obd_client_handle *,
 				    struct lookup_intent *);
 	int (*clear_open_replay_data)(struct obd_export *,
 				      struct obd_client_handle *);
-	int (*set_lock_data)(struct obd_export *, __u64 *, void *, __u64 *);
+	int (*set_lock_data)(struct obd_export *, const struct lustre_handle *,
+			     void *, __u64 *);
 
 	enum ldlm_mode (*lock_match)(struct obd_export *, __u64,
 				     const struct lu_fid *, enum ldlm_type,
@@ -1121,6 +1059,11 @@ struct md_ops {
 			     ldlm_policy_data_t *, enum ldlm_mode,
 			     enum ldlm_cancel_flags flags, void *opaque);
 
+	int (*get_fid_from_lsm)(struct obd_export *,
+				const struct lmv_stripe_md *,
+				const char *name, int namelen,
+				struct lu_fid *fid);
+
 	int (*intent_getattr_async)(struct obd_export *,
 				    struct md_enqueue_info *,
 				    struct ldlm_enqueue_info *);
@@ -1137,8 +1080,6 @@ struct md_ops {
 
 struct lsm_operations {
 	void (*lsm_free)(struct lov_stripe_md *);
-	int (*lsm_destroy)(struct lov_stripe_md *, struct obdo *oa,
-			   struct obd_export *md_exp);
 	void (*lsm_stripe_by_index)(struct lov_stripe_md *, int *, u64 *,
 				    u64 *);
 	void (*lsm_stripe_by_offset)(struct lov_stripe_md *, int *, u64 *,
@@ -1164,10 +1105,6 @@ static inline const struct lsm_operations *lsm_op_find(int magic)
 	}
 }
 
-/* Requests for obd_extent_calc() */
-#define OBD_CALC_STRIPE_START   1
-#define OBD_CALC_STRIPE_END     2
-
 static inline struct md_open_data *obd_mod_alloc(void)
 {
 	struct md_open_data *mod;
@@ -1211,7 +1148,8 @@ static inline const char *lu_dev_name(const struct lu_device *lu_dev)
 	return lu_dev->ld_obd->obd_name;
 }
 
-static inline bool filename_is_volatile(const char *name, int namelen, int *idx)
+static inline bool filename_is_volatile(const char *name, size_t namelen,
+					int *idx)
 {
 	const char	*start;
 	char		*end;
@@ -1259,4 +1197,28 @@ static inline int cli_brw_size(struct obd_device *obd)
 	return obd->u.cli.cl_max_pages_per_rpc << PAGE_SHIFT;
 }
 
+/*
+ * when RPC size or the max RPCs in flight is increased, the max dirty pages
+ * of the client should be increased accordingly to avoid sending fragmented
+ * RPCs over the network when the client runs out of the maximum dirty space
+ * when so many RPCs are being generated.
+ */
+static inline void client_adjust_max_dirty(struct client_obd *cli)
+{
+	/* initializing */
+	if (cli->cl_dirty_max_pages <= 0)
+		cli->cl_dirty_max_pages =
+			(OSC_MAX_DIRTY_DEFAULT * 1024 * 1024) >> PAGE_SHIFT;
+	else {
+		unsigned long dirty_max = cli->cl_max_rpcs_in_flight *
+					  cli->cl_max_pages_per_rpc;
+
+		if (dirty_max > cli->cl_dirty_max_pages)
+			cli->cl_dirty_max_pages = dirty_max;
+	}
+
+	if (cli->cl_dirty_max_pages > totalram_pages / 8)
+		cli->cl_dirty_max_pages = totalram_pages / 8;
+}
+
 #endif /* __OBD_H */
diff --git a/drivers/staging/lustre/lustre/include/obd_class.h b/drivers/staging/lustre/lustre/include/obd_class.h
index 6482a937000b..16094dbec08b 100644
--- a/drivers/staging/lustre/lustre/include/obd_class.h
+++ b/drivers/staging/lustre/lustre/include/obd_class.h
@@ -56,7 +56,6 @@
 #define OBD_STATFS_FOR_MDT0	0x0008	/* The statfs is only for retrieving
 					 * information from MDT0.
 					 */
-#define OBD_FL_PUNCH	    0x00000001  /* To indicate it is punch operation */
 
 /* OBD Device Declarations */
 extern struct obd_device *obd_devs[MAX_OBD_DEVICES];
@@ -97,6 +96,11 @@ int obd_zombie_impexp_init(void);
 void obd_zombie_impexp_stop(void);
 void obd_zombie_barrier(void);
 
+int obd_get_request_slot(struct client_obd *cli);
+void obd_put_request_slot(struct client_obd *cli);
+__u32 obd_get_max_rpcs_in_flight(struct client_obd *cli);
+int obd_set_max_rpcs_in_flight(struct client_obd *cli, __u32 max);
+
 struct llog_handle;
 struct llog_rec_hdr;
 typedef int (*llog_cb_t)(const struct lu_env *, struct llog_handle *,
@@ -265,10 +269,10 @@ static inline int lprocfs_climp_check(struct obd_device *obd)
 struct inode;
 struct lu_attr;
 struct obdo;
-void obdo_refresh_inode(struct inode *dst, struct obdo *src, u32 valid);
+void obdo_refresh_inode(struct inode *dst, const struct obdo *src, u32 valid);
 
-void obdo_to_ioobj(struct obdo *oa, struct obd_ioobj *ioobj);
-void md_from_obdo(struct md_op_data *op_data, struct obdo *oa, u32 valid);
+void obdo_to_ioobj(const struct obdo *oa, struct obd_ioobj *ioobj);
+void md_from_obdo(struct md_op_data *op_data, const struct obdo *oa, u32 valid);
 
 #define OBT(dev)	(dev)->obd_type
 #define OBP(dev, op)    (dev)->obd_type->typ_dt_ops->op
@@ -673,15 +677,6 @@ static inline int obd_unpackmd(struct obd_export *exp,
 	return rc;
 }
 
-/* helper functions */
-static inline int obd_alloc_memmd(struct obd_export *exp,
-				  struct lov_stripe_md **mem_tgt)
-{
-	LASSERT(mem_tgt);
-	LASSERT(!*mem_tgt);
-	return obd_unpackmd(exp, mem_tgt, NULL, 0);
-}
-
 static inline int obd_free_memmd(struct obd_export *exp,
 				 struct lov_stripe_md **mem_tgt)
 {
@@ -695,29 +690,26 @@ static inline int obd_free_memmd(struct obd_export *exp,
 }
 
 static inline int obd_create(const struct lu_env *env, struct obd_export *exp,
-			     struct obdo *obdo, struct lov_stripe_md **ea,
-			     struct obd_trans_info *oti)
+			     struct obdo *obdo, struct obd_trans_info *oti)
 {
 	int rc;
 
 	EXP_CHECK_DT_OP(exp, create);
 	EXP_COUNTER_INCREMENT(exp, create);
 
-	rc = OBP(exp->exp_obd, create)(env, exp, obdo, ea, oti);
+	rc = OBP(exp->exp_obd, create)(env, exp, obdo, oti);
 	return rc;
 }
 
 static inline int obd_destroy(const struct lu_env *env, struct obd_export *exp,
-			      struct obdo *obdo, struct lov_stripe_md *ea,
-			      struct obd_trans_info *oti,
-			      struct obd_export *md_exp)
+			      struct obdo *obdo, struct obd_trans_info *oti)
 {
 	int rc;
 
 	EXP_CHECK_DT_OP(exp, destroy);
 	EXP_COUNTER_INCREMENT(exp, destroy);
 
-	rc = OBP(exp->exp_obd, destroy)(env, exp, obdo, ea, oti, md_exp);
+	rc = OBP(exp->exp_obd, destroy)(env, exp, obdo, oti);
 	return rc;
 }
 
@@ -925,7 +917,8 @@ static inline int obd_fid_fini(struct obd_device *obd)
 	return rc;
 }
 
-static inline int obd_fid_alloc(struct obd_export *exp,
+static inline int obd_fid_alloc(const struct lu_env *env,
+				struct obd_export *exp,
 				struct lu_fid *fid,
 				struct md_op_data *op_data)
 {
@@ -934,7 +927,7 @@ static inline int obd_fid_alloc(struct obd_export *exp,
 	EXP_CHECK_DT_OP(exp, fid_alloc);
 	EXP_COUNTER_INCREMENT(exp, fid_alloc);
 
-	rc = OBP(exp->exp_obd, fid_alloc)(exp, fid, op_data);
+	rc = OBP(exp->exp_obd, fid_alloc)(env, exp, fid, op_data);
 	return rc;
 }
 
@@ -1147,19 +1140,6 @@ static inline int obd_commitrw(const struct lu_env *env, int cmd,
 	return rc;
 }
 
-static inline int obd_adjust_kms(struct obd_export *exp,
-				 struct lov_stripe_md *lsm, u64 size,
-				 int shrink)
-{
-	int rc;
-
-	EXP_CHECK_DT_OP(exp, adjust_kms);
-	EXP_COUNTER_INCREMENT(exp, adjust_kms);
-
-	rc = OBP(exp->exp_obd, adjust_kms)(exp, lsm, size, shrink);
-	return rc;
-}
-
 static inline int obd_iocontrol(unsigned int cmd, struct obd_export *exp,
 				int len, void *karg, void __user *uarg)
 {
@@ -1172,19 +1152,6 @@ static inline int obd_iocontrol(unsigned int cmd, struct obd_export *exp,
 	return rc;
 }
 
-static inline int obd_find_cbdata(struct obd_export *exp,
-				  struct lov_stripe_md *lsm,
-				  ldlm_iterator_t it, void *data)
-{
-	int rc;
-
-	EXP_CHECK_DT_OP(exp, find_cbdata);
-	EXP_COUNTER_INCREMENT(exp, find_cbdata);
-
-	rc = OBP(exp->exp_obd, find_cbdata)(exp, lsm, it, data);
-	return rc;
-}
-
 static inline void obd_import_event(struct obd_device *obd,
 				    struct obd_import *imp,
 				    enum obd_import_event event)
@@ -1210,12 +1177,7 @@ static inline int obd_notify(struct obd_device *obd,
 	if (rc)
 		return rc;
 
-	/* the check for async_recov is a complete hack - I'm hereby
-	 * overloading the meaning to also mean "this was called from
-	 * mds_postsetup".  I know that my mds is able to handle notifies
-	 * by this point, and it needs to get them to execute mds_postrecov.
-	 */
-	if (!obd->obd_set_up && !obd->obd_async_recov) {
+	if (!obd->obd_set_up) {
 		CDEBUG(D_HA, "obd %s not set up\n", obd->obd_name);
 		return -EINVAL;
 	}
@@ -1358,18 +1320,6 @@ static inline int md_null_inode(struct obd_export *exp,
 	return rc;
 }
 
-static inline int md_find_cbdata(struct obd_export *exp,
-				 const struct lu_fid *fid,
-				 ldlm_iterator_t it, void *data)
-{
-	int rc;
-
-	EXP_CHECK_MD_OP(exp, find_cbdata);
-	EXP_MD_COUNTER_INCREMENT(exp, find_cbdata);
-	rc = MDP(exp->exp_obd, find_cbdata)(exp, fid, it, data);
-	return rc;
-}
-
 static inline int md_close(struct obd_export *exp, struct md_op_data *op_data,
 			   struct md_open_data *mod,
 			   struct ptlrpc_request **request)
@@ -1383,9 +1333,9 @@ static inline int md_close(struct obd_export *exp, struct md_op_data *op_data,
 }
 
 static inline int md_create(struct obd_export *exp, struct md_op_data *op_data,
-			    const void *data, int datalen, int mode, __u32 uid,
-			    __u32 gid, cfs_cap_t cap_effective, __u64 rdev,
-			    struct ptlrpc_request **request)
+			    const void *data, size_t datalen, umode_t mode,
+			    uid_t uid, gid_t gid, cfs_cap_t cap_effective,
+			    __u64 rdev, struct ptlrpc_request **request)
 {
 	int rc;
 
@@ -1410,19 +1360,18 @@ static inline int md_done_writing(struct obd_export *exp,
 
 static inline int md_enqueue(struct obd_export *exp,
 			     struct ldlm_enqueue_info *einfo,
+			     const ldlm_policy_data_t *policy,
 			     struct lookup_intent *it,
 			     struct md_op_data *op_data,
 			     struct lustre_handle *lockh,
-			     void *lmm, int lmmsize,
-			     struct ptlrpc_request **req,
 			     __u64 extra_lock_flags)
 {
 	int rc;
 
 	EXP_CHECK_MD_OP(exp, enqueue);
 	EXP_MD_COUNTER_INCREMENT(exp, enqueue);
-	rc = MDP(exp->exp_obd, enqueue)(exp, einfo, it, op_data, lockh,
-					lmm, lmmsize, req, extra_lock_flags);
+	rc = MDP(exp->exp_obd, enqueue)(exp, einfo, policy, it, op_data, lockh,
+					extra_lock_flags);
 	return rc;
 }
 
@@ -1439,9 +1388,9 @@ static inline int md_getattr_name(struct obd_export *exp,
 }
 
 static inline int md_intent_lock(struct obd_export *exp,
-				 struct md_op_data *op_data, void *lmm,
-				 int lmmsize, struct lookup_intent *it,
-				 int lookup_flags, struct ptlrpc_request **reqp,
+				 struct md_op_data *op_data,
+				 struct lookup_intent *it,
+				 struct ptlrpc_request **reqp,
 				 ldlm_blocking_callback cb_blocking,
 				 __u64 extra_lock_flags)
 {
@@ -1449,9 +1398,8 @@ static inline int md_intent_lock(struct obd_export *exp,
 
 	EXP_CHECK_MD_OP(exp, intent_lock);
 	EXP_MD_COUNTER_INCREMENT(exp, intent_lock);
-	rc = MDP(exp->exp_obd, intent_lock)(exp, op_data, lmm, lmmsize,
-					    it, lookup_flags, reqp, cb_blocking,
-					    extra_lock_flags);
+	rc = MDP(exp->exp_obd, intent_lock)(exp, op_data, it, reqp,
+					    cb_blocking, extra_lock_flags);
 	return rc;
 }
 
@@ -1467,8 +1415,8 @@ static inline int md_link(struct obd_export *exp, struct md_op_data *op_data,
 }
 
 static inline int md_rename(struct obd_export *exp, struct md_op_data *op_data,
-			    const char *old, int oldlen, const char *new,
-			    int newlen, struct ptlrpc_request **request)
+			    const char *old, size_t oldlen, const char *new,
+			    size_t newlen, struct ptlrpc_request **request)
 {
 	int rc;
 
@@ -1479,21 +1427,8 @@ static inline int md_rename(struct obd_export *exp, struct md_op_data *op_data,
 	return rc;
 }
 
-static inline int md_is_subdir(struct obd_export *exp,
-			       const struct lu_fid *pfid,
-			       const struct lu_fid *cfid,
-			       struct ptlrpc_request **request)
-{
-	int rc;
-
-	EXP_CHECK_MD_OP(exp, is_subdir);
-	EXP_MD_COUNTER_INCREMENT(exp, is_subdir);
-	rc = MDP(exp->exp_obd, is_subdir)(exp, pfid, cfid, request);
-	return rc;
-}
-
 static inline int md_setattr(struct obd_export *exp, struct md_op_data *op_data,
-			     void *ea, int ealen, void *ea2, int ea2len,
+			     void *ea, size_t ealen, void *ea2, size_t ea2len,
 			     struct ptlrpc_request **request,
 			     struct md_open_data **mod)
 {
@@ -1517,15 +1452,18 @@ static inline int md_sync(struct obd_export *exp, const struct lu_fid *fid,
 	return rc;
 }
 
-static inline int md_readpage(struct obd_export *exp, struct md_op_data *opdata,
-			      struct page **pages,
-			      struct ptlrpc_request **request)
+static inline int md_read_page(struct obd_export *exp,
+			       struct md_op_data *op_data,
+			       struct md_callback *cb_op,
+			       __u64  hash_offset,
+			       struct page **ppage)
 {
 	int rc;
 
-	EXP_CHECK_MD_OP(exp, readpage);
-	EXP_MD_COUNTER_INCREMENT(exp, readpage);
-	rc = MDP(exp->exp_obd, readpage)(exp, opdata, pages, request);
+	EXP_CHECK_MD_OP(exp, read_page);
+	EXP_MD_COUNTER_INCREMENT(exp, read_page);
+	rc = MDP(exp->exp_obd, read_page)(exp, op_data, cb_op, hash_offset,
+					  ppage);
 	return rc;
 }
 
@@ -1559,6 +1497,16 @@ static inline int md_free_lustre_md(struct obd_export *exp,
 	return MDP(exp->exp_obd, free_lustre_md)(exp, md);
 }
 
+static inline int md_merge_attr(struct obd_export *exp,
+				const struct lmv_stripe_md *lsm,
+				struct cl_attr *attr,
+				ldlm_blocking_callback cb)
+{
+	EXP_CHECK_MD_OP(exp, merge_attr);
+	EXP_MD_COUNTER_INCREMENT(exp, merge_attr);
+	return MDP(exp->exp_obd, merge_attr)(exp, lsm, attr, cb);
+}
+
 static inline int md_setxattr(struct obd_export *exp, const struct lu_fid *fid,
 			      u64 valid, const char *name,
 			      const char *input, int input_size,
@@ -1603,7 +1551,8 @@ static inline int md_clear_open_replay_data(struct obd_export *exp,
 }
 
 static inline int md_set_lock_data(struct obd_export *exp,
-				   __u64 *lockh, void *data, __u64 *bits)
+				   const struct lustre_handle *lockh,
+				   void *data, __u64 *bits)
 {
 	EXP_CHECK_MD_OP(exp, set_lock_data);
 	EXP_MD_COUNTER_INCREMENT(exp, set_lock_data);
@@ -1674,6 +1623,19 @@ static inline int md_revalidate_lock(struct obd_export *exp,
 	return rc;
 }
 
+static inline int md_get_fid_from_lsm(struct obd_export *exp,
+				      const struct lmv_stripe_md *lsm,
+				      const char *name, int namelen,
+				      struct lu_fid *fid)
+{
+	int rc;
+
+	EXP_CHECK_MD_OP(exp, get_fid_from_lsm);
+	EXP_MD_COUNTER_INCREMENT(exp, get_fid_from_lsm);
+	rc = MDP(exp->exp_obd, get_fid_from_lsm)(exp, lsm, name, namelen, fid);
+	return rc;
+}
+
 /* OBD Metadata Support */
 
 int obd_init_caches(void);
@@ -1682,16 +1644,6 @@ void obd_cleanup_caches(void);
 /* support routines */
 extern struct kmem_cache *obdo_cachep;
 
-static inline void obdo2fid(struct obdo *oa, struct lu_fid *fid)
-{
-	/* something here */
-}
-
-static inline void fid2obdo(struct lu_fid *fid, struct obdo *oa)
-{
-	/* something here */
-}
-
 typedef int (*register_lwp_cb)(void *data);
 
 struct lwp_register_item {
@@ -1710,6 +1662,9 @@ struct lwp_register_item {
 extern int (*ptlrpc_put_connection_superhack)(struct ptlrpc_connection *c);
 
 /* obd_mount.c */
+int lustre_unregister_fs(void);
+int lustre_register_fs(void);
+int lustre_check_exclusion(struct super_block *sb, char *svname);
 
 /* sysctl.c */
 int obd_sysctl_init(void);
@@ -1730,8 +1685,24 @@ void class_exit_uuidlist(void);
 extern char obd_jobid_node[];
 extern struct miscdevice obd_psdev;
 extern spinlock_t obd_types_lock;
+int class_procfs_init(void);
+int class_procfs_clean(void);
 
 /* prng.c */
 #define ll_generate_random_uuid(uuid_out) cfs_get_random_bytes(uuid_out, sizeof(class_uuid_t))
 
+/* statfs_pack.c */
+struct kstatfs;
+void statfs_pack(struct obd_statfs *osfs, struct kstatfs *sfs);
+void statfs_unpack(struct kstatfs *sfs, struct obd_statfs *osfs);
+
+/* root squash info */
+struct rw_semaphore;
+struct root_squash_info {
+	uid_t			rsi_uid;
+	gid_t			rsi_gid;
+	struct list_head	rsi_nosquash_nids;
+	struct rw_semaphore	rsi_sem;
+};
+
 #endif /* __LINUX_OBD_CLASS_H */
diff --git a/drivers/staging/lustre/lustre/include/obd_support.h b/drivers/staging/lustre/lustre/include/obd_support.h
index 845e64a56c21..b346a7f10aa4 100644
--- a/drivers/staging/lustre/lustre/include/obd_support.h
+++ b/drivers/staging/lustre/lustre/include/obd_support.h
@@ -35,7 +35,7 @@
 
 #include <linux/slab.h>
 #include "../../include/linux/libcfs/libcfs.h"
-#include "linux/lustre_compat25.h"
+#include "lustre_compat.h"
 #include "lprocfs_status.h"
 
 /* global variables */
@@ -52,11 +52,9 @@ extern unsigned int at_max;
 extern unsigned int at_history;
 extern int at_early_margin;
 extern int at_extra;
-extern unsigned int obd_sync_filter;
-extern unsigned int obd_max_dirty_pages;
-extern atomic_t obd_unstable_pages;
-extern atomic_t obd_dirty_pages;
-extern atomic_t obd_dirty_transit_pages;
+extern unsigned long obd_max_dirty_pages;
+extern atomic_long_t obd_dirty_pages;
+extern atomic_long_t obd_dirty_transit_pages;
 extern char obd_jobid_var[];
 
 /* Some hash init argument constants */
@@ -117,17 +115,17 @@ extern char obd_jobid_var[];
  * running on a backup server. (If it's too low, import_select_connection
  * will increase the timeout anyhow.)
  */
-#define INITIAL_CONNECT_TIMEOUT max(CONNECTION_SWITCH_MIN, obd_timeout/20)
+#define INITIAL_CONNECT_TIMEOUT max(CONNECTION_SWITCH_MIN, obd_timeout / 20)
 /* The max delay between connects is SWITCH_MAX + SWITCH_INC + INITIAL */
 #define RECONNECT_DELAY_MAX (CONNECTION_SWITCH_MAX + CONNECTION_SWITCH_INC + \
 			     INITIAL_CONNECT_TIMEOUT)
 /* The min time a target should wait for clients to reconnect in recovery */
-#define OBD_RECOVERY_TIME_MIN    (2*RECONNECT_DELAY_MAX)
+#define OBD_RECOVERY_TIME_MIN    (2 * RECONNECT_DELAY_MAX)
 #define OBD_IR_FACTOR_MIN	 1
 #define OBD_IR_FACTOR_MAX	 10
-#define OBD_IR_FACTOR_DEFAULT    (OBD_IR_FACTOR_MAX/2)
+#define OBD_IR_FACTOR_DEFAULT    (OBD_IR_FACTOR_MAX / 2)
 /* default timeout for the MGS to become IR_FULL */
-#define OBD_IR_MGS_TIMEOUT       (4*obd_timeout)
+#define OBD_IR_MGS_TIMEOUT       (4 * obd_timeout)
 #define LONG_UNLINK 300	  /* Unlink should happen before now */
 
 /**
@@ -318,6 +316,10 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_LDLM_AGL_NOLOCK	 0x31b
 #define OBD_FAIL_LDLM_OST_LVB		 0x31c
 #define OBD_FAIL_LDLM_ENQUEUE_HANG	 0x31d
+#define OBD_FAIL_LDLM_CP_CB_WAIT2	 0x320
+#define OBD_FAIL_LDLM_CP_CB_WAIT3	 0x321
+#define OBD_FAIL_LDLM_CP_CB_WAIT4	 0x322
+#define OBD_FAIL_LDLM_CP_CB_WAIT5	 0x323
 
 /* LOCKLESS IO */
 #define OBD_FAIL_LDLM_SET_CONTENTION     0x385
@@ -400,6 +402,7 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_MDC_GETATTR_ENQUEUE     0x803
 #define OBD_FAIL_MDC_RPCS_SEM		 0x804
 #define OBD_FAIL_MDC_LIGHTWEIGHT	 0x805
+#define OBD_FAIL_MDC_CLOSE		 0x806
 
 #define OBD_FAIL_MGS		     0x900
 #define OBD_FAIL_MGS_ALL_REQUEST_NET     0x901
@@ -455,6 +458,7 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_LOV_INIT			    0x1403
 #define OBD_FAIL_GLIMPSE_DELAY			    0x1404
 #define OBD_FAIL_LLITE_XATTR_ENOMEM		    0x1405
+#define OBD_FAIL_GETATTR_DELAY			    0x1409
 
 #define OBD_FAIL_FID_INDIR	0x1501
 #define OBD_FAIL_FID_INLMA	0x1502
@@ -474,11 +478,16 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_LFSCK_CRASH		0x160a
 #define OBD_FAIL_LFSCK_NO_AUTO		0x160b
 #define OBD_FAIL_LFSCK_NO_DOUBLESCAN	0x160c
+#define OBD_FAIL_LFSCK_INVALID_PFID	0x1619
+#define OBD_FAIL_LFSCK_BAD_NAME_HASH	0x1628
 
 /* UPDATE */
 #define OBD_FAIL_UPDATE_OBJ_NET			0x1700
 #define OBD_FAIL_UPDATE_OBJ_NET_REP		0x1701
 
+/* LMV */
+#define OBD_FAIL_UNKNOWN_LMV_STRIPE		0x1901
+
 /* Assign references to moved code to reduce code changes */
 #define OBD_FAIL_PRECHECK(id)		   CFS_FAIL_PRECHECK(id)
 #define OBD_FAIL_CHECK(id)		      CFS_FAIL_CHECK(id)
@@ -520,7 +529,8 @@ do {									      \
 	POISON_PTR(ptr);						      \
 } while (0)
 
-#define KEY_IS(str) \
-	(keylen >= (sizeof(str)-1) && memcmp(key, str, (sizeof(str)-1)) == 0)
+#define KEY_IS(str)					\
+	(keylen >= (sizeof(str) - 1) &&			\
+	memcmp(key, str, (sizeof(str) - 1)) == 0)
 
 #endif
diff --git a/drivers/staging/lustre/lustre/ldlm/interval_tree.c b/drivers/staging/lustre/lustre/ldlm/interval_tree.c
index f4a70ebddeaf..e134ecd21bb2 100644
--- a/drivers/staging/lustre/lustre/ldlm/interval_tree.c
+++ b/drivers/staging/lustre/lustre/ldlm/interval_tree.c
@@ -90,6 +90,17 @@ static inline int extent_equal(struct interval_node_extent *e1,
 	return (e1->start == e2->start) && (e1->end == e2->end);
 }
 
+static inline int extent_overlapped(struct interval_node_extent *e1,
+				    struct interval_node_extent *e2)
+{
+	return (e1->start <= e2->end) && (e2->start <= e1->end);
+}
+
+static inline int node_equal(struct interval_node *n1, struct interval_node *n2)
+{
+	return extent_equal(&n1->in_extent, &n2->in_extent);
+}
+
 static inline __u64 max_u64(__u64 x, __u64 y)
 {
 	return x > y ? x : y;
@@ -262,7 +273,7 @@ struct interval_node *interval_insert(struct interval_node *node,
 	p = root;
 	while (*p) {
 		parent = *p;
-		if (extent_equal(&parent->in_extent, &node->in_extent))
+		if (node_equal(parent, node))
 			return parent;
 
 		/* max_high field must be updated after each iteration */
@@ -463,3 +474,90 @@ color:
 		interval_erase_color(child, parent, root);
 }
 EXPORT_SYMBOL(interval_erase);
+
+static inline int interval_may_overlap(struct interval_node *node,
+				       struct interval_node_extent *ext)
+{
+	return (ext->start <= node->in_max_high &&
+		ext->end >= interval_low(node));
+}
+
+/*
+ * This function finds all intervals that overlap interval ext,
+ * and calls func to handle resulted intervals one by one.
+ * in lustre, this function will find all conflicting locks in
+ * the granted queue and add these locks to the ast work list.
+ *
+ * {
+ *	if (!node)
+ *		return 0;
+ *	if (ext->end < interval_low(node)) {
+ *		interval_search(node->in_left, ext, func, data);
+ *	} else if (interval_may_overlap(node, ext)) {
+ *		if (extent_overlapped(ext, &node->in_extent))
+ *			func(node, data);
+ *		interval_search(node->in_left, ext, func, data);
+ *		interval_search(node->in_right, ext, func, data);
+ *	}
+ *	return 0;
+ * }
+ *
+ */
+enum interval_iter interval_search(struct interval_node *node,
+				   struct interval_node_extent *ext,
+				   interval_callback_t func,
+				   void *data)
+{
+	enum interval_iter rc = INTERVAL_ITER_CONT;
+	struct interval_node *parent;
+
+	LASSERT(ext);
+	LASSERT(func);
+
+	while (node) {
+		if (ext->end < interval_low(node)) {
+			if (node->in_left) {
+				node = node->in_left;
+				continue;
+			}
+		} else if (interval_may_overlap(node, ext)) {
+			if (extent_overlapped(ext, &node->in_extent)) {
+				rc = func(node, data);
+				if (rc == INTERVAL_ITER_STOP)
+					break;
+			}
+
+			if (node->in_left) {
+				node = node->in_left;
+				continue;
+			}
+			if (node->in_right) {
+				node = node->in_right;
+				continue;
+			}
+		}
+
+		parent = node->in_parent;
+		while (parent) {
+			if (node_is_left_child(node) &&
+			    parent->in_right) {
+				/*
+				 * If we ever got the left, it means that the
+				 * parent met ext->end<interval_low(parent), or
+				 * may_overlap(parent). If the former is true,
+				 * we needn't go back. So stop early and check
+				 * may_overlap(parent) after this loop.
+				 */
+				node = parent->in_right;
+				break;
+			}
+			node = parent;
+			parent = parent->in_parent;
+		}
+		if (!parent || !interval_may_overlap(parent, ext))
+			break;
+	}
+
+	return rc;
+}
+EXPORT_SYMBOL(interval_search);
diff --git a/drivers/staging/lustre/lustre/ldlm/l_lock.c b/drivers/staging/lustre/lustre/ldlm/l_lock.c
index ea8840cb9056..3845f386f1db 100644
--- a/drivers/staging/lustre/lustre/ldlm/l_lock.c
+++ b/drivers/staging/lustre/lustre/ldlm/l_lock.c
@@ -45,6 +45,8 @@
  * being an atomic operation.
  */
 struct ldlm_resource *lock_res_and_lock(struct ldlm_lock *lock)
+				__acquires(&lock->l_lock)
+				__acquires(&lock->l_resource->lr_lock)
 {
 	spin_lock(&lock->l_lock);
 
@@ -59,6 +61,8 @@ EXPORT_SYMBOL(lock_res_and_lock);
  * Unlock a lock and its resource previously locked with lock_res_and_lock
  */
 void unlock_res_and_lock(struct ldlm_lock *lock)
+		__releases(&lock->l_resource->lr_lock)
+		__releases(&lock->l_lock)
 {
 	/* on server-side resource of lock doesn't change */
 	ldlm_clear_res_locked(lock);
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_extent.c b/drivers/staging/lustre/lustre/ldlm/ldlm_extent.c
index f5023d9b78f5..ecf472e4813d 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_extent.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_extent.c
@@ -221,7 +221,7 @@ void ldlm_extent_unlink_lock(struct ldlm_lock *lock)
 }
 
 void ldlm_extent_policy_wire_to_local(const ldlm_wire_policy_data_t *wpolicy,
-				     ldlm_policy_data_t *lpolicy)
+				      ldlm_policy_data_t *lpolicy)
 {
 	memset(lpolicy, 0, sizeof(*lpolicy));
 	lpolicy->l_extent.start = wpolicy->l_extent.start;
@@ -230,7 +230,7 @@ void ldlm_extent_policy_wire_to_local(const ldlm_wire_policy_data_t *wpolicy,
 }
 
 void ldlm_extent_policy_local_to_wire(const ldlm_policy_data_t *lpolicy,
-				     ldlm_wire_policy_data_t *wpolicy)
+				      ldlm_wire_policy_data_t *wpolicy)
 {
 	memset(wpolicy, 0, sizeof(*wpolicy));
 	wpolicy->l_extent.start = lpolicy->l_extent.start;
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c b/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c
index d6b61bc39135..861f36f039b5 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_flock.c
@@ -97,7 +97,7 @@ ldlm_flock_destroy(struct ldlm_lock *lock, enum ldlm_mode mode, __u64 flags)
 	LASSERT(hlist_unhashed(&lock->l_exp_flock_hash));
 
 	list_del_init(&lock->l_res_link);
-	if (flags == LDLM_FL_WAIT_NOREPROC && !ldlm_is_failed(lock)) {
+	if (flags == LDLM_FL_WAIT_NOREPROC) {
 		/* client side - set a flag to prevent sending a CANCEL */
 		lock->l_flags |= LDLM_FL_LOCAL_ONLY | LDLM_FL_CBPENDING;
 
@@ -166,7 +166,7 @@ reprocess:
 		 */
 		list_for_each(tmp, &res->lr_granted) {
 			lock = list_entry(tmp, struct ldlm_lock,
-					      l_res_link);
+					  l_res_link);
 			if (ldlm_same_flock_owner(lock, req)) {
 				ownlocks = tmp;
 				break;
@@ -182,7 +182,7 @@ reprocess:
 		 */
 		list_for_each(tmp, &res->lr_granted) {
 			lock = list_entry(tmp, struct ldlm_lock,
-					      l_res_link);
+					  l_res_link);
 
 			if (ldlm_same_flock_owner(lock, req)) {
 				if (!ownlocks)
@@ -339,10 +339,10 @@ reprocess:
 						lock->l_granted_mode, &null_cbs,
 						NULL, 0, LVB_T_NONE);
 			lock_res_and_lock(req);
-			if (!new2) {
+			if (IS_ERR(new2)) {
 				ldlm_flock_destroy(req, lock->l_granted_mode,
 						   *flags);
-				*err = -ENOLCK;
+				*err = PTR_ERR(new2);
 				return LDLM_ITER_STOP;
 			}
 			goto reprocess;
@@ -455,29 +455,22 @@ ldlm_flock_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data)
 	enum ldlm_error		    err;
 	int			     rc = 0;
 
+	OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_CP_CB_WAIT2, 4);
+	if (OBD_FAIL_PRECHECK(OBD_FAIL_LDLM_CP_CB_WAIT3)) {
+		lock_res_and_lock(lock);
+		lock->l_flags |= LDLM_FL_FAIL_LOC;
+		unlock_res_and_lock(lock);
+		OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_CP_CB_WAIT3, 4);
+	}
 	CDEBUG(D_DLMTRACE, "flags: 0x%llx data: %p getlk: %p\n",
 	       flags, data, getlk);
 
-	/* Import invalidation. We need to actually release the lock
-	 * references being held, so that it can go away. No point in
-	 * holding the lock even if app still believes it has it, since
-	 * server already dropped it anyway. Only for granted locks too.
-	 */
-	if ((lock->l_flags & (LDLM_FL_FAILED|LDLM_FL_LOCAL_ONLY)) ==
-	    (LDLM_FL_FAILED|LDLM_FL_LOCAL_ONLY)) {
-		if (lock->l_req_mode == lock->l_granted_mode &&
-		    lock->l_granted_mode != LCK_NL && !data)
-			ldlm_lock_decref_internal(lock, lock->l_req_mode);
-
-		/* Need to wake up the waiter if we were evicted */
-		wake_up(&lock->l_waitq);
-		return 0;
-	}
-
 	LASSERT(flags != LDLM_FL_WAIT_NOREPROC);
 
-	if (!(flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED |
-		       LDLM_FL_BLOCK_CONV))) {
+	if (flags & LDLM_FL_FAILED)
+		goto granted;
+
+	if (!(flags & LDLM_FL_BLOCKED_MASK)) {
 		if (!data)
 			/* mds granted the lock in the reply */
 			goto granted;
@@ -514,12 +507,21 @@ ldlm_flock_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data)
 granted:
 	OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_CP_CB_WAIT, 10);
 
-	if (ldlm_is_failed(lock)) {
-		LDLM_DEBUG(lock, "client-side enqueue waking up: failed");
-		return -EIO;
+	if (OBD_FAIL_PRECHECK(OBD_FAIL_LDLM_CP_CB_WAIT4)) {
+		lock_res_and_lock(lock);
+		/* DEADLOCK is always set with CBPENDING */
+		lock->l_flags |= LDLM_FL_FLOCK_DEADLOCK | LDLM_FL_CBPENDING;
+		unlock_res_and_lock(lock);
+		OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_CP_CB_WAIT4, 4);
+	}
+	if (OBD_FAIL_PRECHECK(OBD_FAIL_LDLM_CP_CB_WAIT5)) {
+		lock_res_and_lock(lock);
+		/* DEADLOCK is always set with CBPENDING */
+		lock->l_flags |= LDLM_FL_FAIL_LOC |
+				 LDLM_FL_FLOCK_DEADLOCK | LDLM_FL_CBPENDING;
+		unlock_res_and_lock(lock);
+		OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_CP_CB_WAIT5, 4);
 	}
-
-	LDLM_DEBUG(lock, "client-side enqueue granted");
 
 	lock_res_and_lock(lock);
 
@@ -530,20 +532,59 @@ granted:
 	if (ldlm_is_destroyed(lock)) {
 		unlock_res_and_lock(lock);
 		LDLM_DEBUG(lock, "client-side enqueue waking up: destroyed");
-		return 0;
+		/*
+		 * An error is still to be returned, to propagate it up to
+		 * ldlm_cli_enqueue_fini() caller.
+		 */
+		return -EIO;
 	}
 
 	/* ldlm_lock_enqueue() has already placed lock on the granted list. */
-	list_del_init(&lock->l_res_link);
+	ldlm_resource_unlink_lock(lock);
+
+	/*
+	 * Import invalidation. We need to actually release the lock
+	 * references being held, so that it can go away. No point in
+	 * holding the lock even if app still believes it has it, since
+	 * server already dropped it anyway. Only for granted locks too.
+	 */
+	/* Do the same for DEADLOCK'ed locks. */
+	if (ldlm_is_failed(lock) || ldlm_is_flock_deadlock(lock)) {
+		int mode;
+
+		if (flags & LDLM_FL_TEST_LOCK)
+			LASSERT(ldlm_is_test_lock(lock));
+
+		if (ldlm_is_test_lock(lock) || ldlm_is_flock_deadlock(lock))
+			mode = getlk->fl_type;
+		else
+			mode = lock->l_granted_mode;
+
+		if (ldlm_is_flock_deadlock(lock)) {
+			LDLM_DEBUG(lock, "client-side enqueue deadlock received");
+			rc = -EDEADLK;
+		}
+		ldlm_flock_destroy(lock, mode, LDLM_FL_WAIT_NOREPROC);
+		unlock_res_and_lock(lock);
+
+		/* Need to wake up the waiter if we were evicted */
+		wake_up(&lock->l_waitq);
+
+		/*
+		 * An error is still to be returned, to propagate it up to
+		 * ldlm_cli_enqueue_fini() caller.
+		 */
+		return rc ? : -EIO;
+	}
+
+	LDLM_DEBUG(lock, "client-side enqueue granted");
 
-	if (ldlm_is_flock_deadlock(lock)) {
-		LDLM_DEBUG(lock, "client-side enqueue deadlock received");
-		rc = -EDEADLK;
-	} else if (flags & LDLM_FL_TEST_LOCK) {
+	if (flags & LDLM_FL_TEST_LOCK) {
 		/* fcntl(F_GETLK) request */
 		/* The old mode was saved in getlk->fl_type so that if the mode
 		 * in the lock changes we can decref the appropriate refcount.
 		 */
+		LASSERT(ldlm_is_test_lock(lock));
 		ldlm_flock_destroy(lock, getlk->fl_type, LDLM_FL_WAIT_NOREPROC);
 		switch (lock->l_granted_mode) {
 		case LCK_PR:
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_internal.h b/drivers/staging/lustre/lustre/ldlm/ldlm_internal.h
index e4cf65d2d3b1..5e82cfc245b2 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_internal.h
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_internal.h
@@ -100,9 +100,10 @@ enum {
 int ldlm_cancel_lru(struct ldlm_namespace *ns, int nr,
 		    enum ldlm_cancel_flags sync, int flags);
 int ldlm_cancel_lru_local(struct ldlm_namespace *ns,
-			 struct list_head *cancels, int count, int max,
-			 enum ldlm_cancel_flags cancel_flags, int flags);
-extern int ldlm_enqueue_min;
+			  struct list_head *cancels, int count, int max,
+			  enum ldlm_cancel_flags cancel_flags, int flags);
+extern unsigned int ldlm_enqueue_min;
+extern unsigned int ldlm_cancel_unused_locks_before_replay;
 
 /* ldlm_resource.c */
 int ldlm_resource_putref_locked(struct ldlm_resource *res);
@@ -200,8 +201,7 @@ ldlm_interval_extent(struct ldlm_interval *node)
 
 	LASSERT(!list_empty(&node->li_group));
 
-	lock = list_entry(node->li_group.next, struct ldlm_lock,
-			      l_sl_policy);
+	lock = list_entry(node->li_group.next, struct ldlm_lock, l_sl_policy);
 	return &lock->l_policy_data.l_extent;
 }
 
@@ -302,7 +302,7 @@ static inline int is_granted_or_cancelled(struct ldlm_lock *lock)
 
 	lock_res_and_lock(lock);
 	if ((lock->l_req_mode == lock->l_granted_mode) &&
-	     !ldlm_is_cp_reqd(lock))
+	    !ldlm_is_cp_reqd(lock))
 		ret = 1;
 	else if (ldlm_is_failed(lock) || ldlm_is_cancel(lock))
 		ret = 1;
@@ -326,13 +326,13 @@ void ldlm_ibits_policy_wire_to_local(const ldlm_wire_policy_data_t *wpolicy,
 void ldlm_ibits_policy_local_to_wire(const ldlm_policy_data_t *lpolicy,
 				     ldlm_wire_policy_data_t *wpolicy);
 void ldlm_extent_policy_wire_to_local(const ldlm_wire_policy_data_t *wpolicy,
-				     ldlm_policy_data_t *lpolicy);
+				      ldlm_policy_data_t *lpolicy);
 void ldlm_extent_policy_local_to_wire(const ldlm_policy_data_t *lpolicy,
-				     ldlm_wire_policy_data_t *wpolicy);
+				      ldlm_wire_policy_data_t *wpolicy);
 void ldlm_flock_policy_wire18_to_local(const ldlm_wire_policy_data_t *wpolicy,
-				     ldlm_policy_data_t *lpolicy);
+				       ldlm_policy_data_t *lpolicy);
 void ldlm_flock_policy_wire21_to_local(const ldlm_wire_policy_data_t *wpolicy,
-				     ldlm_policy_data_t *lpolicy);
+				       ldlm_policy_data_t *lpolicy);
 
 void ldlm_flock_policy_local_to_wire(const ldlm_policy_data_t *lpolicy,
 				     ldlm_wire_policy_data_t *wpolicy);
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c b/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c
index 7c832aae7d5e..153e990c494e 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_lib.c
@@ -82,7 +82,7 @@ static int import_set_conn(struct obd_import *imp, struct obd_uuid *uuid,
 			if (priority) {
 				list_del(&item->oic_item);
 				list_add(&item->oic_item,
-					     &imp->imp_conn_list);
+					 &imp->imp_conn_list);
 				item->oic_last_attempt = 0;
 			}
 			CDEBUG(D_HA, "imp %p@%s: found existing conn %s%s\n",
@@ -102,7 +102,7 @@ static int import_set_conn(struct obd_import *imp, struct obd_uuid *uuid,
 			list_add(&imp_conn->oic_item, &imp->imp_conn_list);
 		else
 			list_add_tail(&imp_conn->oic_item,
-					  &imp->imp_conn_list);
+				      &imp->imp_conn_list);
 		CDEBUG(D_HA, "imp %p@%s: add connection %s at %s\n",
 		       imp, imp->imp_obd->obd_name, uuid->uuid,
 		       (priority ? "head" : "tail"));
@@ -299,12 +299,14 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg)
 	       min_t(unsigned int, LUSTRE_CFG_BUFLEN(lcfg, 2),
 		     sizeof(server_uuid)));
 
-	cli->cl_dirty = 0;
+	cli->cl_dirty_pages = 0;
 	cli->cl_avail_grant = 0;
-	/* FIXME: Should limit this for the sum of all cl_dirty_max. */
-	cli->cl_dirty_max = OSC_MAX_DIRTY_DEFAULT * 1024 * 1024;
-	if (cli->cl_dirty_max >> PAGE_SHIFT > totalram_pages / 8)
-		cli->cl_dirty_max = totalram_pages << (PAGE_SHIFT - 3);
+	/* FIXME: Should limit this for the sum of all cl_dirty_max_pages. */
+	/*
+	 * cl_dirty_max_pages may be changed at connect time in
+	 * ptlrpc_connect_interpret().
+	 */
+	client_adjust_max_dirty(cli);
 	INIT_LIST_HEAD(&cli->cl_cache_waiters);
 	INIT_LIST_HEAD(&cli->cl_loi_ready_list);
 	INIT_LIST_HEAD(&cli->cl_loi_hp_ready_list);
@@ -326,11 +328,11 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg)
 	/* lru for osc. */
 	INIT_LIST_HEAD(&cli->cl_lru_osc);
 	atomic_set(&cli->cl_lru_shrinkers, 0);
-	atomic_set(&cli->cl_lru_busy, 0);
-	atomic_set(&cli->cl_lru_in_list, 0);
+	atomic_long_set(&cli->cl_lru_busy, 0);
+	atomic_long_set(&cli->cl_lru_in_list, 0);
 	INIT_LIST_HEAD(&cli->cl_lru_list);
 	spin_lock_init(&cli->cl_lru_list_lock);
-	atomic_set(&cli->cl_unstable_count, 0);
+	atomic_long_set(&cli->cl_unstable_count, 0);
 
 	init_waitqueue_head(&cli->cl_destroy_waitq);
 	atomic_set(&cli->cl_destroy_in_flight, 0);
@@ -360,7 +362,7 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg)
 	cli->cl_chunkbits = PAGE_SHIFT;
 
 	if (!strcmp(name, LUSTRE_MDC_NAME)) {
-		cli->cl_max_rpcs_in_flight = MDC_MAX_RIF_DEFAULT;
+		cli->cl_max_rpcs_in_flight = OBD_MAX_RIF_DEFAULT;
 	} else if (totalram_pages >> (20 - PAGE_SHIFT) <= 128 /* MB */) {
 		cli->cl_max_rpcs_in_flight = 2;
 	} else if (totalram_pages >> (20 - PAGE_SHIFT) <= 256 /* MB */) {
@@ -368,7 +370,7 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg)
 	} else if (totalram_pages >> (20 - PAGE_SHIFT) <= 512 /* MB */) {
 		cli->cl_max_rpcs_in_flight = 4;
 	} else {
-		cli->cl_max_rpcs_in_flight = OSC_MAX_RIF_DEFAULT;
+		cli->cl_max_rpcs_in_flight = OBD_MAX_RIF_DEFAULT;
 	}
 	rc = ldlm_get_ref();
 	if (rc) {
@@ -534,7 +536,7 @@ int client_disconnect_export(struct obd_export *exp)
 	imp = cli->cl_import;
 
 	down_write(&cli->cl_sem);
-	CDEBUG(D_INFO, "disconnect %s - %d\n", obd->obd_name,
+	CDEBUG(D_INFO, "disconnect %s - %zu\n", obd->obd_name,
 	       cli->cl_conn_count);
 
 	if (!cli->cl_conn_count) {
@@ -690,7 +692,7 @@ void target_send_reply(struct ptlrpc_request *req, int rc, int fail_id)
 	if (rs->rs_transno > exp->exp_last_committed) {
 		/* not committed already */
 		list_add_tail(&rs->rs_obd_list,
-				  &exp->exp_uncommitted_replies);
+			      &exp->exp_uncommitted_replies);
 	}
 	spin_unlock(&exp->exp_uncommitted_replies_lock);
 
@@ -795,7 +797,7 @@ void ldlm_dump_export_locks(struct obd_export *exp)
 		CERROR("dumping locks for export %p,ignore if the unmount doesn't hang\n",
 		       exp);
 		list_for_each_entry(lock, &exp->exp_locks_list,
-					l_exp_refs_link)
+				    l_exp_refs_link)
 			LDLM_ERROR(lock, "lock:");
 	}
 	spin_unlock(&exp->exp_locks_list_guard);
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c b/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c
index a5993f745ebe..3c48b4fb96f1 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c
@@ -481,8 +481,8 @@ int ldlm_lock_change_resource(struct ldlm_namespace *ns, struct ldlm_lock *lock,
 	unlock_res_and_lock(lock);
 
 	newres = ldlm_resource_get(ns, NULL, new_resid, type, 1);
-	if (!newres)
-		return -ENOMEM;
+	if (IS_ERR(newres))
+		return PTR_ERR(newres);
 
 	lu_ref_add(&newres->lr_reference, "lock", lock);
 	/*
@@ -542,7 +542,7 @@ struct ldlm_lock *__ldlm_handle2lock(const struct lustre_handle *handle,
 
 	LASSERT(handle);
 
-	lock = class_handle2object(handle->cookie);
+	lock = class_handle2object(handle->cookie, NULL);
 	if (!lock)
 		return NULL;
 
@@ -937,7 +937,7 @@ static void search_granted_lock(struct list_head *queue,
 				/* go to next policy group within mode group */
 				tmp = policy_end->l_res_link.next;
 				lock = list_entry(tmp, struct ldlm_lock,
-						      l_res_link);
+						  l_res_link);
 			}  /* loop over policy groups within the mode group */
 
 			/* insert point is last lock of the mode group,
@@ -1028,15 +1028,28 @@ void ldlm_grant_lock(struct ldlm_lock *lock, struct list_head *work_list)
 	check_res_locked(res);
 
 	lock->l_granted_mode = lock->l_req_mode;
+
+	if (work_list && lock->l_completion_ast)
+		ldlm_add_ast_work_item(lock, NULL, work_list);
+
 	if (res->lr_type == LDLM_PLAIN || res->lr_type == LDLM_IBITS)
 		ldlm_grant_lock_with_skiplist(lock);
 	else if (res->lr_type == LDLM_EXTENT)
 		ldlm_extent_add_lock(res, lock);
-	else
+	else if (res->lr_type == LDLM_FLOCK) {
+		/*
+		 * We should not add locks to granted list in the following cases:
+		 * - this is an UNLOCK but not a real lock;
+		 * - this is a TEST lock;
+		 * - this is a F_CANCELLK lock (async flock has req_mode == 0)
+		 * - this is a deadlock (flock cannot be granted)
+		 */
+		if (!lock->l_req_mode || lock->l_req_mode == LCK_NL ||
+		    ldlm_is_test_lock(lock) || ldlm_is_flock_deadlock(lock))
+			return;
 		ldlm_resource_add_lock(res, &res->lr_granted, lock);
-
-	if (work_list && lock->l_completion_ast)
-		ldlm_add_ast_work_item(lock, NULL, work_list);
+	} else
+		LBUG();
 
 	ldlm_pool_add(&ldlm_res_to_ns(res)->ns_pool, lock);
 }
@@ -1103,7 +1116,7 @@ static struct ldlm_lock *search_queue(struct list_head *queue,
 		 * of bits.
 		 */
 		if (lock->l_resource->lr_type == LDLM_IBITS &&
-		     ((lock->l_policy_data.l_inodebits.bits &
+		    ((lock->l_policy_data.l_inodebits.bits &
 		      policy->l_inodebits.bits) !=
 		      policy->l_inodebits.bits))
 			continue;
@@ -1214,7 +1227,7 @@ enum ldlm_mode ldlm_lock_match(struct ldlm_namespace *ns, __u64 flags,
 	}
 
 	res = ldlm_resource_get(ns, NULL, res_id, type, 0);
-	if (!res) {
+	if (IS_ERR(res)) {
 		LASSERT(!old_lock);
 		return 0;
 	}
@@ -1363,12 +1376,12 @@ int ldlm_fill_lvb(struct ldlm_lock *lock, struct req_capsule *pill,
 		if (size == sizeof(struct ost_lvb)) {
 			if (loc == RCL_CLIENT)
 				lvb = req_capsule_client_swab_get(pill,
-						&RMF_DLM_LVB,
-						lustre_swab_ost_lvb);
+								  &RMF_DLM_LVB,
+							lustre_swab_ost_lvb);
 			else
 				lvb = req_capsule_server_swab_get(pill,
-						&RMF_DLM_LVB,
-						lustre_swab_ost_lvb);
+								  &RMF_DLM_LVB,
+							lustre_swab_ost_lvb);
 			if (unlikely(!lvb)) {
 				LDLM_ERROR(lock, "no LVB");
 				return -EPROTO;
@@ -1380,8 +1393,8 @@ int ldlm_fill_lvb(struct ldlm_lock *lock, struct req_capsule *pill,
 
 			if (loc == RCL_CLIENT)
 				lvb = req_capsule_client_swab_get(pill,
-						&RMF_DLM_LVB,
-						lustre_swab_ost_lvb_v1);
+								  &RMF_DLM_LVB,
+							lustre_swab_ost_lvb_v1);
 			else
 				lvb = req_capsule_server_sized_swab_get(pill,
 						&RMF_DLM_LVB, size,
@@ -1405,12 +1418,12 @@ int ldlm_fill_lvb(struct ldlm_lock *lock, struct req_capsule *pill,
 		if (size == sizeof(struct lquota_lvb)) {
 			if (loc == RCL_CLIENT)
 				lvb = req_capsule_client_swab_get(pill,
-						&RMF_DLM_LVB,
-						lustre_swab_lquota_lvb);
+								  &RMF_DLM_LVB,
+							lustre_swab_lquota_lvb);
 			else
 				lvb = req_capsule_server_swab_get(pill,
-						&RMF_DLM_LVB,
-						lustre_swab_lquota_lvb);
+								  &RMF_DLM_LVB,
+							lustre_swab_lquota_lvb);
 			if (unlikely(!lvb)) {
 				LDLM_ERROR(lock, "no LVB");
 				return -EPROTO;
@@ -1462,15 +1475,15 @@ struct ldlm_lock *ldlm_lock_create(struct ldlm_namespace *ns,
 {
 	struct ldlm_lock *lock;
 	struct ldlm_resource *res;
+	int rc;
 
 	res = ldlm_resource_get(ns, NULL, res_id, type, 1);
-	if (!res)
-		return NULL;
+	if (IS_ERR(res))
+		return ERR_CAST(res);
 
 	lock = ldlm_lock_new(res);
-
 	if (!lock)
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 
 	lock->l_req_mode = mode;
 	lock->l_ast_data = data;
@@ -1484,27 +1497,33 @@ struct ldlm_lock *ldlm_lock_create(struct ldlm_namespace *ns,
 	lock->l_tree_node = NULL;
 	/* if this is the extent lock, allocate the interval tree node */
 	if (type == LDLM_EXTENT) {
-		if (!ldlm_interval_alloc(lock))
+		if (!ldlm_interval_alloc(lock)) {
+			rc = -ENOMEM;
 			goto out;
+		}
 	}
 
 	if (lvb_len) {
 		lock->l_lvb_len = lvb_len;
 		lock->l_lvb_data = kzalloc(lvb_len, GFP_NOFS);
-		if (!lock->l_lvb_data)
+		if (!lock->l_lvb_data) {
+			rc = -ENOMEM;
 			goto out;
+		}
 	}
 
 	lock->l_lvb_type = lvb_type;
-	if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_NEW_LOCK))
+	if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_NEW_LOCK)) {
+		rc = -ENOENT;
 		goto out;
+	}
 
 	return lock;
 
 out:
 	ldlm_lock_destroy(lock);
 	LDLM_LOCK_RELEASE(lock);
-	return NULL;
+	return ERR_PTR(rc);
 }
 
 /**
@@ -1522,16 +1541,13 @@ enum ldlm_error ldlm_lock_enqueue(struct ldlm_namespace *ns,
 	struct ldlm_lock *lock = *lockp;
 	struct ldlm_resource *res = lock->l_resource;
 
-	lock->l_last_activity = ktime_get_real_seconds();
-
 	lock_res_and_lock(lock);
 	if (lock->l_req_mode == lock->l_granted_mode) {
 		/* The server returned a blocked lock, but it was granted
 		 * before we got a chance to actually enqueue it.  We don't
 		 * need to do anything else.
 		 */
-		*flags &= ~(LDLM_FL_BLOCK_GRANTED |
-			    LDLM_FL_BLOCK_CONV | LDLM_FL_BLOCK_WAIT);
+		*flags &= ~LDLM_FL_BLOCKED_MASK;
 		goto out;
 	}
 
@@ -1546,6 +1562,8 @@ enum ldlm_error ldlm_lock_enqueue(struct ldlm_namespace *ns,
 	 */
 	if (*flags & LDLM_FL_AST_DISCARD_DATA)
 		ldlm_set_ast_discard_data(lock);
+	if (*flags & LDLM_FL_TEST_LOCK)
+		ldlm_set_test_lock(lock);
 
 	/*
 	 * This distinction between local lock trees is very important; a client
@@ -1688,7 +1706,7 @@ static int ldlm_work_gl_ast_lock(struct ptlrpc_request_set *rqset, void *opaq)
 		return -ENOENT;
 
 	gl_work = list_entry(arg->list->next, struct ldlm_glimpse_work,
-				 gl_list);
+			     gl_list);
 	list_del_init(&gl_work->gl_list);
 
 	lock = gl_work->gl_lock;
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c b/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c
index 821939ff2e6b..fde697ebaadc 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c
@@ -559,8 +559,11 @@ static int ldlm_callback_handler(struct ptlrpc_request *req)
 
 	switch (lustre_msg_get_opc(req->rq_reqmsg)) {
 	case LDLM_BL_CALLBACK:
-		if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_BL_CALLBACK_NET))
+		if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_BL_CALLBACK_NET)) {
+			if (cfs_fail_err)
+				ldlm_callback_reply(req, -(int)cfs_fail_err);
 			return 0;
+		}
 		break;
 	case LDLM_CP_CALLBACK:
 		if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_CP_CALLBACK_NET))
@@ -706,12 +709,12 @@ static struct ldlm_bl_work_item *ldlm_bl_get_work(struct ldlm_bl_pool *blp)
 	if (!list_empty(&blp->blp_list) &&
 	    (list_empty(&blp->blp_prio_list) || num_bl == 0))
 		blwi = list_entry(blp->blp_list.next,
-				      struct ldlm_bl_work_item, blwi_entry);
+				  struct ldlm_bl_work_item, blwi_entry);
 	else
 		if (!list_empty(&blp->blp_prio_list))
 			blwi = list_entry(blp->blp_prio_list.next,
-					      struct ldlm_bl_work_item,
-					      blwi_entry);
+					  struct ldlm_bl_work_item,
+					  blwi_entry);
 
 	if (blwi) {
 		if (++num_bl >= atomic_read(&blp->blp_num_threads))
@@ -741,7 +744,7 @@ static int ldlm_bl_thread_start(struct ldlm_bl_pool *blp)
 	init_completion(&bltd.bltd_comp);
 	bltd.bltd_num = atomic_read(&blp->blp_num_threads);
 	snprintf(bltd.bltd_name, sizeof(bltd.bltd_name),
-		"ldlm_bl_%02d", bltd.bltd_num);
+		 "ldlm_bl_%02d", bltd.bltd_num);
 	task = kthread_run(ldlm_bl_thread_main, &bltd, "%s", bltd.bltd_name);
 	if (IS_ERR(task)) {
 		CERROR("cannot start LDLM thread ldlm_bl_%02d: rc %ld\n",
@@ -786,8 +789,8 @@ static int ldlm_bl_thread_main(void *arg)
 		if (!blwi) {
 			atomic_dec(&blp->blp_busy_threads);
 			l_wait_event_exclusive(blp->blp_waitq,
-					 (blwi = ldlm_bl_get_work(blp)),
-					 &lwi);
+					       (blwi = ldlm_bl_get_work(blp)),
+					       &lwi);
 			busy = atomic_inc_return(&blp->blp_busy_threads);
 		} else {
 			busy = atomic_read(&blp->blp_busy_threads);
@@ -874,8 +877,6 @@ void ldlm_put_ref(void)
 }
 EXPORT_SYMBOL(ldlm_put_ref);
 
-extern unsigned int ldlm_cancel_unused_locks_before_replay;
-
 static ssize_t cancel_unused_locks_before_replay_show(struct kobject *kobj,
 						      struct attribute *attr,
 						      char *buf)
@@ -1094,16 +1095,17 @@ int ldlm_init(void)
 		return -ENOMEM;
 
 	ldlm_lock_slab = kmem_cache_create("ldlm_locks",
-			      sizeof(struct ldlm_lock), 0,
-			      SLAB_HWCACHE_ALIGN | SLAB_DESTROY_BY_RCU, NULL);
+					   sizeof(struct ldlm_lock), 0,
+					   SLAB_HWCACHE_ALIGN |
+					   SLAB_DESTROY_BY_RCU, NULL);
 	if (!ldlm_lock_slab) {
 		kmem_cache_destroy(ldlm_resource_slab);
 		return -ENOMEM;
 	}
 
 	ldlm_interval_slab = kmem_cache_create("interval_node",
-					sizeof(struct ldlm_interval),
-					0, SLAB_HWCACHE_ALIGN, NULL);
+					       sizeof(struct ldlm_interval),
+					       0, SLAB_HWCACHE_ALIGN, NULL);
 	if (!ldlm_interval_slab) {
 		kmem_cache_destroy(ldlm_resource_slab);
 		kmem_cache_destroy(ldlm_lock_slab);
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c b/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c
index 657ed4012776..9a1136e32dfc 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c
@@ -357,38 +357,40 @@ static int ldlm_pool_recalc(struct ldlm_pool *pl)
 	int count;
 
 	recalc_interval_sec = ktime_get_seconds() - pl->pl_recalc_time;
-	if (recalc_interval_sec <= 0)
-		goto recalc;
-
-	spin_lock(&pl->pl_lock);
 	if (recalc_interval_sec > 0) {
-		/*
-		 * Update pool statistics every 1s.
-		 */
-		ldlm_pool_recalc_stats(pl);
-
-		/*
-		 * Zero out all rates and speed for the last period.
-		 */
-		atomic_set(&pl->pl_grant_rate, 0);
-		atomic_set(&pl->pl_cancel_rate, 0);
+		spin_lock(&pl->pl_lock);
+		recalc_interval_sec = ktime_get_seconds() - pl->pl_recalc_time;
+
+		if (recalc_interval_sec > 0) {
+			/*
+			 * Update pool statistics every 1s.
+			 */
+			ldlm_pool_recalc_stats(pl);
+
+			/*
+			 * Zero out all rates and speed for the last period.
+			 */
+			atomic_set(&pl->pl_grant_rate, 0);
+			atomic_set(&pl->pl_cancel_rate, 0);
+		}
+		spin_unlock(&pl->pl_lock);
 	}
-	spin_unlock(&pl->pl_lock);
 
- recalc:
 	if (pl->pl_ops->po_recalc) {
 		count = pl->pl_ops->po_recalc(pl);
 		lprocfs_counter_add(pl->pl_stats, LDLM_POOL_RECALC_STAT,
 				    count);
 	}
+
 	recalc_interval_sec = pl->pl_recalc_time - ktime_get_seconds() +
 			      pl->pl_recalc_period;
 	if (recalc_interval_sec <= 0) {
+		/* DEBUG: should be re-removed after LU-4536 is fixed */
+		CDEBUG(D_DLMTRACE, "%s: Negative interval(%ld), too short period(%ld)\n",
+		       pl->pl_name, (long)recalc_interval_sec,
+		       (long)pl->pl_recalc_period);
+
 		/* Prevent too frequent recalculation. */
-		CDEBUG(D_DLMTRACE,
-		       "Negative interval(%d), too short period(%lld)",
-		       recalc_interval_sec,
-		       (s64)pl->pl_recalc_period);
 		recalc_interval_sec = 1;
 	}
 
@@ -792,7 +794,8 @@ static struct completion ldlm_pools_comp;
  */
 static unsigned long ldlm_pools_count(ldlm_side_t client, gfp_t gfp_mask)
 {
-	int total = 0, nr_ns;
+	unsigned long total = 0;
+	int nr_ns;
 	struct ldlm_namespace *ns;
 	struct ldlm_namespace *ns_old = NULL; /* loop detection */
 	void *cookie;
@@ -995,7 +998,7 @@ static int ldlm_pools_thread_main(void *arg)
 	wake_up(&thread->t_ctl_waitq);
 
 	CDEBUG(D_DLMTRACE, "%s: pool thread starting, process %d\n",
-		"ldlm_poold", current_pid());
+	       "ldlm_poold", current_pid());
 
 	while (1) {
 		struct l_wait_info lwi;
@@ -1025,7 +1028,7 @@ static int ldlm_pools_thread_main(void *arg)
 	wake_up(&thread->t_ctl_waitq);
 
 	CDEBUG(D_DLMTRACE, "%s: pool thread exiting, process %d\n",
-		"ldlm_poold", current_pid());
+	       "ldlm_poold", current_pid());
 
 	complete_and_exit(&ldlm_pools_comp, 0);
 }
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_request.c b/drivers/staging/lustre/lustre/ldlm/ldlm_request.c
index af487f9937f4..35ba6f14d95f 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_request.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_request.c
@@ -63,8 +63,8 @@
 
 #include "ldlm_internal.h"
 
-int ldlm_enqueue_min = OBD_TIMEOUT_DEFAULT;
-module_param(ldlm_enqueue_min, int, 0644);
+unsigned int ldlm_enqueue_min = OBD_TIMEOUT_DEFAULT;
+module_param(ldlm_enqueue_min, uint, 0644);
 MODULE_PARM_DESC(ldlm_enqueue_min, "lock enqueue timeout minimum");
 
 /* in client side, whether the cached locks will be canceled before replay */
@@ -123,44 +123,56 @@ static int ldlm_expired_completion_wait(void *data)
 	return 0;
 }
 
+/**
+ * Calculate the Completion timeout (covering enqueue, BL AST, data flush,
+ * lock cancel, and their replies). Used for lock completion timeout on the
+ * client side.
+ *
+ * \param[in] lock	lock which is waiting the completion callback
+ *
+ * \retval		timeout in seconds to wait for the server reply
+ */
 /* We use the same basis for both server side and client side functions
  * from a single node.
  */
-static int ldlm_get_enq_timeout(struct ldlm_lock *lock)
+static unsigned int ldlm_cp_timeout(struct ldlm_lock *lock)
 {
-	int timeout = at_get(ldlm_lock_to_ns_at(lock));
+	unsigned int timeout;
 
 	if (AT_OFF)
-		return obd_timeout / 2;
-	/* Since these are non-updating timeouts, we should be conservative.
-	 * It would be nice to have some kind of "early reply" mechanism for
-	 * lock callbacks too...
+		return obd_timeout;
+
+	/*
+	 * Wait a long time for enqueue - server may have to callback a
+	 * lock from another client.  Server will evict the other client if it
+	 * doesn't respond reasonably, and then give us the lock.
 	 */
-	timeout = min_t(int, at_max, timeout + (timeout >> 1)); /* 150% */
-	return max(timeout, ldlm_enqueue_min);
+	timeout = at_get(ldlm_lock_to_ns_at(lock));
+	return max(3 * timeout, ldlm_enqueue_min);
 }
 
 /**
  * Helper function for ldlm_completion_ast(), updating timings when lock is
  * actually granted.
  */
-static int ldlm_completion_tail(struct ldlm_lock *lock)
+static int ldlm_completion_tail(struct ldlm_lock *lock, void *data)
 {
 	long delay;
-	int  result;
+	int result = 0;
 
 	if (ldlm_is_destroyed(lock) || ldlm_is_failed(lock)) {
 		LDLM_DEBUG(lock, "client-side enqueue: destroyed");
 		result = -EIO;
+	} else if (!data) {
+		LDLM_DEBUG(lock, "client-side enqueue: granted");
 	} else {
+		/* Take into AT only CP RPC, not immediately granted locks */
 		delay = ktime_get_real_seconds() - lock->l_last_activity;
 		LDLM_DEBUG(lock, "client-side enqueue: granted after %lds",
 			   delay);
 
 		/* Update our time estimate */
-		at_measured(ldlm_lock_to_ns_at(lock),
-			    delay);
-		result = 0;
+		at_measured(ldlm_lock_to_ns_at(lock), delay);
 	}
 	return result;
 }
@@ -177,10 +189,9 @@ int ldlm_completion_ast_async(struct ldlm_lock *lock, __u64 flags, void *data)
 		return 0;
 	}
 
-	if (!(flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED |
-		       LDLM_FL_BLOCK_CONV))) {
+	if (!(flags & LDLM_FL_BLOCKED_MASK)) {
 		wake_up(&lock->l_waitq);
-		return ldlm_completion_tail(lock);
+		return ldlm_completion_tail(lock, data);
 	}
 
 	LDLM_DEBUG(lock, "client-side enqueue returned a blocked lock, going forward");
@@ -224,8 +235,7 @@ int ldlm_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data)
 		goto noreproc;
 	}
 
-	if (!(flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED |
-		       LDLM_FL_BLOCK_CONV))) {
+	if (!(flags & LDLM_FL_BLOCKED_MASK)) {
 		wake_up(&lock->l_waitq);
 		return 0;
 	}
@@ -240,13 +250,10 @@ noreproc:
 	if (obd)
 		imp = obd->u.cli.cl_import;
 
-	/* Wait a long time for enqueue - server may have to callback a
-	 * lock from another client.  Server will evict the other client if it
-	 * doesn't respond reasonably, and then give us the lock.
-	 */
-	timeout = ldlm_get_enq_timeout(lock) * 2;
+	timeout = ldlm_cp_timeout(lock);
 
 	lwd.lwd_lock = lock;
+	lock->l_last_activity = ktime_get_real_seconds();
 
 	if (ldlm_is_no_timeout(lock)) {
 		LDLM_DEBUG(lock, "waiting indefinitely because of NO_TIMEOUT");
@@ -279,7 +286,7 @@ noreproc:
 		return rc;
 	}
 
-	return ldlm_completion_tail(lock);
+	return ldlm_completion_tail(lock, data);
 }
 EXPORT_SYMBOL(ldlm_completion_ast);
 
@@ -309,8 +316,6 @@ static void failed_lock_cleanup(struct ldlm_namespace *ns,
 	else
 		LDLM_DEBUG(lock, "lock was granted or failed in race");
 
-	ldlm_lock_decref_internal(lock, mode);
-
 	/* XXX - HACK because we shouldn't call ldlm_lock_destroy()
 	 *       from llite/file.c/ll_file_flock().
 	 */
@@ -321,9 +326,14 @@ static void failed_lock_cleanup(struct ldlm_namespace *ns,
 	 */
 	if (lock->l_resource->lr_type == LDLM_FLOCK) {
 		lock_res_and_lock(lock);
-		ldlm_resource_unlink_lock(lock);
-		ldlm_lock_destroy_nolock(lock);
+		if (!ldlm_is_destroyed(lock)) {
+			ldlm_resource_unlink_lock(lock);
+			ldlm_lock_decref_internal_nolock(lock, mode);
+			ldlm_lock_destroy_nolock(lock);
+		}
 		unlock_res_and_lock(lock);
+	} else {
+		ldlm_lock_decref_internal(lock, mode);
 	}
 }
 
@@ -418,11 +428,6 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
 	*flags = ldlm_flags_from_wire(reply->lock_flags);
 	lock->l_flags |= ldlm_flags_from_wire(reply->lock_flags &
 					      LDLM_FL_INHERIT_MASK);
-	/* move NO_TIMEOUT flag to the lock to force ldlm_lock_match()
-	 * to wait with no timeout as well
-	 */
-	lock->l_flags |= ldlm_flags_from_wire(reply->lock_flags &
-					      LDLM_FL_NO_TIMEOUT);
 	unlock_res_and_lock(lock);
 
 	CDEBUG(D_INFO, "local: %p, remote cookie: %#llx, flags: 0x%llx\n",
@@ -556,7 +561,7 @@ static inline int ldlm_capsule_handles_avail(struct req_capsule *pill,
 					     enum req_location loc,
 					     int off)
 {
-	int size = req_capsule_msg_size(pill, loc);
+	u32 size = req_capsule_msg_size(pill, loc);
 
 	return ldlm_req_handles_avail(size, off);
 }
@@ -565,7 +570,7 @@ static inline int ldlm_format_handles_avail(struct obd_import *imp,
 					    const struct req_format *fmt,
 					    enum req_location loc, int off)
 {
-	int size = req_capsule_fmt_size(imp->imp_msg_magic, fmt, loc);
+	u32 size = req_capsule_fmt_size(imp->imp_msg_magic, fmt, loc);
 
 	return ldlm_req_handles_avail(size, off);
 }
@@ -696,8 +701,8 @@ int ldlm_cli_enqueue(struct obd_export *exp, struct ptlrpc_request **reqp,
 		lock = ldlm_lock_create(ns, res_id, einfo->ei_type,
 					einfo->ei_mode, &cbs, einfo->ei_cbdata,
 					lvb_len, lvb_type);
-		if (!lock)
-			return -ENOMEM;
+		if (IS_ERR(lock))
+			return PTR_ERR(lock);
 		/* for the local lock, add the reference */
 		ldlm_lock_addref_internal(lock, einfo->ei_mode);
 		ldlm_lock2handle(lock, lockh);
@@ -719,6 +724,7 @@ int ldlm_cli_enqueue(struct obd_export *exp, struct ptlrpc_request **reqp,
 	lock->l_export = NULL;
 	lock->l_blocking_ast = einfo->ei_cb_bl;
 	lock->l_flags |= (*flags & (LDLM_FL_NO_LRU | LDLM_FL_EXCL));
+	lock->l_last_activity = ktime_get_real_seconds();
 
 	/* lock not sent to server yet */
 
@@ -819,7 +825,7 @@ static __u64 ldlm_cli_cancel_local(struct ldlm_lock *lock)
 		lock_res_and_lock(lock);
 		ldlm_set_cbpending(lock);
 		local_only = !!(lock->l_flags &
-				(LDLM_FL_LOCAL_ONLY|LDLM_FL_CANCEL_ON_BLOCK));
+				(LDLM_FL_LOCAL_ONLY | LDLM_FL_CANCEL_ON_BLOCK));
 		ldlm_cancel_callback(lock);
 		rc = ldlm_is_bl_ast(lock) ? LDLM_FL_BL_AST : LDLM_FL_CANCELING;
 		unlock_res_and_lock(lock);
@@ -1180,8 +1186,7 @@ static enum ldlm_policy_res ldlm_cancel_lrur_policy(struct ldlm_namespace *ns,
 
 	slv = ldlm_pool_get_slv(pl);
 	lvf = ldlm_pool_get_lvf(pl);
-	la = cfs_duration_sec(cfs_time_sub(cur,
-			      lock->l_last_used));
+	la = cfs_duration_sec(cfs_time_sub(cur, lock->l_last_used));
 	lv = lvf * la * unused;
 
 	/* Inform pool about current CLV to see it via debugfs. */
@@ -1193,9 +1198,6 @@ static enum ldlm_policy_res ldlm_cancel_lrur_policy(struct ldlm_namespace *ns,
 	if (slv == 0 || lv < slv)
 		return LDLM_POLICY_KEEP_LOCK;
 
-	if (ns->ns_cancel && ns->ns_cancel(lock) == 0)
-		return LDLM_POLICY_KEEP_LOCK;
-
 	return LDLM_POLICY_CANCEL_LOCK;
 }
 
@@ -1239,9 +1241,6 @@ static enum ldlm_policy_res ldlm_cancel_aged_policy(struct ldlm_namespace *ns,
 			cfs_time_add(lock->l_last_used, ns->ns_max_age)))
 		return LDLM_POLICY_KEEP_LOCK;
 
-	if (ns->ns_cancel && ns->ns_cancel(lock) == 0)
-		return LDLM_POLICY_KEEP_LOCK;
-
 	return LDLM_POLICY_CANCEL_LOCK;
 }
 
@@ -1374,7 +1373,7 @@ static int ldlm_prepare_lru_list(struct ldlm_namespace *ns,
 			break;
 
 		list_for_each_entry_safe(lock, next, &ns->ns_unused_list,
-					     l_lru) {
+					 l_lru) {
 			/* No locks which got blocking requests. */
 			LASSERT(!ldlm_is_bl_ast(lock));
 
@@ -1413,7 +1412,8 @@ static int ldlm_prepare_lru_list(struct ldlm_namespace *ns,
 		 * That is, for shrinker policy we drop only
 		 * old locks, but additionally choose them by
 		 * their weight. Big extent locks will stay in
-		 * the cache. */
+		 * the cache.
+		 */
 		result = pf(ns, lock, unused, added, count);
 		if (result == LDLM_POLICY_KEEP_LOCK) {
 			lu_ref_del(&lock->l_reference,
@@ -1610,8 +1610,7 @@ int ldlm_cli_cancel_list(struct list_head *cancels, int count,
 	 */
 	while (count > 0) {
 		LASSERT(!list_empty(cancels));
-		lock = list_entry(cancels->next, struct ldlm_lock,
-				      l_bl_ast);
+		lock = list_entry(cancels->next, struct ldlm_lock, l_bl_ast);
 		LASSERT(lock->l_conn_export);
 
 		if (exp_connect_cancelset(lock->l_conn_export)) {
@@ -1660,7 +1659,7 @@ int ldlm_cli_cancel_unused_resource(struct ldlm_namespace *ns,
 	int rc;
 
 	res = ldlm_resource_get(ns, NULL, res_id, 0, 0);
-	if (!res) {
+	if (IS_ERR(res)) {
 		/* This is not a problem. */
 		CDEBUG(D_INFO, "No resource %llu\n", res_id->name[0]);
 		return 0;
@@ -1704,7 +1703,8 @@ static int ldlm_cli_hash_cancel_unused(struct cfs_hash *hs,
  * that have 0 readers/writers.
  *
  * If flags & LCF_LOCAL, throw the locks away without trying
- * to notify the server. */
+ * to notify the server.
+ */
 int ldlm_cli_cancel_unused(struct ldlm_namespace *ns,
 			   const struct ldlm_res_id *res_id,
 			   enum ldlm_cancel_flags flags, void *opaque)
@@ -1811,13 +1811,10 @@ int ldlm_resource_iterate(struct ldlm_namespace *ns,
 	struct ldlm_resource *res;
 	int rc;
 
-	if (!ns) {
-		CERROR("must pass in namespace\n");
-		LBUG();
-	}
+	LASSERTF(ns, "must pass in namespace\n");
 
 	res = ldlm_resource_get(ns, NULL, res_id, 0, 0);
-	if (!res)
+	if (IS_ERR(res))
 		return 0;
 
 	LDLM_RESOURCE_ADDREF(res);
@@ -1843,7 +1840,7 @@ static int ldlm_chain_lock_for_replay(struct ldlm_lock *lock, void *closure)
 	 * bug 17614: locks being actively cancelled. Get a reference
 	 * on a lock so that it does not disappear under us (e.g. due to cancel)
 	 */
-	if (!(lock->l_flags & (LDLM_FL_FAILED|LDLM_FL_CANCELING))) {
+	if (!(lock->l_flags & (LDLM_FL_FAILED | LDLM_FL_CANCELING))) {
 		list_add(&lock->l_pending_chain, list);
 		LDLM_LOCK_GET(lock);
 	}
@@ -2013,7 +2010,7 @@ static void ldlm_cancel_unused_locks_for_replay(struct ldlm_namespace *ns)
 					 LCF_LOCAL, LDLM_CANCEL_NO_WAIT);
 
 	CDEBUG(D_DLMTRACE, "Canceled %d unused locks from namespace %s\n",
-			   canceled, ldlm_ns_name(ns));
+	       canceled, ldlm_ns_name(ns));
 }
 
 int ldlm_replay_locks(struct obd_import *imp)
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c b/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c
index 51a28d96af39..a09c25aea698 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c
@@ -449,8 +449,8 @@ static unsigned ldlm_res_hop_hash(struct cfs_hash *hs,
 				  const void *key, unsigned mask)
 {
 	const struct ldlm_res_id     *id  = key;
-	unsigned		val = 0;
-	unsigned		i;
+	unsigned int		val = 0;
+	unsigned int		i;
 
 	for (i = 0; i < RES_NAME_SIZE; i++)
 		val += id->name[i];
@@ -561,9 +561,9 @@ static struct cfs_hash_ops ldlm_ns_fid_hash_ops = {
 struct ldlm_ns_hash_def {
 	enum ldlm_ns_type nsd_type;
 	/** hash bucket bits */
-	unsigned	nsd_bkt_bits;
+	unsigned int	nsd_bkt_bits;
 	/** hash bits */
-	unsigned	nsd_all_bits;
+	unsigned int	nsd_all_bits;
 	/** hash operations */
 	struct cfs_hash_ops *nsd_hops;
 };
@@ -758,8 +758,7 @@ static void cleanup_resource(struct ldlm_resource *res, struct list_head *q,
 		 */
 		lock_res(res);
 		list_for_each(tmp, q) {
-			lock = list_entry(tmp, struct ldlm_lock,
-					      l_res_link);
+			lock = list_entry(tmp, struct ldlm_lock, l_res_link);
 			if (ldlm_is_cleaned(lock)) {
 				lock = NULL;
 				continue;
@@ -793,8 +792,14 @@ static void cleanup_resource(struct ldlm_resource *res, struct list_head *q,
 			 */
 			unlock_res(res);
 			LDLM_DEBUG(lock, "setting FL_LOCAL_ONLY");
+			if (lock->l_flags & LDLM_FL_FAIL_LOC) {
+				set_current_state(TASK_UNINTERRUPTIBLE);
+				schedule_timeout(cfs_time_seconds(4));
+				set_current_state(TASK_RUNNING);
+			}
 			if (lock->l_completion_ast)
-				lock->l_completion_ast(lock, 0, NULL);
+				lock->l_completion_ast(lock, LDLM_FL_FAILED,
+						       NULL);
 			LDLM_LOCK_RELEASE(lock);
 			continue;
 		}
@@ -875,7 +880,8 @@ static int __ldlm_namespace_free(struct ldlm_namespace *ns, int force)
 		       ldlm_ns_name(ns), atomic_read(&ns->ns_bref));
 force_wait:
 		if (force)
-			lwi = LWI_TIMEOUT(obd_timeout * HZ / 4, NULL, NULL);
+			lwi = LWI_TIMEOUT(msecs_to_jiffies(obd_timeout *
+					  MSEC_PER_SEC) / 4, NULL, NULL);
 
 		rc = l_wait_event(ns->ns_waitq,
 				  atomic_read(&ns->ns_bref) == 0, &lwi);
@@ -1082,10 +1088,11 @@ ldlm_resource_get(struct ldlm_namespace *ns, struct ldlm_resource *parent,
 		  int create)
 {
 	struct hlist_node     *hnode;
-	struct ldlm_resource *res;
+	struct ldlm_resource *res = NULL;
 	struct cfs_hash_bd	 bd;
 	__u64		 version;
 	int		      ns_refcount = 0;
+	int rc;
 
 	LASSERT(!parent);
 	LASSERT(ns->ns_rs_hash);
@@ -1095,31 +1102,20 @@ ldlm_resource_get(struct ldlm_namespace *ns, struct ldlm_resource *parent,
 	hnode = cfs_hash_bd_lookup_locked(ns->ns_rs_hash, &bd, (void *)name);
 	if (hnode) {
 		cfs_hash_bd_unlock(ns->ns_rs_hash, &bd, 0);
-		res = hlist_entry(hnode, struct ldlm_resource, lr_hash);
-		/* Synchronize with regard to resource creation. */
-		if (ns->ns_lvbo && ns->ns_lvbo->lvbo_init) {
-			mutex_lock(&res->lr_lvb_mutex);
-			mutex_unlock(&res->lr_lvb_mutex);
-		}
-
-		if (unlikely(res->lr_lvb_len < 0)) {
-			ldlm_resource_putref(res);
-			res = NULL;
-		}
-		return res;
+		goto lvbo_init;
 	}
 
 	version = cfs_hash_bd_version_get(&bd);
 	cfs_hash_bd_unlock(ns->ns_rs_hash, &bd, 0);
 
 	if (create == 0)
-		return NULL;
+		return ERR_PTR(-ENOENT);
 
 	LASSERTF(type >= LDLM_MIN_TYPE && type < LDLM_MAX_TYPE,
 		 "type: %d\n", type);
 	res = ldlm_resource_new();
 	if (!res)
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 
 	res->lr_ns_bucket  = cfs_hash_bd_extra_get(ns->ns_rs_hash, &bd);
 	res->lr_name       = *name;
@@ -1137,7 +1133,7 @@ ldlm_resource_get(struct ldlm_namespace *ns, struct ldlm_resource *parent,
 		/* We have taken lr_lvb_mutex. Drop it. */
 		mutex_unlock(&res->lr_lvb_mutex);
 		kmem_cache_free(ldlm_resource_slab, res);
-
+lvbo_init:
 		res = hlist_entry(hnode, struct ldlm_resource, lr_hash);
 		/* Synchronize with regard to resource creation. */
 		if (ns->ns_lvbo && ns->ns_lvbo->lvbo_init) {
@@ -1146,8 +1142,9 @@ ldlm_resource_get(struct ldlm_namespace *ns, struct ldlm_resource *parent,
 		}
 
 		if (unlikely(res->lr_lvb_len < 0)) {
+			rc = res->lr_lvb_len;
 			ldlm_resource_putref(res);
-			res = NULL;
+			res = ERR_PTR(rc);
 		}
 		return res;
 	}
@@ -1158,8 +1155,6 @@ ldlm_resource_get(struct ldlm_namespace *ns, struct ldlm_resource *parent,
 
 	cfs_hash_bd_unlock(ns->ns_rs_hash, &bd, 1);
 	if (ns->ns_lvbo && ns->ns_lvbo->lvbo_init) {
-		int rc;
-
 		OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_CREATE_RESOURCE, 2);
 		rc = ns->ns_lvbo->lvbo_init(res);
 		if (rc < 0) {
@@ -1169,7 +1164,7 @@ ldlm_resource_get(struct ldlm_namespace *ns, struct ldlm_resource *parent,
 			res->lr_lvb_len = rc;
 			mutex_unlock(&res->lr_lvb_mutex);
 			ldlm_resource_putref(res);
-			return NULL;
+			return ERR_PTR(rc);
 		}
 	}
 
@@ -1386,7 +1381,7 @@ void ldlm_resource_dump(int level, struct ldlm_resource *res)
 	if (!list_empty(&res->lr_granted)) {
 		CDEBUG(level, "Granted locks (in reverse order):\n");
 		list_for_each_entry_reverse(lock, &res->lr_granted,
-						l_res_link) {
+					    l_res_link) {
 			LDLM_DEBUG_LIMIT(level, lock, "###");
 			if (!(level & D_CANTMASK) &&
 			    ++granted > ldlm_dump_granted_max) {
diff --git a/drivers/staging/lustre/lustre/llite/Makefile b/drivers/staging/lustre/lustre/llite/Makefile
index 2cbb1b80bd41..1ac0940bd8df 100644
--- a/drivers/staging/lustre/lustre/llite/Makefile
+++ b/drivers/staging/lustre/lustre/llite/Makefile
@@ -1,6 +1,6 @@
 obj-$(CONFIG_LUSTRE_FS) += lustre.o
 lustre-y := dcache.o dir.o file.o llite_close.o llite_lib.o llite_nfs.o \
-	    rw.o namei.o symlink.o llite_mmap.o \
+	    rw.o namei.o symlink.o llite_mmap.o range_lock.o \
 	    xattr.o xattr_cache.o rw26.o super25.o statahead.o \
 	    glimpse.o lcommon_cl.o lcommon_misc.o \
 	    vvp_dev.o vvp_page.o vvp_lock.o vvp_io.o vvp_object.o vvp_req.o \
diff --git a/drivers/staging/lustre/lustre/llite/dcache.c b/drivers/staging/lustre/lustre/llite/dcache.c
index 463b1a360733..0e45d8fc4d7c 100644
--- a/drivers/staging/lustre/lustre/llite/dcache.c
+++ b/drivers/staging/lustre/lustre/llite/dcache.c
@@ -37,7 +37,6 @@
 #define DEBUG_SUBSYSTEM S_LLITE
 
 #include "../include/obd_support.h"
-#include "../include/lustre_lite.h"
 #include "../include/lustre/lustre_idl.h"
 #include "../include/lustre_dlm.h"
 
@@ -102,39 +101,6 @@ static int ll_dcompare(const struct dentry *dentry,
 	return 0;
 }
 
-static inline int return_if_equal(struct ldlm_lock *lock, void *data)
-{
-	return (ldlm_is_canceling(lock) && ldlm_is_discard_data(lock)) ?
-		LDLM_ITER_CONTINUE : LDLM_ITER_STOP;
-}
-
-/* find any ldlm lock of the inode in mdc and lov
- * return 0    not find
- *	1    find one
- *      < 0    error
- */
-static int find_cbdata(struct inode *inode)
-{
-	struct ll_sb_info *sbi = ll_i2sbi(inode);
-	struct lov_stripe_md *lsm;
-	int rc = 0;
-
-	LASSERT(inode);
-	rc = md_find_cbdata(sbi->ll_md_exp, ll_inode2fid(inode),
-			    return_if_equal, NULL);
-	if (rc != 0)
-		return rc;
-
-	lsm = ccc_inode_lsm_get(inode);
-	if (!lsm)
-		return rc;
-
-	rc = obd_find_cbdata(sbi->ll_dt_exp, lsm, return_if_equal, NULL);
-	ccc_inode_lsm_put(inode, lsm);
-
-	return rc;
-}
-
 /**
  * Called when last reference to a dentry is dropped and dcache wants to know
  * whether or not it should cache it:
@@ -155,19 +121,6 @@ static int ll_ddelete(const struct dentry *de)
 	/* kernel >= 2.6.38 last refcount is decreased after this function. */
 	LASSERT(d_count(de) == 1);
 
-	/* Disable this piece of code temporarily because this is called
-	 * inside dcache_lock so it's not appropriate to do lots of work
-	 * here. ATTENTION: Before this piece of code enabling, LU-2487 must be
-	 * resolved.
-	 */
-#if 0
-	/* if not ldlm lock for this inode, set i_nlink to 0 so that
-	 * this inode can be recycled later b=20433
-	 */
-	if (d_really_is_positive(de) && !find_cbdata(d_inode(de)))
-		clear_nlink(d_inode(de));
-#endif
-
 	if (d_lustre_invalid((struct dentry *)de))
 		return 1;
 	return 0;
@@ -325,14 +278,13 @@ static int ll_revalidate_dentry(struct dentry *dentry,
 	if (lookup_flags & (LOOKUP_PARENT | LOOKUP_OPEN | LOOKUP_CREATE))
 		return 1;
 
-	if (d_need_statahead(dir, dentry) <= 0)
+	if (!dentry_may_statahead(dir, dentry))
 		return 1;
 
 	if (lookup_flags & LOOKUP_RCU)
 		return -ECHILD;
 
-	do_statahead_enter(dir, &dentry, !d_inode(dentry));
-	ll_statahead_mark(dir, dentry);
+	ll_statahead(dir, &dentry, !d_inode(dentry));
 	return 1;
 }
 
@@ -347,18 +299,9 @@ static int ll_revalidate_nd(struct dentry *dentry, unsigned int flags)
 	return ll_revalidate_dentry(dentry, flags);
 }
 
-static void ll_d_iput(struct dentry *de, struct inode *inode)
-{
-	LASSERT(inode);
-	if (!find_cbdata(inode))
-		clear_nlink(inode);
-	iput(inode);
-}
-
 const struct dentry_operations ll_d_ops = {
 	.d_revalidate = ll_revalidate_nd,
 	.d_release = ll_release,
 	.d_delete  = ll_ddelete,
-	.d_iput    = ll_d_iput,
 	.d_compare = ll_dcompare,
 };
diff --git a/drivers/staging/lustre/lustre/llite/dir.c b/drivers/staging/lustre/lustre/llite/dir.c
index 5b381779c827..7f32a539d260 100644
--- a/drivers/staging/lustre/lustre/llite/dir.c
+++ b/drivers/staging/lustre/lustre/llite/dir.c
@@ -46,9 +46,8 @@
 
 #include "../include/obd_support.h"
 #include "../include/obd_class.h"
+#include "../include/lustre/lustre_ioctl.h"
 #include "../include/lustre_lib.h"
-#include "../include/lustre/lustre_idl.h"
-#include "../include/lustre_lite.h"
 #include "../include/lustre_dlm.h"
 #include "../include/lustre_fid.h"
 #include "../include/lustre_kernelcomm.h"
@@ -134,111 +133,35 @@
  * for this integrated page will be adjusted. See lmv_adjust_dirpages().
  *
  */
-
-/* returns the page unlocked, but with a reference */
-static int ll_dir_filler(void *_hash, struct page *page0)
+struct page *ll_get_dir_page(struct inode *dir, struct md_op_data *op_data,
+			     __u64 offset)
 {
-	struct inode *inode = page0->mapping->host;
-	int hash64 = ll_i2sbi(inode)->ll_flags & LL_SBI_64BIT_HASH;
-	struct obd_export *exp = ll_i2sbi(inode)->ll_md_exp;
-	struct ptlrpc_request *request;
-	struct mdt_body *body;
-	struct md_op_data *op_data;
-	__u64 hash = *((__u64 *)_hash);
-	struct page **page_pool;
+	struct md_callback cb_op;
 	struct page *page;
-	struct lu_dirpage *dp;
-	int max_pages = ll_i2sbi(inode)->ll_md_brw_size >> PAGE_SHIFT;
-	int nrdpgs = 0; /* number of pages read actually */
-	int npages;
-	int i;
 	int rc;
 
-	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p) hash %llu\n",
-	       PFID(ll_inode2fid(inode)), inode, hash);
-
-	LASSERT(max_pages > 0 && max_pages <= MD_MAX_BRW_PAGES);
-
-	op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
-				     LUSTRE_OPC_ANY, NULL);
-	if (IS_ERR(op_data))
-		return PTR_ERR(op_data);
-
-	page_pool = kcalloc(max_pages, sizeof(page), GFP_NOFS);
-	if (page_pool) {
-		page_pool[0] = page0;
-	} else {
-		page_pool = &page0;
-		max_pages = 1;
-	}
-	for (npages = 1; npages < max_pages; npages++) {
-		page = page_cache_alloc_cold(inode->i_mapping);
-		if (!page)
-			break;
-		page_pool[npages] = page;
-	}
-
-	op_data->op_npages = npages;
-	op_data->op_offset = hash;
-	rc = md_readpage(exp, op_data, page_pool, &request);
-	ll_finish_md_op_data(op_data);
-	if (rc < 0) {
-		/* page0 is special, which was added into page cache early */
-		delete_from_page_cache(page0);
-	} else if (rc == 0) {
-		body = req_capsule_server_get(&request->rq_pill, &RMF_MDT_BODY);
-		/* Checked by mdc_readpage() */
-		if (body->valid & OBD_MD_FLSIZE)
-			i_size_write(inode, body->size);
-
-		nrdpgs = (request->rq_bulk->bd_nob_transferred+PAGE_SIZE-1)
-			 >> PAGE_SHIFT;
-		SetPageUptodate(page0);
-	}
-	unlock_page(page0);
-	ptlrpc_req_finished(request);
-
-	CDEBUG(D_VFSTRACE, "read %d/%d pages\n", nrdpgs, npages);
-
-	for (i = 1; i < npages; i++) {
-		unsigned long offset;
-		int ret;
-
-		page = page_pool[i];
-
-		if (rc < 0 || i >= nrdpgs) {
-			put_page(page);
-			continue;
-		}
-
-		SetPageUptodate(page);
-
-		dp = kmap(page);
-		hash = le64_to_cpu(dp->ldp_hash_start);
-		kunmap(page);
-
-		offset = hash_x_index(hash, hash64);
-
-		prefetchw(&page->flags);
-		ret = add_to_page_cache_lru(page, inode->i_mapping, offset,
-					    GFP_NOFS);
-		if (ret == 0) {
-			unlock_page(page);
-		} else {
-			CDEBUG(D_VFSTRACE, "page %lu add to page cache failed: %d\n",
-			       offset, ret);
-		}
-		put_page(page);
-	}
+	cb_op.md_blocking_ast = ll_md_blocking_ast;
+	rc = md_read_page(ll_i2mdexp(dir), op_data, &cb_op, offset, &page);
+	if (rc)
+		return ERR_PTR(rc);
 
-	if (page_pool != &page0)
-		kfree(page_pool);
-	return rc;
+	return page;
 }
 
-void ll_release_page(struct page *page, int remove)
+void ll_release_page(struct inode *inode, struct page *page, bool remove)
 {
 	kunmap(page);
+
+	/*
+	 * Always remove the page for striped dir, because the page is
+	 * built from temporarily in LMV layer
+	 */
+	if (inode && S_ISDIR(inode->i_mode) &&
+	    ll_i2info(inode)->lli_lsm_md) {
+		__free_page(page);
+		return;
+	}
+
 	if (remove) {
 		lock_page(page);
 		if (likely(page->mapping))
@@ -248,225 +171,6 @@ void ll_release_page(struct page *page, int remove)
 	put_page(page);
 }
 
-/*
- * Find, kmap and return page that contains given hash.
- */
-static struct page *ll_dir_page_locate(struct inode *dir, __u64 *hash,
-				       __u64 *start, __u64 *end)
-{
-	int hash64 = ll_i2sbi(dir)->ll_flags & LL_SBI_64BIT_HASH;
-	struct address_space *mapping = dir->i_mapping;
-	/*
-	 * Complement of hash is used as an index so that
-	 * radix_tree_gang_lookup() can be used to find a page with starting
-	 * hash _smaller_ than one we are looking for.
-	 */
-	unsigned long offset = hash_x_index(*hash, hash64);
-	struct page *page;
-	int found;
-
-	spin_lock_irq(&mapping->tree_lock);
-	found = radix_tree_gang_lookup(&mapping->page_tree,
-				       (void **)&page, offset, 1);
-	if (found > 0 && !radix_tree_exceptional_entry(page)) {
-		struct lu_dirpage *dp;
-
-		get_page(page);
-		spin_unlock_irq(&mapping->tree_lock);
-		/*
-		 * In contrast to find_lock_page() we are sure that directory
-		 * page cannot be truncated (while DLM lock is held) and,
-		 * hence, can avoid restart.
-		 *
-		 * In fact, page cannot be locked here at all, because
-		 * ll_dir_filler() does synchronous io.
-		 */
-		wait_on_page_locked(page);
-		if (PageUptodate(page)) {
-			dp = kmap(page);
-			if (BITS_PER_LONG == 32 && hash64) {
-				*start = le64_to_cpu(dp->ldp_hash_start) >> 32;
-				*end   = le64_to_cpu(dp->ldp_hash_end) >> 32;
-				*hash  = *hash >> 32;
-			} else {
-				*start = le64_to_cpu(dp->ldp_hash_start);
-				*end   = le64_to_cpu(dp->ldp_hash_end);
-			}
-			LASSERTF(*start <= *hash, "start = %#llx,end = %#llx,hash = %#llx\n",
-				 *start, *end, *hash);
-			CDEBUG(D_VFSTRACE, "page %lu [%llu %llu], hash %llu\n",
-			       offset, *start, *end, *hash);
-			if (*hash > *end) {
-				ll_release_page(page, 0);
-				page = NULL;
-			} else if (*end != *start && *hash == *end) {
-				/*
-				 * upon hash collision, remove this page,
-				 * otherwise put page reference, and
-				 * ll_get_dir_page() will issue RPC to fetch
-				 * the page we want.
-				 */
-				ll_release_page(page,
-				    le32_to_cpu(dp->ldp_flags) & LDF_COLLIDE);
-				page = NULL;
-			}
-		} else {
-			put_page(page);
-			page = ERR_PTR(-EIO);
-		}
-
-	} else {
-		spin_unlock_irq(&mapping->tree_lock);
-		page = NULL;
-	}
-	return page;
-}
-
-struct page *ll_get_dir_page(struct inode *dir, __u64 hash,
-			     struct ll_dir_chain *chain)
-{
-	ldlm_policy_data_t policy = {.l_inodebits = {MDS_INODELOCK_UPDATE} };
-	struct address_space *mapping = dir->i_mapping;
-	struct lustre_handle lockh;
-	struct lu_dirpage *dp;
-	struct page *page;
-	enum ldlm_mode mode;
-	int rc;
-	__u64 start = 0;
-	__u64 end = 0;
-	__u64 lhash = hash;
-	struct ll_inode_info *lli = ll_i2info(dir);
-	int hash64 = ll_i2sbi(dir)->ll_flags & LL_SBI_64BIT_HASH;
-
-	mode = LCK_PR;
-	rc = md_lock_match(ll_i2sbi(dir)->ll_md_exp, LDLM_FL_BLOCK_GRANTED,
-			   ll_inode2fid(dir), LDLM_IBITS, &policy, mode, &lockh);
-	if (!rc) {
-		struct ldlm_enqueue_info einfo = {
-			.ei_type = LDLM_IBITS,
-			.ei_mode = mode,
-			.ei_cb_bl = ll_md_blocking_ast,
-			.ei_cb_cp = ldlm_completion_ast,
-		};
-		struct lookup_intent it = { .it_op = IT_READDIR };
-		struct ptlrpc_request *request;
-		struct md_op_data *op_data;
-
-		op_data = ll_prep_md_op_data(NULL, dir, dir, NULL, 0, 0,
-					     LUSTRE_OPC_ANY, NULL);
-		if (IS_ERR(op_data))
-			return (void *)op_data;
-
-		rc = md_enqueue(ll_i2sbi(dir)->ll_md_exp, &einfo, &it,
-				op_data, &lockh, NULL, 0, NULL, 0);
-
-		ll_finish_md_op_data(op_data);
-
-		request = (struct ptlrpc_request *)it.it_request;
-		if (request)
-			ptlrpc_req_finished(request);
-		if (rc < 0) {
-			CERROR("lock enqueue: " DFID " at %llu: rc %d\n",
-			       PFID(ll_inode2fid(dir)), hash, rc);
-			return ERR_PTR(rc);
-		}
-
-		CDEBUG(D_INODE, "setting lr_lvb_inode to inode "DFID"(%p)\n",
-		       PFID(ll_inode2fid(dir)), dir);
-		md_set_lock_data(ll_i2sbi(dir)->ll_md_exp,
-				 &it.it_lock_handle, dir, NULL);
-	} else {
-		/* for cross-ref object, l_ast_data of the lock may not be set,
-		 * we reset it here
-		 */
-		md_set_lock_data(ll_i2sbi(dir)->ll_md_exp, &lockh.cookie,
-				 dir, NULL);
-	}
-	ldlm_lock_dump_handle(D_OTHER, &lockh);
-
-	mutex_lock(&lli->lli_readdir_mutex);
-	page = ll_dir_page_locate(dir, &lhash, &start, &end);
-	if (IS_ERR(page)) {
-		CERROR("dir page locate: "DFID" at %llu: rc %ld\n",
-		       PFID(ll_inode2fid(dir)), lhash, PTR_ERR(page));
-		goto out_unlock;
-	} else if (page) {
-		/*
-		 * XXX nikita: not entirely correct handling of a corner case:
-		 * suppose hash chain of entries with hash value HASH crosses
-		 * border between pages P0 and P1. First both P0 and P1 are
-		 * cached, seekdir() is called for some entry from the P0 part
-		 * of the chain. Later P0 goes out of cache. telldir(HASH)
-		 * happens and finds P1, as it starts with matching hash
-		 * value. Remaining entries from P0 part of the chain are
-		 * skipped. (Is that really a bug?)
-		 *
-		 * Possible solutions: 0. don't cache P1 is such case, handle
-		 * it as an "overflow" page. 1. invalidate all pages at
-		 * once. 2. use HASH|1 as an index for P1.
-		 */
-		goto hash_collision;
-	}
-
-	page = read_cache_page(mapping, hash_x_index(hash, hash64),
-			       ll_dir_filler, &lhash);
-	if (IS_ERR(page)) {
-		CERROR("read cache page: "DFID" at %llu: rc %ld\n",
-		       PFID(ll_inode2fid(dir)), hash, PTR_ERR(page));
-		goto out_unlock;
-	}
-
-	wait_on_page_locked(page);
-	(void)kmap(page);
-	if (!PageUptodate(page)) {
-		CERROR("page not updated: "DFID" at %llu: rc %d\n",
-		       PFID(ll_inode2fid(dir)), hash, -5);
-		goto fail;
-	}
-	if (!PageChecked(page))
-		/* XXX: check page format later */
-		SetPageChecked(page);
-	if (PageError(page)) {
-		CERROR("page error: "DFID" at %llu: rc %d\n",
-		       PFID(ll_inode2fid(dir)), hash, -5);
-		goto fail;
-	}
-hash_collision:
-	dp = page_address(page);
-	if (BITS_PER_LONG == 32 && hash64) {
-		start = le64_to_cpu(dp->ldp_hash_start) >> 32;
-		end   = le64_to_cpu(dp->ldp_hash_end) >> 32;
-		lhash = hash >> 32;
-	} else {
-		start = le64_to_cpu(dp->ldp_hash_start);
-		end   = le64_to_cpu(dp->ldp_hash_end);
-		lhash = hash;
-	}
-	if (end == start) {
-		LASSERT(start == lhash);
-		CWARN("Page-wide hash collision: %llu\n", end);
-		if (BITS_PER_LONG == 32 && hash64)
-			CWARN("Real page-wide hash collision at [%llu %llu] with hash %llu\n",
-			      le64_to_cpu(dp->ldp_hash_start),
-			      le64_to_cpu(dp->ldp_hash_end), hash);
-		/*
-		 * Fetch whole overflow chain...
-		 *
-		 * XXX not yet.
-		 */
-		goto fail;
-	}
-out_unlock:
-	mutex_unlock(&lli->lli_readdir_mutex);
-	ldlm_lock_decref(&lockh, mode);
-	return page;
-
-fail:
-	ll_release_page(page, 1);
-	page = ERR_PTR(-EIO);
-	goto out_unlock;
-}
-
 /**
  * return IF_* type for given lu_dirent entry.
  * IF_* flag shld be converted to particular OS file type in
@@ -489,119 +193,100 @@ static __u16 ll_dirent_type_get(struct lu_dirent *ent)
 	return type;
 }
 
-int ll_dir_read(struct inode *inode, struct dir_context *ctx)
+int ll_dir_read(struct inode *inode, __u64 *ppos, struct md_op_data *op_data,
+		struct dir_context *ctx)
 {
-	struct ll_inode_info *info       = ll_i2info(inode);
 	struct ll_sb_info    *sbi	= ll_i2sbi(inode);
-	__u64		   pos		= ctx->pos;
-	int		   api32      = ll_need_32bit_api(sbi);
-	int		   hash64     = sbi->ll_flags & LL_SBI_64BIT_HASH;
+	__u64		   pos		= *ppos;
+	int		   is_api32 = ll_need_32bit_api(sbi);
+	int		   is_hash64 = sbi->ll_flags & LL_SBI_64BIT_HASH;
 	struct page	  *page;
-	struct ll_dir_chain   chain;
-	int		   done = 0;
+	bool		   done = false;
 	int		   rc = 0;
 
-	ll_dir_chain_init(&chain);
-
-	page = ll_get_dir_page(inode, pos, &chain);
+	page = ll_get_dir_page(inode, op_data, pos);
 
 	while (rc == 0 && !done) {
 		struct lu_dirpage *dp;
 		struct lu_dirent  *ent;
+		__u64 hash;
+		__u64 next;
 
-		if (!IS_ERR(page)) {
-			/*
-			 * If page is empty (end of directory is reached),
-			 * use this value.
-			 */
-			__u64 hash = MDS_DIR_END_OFF;
-			__u64 next;
-
-			dp = page_address(page);
-			for (ent = lu_dirent_start(dp); ent && !done;
-			     ent = lu_dirent_next(ent)) {
-				__u16	  type;
-				int	    namelen;
-				struct lu_fid  fid;
-				__u64	  lhash;
-				__u64	  ino;
+		if (IS_ERR(page)) {
+			rc = PTR_ERR(page);
+			break;
+		}
 
+		hash = MDS_DIR_END_OFF;
+		dp = page_address(page);
+		for (ent = lu_dirent_start(dp); ent && !done;
+		     ent = lu_dirent_next(ent)) {
+			__u16	  type;
+			int	    namelen;
+			struct lu_fid  fid;
+			__u64	  lhash;
+			__u64	  ino;
+
+			hash = le64_to_cpu(ent->lde_hash);
+			if (hash < pos)
 				/*
-				 * XXX: implement correct swabbing here.
+				 * Skip until we find target hash
+				 * value.
 				 */
+				continue;
 
-				hash = le64_to_cpu(ent->lde_hash);
-				if (hash < pos)
-					/*
-					 * Skip until we find target hash
-					 * value.
-					 */
-					continue;
-
-				namelen = le16_to_cpu(ent->lde_namelen);
-				if (namelen == 0)
-					/*
-					 * Skip dummy record.
-					 */
-					continue;
-
-				if (api32 && hash64)
-					lhash = hash >> 32;
-				else
-					lhash = hash;
-				fid_le_to_cpu(&fid, &ent->lde_fid);
-				ino = cl_fid_build_ino(&fid, api32);
-				type = ll_dirent_type_get(ent);
-				ctx->pos = lhash;
-				/* For 'll_nfs_get_name_filldir()', it will try
-				 * to access the 'ent' through its 'lde_name',
-				 * so the parameter 'name' for 'ctx->actor()'
-				 * must be part of the 'ent'.
+			namelen = le16_to_cpu(ent->lde_namelen);
+			if (namelen == 0)
+				/*
+				 * Skip dummy record.
 				 */
-				done = !dir_emit(ctx, ent->lde_name,
-						 namelen, ino, type);
-			}
-			next = le64_to_cpu(dp->ldp_hash_end);
-			if (!done) {
-				pos = next;
-				if (pos == MDS_DIR_END_OFF) {
-					/*
-					 * End of directory reached.
-					 */
-					done = 1;
-					ll_release_page(page, 0);
-				} else if (1 /* chain is exhausted*/) {
-					/*
-					 * Normal case: continue to the next
-					 * page.
-					 */
-					ll_release_page(page,
-					    le32_to_cpu(dp->ldp_flags) &
-							LDF_COLLIDE);
-					next = pos;
-					page = ll_get_dir_page(inode, pos,
-							       &chain);
-				} else {
-					/*
-					 * go into overflow page.
-					 */
-					LASSERT(le32_to_cpu(dp->ldp_flags) &
-						LDF_COLLIDE);
-					ll_release_page(page, 1);
-				}
-			} else {
-				pos = hash;
-				ll_release_page(page, 0);
-			}
+				continue;
+
+			if (is_api32 && is_hash64)
+				lhash = hash >> 32;
+			else
+				lhash = hash;
+			fid_le_to_cpu(&fid, &ent->lde_fid);
+			ino = cl_fid_build_ino(&fid, is_api32);
+			type = ll_dirent_type_get(ent);
+			ctx->pos = lhash;
+			/* For 'll_nfs_get_name_filldir()', it will try
+			 * to access the 'ent' through its 'lde_name',
+			 * so the parameter 'name' for 'ctx->actor()'
+			 * must be part of the 'ent'.
+			 */
+			done = !dir_emit(ctx, ent->lde_name,
+					 namelen, ino, type);
+		}
+
+		if (done) {
+			pos = hash;
+			ll_release_page(inode, page, false);
+			break;
+		}
+
+		next = le64_to_cpu(dp->ldp_hash_end);
+		pos = next;
+		if (pos == MDS_DIR_END_OFF) {
+			/*
+			 * End of directory reached.
+			 */
+			done = 1;
+			ll_release_page(inode, page, false);
 		} else {
-			rc = PTR_ERR(page);
-			CERROR("error reading dir "DFID" at %lu: rc %d\n",
-			       PFID(&info->lli_fid), (unsigned long)pos, rc);
+			/*
+			 * Normal case: continue to the next
+			 * page.
+			 */
+			ll_release_page(inode, page,
+					le32_to_cpu(dp->ldp_flags) &
+					LDF_COLLIDE);
+			next = pos;
+			page = ll_get_dir_page(inode, op_data, pos);
 		}
 	}
 
 	ctx->pos = pos;
-	ll_dir_chain_fini(&chain);
 	return rc;
 }
 
@@ -613,9 +298,10 @@ static int ll_readdir(struct file *filp, struct dir_context *ctx)
 	__u64 pos = lfd ? lfd->lfd_pos : 0;
 	int			hash64	= sbi->ll_flags & LL_SBI_64BIT_HASH;
 	int			api32	= ll_need_32bit_api(sbi);
+	struct md_op_data *op_data;
 	int			rc;
 
-	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p) pos %lu/%llu 32bit_api %d\n",
+	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p) pos/size %lu/%llu 32bit_api %d\n",
 	       PFID(ll_inode2fid(inode)), inode, (unsigned long)pos,
 	       i_size_read(inode), api32);
 
@@ -627,19 +313,58 @@ static int ll_readdir(struct file *filp, struct dir_context *ctx)
 		goto out;
 	}
 
+	op_data = ll_prep_md_op_data(NULL, inode, inode, NULL, 0, 0,
+				     LUSTRE_OPC_ANY, inode);
+	if (IS_ERR(op_data)) {
+		rc = PTR_ERR(op_data);
+		goto out;
+	}
+
+	if (unlikely(op_data->op_mea1)) {
+		/*
+		 * This is only needed for striped dir to fill ..,
+		 * see lmv_read_page
+		 */
+		if (file_dentry(filp)->d_parent &&
+		    file_dentry(filp)->d_parent->d_inode) {
+			__u64 ibits = MDS_INODELOCK_UPDATE;
+			struct inode *parent;
+
+			parent = file_dentry(filp)->d_parent->d_inode;
+			if (ll_have_md_lock(parent, &ibits, LCK_MINMODE))
+				op_data->op_fid3 = *ll_inode2fid(parent);
+		}
+
+		/*
+		 * If it can not find in cache, do lookup .. on the master
+		 * object
+		 */
+		if (fid_is_zero(&op_data->op_fid3)) {
+			rc = ll_dir_get_parent_fid(inode, &op_data->op_fid3);
+			if (rc) {
+				ll_finish_md_op_data(op_data);
+				return rc;
+			}
+		}
+	}
+	op_data->op_max_pages = sbi->ll_md_brw_pages;
 	ctx->pos = pos;
-	rc = ll_dir_read(inode, ctx);
+	rc = ll_dir_read(inode, &pos, op_data, ctx);
+	pos = ctx->pos;
 	if (lfd)
-		lfd->lfd_pos = ctx->pos;
-	if (ctx->pos == MDS_DIR_END_OFF) {
+		lfd->lfd_pos = pos;
+
+	if (pos == MDS_DIR_END_OFF) {
 		if (api32)
-			ctx->pos = LL_DIR_END_OFF_32BIT;
+			pos = LL_DIR_END_OFF_32BIT;
 		else
-			ctx->pos = LL_DIR_END_OFF;
+			pos = LL_DIR_END_OFF;
 	} else {
 		if (api32 && hash64)
-			ctx->pos >>= 32;
+			pos >>= 32;
 	}
+	ctx->pos = pos;
+	ll_finish_md_op_data(op_data);
 	filp->f_version = inode->i_version;
 
 out:
@@ -668,18 +393,40 @@ static int ll_send_mgc_param(struct obd_export *mgc, char *string)
 	return rc;
 }
 
-static int ll_dir_setdirstripe(struct inode *dir, struct lmv_user_md *lump,
-			       char *filename)
+/**
+ * Create striped directory with specified stripe(@lump)
+ *
+ * param[in] parent	the parent of the directory.
+ * param[in] lump	the specified stripes.
+ * param[in] dirname	the name of the directory.
+ * param[in] mode	the specified mode of the directory.
+ *
+ * retval		=0 if striped directory is being created successfully.
+ *			<0 if the creation is failed.
+ */
+static int ll_dir_setdirstripe(struct inode *parent, struct lmv_user_md *lump,
+			       const char *dirname, umode_t mode)
 {
 	struct ptlrpc_request *request = NULL;
 	struct md_op_data *op_data;
-	struct ll_sb_info *sbi = ll_i2sbi(dir);
-	int mode;
+	struct ll_sb_info *sbi = ll_i2sbi(parent);
 	int err;
 
-	mode = (~current_umask() & 0755) | S_IFDIR;
-	op_data = ll_prep_md_op_data(NULL, dir, NULL, filename,
-				     strlen(filename), mode, LUSTRE_OPC_MKDIR,
+	if (unlikely(lump->lum_magic != LMV_USER_MAGIC))
+		return -EINVAL;
+
+	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p) name %s stripe_offset %d, stripe_count: %u\n",
+	       PFID(ll_inode2fid(parent)), parent, dirname,
+	       (int)lump->lum_stripe_offset, lump->lum_stripe_count);
+
+	if (lump->lum_magic != cpu_to_le32(LMV_USER_MAGIC))
+		lustre_swab_lmv_user_md(lump);
+
+	if (!IS_POSIXACL(parent) || !exp_connect_umask(ll_i2mdexp(parent)))
+		mode &= ~current_umask();
+	mode = (mode & (S_IRWXUGO | S_ISVTX)) | S_IFDIR;
+	op_data = ll_prep_md_op_data(NULL, parent, NULL, dirname,
+				     strlen(dirname), mode, LUSTRE_OPC_MKDIR,
 				     lump);
 	if (IS_ERR(op_data)) {
 		err = PTR_ERR(op_data);
@@ -730,6 +477,13 @@ int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump,
 			lum_size = sizeof(struct lov_user_md_v3);
 			break;
 		}
+		case LMV_USER_MAGIC: {
+			if (lump->lmm_magic != cpu_to_le32(LMV_USER_MAGIC))
+				lustre_swab_lmv_user_md(
+					(struct lmv_user_md *)lump);
+			lum_size = sizeof(struct lmv_user_md);
+			break;
+		}
 		default: {
 			CDEBUG(D_IOCTL, "bad userland LOV MAGIC: %#08x != %#08x nor %#08x\n",
 			       lump->lmm_magic, LOV_USER_MAGIC_V1,
@@ -746,9 +500,6 @@ int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump,
 	if (IS_ERR(op_data))
 		return PTR_ERR(op_data);
 
-	if (lump && lump->lmm_magic == cpu_to_le32(LMV_USER_MAGIC))
-		op_data->op_cli_flags |= CLI_SET_MEA;
-
 	/* swabbing is done in lov_setstripe() on server side */
 	rc = md_setattr(sbi->ll_md_exp, op_data, lump, lum_size,
 			NULL, 0, &req, NULL);
@@ -803,8 +554,16 @@ end:
 	return rc;
 }
 
-int ll_dir_getstripe(struct inode *inode, struct lov_mds_md **lmmp,
-		     int *lmm_size, struct ptlrpc_request **request)
+/**
+ * This function will be used to get default LOV/LMV/Default LMV
+ * @valid will be used to indicate which stripe it will retrieve
+ *	OBD_MD_MEA		LMV stripe EA
+ *	OBD_MD_DEFAULT_MEA	Default LMV stripe EA
+ *	otherwise		Default LOV EA.
+ * Each time, it can only retrieve 1 stripe EA
+ **/
+int ll_dir_getstripe(struct inode *inode, void **plmm, int *plmm_size,
+		     struct ptlrpc_request **request, u64 valid)
 {
 	struct ll_sb_info *sbi = ll_i2sbi(inode);
 	struct mdt_body   *body;
@@ -813,7 +572,7 @@ int ll_dir_getstripe(struct inode *inode, struct lov_mds_md **lmmp,
 	int rc, lmmsize;
 	struct md_op_data *op_data;
 
-	rc = ll_get_default_mdsize(sbi, &lmmsize);
+	rc = ll_get_max_mdsize(sbi, &lmmsize);
 	if (rc)
 		return rc;
 
@@ -834,9 +593,9 @@ int ll_dir_getstripe(struct inode *inode, struct lov_mds_md **lmmp,
 
 	body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
 
-	lmmsize = body->eadatasize;
+	lmmsize = body->mbo_eadatasize;
 
-	if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
+	if (!(body->mbo_valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
 	    lmmsize == 0) {
 		rc = -ENODATA;
 		goto out;
@@ -844,6 +603,7 @@ int ll_dir_getstripe(struct inode *inode, struct lov_mds_md **lmmp,
 
 	lmm = req_capsule_server_sized_get(&req->rq_pill,
 					   &RMF_MDT_MD, lmmsize);
+	LASSERT(lmm);
 
 	/*
 	 * This is coming from the MDS, so is probably in
@@ -860,40 +620,51 @@ int ll_dir_getstripe(struct inode *inode, struct lov_mds_md **lmmp,
 		if (cpu_to_le32(LOV_MAGIC) != LOV_MAGIC)
 			lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
 		break;
+	case LMV_MAGIC_V1:
+		if (cpu_to_le32(LMV_MAGIC) != LMV_MAGIC)
+			lustre_swab_lmv_mds_md((union lmv_mds_md *)lmm);
+		break;
+	case LMV_USER_MAGIC:
+		if (cpu_to_le32(LMV_USER_MAGIC) != LMV_USER_MAGIC)
+			lustre_swab_lmv_user_md((struct lmv_user_md *)lmm);
+		break;
 	default:
 		CERROR("unknown magic: %lX\n", (unsigned long)lmm->lmm_magic);
 		rc = -EPROTO;
 	}
 out:
-	*lmmp = lmm;
-	*lmm_size = lmmsize;
+	*plmm = lmm;
+	*plmm_size = lmmsize;
 	*request = req;
 	return rc;
 }
 
-/*
- *  Get MDT index for the inode.
- */
-int ll_get_mdt_idx(struct inode *inode)
+int ll_get_mdt_idx_by_fid(struct ll_sb_info *sbi, const struct lu_fid *fid)
 {
-	struct ll_sb_info *sbi = ll_i2sbi(inode);
 	struct md_op_data *op_data;
-	int rc, mdtidx;
+	int mdt_index, rc;
 
-	op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0,
-				     0, LUSTRE_OPC_ANY, NULL);
-	if (IS_ERR(op_data))
-		return PTR_ERR(op_data);
+	op_data = kzalloc(sizeof(*op_data), GFP_NOFS);
+	if (!op_data)
+		return -ENOMEM;
 
 	op_data->op_flags |= MF_GET_MDT_IDX;
+	op_data->op_fid1 = *fid;
 	rc = md_getattr(sbi->ll_md_exp, op_data, NULL);
-	mdtidx = op_data->op_mds;
-	ll_finish_md_op_data(op_data);
-	if (rc < 0) {
-		CDEBUG(D_INFO, "md_getattr_name: %d\n", rc);
+	mdt_index = op_data->op_mds;
+	kvfree(op_data);
+	if (rc < 0)
 		return rc;
-	}
-	return mdtidx;
+
+	return mdt_index;
+}
+
+/*
+ *  Get MDT index for the inode.
+ */
+int ll_get_mdt_idx(struct inode *inode)
+{
+	return ll_get_mdt_idx_by_fid(ll_i2sbi(inode), ll_inode2fid(inode));
 }
 
 /**
@@ -1288,11 +1059,9 @@ static long ll_dir_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 		return 0;
 	}
 	case IOC_MDC_LOOKUP: {
-		struct ptlrpc_request *request = NULL;
 		int namelen, len = 0;
 		char *buf = NULL;
 		char *filename;
-		struct md_op_data *op_data;
 
 		rc = obd_ioctl_getdata(&buf, &len, (void __user *)arg);
 		if (rc)
@@ -1308,21 +1077,13 @@ static long ll_dir_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 			goto out_free;
 		}
 
-		op_data = ll_prep_md_op_data(NULL, inode, NULL, filename, namelen,
-					     0, LUSTRE_OPC_ANY, NULL);
-		if (IS_ERR(op_data)) {
-			rc = PTR_ERR(op_data);
-			goto out_free;
-		}
-
-		op_data->op_valid = OBD_MD_FLID;
-		rc = md_getattr_name(sbi->ll_md_exp, op_data, &request);
-		ll_finish_md_op_data(op_data);
+		rc = ll_get_fid_by_name(inode, filename, namelen, NULL);
 		if (rc < 0) {
-			CDEBUG(D_INFO, "md_getattr_name: %d\n", rc);
+			CERROR("%s: lookup %.*s failed: rc = %d\n",
+			       ll_get_fsname(inode->i_sb, NULL, 0), namelen,
+			       filename, rc);
 			goto out_free;
 		}
-		ptlrpc_req_finished(request);
 out_free:
 		obd_ioctl_freedata(buf, len);
 		return rc;
@@ -1333,6 +1094,7 @@ out_free:
 		char		*filename;
 		int		 namelen = 0;
 		int		 lumlen = 0;
+		umode_t mode;
 		int		 len;
 		int		 rc;
 
@@ -1366,15 +1128,32 @@ out_free:
 			goto lmv_out_free;
 		}
 
-		/**
-		 * ll_dir_setdirstripe will be used to set dir stripe
-		 *  mdc_create--->mdt_reint_create (with dirstripe)
-		 */
-		rc = ll_dir_setdirstripe(inode, lum, filename);
+#if OBD_OCD_VERSION(2, 9, 50, 0) > LUSTRE_VERSION_CODE
+		mode = data->ioc_type != 0 ? data->ioc_type : S_IRWXUGO;
+#else
+		mode = data->ioc_type;
+#endif
+		rc = ll_dir_setdirstripe(inode, lum, filename, mode);
 lmv_out_free:
 		obd_ioctl_freedata(buf, len);
 		return rc;
 	}
+	case LL_IOC_LMV_SET_DEFAULT_STRIPE: {
+		struct lmv_user_md __user *ulump;
+		struct lmv_user_md lum;
+		int rc;
+
+		ulump = (struct lmv_user_md __user *)arg;
+		if (copy_from_user(&lum, ulump, sizeof(lum)))
+			return -EFAULT;
+
+		if (lum.lum_magic != LMV_USER_MAGIC)
+			return -EINVAL;
+
+		rc = ll_dir_setstripe(inode, (struct lov_user_md *)&lum, 0);
+
+		return rc;
+	}
 	case LL_IOC_LOV_SETSTRIPE: {
 		struct lov_user_md_v3 lumv3;
 		struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
@@ -1404,50 +1183,100 @@ lmv_out_free:
 		return rc;
 	}
 	case LL_IOC_LMV_GETSTRIPE: {
-		struct lmv_user_md __user *lump = (void __user *)arg;
+		struct lmv_user_md __user *ulmv;
 		struct lmv_user_md lum;
-		struct lmv_user_md *tmp;
+		struct ptlrpc_request *request = NULL;
+		struct lmv_user_md *tmp = NULL;
+		union lmv_mds_md *lmm = NULL;
+		u64 valid = 0;
+		int stripe_count;
+		int mdt_index;
 		int lum_size;
-		int rc = 0;
-		int mdtindex;
+		int lmmsize;
+		int rc;
+		int i;
 
-		if (copy_from_user(&lum, lump, sizeof(struct lmv_user_md)))
+		ulmv = (struct lmv_user_md __user *)arg;
+		if (copy_from_user(&lum, ulmv, sizeof(*ulmv)))
 			return -EFAULT;
 
-		if (lum.lum_magic != LMV_MAGIC_V1)
+		/*
+		 * lum_magic will indicate which stripe the ioctl will like
+		 * to get, LMV_MAGIC_V1 is for normal LMV stripe, LMV_USER_MAGIC
+		 * is for default LMV stripe
+		 */
+		if (lum.lum_magic == LMV_MAGIC_V1)
+			valid |= OBD_MD_MEA;
+		else if (lum.lum_magic == LMV_USER_MAGIC)
+			valid |= OBD_MD_DEFAULT_MEA;
+		else
 			return -EINVAL;
 
-		lum_size = lmv_user_md_size(1, LMV_MAGIC_V1);
+		rc = ll_dir_getstripe(inode, (void **)&lmm, &lmmsize, &request,
+				      valid);
+		if (rc)
+			goto finish_req;
+
+		/* Get default LMV EA */
+		if (lum.lum_magic == LMV_USER_MAGIC) {
+			if (rc)
+				goto finish_req;
+
+			if (lmmsize > sizeof(*ulmv)) {
+				rc = -EINVAL;
+				goto finish_req;
+			}
+
+			if (copy_to_user(ulmv, lmm, lmmsize))
+				rc = -EFAULT;
+
+			goto finish_req;
+		}
+
+		stripe_count = lmv_mds_md_stripe_count_get(lmm);
+		lum_size = lmv_user_md_size(stripe_count, LMV_MAGIC_V1);
 		tmp = kzalloc(lum_size, GFP_NOFS);
 		if (!tmp) {
 			rc = -ENOMEM;
-			goto free_lmv;
+			goto finish_req;
 		}
 
-		*tmp = lum;
-		tmp->lum_type = LMV_STRIPE_TYPE;
-		tmp->lum_stripe_count = 1;
-		mdtindex = ll_get_mdt_idx(inode);
-		if (mdtindex < 0) {
+		mdt_index = ll_get_mdt_idx(inode);
+		if (mdt_index < 0) {
 			rc = -ENOMEM;
-			goto free_lmv;
+			goto out_tmp;
+		}
+		tmp->lum_magic = LMV_MAGIC_V1;
+		tmp->lum_stripe_count = 0;
+		tmp->lum_stripe_offset = mdt_index;
+		for (i = 0; i < stripe_count; i++) {
+			struct lu_fid fid;
+
+			fid_le_to_cpu(&fid, &lmm->lmv_md_v1.lmv_stripe_fids[i]);
+			mdt_index = ll_get_mdt_idx_by_fid(sbi, &fid);
+			if (mdt_index < 0) {
+				rc = mdt_index;
+				goto out_tmp;
+			}
+			tmp->lum_objects[i].lum_mds = mdt_index;
+			tmp->lum_objects[i].lum_fid = fid;
+			tmp->lum_stripe_count++;
 		}
 
-		tmp->lum_stripe_offset = mdtindex;
-		tmp->lum_objects[0].lum_mds = mdtindex;
-		memcpy(&tmp->lum_objects[0].lum_fid, ll_inode2fid(inode),
-		       sizeof(struct lu_fid));
-		if (copy_to_user((void __user *)arg, tmp, lum_size)) {
+		if (copy_to_user(ulmv, tmp, lum_size)) {
 			rc = -EFAULT;
-			goto free_lmv;
+			goto out_tmp;
 		}
-free_lmv:
+out_tmp:
 		kfree(tmp);
+finish_req:
+		ptlrpc_req_finished(request);
 		return rc;
 	}
+
 	case LL_IOC_LOV_SWAP_LAYOUTS:
 		return -EPERM;
-	case LL_IOC_OBD_STATFS:
+	case IOC_OBD_STATFS:
 		return ll_obd_statfs(inode, (void __user *)arg);
 	case LL_IOC_LOV_GETSTRIPE:
 	case LL_IOC_MDC_GETINFO:
@@ -1469,7 +1298,8 @@ free_lmv:
 			rc = ll_lov_getstripe_ea_info(inode, filename, &lmm,
 						      &lmmsize, &request);
 		} else {
-			rc = ll_dir_getstripe(inode, &lmm, &lmmsize, &request);
+			rc = ll_dir_getstripe(inode, (void **)&lmm, &lmmsize,
+					      &request, 0);
 		}
 
 		if (request) {
@@ -1512,18 +1342,18 @@ skip_lmm:
 			lstat_t st = { 0 };
 
 			st.st_dev     = inode->i_sb->s_dev;
-			st.st_mode    = body->mode;
-			st.st_nlink   = body->nlink;
-			st.st_uid     = body->uid;
-			st.st_gid     = body->gid;
-			st.st_rdev    = body->rdev;
-			st.st_size    = body->size;
+			st.st_mode    = body->mbo_mode;
+			st.st_nlink   = body->mbo_nlink;
+			st.st_uid     = body->mbo_uid;
+			st.st_gid     = body->mbo_gid;
+			st.st_rdev    = body->mbo_rdev;
+			st.st_size    = body->mbo_size;
 			st.st_blksize = PAGE_SIZE;
-			st.st_blocks  = body->blocks;
-			st.st_atime   = body->atime;
-			st.st_mtime   = body->mtime;
-			st.st_ctime   = body->ctime;
-			st.st_ino     = cl_fid_build_ino(&body->fid1,
+			st.st_blocks  = body->mbo_blocks;
+			st.st_atime   = body->mbo_atime;
+			st.st_mtime   = body->mbo_mtime;
+			st.st_ctime   = body->mbo_ctime;
+			st.st_ino     = cl_fid_build_ino(&body->mbo_fid1,
 							 sbi->ll_flags &
 							 LL_SBI_32BIT_API);
 
@@ -1611,9 +1441,6 @@ free_lmm:
 		kvfree(lmm);
 		return rc;
 	}
-	case OBD_IOC_LLOG_CATINFO: {
-		return -EOPNOTSUPP;
-	}
 	case OBD_IOC_QUOTACHECK: {
 		struct obd_quotactl *oqctl;
 		int error = 0;
@@ -1671,7 +1498,7 @@ out_poll:
 		kfree(check);
 		return rc;
 	}
-	case LL_IOC_QUOTACTL: {
+	case OBD_IOC_QUOTACTL: {
 		struct if_quotactl *qctl;
 
 		qctl = kzalloc(sizeof(*qctl), GFP_NOFS);
@@ -1739,6 +1566,25 @@ out_quotactl:
 		return rc;
 	case OBD_IOC_FID2PATH:
 		return ll_fid2path(inode, (void __user *)arg);
+	case LL_IOC_GETPARENT:
+		return ll_getparent(file, (void __user *)arg);
+	case LL_IOC_FID2MDTIDX: {
+		struct obd_export *exp = ll_i2mdexp(inode);
+		struct lu_fid fid;
+		__u32 index;
+
+		if (copy_from_user(&fid, (const struct lu_fid __user *)arg,
+				   sizeof(fid)))
+			return -EFAULT;
+
+		/* Call mdc_iocontrol */
+		rc = obd_iocontrol(LL_IOC_FID2MDTIDX, exp, sizeof(fid), &fid,
+				   &index);
+		if (rc)
+			return rc;
+
+		return index;
+	}
 	case LL_IOC_HSM_REQUEST: {
 		struct hsm_user_request	*hur;
 		ssize_t			 totalsize;
@@ -1853,6 +1699,45 @@ out_quotactl:
 		kfree(copy);
 		return rc;
 	}
+	case LL_IOC_MIGRATE: {
+		char *buf = NULL;
+		const char *filename;
+		int namelen = 0;
+		int len;
+		int rc;
+		int mdtidx;
+
+		rc = obd_ioctl_getdata(&buf, &len, (void __user *)arg);
+		if (rc < 0)
+			return rc;
+
+		data = (struct obd_ioctl_data *)buf;
+		if (!data->ioc_inlbuf1 || !data->ioc_inlbuf2 ||
+		    !data->ioc_inllen1 || !data->ioc_inllen2) {
+			rc = -EINVAL;
+			goto migrate_free;
+		}
+
+		filename = data->ioc_inlbuf1;
+		namelen = data->ioc_inllen1;
+		if (namelen < 1 || namelen != strlen(filename) + 1) {
+			rc = -EINVAL;
+			goto migrate_free;
+		}
+
+		if (data->ioc_inllen2 != sizeof(mdtidx)) {
+			rc = -EINVAL;
+			goto migrate_free;
+		}
+		mdtidx = *(int *)data->ioc_inlbuf2;
+
+		rc = ll_migrate(inode, file, mdtidx, filename, namelen - 1);
+migrate_free:
+		obd_ioctl_freedata(buf, len);
+
+		return rc;
+	}
+
 	default:
 		return obd_iocontrol(cmd, sbi->ll_dt_exp, 0, NULL,
 				     (void __user *)arg);
diff --git a/drivers/staging/lustre/lustre/llite/file.c b/drivers/staging/lustre/lustre/llite/file.c
index 57281b9e31ff..6e3a188baaae 100644
--- a/drivers/staging/lustre/lustre/llite/file.c
+++ b/drivers/staging/lustre/lustre/llite/file.c
@@ -38,14 +38,15 @@
 
 #define DEBUG_SUBSYSTEM S_LLITE
 #include "../include/lustre_dlm.h"
-#include "../include/lustre_lite.h"
 #include <linux/pagemap.h>
 #include <linux/file.h>
+#include <linux/sched.h>
 #include <linux/mount.h>
-#include "llite_internal.h"
 #include "../include/lustre/ll_fiemap.h"
+#include "../include/lustre/lustre_ioctl.h"
 
 #include "../include/cl_object.h"
+#include "llite_internal.h"
 
 static int
 ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg);
@@ -188,17 +189,11 @@ static int ll_close_inode_openhandle(struct obd_export *md_exp,
 		spin_unlock(&lli->lli_lock);
 	}
 
-	if (rc == 0) {
-		rc = ll_objects_destroy(req, inode);
-		if (rc)
-			CERROR("inode %lu ll_objects destroy: rc = %d\n",
-			       inode->i_ino, rc);
-	}
 	if (rc == 0 && op_data->op_bias & MDS_HSM_RELEASE) {
 		struct mdt_body *body;
 
 		body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
-		if (!(body->valid & OBD_MD_FLRELEASED))
+		if (!(body->mbo_valid & OBD_MD_FLRELEASED))
 			rc = -EBUSY;
 	}
 
@@ -349,13 +344,11 @@ int ll_file_release(struct inode *inode, struct file *file)
 	fd = LUSTRE_FPRIVATE(file);
 	LASSERT(fd);
 
-	/* The last ref on @file, maybe not be the owner pid of statahead.
-	 * Different processes can open the same dir, "ll_opendir_key" means:
-	 * it is me that should stop the statahead thread.
+	/* The last ref on @file, maybe not be the owner pid of statahead,
+	 * because parent and child process can share the same file handle.
 	 */
-	if (S_ISDIR(inode->i_mode) && lli->lli_opendir_key == fd &&
-	    lli->lli_opendir_pid != 0)
-		ll_stop_statahead(inode, lli->lli_opendir_key);
+	if (S_ISDIR(inode->i_mode) && lli->lli_opendir_key == fd)
+		ll_deauthorize_statahead(inode, fd);
 
 	if (is_root_inode(inode)) {
 		LUSTRE_FPRIVATE(file) = NULL;
@@ -364,7 +357,8 @@ int ll_file_release(struct inode *inode, struct file *file)
 	}
 
 	if (!S_ISDIR(inode->i_mode)) {
-		lov_read_and_clear_async_rc(lli->lli_clob);
+		if (lli->lli_clob)
+			lov_read_and_clear_async_rc(lli->lli_clob);
 		lli->lli_async_rc = 0;
 	}
 
@@ -376,55 +370,39 @@ int ll_file_release(struct inode *inode, struct file *file)
 	return rc;
 }
 
-static int ll_intent_file_open(struct dentry *dentry, void *lmm,
-			       int lmmsize, struct lookup_intent *itp)
+static int ll_intent_file_open(struct dentry *de, void *lmm, int lmmsize,
+			       struct lookup_intent *itp)
 {
-	struct inode *inode = d_inode(dentry);
+	struct inode *inode = d_inode(de);
 	struct ll_sb_info *sbi = ll_i2sbi(inode);
-	struct dentry *parent = dentry->d_parent;
-	const char *name = dentry->d_name.name;
-	const int len = dentry->d_name.len;
+	struct dentry *parent = de->d_parent;
+	const char *name = NULL;
 	struct md_op_data *op_data;
-	struct ptlrpc_request *req;
-	__u32 opc = LUSTRE_OPC_ANY;
-	int rc;
+	struct ptlrpc_request *req = NULL;
+	int len = 0, rc;
 
-	/* Usually we come here only for NFSD, and we want open lock. */
-	/* We can also get here if there was cached open handle in revalidate_it
-	 * but it disappeared while we were getting from there to ll_file_open.
-	 * But this means this file was closed and immediately opened which
-	 * makes a good candidate for using OPEN lock
-	 */
-	/* If lmmsize & lmm are not 0, we are just setting stripe info
-	 * parameters. No need for the open lock
+	LASSERT(parent);
+	LASSERT(itp->it_flags & MDS_OPEN_BY_FID);
+
+	/*
+	 * if server supports open-by-fid, or file name is invalid, don't pack
+	 * name in open request
 	 */
-	if (!lmm && lmmsize == 0) {
-		struct ll_dentry_data *ldd = ll_d2d(dentry);
-		/*
-		 * If we came via ll_iget_for_nfs, then we need to request
-		 * struct ll_dentry_data *ldd = ll_d2d(file->f_dentry);
-		 *
-		 * NB: when ldd is NULL, it must have come via normal
-		 * lookup path only, since ll_iget_for_nfs always calls
-		 * ll_d_init().
-		 */
-		if (ldd && ldd->lld_nfs_dentry) {
-			ldd->lld_nfs_dentry = 0;
-			itp->it_flags |= MDS_OPEN_LOCK;
-		}
-		if (itp->it_flags & FMODE_WRITE)
-			opc = LUSTRE_OPC_CREATE;
+	if (!(exp_connect_flags(sbi->ll_md_exp) & OBD_CONNECT_OPEN_BY_FID) &&
+	    lu_name_is_valid_2(de->d_name.name, de->d_name.len)) {
+		name = de->d_name.name;
+		len = de->d_name.len;
 	}
 
-	op_data  = ll_prep_md_op_data(NULL, d_inode(parent),
-				      inode, name, len,
-				      O_RDWR, opc, NULL);
+	op_data  = ll_prep_md_op_data(NULL, d_inode(parent), inode, name, len,
+				      O_RDWR, LUSTRE_OPC_ANY, NULL);
 	if (IS_ERR(op_data))
 		return PTR_ERR(op_data);
+	op_data->op_data = lmm;
+	op_data->op_data_size = lmmsize;
 
-	itp->it_flags |= MDS_OPEN_BY_FID;
-	rc = md_intent_lock(sbi->ll_md_exp, op_data, lmm, lmmsize, itp,
-			    0 /*unused */, &req, ll_md_blocking_ast, 0);
+	rc = md_intent_lock(sbi->ll_md_exp, op_data, itp, &req,
+			    &ll_md_blocking_ast, 0);
 	ll_finish_md_op_data(op_data);
 	if (rc == -ESTALE) {
 		/* reason for keep own exit path - don`t flood log
@@ -479,8 +457,8 @@ static int ll_och_fill(struct obd_export *md_exp, struct lookup_intent *it,
 	struct mdt_body *body;
 
 	body = req_capsule_server_get(&it->it_request->rq_pill, &RMF_MDT_BODY);
-	och->och_fh = body->handle;
-	och->och_fid = body->fid1;
+	och->och_fh = body->mbo_handle;
+	och->och_fid = body->mbo_fid1;
 	och->och_lease_handle.cookie = it->it_lock_handle;
 	och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
 	och->och_flags = it->it_flags;
@@ -508,7 +486,7 @@ static int ll_local_open(struct file *file, struct lookup_intent *it,
 
 		body = req_capsule_server_get(&it->it_request->rq_pill,
 					      &RMF_MDT_BODY);
-		ll_ioepoch_open(lli, body->ioepoch);
+		ll_ioepoch_open(lli, body->mbo_ioepoch);
 	}
 
 	LUSTRE_FPRIVATE(file) = fd;
@@ -543,7 +521,7 @@ int ll_file_open(struct inode *inode, struct file *file)
 	struct obd_client_handle **och_p = NULL;
 	__u64 *och_usecount = NULL;
 	struct ll_file_data *fd;
-	int rc = 0, opendir_set = 0;
+	int rc = 0;
 
 	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), flags %o\n",
 	       PFID(ll_inode2fid(inode)), inode, file->f_flags);
@@ -558,16 +536,8 @@ int ll_file_open(struct inode *inode, struct file *file)
 	}
 
 	fd->fd_file = file;
-	if (S_ISDIR(inode->i_mode)) {
-		spin_lock(&lli->lli_sa_lock);
-		if (!lli->lli_opendir_key && !lli->lli_sai &&
-		    lli->lli_opendir_pid == 0) {
-			lli->lli_opendir_key = fd;
-			lli->lli_opendir_pid = current_pid();
-			opendir_set = 1;
-		}
-		spin_unlock(&lli->lli_sa_lock);
-	}
+	if (S_ISDIR(inode->i_mode))
+		ll_authorize_statahead(inode, fd);
 
 	if (is_root_inode(inode)) {
 		LUSTRE_FPRIVATE(file) = fd;
@@ -615,7 +585,7 @@ restart:
 	} else if (it->it_flags & FMODE_EXEC) {
 		och_p = &lli->lli_mds_exec_och;
 		och_usecount = &lli->lli_open_fd_exec_count;
-	 } else {
+	} else {
 		och_p = &lli->lli_mds_read_och;
 		och_usecount = &lli->lli_open_fd_read_count;
 	}
@@ -652,9 +622,19 @@ restart:
 			 * result in a deadlock
 			 */
 			mutex_unlock(&lli->lli_och_mutex);
-			it->it_create_mode |= M_CHECK_STALE;
+			/*
+			 * Normally called under two situations:
+			 * 1. NFS export.
+			 * 2. revalidate with IT_OPEN (revalidate doesn't
+			 *    execute this intent any more).
+			 *
+			 * Always fetch MDS_OPEN_LOCK if this is not setstripe.
+			 *
+			 * Always specify MDS_OPEN_BY_FID because we don't want
+			 * to get file with different fid.
+			 */
+			it->it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID;
 			rc = ll_intent_file_open(file->f_path.dentry, NULL, 0, it);
-			it->it_create_mode &= ~M_CHECK_STALE;
 			if (rc)
 				goto out_openerr;
 
@@ -716,9 +696,10 @@ out_och_free:
 		mutex_unlock(&lli->lli_och_mutex);
 
 out_openerr:
-		if (opendir_set != 0)
-			ll_stop_statahead(inode, lli->lli_opendir_key);
-		ll_file_data_put(fd);
+		if (lli->lli_opendir_key == fd)
+			ll_deauthorize_statahead(inode, fd);
+		if (fd)
+			ll_file_data_put(fd);
 	} else {
 		ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
 	}
@@ -764,7 +745,7 @@ ll_lease_open(struct inode *inode, struct file *file, fmode_t fmode,
 	struct lookup_intent it = { .it_op = IT_OPEN };
 	struct ll_sb_info *sbi = ll_i2sbi(inode);
 	struct md_op_data *op_data;
-	struct ptlrpc_request *req;
+	struct ptlrpc_request *req = NULL;
 	struct lustre_handle old_handle = { 0 };
 	struct obd_client_handle *och = NULL;
 	int rc;
@@ -831,8 +812,8 @@ ll_lease_open(struct inode *inode, struct file *file, fmode_t fmode,
 
 	it.it_flags = fmode | open_flags;
 	it.it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID | MDS_OPEN_LEASE;
-	rc = md_intent_lock(sbi->ll_md_exp, op_data, NULL, 0, &it, 0, &req,
-			    ll_md_blocking_lease_ast,
+	rc = md_intent_lock(sbi->ll_md_exp, op_data, &it, &req,
+			    &ll_md_blocking_lease_ast,
 	/* LDLM_FL_NO_LRU: To not put the lease lock into LRU list, otherwise
 	 * it can be cancelled which may mislead applications that the lease is
 	 * broken;
@@ -840,7 +821,7 @@ ll_lease_open(struct inode *inode, struct file *file, fmode_t fmode,
 	 * open in ll_md_blocking_ast(). Otherwise as ll_md_blocking_lease_ast
 	 * doesn't deal with openhandle, so normal openhandle will be leaked.
 	 */
-				LDLM_FL_NO_LRU | LDLM_FL_EXCL);
+			    LDLM_FL_NO_LRU | LDLM_FL_EXCL);
 	ll_finish_md_op_data(op_data);
 	ptlrpc_req_finished(req);
 	if (rc < 0)
@@ -908,7 +889,6 @@ static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
 {
 	struct ldlm_lock *lock;
 	bool cancelled = true;
-	int rc;
 
 	lock = ldlm_handle2lock(&och->och_lease_handle);
 	if (lock) {
@@ -926,9 +906,8 @@ static int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
 	if (lease_broken)
 		*lease_broken = cancelled;
 
-	rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och,
-				       NULL);
-	return rc;
+	return ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
+					 inode, och, NULL);
 }
 
 /* Fills the obdo with the attributes for the lsm */
@@ -1138,10 +1117,11 @@ ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args,
 {
 	struct ll_inode_info *lli = ll_i2info(file_inode(file));
 	struct ll_file_data  *fd  = LUSTRE_FPRIVATE(file);
+	struct range_lock range;
 	struct cl_io	 *io;
 	ssize_t	       result;
 
-	CDEBUG(D_VFSTRACE, "file: %s, type: %d ppos: %llu, count: %zd\n",
+	CDEBUG(D_VFSTRACE, "file: %s, type: %d ppos: %llu, count: %zu\n",
 	       file->f_path.dentry->d_name.name, iot, *ppos, count);
 
 restart:
@@ -1150,7 +1130,12 @@ restart:
 
 	if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
 		struct vvp_io *vio = vvp_env_io(env);
-		int write_mutex_locked = 0;
+		bool range_locked = false;
+
+		if (file->f_flags & O_APPEND)
+			range_lock_init(&range, 0, LUSTRE_EOF);
+		else
+			range_lock_init(&range, *ppos, *ppos + count - 1);
 
 		vio->vui_fd  = LUSTRE_FPRIVATE(file);
 		vio->vui_io_subtype = args->via_io_subtype;
@@ -1159,14 +1144,23 @@ restart:
 		case IO_NORMAL:
 			vio->vui_iter = args->u.normal.via_iter;
 			vio->vui_iocb = args->u.normal.via_iocb;
-			if ((iot == CIT_WRITE) &&
+			/*
+			 * Direct IO reads must also take range lock,
+			 * or multiple reads will try to work on the same pages
+			 * See LU-6227 for details.
+			 */
+			if (((iot == CIT_WRITE) ||
+			     (iot == CIT_READ && (file->f_flags & O_DIRECT))) &&
 			    !(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
-				if (mutex_lock_interruptible(&lli->
-							       lli_write_mutex)) {
-					result = -ERESTARTSYS;
+				CDEBUG(D_VFSTRACE, "Range lock [%llu, %llu]\n",
+				       range.rl_node.in_extent.start,
+				       range.rl_node.in_extent.end);
+				result = range_lock(&lli->lli_write_tree,
+						    &range);
+				if (result < 0)
 					goto out;
-				}
-				write_mutex_locked = 1;
+
+				range_locked = true;
 			}
 			down_read(&lli->lli_trunc_sem);
 			break;
@@ -1183,8 +1177,12 @@ restart:
 		ll_cl_remove(file, env);
 		if (args->via_io_subtype == IO_NORMAL)
 			up_read(&lli->lli_trunc_sem);
-		if (write_mutex_locked)
-			mutex_unlock(&lli->lli_write_mutex);
+		if (range_locked) {
+			CDEBUG(D_VFSTRACE, "Range unlock [%llu, %llu]\n",
+			       range.rl_node.in_extent.start,
+			       range.rl_node.in_extent.end);
+			range_unlock(&lli->lli_write_tree, &range);
+		}
 	} else {
 		/* cl_io_rw_init() handled IO */
 		result = io->ci_result;
@@ -1201,7 +1199,7 @@ out:
 	 * short read/write instead of restart io.
 	 */
 	if ((result == 0 || result == -ENODATA) && io->ci_need_restart) {
-		CDEBUG(D_VFSTRACE, "Restart %s on %pD from %lld, count:%zd\n",
+		CDEBUG(D_VFSTRACE, "Restart %s on %pD from %lld, count:%zu\n",
 		       iot == CIT_READ ? "read" : "write",
 		       file, *ppos, count);
 		LASSERTF(io->ci_nob == 0, "%zd\n", io->ci_nob);
@@ -1296,94 +1294,15 @@ static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
 	return result;
 }
 
-static int ll_lov_recreate(struct inode *inode, struct ost_id *oi, u32 ost_idx)
-{
-	struct obd_export *exp = ll_i2dtexp(inode);
-	struct obd_trans_info oti = { 0 };
-	struct obdo *oa = NULL;
-	int lsm_size;
-	int rc = 0;
-	struct lov_stripe_md *lsm = NULL, *lsm2;
-
-	oa = kmem_cache_zalloc(obdo_cachep, GFP_NOFS);
-	if (!oa)
-		return -ENOMEM;
-
-	lsm = ccc_inode_lsm_get(inode);
-	if (!lsm_has_objects(lsm)) {
-		rc = -ENOENT;
-		goto out;
-	}
-
-	lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) *
-		   (lsm->lsm_stripe_count));
-
-	lsm2 = libcfs_kvzalloc(lsm_size, GFP_NOFS);
-	if (!lsm2) {
-		rc = -ENOMEM;
-		goto out;
-	}
-
-	oa->o_oi = *oi;
-	oa->o_nlink = ost_idx;
-	oa->o_flags |= OBD_FL_RECREATE_OBJS;
-	oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP;
-	obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
-				   OBD_MD_FLMTIME | OBD_MD_FLCTIME);
-	obdo_set_parent_fid(oa, &ll_i2info(inode)->lli_fid);
-	memcpy(lsm2, lsm, lsm_size);
-	ll_inode_size_lock(inode);
-	rc = obd_create(NULL, exp, oa, &lsm2, &oti);
-	ll_inode_size_unlock(inode);
-
-	kvfree(lsm2);
-	goto out;
-out:
-	ccc_inode_lsm_put(inode, lsm);
-	kmem_cache_free(obdo_cachep, oa);
-	return rc;
-}
-
-static int ll_lov_recreate_obj(struct inode *inode, unsigned long arg)
-{
-	struct ll_recreate_obj ucreat;
-	struct ost_id		oi;
-
-	if (!capable(CFS_CAP_SYS_ADMIN))
-		return -EPERM;
-
-	if (copy_from_user(&ucreat, (struct ll_recreate_obj __user *)arg,
-			   sizeof(ucreat)))
-		return -EFAULT;
-
-	ostid_set_seq_mdt0(&oi);
-	ostid_set_id(&oi, ucreat.lrc_id);
-	return ll_lov_recreate(inode, &oi, ucreat.lrc_ost_idx);
-}
-
-static int ll_lov_recreate_fid(struct inode *inode, unsigned long arg)
-{
-	struct lu_fid	fid;
-	struct ost_id	oi;
-	u32		ost_idx;
-
-	if (!capable(CFS_CAP_SYS_ADMIN))
-		return -EPERM;
-
-	if (copy_from_user(&fid, (struct lu_fid __user *)arg, sizeof(fid)))
-		return -EFAULT;
-
-	fid_to_ostid(&fid, &oi);
-	ost_idx = (fid_seq(&fid) >> 16) & 0xffff;
-	return ll_lov_recreate(inode, &oi, ost_idx);
-}
-
 int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry,
 			     __u64 flags, struct lov_user_md *lum,
 			     int lum_size)
 {
 	struct lov_stripe_md *lsm = NULL;
-	struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
+	struct lookup_intent oit = {
+		.it_op = IT_OPEN,
+		.it_flags = flags | MDS_OPEN_BY_FID,
+	};
 	int rc = 0;
 
 	lsm = ccc_inode_lsm_get(inode);
@@ -1397,11 +1316,11 @@ int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry,
 
 	ll_inode_size_lock(inode);
 	rc = ll_intent_file_open(dentry, lum, lum_size, &oit);
-	if (rc)
+	if (rc < 0)
 		goto out_unlock;
 	rc = oit.it_status;
 	if (rc < 0)
-		goto out_req_free;
+		goto out_unlock;
 
 	ll_release_openhandle(inode, &oit);
 
@@ -1411,9 +1330,6 @@ out_unlock:
 	ccc_inode_lsm_put(inode, lsm);
 out:
 	return rc;
-out_req_free:
-	ptlrpc_req_finished((struct ptlrpc_request *)oit.it_request);
-	goto out;
 }
 
 int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
@@ -1448,9 +1364,9 @@ int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
 
 	body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
 
-	lmmsize = body->eadatasize;
+	lmmsize = body->mbo_eadatasize;
 
-	if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
+	if (!(body->mbo_valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
 	    lmmsize == 0) {
 		rc = -ENODATA;
 		goto out;
@@ -1481,13 +1397,13 @@ int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
 		 */
 		if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
 			lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
-			if (S_ISREG(body->mode))
+			if (S_ISREG(body->mbo_mode))
 				lustre_swab_lov_user_md_objects(
 				 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
 				 stripe_count);
 		} else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
 			lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
-			if (S_ISREG(body->mode))
+			if (S_ISREG(body->mbo_mode))
 				lustre_swab_lov_user_md_objects(
 				 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
 				 stripe_count);
@@ -1530,55 +1446,48 @@ static int ll_lov_setea(struct inode *inode, struct file *file,
 	return rc;
 }
 
+static int ll_file_getstripe(struct inode *inode,
+			     struct lov_user_md __user *lum)
+{
+	struct lu_env *env;
+	int refcheck;
+	int rc;
+
+	env = cl_env_get(&refcheck);
+	if (IS_ERR(env))
+		return PTR_ERR(env);
+
+	rc = cl_object_getstripe(env, ll_i2info(inode)->lli_clob, lum);
+	cl_env_put(env, &refcheck);
+	return rc;
+}
+
 static int ll_lov_setstripe(struct inode *inode, struct file *file,
 			    unsigned long arg)
 {
-	struct lov_user_md_v3 lumv3;
-	struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
-	struct lov_user_md_v1 __user *lumv1p = (void __user *)arg;
-	struct lov_user_md_v3 __user *lumv3p = (void __user *)arg;
+	struct lov_user_md __user *lum = (struct lov_user_md __user *)arg;
+	struct lov_user_md *klum;
 	int lum_size, rc;
 	__u64 flags = FMODE_WRITE;
 
-	/* first try with v1 which is smaller than v3 */
-	lum_size = sizeof(struct lov_user_md_v1);
-	if (copy_from_user(lumv1, lumv1p, lum_size))
-		return -EFAULT;
-
-	if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
-		lum_size = sizeof(struct lov_user_md_v3);
-		if (copy_from_user(&lumv3, lumv3p, lum_size))
-			return -EFAULT;
-	}
+	rc = ll_copy_user_md(lum, &klum);
+	if (rc < 0)
+		return rc;
 
-	rc = ll_lov_setstripe_ea_info(inode, file->f_path.dentry, flags, lumv1,
+	lum_size = rc;
+	rc = ll_lov_setstripe_ea_info(inode, file->f_path.dentry, flags, klum,
 				      lum_size);
 	cl_lov_delay_create_clear(&file->f_flags);
 	if (rc == 0) {
-		struct lov_stripe_md *lsm;
 		__u32 gen;
 
-		put_user(0, &lumv1p->lmm_stripe_count);
+		put_user(0, &lum->lmm_stripe_count);
 
 		ll_layout_refresh(inode, &gen);
-		lsm = ccc_inode_lsm_get(inode);
-		rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode),
-				   0, lsm, (void __user *)arg);
-		ccc_inode_lsm_put(inode, lsm);
+		rc = ll_file_getstripe(inode, (struct lov_user_md __user *)arg);
 	}
-	return rc;
-}
 
-static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
-{
-	struct lov_stripe_md *lsm;
-	int rc = -ENODATA;
-
-	lsm = ccc_inode_lsm_get(inode);
-	if (lsm)
-		rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 0,
-				   lsm, (void __user *)arg);
-	ccc_inode_lsm_put(inode, lsm);
+	kfree(klum);
 	return rc;
 }
 
@@ -2247,6 +2156,12 @@ free_hss:
 	return rc;
 }
 
+static inline long ll_lease_type_from_fmode(fmode_t fmode)
+{
+	return ((fmode & FMODE_READ) ? LL_LEASE_RDLCK : 0) |
+	       ((fmode & FMODE_WRITE) ? LL_LEASE_WRLCK : 0);
+}
+
 static long
 ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
@@ -2314,11 +2229,8 @@ ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 		return rc;
 	}
 	case LL_IOC_LOV_GETSTRIPE:
-		return ll_lov_getstripe(inode, arg);
-	case LL_IOC_RECREATE_OBJ:
-		return ll_lov_recreate_obj(inode, arg);
-	case LL_IOC_RECREATE_FID:
-		return ll_lov_recreate_fid(inode, arg);
+		return ll_file_getstripe(inode,
+					 (struct lov_user_md __user *)arg);
 	case FSFILT_IOC_FIEMAP:
 		return ll_ioctl_fiemap(inode, arg);
 	case FSFILT_IOC_GETFLAGS:
@@ -2349,6 +2261,8 @@ ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 
 		return 0;
 	}
+	case LL_IOC_GETPARENT:
+		return ll_getparent(file, (struct getparent __user *)arg);
 	case OBD_IOC_FID2PATH:
 		return ll_fid2path(inode, (void __user *)arg);
 	case LL_IOC_DATA_VERSION: {
@@ -2451,20 +2365,20 @@ ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 		struct ll_inode_info *lli = ll_i2info(inode);
 		struct obd_client_handle *och = NULL;
 		bool lease_broken;
-		fmode_t mode = 0;
+		fmode_t fmode;
 
 		switch (arg) {
-		case F_WRLCK:
+		case LL_LEASE_WRLCK:
 			if (!(file->f_mode & FMODE_WRITE))
 				return -EPERM;
-			mode = FMODE_WRITE;
+			fmode = FMODE_WRITE;
 			break;
-		case F_RDLCK:
+		case LL_LEASE_RDLCK:
 			if (!(file->f_mode & FMODE_READ))
 				return -EPERM;
-			mode = FMODE_READ;
+			fmode = FMODE_READ;
 			break;
-		case F_UNLCK:
+		case LL_LEASE_UNLCK:
 			mutex_lock(&lli->lli_och_mutex);
 			if (fd->fd_lease_och) {
 				och = fd->fd_lease_och;
@@ -2472,26 +2386,26 @@ ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 			}
 			mutex_unlock(&lli->lli_och_mutex);
 
-			if (och) {
-				mode = och->och_flags &
-				       (FMODE_READ|FMODE_WRITE);
-				rc = ll_lease_close(och, inode, &lease_broken);
-				if (rc == 0 && lease_broken)
-					mode = 0;
-			} else {
-				rc = -ENOLCK;
-			}
+			if (!och)
+				return -ENOLCK;
+
+			fmode = och->och_flags;
+			rc = ll_lease_close(och, inode, &lease_broken);
+			if (rc < 0)
+				return rc;
+
+			if (lease_broken)
+				fmode = 0;
 
-			/* return the type of lease or error */
-			return rc < 0 ? rc : (int)mode;
+			return ll_lease_type_from_fmode(fmode);
 		default:
 			return -EINVAL;
 		}
 
-		CDEBUG(D_INODE, "Set lease with mode %d\n", mode);
+		CDEBUG(D_INODE, "Set lease with mode %u\n", fmode);
 
 		/* apply for lease */
-		och = ll_lease_open(inode, file, mode, 0);
+		och = ll_lease_open(inode, file, fmode, 0);
 		if (IS_ERR(och))
 			return PTR_ERR(och);
 
@@ -2512,8 +2426,8 @@ ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	case LL_IOC_GET_LEASE: {
 		struct ll_inode_info *lli = ll_i2info(inode);
 		struct ldlm_lock *lock = NULL;
+		fmode_t fmode = 0;
 
-		rc = 0;
 		mutex_lock(&lli->lli_och_mutex);
 		if (fd->fd_lease_och) {
 			struct obd_client_handle *och = fd->fd_lease_och;
@@ -2522,14 +2436,13 @@ ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 			if (lock) {
 				lock_res_and_lock(lock);
 				if (!ldlm_is_cancel(lock))
-					rc = och->och_flags &
-						(FMODE_READ | FMODE_WRITE);
+					fmode = och->och_flags;
 				unlock_res_and_lock(lock);
 				LDLM_LOCK_PUT(lock);
 			}
 		}
 		mutex_unlock(&lli->lli_och_mutex);
-		return rc;
+		return ll_lease_type_from_fmode(fmode);
 	}
 	case LL_IOC_HSM_IMPORT: {
 		struct hsm_user_import *hui;
@@ -2574,9 +2487,8 @@ static loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
 		eof = i_size_read(inode);
 	}
 
-	retval = generic_file_llseek_size(file, offset, origin,
-					  ll_file_maxbytes(inode), eof);
-	return retval;
+	return generic_file_llseek_size(file, offset, origin,
+					ll_file_maxbytes(inode), eof);
 }
 
 static int ll_flush(struct file *file, fl_owner_t id)
@@ -2593,9 +2505,11 @@ static int ll_flush(struct file *file, fl_owner_t id)
 	 */
 	rc = lli->lli_async_rc;
 	lli->lli_async_rc = 0;
-	err = lov_read_and_clear_async_rc(lli->lli_clob);
-	if (rc == 0)
-		rc = err;
+	if (lli->lli_clob) {
+		err = lov_read_and_clear_async_rc(lli->lli_clob);
+		if (!rc)
+			rc = err;
+	}
 
 	/* The application has been told about write failure already.
 	 * Do not report failure again.
@@ -2714,6 +2628,7 @@ ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
 	struct md_op_data *op_data;
 	struct lustre_handle lockh = {0};
 	ldlm_policy_data_t flock = { {0} };
+	int fl_type = file_lock->fl_type;
 	__u64 flags = 0;
 	int rc;
 	int rc2 = 0;
@@ -2744,7 +2659,7 @@ ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
 	if (file_lock->fl_lmops && file_lock->fl_lmops->lm_compare_owner)
 		flock.l_flock.owner = (unsigned long)file_lock->fl_pid;
 
-	switch (file_lock->fl_type) {
+	switch (fl_type) {
 	case F_RDLCK:
 		einfo.ei_mode = LCK_PR;
 		break;
@@ -2764,8 +2679,7 @@ ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
 		einfo.ei_mode = LCK_PW;
 		break;
 	default:
-		CDEBUG(D_INFO, "Unknown fcntl lock type: %d\n",
-		       file_lock->fl_type);
+		CDEBUG(D_INFO, "Unknown fcntl lock type: %d\n", fl_type);
 		return -ENOTSUPP;
 	}
 
@@ -2787,16 +2701,18 @@ ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
 	case F_GETLK64:
 #endif
 		flags = LDLM_FL_TEST_LOCK;
-		/* Save the old mode so that if the mode in the lock changes we
-		 * can decrement the appropriate reader or writer refcount.
-		 */
-		file_lock->fl_type = einfo.ei_mode;
 		break;
 	default:
 		CERROR("unknown fcntl lock command: %d\n", cmd);
 		return -EINVAL;
 	}
 
+	/*
+	 * Save the old mode so that if the mode in the lock changes we
+	 * can decrement the appropriate reader or writer refcount.
+	 */
+	file_lock->fl_type = einfo.ei_mode;
+
 	op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
 				     LUSTRE_OPC_ANY, NULL);
 	if (IS_ERR(op_data))
@@ -2806,8 +2722,12 @@ ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
 	       PFID(ll_inode2fid(inode)), flock.l_flock.pid, flags,
 	       einfo.ei_mode, flock.l_flock.start, flock.l_flock.end);
 
-	rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL,
-			op_data, &lockh, &flock, 0, NULL /* req */, flags);
+	rc = md_enqueue(sbi->ll_md_exp, &einfo, &flock, NULL, op_data, &lockh,
+			flags);
+
+	/* Restore the file lock type if not TEST lock. */
+	if (!(flags & LDLM_FL_TEST_LOCK))
+		file_lock->fl_type = fl_type;
 
 	if ((rc == 0 || file_lock->fl_type == F_UNLCK) &&
 	    !(flags & LDLM_FL_TEST_LOCK))
@@ -2815,8 +2735,8 @@ ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
 
 	if (rc2 && file_lock->fl_type != F_UNLCK) {
 		einfo.ei_mode = LCK_NL;
-		md_enqueue(sbi->ll_md_exp, &einfo, NULL,
-			   op_data, &lockh, &flock, 0, NULL /* req */, flags);
+		md_enqueue(sbi->ll_md_exp, &einfo, &flock, NULL, op_data,
+			   &lockh, flags);
 		rc = rc2;
 	}
 
@@ -2825,6 +2745,117 @@ ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
 	return rc;
 }
 
+int ll_get_fid_by_name(struct inode *parent, const char *name,
+		       int namelen, struct lu_fid *fid)
+{
+	struct md_op_data *op_data = NULL;
+	struct ptlrpc_request *req;
+	struct mdt_body *body;
+	int rc;
+
+	op_data = ll_prep_md_op_data(NULL, parent, NULL, name, namelen, 0,
+				     LUSTRE_OPC_ANY, NULL);
+	if (IS_ERR(op_data))
+		return PTR_ERR(op_data);
+
+	op_data->op_valid = OBD_MD_FLID;
+	rc = md_getattr_name(ll_i2sbi(parent)->ll_md_exp, op_data, &req);
+	ll_finish_md_op_data(op_data);
+	if (rc < 0)
+		return rc;
+
+	body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
+	if (!body) {
+		rc = -EFAULT;
+		goto out_req;
+	}
+	if (fid)
+		*fid = body->mbo_fid1;
+out_req:
+	ptlrpc_req_finished(req);
+	return rc;
+}
+
+int ll_migrate(struct inode *parent, struct file *file, int mdtidx,
+	       const char *name, int namelen)
+{
+	struct ptlrpc_request *request = NULL;
+	struct inode *child_inode = NULL;
+	struct dentry *dchild = NULL;
+	struct md_op_data *op_data;
+	struct qstr qstr;
+	int rc;
+
+	CDEBUG(D_VFSTRACE, "migrate %s under "DFID" to MDT%d\n",
+	       name, PFID(ll_inode2fid(parent)), mdtidx);
+
+	op_data = ll_prep_md_op_data(NULL, parent, NULL, name, namelen,
+				     0, LUSTRE_OPC_ANY, NULL);
+	if (IS_ERR(op_data))
+		return PTR_ERR(op_data);
+
+	/* Get child FID first */
+	qstr.hash = full_name_hash(parent, name, namelen);
+	qstr.name = name;
+	qstr.len = namelen;
+	dchild = d_lookup(file_dentry(file), &qstr);
+	if (dchild) {
+		op_data->op_fid3 = *ll_inode2fid(dchild->d_inode);
+		if (dchild->d_inode) {
+			child_inode = igrab(dchild->d_inode);
+			if (child_inode) {
+				inode_lock(child_inode);
+				op_data->op_fid3 = *ll_inode2fid(child_inode);
+				ll_invalidate_aliases(child_inode);
+			}
+		}
+		dput(dchild);
+	} else {
+		rc = ll_get_fid_by_name(parent, name, namelen,
+					&op_data->op_fid3);
+		if (rc)
+			goto out_free;
+	}
+
+	if (!fid_is_sane(&op_data->op_fid3)) {
+		CERROR("%s: migrate %s, but fid "DFID" is insane\n",
+		       ll_get_fsname(parent->i_sb, NULL, 0), name,
+		       PFID(&op_data->op_fid3));
+		rc = -EINVAL;
+		goto out_free;
+	}
+
+	rc = ll_get_mdt_idx_by_fid(ll_i2sbi(parent), &op_data->op_fid3);
+	if (rc < 0)
+		goto out_free;
+
+	if (rc == mdtidx) {
+		CDEBUG(D_INFO, "%s:"DFID" is already on MDT%d.\n", name,
+		       PFID(&op_data->op_fid3), mdtidx);
+		rc = 0;
+		goto out_free;
+	}
+
+	op_data->op_mds = mdtidx;
+	op_data->op_cli_flags = CLI_MIGRATE;
+	rc = md_rename(ll_i2sbi(parent)->ll_md_exp, op_data, name,
+		       namelen, name, namelen, &request);
+	if (!rc)
+		ll_update_times(request, parent);
+
+	ptlrpc_req_finished(request);
+
+out_free:
+	if (child_inode) {
+		clear_nlink(child_inode);
+		inode_unlock(child_inode);
+		iput(child_inode);
+	}
+
+	ll_finish_md_op_data(op_data);
+	return rc;
+}
+
 static int
 ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
 {
@@ -2847,7 +2878,7 @@ int ll_have_md_lock(struct inode *inode, __u64 *bits,
 	struct lustre_handle lockh;
 	ldlm_policy_data_t policy;
 	enum ldlm_mode mode = (l_req_mode == LCK_MINMODE) ?
-				(LCK_CR|LCK_CW|LCK_PR|LCK_PW) : l_req_mode;
+			      (LCK_CR | LCK_CW | LCK_PR | LCK_PW) : l_req_mode;
 	struct lu_fid *fid;
 	__u64 flags;
 	int i;
@@ -2888,15 +2919,12 @@ enum ldlm_mode ll_take_md_lock(struct inode *inode, __u64 bits,
 {
 	ldlm_policy_data_t policy = { .l_inodebits = {bits} };
 	struct lu_fid *fid;
-	enum ldlm_mode rc;
 
 	fid = &ll_i2info(inode)->lli_fid;
 	CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
 
-	rc = md_lock_match(ll_i2mdexp(inode), flags | LDLM_FL_BLOCK_GRANTED,
-			   fid, LDLM_IBITS, &policy, mode, lockh);
-
-	return rc;
+	return md_lock_match(ll_i2mdexp(inode), flags | LDLM_FL_BLOCK_GRANTED,
+			     fid, LDLM_IBITS, &policy, mode, lockh);
 }
 
 static int ll_inode_revalidate_fini(struct inode *inode, int rc)
@@ -2949,15 +2977,9 @@ static int __ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
 		if (IS_ERR(op_data))
 			return PTR_ERR(op_data);
 
-		oit.it_create_mode |= M_CHECK_STALE;
-		rc = md_intent_lock(exp, op_data, NULL, 0,
-				    /* we are not interested in name
-				     * based lookup
-				     */
-				    &oit, 0, &req,
-				    ll_md_blocking_ast, 0);
+		rc = md_intent_lock(exp, op_data, &oit, &req,
+				    &ll_md_blocking_ast, 0);
 		ll_finish_md_op_data(op_data);
-		oit.it_create_mode &= ~M_CHECK_STALE;
 		if (rc < 0) {
 			rc = ll_inode_revalidate_fini(inode, rc);
 			goto out;
@@ -3003,10 +3025,8 @@ static int __ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
 		op_data->op_valid = valid;
 		rc = md_getattr(sbi->ll_md_exp, op_data, &req);
 		ll_finish_md_op_data(op_data);
-		if (rc) {
-			rc = ll_inode_revalidate_fini(inode, rc);
-			return rc;
-		}
+		if (rc)
+			return ll_inode_revalidate_fini(inode, rc);
 
 		rc = ll_prep_inode(&inode, req, NULL, NULL);
 	}
@@ -3015,6 +3035,28 @@ out:
 	return rc;
 }
 
+static int ll_merge_md_attr(struct inode *inode)
+{
+	struct cl_attr attr = { 0 };
+	int rc;
+
+	LASSERT(ll_i2info(inode)->lli_lsm_md);
+	rc = md_merge_attr(ll_i2mdexp(inode), ll_i2info(inode)->lli_lsm_md,
+			   &attr, ll_md_blocking_ast);
+	if (rc)
+		return rc;
+
+	set_nlink(inode, attr.cat_nlink);
+	inode->i_blocks = attr.cat_blocks;
+	i_size_write(inode, attr.cat_size);
+
+	ll_i2info(inode)->lli_atime = attr.cat_atime;
+	ll_i2info(inode)->lli_mtime = attr.cat_mtime;
+	ll_i2info(inode)->lli_ctime = attr.cat_ctime;
+
+	return 0;
+}
+
 static int ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
 {
 	struct inode *inode = d_inode(dentry);
@@ -3026,6 +3068,13 @@ static int ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
 
 	/* if object isn't regular file, don't validate size */
 	if (!S_ISREG(inode->i_mode)) {
+		if (S_ISDIR(inode->i_mode) &&
+		    ll_i2info(inode)->lli_lsm_md) {
+			rc = ll_merge_md_attr(inode);
+			if (rc)
+				return rc;
+		}
+
 		LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_atime;
 		LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_mtime;
 		LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_ctime;
@@ -3057,13 +3106,14 @@ int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
 	if (res)
 		return res;
 
+	OBD_FAIL_TIMEOUT(OBD_FAIL_GETATTR_DELAY, 30);
+
 	stat->dev = inode->i_sb->s_dev;
 	if (ll_need_32bit_api(sbi))
 		stat->ino = cl_fid_build_ino(&lli->lli_fid, 1);
 	else
 		stat->ino = inode->i_ino;
 	stat->mode = inode->i_mode;
-	stat->nlink = inode->i_nlink;
 	stat->uid = inode->i_uid;
 	stat->gid = inode->i_gid;
 	stat->rdev = inode->i_rdev;
@@ -3072,6 +3122,7 @@ int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
 	stat->ctime = inode->i_ctime;
 	stat->blksize = 1 << inode->i_blkbits;
 
+	stat->nlink = inode->i_nlink;
 	stat->size = i_size_read(inode);
 	stat->blocks = inode->i_blocks;
 
@@ -3139,6 +3190,12 @@ struct posix_acl *ll_get_acl(struct inode *inode, int type)
 
 int ll_inode_permission(struct inode *inode, int mask)
 {
+	struct ll_sb_info *sbi;
+	struct root_squash_info *squash;
+	const struct cred *old_cred = NULL;
+	struct cred *cred = NULL;
+	bool squash_id = false;
+	cfs_cap_t cap;
 	int rc = 0;
 
 	if (mask & MAY_NOT_BLOCK)
@@ -3158,9 +3215,46 @@ int ll_inode_permission(struct inode *inode, int mask)
 	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), inode mode %x mask %o\n",
 	       PFID(ll_inode2fid(inode)), inode, inode->i_mode, mask);
 
+	/* squash fsuid/fsgid if needed */
+	sbi = ll_i2sbi(inode);
+	squash = &sbi->ll_squash;
+	if (unlikely(squash->rsi_uid &&
+		     uid_eq(current_fsuid(), GLOBAL_ROOT_UID) &&
+		     !(sbi->ll_flags & LL_SBI_NOROOTSQUASH))) {
+		squash_id = true;
+	}
+
+	if (squash_id) {
+		CDEBUG(D_OTHER, "squash creds (%d:%d)=>(%d:%d)\n",
+		       __kuid_val(current_fsuid()), __kgid_val(current_fsgid()),
+		       squash->rsi_uid, squash->rsi_gid);
+
+		/*
+		 * update current process's credentials
+		 * and FS capability
+		 */
+		cred = prepare_creds();
+		if (!cred)
+			return -ENOMEM;
+
+		cred->fsuid = make_kuid(&init_user_ns, squash->rsi_uid);
+		cred->fsgid = make_kgid(&init_user_ns, squash->rsi_gid);
+		for (cap = 0; cap < sizeof(cfs_cap_t) * 8; cap++) {
+			if ((1 << cap) & CFS_CAP_FS_MASK)
+				cap_lower(cred->cap_effective, cap);
+		}
+		old_cred = override_creds(cred);
+	}
+
 	ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
 	rc = generic_permission(inode, mask);
 
+	/* restore current process's credentials and FS capability */
+	if (squash_id) {
+		revert_creds(old_cred);
+		put_cred(cred);
+	}
+
 	return rc;
 }
 
@@ -3213,10 +3307,10 @@ const struct inode_operations ll_file_inode_operations = {
 	.setattr	= ll_setattr,
 	.getattr	= ll_getattr,
 	.permission	= ll_inode_permission,
-	.setxattr	= ll_setxattr,
-	.getxattr	= ll_getxattr,
+	.setxattr	= generic_setxattr,
+	.getxattr	= generic_getxattr,
 	.listxattr	= ll_listxattr,
-	.removexattr	= ll_removexattr,
+	.removexattr	= generic_removexattr,
 	.fiemap		= ll_fiemap,
 	.get_acl	= ll_get_acl,
 };
@@ -3251,7 +3345,6 @@ void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd)
 	if (!in_data)
 		return NULL;
 
-	memset(in_data, 0, sizeof(*in_data));
 	in_data->iocd_size = size;
 	in_data->iocd_cb = cb;
 	in_data->iocd_count = count;
@@ -3389,7 +3482,7 @@ static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock)
 		goto out;
 	}
 
-	lmmsize = body->eadatasize;
+	lmmsize = body->mbo_eadatasize;
 	if (lmmsize == 0) /* empty layout */ {
 		rc = 0;
 		goto out;
@@ -3447,7 +3540,7 @@ static int ll_layout_lock_set(struct lustre_handle *lockh, enum ldlm_mode mode,
 		   PFID(&lli->lli_fid), inode, reconf);
 
 	/* in case this is a caching lock and reinstate with new inode */
-	md_set_lock_data(sbi->ll_md_exp, &lockh->cookie, inode, NULL);
+	md_set_lock_data(sbi->ll_md_exp, lockh, inode, NULL);
 
 	lock_res_and_lock(lock);
 	lvb_ready = ldlm_is_lvb_ready(lock);
@@ -3557,8 +3650,8 @@ int ll_layout_refresh(struct inode *inode, __u32 *gen)
 	struct ldlm_enqueue_info einfo = {
 		.ei_type = LDLM_IBITS,
 		.ei_mode = LCK_CR,
-		.ei_cb_bl = ll_md_blocking_ast,
-		.ei_cb_cp = ldlm_completion_ast,
+		.ei_cb_bl = &ll_md_blocking_ast,
+		.ei_cb_cp = &ldlm_completion_ast,
 	};
 	int rc;
 
@@ -3604,8 +3697,7 @@ again:
 			  ll_get_fsname(inode->i_sb, NULL, 0),
 			  PFID(&lli->lli_fid), inode);
 
-	rc = md_enqueue(sbi->ll_md_exp, &einfo, &it, op_data, &lockh,
-			NULL, 0, NULL, 0);
+	rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL, &it, op_data, &lockh, 0);
 	ptlrpc_req_finished(it.it_request);
 	it.it_request = NULL;
 
diff --git a/drivers/staging/lustre/lustre/llite/glimpse.c b/drivers/staging/lustre/lustre/llite/glimpse.c
index 92004a05f9ee..22507b9c6d69 100644
--- a/drivers/staging/lustre/lustre/llite/glimpse.c
+++ b/drivers/staging/lustre/lustre/llite/glimpse.c
@@ -42,7 +42,6 @@
 #include "../include/obd.h"
 
 #include "../include/lustre_dlm.h"
-#include "../include/lustre_lite.h"
 #include "../include/lustre_mdc.h"
 #include <linux/pagemap.h>
 #include <linux/file.h>
diff --git a/drivers/staging/lustre/lustre/llite/lcommon_cl.c b/drivers/staging/lustre/lustre/llite/lcommon_cl.c
index 396e4e4f0715..084330d08f7a 100644
--- a/drivers/staging/lustre/lustre/llite/lcommon_cl.c
+++ b/drivers/staging/lustre/lustre/llite/lcommon_cl.c
@@ -49,7 +49,6 @@
 #include "../include/obd.h"
 #include "../include/obd_support.h"
 #include "../include/lustre_fid.h"
-#include "../include/lustre_lite.h"
 #include "../include/lustre_dlm.h"
 #include "../include/lustre_ver.h"
 #include "../include/lustre_mdc.h"
@@ -100,6 +99,7 @@ int cl_setattr_ost(struct inode *inode, const struct iattr *attr)
 	io->u.ci_setattr.sa_attr.lvb_ctime = LTIME_S(attr->ia_ctime);
 	io->u.ci_setattr.sa_attr.lvb_size = attr->ia_size;
 	io->u.ci_setattr.sa_valid = attr->ia_valid;
+	io->u.ci_setattr.sa_parent_fid = ll_inode2fid(inode);
 
 again:
 	if (cl_io_init(env, io, CIT_SETATTR, io->ci_obj) == 0) {
@@ -154,7 +154,7 @@ int cl_file_inode_init(struct inode *inode, struct lustre_md *md)
 	int result = 0;
 	int refcheck;
 
-	LASSERT(md->body->valid & OBD_MD_FLID);
+	LASSERT(md->body->mbo_valid & OBD_MD_FLID);
 	LASSERT(S_ISREG(inode->i_mode));
 
 	env = cl_env_get(&refcheck);
diff --git a/drivers/staging/lustre/lustre/llite/lcommon_misc.c b/drivers/staging/lustre/lustre/llite/lcommon_misc.c
index f6be105eeef7..fb346c12dad2 100644
--- a/drivers/staging/lustre/lustre/llite/lcommon_misc.c
+++ b/drivers/staging/lustre/lustre/llite/lcommon_misc.c
@@ -38,7 +38,6 @@
 #include "../include/obd.h"
 #include "../include/cl_object.h"
 
-#include "../include/lustre_lite.h"
 #include "llite_internal.h"
 
 /* Initialize the default and maximum LOV EA and cookie sizes.  This allows
diff --git a/drivers/staging/lustre/lustre/llite/llite_close.c b/drivers/staging/lustre/lustre/llite/llite_close.c
index 2326b40a0870..8644631bf2ba 100644
--- a/drivers/staging/lustre/lustre/llite/llite_close.c
+++ b/drivers/staging/lustre/lustre/llite/llite_close.c
@@ -38,7 +38,6 @@
 
 #define DEBUG_SUBSYSTEM S_LLITE
 
-#include "../include/lustre_lite.h"
 #include "llite_internal.h"
 
 /** records that a write is in flight */
diff --git a/drivers/staging/lustre/lustre/llite/llite_internal.h b/drivers/staging/lustre/lustre/llite/llite_internal.h
index 4d6d589a1677..3e98bd685061 100644
--- a/drivers/staging/lustre/lustre/llite/llite_internal.h
+++ b/drivers/staging/lustre/lustre/llite/llite_internal.h
@@ -36,14 +36,21 @@
 #include "../include/lustre_ver.h"
 #include "../include/lustre_disk.h"	/* for s2sbi */
 #include "../include/lustre_eacl.h"
+#include "../include/lustre_linkea.h"
 
 /* for struct cl_lock_descr and struct cl_io */
+#include "../include/lustre_patchless_compat.h"
+#include "../include/lustre_compat.h"
 #include "../include/cl_object.h"
+#include "../include/lustre_lmv.h"
 #include "../include/lustre_mdc.h"
 #include "../include/lustre_intent.h"
 #include <linux/compat.h>
+#include <linux/namei.h>
+#include <linux/xattr.h>
 #include <linux/posix_acl_xattr.h>
 #include "vvp_internal.h"
+#include "range_lock.h"
 
 #ifndef FMODE_EXEC
 #define FMODE_EXEC 0
@@ -57,6 +64,9 @@
 #define LL_DIR_END_OFF	  0x7fffffffffffffffULL
 #define LL_DIR_END_OFF_32BIT    0x7fffffffUL
 
+/* 4UL * 1024 * 1024 */
+#define LL_MAX_BLKSIZE_BITS 22
+
 #define LL_IT2STR(it) ((it) ? ldlm_it2str((it)->it_op) : "0")
 #define LUSTRE_FPRIVATE(file) ((file)->private_data)
 
@@ -116,9 +126,7 @@ struct ll_inode_info {
 
 	/* identifying fields for both metadata and data stacks. */
 	struct lu_fid		   lli_fid;
-	/* Parent fid for accessing default stripe data on parent directory
-	 * for allocating OST objects after a mknod() and later open-by-FID.
-	 */
+	/* master inode fid for stripe directory */
 	struct lu_fid		   lli_pfid;
 
 	struct list_head	      lli_close_list;
@@ -156,7 +164,7 @@ struct ll_inode_info {
 		/* for directory */
 		struct {
 			/* serialize normal readdir and statahead-readdir. */
-			struct mutex			d_readdir_mutex;
+			struct mutex			lli_readdir_mutex;
 
 			/* metadata statahead */
 			/* since parent-child threads can share the same @file
@@ -164,27 +172,39 @@ struct ll_inode_info {
 			 * case of parent exit before child -- it is me should
 			 * cleanup the dir readahead.
 			 */
-			void			   *d_opendir_key;
-			struct ll_statahead_info       *d_sai;
+			void			       *lli_opendir_key;
+			struct ll_statahead_info       *lli_sai;
 			/* protect statahead stuff. */
-			spinlock_t			d_sa_lock;
+			spinlock_t			lli_sa_lock;
 			/* "opendir_pid" is the token when lookup/revalidate
 			 * -- I am the owner of dir statahead.
 			 */
-			pid_t			   d_opendir_pid;
-		} d;
-
-#define lli_readdir_mutex       u.d.d_readdir_mutex
-#define lli_opendir_key	 u.d.d_opendir_key
-#define lli_sai		 u.d.d_sai
-#define lli_sa_lock	     u.d.d_sa_lock
-#define lli_opendir_pid	 u.d.d_opendir_pid
+			pid_t				lli_opendir_pid;
+			/* stat will try to access statahead entries or start
+			 * statahead if this flag is set, and this flag will be
+			 * set upon dir open, and cleared when dir is closed,
+			 * statahead hit ratio is too low, or start statahead
+			 * thread failed.
+			 */
+			unsigned int			lli_sa_enabled:1;
+			/* generation for statahead */
+			unsigned int			lli_sa_generation;
+			/* directory stripe information */
+			struct lmv_stripe_md	       *lli_lsm_md;
+			/* default directory stripe offset.  This is extracted
+			 * from the "dmv" xattr in order to decide which MDT to
+			 * create a subdirectory on.  The MDS itself fetches
+			 * "dmv" and gets the rest of the default layout itself
+			 * (count, hash, etc).
+			 */
+			__u32				lli_def_stripe_offset;
+		};
 
 		/* for non-directory */
 		struct {
-			struct mutex			f_size_mutex;
-			char				*f_symlink_name;
-			__u64				f_maxbytes;
+			struct mutex			lli_size_mutex;
+			char			       *lli_symlink_name;
+			__u64				lli_maxbytes;
 			/*
 			 * struct rw_semaphore {
 			 *    signed long	count;     // align d.d_def_acl
@@ -192,16 +212,16 @@ struct ll_inode_info {
 			 *    struct list_head wait_list;
 			 * }
 			 */
-			struct rw_semaphore		f_trunc_sem;
-			struct mutex			f_write_mutex;
+			struct rw_semaphore		lli_trunc_sem;
+			struct range_lock_tree		lli_write_tree;
 
-			struct rw_semaphore		f_glimpse_sem;
-			unsigned long			f_glimpse_time;
-			struct list_head			f_agl_list;
-			__u64				f_agl_index;
+			struct rw_semaphore		lli_glimpse_sem;
+			unsigned long			lli_glimpse_time;
+			struct list_head		lli_agl_list;
+			__u64				lli_agl_index;
 
 			/* for writepage() only to communicate to fsync */
-			int				f_async_rc;
+			int				lli_async_rc;
 
 			/*
 			 * whenever a process try to read/write the file, the
@@ -211,22 +231,9 @@ struct ll_inode_info {
 			 * so the read/write statistics for jobid will not be
 			 * accurate if the file is shared by different jobs.
 			 */
-			char		     f_jobid[JOBSTATS_JOBID_SIZE];
-		} f;
-
-#define lli_size_mutex          u.f.f_size_mutex
-#define lli_symlink_name	u.f.f_symlink_name
-#define lli_maxbytes	    u.f.f_maxbytes
-#define lli_trunc_sem	   u.f.f_trunc_sem
-#define lli_write_mutex	 u.f.f_write_mutex
-#define lli_glimpse_sem		u.f.f_glimpse_sem
-#define lli_glimpse_time	u.f.f_glimpse_time
-#define lli_agl_list		u.f.f_agl_list
-#define lli_agl_index		u.f.f_agl_index
-#define lli_async_rc		u.f.f_async_rc
-#define lli_jobid		u.f.f_jobid
-
-	} u;
+			char				lli_jobid[LUSTRE_JOBID_SIZE];
+		};
+	};
 
 	/* XXX: For following frequent used members, although they maybe special
 	 *      used for non-directory object, it is some time-wasting to check
@@ -401,12 +408,13 @@ enum stats_track_type {
 #define LL_SBI_LAYOUT_LOCK    0x20000 /* layout lock support */
 #define LL_SBI_USER_FID2PATH  0x40000 /* allow fid2path by unprivileged users */
 #define LL_SBI_XATTR_CACHE    0x80000 /* support for xattr cache */
+#define LL_SBI_NOROOTSQUASH	0x100000 /* do not apply root squash */
 
 #define LL_SBI_FLAGS {	\
 	"nolck",	\
 	"checksum",	\
 	"flock",	\
-	"xattr",	\
+	"user_xattr",	\
 	"acl",		\
 	"???",		\
 	"???",		\
@@ -422,9 +430,27 @@ enum stats_track_type {
 	"verbose",	\
 	"layout",	\
 	"user_fid2path",\
-	"xattr",	\
+	"xattr_cache",	\
+	"norootsquash",	\
 }
 
+/*
+ * This is embedded into llite super-blocks to keep track of connect
+ * flags (capabilities) supported by all imports given mount is
+ * connected to.
+ */
+struct lustre_client_ocd {
+	/*
+	 * This is conjunction of connect_flags across all imports
+	 * (LOVs) this mount is connected to. This field is updated by
+	 * cl_ocd_update() under ->lco_lock.
+	 */
+	__u64			 lco_flags;
+	struct mutex		 lco_lock;
+	struct obd_export	*lco_md_exp;
+	struct obd_export	*lco_dt_exp;
+};
+
 struct ll_sb_info {
 	/* this protects pglist and ra_info.  It isn't safe to
 	 * grab from interrupt contexts
@@ -461,7 +487,7 @@ struct ll_sb_info {
 	unsigned int	      ll_namelen;
 	struct file_operations   *ll_fop;
 
-	unsigned int	      ll_md_brw_size; /* used by readdir */
+	unsigned int		  ll_md_brw_pages; /* readdir pages per RPC */
 
 	struct lu_site	   *ll_site;
 	struct cl_device	 *ll_cl;
@@ -484,11 +510,17 @@ struct ll_sb_info {
 	atomic_t		  ll_sa_wrong;   /* statahead thread stopped for
 						  * low hit ratio
 						  */
+	atomic_t		ll_sa_running;	/* running statahead thread
+						 * count
+						 */
 	atomic_t		  ll_agl_total;  /* AGL thread started count */
 
 	dev_t			  ll_sdev_orig; /* save s_dev before assign for
 						 * clustered nfs
 						 */
+	/* root squash */
+	struct root_squash_info	  ll_squash;
+
 	__kernel_fsid_t		  ll_fsid;
 	struct kobject		 ll_kobj; /* sysfs object */
 	struct super_block	*ll_sb; /* struct super_block (for sysfs code)*/
@@ -643,25 +675,66 @@ void ll_rw_stats_tally(struct ll_sb_info *sbi, pid_t pid,
 		       struct ll_file_data *file, loff_t pos,
 		       size_t count, int rw);
 
+enum {
+	LPROC_LL_DIRTY_HITS,
+	LPROC_LL_DIRTY_MISSES,
+	LPROC_LL_READ_BYTES,
+	LPROC_LL_WRITE_BYTES,
+	LPROC_LL_BRW_READ,
+	LPROC_LL_BRW_WRITE,
+	LPROC_LL_OSC_READ,
+	LPROC_LL_OSC_WRITE,
+	LPROC_LL_IOCTL,
+	LPROC_LL_OPEN,
+	LPROC_LL_RELEASE,
+	LPROC_LL_MAP,
+	LPROC_LL_LLSEEK,
+	LPROC_LL_FSYNC,
+	LPROC_LL_READDIR,
+	LPROC_LL_SETATTR,
+	LPROC_LL_TRUNC,
+	LPROC_LL_FLOCK,
+	LPROC_LL_GETATTR,
+	LPROC_LL_CREATE,
+	LPROC_LL_LINK,
+	LPROC_LL_UNLINK,
+	LPROC_LL_SYMLINK,
+	LPROC_LL_MKDIR,
+	LPROC_LL_RMDIR,
+	LPROC_LL_MKNOD,
+	LPROC_LL_RENAME,
+	LPROC_LL_STAFS,
+	LPROC_LL_ALLOC_INODE,
+	LPROC_LL_SETXATTR,
+	LPROC_LL_GETXATTR,
+	LPROC_LL_GETXATTR_HITS,
+	LPROC_LL_LISTXATTR,
+	LPROC_LL_REMOVEXATTR,
+	LPROC_LL_INODE_PERM,
+	LPROC_LL_FILE_OPCODES
+};
+
 /* llite/dir.c */
-void ll_release_page(struct page *page, int remove);
 extern const struct file_operations ll_dir_operations;
 extern const struct inode_operations ll_dir_inode_operations;
-struct page *ll_get_dir_page(struct inode *dir, __u64 hash,
-			     struct ll_dir_chain *chain);
-int ll_dir_read(struct inode *inode, struct dir_context *ctx);
-
+int ll_dir_read(struct inode *inode, __u64 *ppos, struct md_op_data *op_data,
+		struct dir_context *ctx);
 int ll_get_mdt_idx(struct inode *inode);
+int ll_get_mdt_idx_by_fid(struct ll_sb_info *sbi, const struct lu_fid *fid);
+struct page *ll_get_dir_page(struct inode *dir, struct md_op_data *op_data,
+			     __u64 offset);
+void ll_release_page(struct inode *inode, struct page *page, bool remove);
+
 /* llite/namei.c */
 extern const struct inode_operations ll_special_inode_operations;
 
-int ll_objects_destroy(struct ptlrpc_request *request,
-		       struct inode *dir);
 struct inode *ll_iget(struct super_block *sb, ino_t hash,
 		      struct lustre_md *lic);
+int ll_test_inode_by_fid(struct inode *inode, void *opaque);
 int ll_md_blocking_ast(struct ldlm_lock *, struct ldlm_lock_desc *,
 		       void *data, int flag);
 struct dentry *ll_splice_alias(struct inode *inode, struct dentry *de);
+void ll_update_times(struct ptlrpc_request *request, struct inode *inode);
 
 /* llite/rw.c */
 int ll_writepage(struct page *page, struct writeback_control *wbc);
@@ -704,7 +777,10 @@ void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
 			  struct lustre_handle *fh);
 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat);
 struct posix_acl *ll_get_acl(struct inode *inode, int type);
-
+int ll_migrate(struct inode *parent, struct file *file, int mdtidx,
+	       const char *name, int namelen);
+int ll_get_fid_by_name(struct inode *parent, const char *name,
+		       int namelen, struct lu_fid *fid);
 int ll_inode_permission(struct inode *inode, int mask);
 
 int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry,
@@ -715,8 +791,8 @@ int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
 			     struct ptlrpc_request **request);
 int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump,
 		     int set_default);
-int ll_dir_getstripe(struct inode *inode, struct lov_mds_md **lmmp,
-		     int *lmm_size, struct ptlrpc_request **request);
+int ll_dir_getstripe(struct inode *inode, void **lmmp, int *lmm_size,
+		     struct ptlrpc_request **request, u64 valid);
 int ll_fsync(struct file *file, loff_t start, loff_t end, int data);
 int ll_merge_attr(const struct lu_env *env, struct inode *inode);
 int ll_fid2path(struct inode *inode, void __user *arg);
@@ -748,8 +824,8 @@ int ll_setattr(struct dentry *de, struct iattr *attr);
 int ll_statfs(struct dentry *de, struct kstatfs *sfs);
 int ll_statfs_internal(struct super_block *sb, struct obd_statfs *osfs,
 		       __u64 max_age, __u32 flags);
-void ll_update_inode(struct inode *inode, struct lustre_md *md);
-void ll_read_inode2(struct inode *inode, void *opaque);
+int ll_update_inode(struct inode *inode, struct lustre_md *md);
+int ll_read_inode2(struct inode *inode, void *opaque);
 void ll_delete_inode(struct inode *inode);
 int ll_iocontrol(struct inode *inode, struct file *file,
 		 unsigned int cmd, unsigned long arg);
@@ -763,15 +839,46 @@ int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req,
 int ll_obd_statfs(struct inode *inode, void __user *arg);
 int ll_get_max_mdsize(struct ll_sb_info *sbi, int *max_mdsize);
 int ll_get_default_mdsize(struct ll_sb_info *sbi, int *default_mdsize);
+int ll_set_default_mdsize(struct ll_sb_info *sbi, int default_mdsize);
 int ll_process_config(struct lustre_cfg *lcfg);
+
+enum {
+	LUSTRE_OPC_MKDIR	= 0,
+	LUSTRE_OPC_SYMLINK	= 1,
+	LUSTRE_OPC_MKNOD	= 2,
+	LUSTRE_OPC_CREATE	= 3,
+	LUSTRE_OPC_ANY		= 5,
+};
+
 struct md_op_data *ll_prep_md_op_data(struct md_op_data *op_data,
 				      struct inode *i1, struct inode *i2,
-				      const char *name, int namelen,
-				      int mode, __u32 opc, void *data);
+				      const char *name, size_t namelen,
+				      u32 mode, __u32 opc, void *data);
 void ll_finish_md_op_data(struct md_op_data *op_data);
 int ll_get_obd_name(struct inode *inode, unsigned int cmd, unsigned long arg);
 char *ll_get_fsname(struct super_block *sb, char *buf, int buflen);
+void ll_compute_rootsquash_state(struct ll_sb_info *sbi);
 void ll_open_cleanup(struct super_block *sb, struct ptlrpc_request *open_req);
+ssize_t ll_copy_user_md(const struct lov_user_md __user *md,
+			struct lov_user_md **kbuf);
+
+/* Compute expected user md size when passing in a md from user space */
+static inline ssize_t ll_lov_user_md_size(const struct lov_user_md *lum)
+{
+	switch (lum->lmm_magic) {
+	case LOV_USER_MAGIC_V1:
+		return sizeof(struct lov_user_md_v1);
+	case LOV_USER_MAGIC_V3:
+		return sizeof(struct lov_user_md_v3);
+	case LOV_USER_MAGIC_SPECIFIC:
+		if (lum->lmm_stripe_count > LOV_MAX_STRIPE_COUNT)
+			return -EINVAL;
+
+		return lov_user_md_size(lum->lmm_stripe_count,
+					LOV_USER_MAGIC_SPECIFIC);
+	}
+	return -EINVAL;
+}
 
 /* llite/llite_nfs.c */
 extern const struct export_operations lustre_export_operations;
@@ -779,6 +886,7 @@ __u32 get_uuid2int(const char *name, int len);
 void get_uuid2fsid(const char *name, int len, __kernel_fsid_t *fsid);
 struct inode *search_inode_for_lustre(struct super_block *sb,
 				      const struct lu_fid *fid);
+int ll_dir_get_parent_fid(struct inode *dir, struct lu_fid *parent_fid);
 
 /* llite/symlink.c */
 extern const struct inode_operations ll_fast_symlink_inode_operations;
@@ -933,12 +1041,19 @@ static inline __u64 ll_file_maxbytes(struct inode *inode)
 }
 
 /* llite/xattr.c */
-int ll_setxattr(struct dentry *dentry, struct inode *inode,
-		const char *name, const void *value, size_t size, int flags);
-ssize_t ll_getxattr(struct dentry *dentry, struct inode *inode,
-		    const char *name, void *buffer, size_t size);
+extern const struct xattr_handler *ll_xattr_handlers[];
+
+#define XATTR_USER_T		1
+#define XATTR_TRUSTED_T		2
+#define XATTR_SECURITY_T	3
+#define XATTR_ACL_ACCESS_T	4
+#define XATTR_ACL_DEFAULT_T	5
+#define XATTR_LUSTRE_T		6
+#define XATTR_OTHER_T		7
+
 ssize_t ll_listxattr(struct dentry *dentry, char *buffer, size_t size);
-int ll_removexattr(struct dentry *dentry, const char *name);
+int ll_xattr_list(struct inode *inode, const char *name, int type,
+		  void *buffer, size_t size, __u64 valid);
 
 /**
  * Common IO arguments for various VFS I/O interfaces.
@@ -964,11 +1079,10 @@ void ll_ra_stats_inc(struct inode *inode, enum ra_stat which);
 
 /* per inode struct, for dir only */
 struct ll_statahead_info {
-	struct inode	   *sai_inode;
+	struct dentry	   *sai_dentry;
 	atomic_t	    sai_refcount;   /* when access this struct, hold
 					     * refcount
 					     */
-	unsigned int	    sai_generation; /* generation for statahead */
 	unsigned int	    sai_max;	/* max ahead of lookup */
 	__u64		   sai_sent;       /* stat requests sent count */
 	__u64		   sai_replied;    /* stat requests which received
@@ -995,22 +1109,25 @@ struct ll_statahead_info {
 	unsigned int	    sai_ls_all:1,   /* "ls -al", do stat-ahead for
 					     * hidden entries
 					     */
-				sai_agl_valid:1;/* AGL is valid for the dir */
+				sai_agl_valid:1,/* AGL is valid for the dir */
+				sai_in_readpage:1;/* statahead is in readdir() */
 	wait_queue_head_t	sai_waitq;      /* stat-ahead wait queue */
 	struct ptlrpc_thread    sai_thread;     /* stat-ahead thread */
 	struct ptlrpc_thread    sai_agl_thread; /* AGL thread */
-	struct list_head	sai_entries;    /* entry list */
-	struct list_head	sai_entries_received; /* entries returned */
-	struct list_head	sai_entries_stated;   /* entries stated */
-	struct list_head	sai_entries_agl; /* AGL entries to be sent */
+	struct list_head	sai_interim_entries; /* entries which got async
+						      * stat reply, but not
+						      * instantiated
+						      */
+	struct list_head	sai_entries;	/* completed entries */
+	struct list_head	sai_agls;	/* AGLs to be sent */
 	struct list_head	sai_cache[LL_SA_CACHE_SIZE];
 	spinlock_t		sai_cache_lock[LL_SA_CACHE_SIZE];
 	atomic_t		sai_cache_count; /* entry count in cache */
 };
 
-int do_statahead_enter(struct inode *dir, struct dentry **dentry,
-		       int only_unplug);
-void ll_stop_statahead(struct inode *dir, void *key);
+int ll_statahead(struct inode *dir, struct dentry **dentry, bool unplug);
+void ll_authorize_statahead(struct inode *dir, void *key);
+void ll_deauthorize_statahead(struct inode *dir, void *key);
 
 blkcnt_t dirty_cnt(struct inode *inode);
 
@@ -1040,73 +1157,53 @@ static inline int ll_glimpse_size(struct inode *inode)
 	return rc;
 }
 
-static inline void
-ll_statahead_mark(struct inode *dir, struct dentry *dentry)
-{
-	struct ll_inode_info     *lli = ll_i2info(dir);
-	struct ll_statahead_info *sai = lli->lli_sai;
-	struct ll_dentry_data    *ldd = ll_d2d(dentry);
-
-	/* not the same process, don't mark */
-	if (lli->lli_opendir_pid != current_pid())
-		return;
-
-	LASSERT(ldd);
-	if (sai)
-		ldd->lld_sa_generation = sai->sai_generation;
-}
-
-static inline int
-d_need_statahead(struct inode *dir, struct dentry *dentryp)
+/*
+ * dentry may statahead when statahead is enabled and current process has opened
+ * parent directory, and this dentry hasn't accessed statahead cache before
+ */
+static inline bool
+dentry_may_statahead(struct inode *dir, struct dentry *dentry)
 {
 	struct ll_inode_info  *lli;
 	struct ll_dentry_data *ldd;
 
 	if (ll_i2sbi(dir)->ll_sa_max == 0)
-		return -EAGAIN;
+		return false;
 
 	lli = ll_i2info(dir);
+
+	/*
+	 * statahead is not allowed for this dir, there may be three causes:
+	 * 1. dir is not opened.
+	 * 2. statahead hit ratio is too low.
+	 * 3. previous stat started statahead thread failed.
+	 */
+	if (!lli->lli_sa_enabled)
+		return false;
+
 	/* not the same process, don't statahead */
 	if (lli->lli_opendir_pid != current_pid())
-		return -EAGAIN;
-
-	/* statahead has been stopped */
-	if (!lli->lli_opendir_key)
-		return -EAGAIN;
+		return false;
 
-	ldd = ll_d2d(dentryp);
 	/*
-	 * When stats a dentry, the system trigger more than once "revalidate"
-	 * or "lookup", for "getattr", for "getxattr", and maybe for others.
-	 * Under patchless client mode, the operation intent is not accurate,
-	 * which maybe misguide the statahead thread. For example:
-	 * The "revalidate" call for "getattr" and "getxattr" of a dentry maybe
-	 * have the same operation intent -- "IT_GETATTR".
-	 * In fact, one dentry should has only one chance to interact with the
-	 * statahead thread, otherwise the statahead windows will be confused.
+	 * When stating a dentry, kernel may trigger 'revalidate' or 'lookup'
+	 * multiple times, eg. for 'getattr', 'getxattr' and etc.
+	 * For patchless client, lookup intent is not accurate, which may
+	 * misguide statahead. For example:
+	 * The 'revalidate' call for 'getattr' and 'getxattr' of a dentry will
+	 * have the same intent -- IT_GETATTR, while one dentry should access
+	 * statahead cache once, otherwise statahead windows is messed up.
 	 * The solution is as following:
-	 * Assign "lld_sa_generation" with "sai_generation" when a dentry
-	 * "IT_GETATTR" for the first time, and the subsequent "IT_GETATTR"
-	 * will bypass interacting with statahead thread for checking:
-	 * "lld_sa_generation == lli_sai->sai_generation"
+	 * Assign 'lld_sa_generation' with 'lli_sa_generation' when a dentry
+	 * IT_GETATTR for the first time, and subsequent IT_GETATTR will
+	 * bypass interacting with statahead cache by checking
+	 * 'lld_sa_generation == lli->lli_sa_generation'.
 	 */
-	if (ldd && lli->lli_sai &&
-	    ldd->lld_sa_generation == lli->lli_sai->sai_generation)
-		return -EAGAIN;
+	ldd = ll_d2d(dentry);
+	if (ldd && ldd->lld_sa_generation == lli->lli_sa_generation)
+		return false;
 
-	return 1;
-}
-
-static inline int
-ll_statahead_enter(struct inode *dir, struct dentry **dentryp, int only_unplug)
-{
-	int ret;
-
-	ret = d_need_statahead(dir, *dentryp);
-	if (ret <= 0)
-		return ret;
-
-	return do_statahead_enter(dir, dentryp, only_unplug);
+	return true;
 }
 
 /* llite ioctl register support routine */
@@ -1213,7 +1310,7 @@ static inline void ll_set_lock_data(struct obd_export *exp, struct inode *inode,
 			CDEBUG(D_DLMTRACE, "setting l_data to inode "DFID"%p for remote lock %#llx\n",
 			       PFID(ll_inode2fid(inode)), inode,
 			       handle.cookie);
-			md_set_lock_data(exp, &handle.cookie, inode, NULL);
+			md_set_lock_data(exp, &handle, inode, NULL);
 		}
 
 		handle.cookie = it->it_lock_handle;
@@ -1221,8 +1318,7 @@ static inline void ll_set_lock_data(struct obd_export *exp, struct inode *inode,
 		CDEBUG(D_DLMTRACE, "setting l_data to inode "DFID"%p for lock %#llx\n",
 		       PFID(ll_inode2fid(inode)), inode, handle.cookie);
 
-		md_set_lock_data(exp, &handle.cookie, inode,
-				 &it->it_lock_bits);
+		md_set_lock_data(exp, &handle, inode, &it->it_lock_bits);
 		it->it_lock_set = 1;
 	}
 
@@ -1295,6 +1391,8 @@ void ll_xattr_fini(void);
 int ll_page_sync_io(const struct lu_env *env, struct cl_io *io,
 		    struct cl_page *page, enum cl_req_type crt);
 
+int ll_getparent(struct file *file, struct getparent __user *arg);
+
 /* lcommon_cl.c */
 int cl_setattr_ost(struct inode *inode, const struct iattr *attr);
 
diff --git a/drivers/staging/lustre/lustre/llite/llite_lib.c b/drivers/staging/lustre/lustre/llite/llite_lib.c
index 546063e728db..6bb41b09172e 100644
--- a/drivers/staging/lustre/lustre/llite/llite_lib.c
+++ b/drivers/staging/lustre/lustre/llite/llite_lib.c
@@ -41,7 +41,7 @@
 #include <linux/types.h>
 #include <linux/mm.h>
 
-#include "../include/lustre_lite.h"
+#include "../include/lustre/lustre_ioctl.h"
 #include "../include/lustre_ha.h"
 #include "../include/lustre_dlm.h"
 #include "../include/lprocfs_status.h"
@@ -115,9 +115,16 @@ static struct ll_sb_info *ll_init_sbi(struct super_block *sb)
 	sbi->ll_sa_max = LL_SA_RPC_DEF;
 	atomic_set(&sbi->ll_sa_total, 0);
 	atomic_set(&sbi->ll_sa_wrong, 0);
+	atomic_set(&sbi->ll_sa_running, 0);
 	atomic_set(&sbi->ll_agl_total, 0);
 	sbi->ll_flags |= LL_SBI_AGL_ENABLED;
 
+	/* root squash */
+	sbi->ll_squash.rsi_uid = 0;
+	sbi->ll_squash.rsi_gid = 0;
+	INIT_LIST_HEAD(&sbi->ll_squash.rsi_nosquash_nids);
+	init_rwsem(&sbi->ll_squash.rsi_sem);
+
 	sbi->ll_sb = sb;
 
 	return sbi;
@@ -128,6 +135,8 @@ static void ll_free_sbi(struct super_block *sb)
 	struct ll_sb_info *sbi = ll_s2sbi(sb);
 
 	if (sbi->ll_cache) {
+		if (!list_empty(&sbi->ll_squash.rsi_nosquash_nids))
+			cfs_free_nidlist(&sbi->ll_squash.rsi_nosquash_nids);
 		cl_cache_decref(sbi->ll_cache);
 		sbi->ll_cache = NULL;
 	}
@@ -180,7 +189,9 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
 				  OBD_CONNECT_PINGLESS |
 				  OBD_CONNECT_MAX_EASIZE |
 				  OBD_CONNECT_FLOCK_DEAD |
-				  OBD_CONNECT_DISP_STRIPE;
+				  OBD_CONNECT_DISP_STRIPE | OBD_CONNECT_LFSCK |
+				  OBD_CONNECT_OPEN_BY_FID |
+				  OBD_CONNECT_DIR_STRIPE;
 
 	if (sbi->ll_flags & LL_SBI_SOM_PREVIEW)
 		data->ocd_connect_flags |= OBD_CONNECT_SOM;
@@ -310,9 +321,9 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
 		sbi->ll_flags |= LL_SBI_64BIT_HASH;
 
 	if (data->ocd_connect_flags & OBD_CONNECT_BRW_SIZE)
-		sbi->ll_md_brw_size = data->ocd_brw_size;
+		sbi->ll_md_brw_pages = data->ocd_brw_size >> PAGE_SHIFT;
 	else
-		sbi->ll_md_brw_size = PAGE_SIZE;
+		sbi->ll_md_brw_pages = 1;
 
 	if (data->ocd_connect_flags & OBD_CONNECT_LAYOUTLOCK)
 		sbi->ll_flags |= LL_SBI_LAYOUT_LOCK;
@@ -418,6 +429,7 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
 	CDEBUG(D_SUPER, "rootfid "DFID"\n", PFID(&sbi->ll_root_fid));
 
 	sb->s_op = &lustre_super_operations;
+	sb->s_xattr = ll_xattr_handlers;
 #if THREAD_SIZE >= 8192 /*b=17630*/
 	sb->s_export_op = &lustre_export_operations;
 #endif
@@ -462,7 +474,7 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
 	md_free_lustre_md(sbi->ll_md_exp, &lmd);
 	ptlrpc_req_finished(request);
 
-	if (!(root)) {
+	if (IS_ERR(root)) {
 		if (lmd.lsm)
 			obd_free_memmd(sbi->ll_dt_exp, &lmd.lsm);
 #ifdef CONFIG_FS_POSIX_ACL
@@ -486,11 +498,21 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
 	err = obd_set_info_async(NULL, sbi->ll_dt_exp, sizeof(KEY_CHECKSUM),
 				 KEY_CHECKSUM, sizeof(checksum), &checksum,
 				 NULL);
+	if (err) {
+		CERROR("%s: Set checksum failed: rc = %d\n",
+		       sbi->ll_dt_exp->exp_obd->obd_name, err);
+		goto out_root;
+	}
 	cl_sb_init(sb);
 
 	err = obd_set_info_async(NULL, sbi->ll_dt_exp, sizeof(KEY_CACHE_SET),
 				 KEY_CACHE_SET, sizeof(*sbi->ll_cache),
 				 sbi->ll_cache, NULL);
+	if (err) {
+		CERROR("%s: Set cache_set failed: rc = %d\n",
+		       sbi->ll_dt_exp->exp_obd->obd_name, err);
+		goto out_root;
+	}
 
 	sb->s_root = d_make_root(root);
 	if (!sb->s_root) {
@@ -560,6 +582,17 @@ int ll_get_max_mdsize(struct ll_sb_info *sbi, int *lmmsize)
 	return rc;
 }
 
+/**
+ * Get the value of the default_easize parameter.
+ *
+ * \see client_obd::cl_default_mds_easize
+ *
+ * \param[in]  sbi	superblock info for this filesystem
+ * \param[out] lmmsize	pointer to storage location for value
+ *
+ * \retval 0		on success
+ * \retval negative	negated errno on failure
+ */
 int ll_get_default_mdsize(struct ll_sb_info *sbi, int *lmmsize)
 {
 	int size, rc;
@@ -573,6 +606,29 @@ int ll_get_default_mdsize(struct ll_sb_info *sbi, int *lmmsize)
 	return rc;
 }
 
+/**
+ * Set the default_easize parameter to the given value.
+ *
+ * \see client_obd::cl_default_mds_easize
+ *
+ * \param[in] sbi	superblock info for this filesystem
+ * \param[in] lmmsize	the size to set
+ *
+ * \retval 0		on success
+ * \retval negative	negated errno on failure
+ */
+int ll_set_default_mdsize(struct ll_sb_info *sbi, int lmmsize)
+{
+	if (lmmsize < sizeof(struct lov_mds_md) ||
+	    lmmsize > OBD_MAX_DEFAULT_EA_SIZE)
+		return -EINVAL;
+
+	return obd_set_info_async(NULL, sbi->ll_md_exp,
+				  sizeof(KEY_DEFAULT_EASIZE),
+				  KEY_DEFAULT_EASIZE,
+				  sizeof(int), &lmmsize, NULL);
+}
+
 static void client_common_put_super(struct super_block *sb)
 {
 	struct ll_sb_info *sbi = ll_s2sbi(sb);
@@ -608,6 +664,12 @@ void ll_kill_super(struct super_block *sb)
 	if (sbi) {
 		sb->s_dev = sbi->ll_sdev_orig;
 		sbi->ll_umounting = 1;
+
+		/* wait running statahead threads to quit */
+		while (atomic_read(&sbi->ll_sa_running) > 0) {
+			set_current_state(TASK_UNINTERRUPTIBLE);
+			schedule_timeout(msecs_to_jiffies(MSEC_PER_SEC >> 3));
+		}
 	}
 }
 
@@ -647,7 +709,8 @@ static int ll_options(char *options, int *flags)
 			*flags |= tmp;
 			goto next;
 		}
-		tmp = ll_set_opt("noflock", s1, LL_SBI_FLOCK|LL_SBI_LOCALFLOCK);
+		tmp = ll_set_opt("noflock", s1,
+				 LL_SBI_FLOCK | LL_SBI_LOCALFLOCK);
 		if (tmp) {
 			*flags &= ~tmp;
 			goto next;
@@ -772,11 +835,13 @@ void ll_lli_init(struct ll_inode_info *lli)
 		lli->lli_sai = NULL;
 		spin_lock_init(&lli->lli_sa_lock);
 		lli->lli_opendir_pid = 0;
+		lli->lli_sa_enabled = 0;
+		lli->lli_def_stripe_offset = -1;
 	} else {
 		mutex_init(&lli->lli_size_mutex);
 		lli->lli_symlink_name = NULL;
 		init_rwsem(&lli->lli_trunc_sem);
-		mutex_init(&lli->lli_write_mutex);
+		range_lock_tree_init(&lli->lli_write_tree);
 		init_rwsem(&lli->lli_glimpse_sem);
 		lli->lli_glimpse_time = 0;
 		INIT_LIST_HEAD(&lli->lli_agl_list);
@@ -896,7 +961,8 @@ void ll_put_super(struct super_block *sb)
 	struct lustre_sb_info *lsi = s2lsi(sb);
 	struct ll_sb_info *sbi = ll_s2sbi(sb);
 	char *profilenm = get_profile_name(sb);
-	int ccc_count, next, force = 1, rc = 0;
+	int next, force = 1, rc = 0;
+	long ccc_count;
 
 	CDEBUG(D_VFSTRACE, "VFS Op: sb %p - %s\n", sb, profilenm);
 
@@ -917,13 +983,13 @@ void ll_put_super(struct super_block *sb)
 		struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL);
 
 		rc = l_wait_event(sbi->ll_cache->ccc_unstable_waitq,
-				  !atomic_read(&sbi->ll_cache->ccc_unstable_nr),
+				  !atomic_long_read(&sbi->ll_cache->ccc_unstable_nr),
 				  &lwi);
 	}
 
-	ccc_count = atomic_read(&sbi->ll_cache->ccc_unstable_nr);
+	ccc_count = atomic_long_read(&sbi->ll_cache->ccc_unstable_nr);
 	if (!force && rc != -EINTR)
-		LASSERTF(!ccc_count, "count: %i\n", ccc_count);
+		LASSERTF(!ccc_count, "count: %li\n", ccc_count);
 
 	/* We need to set force before the lov_disconnect in
 	 * lustre_common_put_super, since l_d cleans up osc's as well.
@@ -991,6 +1057,206 @@ struct inode *ll_inode_from_resource_lock(struct ldlm_lock *lock)
 	return inode;
 }
 
+static void ll_dir_clear_lsm_md(struct inode *inode)
+{
+	struct ll_inode_info *lli = ll_i2info(inode);
+
+	LASSERT(S_ISDIR(inode->i_mode));
+
+	if (lli->lli_lsm_md) {
+		lmv_free_memmd(lli->lli_lsm_md);
+		lli->lli_lsm_md = NULL;
+	}
+}
+
+static struct inode *ll_iget_anon_dir(struct super_block *sb,
+				      const struct lu_fid *fid,
+				      struct lustre_md *md)
+{
+	struct ll_sb_info *sbi = ll_s2sbi(sb);
+	struct mdt_body *body = md->body;
+	struct inode *inode;
+	ino_t ino;
+
+	ino = cl_fid_build_ino(fid, sbi->ll_flags & LL_SBI_32BIT_API);
+	inode = iget_locked(sb, ino);
+	if (!inode) {
+		CERROR("%s: failed get simple inode "DFID": rc = -ENOENT\n",
+		       ll_get_fsname(sb, NULL, 0), PFID(fid));
+		return ERR_PTR(-ENOENT);
+	}
+
+	if (inode->i_state & I_NEW) {
+		struct ll_inode_info *lli = ll_i2info(inode);
+		struct lmv_stripe_md *lsm = md->lmv;
+
+		inode->i_mode = (inode->i_mode & ~S_IFMT) |
+				(body->mbo_mode & S_IFMT);
+		LASSERTF(S_ISDIR(inode->i_mode), "Not slave inode "DFID"\n",
+			 PFID(fid));
+
+		LTIME_S(inode->i_mtime) = 0;
+		LTIME_S(inode->i_atime) = 0;
+		LTIME_S(inode->i_ctime) = 0;
+		inode->i_rdev = 0;
+
+		inode->i_op = &ll_dir_inode_operations;
+		inode->i_fop = &ll_dir_operations;
+		lli->lli_fid = *fid;
+		ll_lli_init(lli);
+
+		LASSERT(lsm);
+		/* master object FID */
+		lli->lli_pfid = body->mbo_fid1;
+		CDEBUG(D_INODE, "lli %p slave "DFID" master "DFID"\n",
+		       lli, PFID(fid), PFID(&lli->lli_pfid));
+		unlock_new_inode(inode);
+	}
+
+	return inode;
+}
+
+static int ll_init_lsm_md(struct inode *inode, struct lustre_md *md)
+{
+	struct lmv_stripe_md *lsm = md->lmv;
+	struct lu_fid *fid;
+	int i;
+
+	LASSERT(lsm);
+	/*
+	 * XXX sigh, this lsm_root initialization should be in
+	 * LMV layer, but it needs ll_iget right now, so we
+	 * put this here right now.
+	 */
+	for (i = 0; i < lsm->lsm_md_stripe_count; i++) {
+		fid = &lsm->lsm_md_oinfo[i].lmo_fid;
+		LASSERT(!lsm->lsm_md_oinfo[i].lmo_root);
+		/* Unfortunately ll_iget will call ll_update_inode,
+		 * where the initialization of slave inode is slightly
+		 * different, so it reset lsm_md to NULL to avoid
+		 * initializing lsm for slave inode.
+		 */
+		/* For migrating inode, master stripe and master object will
+		 * be same, so we only need assign this inode
+		 */
+		if (lsm->lsm_md_hash_type & LMV_HASH_FLAG_MIGRATION && !i)
+			lsm->lsm_md_oinfo[i].lmo_root = inode;
+		else
+			lsm->lsm_md_oinfo[i].lmo_root =
+				ll_iget_anon_dir(inode->i_sb, fid, md);
+		if (IS_ERR(lsm->lsm_md_oinfo[i].lmo_root)) {
+			int rc = PTR_ERR(lsm->lsm_md_oinfo[i].lmo_root);
+
+			lsm->lsm_md_oinfo[i].lmo_root = NULL;
+			return rc;
+		}
+	}
+
+	return 0;
+}
+
+static inline int lli_lsm_md_eq(const struct lmv_stripe_md *lsm_md1,
+				const struct lmv_stripe_md *lsm_md2)
+{
+	return lsm_md1->lsm_md_magic == lsm_md2->lsm_md_magic &&
+	       lsm_md1->lsm_md_stripe_count == lsm_md2->lsm_md_stripe_count &&
+	       lsm_md1->lsm_md_master_mdt_index ==
+			lsm_md2->lsm_md_master_mdt_index &&
+	       lsm_md1->lsm_md_hash_type == lsm_md2->lsm_md_hash_type &&
+	       lsm_md1->lsm_md_layout_version ==
+			lsm_md2->lsm_md_layout_version &&
+	       !strcmp(lsm_md1->lsm_md_pool_name,
+		       lsm_md2->lsm_md_pool_name);
+}
+
+static int ll_update_lsm_md(struct inode *inode, struct lustre_md *md)
+{
+	struct ll_inode_info *lli = ll_i2info(inode);
+	struct lmv_stripe_md *lsm = md->lmv;
+	int rc;
+
+	LASSERT(S_ISDIR(inode->i_mode));
+	CDEBUG(D_INODE, "update lsm %p of "DFID"\n", lli->lli_lsm_md,
+	       PFID(ll_inode2fid(inode)));
+
+	/* no striped information from request. */
+	if (!lsm) {
+		if (!lli->lli_lsm_md) {
+			return 0;
+		} else if (lli->lli_lsm_md->lsm_md_hash_type &
+			   LMV_HASH_FLAG_MIGRATION) {
+			/*
+			 * migration is done, the temporay MIGRATE layout has
+			 * been removed
+			 */
+			CDEBUG(D_INODE, DFID" finish migration.\n",
+			       PFID(ll_inode2fid(inode)));
+			lmv_free_memmd(lli->lli_lsm_md);
+			lli->lli_lsm_md = NULL;
+			return 0;
+		} else {
+			/*
+			 * The lustre_md from req does not include stripeEA,
+			 * see ll_md_setattr
+			 */
+			return 0;
+		}
+	}
+
+	/* set the directory layout */
+	if (!lli->lli_lsm_md) {
+		rc = ll_init_lsm_md(inode, md);
+		if (rc)
+			return rc;
+
+		lli->lli_lsm_md = lsm;
+		/*
+		 * set lsm_md to NULL, so the following free lustre_md
+		 * will not free this lsm
+		 */
+		md->lmv = NULL;
+		CDEBUG(D_INODE, "Set lsm %p magic %x to "DFID"\n", lsm,
+		       lsm->lsm_md_magic, PFID(ll_inode2fid(inode)));
+		return 0;
+	}
+
+	/* Compare the old and new stripe information */
+	if (!lsm_md_eq(lli->lli_lsm_md, lsm)) {
+		struct lmv_stripe_md *old_lsm = lli->lli_lsm_md;
+		int idx;
+
+		CERROR("%s: inode "DFID"(%p)'s lmv layout mismatch (%p)/(%p) magic:0x%x/0x%x stripe count: %d/%d master_mdt: %d/%d hash_type:0x%x/0x%x layout: 0x%x/0x%x pool:%s/%s\n",
+		       ll_get_fsname(inode->i_sb, NULL, 0), PFID(&lli->lli_fid),
+		       inode, lsm, old_lsm,
+		       lsm->lsm_md_magic, old_lsm->lsm_md_magic,
+		       lsm->lsm_md_stripe_count,
+		       old_lsm->lsm_md_stripe_count,
+		       lsm->lsm_md_master_mdt_index,
+		       old_lsm->lsm_md_master_mdt_index,
+		       lsm->lsm_md_hash_type, old_lsm->lsm_md_hash_type,
+		       lsm->lsm_md_layout_version,
+		       old_lsm->lsm_md_layout_version,
+		       lsm->lsm_md_pool_name,
+		       old_lsm->lsm_md_pool_name);
+
+		for (idx = 0; idx < old_lsm->lsm_md_stripe_count; idx++) {
+			CERROR("%s: sub FIDs in old lsm idx %d, old: "DFID"\n",
+			       ll_get_fsname(inode->i_sb, NULL, 0), idx,
+			       PFID(&old_lsm->lsm_md_oinfo[idx].lmo_fid));
+		}
+
+		for (idx = 0; idx < lsm->lsm_md_stripe_count; idx++) {
+			CERROR("%s: sub FIDs in new lsm idx %d, new: "DFID"\n",
+			       ll_get_fsname(inode->i_sb, NULL, 0), idx,
+			       PFID(&lsm->lsm_md_oinfo[idx].lmo_fid));
+		}
+
+		return -EIO;
+	}
+
+	return 0;
+}
+
 void ll_clear_inode(struct inode *inode)
 {
 	struct ll_inode_info *lli = ll_i2info(inode);
@@ -1031,14 +1297,15 @@ void ll_clear_inode(struct inode *inode)
 
 #ifdef CONFIG_FS_POSIX_ACL
 	if (lli->lli_posix_acl) {
-		LASSERT(atomic_read(&lli->lli_posix_acl->a_refcount) == 1);
 		posix_acl_release(lli->lli_posix_acl);
 		lli->lli_posix_acl = NULL;
 	}
 #endif
 	lli->lli_inode_magic = LLI_INODE_DEAD;
 
-	if (!S_ISDIR(inode->i_mode))
+	if (S_ISDIR(inode->i_mode))
+		ll_dir_clear_lsm_md(inode);
+	if (S_ISREG(inode->i_mode) && !is_bad_inode(inode))
 		LASSERT(list_empty(&lli->lli_agl_list));
 
 	/*
@@ -1103,10 +1370,10 @@ static int ll_md_setattr(struct dentry *dentry, struct md_op_data *op_data,
 	op_data->op_attr.ia_valid = ia_valid;
 
 	/* Extract epoch data if obtained. */
-	op_data->op_handle = md.body->handle;
-	op_data->op_ioepoch = md.body->ioepoch;
+	op_data->op_handle = md.body->mbo_handle;
+	op_data->op_ioepoch = md.body->mbo_ioepoch;
 
-	ll_update_inode(inode, &md);
+	rc = ll_update_inode(inode, &md);
 	ptlrpc_req_finished(request);
 
 	return rc;
@@ -1138,8 +1405,8 @@ static int ll_setattr_done_writing(struct inode *inode,
 		rc = ll_som_update(inode, op_data);
 	else if (rc) {
 		CERROR("%s: inode "DFID" mdc truncate failed: rc = %d\n",
-		      ll_i2sbi(inode)->ll_md_exp->exp_obd->obd_name,
-		      PFID(ll_inode2fid(inode)), rc);
+		       ll_i2sbi(inode)->ll_md_exp->exp_obd->obd_name,
+		       PFID(ll_inode2fid(inode)), rc);
 	}
 	return rc;
 }
@@ -1331,14 +1598,14 @@ int ll_setattr(struct dentry *de, struct iattr *attr)
 {
 	int mode = d_inode(de)->i_mode;
 
-	if ((attr->ia_valid & (ATTR_CTIME|ATTR_SIZE|ATTR_MODE)) ==
-			      (ATTR_CTIME|ATTR_SIZE|ATTR_MODE))
+	if ((attr->ia_valid & (ATTR_CTIME | ATTR_SIZE | ATTR_MODE)) ==
+			      (ATTR_CTIME | ATTR_SIZE | ATTR_MODE))
 		attr->ia_valid |= MDS_OPEN_OWNEROVERRIDE;
 
-	if (((attr->ia_valid & (ATTR_MODE|ATTR_FORCE|ATTR_SIZE)) ==
-			       (ATTR_SIZE|ATTR_MODE)) &&
+	if (((attr->ia_valid & (ATTR_MODE | ATTR_FORCE | ATTR_SIZE)) ==
+			       (ATTR_SIZE | ATTR_MODE)) &&
 	    (((mode & S_ISUID) && !(attr->ia_mode & S_ISUID)) ||
-	     (((mode & (S_ISGID|S_IXGRP)) == (S_ISGID|S_IXGRP)) &&
+	     (((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) &&
 	      !(attr->ia_mode & S_ISGID))))
 		attr->ia_valid |= ATTR_FORCE;
 
@@ -1349,7 +1616,7 @@ int ll_setattr(struct dentry *de, struct iattr *attr)
 		attr->ia_valid |= ATTR_KILL_SUID;
 
 	if ((attr->ia_valid & ATTR_MODE) &&
-	    ((mode & (S_ISGID|S_IXGRP)) == (S_ISGID|S_IXGRP)) &&
+	    ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) &&
 	    !(attr->ia_mode & S_ISGID) &&
 	    !(attr->ia_valid & ATTR_KILL_SGID))
 		attr->ia_valid |= ATTR_KILL_SGID;
@@ -1465,14 +1732,14 @@ void ll_inode_size_unlock(struct inode *inode)
 	mutex_unlock(&lli->lli_size_mutex);
 }
 
-void ll_update_inode(struct inode *inode, struct lustre_md *md)
+int ll_update_inode(struct inode *inode, struct lustre_md *md)
 {
 	struct ll_inode_info *lli = ll_i2info(inode);
 	struct mdt_body *body = md->body;
 	struct lov_stripe_md *lsm = md->lsm;
 	struct ll_sb_info *sbi = ll_i2sbi(inode);
 
-	LASSERT((lsm != NULL) == ((body->valid & OBD_MD_FLEASIZE) != 0));
+	LASSERT((lsm != NULL) == ((body->mbo_valid & OBD_MD_FLEASIZE) != 0));
 	if (lsm) {
 		if (!lli->lli_has_smd &&
 		    !(sbi->ll_flags & LL_SBI_LAYOUT_LOCK))
@@ -1483,8 +1750,16 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md)
 			lli->lli_maxbytes = MAX_LFS_FILESIZE;
 	}
 
+	if (S_ISDIR(inode->i_mode)) {
+		int rc;
+
+		rc = ll_update_lsm_md(inode, md);
+		if (rc)
+			return rc;
+	}
+
 #ifdef CONFIG_FS_POSIX_ACL
-	if (body->valid & OBD_MD_FLACL) {
+	if (body->mbo_valid & OBD_MD_FLACL) {
 		spin_lock(&lli->lli_lock);
 		if (lli->lli_posix_acl)
 			posix_acl_release(lli->lli_posix_acl);
@@ -1492,65 +1767,67 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md)
 		spin_unlock(&lli->lli_lock);
 	}
 #endif
-	inode->i_ino = cl_fid_build_ino(&body->fid1,
+	inode->i_ino = cl_fid_build_ino(&body->mbo_fid1,
 					sbi->ll_flags & LL_SBI_32BIT_API);
-	inode->i_generation = cl_fid_build_gen(&body->fid1);
+	inode->i_generation = cl_fid_build_gen(&body->mbo_fid1);
 
-	if (body->valid & OBD_MD_FLATIME) {
-		if (body->atime > LTIME_S(inode->i_atime))
-			LTIME_S(inode->i_atime) = body->atime;
-		lli->lli_atime = body->atime;
+	if (body->mbo_valid & OBD_MD_FLATIME) {
+		if (body->mbo_atime > LTIME_S(inode->i_atime))
+			LTIME_S(inode->i_atime) = body->mbo_atime;
+		lli->lli_atime = body->mbo_atime;
 	}
-	if (body->valid & OBD_MD_FLMTIME) {
-		if (body->mtime > LTIME_S(inode->i_mtime)) {
+	if (body->mbo_valid & OBD_MD_FLMTIME) {
+		if (body->mbo_mtime > LTIME_S(inode->i_mtime)) {
 			CDEBUG(D_INODE, "setting ino %lu mtime from %lu to %llu\n",
 			       inode->i_ino, LTIME_S(inode->i_mtime),
-			       body->mtime);
-			LTIME_S(inode->i_mtime) = body->mtime;
+			       body->mbo_mtime);
+			LTIME_S(inode->i_mtime) = body->mbo_mtime;
 		}
-		lli->lli_mtime = body->mtime;
-	}
-	if (body->valid & OBD_MD_FLCTIME) {
-		if (body->ctime > LTIME_S(inode->i_ctime))
-			LTIME_S(inode->i_ctime) = body->ctime;
-		lli->lli_ctime = body->ctime;
-	}
-	if (body->valid & OBD_MD_FLMODE)
-		inode->i_mode = (inode->i_mode & S_IFMT)|(body->mode & ~S_IFMT);
-	if (body->valid & OBD_MD_FLTYPE)
-		inode->i_mode = (inode->i_mode & ~S_IFMT)|(body->mode & S_IFMT);
+		lli->lli_mtime = body->mbo_mtime;
+	}
+	if (body->mbo_valid & OBD_MD_FLCTIME) {
+		if (body->mbo_ctime > LTIME_S(inode->i_ctime))
+			LTIME_S(inode->i_ctime) = body->mbo_ctime;
+		lli->lli_ctime = body->mbo_ctime;
+	}
+	if (body->mbo_valid & OBD_MD_FLMODE)
+		inode->i_mode = (inode->i_mode & S_IFMT) |
+				(body->mbo_mode & ~S_IFMT);
+	if (body->mbo_valid & OBD_MD_FLTYPE)
+		inode->i_mode = (inode->i_mode & ~S_IFMT) |
+				(body->mbo_mode & S_IFMT);
 	LASSERT(inode->i_mode != 0);
 	if (S_ISREG(inode->i_mode))
 		inode->i_blkbits = min(PTLRPC_MAX_BRW_BITS + 1,
 				       LL_MAX_BLKSIZE_BITS);
 	else
 		inode->i_blkbits = inode->i_sb->s_blocksize_bits;
-	if (body->valid & OBD_MD_FLUID)
-		inode->i_uid = make_kuid(&init_user_ns, body->uid);
-	if (body->valid & OBD_MD_FLGID)
-		inode->i_gid = make_kgid(&init_user_ns, body->gid);
-	if (body->valid & OBD_MD_FLFLAGS)
-		inode->i_flags = ll_ext_to_inode_flags(body->flags);
-	if (body->valid & OBD_MD_FLNLINK)
-		set_nlink(inode, body->nlink);
-	if (body->valid & OBD_MD_FLRDEV)
-		inode->i_rdev = old_decode_dev(body->rdev);
-
-	if (body->valid & OBD_MD_FLID) {
+	if (body->mbo_valid & OBD_MD_FLUID)
+		inode->i_uid = make_kuid(&init_user_ns, body->mbo_uid);
+	if (body->mbo_valid & OBD_MD_FLGID)
+		inode->i_gid = make_kgid(&init_user_ns, body->mbo_gid);
+	if (body->mbo_valid & OBD_MD_FLFLAGS)
+		inode->i_flags = ll_ext_to_inode_flags(body->mbo_flags);
+	if (body->mbo_valid & OBD_MD_FLNLINK)
+		set_nlink(inode, body->mbo_nlink);
+	if (body->mbo_valid & OBD_MD_FLRDEV)
+		inode->i_rdev = old_decode_dev(body->mbo_rdev);
+
+	if (body->mbo_valid & OBD_MD_FLID) {
 		/* FID shouldn't be changed! */
 		if (fid_is_sane(&lli->lli_fid)) {
-			LASSERTF(lu_fid_eq(&lli->lli_fid, &body->fid1),
+			LASSERTF(lu_fid_eq(&lli->lli_fid, &body->mbo_fid1),
 				 "Trying to change FID "DFID" to the "DFID", inode "DFID"(%p)\n",
-				 PFID(&lli->lli_fid), PFID(&body->fid1),
+				 PFID(&lli->lli_fid), PFID(&body->mbo_fid1),
 				 PFID(ll_inode2fid(inode)), inode);
 		} else {
-			lli->lli_fid = body->fid1;
+			lli->lli_fid = body->mbo_fid1;
 		}
 	}
 
 	LASSERT(fid_seq(&lli->lli_fid) != 0);
 
-	if (body->valid & OBD_MD_FLSIZE) {
+	if (body->mbo_valid & OBD_MD_FLSIZE) {
 		if (exp_connect_som(ll_i2mdexp(inode)) &&
 		    S_ISREG(inode->i_mode)) {
 			struct lustre_handle lockh;
@@ -1577,7 +1854,7 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md)
 					/* Use old size assignment to avoid
 					 * deadlock bz14138 & bz14326
 					 */
-					i_size_write(inode, body->size);
+					i_size_write(inode, body->mbo_size);
 					spin_lock(&lli->lli_lock);
 					lli->lli_flags |= LLIF_MDS_SIZE_LOCK;
 					spin_unlock(&lli->lli_lock);
@@ -1588,26 +1865,29 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md)
 			/* Use old size assignment to avoid
 			 * deadlock bz14138 & bz14326
 			 */
-			i_size_write(inode, body->size);
+			i_size_write(inode, body->mbo_size);
 
 			CDEBUG(D_VFSTRACE, "inode=%lu, updating i_size %llu\n",
-			       inode->i_ino, (unsigned long long)body->size);
+			       inode->i_ino, (unsigned long long)body->mbo_size);
 		}
 
-		if (body->valid & OBD_MD_FLBLOCKS)
-			inode->i_blocks = body->blocks;
+		if (body->mbo_valid & OBD_MD_FLBLOCKS)
+			inode->i_blocks = body->mbo_blocks;
 	}
 
-	if (body->valid & OBD_MD_TSTATE) {
-		if (body->t_state & MS_RESTORE)
+	if (body->mbo_valid & OBD_MD_TSTATE) {
+		if (body->mbo_t_state & MS_RESTORE)
 			lli->lli_flags |= LLIF_FILE_RESTORING;
 	}
+
+	return 0;
 }
 
-void ll_read_inode2(struct inode *inode, void *opaque)
+int ll_read_inode2(struct inode *inode, void *opaque)
 {
 	struct lustre_md *md = opaque;
 	struct ll_inode_info *lli = ll_i2info(inode);
+	int rc;
 
 	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
 	       PFID(&lli->lli_fid), inode);
@@ -1623,7 +1903,9 @@ void ll_read_inode2(struct inode *inode, void *opaque)
 	LTIME_S(inode->i_atime) = 0;
 	LTIME_S(inode->i_ctime) = 0;
 	inode->i_rdev = 0;
-	ll_update_inode(inode, md);
+	rc = ll_update_inode(inode, md);
+	if (rc)
+		return rc;
 
 	/* OIDEBUG(inode); */
 
@@ -1644,6 +1926,8 @@ void ll_read_inode2(struct inode *inode, void *opaque)
 		init_special_inode(inode, inode->i_mode,
 				   inode->i_rdev);
 	}
+
+	return 0;
 }
 
 void ll_delete_inode(struct inode *inode)
@@ -1655,20 +1939,13 @@ void ll_delete_inode(struct inode *inode)
 		 * osc_extent implementation at LU-1030.
 		 */
 		cl_sync_file_range(inode, 0, OBD_OBJECT_EOF,
-				   CL_FSYNC_DISCARD, 1);
+				   CL_FSYNC_LOCAL, 1);
 
 	truncate_inode_pages_final(&inode->i_data);
 
-	/* Workaround for LU-118 */
-	if (inode->i_data.nrpages) {
-		spin_lock_irq(&inode->i_data.tree_lock);
-		spin_unlock_irq(&inode->i_data.tree_lock);
-		LASSERTF(inode->i_data.nrpages == 0,
-			 "inode="DFID"(%p) nrpages=%lu, see http://jira.whamcloud.com/browse/LU-118\n",
-			 PFID(ll_inode2fid(inode)), inode,
-			 inode->i_data.nrpages);
-	}
-	/* Workaround end */
+	LASSERTF(!inode->i_data.nrpages,
+		 "inode=" DFID "(%p) nrpages=%lu, see http://jira.whamcloud.com/browse/LU-118\n",
+		 PFID(ll_inode2fid(inode)), inode, inode->i_data.nrpages);
 
 	ll_clear_inode(inode);
 	clear_inode(inode);
@@ -1704,7 +1981,7 @@ int ll_iocontrol(struct inode *inode, struct file *file,
 
 		body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
 
-		flags = body->flags;
+		flags = body->mbo_flags;
 
 		ptlrpc_req_finished(req);
 
@@ -1886,9 +2163,9 @@ void ll_open_cleanup(struct super_block *sb, struct ptlrpc_request *open_req)
 	if (!op_data)
 		return;
 
-	op_data->op_fid1 = body->fid1;
-	op_data->op_ioepoch = body->ioepoch;
-	op_data->op_handle = body->handle;
+	op_data->op_fid1 = body->mbo_fid1;
+	op_data->op_ioepoch = body->mbo_ioepoch;
+	op_data->op_handle = body->mbo_handle;
 	op_data->op_mod_time = get_seconds();
 	md_close(exp, op_data, NULL, &close_req);
 	ptlrpc_req_finished(close_req);
@@ -1910,7 +2187,9 @@ int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req,
 		goto cleanup;
 
 	if (*inode) {
-		ll_update_inode(*inode, &md);
+		rc = ll_update_inode(*inode, &md);
+		if (rc)
+			goto out;
 	} else {
 		LASSERT(sb);
 
@@ -1918,18 +2197,18 @@ int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req,
 		 * At this point server returns to client's same fid as client
 		 * generated for creating. So using ->fid1 is okay here.
 		 */
-		if (!fid_is_sane(&md.body->fid1)) {
+		if (!fid_is_sane(&md.body->mbo_fid1)) {
 			CERROR("%s: Fid is insane " DFID "\n",
 			       ll_get_fsname(sb, NULL, 0),
-			       PFID(&md.body->fid1));
+			       PFID(&md.body->mbo_fid1));
 			rc = -EINVAL;
 			goto out;
 		}
 
-		*inode = ll_iget(sb, cl_fid_build_ino(&md.body->fid1,
+		*inode = ll_iget(sb, cl_fid_build_ino(&md.body->mbo_fid1,
 					     sbi->ll_flags & LL_SBI_32BIT_API),
 				 &md);
-		if (!*inode) {
+		if (IS_ERR(*inode)) {
 #ifdef CONFIG_FS_POSIX_ACL
 			if (md.posix_acl) {
 				posix_acl_release(md.posix_acl);
@@ -2075,11 +2354,20 @@ int ll_process_config(struct lustre_cfg *lcfg)
 /* this function prepares md_op_data hint for passing ot down to MD stack. */
 struct md_op_data *ll_prep_md_op_data(struct md_op_data *op_data,
 				      struct inode *i1, struct inode *i2,
-				      const char *name, int namelen,
-				      int mode, __u32 opc, void *data)
+				      const char *name, size_t namelen,
+				      u32 mode, __u32 opc, void *data)
 {
-	if (namelen > ll_i2sbi(i1)->ll_namelen)
-		return ERR_PTR(-ENAMETOOLONG);
+	if (!name) {
+		/* Do not reuse namelen for something else. */
+		if (namelen)
+			return ERR_PTR(-EINVAL);
+	} else {
+		if (namelen > ll_i2sbi(i1)->ll_namelen)
+			return ERR_PTR(-ENAMETOOLONG);
+
+		if (!lu_name_is_valid_2(name, namelen))
+			return ERR_PTR(-EINVAL);
+	}
 
 	if (!op_data)
 		op_data = kzalloc(sizeof(*op_data), GFP_NOFS);
@@ -2089,11 +2377,26 @@ struct md_op_data *ll_prep_md_op_data(struct md_op_data *op_data,
 
 	ll_i2gids(op_data->op_suppgids, i1, i2);
 	op_data->op_fid1 = *ll_inode2fid(i1);
+	op_data->op_default_stripe_offset = -1;
+	if (S_ISDIR(i1->i_mode)) {
+		op_data->op_mea1 = ll_i2info(i1)->lli_lsm_md;
+		op_data->op_default_stripe_offset =
+			ll_i2info(i1)->lli_def_stripe_offset;
+	}
 
-	if (i2)
+	if (i2) {
 		op_data->op_fid2 = *ll_inode2fid(i2);
-	else
+		if (S_ISDIR(i2->i_mode))
+			op_data->op_mea2 = ll_i2info(i2)->lli_lsm_md;
+	} else {
 		fid_zero(&op_data->op_fid2);
+	}
+
+	if (ll_i2sbi(i1)->ll_flags & LL_SBI_64BIT_HASH)
+		op_data->op_cli_flags |= CLI_HASH64;
+
+	if (ll_need_32bit_api(ll_i2sbi(i1)))
+		op_data->op_cli_flags |= CLI_API32;
 
 	op_data->op_name = name;
 	op_data->op_namelen = namelen;
@@ -2105,26 +2408,12 @@ struct md_op_data *ll_prep_md_op_data(struct md_op_data *op_data,
 	op_data->op_bias = 0;
 	op_data->op_cli_flags = 0;
 	if ((opc == LUSTRE_OPC_CREATE) && name &&
-	    filename_is_volatile(name, namelen, NULL))
+	    filename_is_volatile(name, namelen, &op_data->op_mds))
 		op_data->op_bias |= MDS_CREATE_VOLATILE;
-	op_data->op_opc = opc;
-	op_data->op_mds = 0;
+	else
+		op_data->op_mds = 0;
 	op_data->op_data = data;
 
-	/* If the file is being opened after mknod() (normally due to NFS)
-	 * try to use the default stripe data from parent directory for
-	 * allocating OST objects.  Try to pass the parent FID to MDS.
-	 */
-	if (opc == LUSTRE_OPC_CREATE && i1 == i2 && S_ISREG(i2->i_mode) &&
-	    !ll_i2info(i2)->lli_has_smd) {
-		struct ll_inode_info *lli = ll_i2info(i2);
-
-		spin_lock(&lli->lli_lock);
-		if (likely(!lli->lli_has_smd && !fid_is_zero(&lli->lli_pfid)))
-			op_data->op_fid1 = lli->lli_pfid;
-		spin_unlock(&lli->lli_lock);
-	}
-
 	/* When called by ll_setattr_raw, file is i1. */
 	if (ll_i2info(i1)->lli_flags & LLIF_DATA_MODIFIED)
 		op_data->op_bias |= MDS_DATA_MODIFIED;
@@ -2251,3 +2540,197 @@ void ll_dirty_page_discard_warn(struct page *page, int ioret)
 	if (buf)
 		free_page((unsigned long)buf);
 }
+
+ssize_t ll_copy_user_md(const struct lov_user_md __user *md,
+			struct lov_user_md **kbuf)
+{
+	struct lov_user_md lum;
+	ssize_t lum_size;
+
+	if (copy_from_user(&lum, md, sizeof(lum))) {
+		lum_size = -EFAULT;
+		goto no_kbuf;
+	}
+
+	lum_size = ll_lov_user_md_size(&lum);
+	if (lum_size < 0)
+		goto no_kbuf;
+
+	*kbuf = kzalloc(lum_size, GFP_NOFS);
+	if (!*kbuf) {
+		lum_size = -ENOMEM;
+		goto no_kbuf;
+	}
+
+	if (copy_from_user(*kbuf, md, lum_size) != 0) {
+		kfree(*kbuf);
+		*kbuf = NULL;
+		lum_size = -EFAULT;
+	}
+no_kbuf:
+	return lum_size;
+}
+
+/*
+ * Compute llite root squash state after a change of root squash
+ * configuration setting or add/remove of a lnet nid
+ */
+void ll_compute_rootsquash_state(struct ll_sb_info *sbi)
+{
+	struct root_squash_info *squash = &sbi->ll_squash;
+	lnet_process_id_t id;
+	bool matched;
+	int i;
+
+	/* Update norootsquash flag */
+	down_write(&squash->rsi_sem);
+	if (list_empty(&squash->rsi_nosquash_nids)) {
+		sbi->ll_flags &= ~LL_SBI_NOROOTSQUASH;
+	} else {
+		/*
+		 * Do not apply root squash as soon as one of our NIDs is
+		 * in the nosquash_nids list
+		 */
+		matched = false;
+		i = 0;
+
+		while (LNetGetId(i++, &id) != -ENOENT) {
+			if (LNET_NETTYP(LNET_NIDNET(id.nid)) == LOLND)
+				continue;
+			if (cfs_match_nid(id.nid, &squash->rsi_nosquash_nids)) {
+				matched = true;
+				break;
+			}
+		}
+		if (matched)
+			sbi->ll_flags |= LL_SBI_NOROOTSQUASH;
+		else
+			sbi->ll_flags &= ~LL_SBI_NOROOTSQUASH;
+	}
+	up_write(&squash->rsi_sem);
+}
+
+/**
+ * Parse linkea content to extract information about a given hardlink
+ *
+ * \param[in]	ldata		- Initialized linkea data
+ * \param[in]	linkno		- Link identifier
+ * \param[out]	parent_fid	- The entry's parent FID
+ * \param[in]	size		- Entry name destination buffer
+ *
+ * \retval 0 on success
+ * \retval Appropriate negative error code on failure
+ */
+static int ll_linkea_decode(struct linkea_data *ldata, unsigned int linkno,
+			    struct lu_fid *parent_fid, struct lu_name *ln)
+{
+	unsigned int idx;
+	int rc;
+
+	rc = linkea_init(ldata);
+	if (rc < 0)
+		return rc;
+
+	if (linkno >= ldata->ld_leh->leh_reccount)
+		/* beyond last link */
+		return -ENODATA;
+
+	linkea_first_entry(ldata);
+	for (idx = 0; ldata->ld_lee; idx++) {
+		linkea_entry_unpack(ldata->ld_lee, &ldata->ld_reclen, ln,
+				    parent_fid);
+		if (idx == linkno)
+			break;
+
+		linkea_next_entry(ldata);
+	}
+
+	if (idx < linkno)
+		return -ENODATA;
+
+	return 0;
+}
+
+/**
+ * Get parent FID and name of an identified link. Operation is performed for
+ * a given link number, letting the caller iterate over linkno to list one or
+ * all links of an entry.
+ *
+ * \param[in]	  file	- File descriptor against which to perform the operation
+ * \param[in,out] arg	- User-filled structure containing the linkno to operate
+ *			  on and the available size. It is eventually filled with
+ *			  the requested information or left untouched on error
+ *
+ * \retval - 0 on success
+ * \retval - Appropriate negative error code on failure
+ */
+int ll_getparent(struct file *file, struct getparent __user *arg)
+{
+	struct inode *inode = file_inode(file);
+	struct linkea_data *ldata;
+	struct lu_fid parent_fid;
+	struct lu_buf buf = {
+		.lb_buf = NULL,
+		.lb_len = 0
+	};
+	struct lu_name ln;
+	u32 name_size;
+	u32 linkno;
+	int rc;
+
+	if (!capable(CFS_CAP_DAC_READ_SEARCH) &&
+	    !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH))
+		return -EPERM;
+
+	if (get_user(name_size, &arg->gp_name_size))
+		return -EFAULT;
+
+	if (get_user(linkno, &arg->gp_linkno))
+		return -EFAULT;
+
+	if (name_size > PATH_MAX)
+		return -EINVAL;
+
+	ldata = kzalloc(sizeof(*ldata), GFP_NOFS);
+	if (!ldata)
+		return -ENOMEM;
+
+	rc = linkea_data_new(ldata, &buf);
+	if (rc < 0)
+		goto ldata_free;
+
+	rc = ll_xattr_list(inode, XATTR_NAME_LINK, XATTR_TRUSTED_T, buf.lb_buf,
+			   buf.lb_len, OBD_MD_FLXATTR);
+	if (rc < 0)
+		goto lb_free;
+
+	rc = ll_linkea_decode(ldata, linkno, &parent_fid, &ln);
+	if (rc < 0)
+		goto lb_free;
+
+	if (ln.ln_namelen >= name_size) {
+		rc = -EOVERFLOW;
+		goto lb_free;
+	}
+
+	if (copy_to_user(&arg->gp_fid, &parent_fid, sizeof(arg->gp_fid))) {
+		rc = -EFAULT;
+		goto lb_free;
+	}
+
+	if (copy_to_user(&arg->gp_name, ln.ln_name, ln.ln_namelen)) {
+		rc = -EFAULT;
+		goto lb_free;
+	}
+
+	if (put_user('\0', arg->gp_name + ln.ln_namelen)) {
+		rc = -EFAULT;
+		goto lb_free;
+	}
+
+lb_free:
+	lu_buf_free(&buf);
+ldata_free:
+	kfree(ldata);
+	return rc;
+}
diff --git a/drivers/staging/lustre/lustre/llite/llite_mmap.c b/drivers/staging/lustre/lustre/llite/llite_mmap.c
index 66ee5db5fce8..436691814a5e 100644
--- a/drivers/staging/lustre/lustre/llite/llite_mmap.c
+++ b/drivers/staging/lustre/lustre/llite/llite_mmap.c
@@ -43,9 +43,7 @@
 
 #define DEBUG_SUBSYSTEM S_LLITE
 
-#include "../include/lustre_lite.h"
 #include "llite_internal.h"
-#include "../include/linux/lustre_compat25.h"
 
 static const struct vm_operations_struct ll_file_vm_ops;
 
@@ -126,7 +124,7 @@ restart:
 
 	fio = &io->u.ci_fault;
 	fio->ft_index      = index;
-	fio->ft_executable = vma->vm_flags&VM_EXEC;
+	fio->ft_executable = vma->vm_flags & VM_EXEC;
 
 	/*
 	 * disable VM_SEQ_READ and use VM_RAND_READ to make sure that
@@ -134,7 +132,7 @@ restart:
 	 * filemap_nopage. we do our readahead in ll_readpage.
 	 */
 	if (ra_flags)
-		*ra_flags = vma->vm_flags & (VM_RAND_READ|VM_SEQ_READ);
+		*ra_flags = vma->vm_flags & (VM_RAND_READ | VM_SEQ_READ);
 	vma->vm_flags &= ~VM_SEQ_READ;
 	vma->vm_flags |= VM_RAND_READ;
 
@@ -429,7 +427,6 @@ static void ll_vm_open(struct vm_area_struct *vma)
 	struct inode *inode    = file_inode(vma->vm_file);
 	struct vvp_object *vob = cl_inode2vvp(inode);
 
-	LASSERT(vma->vm_file);
 	LASSERT(atomic_read(&vob->vob_mmap_cnt) >= 0);
 	atomic_inc(&vob->vob_mmap_cnt);
 }
@@ -442,7 +439,6 @@ static void ll_vm_close(struct vm_area_struct *vma)
 	struct inode      *inode = file_inode(vma->vm_file);
 	struct vvp_object *vob   = cl_inode2vvp(inode);
 
-	LASSERT(vma->vm_file);
 	atomic_dec(&vob->vob_mmap_cnt);
 	LASSERT(atomic_read(&vob->vob_mmap_cnt) >= 0);
 }
diff --git a/drivers/staging/lustre/lustre/llite/llite_nfs.c b/drivers/staging/lustre/lustre/llite/llite_nfs.c
index 65972c892731..709230571b4b 100644
--- a/drivers/staging/lustre/lustre/llite/llite_nfs.c
+++ b/drivers/staging/lustre/lustre/llite/llite_nfs.c
@@ -38,7 +38,6 @@
  */
 
 #define DEBUG_SUBSYSTEM S_LLITE
-#include "../include/lustre_lite.h"
 #include "llite_internal.h"
 #include <linux/exportfs.h>
 
@@ -73,11 +72,6 @@ void get_uuid2fsid(const char *name, int len, __kernel_fsid_t *fsid)
 	fsid->val[1] = key >> 32;
 }
 
-static int ll_nfs_test_inode(struct inode *inode, void *opaque)
-{
-	return lu_fid_eq(&ll_i2info(inode)->lli_fid, opaque);
-}
-
 struct inode *search_inode_for_lustre(struct super_block *sb,
 				      const struct lu_fid *fid)
 {
@@ -92,7 +86,7 @@ struct inode *search_inode_for_lustre(struct super_block *sb,
 
 	CDEBUG(D_INFO, "searching inode for:(%lu,"DFID")\n", hash, PFID(fid));
 
-	inode = ilookup5(sb, hash, ll_nfs_test_inode, (void *)fid);
+	inode = ilookup5(sb, hash, ll_test_inode_by_fid, (void *)fid);
 	if (inode)
 		return inode;
 
@@ -153,12 +147,18 @@ ll_iget_for_nfs(struct super_block *sb, struct lu_fid *fid, struct lu_fid *paren
 		return ERR_PTR(-ESTALE);
 	}
 
+	result = d_obtain_alias(inode);
+	if (IS_ERR(result)) {
+		iput(inode);
+		return result;
+	}
+
 	/**
-	 * It is an anonymous dentry without OST objects created yet.
-	 * We have to find the parent to tell MDS how to init lov objects.
+	 * In case d_obtain_alias() found a disconnected dentry, always update
+	 * lli_pfid to allow later operation (normally open) have parent fid,
+	 * which may be used by MDS to create data.
 	 */
-	if (S_ISREG(inode->i_mode) && !ll_i2info(inode)->lli_has_smd &&
-	    parent && !fid_is_zero(parent)) {
+	if (parent) {
 		struct ll_inode_info *lli = ll_i2info(inode);
 
 		spin_lock(&lli->lli_lock);
@@ -255,6 +255,8 @@ static int ll_get_name(struct dentry *dentry, char *name,
 		.lgd_fid = ll_i2info(d_inode(child))->lli_fid,
 		.ctx.actor = ll_nfs_get_name_filldir,
 	};
+	struct md_op_data *op_data;
+	__u64 pos = 0;
 
 	if (!dir || !S_ISDIR(dir->i_mode)) {
 		rc = -ENOTDIR;
@@ -266,9 +268,18 @@ static int ll_get_name(struct dentry *dentry, char *name,
 		goto out;
 	}
 
+	op_data = ll_prep_md_op_data(NULL, dir, dir, NULL, 0, 0,
+				     LUSTRE_OPC_ANY, dir);
+	if (IS_ERR(op_data)) {
+		rc = PTR_ERR(op_data);
+		goto out;
+	}
+
+	op_data->op_max_pages = ll_i2sbi(dir)->ll_md_brw_pages;
 	inode_lock(dir);
-	rc = ll_dir_read(dir, &lgd.ctx);
+	rc = ll_dir_read(dir, &pos, op_data, &lgd.ctx);
 	inode_unlock(dir);
+	ll_finish_md_op_data(op_data);
 	if (!rc && !lgd.lgd_found)
 		rc = -ENOENT;
 out:
@@ -297,14 +308,12 @@ static struct dentry *ll_fh_to_parent(struct super_block *sb, struct fid *fid,
 	return ll_iget_for_nfs(sb, &nfs_fid->lnf_parent, NULL);
 }
 
-static struct dentry *ll_get_parent(struct dentry *dchild)
+int ll_dir_get_parent_fid(struct inode *dir, struct lu_fid *parent_fid)
 {
 	struct ptlrpc_request *req = NULL;
-	struct inode	  *dir = d_inode(dchild);
 	struct ll_sb_info     *sbi;
-	struct dentry	 *result = NULL;
 	struct mdt_body       *body;
-	static char	   dotdot[] = "..";
+	static const char dotdot[] = "..";
 	struct md_op_data     *op_data;
 	int		   rc;
 	int		      lmmsize;
@@ -319,13 +328,13 @@ static struct dentry *ll_get_parent(struct dentry *dchild)
 
 	rc = ll_get_default_mdsize(sbi, &lmmsize);
 	if (rc != 0)
-		return ERR_PTR(rc);
+		return rc;
 
 	op_data = ll_prep_md_op_data(NULL, dir, NULL, dotdot,
 				     strlen(dotdot), lmmsize,
 				     LUSTRE_OPC_ANY, NULL);
 	if (IS_ERR(op_data))
-		return (void *)op_data;
+		return PTR_ERR(op_data);
 
 	rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
 	ll_finish_md_op_data(op_data);
@@ -333,21 +342,36 @@ static struct dentry *ll_get_parent(struct dentry *dchild)
 		CERROR("%s: failure inode "DFID" get parent: rc = %d\n",
 		       ll_get_fsname(dir->i_sb, NULL, 0),
 		       PFID(ll_inode2fid(dir)), rc);
-		return ERR_PTR(rc);
+		return rc;
 	}
 	body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
 	/*
 	 * LU-3952: MDT may lost the FID of its parent, we should not crash
 	 * the NFS server, ll_iget_for_nfs() will handle the error.
 	 */
-	if (body->valid & OBD_MD_FLID) {
+	if (body->mbo_valid & OBD_MD_FLID) {
 		CDEBUG(D_INFO, "parent for " DFID " is " DFID "\n",
-		       PFID(ll_inode2fid(dir)), PFID(&body->fid1));
+		       PFID(ll_inode2fid(dir)), PFID(&body->mbo_fid1));
+		*parent_fid = body->mbo_fid1;
 	}
-	result = ll_iget_for_nfs(dir->i_sb, &body->fid1, NULL);
 
 	ptlrpc_req_finished(req);
-	return result;
+	return 0;
+}
+
+static struct dentry *ll_get_parent(struct dentry *dchild)
+{
+	struct lu_fid parent_fid = { 0 };
+	struct dentry *dentry;
+	int rc;
+
+	rc = ll_dir_get_parent_fid(dchild->d_inode, &parent_fid);
+	if (rc)
+		return ERR_PTR(rc);
+
+	dentry = ll_iget_for_nfs(dchild->d_inode->i_sb, &parent_fid, NULL);
+
+	return dentry;
 }
 
 const struct export_operations lustre_export_operations = {
diff --git a/drivers/staging/lustre/lustre/llite/lproc_llite.c b/drivers/staging/lustre/lustre/llite/lproc_llite.c
index e86bf3c53be3..6eae60595905 100644
--- a/drivers/staging/lustre/lustre/llite/lproc_llite.c
+++ b/drivers/staging/lustre/lustre/llite/lproc_llite.c
@@ -31,7 +31,6 @@
  */
 #define DEBUG_SUBSYSTEM S_LLITE
 
-#include "../include/lustre_lite.h"
 #include "../include/lprocfs_status.h"
 #include <linux/seq_file.h>
 #include "../include/obd_support.h"
@@ -358,16 +357,16 @@ static int ll_max_cached_mb_seq_show(struct seq_file *m, void *v)
 	struct ll_sb_info      *sbi   = ll_s2sbi(sb);
 	struct cl_client_cache *cache = sbi->ll_cache;
 	int shift = 20 - PAGE_SHIFT;
-	int max_cached_mb;
-	int unused_mb;
+	long max_cached_mb;
+	long unused_mb;
 
 	max_cached_mb = cache->ccc_lru_max >> shift;
-	unused_mb = atomic_read(&cache->ccc_lru_left) >> shift;
+	unused_mb = atomic_long_read(&cache->ccc_lru_left) >> shift;
 	seq_printf(m,
 		   "users: %d\n"
-		   "max_cached_mb: %d\n"
-		   "used_mb: %d\n"
-		   "unused_mb: %d\n"
+		   "max_cached_mb: %ld\n"
+		   "used_mb: %ld\n"
+		   "unused_mb: %ld\n"
 		   "reclaim_count: %u\n",
 		   atomic_read(&cache->ccc_users),
 		   max_cached_mb,
@@ -385,10 +384,13 @@ static ssize_t ll_max_cached_mb_seq_write(struct file *file,
 	struct ll_sb_info *sbi = ll_s2sbi(sb);
 	struct cl_client_cache *cache = sbi->ll_cache;
 	struct lu_env *env;
+	long diff = 0;
+	long nrpages = 0;
 	int refcheck;
-	int mult, rc, pages_number;
-	int diff = 0;
-	int nrpages = 0;
+	long pages_number;
+	int mult;
+	long rc;
+	u64 val;
 	char kernbuf[128];
 
 	if (count >= sizeof(kernbuf))
@@ -401,10 +403,14 @@ static ssize_t ll_max_cached_mb_seq_write(struct file *file,
 	mult = 1 << (20 - PAGE_SHIFT);
 	buffer += lprocfs_find_named_value(kernbuf, "max_cached_mb:", &count) -
 		  kernbuf;
-	rc = lprocfs_write_frac_helper(buffer, count, &pages_number, mult);
+	rc = lprocfs_write_frac_u64_helper(buffer, count, &val, mult);
 	if (rc)
 		return rc;
 
+	if (val > LONG_MAX)
+		return -ERANGE;
+	pages_number = (long)val;
+
 	if (pages_number < 0 || pages_number > totalram_pages) {
 		CERROR("%s: can't set max cache more than %lu MB\n",
 		       ll_get_fsname(sb, NULL, 0),
@@ -418,7 +424,7 @@ static ssize_t ll_max_cached_mb_seq_write(struct file *file,
 
 	/* easy - add more LRU slots. */
 	if (diff >= 0) {
-		atomic_add(diff, &cache->ccc_lru_left);
+		atomic_long_add(diff, &cache->ccc_lru_left);
 		rc = 0;
 		goto out;
 	}
@@ -429,18 +435,18 @@ static ssize_t ll_max_cached_mb_seq_write(struct file *file,
 
 	diff = -diff;
 	while (diff > 0) {
-		int tmp;
+		long tmp;
 
 		/* reduce LRU budget from free slots. */
 		do {
-			int ov, nv;
+			long ov, nv;
 
-			ov = atomic_read(&cache->ccc_lru_left);
+			ov = atomic_long_read(&cache->ccc_lru_left);
 			if (ov == 0)
 				break;
 
 			nv = ov > diff ? ov - diff : 0;
-			rc = atomic_cmpxchg(&cache->ccc_lru_left, ov, nv);
+			rc = atomic_long_cmpxchg(&cache->ccc_lru_left, ov, nv);
 			if (likely(ov == rc)) {
 				diff -= ov - nv;
 				nrpages += ov - nv;
@@ -474,7 +480,7 @@ out:
 		spin_unlock(&sbi->ll_lock);
 		rc = count;
 	} else {
-		atomic_add(nrpages, &cache->ccc_lru_left);
+		atomic_long_add(nrpages, &cache->ccc_lru_left);
 	}
 	return rc;
 }
@@ -738,6 +744,18 @@ static ssize_t max_easize_show(struct kobject *kobj,
 }
 LUSTRE_RO_ATTR(max_easize);
 
+/**
+ * Get default_easize.
+ *
+ * \see client_obd::cl_default_mds_easize
+ *
+ * \param[in] kobj	kernel object for sysfs tree
+ * \param[in] attr	attribute of this kernel object
+ * \param[in] buf	buffer to write data into
+ *
+ * \retval positive	\a count on success
+ * \retval negative	negated errno on failure
+ */
 static ssize_t default_easize_show(struct kobject *kobj,
 				   struct attribute *attr,
 				   char *buf)
@@ -753,7 +771,44 @@ static ssize_t default_easize_show(struct kobject *kobj,
 
 	return sprintf(buf, "%u\n", ealen);
 }
-LUSTRE_RO_ATTR(default_easize);
+
+/**
+ * Set default_easize.
+ *
+ * Range checking on the passed value is handled by
+ * ll_set_default_mdsize().
+ *
+ * \see client_obd::cl_default_mds_easize
+ *
+ * \param[in] kobj	kernel object for sysfs tree
+ * \param[in] attr	attribute of this kernel object
+ * \param[in] buffer	string passed from user space
+ * \param[in] count	\a buffer length
+ *
+ * \retval positive	\a count on success
+ * \retval negative	negated errno on failure
+ */
+static ssize_t default_easize_store(struct kobject *kobj,
+				    struct attribute *attr,
+				    const char *buffer,
+				    size_t count)
+{
+	struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+					      ll_kobj);
+	unsigned long val;
+	int rc;
+
+	rc = kstrtoul(buffer, 10, &val);
+	if (rc)
+		return rc;
+
+	rc = ll_set_default_mdsize(sbi, val);
+	if (rc)
+		return rc;
+
+	return count;
+}
+LUSTRE_RW_ATTR(default_easize);
 
 static int ll_sbi_flags_seq_show(struct seq_file *m, void *v)
 {
@@ -774,7 +829,7 @@ static int ll_sbi_flags_seq_show(struct seq_file *m, void *v)
 		flags >>= 1;
 		++i;
 	}
-	seq_printf(m, "\b\n");
+	seq_puts(m, "\b\n");
 	return 0;
 }
 
@@ -823,15 +878,116 @@ static ssize_t unstable_stats_show(struct kobject *kobj,
 	struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
 					      ll_kobj);
 	struct cl_client_cache *cache = sbi->ll_cache;
-	int pages, mb;
+	long pages;
+	int mb;
 
-	pages = atomic_read(&cache->ccc_unstable_nr);
+	pages = atomic_long_read(&cache->ccc_unstable_nr);
 	mb = (pages * PAGE_SIZE) >> 20;
 
-	return sprintf(buf, "unstable_pages: %8d\n"
-			    "unstable_mb:    %8d\n", pages, mb);
+	return sprintf(buf, "unstable_check:     %8d\n"
+			    "unstable_pages: %12ld\n"
+			    "unstable_mb:        %8d\n",
+			    cache->ccc_unstable_check, pages, mb);
 }
-LUSTRE_RO_ATTR(unstable_stats);
+
+static ssize_t unstable_stats_store(struct kobject *kobj,
+				    struct attribute *attr,
+				    const char *buffer,
+				    size_t count)
+{
+	struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+					      ll_kobj);
+	char kernbuf[128];
+	int val, rc;
+
+	if (!count)
+		return 0;
+	if (count >= sizeof(kernbuf))
+		return -EINVAL;
+
+	if (copy_from_user(kernbuf, buffer, count))
+		return -EFAULT;
+	kernbuf[count] = 0;
+
+	buffer += lprocfs_find_named_value(kernbuf, "unstable_check:", &count) -
+		  kernbuf;
+	rc = lprocfs_write_helper(buffer, count, &val);
+	if (rc < 0)
+		return rc;
+
+	/* borrow lru lock to set the value */
+	spin_lock(&sbi->ll_cache->ccc_lru_lock);
+	sbi->ll_cache->ccc_unstable_check = !!val;
+	spin_unlock(&sbi->ll_cache->ccc_lru_lock);
+
+	return count;
+}
+LUSTRE_RW_ATTR(unstable_stats);
+
+static ssize_t root_squash_show(struct kobject *kobj, struct attribute *attr,
+				char *buf)
+{
+	struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+					      ll_kobj);
+	struct root_squash_info *squash = &sbi->ll_squash;
+
+	return sprintf(buf, "%u:%u\n", squash->rsi_uid, squash->rsi_gid);
+}
+
+static ssize_t root_squash_store(struct kobject *kobj, struct attribute *attr,
+				 const char *buffer, size_t count)
+{
+	struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+					      ll_kobj);
+	struct root_squash_info *squash = &sbi->ll_squash;
+
+	return lprocfs_wr_root_squash(buffer, count, squash,
+				      ll_get_fsname(sbi->ll_sb, NULL, 0));
+}
+LUSTRE_RW_ATTR(root_squash);
+
+static int ll_nosquash_nids_seq_show(struct seq_file *m, void *v)
+{
+	struct super_block *sb = m->private;
+	struct ll_sb_info *sbi = ll_s2sbi(sb);
+	struct root_squash_info *squash = &sbi->ll_squash;
+	int len;
+
+	down_read(&squash->rsi_sem);
+	if (!list_empty(&squash->rsi_nosquash_nids)) {
+		len = cfs_print_nidlist(m->buf + m->count, m->size - m->count,
+					&squash->rsi_nosquash_nids);
+		m->count += len;
+		seq_puts(m, "\n");
+	} else {
+		seq_puts(m, "NONE\n");
+	}
+	up_read(&squash->rsi_sem);
+
+	return 0;
+}
+
+static ssize_t ll_nosquash_nids_seq_write(struct file *file,
+					  const char __user *buffer,
+					  size_t count, loff_t *off)
+{
+	struct seq_file *m = file->private_data;
+	struct super_block *sb = m->private;
+	struct ll_sb_info *sbi = ll_s2sbi(sb);
+	struct root_squash_info *squash = &sbi->ll_squash;
+	int rc;
+
+	rc = lprocfs_wr_nosquash_nids(buffer, count, squash,
+				      ll_get_fsname(sb, NULL, 0));
+	if (rc < 0)
+		return rc;
+
+	ll_compute_rootsquash_state(sbi);
+
+	return rc;
+}
+
+LPROC_SEQ_FOPS(ll_nosquash_nids);
 
 static struct lprocfs_vars lprocfs_llite_obd_vars[] = {
 	/* { "mntpt_path",   ll_rd_path,	     0, 0 }, */
@@ -840,6 +996,8 @@ static struct lprocfs_vars lprocfs_llite_obd_vars[] = {
 	{ "max_cached_mb",    &ll_max_cached_mb_fops, NULL },
 	{ "statahead_stats",  &ll_statahead_stats_fops, NULL, 0 },
 	{ "sbi_flags",	      &ll_sbi_flags_fops, NULL, 0 },
+	{ .name =		"nosquash_nids",
+	  .fops =		&ll_nosquash_nids_fops		},
 	{ NULL }
 };
 
@@ -869,6 +1027,7 @@ static struct attribute *llite_attrs[] = {
 	&lustre_attr_default_easize.attr,
 	&lustre_attr_xattr_cache.attr,
 	&lustre_attr_unstable_stats.attr,
+	&lustre_attr_root_squash.attr,
 	NULL,
 };
 
@@ -893,17 +1052,17 @@ static const struct llite_file_opcode {
 	/* file operation */
 	{ LPROC_LL_DIRTY_HITS,     LPROCFS_TYPE_REGS, "dirty_pages_hits" },
 	{ LPROC_LL_DIRTY_MISSES,   LPROCFS_TYPE_REGS, "dirty_pages_misses" },
-	{ LPROC_LL_READ_BYTES,     LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_BYTES,
+	{ LPROC_LL_READ_BYTES,     LPROCFS_CNTR_AVGMINMAX | LPROCFS_TYPE_BYTES,
 				   "read_bytes" },
-	{ LPROC_LL_WRITE_BYTES,    LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_BYTES,
+	{ LPROC_LL_WRITE_BYTES,    LPROCFS_CNTR_AVGMINMAX | LPROCFS_TYPE_BYTES,
 				   "write_bytes" },
-	{ LPROC_LL_BRW_READ,       LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_PAGES,
+	{ LPROC_LL_BRW_READ,       LPROCFS_CNTR_AVGMINMAX | LPROCFS_TYPE_PAGES,
 				   "brw_read" },
-	{ LPROC_LL_BRW_WRITE,      LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_PAGES,
+	{ LPROC_LL_BRW_WRITE,      LPROCFS_CNTR_AVGMINMAX | LPROCFS_TYPE_PAGES,
 				   "brw_write" },
-	{ LPROC_LL_OSC_READ,       LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_BYTES,
+	{ LPROC_LL_OSC_READ,       LPROCFS_CNTR_AVGMINMAX | LPROCFS_TYPE_BYTES,
 				   "osc_read" },
-	{ LPROC_LL_OSC_WRITE,      LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_BYTES,
+	{ LPROC_LL_OSC_WRITE,      LPROCFS_CNTR_AVGMINMAX | LPROCFS_TYPE_BYTES,
 				   "osc_write" },
 	{ LPROC_LL_IOCTL,	  LPROCFS_TYPE_REGS, "ioctl" },
 	{ LPROC_LL_OPEN,	   LPROCFS_TYPE_REGS, "open" },
@@ -1150,7 +1309,7 @@ static void ll_display_extents_info(struct ll_rw_extents_info *io_extents,
 			   r, pct(r, read_tot), pct(read_cum, read_tot),
 			   w, pct(w, write_tot), pct(write_cum, write_tot));
 		start = end;
-		if (start == 1<<10) {
+		if (start == 1 << 10) {
 			start = 1;
 			units += 10;
 			unitp++;
diff --git a/drivers/staging/lustre/lustre/llite/namei.c b/drivers/staging/lustre/lustre/llite/namei.c
index 2c4dc69731e8..dfa36d34c645 100644
--- a/drivers/staging/lustre/lustre/llite/namei.c
+++ b/drivers/staging/lustre/lustre/llite/namei.c
@@ -42,13 +42,12 @@
 
 #include "../include/obd_support.h"
 #include "../include/lustre_fid.h"
-#include "../include/lustre_lite.h"
 #include "../include/lustre_dlm.h"
 #include "../include/lustre_ver.h"
 #include "llite_internal.h"
 
-static int ll_create_it(struct inode *, struct dentry *,
-			int, struct lookup_intent *);
+static int ll_create_it(struct inode *dir, struct dentry *dentry,
+			struct lookup_intent *it);
 
 /* called from iget5_locked->find_inode() under inode_hash_lock spinlock */
 static int ll_test_inode(struct inode *inode, void *opaque)
@@ -56,12 +55,12 @@ static int ll_test_inode(struct inode *inode, void *opaque)
 	struct ll_inode_info *lli = ll_i2info(inode);
 	struct lustre_md     *md = opaque;
 
-	if (unlikely(!(md->body->valid & OBD_MD_FLID))) {
+	if (unlikely(!(md->body->mbo_valid & OBD_MD_FLID))) {
 		CERROR("MDS body missing FID\n");
 		return 0;
 	}
 
-	if (!lu_fid_eq(&lli->lli_fid, &md->body->fid1))
+	if (!lu_fid_eq(&lli->lli_fid, &md->body->mbo_fid1))
 		return 0;
 
 	return 1;
@@ -72,20 +71,20 @@ static int ll_set_inode(struct inode *inode, void *opaque)
 	struct ll_inode_info *lli = ll_i2info(inode);
 	struct mdt_body *body = ((struct lustre_md *)opaque)->body;
 
-	if (unlikely(!(body->valid & OBD_MD_FLID))) {
+	if (unlikely(!(body->mbo_valid & OBD_MD_FLID))) {
 		CERROR("MDS body missing FID\n");
 		return -EINVAL;
 	}
 
-	lli->lli_fid = body->fid1;
-	if (unlikely(!(body->valid & OBD_MD_FLTYPE))) {
+	lli->lli_fid = body->mbo_fid1;
+	if (unlikely(!(body->mbo_valid & OBD_MD_FLTYPE))) {
 		CERROR("Can not initialize inode " DFID
 		       " without object type: valid = %#llx\n",
-		       PFID(&lli->lli_fid), body->valid);
+		       PFID(&lli->lli_fid), body->mbo_valid);
 		return -EINVAL;
 	}
 
-	inode->i_mode = (inode->i_mode & ~S_IFMT) | (body->mode & S_IFMT);
+	inode->i_mode = (inode->i_mode & ~S_IFMT) | (body->mbo_mode & S_IFMT);
 	if (unlikely(inode->i_mode == 0)) {
 		CERROR("Invalid inode "DFID" type\n", PFID(&lli->lli_fid));
 		return -EINVAL;
@@ -96,41 +95,45 @@ static int ll_set_inode(struct inode *inode, void *opaque)
 	return 0;
 }
 
-/*
- * Get an inode by inode number (already instantiated by the intent lookup).
- * Returns inode or NULL
+/**
+ * Get an inode by inode number(@hash), which is already instantiated by
+ * the intent lookup).
  */
 struct inode *ll_iget(struct super_block *sb, ino_t hash,
 		      struct lustre_md *md)
 {
 	struct inode	 *inode;
+	int rc = 0;
 
 	LASSERT(hash != 0);
 	inode = iget5_locked(sb, hash, ll_test_inode, ll_set_inode, md);
-
-	if (inode) {
-		if (inode->i_state & I_NEW) {
-			int rc = 0;
-
-			ll_read_inode2(inode, md);
-			if (S_ISREG(inode->i_mode) &&
-			    !ll_i2info(inode)->lli_clob) {
-				CDEBUG(D_INODE,
-				       "%s: apply lsm %p to inode " DFID ".\n",
-				       ll_get_fsname(sb, NULL, 0), md->lsm,
-				       PFID(ll_inode2fid(inode)));
-				rc = cl_file_inode_init(inode, md);
-			}
-			if (rc != 0) {
-				iget_failed(inode);
-				inode = NULL;
-			} else {
-				unlock_new_inode(inode);
-			}
-		} else if (!(inode->i_state & (I_FREEING | I_CLEAR))) {
-			ll_update_inode(inode, md);
-			CDEBUG(D_VFSTRACE, "got inode: "DFID"(%p)\n",
-			       PFID(&md->body->fid1), inode);
+	if (!inode)
+		return ERR_PTR(-ENOMEM);
+
+	if (inode->i_state & I_NEW) {
+		rc = ll_read_inode2(inode, md);
+		if (!rc && S_ISREG(inode->i_mode) &&
+		    !ll_i2info(inode)->lli_clob) {
+			CDEBUG(D_INODE, "%s: apply lsm %p to inode "DFID"\n",
+			       ll_get_fsname(sb, NULL, 0), md->lsm,
+			       PFID(ll_inode2fid(inode)));
+			rc = cl_file_inode_init(inode, md);
+		}
+		if (rc) {
+			make_bad_inode(inode);
+			unlock_new_inode(inode);
+			iput(inode);
+			inode = ERR_PTR(rc);
+		} else {
+			unlock_new_inode(inode);
+		}
+	} else if (!(inode->i_state & (I_FREEING | I_CLEAR))) {
+		rc = ll_update_inode(inode, md);
+		CDEBUG(D_VFSTRACE, "got inode: "DFID"(%p): rc = %d\n",
+		       PFID(&md->body->mbo_fid1), inode, rc);
+		if (rc) {
+			iput(inode);
+			inode = ERR_PTR(rc);
 		}
 	}
 	return inode;
@@ -158,6 +161,11 @@ static void ll_invalidate_negative_children(struct inode *dir)
 	spin_unlock(&dir->i_lock);
 }
 
+int ll_test_inode_by_fid(struct inode *inode, void *opaque)
+{
+	return lu_fid_eq(&ll_i2info(inode)->lli_fid, opaque);
+}
+
 int ll_md_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
 		       void *data, int flag)
 {
@@ -196,6 +204,8 @@ int ll_md_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
 		}
 
 		if (bits & MDS_INODELOCK_XATTR) {
+			if (S_ISDIR(inode->i_mode))
+				ll_i2info(inode)->lli_def_stripe_offset = -1;
 			ll_xattr_cache_destroy(inode);
 			bits &= ~MDS_INODELOCK_XATTR;
 		}
@@ -253,10 +263,41 @@ int ll_md_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
 		}
 
 		if ((bits & MDS_INODELOCK_UPDATE) && S_ISDIR(inode->i_mode)) {
-			CDEBUG(D_INODE, "invalidating inode "DFID"\n",
-			       PFID(ll_inode2fid(inode)));
+			struct ll_inode_info *lli = ll_i2info(inode);
+
+			CDEBUG(D_INODE, "invalidating inode "DFID" lli = %p, pfid  = "DFID"\n",
+			       PFID(ll_inode2fid(inode)), lli,
+			       PFID(&lli->lli_pfid));
+
 			truncate_inode_pages(inode->i_mapping, 0);
-			ll_invalidate_negative_children(inode);
+
+			if (unlikely(!fid_is_zero(&lli->lli_pfid))) {
+				struct inode *master_inode = NULL;
+				unsigned long hash;
+
+				/*
+				 * This is slave inode, since all of the child
+				 * dentry is connected on the master inode, so
+				 * we have to invalidate the negative children
+				 * on master inode
+				 */
+				CDEBUG(D_INODE, "Invalidate s"DFID" m"DFID"\n",
+				       PFID(ll_inode2fid(inode)),
+				       PFID(&lli->lli_pfid));
+
+				hash = cl_fid_build_ino(&lli->lli_pfid,
+							ll_need_32bit_api(ll_i2sbi(inode)));
+
+				master_inode = ilookup5(inode->i_sb, hash,
+							ll_test_inode_by_fid,
+							(void *)&lli->lli_pfid);
+				if (master_inode && !IS_ERR(master_inode)) {
+					ll_invalidate_negative_children(master_inode);
+					iput(master_inode);
+				}
+			} else {
+				ll_invalidate_negative_children(inode);
+			}
 		}
 
 		if ((bits & (MDS_INODELOCK_LOOKUP | MDS_INODELOCK_PERM)) &&
@@ -322,7 +363,8 @@ static struct dentry *ll_find_alias(struct inode *inode, struct dentry *dentry)
 		LASSERT(alias != dentry);
 
 		spin_lock(&alias->d_lock);
-		if (alias->d_flags & DCACHE_DISCONNECTED)
+		if ((alias->d_flags & DCACHE_DISCONNECTED) &&
+		    S_ISDIR(inode->i_mode))
 			/* LASSERT(last_discon == NULL); LU-405, bz 20055 */
 			discon_alias = alias;
 		else if (alias->d_parent == dentry->d_parent	     &&
@@ -433,9 +475,20 @@ static int ll_lookup_it_finish(struct ptlrpc_request *request,
 		struct lookup_intent parent_it = {
 					.it_op = IT_GETATTR,
 					.it_lock_handle = 0 };
+		struct lu_fid fid = ll_i2info(parent)->lli_fid;
+
+		/* If it is striped directory, get the real stripe parent */
+		if (unlikely(ll_i2info(parent)->lli_lsm_md)) {
+			rc = md_get_fid_from_lsm(ll_i2mdexp(parent),
+						 ll_i2info(parent)->lli_lsm_md,
+						 (*de)->d_name.name,
+						 (*de)->d_name.len, &fid);
+			if (rc)
+				return rc;
+		}
 
-		if (md_revalidate_lock(ll_i2mdexp(parent), &parent_it,
-				       &ll_i2info(parent)->lli_fid, NULL)) {
+		if (md_revalidate_lock(ll_i2mdexp(parent), &parent_it, &fid,
+				       NULL)) {
 			d_lustre_revalidate(*de);
 			ll_intent_release(&parent_it);
 		}
@@ -449,13 +502,13 @@ out:
 }
 
 static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry,
-				   struct lookup_intent *it, int lookup_flags)
+				   struct lookup_intent *it)
 {
 	struct lookup_intent lookup_it = { .it_op = IT_LOOKUP };
 	struct dentry *save = dentry, *retval;
 	struct ptlrpc_request *req = NULL;
+	struct md_op_data *op_data = NULL;
 	struct inode *inode;
-	struct md_op_data *op_data;
 	__u32 opc;
 	int rc;
 
@@ -471,8 +524,8 @@ static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry,
 	if (!it || it->it_op == IT_GETXATTR)
 		it = &lookup_it;
 
-	if (it->it_op == IT_GETATTR) {
-		rc = ll_statahead_enter(parent, &dentry, 0);
+	if (it->it_op == IT_GETATTR && dentry_may_statahead(parent, dentry)) {
+		rc = ll_statahead(parent, &dentry, 0);
 		if (rc == 1) {
 			if (dentry == save)
 				retval = NULL;
@@ -488,8 +541,7 @@ static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry,
 		opc = LUSTRE_OPC_ANY;
 
 	op_data = ll_prep_md_op_data(NULL, parent, NULL, dentry->d_name.name,
-				     dentry->d_name.len, lookup_flags, opc,
-				     NULL);
+				     dentry->d_name.len, 0, opc, NULL);
 	if (IS_ERR(op_data))
 		return (void *)op_data;
 
@@ -497,9 +549,38 @@ static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry,
 	if (!IS_POSIXACL(parent) || !exp_connect_umask(ll_i2mdexp(parent)))
 		it->it_create_mode &= ~current_umask();
 
-	rc = md_intent_lock(ll_i2mdexp(parent), op_data, NULL, 0, it,
-			    lookup_flags, &req, ll_md_blocking_ast, 0);
-	ll_finish_md_op_data(op_data);
+	rc = md_intent_lock(ll_i2mdexp(parent), op_data, it, &req,
+			    &ll_md_blocking_ast, 0);
+	/*
+	 * If the MDS allows the client to chgrp (CFS_SETGRP_PERM), but the
+	 * client does not know which suppgid should be sent to the MDS, or
+	 * some other(s) changed the target file's GID after this RPC sent
+	 * to the MDS with the suppgid as the original GID, then we should
+	 * try again with right suppgid.
+	 */
+	if (rc == -EACCES && it->it_op & IT_OPEN &&
+	    it_disposition(it, DISP_OPEN_DENY)) {
+		struct mdt_body *body;
+
+		LASSERT(req);
+
+		body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
+		if (op_data->op_suppgids[0] == body->mbo_gid ||
+		    op_data->op_suppgids[1] == body->mbo_gid ||
+		    !in_group_p(make_kgid(&init_user_ns, body->mbo_gid))) {
+			retval = ERR_PTR(-EACCES);
+			goto out;
+		}
+
+		fid_zero(&op_data->op_fid2);
+		op_data->op_suppgids[1] = body->mbo_gid;
+		ptlrpc_req_finished(req);
+		req = NULL;
+		ll_intent_release(it);
+		rc = md_intent_lock(ll_i2mdexp(parent), op_data, it, &req,
+				    ll_md_blocking_ast, 0);
+	}
+
 	if (rc < 0) {
 		retval = ERR_PTR(rc);
 		goto out;
@@ -524,11 +605,11 @@ static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry,
 		retval = NULL;
 	else
 		retval = dentry;
- out:
-	if (req)
-		ptlrpc_req_finished(req);
-	if (it->it_op == IT_GETATTR && (!retval || retval == dentry))
-		ll_statahead_mark(parent, dentry);
+out:
+	if (op_data && !IS_ERR(op_data))
+		ll_finish_md_op_data(op_data);
+
+	ptlrpc_req_finished(req);
 	return retval;
 }
 
@@ -541,15 +622,19 @@ static struct dentry *ll_lookup_nd(struct inode *parent, struct dentry *dentry,
 	CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p),flags=%u\n",
 	       dentry, PFID(ll_inode2fid(parent)), parent, flags);
 
-	/* Optimize away (CREATE && !OPEN). Let .create handle the race. */
-	if ((flags & LOOKUP_CREATE) && !(flags & LOOKUP_OPEN))
+	/* Optimize away (CREATE && !OPEN). Let .create handle the race.
+	 * but only if we have write permissions there, otherwise we need
+	 * to proceed with lookup. LU-4185
+	 */
+	if ((flags & LOOKUP_CREATE) && !(flags & LOOKUP_OPEN) &&
+	    (inode_permission(parent, MAY_WRITE | MAY_EXEC) == 0))
 		return NULL;
 
-	if (flags & (LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE))
+	if (flags & (LOOKUP_PARENT | LOOKUP_OPEN | LOOKUP_CREATE))
 		itp = NULL;
 	else
 		itp = &it;
-	de = ll_lookup_it(parent, dentry, itp, 0);
+	de = ll_lookup_it(parent, dentry, itp);
 
 	if (itp)
 		ll_intent_release(itp);
@@ -567,7 +652,6 @@ static int ll_atomic_open(struct inode *dir, struct dentry *dentry,
 {
 	struct lookup_intent *it;
 	struct dentry *de;
-	long long lookup_flags = LOOKUP_OPEN;
 	int rc = 0;
 
 	CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p),file %p,open_flags %x,mode %x opened %d\n",
@@ -597,15 +681,14 @@ static int ll_atomic_open(struct inode *dir, struct dentry *dentry,
 		return -ENOMEM;
 
 	it->it_op = IT_OPEN;
-	if (open_flags & O_CREAT) {
+	if (open_flags & O_CREAT)
 		it->it_op |= IT_CREAT;
-		lookup_flags |= LOOKUP_CREATE;
-	}
 	it->it_create_mode = (mode & S_IALLUGO) | S_IFREG;
 	it->it_flags = (open_flags & ~O_ACCMODE) | OPEN_FMODE(open_flags);
+	it->it_flags &= ~MDS_OPEN_FL_INTERNAL;
 
 	/* Dentry added to dcache tree in ll_lookup_it */
-	de = ll_lookup_it(dir, dentry, it, lookup_flags);
+	de = ll_lookup_it(dir, dentry, it);
 	if (IS_ERR(de))
 		rc = PTR_ERR(de);
 	else if (de)
@@ -614,7 +697,7 @@ static int ll_atomic_open(struct inode *dir, struct dentry *dentry,
 	if (!rc) {
 		if (it_disposition(it, DISP_OPEN_CREATE)) {
 			/* Dentry instantiated in ll_create_it. */
-			rc = ll_create_it(dir, dentry, mode, it);
+			rc = ll_create_it(dir, dentry, it);
 			if (rc) {
 				/* We dget in ll_splice_alias. */
 				if (de)
@@ -700,7 +783,7 @@ static struct inode *ll_create_node(struct inode *dir, struct lookup_intent *it)
  * If the create succeeds, we fill in the inode information
  * with d_instantiate().
  */
-static int ll_create_it(struct inode *dir, struct dentry *dentry, int mode,
+static int ll_create_it(struct inode *dir, struct dentry *dentry,
 			struct lookup_intent *it)
 {
 	struct inode *inode;
@@ -721,27 +804,26 @@ static int ll_create_it(struct inode *dir, struct dentry *dentry, int mode,
 	return 0;
 }
 
-static void ll_update_times(struct ptlrpc_request *request,
-			    struct inode *inode)
+void ll_update_times(struct ptlrpc_request *request, struct inode *inode)
 {
 	struct mdt_body *body = req_capsule_server_get(&request->rq_pill,
 						       &RMF_MDT_BODY);
 
 	LASSERT(body);
-	if (body->valid & OBD_MD_FLMTIME &&
-	    body->mtime > LTIME_S(inode->i_mtime)) {
+	if (body->mbo_valid & OBD_MD_FLMTIME &&
+	    body->mbo_mtime > LTIME_S(inode->i_mtime)) {
 		CDEBUG(D_INODE, "setting fid "DFID" mtime from %lu to %llu\n",
 		       PFID(ll_inode2fid(inode)), LTIME_S(inode->i_mtime),
-		       body->mtime);
-		LTIME_S(inode->i_mtime) = body->mtime;
+		       body->mbo_mtime);
+		LTIME_S(inode->i_mtime) = body->mbo_mtime;
 	}
-	if (body->valid & OBD_MD_FLCTIME &&
-	    body->ctime > LTIME_S(inode->i_ctime))
-		LTIME_S(inode->i_ctime) = body->ctime;
+	if (body->mbo_valid & OBD_MD_FLCTIME &&
+	    body->mbo_ctime > LTIME_S(inode->i_ctime))
+		LTIME_S(inode->i_ctime) = body->mbo_ctime;
 }
 
 static int ll_new_node(struct inode *dir, struct dentry *dentry,
-		       const char *tgt, int mode, int rdev,
+		       const char *tgt, umode_t mode, int rdev,
 		       __u32 opc)
 {
 	struct ptlrpc_request *request = NULL;
@@ -753,7 +835,7 @@ static int ll_new_node(struct inode *dir, struct dentry *dentry,
 
 	if (unlikely(tgt))
 		tgt_len = strlen(tgt) + 1;
-
+again:
 	op_data = ll_prep_md_op_data(NULL, dir, NULL,
 				     dentry->d_name.name,
 				     dentry->d_name.len,
@@ -768,9 +850,45 @@ static int ll_new_node(struct inode *dir, struct dentry *dentry,
 			from_kgid(&init_user_ns, current_fsgid()),
 			cfs_curproc_cap_pack(), rdev, &request);
 	ll_finish_md_op_data(op_data);
-	if (err)
+	if (err < 0 && err != -EREMOTE)
 		goto err_exit;
 
+	/*
+	 * If the client doesn't know where to create a subdirectory (or
+	 * in case of a race that sends the RPC to the wrong MDS), the
+	 * MDS will return -EREMOTE and the client will fetch the layout
+	 * of the directory, then create the directory on the right MDT.
+	 */
+	if (unlikely(err == -EREMOTE)) {
+		struct ll_inode_info *lli = ll_i2info(dir);
+		struct lmv_user_md *lum;
+		int lumsize, err2;
+
+		ptlrpc_req_finished(request);
+		request = NULL;
+
+		err2 = ll_dir_getstripe(dir, (void **)&lum, &lumsize, &request,
+					OBD_MD_DEFAULT_MEA);
+		if (!err2) {
+			/* Update stripe_offset and retry */
+			lli->lli_def_stripe_offset = lum->lum_stripe_offset;
+		} else if (err2 == -ENODATA &&
+			   lli->lli_def_stripe_offset != -1) {
+			/*
+			 * If there are no default stripe EA on the MDT, but the
+			 * client has default stripe, then it probably means
+			 * default stripe EA has just been deleted.
+			 */
+			lli->lli_def_stripe_offset = -1;
+		} else {
+			goto err_exit;
+		}
+
+		ptlrpc_req_finished(request);
+		request = NULL;
+		goto again;
+	}
+
 	ll_update_times(request, dir);
 
 	err = ll_prep_inode(&inode, request, dir->i_sb, NULL);
@@ -779,7 +897,8 @@ static int ll_new_node(struct inode *dir, struct dentry *dentry,
 
 	d_instantiate(dentry, inode);
 err_exit:
-	ptlrpc_req_finished(request);
+	if (request)
+		ptlrpc_req_finished(request);
 
 	return err;
 }
@@ -842,77 +961,6 @@ static int ll_create_nd(struct inode *dir, struct dentry *dentry,
 	return rc;
 }
 
-int ll_objects_destroy(struct ptlrpc_request *request, struct inode *dir)
-{
-	struct mdt_body *body;
-	struct lov_mds_md *eadata;
-	struct lov_stripe_md *lsm = NULL;
-	struct obd_trans_info oti = { 0 };
-	struct obdo *oa;
-	int rc;
-
-	/* req is swabbed so this is safe */
-	body = req_capsule_server_get(&request->rq_pill, &RMF_MDT_BODY);
-	if (!(body->valid & OBD_MD_FLEASIZE))
-		return 0;
-
-	if (body->eadatasize == 0) {
-		CERROR("OBD_MD_FLEASIZE set but eadatasize zero\n");
-		rc = -EPROTO;
-		goto out;
-	}
-
-	/* The MDS sent back the EA because we unlinked the last reference
-	 * to this file. Use this EA to unlink the objects on the OST.
-	 * It's opaque so we don't swab here; we leave it to obd_unpackmd() to
-	 * check it is complete and sensible.
-	 */
-	eadata = req_capsule_server_sized_get(&request->rq_pill, &RMF_MDT_MD,
-					      body->eadatasize);
-	LASSERT(eadata);
-
-	rc = obd_unpackmd(ll_i2dtexp(dir), &lsm, eadata, body->eadatasize);
-	if (rc < 0) {
-		CERROR("obd_unpackmd: %d\n", rc);
-		goto out;
-	}
-	LASSERT(rc >= sizeof(*lsm));
-
-	oa = kmem_cache_zalloc(obdo_cachep, GFP_NOFS);
-	if (!oa) {
-		rc = -ENOMEM;
-		goto out_free_memmd;
-	}
-
-	oa->o_oi = lsm->lsm_oi;
-	oa->o_mode = body->mode & S_IFMT;
-	oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLGROUP;
-
-	if (body->valid & OBD_MD_FLCOOKIE) {
-		oa->o_valid |= OBD_MD_FLCOOKIE;
-		oti.oti_logcookies =
-			req_capsule_server_sized_get(&request->rq_pill,
-						     &RMF_LOGCOOKIES,
-						   sizeof(struct llog_cookie) *
-						     lsm->lsm_stripe_count);
-		if (!oti.oti_logcookies) {
-			oa->o_valid &= ~OBD_MD_FLCOOKIE;
-			body->valid &= ~OBD_MD_FLCOOKIE;
-		}
-	}
-
-	rc = obd_destroy(NULL, ll_i2dtexp(dir), oa, lsm, &oti,
-			 ll_i2mdexp(dir));
-	if (rc)
-		CERROR("obd destroy objid "DOSTID" error %d\n",
-		       POSTID(&lsm->lsm_oi), rc);
-out_free_memmd:
-	obd_free_memmd(ll_i2dtexp(dir), &lsm);
-	kmem_cache_free(obdo_cachep, oa);
-out:
-	return rc;
-}
-
 /* ll_unlink() doesn't update the inode with the new link count.
  * Instead, ll_ddelete() and ll_d_iput() will update it based upon if there
  * is any lock existing. They will recycle dentries and inodes based upon locks
@@ -934,7 +982,7 @@ static int ll_unlink(struct inode *dir, struct dentry *dchild)
 	if (IS_ERR(op_data))
 		return PTR_ERR(op_data);
 
-	if (dchild && dchild->d_inode)
+	if (dchild->d_inode)
 		op_data->op_fid3 = *ll_inode2fid(dchild->d_inode);
 
 	op_data->op_fid2 = op_data->op_fid3;
@@ -946,7 +994,6 @@ static int ll_unlink(struct inode *dir, struct dentry *dchild)
 	ll_update_times(request, dir);
 	ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_UNLINK, 1);
 
-	rc = ll_objects_destroy(request, dir);
  out:
 	ptlrpc_req_finished(request);
 	return rc;
@@ -961,9 +1008,9 @@ static int ll_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 
 	if (!IS_POSIXACL(dir) || !exp_connect_umask(ll_i2mdexp(dir)))
 		mode &= ~current_umask();
-	mode = (mode & (S_IRWXUGO|S_ISVTX)) | S_IFDIR;
-	err = ll_new_node(dir, dentry, NULL, mode, 0, LUSTRE_OPC_MKDIR);
+	mode = (mode & (S_IRWXUGO | S_ISVTX)) | S_IFDIR;
 
+	err = ll_new_node(dir, dentry, NULL, mode, 0, LUSTRE_OPC_MKDIR);
 	if (!err)
 		ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_MKDIR, 1);
 
@@ -986,7 +1033,7 @@ static int ll_rmdir(struct inode *dir, struct dentry *dchild)
 	if (IS_ERR(op_data))
 		return PTR_ERR(op_data);
 
-	if (dchild && dchild->d_inode)
+	if (dchild->d_inode)
 		op_data->op_fid3 = *ll_inode2fid(dchild->d_inode);
 
 	op_data->op_fid2 = op_data->op_fid3;
@@ -1067,9 +1114,9 @@ static int ll_rename(struct inode *src, struct dentry *src_dchild,
 	if (IS_ERR(op_data))
 		return PTR_ERR(op_data);
 
-	if (src_dchild && src_dchild->d_inode)
+	if (src_dchild->d_inode)
 		op_data->op_fid3 = *ll_inode2fid(src_dchild->d_inode);
-	if (tgt_dchild && tgt_dchild->d_inode)
+	if (tgt_dchild->d_inode)
 		op_data->op_fid4 = *ll_inode2fid(tgt_dchild->d_inode);
 
 	err = md_rename(sbi->ll_md_exp, op_data,
@@ -1082,7 +1129,6 @@ static int ll_rename(struct inode *src, struct dentry *src_dchild,
 		ll_update_times(request, src);
 		ll_update_times(request, tgt);
 		ll_stats_ops_tally(sbi, LPROC_LL_RENAME, 1);
-		err = ll_objects_destroy(request, src);
 	}
 
 	ptlrpc_req_finished(request);
@@ -1106,10 +1152,10 @@ const struct inode_operations ll_dir_inode_operations = {
 	.setattr	    = ll_setattr,
 	.getattr	    = ll_getattr,
 	.permission	 = ll_inode_permission,
-	.setxattr	   = ll_setxattr,
-	.getxattr	   = ll_getxattr,
+	.setxattr	   = generic_setxattr,
+	.getxattr	   = generic_getxattr,
 	.listxattr	  = ll_listxattr,
-	.removexattr	= ll_removexattr,
+	.removexattr	= generic_removexattr,
 	.get_acl	    = ll_get_acl,
 };
 
@@ -1117,9 +1163,9 @@ const struct inode_operations ll_special_inode_operations = {
 	.setattr	= ll_setattr,
 	.getattr	= ll_getattr,
 	.permission     = ll_inode_permission,
-	.setxattr       = ll_setxattr,
-	.getxattr       = ll_getxattr,
+	.setxattr       = generic_setxattr,
+	.getxattr       = generic_getxattr,
 	.listxattr      = ll_listxattr,
-	.removexattr    = ll_removexattr,
+	.removexattr    = generic_removexattr,
 	.get_acl	    = ll_get_acl,
 };
diff --git a/drivers/staging/lustre/lustre/llite/range_lock.c b/drivers/staging/lustre/lustre/llite/range_lock.c
new file mode 100644
index 000000000000..94c818f1478b
--- /dev/null
+++ b/drivers/staging/lustre/lustre/llite/range_lock.c
@@ -0,0 +1,233 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.gnu.org/licenses/gpl-2.0.html
+ *
+ * GPL HEADER END
+ */
+/*
+ * Range lock is used to allow multiple threads writing a single shared
+ * file given each thread is writing to a non-overlapping portion of the
+ * file.
+ *
+ * Refer to the possible upstream kernel version of range lock by
+ * Jan Kara <jack@suse.cz>: https://lkml.org/lkml/2013/1/31/480
+ *
+ * This file could later replaced by the upstream kernel version.
+ */
+/*
+ * Author: Prakash Surya <surya1@llnl.gov>
+ * Author: Bobi Jam <bobijam.xu@intel.com>
+ */
+#include "range_lock.h"
+#include "../include/lustre/lustre_user.h"
+
+/**
+ * Initialize a range lock tree
+ *
+ * \param tree [in]	an empty range lock tree
+ *
+ * Pre:  Caller should have allocated the range lock tree.
+ * Post: The range lock tree is ready to function.
+ */
+void range_lock_tree_init(struct range_lock_tree *tree)
+{
+	tree->rlt_root = NULL;
+	tree->rlt_sequence = 0;
+	spin_lock_init(&tree->rlt_lock);
+}
+
+/**
+ * Initialize a range lock node
+ *
+ * \param lock  [in]	an empty range lock node
+ * \param start [in]	start of the covering region
+ * \param end   [in]	end of the covering region
+ *
+ * Pre:  Caller should have allocated the range lock node.
+ * Post: The range lock node is meant to cover [start, end] region
+ */
+void range_lock_init(struct range_lock *lock, __u64 start, __u64 end)
+{
+	memset(&lock->rl_node, 0, sizeof(lock->rl_node));
+	if (end != LUSTRE_EOF)
+		end >>= PAGE_SHIFT;
+	interval_set(&lock->rl_node, start >> PAGE_SHIFT, end);
+	INIT_LIST_HEAD(&lock->rl_next_lock);
+	lock->rl_task = NULL;
+	lock->rl_lock_count = 0;
+	lock->rl_blocking_ranges = 0;
+	lock->rl_sequence = 0;
+}
+
+static inline struct range_lock *next_lock(struct range_lock *lock)
+{
+	return list_entry(lock->rl_next_lock.next, typeof(*lock), rl_next_lock);
+}
+
+/**
+ * Helper function of range_unlock()
+ *
+ * \param node [in]	a range lock found overlapped during interval node
+ *			search
+ * \param arg [in]	the range lock to be tested
+ *
+ * \retval INTERVAL_ITER_CONT	indicate to continue the search for next
+ *				overlapping range node
+ * \retval INTERVAL_ITER_STOP	indicate to stop the search
+ */
+static enum interval_iter range_unlock_cb(struct interval_node *node, void *arg)
+{
+	struct range_lock *lock = arg;
+	struct range_lock *overlap = node2rangelock(node);
+	struct range_lock *iter;
+
+	list_for_each_entry(iter, &overlap->rl_next_lock, rl_next_lock) {
+		if (iter->rl_sequence > lock->rl_sequence) {
+			--iter->rl_blocking_ranges;
+			LASSERT(iter->rl_blocking_ranges > 0);
+		}
+	}
+	if (overlap->rl_sequence > lock->rl_sequence) {
+		--overlap->rl_blocking_ranges;
+		if (overlap->rl_blocking_ranges == 0)
+			wake_up_process(overlap->rl_task);
+	}
+	return INTERVAL_ITER_CONT;
+}
+
+/**
+ * Unlock a range lock, wake up locks blocked by this lock.
+ *
+ * \param tree [in]	range lock tree
+ * \param lock [in]	range lock to be deleted
+ *
+ * If this lock has been granted, relase it; if not, just delete it from
+ * the tree or the same region lock list. Wake up those locks only blocked
+ * by this lock through range_unlock_cb().
+ */
+void range_unlock(struct range_lock_tree *tree, struct range_lock *lock)
+{
+	spin_lock(&tree->rlt_lock);
+	if (!list_empty(&lock->rl_next_lock)) {
+		struct range_lock *next;
+
+		if (interval_is_intree(&lock->rl_node)) { /* first lock */
+			/* Insert the next same range lock into the tree */
+			next = next_lock(lock);
+			next->rl_lock_count = lock->rl_lock_count - 1;
+			interval_erase(&lock->rl_node, &tree->rlt_root);
+			interval_insert(&next->rl_node, &tree->rlt_root);
+		} else {
+			/* find the first lock in tree */
+			list_for_each_entry(next, &lock->rl_next_lock,
+					    rl_next_lock) {
+				if (!interval_is_intree(&next->rl_node))
+					continue;
+
+				LASSERT(next->rl_lock_count > 0);
+				next->rl_lock_count--;
+				break;
+			}
+		}
+		list_del_init(&lock->rl_next_lock);
+	} else {
+		LASSERT(interval_is_intree(&lock->rl_node));
+		interval_erase(&lock->rl_node, &tree->rlt_root);
+	}
+
+	interval_search(tree->rlt_root, &lock->rl_node.in_extent,
+			range_unlock_cb, lock);
+	spin_unlock(&tree->rlt_lock);
+}
+
+/**
+ * Helper function of range_lock()
+ *
+ * \param node [in]	a range lock found overlapped during interval node
+ *			search
+ * \param arg [in]	the range lock to be tested
+ *
+ * \retval INTERVAL_ITER_CONT	indicate to continue the search for next
+ *				overlapping range node
+ * \retval INTERVAL_ITER_STOP	indicate to stop the search
+ */
+static enum interval_iter range_lock_cb(struct interval_node *node, void *arg)
+{
+	struct range_lock *lock = (struct range_lock *)arg;
+	struct range_lock *overlap = node2rangelock(node);
+
+	lock->rl_blocking_ranges += overlap->rl_lock_count + 1;
+	return INTERVAL_ITER_CONT;
+}
+
+/**
+ * Lock a region
+ *
+ * \param tree [in]	range lock tree
+ * \param lock [in]	range lock node containing the region span
+ *
+ * \retval 0	get the range lock
+ * \retval <0	error code while not getting the range lock
+ *
+ * If there exists overlapping range lock, the new lock will wait and
+ * retry, if later it find that it is not the chosen one to wake up,
+ * it wait again.
+ */
+int range_lock(struct range_lock_tree *tree, struct range_lock *lock)
+{
+	struct interval_node *node;
+	int rc = 0;
+
+	spin_lock(&tree->rlt_lock);
+	/*
+	 * We need to check for all conflicting intervals
+	 * already in the tree.
+	 */
+	interval_search(tree->rlt_root, &lock->rl_node.in_extent,
+			range_lock_cb, lock);
+	/*
+	 * Insert to the tree if I am unique, otherwise I've been linked to
+	 * the rl_next_lock of another lock which has the same range as mine
+	 * in range_lock_cb().
+	 */
+	node = interval_insert(&lock->rl_node, &tree->rlt_root);
+	if (node) {
+		struct range_lock *tmp = node2rangelock(node);
+
+		list_add_tail(&lock->rl_next_lock, &tmp->rl_next_lock);
+		tmp->rl_lock_count++;
+	}
+	lock->rl_sequence = ++tree->rlt_sequence;
+
+	while (lock->rl_blocking_ranges > 0) {
+		lock->rl_task = current;
+		__set_current_state(TASK_INTERRUPTIBLE);
+		spin_unlock(&tree->rlt_lock);
+		schedule();
+
+		if (signal_pending(current)) {
+			range_unlock(tree, lock);
+			rc = -EINTR;
+			goto out;
+		}
+		spin_lock(&tree->rlt_lock);
+	}
+	spin_unlock(&tree->rlt_lock);
+out:
+	return rc;
+}
diff --git a/drivers/staging/lustre/lustre/llite/range_lock.h b/drivers/staging/lustre/lustre/llite/range_lock.h
new file mode 100644
index 000000000000..c6d04a6f99fd
--- /dev/null
+++ b/drivers/staging/lustre/lustre/llite/range_lock.h
@@ -0,0 +1,82 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.gnu.org/licenses/gpl-2.0.html
+ *
+ * GPL HEADER END
+ */
+/*
+ * Range lock is used to allow multiple threads writing a single shared
+ * file given each thread is writing to a non-overlapping portion of the
+ * file.
+ *
+ * Refer to the possible upstream kernel version of range lock by
+ * Jan Kara <jack@suse.cz>: https://lkml.org/lkml/2013/1/31/480
+ *
+ * This file could later replaced by the upstream kernel version.
+ */
+/*
+ * Author: Prakash Surya <surya1@llnl.gov>
+ * Author: Bobi Jam <bobijam.xu@intel.com>
+ */
+#ifndef _RANGE_LOCK_H
+#define _RANGE_LOCK_H
+
+#include "../../include/linux/libcfs/libcfs.h"
+#include "../include/interval_tree.h"
+
+struct range_lock {
+	struct interval_node	rl_node;
+	/**
+	 * Process to enqueue this lock.
+	 */
+	struct task_struct	*rl_task;
+	/**
+	 * List of locks with the same range.
+	 */
+	struct list_head	rl_next_lock;
+	/**
+	 * Number of locks in the list rl_next_lock
+	 */
+	unsigned int		rl_lock_count;
+	/**
+	 * Number of ranges which are blocking acquisition of the lock
+	 */
+	unsigned int		rl_blocking_ranges;
+	/**
+	 * Sequence number of range lock. This number is used to get to know
+	 * the order the locks are queued; this is required for range_cancel().
+	 */
+	__u64			rl_sequence;
+};
+
+static inline struct range_lock *node2rangelock(const struct interval_node *n)
+{
+	return container_of(n, struct range_lock, rl_node);
+}
+
+struct range_lock_tree {
+	struct interval_node	*rlt_root;
+	spinlock_t		 rlt_lock;	/* protect range lock tree */
+	__u64			 rlt_sequence;
+};
+
+void range_lock_tree_init(struct range_lock_tree *tree);
+void range_lock_init(struct range_lock *lock, __u64 start, __u64 end);
+int  range_lock(struct range_lock_tree *tree, struct range_lock *lock);
+void range_unlock(struct range_lock_tree *tree, struct range_lock *lock);
+#endif
diff --git a/drivers/staging/lustre/lustre/llite/rw.c b/drivers/staging/lustre/lustre/llite/rw.c
index 87393c4bd51e..50c0152ba022 100644
--- a/drivers/staging/lustre/lustre/llite/rw.c
+++ b/drivers/staging/lustre/lustre/llite/rw.c
@@ -50,10 +50,8 @@
 
 #define DEBUG_SUBSYSTEM S_LLITE
 
-#include "../include/lustre_lite.h"
 #include "../include/obd_cksum.h"
 #include "llite_internal.h"
-#include "../include/linux/lustre_compat25.h"
 
 static void ll_ra_stats_inc_sbi(struct ll_sb_info *sbi, enum ra_stat which);
 
@@ -413,7 +411,7 @@ static int ll_read_ahead_pages(const struct lu_env *env,
 			 * forward read-ahead, it will be fixed when backward
 			 * read-ahead is implemented
 			 */
-			LASSERTF(page_idx > ria->ria_stoff, "Invalid page_idx %lu rs %lu re %lu ro %lu rl %lu rp %lu\n",
+			LASSERTF(page_idx >= ria->ria_stoff, "Invalid page_idx %lu rs %lu re %lu ro %lu rl %lu rp %lu\n",
 				 page_idx,
 				 ria->ria_start, ria->ria_end, ria->ria_stoff,
 				 ria->ria_length, ria->ria_pages);
@@ -474,10 +472,22 @@ int ll_readahead(const struct lu_env *env, struct cl_io *io,
 	}
 
 	/* Reserve a part of the read-ahead window that we'll be issuing */
-	if (ras->ras_window_len) {
-		start = ras->ras_next_readahead;
+	if (ras->ras_window_len > 0) {
+		/*
+		 * Note: other thread might rollback the ras_next_readahead,
+		 * if it can not get the full size of prepared pages, see the
+		 * end of this function. For stride read ahead, it needs to
+		 * make sure the offset is no less than ras_stride_offset,
+		 * so that stride read ahead can work correctly.
+		 */
+		if (stride_io_mode(ras))
+			start = max(ras->ras_next_readahead,
+				    ras->ras_stride_offset);
+		else
+			start = ras->ras_next_readahead;
 		end = ras->ras_window_start + ras->ras_window_len - 1;
 	}
+
 	if (end != 0) {
 		unsigned long rpc_boundary;
 		/*
@@ -648,10 +658,11 @@ static void ras_update_stride_detector(struct ll_readahead_state *ras,
 {
 	unsigned long stride_gap = index - ras->ras_last_readpage - 1;
 
-	if (!stride_io_mode(ras) && (stride_gap != 0 ||
-	    ras->ras_consecutive_stride_requests == 0)) {
+	if ((stride_gap != 0 || ras->ras_consecutive_stride_requests == 0) &&
+	    !stride_io_mode(ras)) {
 		ras->ras_stride_pages = ras->ras_consecutive_pages;
-		ras->ras_stride_length = stride_gap+ras->ras_consecutive_pages;
+		ras->ras_stride_length = ras->ras_consecutive_pages +
+					 stride_gap;
 	}
 	LASSERT(ras->ras_request_index == 0);
 	LASSERT(ras->ras_consecutive_stride_requests == 0);
@@ -663,10 +674,9 @@ static void ras_update_stride_detector(struct ll_readahead_state *ras,
 	}
 
 	ras->ras_stride_pages = ras->ras_consecutive_pages;
-	ras->ras_stride_length = stride_gap+ras->ras_consecutive_pages;
+	ras->ras_stride_length = stride_gap + ras->ras_consecutive_pages;
 
 	RAS_CDEBUG(ras);
-	return;
 }
 
 /* Stride Read-ahead window will be increased inc_len according to
@@ -882,7 +892,6 @@ out_unlock:
 	RAS_CDEBUG(ras);
 	ras->ras_request_index++;
 	spin_unlock(&ras->ras_lock);
-	return;
 }
 
 int ll_writepage(struct page *vmpage, struct writeback_control *wbc)
@@ -1015,11 +1024,15 @@ int ll_writepages(struct address_space *mapping, struct writeback_control *wbc)
 		 * is called later on.
 		 */
 		ignore_layout = 1;
+
+	if (!ll_i2info(inode)->lli_clob)
+		return 0;
+
 	result = cl_sync_file_range(inode, start, end, mode, ignore_layout);
 	if (result > 0) {
 		wbc->nr_to_write -= result;
 		result = 0;
-	 }
+	}
 
 	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) {
 		if (end == OBD_OBJECT_EOF)
diff --git a/drivers/staging/lustre/lustre/llite/rw26.c b/drivers/staging/lustre/lustre/llite/rw26.c
index d98c7acc0832..26f3a37873a7 100644
--- a/drivers/staging/lustre/lustre/llite/rw26.c
+++ b/drivers/staging/lustre/lustre/llite/rw26.c
@@ -51,9 +51,7 @@
 
 #define DEBUG_SUBSYSTEM S_LLITE
 
-#include "../include/lustre_lite.h"
 #include "llite_internal.h"
-#include "../include/linux/lustre_compat25.h"
 
 /**
  * Implements Linux VM address_space::invalidatepage() method. This method is
@@ -161,7 +159,7 @@ static int ll_releasepage(struct page *vmpage, gfp_t gfp_mask)
 	return result;
 }
 
-#define MAX_DIRECTIO_SIZE (2*1024*1024*1024UL)
+#define MAX_DIRECTIO_SIZE (2 * 1024 * 1024 * 1024UL)
 
 static inline int ll_get_user_pages(int rw, unsigned long user_addr,
 				    size_t size, struct page ***pages,
@@ -214,10 +212,10 @@ ssize_t ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io,
 	int i;
 	ssize_t rc = 0;
 	loff_t file_offset  = pv->ldp_start_offset;
-	long size	   = pv->ldp_size;
+	size_t size = pv->ldp_size;
 	int page_count      = pv->ldp_nr;
 	struct page **pages = pv->ldp_pages;
-	long page_size      = cl_page_size(obj);
+	size_t page_size = cl_page_size(obj);
 	bool do_io;
 	int  io_pages       = 0;
 
@@ -346,7 +344,6 @@ static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter)
 	struct cl_io *io;
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
-	struct vvp_object *obj = cl_inode2vvp(inode);
 	loff_t file_offset = iocb->ki_pos;
 	ssize_t count = iov_iter_count(iter);
 	ssize_t tot_bytes = 0, result = 0;
@@ -375,14 +372,6 @@ static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter)
 	io = vvp_env_io(env)->vui_cl.cis_io;
 	LASSERT(io);
 
-	/* 0. Need locking between buffered and direct access. and race with
-	 *    size changing by concurrent truncates and writes.
-	 * 1. Need inode mutex to operate transient pages.
-	 */
-	if (iov_iter_rw(iter) == READ)
-		inode_lock(inode);
-
-	LASSERT(obj->vob_transient_pages == 0);
 	while (iov_iter_count(iter)) {
 		struct page **pages;
 		size_t offs;
@@ -430,10 +419,6 @@ static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter)
 		file_offset += result;
 	}
 out:
-	LASSERT(obj->vob_transient_pages == 0);
-	if (iov_iter_rw(iter) == READ)
-		inode_unlock(inode);
-
 	if (tot_bytes > 0) {
 		struct vvp_io *vio = vvp_env_io(env);
 
@@ -616,6 +601,13 @@ static int ll_write_end(struct file *file, struct address_space *mapping,
 			LASSERT(from == 0);
 		vio->u.write.vui_to = from + copied;
 
+		/*
+		 * To address the deadlock in balance_dirty_pages() where
+		 * this dirty page may be written back in the same thread.
+		 */
+		if (PageDirty(vmpage))
+			unplug = true;
+
 		/* We may have one full RPC, commit it soon */
 		if (plist->pl_nr >= PTLRPC_MAX_BRW_PAGES)
 			unplug = true;
diff --git a/drivers/staging/lustre/lustre/llite/statahead.c b/drivers/staging/lustre/lustre/llite/statahead.c
index c1cb6b19e724..0677513476ec 100644
--- a/drivers/staging/lustre/lustre/llite/statahead.c
+++ b/drivers/staging/lustre/lustre/llite/statahead.c
@@ -39,7 +39,6 @@
 #define DEBUG_SUBSYSTEM S_LLITE
 
 #include "../include/obd_support.h"
-#include "../include/lustre_lite.h"
 #include "../include/lustre_dlm.h"
 #include "llite_internal.h"
 
@@ -50,24 +49,26 @@ enum se_stat {
 	SA_ENTRY_INIT = 0,      /** init entry */
 	SA_ENTRY_SUCC = 1,      /** stat succeed */
 	SA_ENTRY_INVA = 2,      /** invalid entry */
-	SA_ENTRY_DEST = 3,      /** entry to be destroyed */
 };
 
-struct ll_sa_entry {
-	/* link into sai->sai_entries */
-	struct list_head	      se_link;
-	/* link into sai->sai_entries_{received,stated} */
+/*
+ * sa_entry is not refcounted: statahead thread allocates it and do async stat,
+ * and in async stat callback ll_statahead_interpret() will add it into
+ * sai_interim_entries, later statahead thread will call sa_handle_callback() to
+ * instantiate entry and move it into sai_entries, and then only scanner process
+ * can access and free it.
+ */
+struct sa_entry {
+	/* link into sai_interim_entries or sai_entries */
 	struct list_head	      se_list;
 	/* link into sai hash table locally */
 	struct list_head	      se_hash;
-	/* entry reference count */
-	atomic_t	    se_refcount;
 	/* entry index in the sai */
 	__u64		   se_index;
 	/* low layer ldlm lock handle */
 	__u64		   se_handle;
 	/* entry status */
-	enum se_stat	   se_stat;
+	enum se_stat		se_state;
 	/* entry size, contains name */
 	int		     se_size;
 	/* pointer to async getattr enqueue info */
@@ -83,27 +84,24 @@ struct ll_sa_entry {
 static unsigned int sai_generation;
 static DEFINE_SPINLOCK(sai_generation_lock);
 
-/*
- * The entry only can be released by the caller, it is necessary to hold lock.
- */
-static inline int ll_sa_entry_stated(struct ll_sa_entry *entry)
+/* sa_entry is ready to use */
+static inline int sa_ready(struct sa_entry *entry)
 {
 	smp_rmb();
-	return (entry->se_stat != SA_ENTRY_INIT);
+	return (entry->se_state != SA_ENTRY_INIT);
 }
 
-static inline int ll_sa_entry_hash(int val)
+/* hash value to put in sai_cache */
+static inline int sa_hash(int val)
 {
 	return val & LL_SA_CACHE_MASK;
 }
 
-/*
- * Insert entry to hash SA table.
- */
+/* hash entry into sai_cache */
 static inline void
-ll_sa_entry_enhash(struct ll_statahead_info *sai, struct ll_sa_entry *entry)
+sa_rehash(struct ll_statahead_info *sai, struct sa_entry *entry)
 {
-	int i = ll_sa_entry_hash(entry->se_qstr.hash);
+	int i = sa_hash(entry->se_qstr.hash);
 
 	spin_lock(&sai->sai_cache_lock[i]);
 	list_add_tail(&entry->se_hash, &sai->sai_cache[i]);
@@ -114,9 +112,9 @@ ll_sa_entry_enhash(struct ll_statahead_info *sai, struct ll_sa_entry *entry)
  * Remove entry from SA table.
  */
 static inline void
-ll_sa_entry_unhash(struct ll_statahead_info *sai, struct ll_sa_entry *entry)
+sa_unhash(struct ll_statahead_info *sai, struct sa_entry *entry)
 {
-	int i = ll_sa_entry_hash(entry->se_qstr.hash);
+	int i = sa_hash(entry->se_qstr.hash);
 
 	spin_lock(&sai->sai_cache_lock[i]);
 	list_del_init(&entry->se_hash);
@@ -129,19 +127,21 @@ static inline int agl_should_run(struct ll_statahead_info *sai,
 	return (inode && S_ISREG(inode->i_mode) && sai->sai_agl_valid);
 }
 
+/* statahead window is full */
 static inline int sa_sent_full(struct ll_statahead_info *sai)
 {
 	return atomic_read(&sai->sai_cache_count) >= sai->sai_max;
 }
 
-static inline int sa_received_empty(struct ll_statahead_info *sai)
+/* got async stat replies */
+static inline int sa_has_callback(struct ll_statahead_info *sai)
 {
-	return list_empty(&sai->sai_entries_received);
+	return !list_empty(&sai->sai_interim_entries);
 }
 
 static inline int agl_list_empty(struct ll_statahead_info *sai)
 {
-	return list_empty(&sai->sai_entries_agl);
+	return list_empty(&sai->sai_agls);
 }
 
 /**
@@ -157,7 +157,7 @@ static inline int sa_low_hit(struct ll_statahead_info *sai)
 }
 
 /*
- * If the given index is behind of statahead window more than
+ * if the given index is behind of statahead window more than
  * SA_OMITTED_ENTRY_MAX, then it is old.
  */
 static inline int is_omitted_entry(struct ll_statahead_info *sai, __u64 index)
@@ -166,20 +166,17 @@ static inline int is_omitted_entry(struct ll_statahead_info *sai, __u64 index)
 		 sai->sai_index);
 }
 
-/*
- * Insert it into sai_entries tail when init.
- */
-static struct ll_sa_entry *
-ll_sa_entry_alloc(struct dentry *parent,
-		  struct ll_statahead_info *sai, __u64 index,
-		  const char *name, int len)
+/* allocate sa_entry and hash it to allow scanner process to find it */
+static struct sa_entry *
+sa_alloc(struct dentry *parent, struct ll_statahead_info *sai, __u64 index,
+	 const char *name, int len)
 {
 	struct ll_inode_info *lli;
-	struct ll_sa_entry   *entry;
+	struct sa_entry   *entry;
 	int		   entry_size;
 	char		 *dname;
 
-	entry_size = sizeof(struct ll_sa_entry) + (len & ~3) + 4;
+	entry_size = sizeof(struct sa_entry) + (len & ~3) + 4;
 	entry = kzalloc(entry_size, GFP_NOFS);
 	if (unlikely(!entry))
 		return ERR_PTR(-ENOMEM);
@@ -188,34 +185,9 @@ ll_sa_entry_alloc(struct dentry *parent,
 	       len, name, entry, index);
 
 	entry->se_index = index;
-
-	/*
-	 * Statahead entry reference rules:
-	 *
-	 * 1) When statahead entry is initialized, its reference is set as 2.
-	 *    One reference is used by the directory scanner. When the scanner
-	 *    searches the statahead cache for the given name, it can perform
-	 *    lockless hash lookup (only the scanner can remove entry from hash
-	 *    list), and once found, it needn't to call "atomic_inc()" for the
-	 *    entry reference. So the performance is improved. After using the
-	 *    statahead entry, the scanner will call "atomic_dec()" to drop the
-	 *    reference held when initialization. If it is the last reference,
-	 *    the statahead entry will be freed.
-	 *
-	 * 2) All other threads, including statahead thread and ptlrpcd thread,
-	 *    when they process the statahead entry, the reference for target
-	 *    should be held to guarantee the entry will not be released by the
-	 *    directory scanner. After processing the entry, these threads will
-	 *    drop the entry reference. If it is the last reference, the entry
-	 *    will be freed.
-	 *
-	 *    The second reference when initializes the statahead entry is used
-	 *    by the statahead thread, following the rule 2).
-	 */
-	atomic_set(&entry->se_refcount, 2);
-	entry->se_stat = SA_ENTRY_INIT;
+	entry->se_state = SA_ENTRY_INIT;
 	entry->se_size = entry_size;
-	dname = (char *)entry + sizeof(struct ll_sa_entry);
+	dname = (char *)entry + sizeof(struct sa_entry);
 	memcpy(dname, name, len);
 	dname[len] = 0;
 
@@ -223,11 +195,10 @@ ll_sa_entry_alloc(struct dentry *parent,
 	entry->se_qstr.len = len;
 	entry->se_qstr.name = dname;
 
-	lli = ll_i2info(sai->sai_inode);
+	lli = ll_i2info(sai->sai_dentry->d_inode);
 	spin_lock(&lli->lli_sa_lock);
-	list_add_tail(&entry->se_link, &sai->sai_entries);
 	INIT_LIST_HEAD(&entry->se_list);
-	ll_sa_entry_enhash(sai, entry);
+	sa_rehash(sai, entry);
 	spin_unlock(&lli->lli_sa_lock);
 
 	atomic_inc(&sai->sai_cache_count);
@@ -235,18 +206,29 @@ ll_sa_entry_alloc(struct dentry *parent,
 	return entry;
 }
 
+/* free sa_entry, which should have been unhashed and not in any list */
+static void sa_free(struct ll_statahead_info *sai, struct sa_entry *entry)
+{
+	CDEBUG(D_READA, "free sa entry %.*s(%p) index %llu\n",
+	       entry->se_qstr.len, entry->se_qstr.name, entry,
+	       entry->se_index);
+
+	LASSERT(list_empty(&entry->se_list));
+	LASSERT(list_empty(&entry->se_hash));
+
+	kfree(entry);
+	atomic_dec(&sai->sai_cache_count);
+}
+
 /*
- * Used by the directory scanner to search entry with name.
- *
- * Only the caller can remove the entry from hash, so it is unnecessary to hold
- * hash lock. It is caller's duty to release the init refcount on the entry, so
- * it is also unnecessary to increase refcount on the entry.
+ * find sa_entry by name, used by directory scanner, lock is not needed because
+ * only scanner can remove the entry from cache.
  */
-static struct ll_sa_entry *
-ll_sa_entry_get_byname(struct ll_statahead_info *sai, const struct qstr *qstr)
+static struct sa_entry *
+sa_get(struct ll_statahead_info *sai, const struct qstr *qstr)
 {
-	struct ll_sa_entry *entry;
-	int i = ll_sa_entry_hash(qstr->hash);
+	struct sa_entry *entry;
+	int i = sa_hash(qstr->hash);
 
 	list_for_each_entry(entry, &sai->sai_cache[i], se_hash) {
 		if (entry->se_qstr.hash == qstr->hash &&
@@ -257,164 +239,126 @@ ll_sa_entry_get_byname(struct ll_statahead_info *sai, const struct qstr *qstr)
 	return NULL;
 }
 
-/*
- * Used by the async getattr request callback to find entry with index.
- *
- * Inside lli_sa_lock to prevent others to change the list during the search.
- * It needs to increase entry refcount before returning to guarantee that the
- * entry cannot be freed by others.
- */
-static struct ll_sa_entry *
-ll_sa_entry_get_byindex(struct ll_statahead_info *sai, __u64 index)
-{
-	struct ll_sa_entry *entry;
-
-	list_for_each_entry(entry, &sai->sai_entries, se_link) {
-		if (entry->se_index == index) {
-			LASSERT(atomic_read(&entry->se_refcount) > 0);
-			atomic_inc(&entry->se_refcount);
-			return entry;
-		}
-		if (entry->se_index > index)
-			break;
-	}
-	return NULL;
-}
-
-static void ll_sa_entry_cleanup(struct ll_statahead_info *sai,
-				struct ll_sa_entry *entry)
-{
-	struct md_enqueue_info *minfo = entry->se_minfo;
-	struct ptlrpc_request  *req   = entry->se_req;
-
-	if (minfo) {
-		entry->se_minfo = NULL;
-		ll_intent_release(&minfo->mi_it);
-		iput(minfo->mi_dir);
-		kfree(minfo);
-	}
-
-	if (req) {
-		entry->se_req = NULL;
-		ptlrpc_req_finished(req);
-	}
-}
-
-static void ll_sa_entry_put(struct ll_statahead_info *sai,
-			    struct ll_sa_entry *entry)
-{
-	if (atomic_dec_and_test(&entry->se_refcount)) {
-		CDEBUG(D_READA, "free sa entry %.*s(%p) index %llu\n",
-		       entry->se_qstr.len, entry->se_qstr.name, entry,
-		       entry->se_index);
-
-		LASSERT(list_empty(&entry->se_link));
-		LASSERT(list_empty(&entry->se_list));
-		LASSERT(list_empty(&entry->se_hash));
-
-		ll_sa_entry_cleanup(sai, entry);
-		iput(entry->se_inode);
-
-		kfree(entry);
-		atomic_dec(&sai->sai_cache_count);
-	}
-}
-
+/* unhash and unlink sa_entry, and then free it */
 static inline void
-do_sa_entry_fini(struct ll_statahead_info *sai, struct ll_sa_entry *entry)
+sa_kill(struct ll_statahead_info *sai, struct sa_entry *entry)
 {
-	struct ll_inode_info *lli = ll_i2info(sai->sai_inode);
+	struct ll_inode_info *lli = ll_i2info(sai->sai_dentry->d_inode);
 
 	LASSERT(!list_empty(&entry->se_hash));
-	LASSERT(!list_empty(&entry->se_link));
+	LASSERT(!list_empty(&entry->se_list));
+	LASSERT(sa_ready(entry));
 
-	ll_sa_entry_unhash(sai, entry);
+	sa_unhash(sai, entry);
 
 	spin_lock(&lli->lli_sa_lock);
-	entry->se_stat = SA_ENTRY_DEST;
-	list_del_init(&entry->se_link);
-	if (likely(!list_empty(&entry->se_list)))
-		list_del_init(&entry->se_list);
+	list_del_init(&entry->se_list);
 	spin_unlock(&lli->lli_sa_lock);
 
-	ll_sa_entry_put(sai, entry);
+	if (entry->se_inode)
+		iput(entry->se_inode);
+
+	sa_free(sai, entry);
 }
 
-/*
- * Delete it from sai_entries_stated list when fini.
- */
+/* called by scanner after use, sa_entry will be killed */
 static void
-ll_sa_entry_fini(struct ll_statahead_info *sai, struct ll_sa_entry *entry)
+sa_put(struct ll_statahead_info *sai, struct sa_entry *entry)
 {
-	struct ll_sa_entry *pos, *next;
+	struct sa_entry *tmp, *next;
+
+	if (entry && entry->se_state == SA_ENTRY_SUCC) {
+		struct ll_sb_info *sbi = ll_i2sbi(sai->sai_dentry->d_inode);
+
+		sai->sai_hit++;
+		sai->sai_consecutive_miss = 0;
+		sai->sai_max = min(2 * sai->sai_max, sbi->ll_sa_max);
+	} else {
+		sai->sai_miss++;
+		sai->sai_consecutive_miss++;
+	}
 
 	if (entry)
-		do_sa_entry_fini(sai, entry);
+		sa_kill(sai, entry);
 
-	/* drop old entry, only 'scanner' process does this, no need to lock */
-	list_for_each_entry_safe(pos, next, &sai->sai_entries, se_link) {
-		if (!is_omitted_entry(sai, pos->se_index))
+	/*
+	 * kill old completed entries, only scanner process does this, no need
+	 * to lock
+	 */
+	list_for_each_entry_safe(tmp, next, &sai->sai_entries, se_list) {
+		if (!is_omitted_entry(sai, tmp->se_index))
 			break;
-		do_sa_entry_fini(sai, pos);
+		sa_kill(sai, tmp);
 	}
+
+	wake_up(&sai->sai_thread.t_ctl_waitq);
 }
 
 /*
- * Inside lli_sa_lock.
+ * update state and sort add entry to sai_entries by index, return true if
+ * scanner is waiting on this entry.
  */
-static void
-do_sa_entry_to_stated(struct ll_statahead_info *sai,
-		      struct ll_sa_entry *entry, enum se_stat stat)
+static bool
+__sa_make_ready(struct ll_statahead_info *sai, struct sa_entry *entry, int ret)
 {
-	struct ll_sa_entry *se;
-	struct list_head	 *pos = &sai->sai_entries_stated;
+	struct list_head *pos = &sai->sai_entries;
+	__u64 index = entry->se_index;
+	struct sa_entry *se;
 
-	if (!list_empty(&entry->se_list))
-		list_del_init(&entry->se_list);
+	LASSERT(!sa_ready(entry));
+	LASSERT(list_empty(&entry->se_list));
 
-	list_for_each_entry_reverse(se, &sai->sai_entries_stated, se_list) {
+	list_for_each_entry_reverse(se, &sai->sai_entries, se_list) {
 		if (se->se_index < entry->se_index) {
 			pos = &se->se_list;
 			break;
 		}
 	}
-
 	list_add(&entry->se_list, pos);
-	entry->se_stat = stat;
+	entry->se_state = ret < 0 ? SA_ENTRY_INVA : SA_ENTRY_SUCC;
+
+	return (index == sai->sai_index_wait);
 }
 
 /*
- * Move entry to sai_entries_stated and sort with the index.
- * \retval 1    -- entry to be destroyed.
- * \retval 0    -- entry is inserted into stated list.
+ * release resources used in async stat RPC, update entry state and wakeup if
+ * scanner process it waiting on this entry.
  */
-static int
-ll_sa_entry_to_stated(struct ll_statahead_info *sai,
-		      struct ll_sa_entry *entry, enum se_stat stat)
+static void
+sa_make_ready(struct ll_statahead_info *sai, struct sa_entry *entry, int ret)
 {
-	struct ll_inode_info *lli = ll_i2info(sai->sai_inode);
-	int		   ret = 1;
+	struct ll_inode_info *lli = ll_i2info(sai->sai_dentry->d_inode);
+	struct md_enqueue_info *minfo = entry->se_minfo;
+	struct ptlrpc_request *req = entry->se_req;
+	bool wakeup;
+
+	/* release resources used in RPC */
+	if (minfo) {
+		entry->se_minfo = NULL;
+		ll_intent_release(&minfo->mi_it);
+		iput(minfo->mi_dir);
+		kfree(minfo);
+	}
 
-	ll_sa_entry_cleanup(sai, entry);
+	if (req) {
+		entry->se_req = NULL;
+		ptlrpc_req_finished(req);
+	}
 
 	spin_lock(&lli->lli_sa_lock);
-	if (likely(entry->se_stat != SA_ENTRY_DEST)) {
-		do_sa_entry_to_stated(sai, entry, stat);
-		ret = 0;
-	}
+	wakeup = __sa_make_ready(sai, entry, ret);
 	spin_unlock(&lli->lli_sa_lock);
 
-	return ret;
+	if (wakeup)
+		wake_up(&sai->sai_waitq);
 }
 
-/*
- * Insert inode into the list of sai_entries_agl.
- */
+/* Insert inode into the list of sai_agls. */
 static void ll_agl_add(struct ll_statahead_info *sai,
 		       struct inode *inode, int index)
 {
 	struct ll_inode_info *child  = ll_i2info(inode);
-	struct ll_inode_info *parent = ll_i2info(sai->sai_inode);
+	struct ll_inode_info *parent = ll_i2info(sai->sai_dentry->d_inode);
 	int		   added  = 0;
 
 	spin_lock(&child->lli_agl_lock);
@@ -426,9 +370,9 @@ static void ll_agl_add(struct ll_statahead_info *sai,
 
 		igrab(inode);
 		spin_lock(&parent->lli_agl_lock);
-		if (list_empty(&sai->sai_entries_agl))
+		if (list_empty(&sai->sai_agls))
 			added = 1;
-		list_add_tail(&child->lli_agl_list, &sai->sai_entries_agl);
+		list_add_tail(&child->lli_agl_list, &sai->sai_agls);
 		spin_unlock(&parent->lli_agl_lock);
 	} else {
 		spin_unlock(&child->lli_agl_lock);
@@ -438,8 +382,10 @@ static void ll_agl_add(struct ll_statahead_info *sai,
 		wake_up(&sai->sai_agl_thread.t_ctl_waitq);
 }
 
-static struct ll_statahead_info *ll_sai_alloc(void)
+/* allocate sai */
+static struct ll_statahead_info *ll_sai_alloc(struct dentry *dentry)
 {
+	struct ll_inode_info *lli = ll_i2info(dentry->d_inode);
 	struct ll_statahead_info *sai;
 	int		       i;
 
@@ -447,24 +393,18 @@ static struct ll_statahead_info *ll_sai_alloc(void)
 	if (!sai)
 		return NULL;
 
+	sai->sai_dentry = dget(dentry);
 	atomic_set(&sai->sai_refcount, 1);
 
-	spin_lock(&sai_generation_lock);
-	sai->sai_generation = ++sai_generation;
-	if (unlikely(sai_generation == 0))
-		sai->sai_generation = ++sai_generation;
-	spin_unlock(&sai_generation_lock);
-
 	sai->sai_max = LL_SA_RPC_MIN;
 	sai->sai_index = 1;
 	init_waitqueue_head(&sai->sai_waitq);
 	init_waitqueue_head(&sai->sai_thread.t_ctl_waitq);
 	init_waitqueue_head(&sai->sai_agl_thread.t_ctl_waitq);
 
+	INIT_LIST_HEAD(&sai->sai_interim_entries);
 	INIT_LIST_HEAD(&sai->sai_entries);
-	INIT_LIST_HEAD(&sai->sai_entries_received);
-	INIT_LIST_HEAD(&sai->sai_entries_stated);
-	INIT_LIST_HEAD(&sai->sai_entries_agl);
+	INIT_LIST_HEAD(&sai->sai_agls);
 
 	for (i = 0; i < LL_SA_CACHE_SIZE; i++) {
 		INIT_LIST_HEAD(&sai->sai_cache[i]);
@@ -472,63 +412,74 @@ static struct ll_statahead_info *ll_sai_alloc(void)
 	}
 	atomic_set(&sai->sai_cache_count, 0);
 
+	spin_lock(&sai_generation_lock);
+	lli->lli_sa_generation = ++sai_generation;
+	if (unlikely(!sai_generation))
+		lli->lli_sa_generation = ++sai_generation;
+	spin_unlock(&sai_generation_lock);
+
 	return sai;
 }
 
-static inline struct ll_statahead_info *
-ll_sai_get(struct ll_statahead_info *sai)
+/* free sai */
+static inline void ll_sai_free(struct ll_statahead_info *sai)
 {
-	atomic_inc(&sai->sai_refcount);
+	LASSERT(sai->sai_dentry);
+	dput(sai->sai_dentry);
+	kfree(sai);
+}
+
+/*
+ * take refcount of sai if sai for @dir exists, which means statahead is on for
+ * this directory.
+ */
+static inline struct ll_statahead_info *ll_sai_get(struct inode *dir)
+{
+	struct ll_inode_info *lli = ll_i2info(dir);
+	struct ll_statahead_info *sai = NULL;
+
+	spin_lock(&lli->lli_sa_lock);
+	sai = lli->lli_sai;
+	if (sai)
+		atomic_inc(&sai->sai_refcount);
+	spin_unlock(&lli->lli_sa_lock);
+
 	return sai;
 }
 
+/*
+ * put sai refcount after use, if refcount reaches zero, free sai and sa_entries
+ * attached to it.
+ */
 static void ll_sai_put(struct ll_statahead_info *sai)
 {
-	struct inode	 *inode = sai->sai_inode;
-	struct ll_inode_info *lli   = ll_i2info(inode);
+	struct ll_inode_info *lli = ll_i2info(sai->sai_dentry->d_inode);
 
 	if (atomic_dec_and_lock(&sai->sai_refcount, &lli->lli_sa_lock)) {
-		struct ll_sa_entry *entry, *next;
-
-		if (unlikely(atomic_read(&sai->sai_refcount) > 0)) {
-			/* It is race case, the interpret callback just hold
-			 * a reference count
-			 */
-			spin_unlock(&lli->lli_sa_lock);
-			return;
-		}
-
-		LASSERT(!lli->lli_opendir_key);
-		LASSERT(thread_is_stopped(&sai->sai_thread));
-		LASSERT(thread_is_stopped(&sai->sai_agl_thread));
+		struct ll_sb_info *sbi = ll_i2sbi(sai->sai_dentry->d_inode);
+		struct sa_entry *entry, *next;
 
 		lli->lli_sai = NULL;
-		lli->lli_opendir_pid = 0;
 		spin_unlock(&lli->lli_sa_lock);
 
-		if (sai->sai_sent > sai->sai_replied)
-			CDEBUG(D_READA, "statahead for dir "DFID
-			      " does not finish: [sent:%llu] [replied:%llu]\n",
-			      PFID(&lli->lli_fid),
-			      sai->sai_sent, sai->sai_replied);
+		LASSERT(thread_is_stopped(&sai->sai_thread));
+		LASSERT(thread_is_stopped(&sai->sai_agl_thread));
+		LASSERT(sai->sai_sent == sai->sai_replied);
+		LASSERT(!sa_has_callback(sai));
 
 		list_for_each_entry_safe(entry, next, &sai->sai_entries,
-					 se_link)
-			do_sa_entry_fini(sai, entry);
-
-		LASSERT(list_empty(&sai->sai_entries));
-		LASSERT(list_empty(&sai->sai_entries_received));
-		LASSERT(list_empty(&sai->sai_entries_stated));
+					 se_list)
+			sa_kill(sai, entry);
 
 		LASSERT(atomic_read(&sai->sai_cache_count) == 0);
-		LASSERT(list_empty(&sai->sai_entries_agl));
+		LASSERT(list_empty(&sai->sai_agls));
 
-		iput(inode);
-		kfree(sai);
+		ll_sai_free(sai);
+		atomic_dec(&sbi->ll_sa_running);
 	}
 }
 
-/* Do NOT forget to drop inode refcount when into sai_entries_agl. */
+/* Do NOT forget to drop inode refcount when into sai_agls. */
 static void ll_agl_trigger(struct inode *inode, struct ll_statahead_info *sai)
 {
 	struct ll_inode_info *lli   = ll_i2info(inode);
@@ -588,29 +539,21 @@ static void ll_agl_trigger(struct inode *inode, struct ll_statahead_info *sai)
 	iput(inode);
 }
 
-static void ll_post_statahead(struct ll_statahead_info *sai)
+/*
+ * prepare inode for sa entry, add it into agl list, now sa_entry is ready
+ * to be used by scanner process.
+ */
+static void sa_instantiate(struct ll_statahead_info *sai,
+			   struct sa_entry *entry)
 {
-	struct inode	   *dir   = sai->sai_inode;
+	struct inode *dir = sai->sai_dentry->d_inode;
 	struct inode	   *child;
-	struct ll_inode_info   *lli   = ll_i2info(dir);
-	struct ll_sa_entry     *entry;
 	struct md_enqueue_info *minfo;
 	struct lookup_intent   *it;
 	struct ptlrpc_request  *req;
 	struct mdt_body	*body;
 	int		     rc    = 0;
 
-	spin_lock(&lli->lli_sa_lock);
-	if (unlikely(list_empty(&sai->sai_entries_received))) {
-		spin_unlock(&lli->lli_sa_lock);
-		return;
-	}
-	entry = list_entry(sai->sai_entries_received.next,
-			   struct ll_sa_entry, se_list);
-	atomic_inc(&entry->se_refcount);
-	list_del_init(&entry->se_list);
-	spin_unlock(&lli->lli_sa_lock);
-
 	LASSERT(entry->se_handle != 0);
 
 	minfo = entry->se_minfo;
@@ -632,7 +575,7 @@ static void ll_post_statahead(struct ll_statahead_info *sai)
 		/* XXX: No fid in reply, this is probably cross-ref case.
 		 * SA can't handle it yet.
 		 */
-		if (body->valid & OBD_MD_MDS) {
+		if (body->mbo_valid & OBD_MD_MDS) {
 			rc = -EAGAIN;
 			goto out;
 		}
@@ -641,7 +584,7 @@ static void ll_post_statahead(struct ll_statahead_info *sai)
 		 * revalidate.
 		 */
 		/* unlinked and re-created with the same name */
-		if (unlikely(!lu_fid_eq(&minfo->mi_data.op_fid2, &body->fid1))) {
+		if (unlikely(!lu_fid_eq(&minfo->mi_data.op_fid2, &body->mbo_fid1))) {
 			entry->se_inode = NULL;
 			iput(child);
 			child = NULL;
@@ -659,8 +602,9 @@ static void ll_post_statahead(struct ll_statahead_info *sai)
 	if (rc)
 		goto out;
 
-	CDEBUG(D_DLMTRACE, "%s: setting l_data to inode "DFID"%p\n",
+	CDEBUG(D_READA, "%s: setting %.*s" DFID " l_data to inode %p\n",
 	       ll_get_fsname(child->i_sb, NULL, 0),
+	       entry->se_qstr.len, entry->se_qstr.name,
 	       PFID(ll_inode2fid(child)), child);
 	ll_set_lock_data(ll_i2sbi(dir)->ll_md_exp, child, it, NULL);
 
@@ -670,34 +614,75 @@ static void ll_post_statahead(struct ll_statahead_info *sai)
 		ll_agl_add(sai, child, entry->se_index);
 
 out:
-	/* The "ll_sa_entry_to_stated()" will drop related ldlm ibits lock
-	 * reference count by calling "ll_intent_drop_lock()" in spite of the
-	 * above operations failed or not. Do not worry about calling
-	 * "ll_intent_drop_lock()" more than once.
+	/*
+	 * sa_make_ready() will drop ldlm ibits lock refcount by calling
+	 * ll_intent_drop_lock() in spite of failures. Do not worry about
+	 * calling ll_intent_drop_lock() more than once.
 	 */
-	rc = ll_sa_entry_to_stated(sai, entry,
-				   rc < 0 ? SA_ENTRY_INVA : SA_ENTRY_SUCC);
-	if (rc == 0 && entry->se_index == sai->sai_index_wait)
-		wake_up(&sai->sai_waitq);
-	ll_sa_entry_put(sai, entry);
+	sa_make_ready(sai, entry, rc);
 }
 
+/* once there are async stat replies, instantiate sa_entry from replies */
+static void sa_handle_callback(struct ll_statahead_info *sai)
+{
+	struct ll_inode_info *lli;
+
+	lli = ll_i2info(sai->sai_dentry->d_inode);
+
+	while (sa_has_callback(sai)) {
+		struct sa_entry *entry;
+
+		spin_lock(&lli->lli_sa_lock);
+		if (unlikely(!sa_has_callback(sai))) {
+			spin_unlock(&lli->lli_sa_lock);
+			break;
+		}
+		entry = list_entry(sai->sai_interim_entries.next,
+				   struct sa_entry, se_list);
+		list_del_init(&entry->se_list);
+		spin_unlock(&lli->lli_sa_lock);
+
+		sa_instantiate(sai, entry);
+	}
+}
+
+/*
+ * callback for async stat, because this is called in ptlrpcd context, we only
+ * put sa_entry in sai_cb_entries list, and let sa_handle_callback() to really
+ * prepare inode and instantiate sa_entry later.
+ */
 static int ll_statahead_interpret(struct ptlrpc_request *req,
 				  struct md_enqueue_info *minfo, int rc)
 {
 	struct lookup_intent     *it  = &minfo->mi_it;
 	struct inode	     *dir = minfo->mi_dir;
 	struct ll_inode_info     *lli = ll_i2info(dir);
-	struct ll_statahead_info *sai = NULL;
-	struct ll_sa_entry       *entry;
-	__u64			  handle = 0;
-	int		       wakeup;
+	struct ll_statahead_info *sai = lli->lli_sai;
+	struct sa_entry *entry = (struct sa_entry *)minfo->mi_cbdata;
+	__u64 handle = 0;
+	bool wakeup;
 
 	if (it_disposition(it, DISP_LOOKUP_NEG))
 		rc = -ENOENT;
 
-	if (rc == 0) {
-		/* release ibits lock ASAP to avoid deadlock when statahead
+	/*
+	 * because statahead thread will wait for all inflight RPC to finish,
+	 * sai should be always valid, no need to refcount
+	 */
+	LASSERT(sai);
+	LASSERT(!thread_is_stopped(&sai->sai_thread));
+	LASSERT(entry);
+
+	CDEBUG(D_READA, "sa_entry %.*s rc %d\n",
+	       entry->se_qstr.len, entry->se_qstr.name, rc);
+
+	if (rc) {
+		ll_intent_release(it);
+		iput(dir);
+		kfree(minfo);
+	} else {
+		/*
+		 * release ibits lock ASAP to avoid deadlock when statahead
 		 * thread enqueues lock on parent in readdir and another
 		 * process enqueues lock on child with parent lock held, eg.
 		 * unlink.
@@ -707,65 +692,32 @@ static int ll_statahead_interpret(struct ptlrpc_request *req,
 	}
 
 	spin_lock(&lli->lli_sa_lock);
-	/* stale entry */
-	if (unlikely(!lli->lli_sai ||
-		     lli->lli_sai->sai_generation != minfo->mi_generation)) {
-		spin_unlock(&lli->lli_sa_lock);
-		rc = -ESTALE;
-		goto out;
+	if (rc) {
+		wakeup = __sa_make_ready(sai, entry, rc);
 	} else {
-		sai = ll_sai_get(lli->lli_sai);
-		if (unlikely(!thread_is_running(&sai->sai_thread))) {
-			sai->sai_replied++;
-			spin_unlock(&lli->lli_sa_lock);
-			rc = -EBADFD;
-			goto out;
-		}
-
-		entry = ll_sa_entry_get_byindex(sai, minfo->mi_cbdata);
-		if (!entry) {
-			sai->sai_replied++;
-			spin_unlock(&lli->lli_sa_lock);
-			rc = -EIDRM;
-			goto out;
-		}
-
-		if (rc != 0) {
-			do_sa_entry_to_stated(sai, entry, SA_ENTRY_INVA);
-			wakeup = (entry->se_index == sai->sai_index_wait);
-		} else {
-			entry->se_minfo = minfo;
-			entry->se_req = ptlrpc_request_addref(req);
-			/* Release the async ibits lock ASAP to avoid deadlock
-			 * when statahead thread tries to enqueue lock on parent
-			 * for readpage and other tries to enqueue lock on child
-			 * with parent's lock held, for example: unlink.
-			 */
-			entry->se_handle = handle;
-			wakeup = list_empty(&sai->sai_entries_received);
-			list_add_tail(&entry->se_list,
-				      &sai->sai_entries_received);
-		}
-		sai->sai_replied++;
-		spin_unlock(&lli->lli_sa_lock);
-
-		ll_sa_entry_put(sai, entry);
-		if (wakeup)
-			wake_up(&sai->sai_thread.t_ctl_waitq);
+		entry->se_minfo = minfo;
+		entry->se_req = ptlrpc_request_addref(req);
+		/*
+		 * Release the async ibits lock ASAP to avoid deadlock
+		 * when statahead thread tries to enqueue lock on parent
+		 * for readpage and other tries to enqueue lock on child
+		 * with parent's lock held, for example: unlink.
+		 */
+		entry->se_handle = handle;
+		wakeup = !sa_has_callback(sai);
+		list_add_tail(&entry->se_list, &sai->sai_interim_entries);
 	}
+	sai->sai_replied++;
+
+	if (wakeup)
+		wake_up(&sai->sai_thread.t_ctl_waitq);
+	spin_unlock(&lli->lli_sa_lock);
 
-out:
-	if (rc != 0) {
-		ll_intent_release(it);
-		iput(dir);
-		kfree(minfo);
-	}
-	if (sai)
-		ll_sai_put(sai);
 	return rc;
 }
 
-static void sa_args_fini(struct md_enqueue_info *minfo,
+/* finish async stat RPC arguments */
+static void sa_fini_data(struct md_enqueue_info *minfo,
 			 struct ldlm_enqueue_info *einfo)
 {
 	LASSERT(minfo && einfo);
@@ -777,12 +729,11 @@ static void sa_args_fini(struct md_enqueue_info *minfo,
 /**
  * prepare arguments for async stat RPC.
  */
-static int sa_args_init(struct inode *dir, struct inode *child,
-			struct ll_sa_entry *entry, struct md_enqueue_info **pmi,
+static int sa_prep_data(struct inode *dir, struct inode *child,
+			struct sa_entry *entry, struct md_enqueue_info **pmi,
 			struct ldlm_enqueue_info **pei)
 {
 	const struct qstr      *qstr = &entry->se_qstr;
-	struct ll_inode_info     *lli  = ll_i2info(dir);
 	struct md_enqueue_info   *minfo;
 	struct ldlm_enqueue_info *einfo;
 	struct md_op_data	*op_data;
@@ -808,8 +759,7 @@ static int sa_args_init(struct inode *dir, struct inode *child,
 	minfo->mi_it.it_op = IT_GETATTR;
 	minfo->mi_dir = igrab(dir);
 	minfo->mi_cb = ll_statahead_interpret;
-	minfo->mi_generation = lli->lli_sai->sai_generation;
-	minfo->mi_cbdata = entry->se_index;
+	minfo->mi_cbdata = entry;
 
 	einfo->ei_type   = LDLM_IBITS;
 	einfo->ei_mode   = it_to_lock_mode(&minfo->mi_it);
@@ -824,31 +774,33 @@ static int sa_args_init(struct inode *dir, struct inode *child,
 	return 0;
 }
 
-static int do_sa_lookup(struct inode *dir, struct ll_sa_entry *entry)
+/* async stat for file not found in dcache */
+static int sa_lookup(struct inode *dir, struct sa_entry *entry)
 {
 	struct md_enqueue_info   *minfo;
 	struct ldlm_enqueue_info *einfo;
 	int		       rc;
 
-	rc = sa_args_init(dir, NULL, entry, &minfo, &einfo);
+	rc = sa_prep_data(dir, NULL, entry, &minfo, &einfo);
 	if (rc)
 		return rc;
 
 	rc = md_intent_getattr_async(ll_i2mdexp(dir), minfo, einfo);
-	if (rc < 0)
-		sa_args_fini(minfo, einfo);
+	if (rc)
+		sa_fini_data(minfo, einfo);
 
 	return rc;
 }
 
 /**
- * similar to ll_revalidate_it().
- * \retval      1 -- dentry valid
- * \retval      0 -- will send stat-ahead request
- * \retval others -- prepare stat-ahead request failed
+ * async stat for file found in dcache, similar to .revalidate
+ *
+ * \retval	1 dentry valid, no RPC sent
+ * \retval	0 dentry invalid, will send async stat RPC
+ * \retval	negative number upon error
  */
-static int do_sa_revalidate(struct inode *dir, struct ll_sa_entry *entry,
-			    struct dentry *dentry)
+static int sa_revalidate(struct inode *dir, struct sa_entry *entry,
+			 struct dentry *dentry)
 {
 	struct inode	     *inode = d_inode(dentry);
 	struct lookup_intent      it = { .it_op = IT_GETATTR,
@@ -872,7 +824,7 @@ static int do_sa_revalidate(struct inode *dir, struct ll_sa_entry *entry,
 		return 1;
 	}
 
-	rc = sa_args_init(dir, inode, entry, &minfo, &einfo);
+	rc = sa_prep_data(dir, inode, entry, &minfo, &einfo);
 	if (rc) {
 		entry->se_inode = NULL;
 		iput(inode);
@@ -880,56 +832,50 @@ static int do_sa_revalidate(struct inode *dir, struct ll_sa_entry *entry,
 	}
 
 	rc = md_intent_getattr_async(ll_i2mdexp(dir), minfo, einfo);
-	if (rc < 0) {
+	if (rc) {
 		entry->se_inode = NULL;
 		iput(inode);
-		sa_args_fini(minfo, einfo);
+		sa_fini_data(minfo, einfo);
 	}
 
 	return rc;
 }
 
-static void ll_statahead_one(struct dentry *parent, const char *entry_name,
-			     int entry_name_len)
+/* async stat for file with @name */
+static void sa_statahead(struct dentry *parent, const char *name, int len)
 {
 	struct inode	     *dir    = d_inode(parent);
 	struct ll_inode_info     *lli    = ll_i2info(dir);
 	struct ll_statahead_info *sai    = lli->lli_sai;
 	struct dentry	    *dentry = NULL;
-	struct ll_sa_entry       *entry;
+	struct sa_entry *entry;
 	int		       rc;
-	int		       rc1;
 
-	entry = ll_sa_entry_alloc(parent, sai, sai->sai_index, entry_name,
-				  entry_name_len);
+	entry = sa_alloc(parent, sai, sai->sai_index, name, len);
 	if (IS_ERR(entry))
 		return;
 
 	dentry = d_lookup(parent, &entry->se_qstr);
 	if (!dentry) {
-		rc = do_sa_lookup(dir, entry);
+		rc = sa_lookup(dir, entry);
 	} else {
-		rc = do_sa_revalidate(dir, entry, dentry);
+		rc = sa_revalidate(dir, entry, dentry);
 		if (rc == 1 && agl_should_run(sai, d_inode(dentry)))
 			ll_agl_add(sai, d_inode(dentry), entry->se_index);
+	}
 
+	if (dentry)
 		dput(dentry);
-	}
 
-	if (rc) {
-		rc1 = ll_sa_entry_to_stated(sai, entry,
-					rc < 0 ? SA_ENTRY_INVA : SA_ENTRY_SUCC);
-		if (rc1 == 0 && entry->se_index == sai->sai_index_wait)
-			wake_up(&sai->sai_waitq);
-	} else {
+	if (rc)
+		sa_make_ready(sai, entry, rc);
+	else
 		sai->sai_sent++;
-	}
 
 	sai->sai_index++;
-	/* drop one refcount on entry by ll_sa_entry_alloc */
-	ll_sa_entry_put(sai, entry);
 }
 
+/* async glimpse (agl) thread main function */
 static int ll_agl_thread(void *arg)
 {
 	struct dentry	    *parent = arg;
@@ -937,10 +883,12 @@ static int ll_agl_thread(void *arg)
 	struct ll_inode_info     *plli   = ll_i2info(dir);
 	struct ll_inode_info     *clli;
 	struct ll_sb_info	*sbi    = ll_i2sbi(dir);
-	struct ll_statahead_info *sai    = ll_sai_get(plli->lli_sai);
-	struct ptlrpc_thread     *thread = &sai->sai_agl_thread;
+	struct ll_statahead_info *sai;
+	struct ptlrpc_thread *thread;
 	struct l_wait_info	lwi    = { 0 };
 
+	sai = ll_sai_get(dir);
+	thread = &sai->sai_agl_thread;
 	thread->t_pid = current_pid();
 	CDEBUG(D_READA, "agl thread started: sai %p, parent %pd\n",
 	       sai, parent);
@@ -959,7 +907,7 @@ static int ll_agl_thread(void *arg)
 
 	while (1) {
 		l_wait_event(thread->t_ctl_waitq,
-			     !list_empty(&sai->sai_entries_agl) ||
+			     !list_empty(&sai->sai_agls) ||
 			     !thread_is_running(thread),
 			     &lwi);
 
@@ -970,8 +918,8 @@ static int ll_agl_thread(void *arg)
 		/* The statahead thread maybe help to process AGL entries,
 		 * so check whether list empty again.
 		 */
-		if (!list_empty(&sai->sai_entries_agl)) {
-			clli = list_entry(sai->sai_entries_agl.next,
+		if (!list_empty(&sai->sai_agls)) {
+			clli = list_entry(sai->sai_agls.next,
 					  struct ll_inode_info, lli_agl_list);
 			list_del_init(&clli->lli_agl_list);
 			spin_unlock(&plli->lli_agl_lock);
@@ -983,8 +931,8 @@ static int ll_agl_thread(void *arg)
 
 	spin_lock(&plli->lli_agl_lock);
 	sai->sai_agl_valid = 0;
-	while (!list_empty(&sai->sai_entries_agl)) {
-		clli = list_entry(sai->sai_entries_agl.next,
+	while (!list_empty(&sai->sai_agls)) {
+		clli = list_entry(sai->sai_agls.next,
 				  struct ll_inode_info, lli_agl_list);
 		list_del_init(&clli->lli_agl_list);
 		spin_unlock(&plli->lli_agl_lock);
@@ -1001,6 +949,7 @@ static int ll_agl_thread(void *arg)
 	return 0;
 }
 
+/* start agl thread */
 static void ll_start_agl(struct dentry *parent, struct ll_statahead_info *sai)
 {
 	struct ptlrpc_thread *thread = &sai->sai_agl_thread;
@@ -1025,58 +974,71 @@ static void ll_start_agl(struct dentry *parent, struct ll_statahead_info *sai)
 		     &lwi);
 }
 
+/* statahead thread main function */
 static int ll_statahead_thread(void *arg)
 {
 	struct dentry	    *parent = arg;
 	struct inode	     *dir    = d_inode(parent);
-	struct ll_inode_info     *plli   = ll_i2info(dir);
-	struct ll_inode_info     *clli;
+	struct ll_inode_info     *lli   = ll_i2info(dir);
 	struct ll_sb_info	*sbi    = ll_i2sbi(dir);
-	struct ll_statahead_info *sai    = ll_sai_get(plli->lli_sai);
-	struct ptlrpc_thread     *thread = &sai->sai_thread;
-	struct ptlrpc_thread *agl_thread = &sai->sai_agl_thread;
-	struct page	      *page;
+	struct ll_statahead_info *sai;
+	struct ptlrpc_thread *sa_thread;
+	struct ptlrpc_thread *agl_thread;
+	struct page	      *page = NULL;
 	__u64		     pos    = 0;
 	int		       first  = 0;
 	int		       rc     = 0;
-	struct ll_dir_chain       chain;
+	struct md_op_data *op_data;
 	struct l_wait_info	lwi    = { 0 };
 
-	thread->t_pid = current_pid();
+	sai = ll_sai_get(dir);
+	sa_thread = &sai->sai_thread;
+	agl_thread = &sai->sai_agl_thread;
+	sa_thread->t_pid = current_pid();
 	CDEBUG(D_READA, "statahead thread starting: sai %p, parent %pd\n",
 	       sai, parent);
 
+	op_data = ll_prep_md_op_data(NULL, dir, dir, NULL, 0, 0,
+				     LUSTRE_OPC_ANY, dir);
+	if (IS_ERR(op_data)) {
+		rc = PTR_ERR(op_data);
+		goto out;
+	}
+
+	op_data->op_max_pages = ll_i2sbi(dir)->ll_md_brw_pages;
+
 	if (sbi->ll_flags & LL_SBI_AGL_ENABLED)
 		ll_start_agl(parent, sai);
 
 	atomic_inc(&sbi->ll_sa_total);
-	spin_lock(&plli->lli_sa_lock);
-	if (thread_is_init(thread))
+	spin_lock(&lli->lli_sa_lock);
+	if (thread_is_init(sa_thread))
 		/* If someone else has changed the thread state
 		 * (e.g. already changed to SVC_STOPPING), we can't just
 		 * blindly overwrite that setting.
 		 */
-		thread_set_flags(thread, SVC_RUNNING);
-	spin_unlock(&plli->lli_sa_lock);
-	wake_up(&thread->t_ctl_waitq);
-
-	ll_dir_chain_init(&chain);
-	page = ll_get_dir_page(dir, pos, &chain);
+		thread_set_flags(sa_thread, SVC_RUNNING);
+	spin_unlock(&lli->lli_sa_lock);
+	wake_up(&sa_thread->t_ctl_waitq);
 
-	while (1) {
+	while (pos != MDS_DIR_END_OFF && thread_is_running(sa_thread)) {
 		struct lu_dirpage *dp;
 		struct lu_dirent  *ent;
 
+		sai->sai_in_readpage = 1;
+		page = ll_get_dir_page(dir, op_data, pos);
+		sai->sai_in_readpage = 0;
 		if (IS_ERR(page)) {
 			rc = PTR_ERR(page);
-			CDEBUG(D_READA, "error reading dir "DFID" at %llu/%llu: [rc %d] [parent %u]\n",
+			CDEBUG(D_READA, "error reading dir "DFID" at %llu/%llu: opendir_pid = %u: rc = %d\n",
 			       PFID(ll_inode2fid(dir)), pos, sai->sai_index,
-			       rc, plli->lli_opendir_pid);
-			goto out;
+			       lli->lli_opendir_pid, rc);
+			break;
 		}
 
 		dp = page_address(page);
-		for (ent = lu_dirent_start(dp); ent;
+		for (ent = lu_dirent_start(dp);
+		     ent && thread_is_running(sa_thread) && !sa_low_hit(sai);
 		     ent = lu_dirent_next(ent)) {
 			__u64 hash;
 			int namelen;
@@ -1123,123 +1085,79 @@ static int ll_statahead_thread(void *arg)
 			if (unlikely(++first == 1))
 				continue;
 
-keep_it:
-			l_wait_event(thread->t_ctl_waitq,
-				     !sa_sent_full(sai) ||
-				     !list_empty(&sai->sai_entries_received) ||
-				     !list_empty(&sai->sai_entries_agl) ||
-				     !thread_is_running(thread),
-				     &lwi);
-
-interpret_it:
-			while (!list_empty(&sai->sai_entries_received))
-				ll_post_statahead(sai);
-
-			if (unlikely(!thread_is_running(thread))) {
-				ll_release_page(page, 0);
-				rc = 0;
-				goto out;
-			}
+			/* wait for spare statahead window */
+			do {
+				l_wait_event(sa_thread->t_ctl_waitq,
+					     !sa_sent_full(sai) ||
+					     sa_has_callback(sai) ||
+					     !list_empty(&sai->sai_agls) ||
+					     !thread_is_running(sa_thread),
+					     &lwi);
+				sa_handle_callback(sai);
 
-			/* If no window for metadata statahead, but there are
-			 * some AGL entries to be triggered, then try to help
-			 * to process the AGL entries.
-			 */
-			if (sa_sent_full(sai)) {
-				spin_lock(&plli->lli_agl_lock);
-				while (!list_empty(&sai->sai_entries_agl)) {
-					clli = list_entry(sai->sai_entries_agl.next,
+				spin_lock(&lli->lli_agl_lock);
+				while (sa_sent_full(sai) &&
+				       !agl_list_empty(sai)) {
+					struct ll_inode_info *clli;
+
+					clli = list_entry(sai->sai_agls.next,
 							  struct ll_inode_info, lli_agl_list);
 					list_del_init(&clli->lli_agl_list);
-					spin_unlock(&plli->lli_agl_lock);
+					spin_unlock(&lli->lli_agl_lock);
+
 					ll_agl_trigger(&clli->lli_vfs_inode,
 						       sai);
 
-					if (!list_empty(&sai->sai_entries_received))
-						goto interpret_it;
-
-					if (unlikely(
-						!thread_is_running(thread))) {
-						ll_release_page(page, 0);
-						rc = 0;
-						goto out;
-					}
-
-					if (!sa_sent_full(sai))
-						goto do_it;
-
-					spin_lock(&plli->lli_agl_lock);
+					spin_lock(&lli->lli_agl_lock);
 				}
-				spin_unlock(&plli->lli_agl_lock);
+				spin_unlock(&lli->lli_agl_lock);
+			} while (sa_sent_full(sai) &&
+				 thread_is_running(sa_thread));
 
-				goto keep_it;
-			}
-
-do_it:
-			ll_statahead_one(parent, name, namelen);
+			sa_statahead(parent, name, namelen);
 		}
-		pos = le64_to_cpu(dp->ldp_hash_end);
-		if (pos == MDS_DIR_END_OFF) {
-			/*
-			 * End of directory reached.
-			 */
-			ll_release_page(page, 0);
-			while (1) {
-				l_wait_event(thread->t_ctl_waitq,
-					     !list_empty(&sai->sai_entries_received) ||
-					     sai->sai_sent == sai->sai_replied ||
-					     !thread_is_running(thread),
-					     &lwi);
 
-				while (!list_empty(&sai->sai_entries_received))
-					ll_post_statahead(sai);
+		pos = le64_to_cpu(dp->ldp_hash_end);
+		ll_release_page(dir, page,
+				le32_to_cpu(dp->ldp_flags) & LDF_COLLIDE);
 
-				if (unlikely(!thread_is_running(thread))) {
-					rc = 0;
-					goto out;
-				}
+		if (sa_low_hit(sai)) {
+			rc = -EFAULT;
+			atomic_inc(&sbi->ll_sa_wrong);
+			CDEBUG(D_READA, "Statahead for dir "DFID" hit ratio too low: hit/miss %llu/%llu, sent/replied %llu/%llu, stopping statahead thread: pid %d\n",
+			       PFID(&lli->lli_fid), sai->sai_hit,
+			       sai->sai_miss, sai->sai_sent,
+			       sai->sai_replied, current_pid());
+			break;
+		}
+	}
+	ll_finish_md_op_data(op_data);
 
-				if (sai->sai_sent == sai->sai_replied &&
-				    list_empty(&sai->sai_entries_received))
-					break;
-			}
+	if (rc < 0) {
+		spin_lock(&lli->lli_sa_lock);
+		thread_set_flags(sa_thread, SVC_STOPPING);
+		lli->lli_sa_enabled = 0;
+		spin_unlock(&lli->lli_sa_lock);
+	}
 
-			spin_lock(&plli->lli_agl_lock);
-			while (!list_empty(&sai->sai_entries_agl) &&
-			       thread_is_running(thread)) {
-				clli = list_entry(sai->sai_entries_agl.next,
-						  struct ll_inode_info, lli_agl_list);
-				list_del_init(&clli->lli_agl_list);
-				spin_unlock(&plli->lli_agl_lock);
-				ll_agl_trigger(&clli->lli_vfs_inode, sai);
-				spin_lock(&plli->lli_agl_lock);
-			}
-			spin_unlock(&plli->lli_agl_lock);
+	/*
+	 * statahead is finished, but statahead entries need to be cached, wait
+	 * for file release to stop me.
+	 */
+	while (thread_is_running(sa_thread)) {
+		l_wait_event(sa_thread->t_ctl_waitq,
+			     sa_has_callback(sai) ||
+			     !agl_list_empty(sai) ||
+			     !thread_is_running(sa_thread),
+			     &lwi);
 
-			rc = 0;
-			goto out;
-		} else if (1) {
-			/*
-			 * chain is exhausted.
-			 * Normal case: continue to the next page.
-			 */
-			ll_release_page(page, le32_to_cpu(dp->ldp_flags) &
-					      LDF_COLLIDE);
-			page = ll_get_dir_page(dir, pos, &chain);
-		} else {
-			LASSERT(le32_to_cpu(dp->ldp_flags) & LDF_COLLIDE);
-			ll_release_page(page, 1);
-			/*
-			 * go into overflow page.
-			 */
-		}
+		sa_handle_callback(sai);
 	}
-
 out:
 	if (sai->sai_agl_valid) {
-		spin_lock(&plli->lli_agl_lock);
+		spin_lock(&lli->lli_agl_lock);
 		thread_set_flags(agl_thread, SVC_STOPPING);
-		spin_unlock(&plli->lli_agl_lock);
+		spin_unlock(&lli->lli_agl_lock);
 		wake_up(&agl_thread->t_ctl_waitq);
 
 		CDEBUG(D_READA, "stop agl thread: sai %p pid %u\n",
@@ -1249,84 +1167,93 @@ out:
 			     &lwi);
 	} else {
 		/* Set agl_thread flags anyway. */
-		thread_set_flags(&sai->sai_agl_thread, SVC_STOPPED);
+		thread_set_flags(agl_thread, SVC_STOPPED);
 	}
-	ll_dir_chain_fini(&chain);
-	spin_lock(&plli->lli_sa_lock);
-	if (!list_empty(&sai->sai_entries_received)) {
-		thread_set_flags(thread, SVC_STOPPING);
-		spin_unlock(&plli->lli_sa_lock);
-
-		/* To release the resources held by received entries. */
-		while (!list_empty(&sai->sai_entries_received))
-			ll_post_statahead(sai);
 
-		spin_lock(&plli->lli_sa_lock);
+	/*
+	 * wait for inflight statahead RPCs to finish, and then we can free sai
+	 * safely because statahead RPC will access sai data
+	 */
+	while (sai->sai_sent != sai->sai_replied) {
+		/* in case we're not woken up, timeout wait */
+		lwi = LWI_TIMEOUT(msecs_to_jiffies(MSEC_PER_SEC >> 3),
+				  NULL, NULL);
+		l_wait_event(sa_thread->t_ctl_waitq,
+			     sai->sai_sent == sai->sai_replied, &lwi);
 	}
-	thread_set_flags(thread, SVC_STOPPED);
-	spin_unlock(&plli->lli_sa_lock);
-	wake_up(&sai->sai_waitq);
-	wake_up(&thread->t_ctl_waitq);
-	ll_sai_put(sai);
-	dput(parent);
+
+	/* release resources held by statahead RPCs */
+	sa_handle_callback(sai);
+
+	spin_lock(&lli->lli_sa_lock);
+	thread_set_flags(sa_thread, SVC_STOPPED);
+	spin_unlock(&lli->lli_sa_lock);
+
 	CDEBUG(D_READA, "statahead thread stopped: sai %p, parent %pd\n",
 	       sai, parent);
+
+	wake_up(&sai->sai_waitq);
+	wake_up(&sa_thread->t_ctl_waitq);
+	ll_sai_put(sai);
+
 	return rc;
 }
 
-/**
- * called in ll_file_release().
- */
-void ll_stop_statahead(struct inode *dir, void *key)
+/* authorize opened dir handle @key to statahead */
+void ll_authorize_statahead(struct inode *dir, void *key)
 {
 	struct ll_inode_info *lli = ll_i2info(dir);
 
-	if (unlikely(!key))
-		return;
-
 	spin_lock(&lli->lli_sa_lock);
-	if (lli->lli_opendir_key != key || lli->lli_opendir_pid == 0) {
-		spin_unlock(&lli->lli_sa_lock);
-		return;
+	if (!lli->lli_opendir_key && !lli->lli_sai) {
+		/*
+		 * if lli_sai is not NULL, it means previous statahead is not
+		 * finished yet, we'd better not start a new statahead for now.
+		 */
+		LASSERT(!lli->lli_opendir_pid);
+		lli->lli_opendir_key = key;
+		lli->lli_opendir_pid = current_pid();
+		lli->lli_sa_enabled = 1;
 	}
+	spin_unlock(&lli->lli_sa_lock);
+}
 
-	lli->lli_opendir_key = NULL;
-
-	if (lli->lli_sai) {
-		struct l_wait_info lwi = { 0 };
-		struct ptlrpc_thread *thread = &lli->lli_sai->sai_thread;
+/*
+ * deauthorize opened dir handle @key to statahead, but statahead thread may
+ * still be running, notify it to quit.
+ */
+void ll_deauthorize_statahead(struct inode *dir, void *key)
+{
+	struct ll_inode_info *lli = ll_i2info(dir);
+	struct ll_statahead_info *sai;
 
-		if (!thread_is_stopped(thread)) {
-			thread_set_flags(thread, SVC_STOPPING);
-			spin_unlock(&lli->lli_sa_lock);
-			wake_up(&thread->t_ctl_waitq);
+	LASSERT(lli->lli_opendir_key == key);
+	LASSERT(lli->lli_opendir_pid);
 
-			CDEBUG(D_READA, "stop statahead thread: sai %p pid %u\n",
-			       lli->lli_sai, (unsigned int)thread->t_pid);
-			l_wait_event(thread->t_ctl_waitq,
-				     thread_is_stopped(thread),
-				     &lwi);
-		} else {
-			spin_unlock(&lli->lli_sa_lock);
-		}
+	CDEBUG(D_READA, "deauthorize statahead for "DFID"\n",
+	       PFID(&lli->lli_fid));
 
+	spin_lock(&lli->lli_sa_lock);
+	lli->lli_opendir_key = NULL;
+	lli->lli_opendir_pid = 0;
+	lli->lli_sa_enabled = 0;
+	sai = lli->lli_sai;
+	if (sai && thread_is_running(&sai->sai_thread)) {
 		/*
-		 * Put the ref which was held when first statahead_enter.
-		 * It maybe not the last ref for some statahead requests
-		 * maybe inflight.
+		 * statahead thread may not quit yet because it needs to cache
+		 * entries, now it's time to tell it to quit.
 		 */
-		ll_sai_put(lli->lli_sai);
-	} else {
-		lli->lli_opendir_pid = 0;
-		spin_unlock(&lli->lli_sa_lock);
+		thread_set_flags(&sai->sai_thread, SVC_STOPPING);
+		wake_up(&sai->sai_thread.t_ctl_waitq);
 	}
+	spin_unlock(&lli->lli_sa_lock);
 }
 
 enum {
 	/**
 	 * not first dirent, or is "."
 	 */
-	LS_NONE_FIRST_DE = 0,
+	LS_NOT_FIRST_DE = 0,
 	/**
 	 * the first non-hidden dirent
 	 */
@@ -1337,17 +1264,26 @@ enum {
 	LS_FIRST_DOT_DE
 };
 
+/* file is first dirent under @dir */
 static int is_first_dirent(struct inode *dir, struct dentry *dentry)
 {
-	struct ll_dir_chain   chain;
 	const struct qstr  *target = &dentry->d_name;
+	struct md_op_data *op_data;
 	struct page	  *page;
 	__u64		 pos    = 0;
 	int		   dot_de;
-	int		   rc     = LS_NONE_FIRST_DE;
+	int rc = LS_NOT_FIRST_DE;
 
-	ll_dir_chain_init(&chain);
-	page = ll_get_dir_page(dir, pos, &chain);
+	op_data = ll_prep_md_op_data(NULL, dir, dir, NULL, 0, 0,
+				     LUSTRE_OPC_ANY, dir);
+	if (IS_ERR(op_data))
+		return PTR_ERR(op_data);
+	/**
+	 * FIXME choose the start offset of the readdir
+	 */
+	op_data->op_max_pages = ll_i2sbi(dir)->ll_md_brw_pages;
+
+	page = ll_get_dir_page(dir, op_data, pos);
 
 	while (1) {
 		struct lu_dirpage *dp;
@@ -1357,9 +1293,10 @@ static int is_first_dirent(struct inode *dir, struct dentry *dentry)
 			struct ll_inode_info *lli = ll_i2info(dir);
 
 			rc = PTR_ERR(page);
-			CERROR("error reading dir "DFID" at %llu: [rc %d] [parent %u]\n",
+			CERROR("%s: error reading dir "DFID" at %llu: opendir_pid = %u : rc = %d\n",
+			       ll_get_fsname(dir->i_sb, NULL, 0),
 			       PFID(ll_inode2fid(dir)), pos,
-			       rc, lli->lli_opendir_pid);
+			       lli->lli_opendir_pid, rc);
 			break;
 		}
 
@@ -1411,13 +1348,13 @@ static int is_first_dirent(struct inode *dir, struct dentry *dentry)
 
 			if (target->len != namelen ||
 			    memcmp(target->name, name, namelen) != 0)
-				rc = LS_NONE_FIRST_DE;
+				rc = LS_NOT_FIRST_DE;
 			else if (!dot_de)
 				rc = LS_FIRST_DE;
 			else
 				rc = LS_FIRST_DOT_DE;
 
-			ll_release_page(page, 0);
+			ll_release_page(dir, page, false);
 			goto out;
 		}
 		pos = le64_to_cpu(dp->ldp_hash_end);
@@ -1425,261 +1362,228 @@ static int is_first_dirent(struct inode *dir, struct dentry *dentry)
 			/*
 			 * End of directory reached.
 			 */
-			ll_release_page(page, 0);
-			break;
-		} else if (1) {
+			ll_release_page(dir, page, false);
+			goto out;
+		} else {
 			/*
 			 * chain is exhausted
 			 * Normal case: continue to the next page.
 			 */
-			ll_release_page(page, le32_to_cpu(dp->ldp_flags) &
-					      LDF_COLLIDE);
-			page = ll_get_dir_page(dir, pos, &chain);
-		} else {
-			/*
-			 * go into overflow page.
-			 */
-			LASSERT(le32_to_cpu(dp->ldp_flags) & LDF_COLLIDE);
-			ll_release_page(page, 1);
+			ll_release_page(dir, page,
+					le32_to_cpu(dp->ldp_flags) &
+					LDF_COLLIDE);
+			page = ll_get_dir_page(dir, op_data, pos);
 		}
 	}
-
 out:
-	ll_dir_chain_fini(&chain);
+	ll_finish_md_op_data(op_data);
 	return rc;
 }
 
-static void
-ll_sai_unplug(struct ll_statahead_info *sai, struct ll_sa_entry *entry)
-{
-	struct ptlrpc_thread *thread = &sai->sai_thread;
-	struct ll_sb_info    *sbi    = ll_i2sbi(sai->sai_inode);
-	int		   hit;
-
-	if (entry && entry->se_stat == SA_ENTRY_SUCC)
-		hit = 1;
-	else
-		hit = 0;
-
-	ll_sa_entry_fini(sai, entry);
-	if (hit) {
-		sai->sai_hit++;
-		sai->sai_consecutive_miss = 0;
-		sai->sai_max = min(2 * sai->sai_max, sbi->ll_sa_max);
-	} else {
-		struct ll_inode_info *lli = ll_i2info(sai->sai_inode);
-
-		sai->sai_miss++;
-		sai->sai_consecutive_miss++;
-		if (sa_low_hit(sai) && thread_is_running(thread)) {
-			atomic_inc(&sbi->ll_sa_wrong);
-			CDEBUG(D_READA, "Statahead for dir " DFID " hit ratio too low: hit/miss %llu/%llu, sent/replied %llu/%llu, stopping statahead thread\n",
-			       PFID(&lli->lli_fid), sai->sai_hit,
-			       sai->sai_miss, sai->sai_sent,
-			       sai->sai_replied);
-			spin_lock(&lli->lli_sa_lock);
-			if (!thread_is_stopped(thread))
-				thread_set_flags(thread, SVC_STOPPING);
-			spin_unlock(&lli->lli_sa_lock);
-		}
-	}
-
-	if (!thread_is_stopped(thread))
-		wake_up(&thread->t_ctl_waitq);
-}
-
 /**
- * Start statahead thread if this is the first dir entry.
- * Otherwise if a thread is started already, wait it until it is ahead of me.
- * \retval 1       -- find entry with lock in cache, the caller needs to do
- *		    nothing.
- * \retval 0       -- find entry in cache, but without lock, the caller needs
- *		    refresh from MDS.
- * \retval others  -- the caller need to process as non-statahead.
+ * revalidate @dentryp from statahead cache
+ *
+ * \param[in]  dir	parent directory
+ * \param[in]  sai	sai structure
+ * \param[out] dentryp	pointer to dentry which will be revalidated
+ * \param[in]  unplug	unplug statahead window only (normally for negative
+ *			dentry)
+ * \retval		1 on success, dentry is saved in @dentryp
+ * \retval		0 if revalidation failed (no proper lock on client)
+ * \retval		negative number upon error
  */
-int do_statahead_enter(struct inode *dir, struct dentry **dentryp,
-		       int only_unplug)
+static int revalidate_statahead_dentry(struct inode *dir,
+				       struct ll_statahead_info *sai,
+				       struct dentry **dentryp,
+				       bool unplug)
 {
-	struct ll_inode_info     *lli   = ll_i2info(dir);
-	struct ll_statahead_info *sai   = lli->lli_sai;
-	struct dentry	    *parent;
-	struct ll_sa_entry       *entry;
-	struct ptlrpc_thread     *thread;
-	struct l_wait_info	lwi   = { 0 };
-	struct task_struct *task;
-	int		       rc    = 0;
-	struct ll_inode_info     *plli;
+	struct sa_entry *entry = NULL;
+	struct l_wait_info lwi = { 0 };
+	struct ll_dentry_data *ldd;
+	struct ll_inode_info *lli;
+	int rc = 0;
 
-	LASSERT(lli->lli_opendir_pid == current_pid());
+	if ((*dentryp)->d_name.name[0] == '.') {
+		if (sai->sai_ls_all ||
+		    sai->sai_miss_hidden >= sai->sai_skip_hidden) {
+			/*
+			 * Hidden dentry is the first one, or statahead
+			 * thread does not skip so many hidden dentries
+			 * before "sai_ls_all" enabled as below.
+			 */
+		} else {
+			if (!sai->sai_ls_all)
+				/*
+				 * It maybe because hidden dentry is not
+				 * the first one, "sai_ls_all" was not
+				 * set, then "ls -al" missed. Enable
+				 * "sai_ls_all" for such case.
+				 */
+				sai->sai_ls_all = 1;
 
-	if (sai) {
-		thread = &sai->sai_thread;
-		if (unlikely(thread_is_stopped(thread) &&
-			     list_empty(&sai->sai_entries_stated))) {
-			/* to release resource */
-			ll_stop_statahead(dir, lli->lli_opendir_key);
+			/*
+			 * Such "getattr" has been skipped before
+			 * "sai_ls_all" enabled as above.
+			 */
+			sai->sai_miss_hidden++;
 			return -EAGAIN;
 		}
+	}
 
-		if ((*dentryp)->d_name.name[0] == '.') {
-			if (sai->sai_ls_all ||
-			    sai->sai_miss_hidden >= sai->sai_skip_hidden) {
-				/*
-				 * Hidden dentry is the first one, or statahead
-				 * thread does not skip so many hidden dentries
-				 * before "sai_ls_all" enabled as below.
-				 */
-			} else {
-				if (!sai->sai_ls_all)
-					/*
-					 * It maybe because hidden dentry is not
-					 * the first one, "sai_ls_all" was not
-					 * set, then "ls -al" missed. Enable
-					 * "sai_ls_all" for such case.
-					 */
-					sai->sai_ls_all = 1;
+	if (unplug) {
+		rc = 1;
+		goto out_unplug;
+	}
 
-				/*
-				 * Such "getattr" has been skipped before
-				 * "sai_ls_all" enabled as above.
-				 */
-				sai->sai_miss_hidden++;
-				return -EAGAIN;
-			}
-		}
+	entry = sa_get(sai, &(*dentryp)->d_name);
+	if (!entry) {
+		rc = -EAGAIN;
+		goto out_unplug;
+	}
 
-		entry = ll_sa_entry_get_byname(sai, &(*dentryp)->d_name);
-		if (!entry || only_unplug) {
-			ll_sai_unplug(sai, entry);
-			return entry ? 1 : -EAGAIN;
-		}
+	/* if statahead is busy in readdir, help it do post-work */
+	if (!sa_ready(entry) && sai->sai_in_readpage)
+		sa_handle_callback(sai);
 
-		if (!ll_sa_entry_stated(entry)) {
-			sai->sai_index_wait = entry->se_index;
-			lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(30), NULL,
-					       LWI_ON_SIGNAL_NOOP, NULL);
-			rc = l_wait_event(sai->sai_waitq,
-					  ll_sa_entry_stated(entry) ||
-					  thread_is_stopped(thread),
-					  &lwi);
-			if (rc < 0) {
-				ll_sai_unplug(sai, entry);
-				return -EAGAIN;
-			}
+	if (!sa_ready(entry)) {
+		sai->sai_index_wait = entry->se_index;
+		lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(30), NULL,
+				       LWI_ON_SIGNAL_NOOP, NULL);
+		rc = l_wait_event(sai->sai_waitq, sa_ready(entry), &lwi);
+		if (rc < 0) {
+			/*
+			 * entry may not be ready, so it may be used by inflight
+			 * statahead RPC, don't free it.
+			 */
+			entry = NULL;
+			rc = -EAGAIN;
+			goto out_unplug;
 		}
+	}
 
-		if (entry->se_stat == SA_ENTRY_SUCC && entry->se_inode) {
-			struct inode *inode = entry->se_inode;
-			struct lookup_intent it = { .it_op = IT_GETATTR,
-						    .it_lock_handle =
-						     entry->se_handle };
-			__u64 bits;
-
-			rc = md_revalidate_lock(ll_i2mdexp(dir), &it,
-						ll_inode2fid(inode), &bits);
-			if (rc == 1) {
-				if (!d_inode(*dentryp)) {
-					struct dentry *alias;
-
-					alias = ll_splice_alias(inode,
-								*dentryp);
-					if (IS_ERR(alias)) {
-						ll_sai_unplug(sai, entry);
-						return PTR_ERR(alias);
-					}
-					*dentryp = alias;
-				} else if (d_inode(*dentryp) != inode) {
-					/* revalidate, but inode is recreated */
-					CDEBUG(D_READA, "%s: stale dentry %pd inode "DFID", statahead inode "DFID"\n",
-					       ll_get_fsname(d_inode(*dentryp)->i_sb, NULL, 0),
-					       *dentryp,
-					       PFID(ll_inode2fid(d_inode(*dentryp))),
-					       PFID(ll_inode2fid(inode)));
-					ll_sai_unplug(sai, entry);
-					return -ESTALE;
-				} else {
-					iput(inode);
+	if (entry->se_state == SA_ENTRY_SUCC && entry->se_inode) {
+		struct inode *inode = entry->se_inode;
+		struct lookup_intent it = { .it_op = IT_GETATTR,
+					    .it_lock_handle = entry->se_handle };
+		__u64 bits;
+
+		rc = md_revalidate_lock(ll_i2mdexp(dir), &it,
+					ll_inode2fid(inode), &bits);
+		if (rc == 1) {
+			if (!(*dentryp)->d_inode) {
+				struct dentry *alias;
+
+				alias = ll_splice_alias(inode, *dentryp);
+				if (IS_ERR(alias)) {
+					rc = PTR_ERR(alias);
+					goto out_unplug;
 				}
+				*dentryp = alias;
+				/**
+				 * statahead prepared this inode, transfer inode
+				 * refcount from sa_entry to dentry
+				 */
 				entry->se_inode = NULL;
-
-				if ((bits & MDS_INODELOCK_LOOKUP) &&
-				    d_lustre_invalid(*dentryp))
-					d_lustre_revalidate(*dentryp);
-				ll_intent_release(&it);
+			} else if ((*dentryp)->d_inode != inode) {
+				/* revalidate, but inode is recreated */
+				CDEBUG(D_READA,
+				       "%s: stale dentry %pd inode "DFID", statahead inode "DFID"\n",
+				       ll_get_fsname((*dentryp)->d_inode->i_sb,
+						     NULL, 0),
+				       *dentryp,
+				       PFID(ll_inode2fid((*dentryp)->d_inode)),
+				       PFID(ll_inode2fid(inode)));
+				rc = -ESTALE;
+				goto out_unplug;
 			}
-		}
 
-		ll_sai_unplug(sai, entry);
-		return rc;
+			if ((bits & MDS_INODELOCK_LOOKUP) &&
+			    d_lustre_invalid(*dentryp))
+				d_lustre_revalidate(*dentryp);
+			ll_intent_release(&it);
+		}
 	}
+out_unplug:
+	/*
+	 * statahead cached sa_entry can be used only once, and will be killed
+	 * right after use, so if lookup/revalidate accessed statahead cache,
+	 * set dentry ldd_sa_generation to parent lli_sa_generation, later if we
+	 * stat this file again, we know we've done statahead before, see
+	 * dentry_may_statahead().
+	 */
+	ldd = ll_d2d(*dentryp);
+	lli = ll_i2info(dir);
+	/* ldd can be NULL if llite lookup failed. */
+	if (ldd)
+		ldd->lld_sa_generation = lli->lli_sa_generation;
+	sa_put(sai, entry);
+	return rc;
+}
+
+/**
+ * start statahead thread
+ *
+ * \param[in] dir	parent directory
+ * \param[in] dentry	dentry that triggers statahead, normally the first
+ *			dirent under @dir
+ * \retval		-EAGAIN on success, because when this function is
+ *			called, it's already in lookup call, so client should
+ *			do it itself instead of waiting for statahead thread
+ *			to do it asynchronously.
+ * \retval		negative number upon error
+ */
+static int start_statahead_thread(struct inode *dir, struct dentry *dentry)
+{
+	struct ll_inode_info *lli = ll_i2info(dir);
+	struct ll_statahead_info *sai = NULL;
+	struct l_wait_info lwi = { 0 };
+	struct ptlrpc_thread *thread;
+	struct task_struct *task;
+	struct dentry *parent = dentry->d_parent;
+	int rc;
 
 	/* I am the "lli_opendir_pid" owner, only me can set "lli_sai". */
-	rc = is_first_dirent(dir, *dentryp);
-	if (rc == LS_NONE_FIRST_DE) {
+	rc = is_first_dirent(dir, dentry);
+	if (rc == LS_NOT_FIRST_DE) {
 		/* It is not "ls -{a}l" operation, no need statahead for it. */
-		rc = -EAGAIN;
+		rc = -EFAULT;
 		goto out;
 	}
 
-	sai = ll_sai_alloc();
+	sai = ll_sai_alloc(parent);
 	if (!sai) {
 		rc = -ENOMEM;
 		goto out;
 	}
 
 	sai->sai_ls_all = (rc == LS_FIRST_DOT_DE);
-	sai->sai_inode = igrab(dir);
-	if (unlikely(!sai->sai_inode)) {
-		CWARN("Do not start stat ahead on dying inode "DFID"\n",
-		      PFID(&lli->lli_fid));
-		rc = -ESTALE;
-		goto out;
-	}
-
-	/* get parent reference count here, and put it in ll_statahead_thread */
-	parent = dget((*dentryp)->d_parent);
-	if (unlikely(sai->sai_inode != d_inode(parent))) {
-		struct ll_inode_info *nlli = ll_i2info(d_inode(parent));
-
-		CWARN("Race condition, someone changed %pd just now: old parent "DFID", new parent "DFID"\n",
-		      *dentryp,
-		      PFID(&lli->lli_fid), PFID(&nlli->lli_fid));
-		dput(parent);
-		iput(sai->sai_inode);
-		rc = -EAGAIN;
+	/*
+	 * if current lli_opendir_key was deauthorized, or dir re-opened by
+	 * another process, don't start statahead, otherwise the newly spawned
+	 * statahead thread won't be notified to quit.
+	 */
+	spin_lock(&lli->lli_sa_lock);
+	if (unlikely(lli->lli_sai || lli->lli_opendir_key ||
+		     lli->lli_opendir_pid != current->pid)) {
+		spin_unlock(&lli->lli_sa_lock);
+		rc = -EPERM;
 		goto out;
 	}
+	lli->lli_sai = sai;
+	spin_unlock(&lli->lli_sa_lock);
 
-	CDEBUG(D_READA, "start statahead thread: sai %p, parent %pd\n",
-	       sai, parent);
+	atomic_inc(&ll_i2sbi(parent->d_inode)->ll_sa_running);
 
-	/* The sai buffer already has one reference taken at allocation time,
-	 * but as soon as we expose the sai by attaching it to the lli that
-	 * default reference can be dropped by another thread calling
-	 * ll_stop_statahead. We need to take a local reference to protect
-	 * the sai buffer while we intend to access it.
-	 */
-	ll_sai_get(sai);
-	lli->lli_sai = sai;
+	CDEBUG(D_READA, "start statahead thread: [pid %d] [parent %pd]\n",
+	       current_pid(), parent);
 
-	plli = ll_i2info(d_inode(parent));
 	task = kthread_run(ll_statahead_thread, parent, "ll_sa_%u",
-			   plli->lli_opendir_pid);
+			   lli->lli_opendir_pid);
 	thread = &sai->sai_thread;
 	if (IS_ERR(task)) {
 		rc = PTR_ERR(task);
-		CERROR("can't start ll_sa thread, rc: %d\n", rc);
-		dput(parent);
-		lli->lli_opendir_key = NULL;
-		thread_set_flags(thread, SVC_STOPPED);
-		thread_set_flags(&sai->sai_agl_thread, SVC_STOPPED);
-		/* Drop both our own local reference and the default
-		 * reference from allocation time.
-		 */
-		ll_sai_put(sai);
-		ll_sai_put(sai);
-		LASSERT(!lli->lli_sai);
-		return -EAGAIN;
+		CERROR("can't start ll_sa thread, rc : %d\n", rc);
+		goto out;
 	}
 
 	l_wait_event(thread->t_ctl_waitq,
@@ -1694,10 +1598,47 @@ int do_statahead_enter(struct inode *dir, struct dentry **dentryp,
 	return -EAGAIN;
 
 out:
-	kfree(sai);
+	/*
+	 * once we start statahead thread failed, disable statahead so
+	 * that subsequent stat won't waste time to try it.
+	 */
 	spin_lock(&lli->lli_sa_lock);
-	lli->lli_opendir_key = NULL;
-	lli->lli_opendir_pid = 0;
+	lli->lli_sa_enabled = 0;
+	lli->lli_sai = NULL;
 	spin_unlock(&lli->lli_sa_lock);
+	if (sai)
+		ll_sai_free(sai);
 	return rc;
 }
+
+/**
+ * statahead entry function, this is called when client getattr on a file, it
+ * will start statahead thread if this is the first dir entry, else revalidate
+ * dentry from statahead cache.
+ *
+ * \param[in]  dir	parent directory
+ * \param[out] dentryp	dentry to getattr
+ * \param[in]  unplug	unplug statahead window only (normally for negative
+ *			dentry)
+ * \retval		1 on success
+ * \retval		0 revalidation from statahead cache failed, caller needs
+ *			to getattr from server directly
+ * \retval		negative number on error, caller often ignores this and
+ *			then getattr from server
+ */
+int ll_statahead(struct inode *dir, struct dentry **dentryp, bool unplug)
+{
+	struct ll_statahead_info *sai;
+
+	sai = ll_sai_get(dir);
+	if (sai) {
+		int rc;
+
+		rc = revalidate_statahead_dentry(dir, sai, dentryp, unplug);
+		CDEBUG(D_READA, "revalidate statahead %pd: %d.\n",
+		       *dentryp, rc);
+		ll_sai_put(sai);
+		return rc;
+	}
+	return start_statahead_thread(dir, *dentryp);
+}
diff --git a/drivers/staging/lustre/lustre/llite/super25.c b/drivers/staging/lustre/lustre/llite/super25.c
index 3dd7e0eb0b54..106cd00910a7 100644
--- a/drivers/staging/lustre/lustre/llite/super25.c
+++ b/drivers/staging/lustre/lustre/llite/super25.c
@@ -34,7 +34,6 @@
 
 #include <linux/module.h>
 #include <linux/types.h>
-#include "../include/lustre_lite.h"
 #include "../include/lustre_ha.h"
 #include "../include/lustre_dlm.h"
 #include <linux/init.h>
@@ -83,8 +82,6 @@ struct super_operations lustre_super_operations = {
 };
 MODULE_ALIAS_FS("lustre");
 
-void lustre_register_client_process_config(int (*cpc)(struct lustre_cfg *lcfg));
-
 static int __init lustre_init(void)
 {
 	lnet_process_id_t lnet_id;
@@ -102,8 +99,8 @@ static int __init lustre_init(void)
 
 	rc = -ENOMEM;
 	ll_inode_cachep = kmem_cache_create("lustre_inode_cache",
-					    sizeof(struct ll_inode_info),
-					    0, SLAB_HWCACHE_ALIGN|SLAB_ACCOUNT,
+					    sizeof(struct ll_inode_info), 0,
+					    SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT,
 					    NULL);
 	if (!ll_inode_cachep)
 		goto out_cache;
diff --git a/drivers/staging/lustre/lustre/llite/symlink.c b/drivers/staging/lustre/lustre/llite/symlink.c
index 8c8bdfe1ad71..f8bc7ed59646 100644
--- a/drivers/staging/lustre/lustre/llite/symlink.c
+++ b/drivers/staging/lustre/lustre/llite/symlink.c
@@ -35,7 +35,6 @@
 #include <linux/stat.h>
 #define DEBUG_SUBSYSTEM S_LLITE
 
-#include "../include/lustre_lite.h"
 #include "llite_internal.h"
 
 static int ll_readlink_internal(struct inode *inode,
@@ -80,17 +79,17 @@ static int ll_readlink_internal(struct inode *inode,
 	}
 
 	body = req_capsule_server_get(&(*request)->rq_pill, &RMF_MDT_BODY);
-	if ((body->valid & OBD_MD_LINKNAME) == 0) {
+	if ((body->mbo_valid & OBD_MD_LINKNAME) == 0) {
 		CERROR("OBD_MD_LINKNAME not set on reply\n");
 		rc = -EPROTO;
 		goto failed;
 	}
 
 	LASSERT(symlen != 0);
-	if (body->eadatasize != symlen) {
+	if (body->mbo_eadatasize != symlen) {
 		CERROR("%s: inode "DFID": symlink length %d not expected %d\n",
 		       ll_get_fsname(inode->i_sb, NULL, 0),
-		       PFID(ll_inode2fid(inode)), body->eadatasize - 1,
+		       PFID(ll_inode2fid(inode)), body->mbo_eadatasize - 1,
 		       symlen - 1);
 		rc = -EPROTO;
 		goto failed;
@@ -155,8 +154,8 @@ const struct inode_operations ll_fast_symlink_inode_operations = {
 	.get_link	= ll_get_link,
 	.getattr	= ll_getattr,
 	.permission	= ll_inode_permission,
-	.setxattr	= ll_setxattr,
-	.getxattr	= ll_getxattr,
+	.setxattr	= generic_setxattr,
+	.getxattr	= generic_getxattr,
 	.listxattr	= ll_listxattr,
-	.removexattr	= ll_removexattr,
+	.removexattr	= generic_removexattr,
 };
diff --git a/drivers/staging/lustre/lustre/llite/vvp_dev.c b/drivers/staging/lustre/lustre/llite/vvp_dev.c
index e623216e962d..8aa8ecc09a48 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_dev.c
+++ b/drivers/staging/lustre/lustre/llite/vvp_dev.c
@@ -38,7 +38,6 @@
 #define DEBUG_SUBSYSTEM S_LLITE
 
 #include "../include/obd.h"
-#include "../include/lustre_lite.h"
 #include "llite_internal.h"
 #include "vvp_internal.h"
 
@@ -368,12 +367,6 @@ int cl_sb_fini(struct super_block *sb)
 		CERROR("Cannot cleanup cl-stack due to memory shortage.\n");
 		result = PTR_ERR(env);
 	}
-	/*
-	 * If mount failed (sbi->ll_cl == NULL), and this there are no other
-	 * mounts, stop device types manually (this usually happens
-	 * automatically when last device is destroyed).
-	 */
-	lu_types_stop();
 	return result;
 }
 
diff --git a/drivers/staging/lustre/lustre/llite/vvp_internal.h b/drivers/staging/lustre/lustre/llite/vvp_internal.h
index 79fc428461ed..5802da81cd0e 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_internal.h
+++ b/drivers/staging/lustre/lustre/llite/vvp_internal.h
@@ -217,11 +217,12 @@ struct vvp_object {
 	struct list_head	vob_pending_list;
 
 	/**
-	 * Access this counter is protected by inode->i_sem. Now that
-	 * the lifetime of transient pages must be covered by inode sem,
-	 * we don't need to hold any lock..
+	 * Number of transient pages.  This is no longer protected by i_sem,
+	 * and needs to be atomic.  This is not actually used for anything,
+	 * and can probably be removed.
 	 */
-	int			vob_transient_pages;
+	atomic_t		vob_transient_pages;
+
 	/**
 	 * Number of outstanding mmaps on this file.
 	 *
@@ -247,9 +248,9 @@ struct vvp_object {
  */
 struct vvp_page {
 	struct cl_page_slice vpg_cl;
-	int		  vpg_defer_uptodate;
-	int		  vpg_ra_used;
-	int		  vpg_write_queued;
+	unsigned int	vpg_defer_uptodate:1,
+			vpg_ra_used:1,
+			vpg_write_queued:1;
 	/**
 	 * Non-empty iff this page is already counted in
 	 * vvp_object::vob_pending_list. This list is only used as a flag,
diff --git a/drivers/staging/lustre/lustre/llite/vvp_io.c b/drivers/staging/lustre/lustre/llite/vvp_io.c
index 94916dcc6caa..2ab450359b6d 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_io.c
+++ b/drivers/staging/lustre/lustre/llite/vvp_io.c
@@ -38,7 +38,6 @@
 #define DEBUG_SUBSYSTEM S_LLITE
 
 #include "../include/obd.h"
-#include "../include/lustre_lite.h"
 
 #include "llite_internal.h"
 #include "vvp_internal.h"
@@ -628,7 +627,7 @@ static int vvp_io_setattr_time(const struct lu_env *env,
 		attr->cat_mtime = io->u.ci_setattr.sa_attr.lvb_mtime;
 		valid |= CAT_MTIME;
 	}
-	result = cl_object_attr_set(env, obj, attr, valid);
+	result = cl_object_attr_update(env, obj, attr, valid);
 	cl_object_attr_unlock(obj);
 
 	return result;
@@ -821,7 +820,7 @@ static void write_commit_callback(const struct lu_env *env, struct cl_io *io,
 	cl_page_disown(env, io, page);
 
 	/* held in ll_cl_init() */
-	lu_ref_del(&page->cp_reference, "cl_io", io);
+	lu_ref_del(&page->cp_reference, "cl_io", cl_io_top(io));
 	cl_page_put(env, page);
 }
 
@@ -959,10 +958,30 @@ static int vvp_io_write_start(const struct lu_env *env,
 
 	CDEBUG(D_VFSTRACE, "write: [%lli, %lli)\n", pos, pos + (long long)cnt);
 
-	if (!vio->vui_iter) /* from a temp io in ll_cl_init(). */
+	if (!vio->vui_iter) {
+		/* from a temp io in ll_cl_init(). */
 		result = 0;
-	else
-		result = generic_file_write_iter(vio->vui_iocb, vio->vui_iter);
+	} else {
+		/*
+		 * When using the locked AIO function (generic_file_aio_write())
+		 * testing has shown the inode mutex to be a limiting factor
+		 * with multi-threaded single shared file performance. To get
+		 * around this, we now use the lockless version. To maintain
+		 * consistency, proper locking to protect against writes,
+		 * trucates, etc. is handled in the higher layers of lustre.
+		 */
+		bool lock_node = !IS_NOSEC(inode);
+
+		if (lock_node)
+			inode_lock(inode);
+		result = __generic_file_write_iter(vio->vui_iocb,
+						   vio->vui_iter);
+		if (lock_node)
+			inode_unlock(inode);
+
+		if (result > 0 || result == -EIOCBQUEUED)
+			result = generic_write_sync(vio->vui_iocb, result);
+	}
 
 	if (result > 0) {
 		result = vvp_io_write_commit(env, io);
diff --git a/drivers/staging/lustre/lustre/llite/vvp_lock.c b/drivers/staging/lustre/lustre/llite/vvp_lock.c
index 64be0c9df35b..07eb26cc43f5 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_lock.c
+++ b/drivers/staging/lustre/lustre/llite/vvp_lock.c
@@ -37,7 +37,6 @@
 #define DEBUG_SUBSYSTEM S_LLITE
 
 #include "../include/obd_support.h"
-#include "../include/lustre_lite.h"
 
 #include "vvp_internal.h"
 
diff --git a/drivers/staging/lustre/lustre/llite/vvp_object.c b/drivers/staging/lustre/lustre/llite/vvp_object.c
index 2c520b0bf6ca..b57195d15674 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_object.c
+++ b/drivers/staging/lustre/lustre/llite/vvp_object.c
@@ -39,7 +39,6 @@
 #include "../../include/linux/libcfs/libcfs.h"
 
 #include "../include/obd.h"
-#include "../include/lustre_lite.h"
 
 #include "llite_internal.h"
 #include "vvp_internal.h"
@@ -68,8 +67,8 @@ static int vvp_object_print(const struct lu_env *env, void *cookie,
 
 	(*p)(env, cookie, "(%s %d %d) inode: %p ",
 	     list_empty(&obj->vob_pending_list) ? "-" : "+",
-	     obj->vob_transient_pages, atomic_read(&obj->vob_mmap_cnt),
-	     inode);
+	     atomic_read(&obj->vob_transient_pages),
+	     atomic_read(&obj->vob_mmap_cnt), inode);
 	if (inode) {
 		lli = ll_i2info(inode);
 		(*p)(env, cookie, "%lu/%u %o %u %d %p "DFID,
@@ -102,8 +101,8 @@ static int vvp_attr_get(const struct lu_env *env, struct cl_object *obj,
 	return 0; /* layers below have to fill in the rest */
 }
 
-static int vvp_attr_set(const struct lu_env *env, struct cl_object *obj,
-			const struct cl_attr *attr, unsigned valid)
+static int vvp_attr_update(const struct lu_env *env, struct cl_object *obj,
+			   const struct cl_attr *attr, unsigned int valid)
 {
 	struct inode *inode = vvp_object_inode(obj);
 
@@ -120,7 +119,7 @@ static int vvp_attr_set(const struct lu_env *env, struct cl_object *obj,
 	if (0 && valid & CAT_SIZE)
 		i_size_write(inode, attr->cat_size);
 	/* not currently necessary */
-	if (0 && valid & (CAT_UID|CAT_GID|CAT_SIZE))
+	if (0 && valid & (CAT_UID | CAT_GID | CAT_SIZE))
 		mark_inode_dirty(inode);
 	return 0;
 }
@@ -210,7 +209,7 @@ static const struct cl_object_operations vvp_ops = {
 	.coo_lock_init = vvp_lock_init,
 	.coo_io_init   = vvp_io_init,
 	.coo_attr_get  = vvp_attr_get,
-	.coo_attr_set  = vvp_attr_set,
+	.coo_attr_update = vvp_attr_update,
 	.coo_conf_set  = vvp_conf_set,
 	.coo_prune     = vvp_prune,
 	.coo_glimpse   = vvp_object_glimpse
@@ -221,7 +220,7 @@ static int vvp_object_init0(const struct lu_env *env,
 			    const struct cl_object_conf *conf)
 {
 	vob->vob_inode = conf->coc_inode;
-	vob->vob_transient_pages = 0;
+	atomic_set(&vob->vob_transient_pages, 0);
 	cl_object_page_init(&vob->vob_cl, sizeof(struct vvp_page));
 	return 0;
 }
diff --git a/drivers/staging/lustre/lustre/llite/vvp_page.c b/drivers/staging/lustre/lustre/llite/vvp_page.c
index 2e566d90bb94..5d79efc1aafe 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_page.c
+++ b/drivers/staging/lustre/lustre/llite/vvp_page.c
@@ -44,8 +44,6 @@
 #include <linux/page-flags.h>
 #include <linux/pagemap.h>
 
-#include "../include/lustre_lite.h"
-
 #include "llite_internal.h"
 #include "vvp_internal.h"
 
@@ -249,7 +247,7 @@ static void vvp_vmpage_error(struct inode *inode, struct page *vmpage, int ioret
 			set_bit(AS_EIO, &inode->i_mapping->flags);
 
 		if ((ioret == -ESHUTDOWN || ioret == -EINTR) &&
-		     obj->vob_discard_page_warned == 0) {
+		    obj->vob_discard_page_warned == 0) {
 			obj->vob_discard_page_warned = 1;
 			ll_dirty_page_discard_warn(vmpage, ioret);
 		}
@@ -444,18 +442,10 @@ static int vvp_transient_page_prep(const struct lu_env *env,
 	return 0;
 }
 
-static void vvp_transient_page_verify(const struct cl_page *page)
-{
-	struct inode *inode = vvp_object_inode(page->cp_obj);
-
-	LASSERT(!inode_trylock(inode));
-}
-
 static int vvp_transient_page_own(const struct lu_env *env,
 				  const struct cl_page_slice *slice,
 				  struct cl_io *unused, int nonblock)
 {
-	vvp_transient_page_verify(slice->cpl_page);
 	return 0;
 }
 
@@ -463,21 +453,18 @@ static void vvp_transient_page_assume(const struct lu_env *env,
 				      const struct cl_page_slice *slice,
 				      struct cl_io *unused)
 {
-	vvp_transient_page_verify(slice->cpl_page);
 }
 
 static void vvp_transient_page_unassume(const struct lu_env *env,
 					const struct cl_page_slice *slice,
 					struct cl_io *unused)
 {
-	vvp_transient_page_verify(slice->cpl_page);
 }
 
 static void vvp_transient_page_disown(const struct lu_env *env,
 				      const struct cl_page_slice *slice,
 				      struct cl_io *unused)
 {
-	vvp_transient_page_verify(slice->cpl_page);
 }
 
 static void vvp_transient_page_discard(const struct lu_env *env,
@@ -486,8 +473,6 @@ static void vvp_transient_page_discard(const struct lu_env *env,
 {
 	struct cl_page *page = slice->cpl_page;
 
-	vvp_transient_page_verify(slice->cpl_page);
-
 	/*
 	 * For transient pages, remove it from the radix tree.
 	 */
@@ -511,7 +496,6 @@ vvp_transient_page_completion(const struct lu_env *env,
 			      const struct cl_page_slice *slice,
 			      int ioret)
 {
-	vvp_transient_page_verify(slice->cpl_page);
 }
 
 static void vvp_transient_page_fini(const struct lu_env *env,
@@ -522,8 +506,7 @@ static void vvp_transient_page_fini(const struct lu_env *env,
 	struct vvp_object *clobj = cl2vvp(clp->cp_obj);
 
 	vvp_page_fini_common(vpg);
-	LASSERT(!inode_trylock(clobj->vob_inode));
-	clobj->vob_transient_pages--;
+	atomic_dec(&clobj->vob_transient_pages);
 }
 
 static const struct cl_page_operations vvp_transient_page_ops = {
@@ -549,7 +532,7 @@ static const struct cl_page_operations vvp_transient_page_ops = {
 };
 
 int vvp_page_init(const struct lu_env *env, struct cl_object *obj,
-		struct cl_page *page, pgoff_t index)
+		  struct cl_page *page, pgoff_t index)
 {
 	struct vvp_page *vpg = cl_object_page_slice(obj, page);
 	struct page     *vmpage = page->cp_vmpage;
@@ -570,10 +553,9 @@ int vvp_page_init(const struct lu_env *env, struct cl_object *obj,
 	} else {
 		struct vvp_object *clobj = cl2vvp(obj);
 
-		LASSERT(!inode_trylock(clobj->vob_inode));
 		cl_page_slice_add(page, &vpg->vpg_cl, obj, index,
 				  &vvp_transient_page_ops);
-		clobj->vob_transient_pages++;
+		atomic_inc(&clobj->vob_transient_pages);
 	}
 	return 0;
 }
diff --git a/drivers/staging/lustre/lustre/llite/vvp_req.c b/drivers/staging/lustre/lustre/llite/vvp_req.c
index 9fe9d6c0a7d1..e3f4c790d646 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_req.c
+++ b/drivers/staging/lustre/lustre/llite/vvp_req.c
@@ -32,7 +32,6 @@
 #include "../include/cl_object.h"
 #include "../include/obd.h"
 #include "../include/obd_support.h"
-#include "../include/lustre_lite.h"
 #include "llite_internal.h"
 #include "vvp_internal.h"
 
@@ -83,8 +82,10 @@ static void vvp_req_attr_set(const struct lu_env *env,
 	}
 	obdo_from_inode(oa, inode, valid_flags & flags);
 	obdo_set_parent_fid(oa, &ll_i2info(inode)->lli_fid);
+	if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_INVALID_PFID))
+		oa->o_parent_oid++;
 	memcpy(attr->cra_jobid, ll_i2info(inode)->lli_jobid,
-	       JOBSTATS_JOBID_SIZE);
+	       LUSTRE_JOBID_SIZE);
 }
 
 static void vvp_req_completion(const struct lu_env *env,
diff --git a/drivers/staging/lustre/lustre/llite/xattr.c b/drivers/staging/lustre/lustre/llite/xattr.c
index 98303cf85815..e070adb7a3cc 100644
--- a/drivers/staging/lustre/lustre/llite/xattr.c
+++ b/drivers/staging/lustre/lustre/llite/xattr.c
@@ -38,21 +38,12 @@
 #define DEBUG_SUBSYSTEM S_LLITE
 
 #include "../include/obd_support.h"
-#include "../include/lustre_lite.h"
 #include "../include/lustre_dlm.h"
 #include "../include/lustre_ver.h"
 #include "../include/lustre_eacl.h"
 
 #include "llite_internal.h"
 
-#define XATTR_USER_T	    (1)
-#define XATTR_TRUSTED_T	 (2)
-#define XATTR_SECURITY_T	(3)
-#define XATTR_ACL_ACCESS_T      (4)
-#define XATTR_ACL_DEFAULT_T     (5)
-#define XATTR_LUSTRE_T	  (6)
-#define XATTR_OTHER_T	   (7)
-
 static
 int get_xattr_type(const char *name)
 {
@@ -99,46 +90,57 @@ int xattr_type_filter(struct ll_sb_info *sbi, int xattr_type)
 	return 0;
 }
 
-static
-int ll_setxattr_common(struct inode *inode, const char *name,
-		       const void *value, size_t size,
-		       int flags, __u64 valid)
+static int
+ll_xattr_set_common(const struct xattr_handler *handler,
+		    struct dentry *dentry, struct inode *inode,
+		    const char *name, const void *value, size_t size,
+		    int flags)
 {
+	char fullname[strlen(handler->prefix) + strlen(name) + 1];
 	struct ll_sb_info *sbi = ll_i2sbi(inode);
 	struct ptlrpc_request *req = NULL;
-	int xattr_type, rc;
 	const char *pv = value;
+	__u64 valid;
+	int rc;
+
+	if (flags == XATTR_REPLACE) {
+		ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_REMOVEXATTR, 1);
+		valid = OBD_MD_FLXATTRRM;
+	} else {
+		ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_SETXATTR, 1);
+		valid = OBD_MD_FLXATTR;
+	}
 
-	xattr_type = get_xattr_type(name);
-	rc = xattr_type_filter(sbi, xattr_type);
+	rc = xattr_type_filter(sbi, handler->flags);
 	if (rc)
 		return rc;
 
-	if ((xattr_type == XATTR_ACL_ACCESS_T ||
-	     xattr_type == XATTR_ACL_DEFAULT_T) &&
+	if ((handler->flags == XATTR_ACL_ACCESS_T ||
+	     handler->flags == XATTR_ACL_DEFAULT_T) &&
 	    !inode_owner_or_capable(inode))
 		return -EPERM;
 
 	/* b10667: ignore lustre special xattr for now */
-	if ((xattr_type == XATTR_TRUSTED_T && strcmp(name, "trusted.lov") == 0) ||
-	    (xattr_type == XATTR_LUSTRE_T && strcmp(name, "lustre.lov") == 0))
+	if ((handler->flags == XATTR_TRUSTED_T && !strcmp(name, "lov")) ||
+	    (handler->flags == XATTR_LUSTRE_T && !strcmp(name, "lov")))
 		return 0;
 
 	/* b15587: ignore security.capability xattr for now */
-	if ((xattr_type == XATTR_SECURITY_T &&
-	     strcmp(name, "security.capability") == 0))
+	if ((handler->flags == XATTR_SECURITY_T &&
+	     !strcmp(name, "capability")))
 		return 0;
 
 	/* LU-549:  Disable security.selinux when selinux is disabled */
-	if (xattr_type == XATTR_SECURITY_T && !selinux_is_enabled() &&
-	    strcmp(name, "security.selinux") == 0)
+	if (handler->flags == XATTR_SECURITY_T && !selinux_is_enabled() &&
+	    strcmp(name, "selinux") == 0)
 		return -EOPNOTSUPP;
 
+	sprintf(fullname, "%s%s\n", handler->prefix, name);
 	rc = md_setxattr(sbi->ll_md_exp, ll_inode2fid(inode),
-			 valid, name, pv, size, 0, flags,
+			 valid, fullname, pv, size, 0, flags,
 			 ll_i2suppgid(inode), &req);
 	if (rc) {
-		if (rc == -EOPNOTSUPP && xattr_type == XATTR_USER_T) {
+		if (rc == -EOPNOTSUPP && handler->flags == XATTR_USER_T) {
 			LCONSOLE_INFO("Disabling user_xattr feature because it is not supported on the server\n");
 			sbi->ll_flags &= ~LL_SBI_USER_XATTR;
 		}
@@ -149,8 +151,10 @@ int ll_setxattr_common(struct inode *inode, const char *name,
 	return 0;
 }
 
-int ll_setxattr(struct dentry *dentry, struct inode *inode,
-		const char *name, const void *value, size_t size, int flags)
+static int ll_xattr_set(const struct xattr_handler *handler,
+			struct dentry *dentry, struct inode *inode,
+			const char *name, const void *value, size_t size,
+			int flags)
 {
 	LASSERT(inode);
 	LASSERT(name);
@@ -158,20 +162,24 @@ int ll_setxattr(struct dentry *dentry, struct inode *inode,
 	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), xattr %s\n",
 	       PFID(ll_inode2fid(inode)), inode, name);
 
-	ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_SETXATTR, 1);
-
-	if ((strncmp(name, XATTR_TRUSTED_PREFIX,
-		     sizeof(XATTR_TRUSTED_PREFIX) - 1) == 0 &&
-	     strcmp(name + sizeof(XATTR_TRUSTED_PREFIX) - 1, "lov") == 0) ||
-	    (strncmp(name, XATTR_LUSTRE_PREFIX,
-		     sizeof(XATTR_LUSTRE_PREFIX) - 1) == 0 &&
-	     strcmp(name + sizeof(XATTR_LUSTRE_PREFIX) - 1, "lov") == 0)) {
+	if (!strcmp(name, "lov")) {
 		struct lov_user_md *lump = (struct lov_user_md *)value;
+		int op_type = flags == XATTR_REPLACE ? LPROC_LL_REMOVEXATTR :
+						       LPROC_LL_SETXATTR;
 		int rc = 0;
 
+		ll_stats_ops_tally(ll_i2sbi(inode), op_type, 1);
+
 		if (size != 0 && size < sizeof(struct lov_user_md))
 			return -EINVAL;
 
+		/*
+		 * It is possible to set an xattr to a "" value of zero size.
+		 * For this case we are going to treat it as a removal.
+		 */
+		if (!size && lump)
+			lump = NULL;
+
 		/* Attributes that are saved via getxattr will always have
 		 * the stripe_offset as 0.  Instead, the MDS should be
 		 * allowed to pick the starting OST index.   b=17846
@@ -181,12 +189,15 @@ int ll_setxattr(struct dentry *dentry, struct inode *inode,
 
 		if (lump && S_ISREG(inode->i_mode)) {
 			__u64 it_flags = FMODE_WRITE;
-			int lum_size = (lump->lmm_magic == LOV_USER_MAGIC_V1) ?
-				sizeof(*lump) : sizeof(struct lov_user_md_v3);
+			int lum_size;
+
+			lum_size = ll_lov_user_md_size(lump);
+			if (lum_size < 0 || size < lum_size)
+				return 0; /* b=10667: ignore error */
 
 			rc = ll_lov_setstripe_ea_info(inode, dentry, it_flags,
 						      lump, lum_size);
-			/* b10667: rc always be 0 here for now */
+			/* b=10667: rc always be 0 here for now */
 			rc = 0;
 		} else if (S_ISDIR(inode->i_mode)) {
 			rc = ll_dir_setstripe(inode, lump, 0);
@@ -194,92 +205,27 @@ int ll_setxattr(struct dentry *dentry, struct inode *inode,
 
 		return rc;
 
-	} else if (strcmp(name, XATTR_NAME_LMA) == 0 ||
-		   strcmp(name, XATTR_NAME_LINK) == 0)
+	} else if (!strcmp(name, "lma") || !strcmp(name, "link")) {
+		ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_SETXATTR, 1);
 		return 0;
+	}
 
-	return ll_setxattr_common(inode, name, value, size, flags,
-				  OBD_MD_FLXATTR);
-}
-
-int ll_removexattr(struct dentry *dentry, const char *name)
-{
-	struct inode *inode = d_inode(dentry);
-
-	LASSERT(inode);
-	LASSERT(name);
-
-	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), xattr %s\n",
-	       PFID(ll_inode2fid(inode)), inode, name);
-
-	ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_REMOVEXATTR, 1);
-	return ll_setxattr_common(inode, name, NULL, 0, 0,
-				  OBD_MD_FLXATTRRM);
+	return ll_xattr_set_common(handler, dentry, inode, name, value, size,
+				   flags);
 }
 
-static
-int ll_getxattr_common(struct inode *inode, const char *name,
-		       void *buffer, size_t size, __u64 valid)
+int
+ll_xattr_list(struct inode *inode, const char *name, int type, void *buffer,
+	      size_t size, __u64 valid)
 {
+	struct ll_inode_info *lli = ll_i2info(inode);
 	struct ll_sb_info *sbi = ll_i2sbi(inode);
 	struct ptlrpc_request *req = NULL;
 	struct mdt_body *body;
-	int xattr_type, rc;
 	void *xdata;
-	struct ll_inode_info *lli = ll_i2info(inode);
-
-	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
-	       PFID(ll_inode2fid(inode)), inode);
-
-	/* listxattr have slightly different behavior from of ext3:
-	 * without 'user_xattr' ext3 will list all xattr names but
-	 * filtered out "^user..*"; we list them all for simplicity.
-	 */
-	if (!name) {
-		xattr_type = XATTR_OTHER_T;
-		goto do_getxattr;
-	}
+	int rc;
 
-	xattr_type = get_xattr_type(name);
-	rc = xattr_type_filter(sbi, xattr_type);
-	if (rc)
-		return rc;
-
-	/* b15587: ignore security.capability xattr for now */
-	if ((xattr_type == XATTR_SECURITY_T &&
-	     strcmp(name, "security.capability") == 0))
-		return -ENODATA;
-
-	/* LU-549:  Disable security.selinux when selinux is disabled */
-	if (xattr_type == XATTR_SECURITY_T && !selinux_is_enabled() &&
-	    strcmp(name, "security.selinux") == 0)
-		return -EOPNOTSUPP;
-
-#ifdef CONFIG_FS_POSIX_ACL
-	/* posix acl is under protection of LOOKUP lock. when calling to this,
-	 * we just have path resolution to the target inode, so we have great
-	 * chance that cached ACL is uptodate.
-	 */
-	if (xattr_type == XATTR_ACL_ACCESS_T) {
-		struct posix_acl *acl;
-
-		spin_lock(&lli->lli_lock);
-		acl = posix_acl_dup(lli->lli_posix_acl);
-		spin_unlock(&lli->lli_lock);
-
-		if (!acl)
-			return -ENODATA;
-
-		rc = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
-		posix_acl_release(acl);
-		return rc;
-	}
-	if (xattr_type == XATTR_ACL_DEFAULT_T && !S_ISDIR(inode->i_mode))
-		return -ENODATA;
-#endif
-
-do_getxattr:
-	if (sbi->ll_xattr_cache_enabled && xattr_type != XATTR_ACL_ACCESS_T) {
+	if (sbi->ll_xattr_cache_enabled && type != XATTR_ACL_ACCESS_T) {
 		rc = ll_xattr_cache_get(inode, name, buffer, size, valid);
 		if (rc == -EAGAIN)
 			goto getxattr_nocache;
@@ -311,36 +257,36 @@ getxattr_nocache:
 
 		/* only detect the xattr size */
 		if (size == 0) {
-			rc = body->eadatasize;
+			rc = body->mbo_eadatasize;
 			goto out;
 		}
 
-		if (size < body->eadatasize) {
+		if (size < body->mbo_eadatasize) {
 			CERROR("server bug: replied size %u > %u\n",
-			       body->eadatasize, (int)size);
+			       body->mbo_eadatasize, (int)size);
 			rc = -ERANGE;
 			goto out;
 		}
 
-		if (body->eadatasize == 0) {
+		if (body->mbo_eadatasize == 0) {
 			rc = -ENODATA;
 			goto out;
 		}
 
 		/* do not need swab xattr data */
 		xdata = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA,
-						     body->eadatasize);
+						     body->mbo_eadatasize);
 		if (!xdata) {
 			rc = -EFAULT;
 			goto out;
 		}
 
-		memcpy(buffer, xdata, body->eadatasize);
-		rc = body->eadatasize;
+		memcpy(buffer, xdata, body->mbo_eadatasize);
+		rc = body->mbo_eadatasize;
 	}
 
 out_xattr:
-	if (rc == -EOPNOTSUPP && xattr_type == XATTR_USER_T) {
+	if (rc == -EOPNOTSUPP && type == XATTR_USER_T) {
 		LCONSOLE_INFO(
 			"%s: disabling user_xattr feature because it is not supported on the server: rc = %d\n",
 			ll_get_fsname(inode->i_sb, NULL, 0), rc);
@@ -351,8 +297,65 @@ out:
 	return rc;
 }
 
-ssize_t ll_getxattr(struct dentry *dentry, struct inode *inode,
-		    const char *name, void *buffer, size_t size)
+static int ll_xattr_get_common(const struct xattr_handler *handler,
+			       struct dentry *dentry, struct inode *inode,
+			       const char *name, void *buffer, size_t size)
+{
+	char fullname[strlen(handler->prefix) + strlen(name) + 1];
+	struct ll_sb_info *sbi = ll_i2sbi(inode);
+#ifdef CONFIG_FS_POSIX_ACL
+	struct ll_inode_info *lli = ll_i2info(inode);
+#endif
+	int rc;
+
+	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
+	       PFID(ll_inode2fid(inode)), inode);
+
+	ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETXATTR, 1);
+
+	rc = xattr_type_filter(sbi, handler->flags);
+	if (rc)
+		return rc;
+
+	/* b15587: ignore security.capability xattr for now */
+	if ((handler->flags == XATTR_SECURITY_T && !strcmp(name, "capability")))
+		return -ENODATA;
+
+	/* LU-549:  Disable security.selinux when selinux is disabled */
+	if (handler->flags == XATTR_SECURITY_T && !selinux_is_enabled() &&
+	    !strcmp(name, "selinux"))
+		return -EOPNOTSUPP;
+
+#ifdef CONFIG_FS_POSIX_ACL
+	/* posix acl is under protection of LOOKUP lock. when calling to this,
+	 * we just have path resolution to the target inode, so we have great
+	 * chance that cached ACL is uptodate.
+	 */
+	if (handler->flags == XATTR_ACL_ACCESS_T) {
+		struct posix_acl *acl;
+
+		spin_lock(&lli->lli_lock);
+		acl = posix_acl_dup(lli->lli_posix_acl);
+		spin_unlock(&lli->lli_lock);
+
+		if (!acl)
+			return -ENODATA;
+
+		rc = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
+		posix_acl_release(acl);
+		return rc;
+	}
+	if (handler->flags == XATTR_ACL_DEFAULT_T && !S_ISDIR(inode->i_mode))
+		return -ENODATA;
+#endif
+	sprintf(fullname, "%s%s\n", handler->prefix, name);
+	return ll_xattr_list(inode, fullname, handler->flags, buffer, size,
+			     OBD_MD_FLXATTR);
+}
+
+static int ll_xattr_get(const struct xattr_handler *handler,
+			struct dentry *dentry, struct inode *inode,
+			const char *name, void *buffer, size_t size)
 {
 	LASSERT(inode);
 	LASSERT(name);
@@ -360,36 +363,23 @@ ssize_t ll_getxattr(struct dentry *dentry, struct inode *inode,
 	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), xattr %s\n",
 	       PFID(ll_inode2fid(inode)), inode, name);
 
-	ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETXATTR, 1);
-
-	if ((strncmp(name, XATTR_TRUSTED_PREFIX,
-		     sizeof(XATTR_TRUSTED_PREFIX) - 1) == 0 &&
-	     strcmp(name + sizeof(XATTR_TRUSTED_PREFIX) - 1, "lov") == 0) ||
-	    (strncmp(name, XATTR_LUSTRE_PREFIX,
-		     sizeof(XATTR_LUSTRE_PREFIX) - 1) == 0 &&
-	     strcmp(name + sizeof(XATTR_LUSTRE_PREFIX) - 1, "lov") == 0)) {
+	if (!strcmp(name, "lov")) {
 		struct lov_stripe_md *lsm;
 		struct lov_user_md *lump;
 		struct lov_mds_md *lmm = NULL;
 		struct ptlrpc_request *request = NULL;
 		int rc = 0, lmmsize = 0;
 
+		ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETXATTR, 1);
+
 		if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
 			return -ENODATA;
 
-		if (size == 0 && S_ISDIR(inode->i_mode)) {
-			/* XXX directory EA is fix for now, optimize to save
-			 * RPC transfer
-			 */
-			rc = sizeof(struct lov_user_md);
-			goto out;
-		}
-
 		lsm = ccc_inode_lsm_get(inode);
 		if (!lsm) {
 			if (S_ISDIR(inode->i_mode)) {
-				rc = ll_dir_getstripe(inode, &lmm,
-						      &lmmsize, &request);
+				rc = ll_dir_getstripe(inode, (void **)&lmm,
+						      &lmmsize, &request, 0);
 			} else {
 				rc = -ENODATA;
 			}
@@ -439,7 +429,7 @@ out:
 		return rc;
 	}
 
-	return ll_getxattr_common(inode, name, buffer, size, OBD_MD_FLXATTR);
+	return ll_xattr_get_common(handler, dentry, inode, name, buffer, size);
 }
 
 ssize_t ll_listxattr(struct dentry *dentry, char *buffer, size_t size)
@@ -457,7 +447,8 @@ ssize_t ll_listxattr(struct dentry *dentry, char *buffer, size_t size)
 
 	ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LISTXATTR, 1);
 
-	rc = ll_getxattr_common(inode, NULL, buffer, size, OBD_MD_FLXATTRLS);
+	rc = ll_xattr_list(inode, NULL, XATTR_OTHER_T, buffer, size,
+			   OBD_MD_FLXATTRLS);
 	if (rc < 0)
 		goto out;
 
@@ -488,7 +479,8 @@ ssize_t ll_listxattr(struct dentry *dentry, char *buffer, size_t size)
 		if (!ll_i2info(inode)->lli_has_smd)
 			rc2 = -1;
 	} else if (S_ISDIR(inode->i_mode)) {
-		rc2 = ll_dir_getstripe(inode, &lmm, &lmmsize, &request);
+		rc2 = ll_dir_getstripe(inode, (void **)&lmm, &lmmsize,
+				       &request, 0);
 	}
 
 	if (rc2 < 0) {
@@ -518,3 +510,57 @@ out:
 
 	return rc;
 }
+
+static const struct xattr_handler ll_user_xattr_handler = {
+	.prefix = XATTR_USER_PREFIX,
+	.flags = XATTR_USER_T,
+	.get = ll_xattr_get_common,
+	.set = ll_xattr_set_common,
+};
+
+static const struct xattr_handler ll_trusted_xattr_handler = {
+	.prefix = XATTR_TRUSTED_PREFIX,
+	.flags = XATTR_TRUSTED_T,
+	.get = ll_xattr_get,
+	.set = ll_xattr_set,
+};
+
+static const struct xattr_handler ll_security_xattr_handler = {
+	.prefix = XATTR_SECURITY_PREFIX,
+	.flags = XATTR_SECURITY_T,
+	.get = ll_xattr_get_common,
+	.set = ll_xattr_set_common,
+};
+
+static const struct xattr_handler ll_acl_access_xattr_handler = {
+	.prefix = XATTR_NAME_POSIX_ACL_ACCESS,
+	.flags = XATTR_ACL_ACCESS_T,
+	.get = ll_xattr_get_common,
+	.set = ll_xattr_set_common,
+};
+
+static const struct xattr_handler ll_acl_default_xattr_handler = {
+	.prefix = XATTR_NAME_POSIX_ACL_DEFAULT,
+	.flags = XATTR_ACL_DEFAULT_T,
+	.get = ll_xattr_get_common,
+	.set = ll_xattr_set_common,
+};
+
+static const struct xattr_handler ll_lustre_xattr_handler = {
+	.prefix = XATTR_LUSTRE_PREFIX,
+	.flags = XATTR_LUSTRE_T,
+	.get = ll_xattr_get,
+	.set = ll_xattr_set,
+};
+
+const struct xattr_handler *ll_xattr_handlers[] = {
+	&ll_user_xattr_handler,
+	&ll_trusted_xattr_handler,
+	&ll_security_xattr_handler,
+#ifdef CONFIG_FS_POSIX_ACL
+	&ll_acl_access_xattr_handler,
+	&ll_acl_default_xattr_handler,
+#endif
+	&ll_lustre_xattr_handler,
+	NULL,
+};
diff --git a/drivers/staging/lustre/lustre/llite/xattr_cache.c b/drivers/staging/lustre/lustre/llite/xattr_cache.c
index 8089da8143d9..50a19a40bd4e 100644
--- a/drivers/staging/lustre/lustre/llite/xattr_cache.c
+++ b/drivers/staging/lustre/lustre/llite/xattr_cache.c
@@ -13,7 +13,6 @@
 #include <linux/sched.h>
 #include <linux/mm.h>
 #include "../include/obd_support.h"
-#include "../include/lustre_lite.h"
 #include "../include/lustre_dlm.h"
 #include "../include/lustre_ver.h"
 #include "llite_internal.h"
@@ -270,10 +269,12 @@ static int ll_xattr_find_get_lock(struct inode *inode,
 	struct lustre_handle lockh = { 0 };
 	struct md_op_data *op_data;
 	struct ll_inode_info *lli = ll_i2info(inode);
-	struct ldlm_enqueue_info einfo = { .ei_type = LDLM_IBITS,
-					   .ei_mode = it_to_lock_mode(oit),
-					   .ei_cb_bl = ll_md_blocking_ast,
-					   .ei_cb_cp = ldlm_completion_ast };
+	struct ldlm_enqueue_info einfo = {
+		.ei_type = LDLM_IBITS,
+		.ei_mode = it_to_lock_mode(oit),
+		.ei_cb_bl = &ll_md_blocking_ast,
+		.ei_cb_cp = &ldlm_completion_ast,
+	};
 	struct ll_sb_info *sbi = ll_i2sbi(inode);
 	struct obd_export *exp = sbi->ll_md_exp;
 	int rc;
@@ -304,7 +305,7 @@ static int ll_xattr_find_get_lock(struct inode *inode,
 
 	op_data->op_valid = OBD_MD_FLXATTR | OBD_MD_FLXATTRLS;
 
-	rc = md_enqueue(exp, &einfo, oit, op_data, &lockh, NULL, 0, NULL, 0);
+	rc = md_enqueue(exp, &einfo, NULL, oit, op_data, &lockh, 0);
 	ll_finish_md_op_data(op_data);
 
 	if (rc < 0) {
@@ -380,25 +381,25 @@ static int ll_xattr_cache_refill(struct inode *inode, struct lookup_intent *oit)
 	}
 	/* do not need swab xattr data */
 	xdata = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA,
-					     body->eadatasize);
+					     body->mbo_eadatasize);
 	xval = req_capsule_server_sized_get(&req->rq_pill, &RMF_EAVALS,
-					    body->aclsize);
+					    body->mbo_aclsize);
 	xsizes = req_capsule_server_sized_get(&req->rq_pill, &RMF_EAVALS_LENS,
-					      body->max_mdsize * sizeof(__u32));
+					      body->mbo_max_mdsize * sizeof(__u32));
 	if (!xdata || !xval || !xsizes) {
 		CERROR("wrong setxattr reply\n");
 		rc = -EPROTO;
 		goto out_destroy;
 	}
 
-	xtail = xdata + body->eadatasize;
-	xvtail = xval + body->aclsize;
+	xtail = xdata + body->mbo_eadatasize;
+	xvtail = xval + body->mbo_aclsize;
 
 	CDEBUG(D_CACHE, "caching: xdata=%p xtail=%p\n", xdata, xtail);
 
 	ll_xattr_cache_init(lli);
 
-	for (i = 0; i < body->max_mdsize; i++) {
+	for (i = 0; i < body->mbo_max_mdsize; i++) {
 		CDEBUG(D_CACHE, "caching [%s]=%.*s\n", xdata, *xsizes, xval);
 		/* Perform consistency checks: attr names and vals in pill */
 		if (!memchr(xdata, 0, xtail - xdata)) {
diff --git a/drivers/staging/lustre/lustre/lmv/lmv_fld.c b/drivers/staging/lustre/lustre/lmv/lmv_fld.c
index a3d170aa6fd2..a5265f9b5797 100644
--- a/drivers/staging/lustre/lustre/lmv/lmv_fld.c
+++ b/drivers/staging/lustre/lustre/lmv/lmv_fld.c
@@ -47,18 +47,20 @@
 #include "../include/lprocfs_status.h"
 #include "lmv_internal.h"
 
-int lmv_fld_lookup(struct lmv_obd *lmv,
-		   const struct lu_fid *fid,
-		   u32 *mds)
+int lmv_fld_lookup(struct lmv_obd *lmv, const struct lu_fid *fid, u32 *mds)
 {
+	struct obd_device *obd = lmv2obd_dev(lmv);
 	int rc;
 
-	/* FIXME: Currently ZFS still use local seq for ROOT unfortunately, and
+	/*
+	 * FIXME: Currently ZFS still use local seq for ROOT unfortunately, and
 	 * this fid_is_local check should be removed once LU-2240 is fixed
 	 */
-	LASSERTF((fid_seq_in_fldb(fid_seq(fid)) ||
-		  fid_seq_is_local_file(fid_seq(fid))) &&
-		 fid_is_sane(fid), DFID" is insane!\n", PFID(fid));
+	if (!fid_is_sane(fid) || !(fid_seq_in_fldb(fid_seq(fid)) ||
+				   fid_seq_is_local_file(fid_seq(fid)))) {
+		CERROR("%s: invalid FID " DFID "\n", obd->obd_name, PFID(fid));
+		return -EINVAL;
+	}
 
 	rc = fld_client_lookup(&lmv->lmv_fld, fid_seq(fid), mds,
 			       LU_SEQ_RANGE_MDT, NULL);
diff --git a/drivers/staging/lustre/lustre/lmv/lmv_intent.c b/drivers/staging/lustre/lustre/lmv/lmv_intent.c
index 2f58fdab8d1e..9f4e826bb0af 100644
--- a/drivers/staging/lustre/lustre/lmv/lmv_intent.c
+++ b/drivers/staging/lustre/lustre/lmv/lmv_intent.c
@@ -43,13 +43,13 @@
 #include "../include/lustre_lib.h"
 #include "../include/lustre_net.h"
 #include "../include/lustre_dlm.h"
+#include "../include/lustre_mdc.h"
 #include "../include/obd_class.h"
 #include "../include/lprocfs_status.h"
 #include "lmv_internal.h"
 
-static int lmv_intent_remote(struct obd_export *exp, void *lmm,
-			     int lmmsize, struct lookup_intent *it,
-			     const struct lu_fid *parent_fid, int flags,
+static int lmv_intent_remote(struct obd_export *exp, struct lookup_intent *it,
+			     const struct lu_fid *parent_fid,
 			     struct ptlrpc_request **reqp,
 			     ldlm_blocking_callback cb_blocking,
 			     __u64 extra_lock_flags)
@@ -68,7 +68,7 @@ static int lmv_intent_remote(struct obd_export *exp, void *lmm,
 	if (!body)
 		return -EPROTO;
 
-	LASSERT((body->valid & OBD_MD_MDS));
+	LASSERT((body->mbo_valid & OBD_MD_MDS));
 
 	/*
 	 * Unfortunately, we have to lie to MDC/MDS to retrieve
@@ -87,9 +87,9 @@ static int lmv_intent_remote(struct obd_export *exp, void *lmm,
 		it->it_request = NULL;
 	}
 
-	LASSERT(fid_is_sane(&body->fid1));
+	LASSERT(fid_is_sane(&body->mbo_fid1));
 
-	tgt = lmv_find_target(lmv, &body->fid1);
+	tgt = lmv_find_target(lmv, &body->mbo_fid1);
 	if (IS_ERR(tgt)) {
 		rc = PTR_ERR(tgt);
 		goto out;
@@ -101,7 +101,7 @@ static int lmv_intent_remote(struct obd_export *exp, void *lmm,
 		goto out;
 	}
 
-	op_data->op_fid1 = body->fid1;
+	op_data->op_fid1 = body->mbo_fid1;
 	/* Sent the parent FID to the remote MDT */
 	if (parent_fid) {
 		/* The parent fid is only for remote open to
@@ -110,18 +110,14 @@ static int lmv_intent_remote(struct obd_export *exp, void *lmm,
 		 */
 		LASSERT(it->it_op & IT_OPEN);
 		op_data->op_fid2 = *parent_fid;
-		/* Add object FID to op_fid3, in case it needs to check stale
-		 * (M_CHECK_STALE), see mdc_finish_intent_lock
-		 */
-		op_data->op_fid3 = body->fid1;
 	}
 
 	op_data->op_bias = MDS_CROSS_REF;
-	CDEBUG(D_INODE, "REMOTE_INTENT with fid="DFID" -> mds #%d\n",
-	       PFID(&body->fid1), tgt->ltd_idx);
+	CDEBUG(D_INODE, "REMOTE_INTENT with fid=" DFID " -> mds #%u\n",
+	       PFID(&body->mbo_fid1), tgt->ltd_idx);
 
-	rc = md_intent_lock(tgt->ltd_exp, op_data, lmm, lmmsize, it,
-			    flags, &req, cb_blocking, extra_lock_flags);
+	rc = md_intent_lock(tgt->ltd_exp, op_data, it, &req, cb_blocking,
+			    extra_lock_flags);
 	if (rc)
 		goto out_free_op_data;
 
@@ -136,8 +132,10 @@ static int lmv_intent_remote(struct obd_export *exp, void *lmm,
 		it->it_remote_lock_mode = it->it_lock_mode;
 	}
 
-	it->it_lock_handle = plock.cookie;
-	it->it_lock_mode = pmode;
+	if (pmode) {
+		it->it_lock_handle = plock.cookie;
+		it->it_lock_mode = pmode;
+	}
 
 out_free_op_data:
 	kfree(op_data);
@@ -150,13 +148,126 @@ out:
 	return rc;
 }
 
+int lmv_revalidate_slaves(struct obd_export *exp,
+			  const struct lmv_stripe_md *lsm,
+			  ldlm_blocking_callback cb_blocking,
+			  int extra_lock_flags)
+{
+	struct obd_device *obd = exp->exp_obd;
+	struct lmv_obd *lmv = &obd->u.lmv;
+	struct ptlrpc_request *req = NULL;
+	struct mdt_body *body;
+	struct md_op_data *op_data;
+	int rc = 0, i;
+
+	/**
+	 * revalidate slaves has some problems, temporarily return,
+	 * we may not need that
+	 */
+	op_data = kzalloc(sizeof(*op_data), GFP_NOFS);
+	if (!op_data)
+		return -ENOMEM;
+
+	/**
+	 * Loop over the stripe information, check validity and update them
+	 * from MDS if needed.
+	 */
+	for (i = 0; i < lsm->lsm_md_stripe_count; i++) {
+		struct lookup_intent it = { .it_op = IT_GETATTR };
+		struct lustre_handle *lockh = NULL;
+		struct lmv_tgt_desc *tgt = NULL;
+		struct inode *inode;
+		struct lu_fid fid;
+
+		fid = lsm->lsm_md_oinfo[i].lmo_fid;
+		inode = lsm->lsm_md_oinfo[i].lmo_root;
+
+		/*
+		 * Prepare op_data for revalidating. Note that @fid2 shluld be
+		 * defined otherwise it will go to server and take new lock
+		 * which is not needed here.
+		 */
+		memset(op_data, 0, sizeof(*op_data));
+		op_data->op_fid1 = fid;
+		op_data->op_fid2 = fid;
+
+		tgt = lmv_locate_mds(lmv, op_data, &fid);
+		if (IS_ERR(tgt)) {
+			rc = PTR_ERR(tgt);
+			goto cleanup;
+		}
+
+		CDEBUG(D_INODE, "Revalidate slave " DFID " -> mds #%u\n",
+		       PFID(&fid), tgt->ltd_idx);
+
+		if (req) {
+			ptlrpc_req_finished(req);
+			req = NULL;
+		}
+
+		rc = md_intent_lock(tgt->ltd_exp, op_data, &it, &req,
+				    cb_blocking, extra_lock_flags);
+		if (rc < 0)
+			goto cleanup;
+
+		lockh = (struct lustre_handle *)&it.it_lock_handle;
+		if (rc > 0 && !req) {
+			/* slave inode is still valid */
+			CDEBUG(D_INODE, "slave "DFID" is still valid.\n",
+			       PFID(&fid));
+			rc = 0;
+		} else {
+			/* refresh slave from server */
+			body = req_capsule_server_get(&req->rq_pill,
+						      &RMF_MDT_BODY);
+			LASSERT(body);
+
+			if (unlikely(body->mbo_nlink < 2)) {
+				CERROR("%s: nlink %d < 2 corrupt stripe %d "DFID":" DFID"\n",
+				       obd->obd_name, body->mbo_nlink, i,
+				       PFID(&lsm->lsm_md_oinfo[i].lmo_fid),
+				       PFID(&lsm->lsm_md_oinfo[0].lmo_fid));
+
+				if (it.it_lock_mode && lockh) {
+					ldlm_lock_decref(lockh, it.it_lock_mode);
+					it.it_lock_mode = 0;
+				}
+
+				rc = -EIO;
+				goto cleanup;
+			}
+
+			i_size_write(inode, body->mbo_size);
+			inode->i_blocks = body->mbo_blocks;
+			set_nlink(inode, body->mbo_nlink);
+			LTIME_S(inode->i_atime) = body->mbo_atime;
+			LTIME_S(inode->i_ctime) = body->mbo_ctime;
+			LTIME_S(inode->i_mtime) = body->mbo_mtime;
+		}
+
+		md_set_lock_data(tgt->ltd_exp, lockh, inode, NULL);
+
+		if (it.it_lock_mode && lockh) {
+			ldlm_lock_decref(lockh, it.it_lock_mode);
+			it.it_lock_mode = 0;
+		}
+	}
+
+cleanup:
+	if (req)
+		ptlrpc_req_finished(req);
+
+	kfree(op_data);
+	return rc;
+}
+
 /*
  * IT_OPEN is intended to open (and create, possible) an object. Parent (pid)
  * may be split dir.
  */
 static int lmv_intent_open(struct obd_export *exp, struct md_op_data *op_data,
-			   void *lmm, int lmmsize, struct lookup_intent *it,
-			   int flags, struct ptlrpc_request **reqp,
+			   struct lookup_intent *it,
+			   struct ptlrpc_request **reqp,
 			   ldlm_blocking_callback cb_blocking,
 			   __u64 extra_lock_flags)
 {
@@ -166,35 +277,55 @@ static int lmv_intent_open(struct obd_export *exp, struct md_op_data *op_data,
 	struct mdt_body		*body;
 	int			rc;
 
-	tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1);
-	if (IS_ERR(tgt))
-		return PTR_ERR(tgt);
+	if (it->it_flags & MDS_OPEN_BY_FID) {
+		LASSERT(fid_is_sane(&op_data->op_fid2));
+
+		/*
+		 * for striped directory, we can't know parent stripe fid
+		 * without name, but we can set it to child fid, and MDT
+		 * will obtain it from linkea in open in such case.
+		 */
+		if (op_data->op_mea1)
+			op_data->op_fid1 = op_data->op_fid2;
+
+		tgt = lmv_find_target(lmv, &op_data->op_fid2);
+		if (IS_ERR(tgt))
+			return PTR_ERR(tgt);
+
+		op_data->op_mds = tgt->ltd_idx;
+	} else {
+		LASSERT(fid_is_sane(&op_data->op_fid1));
+		LASSERT(fid_is_zero(&op_data->op_fid2));
+		LASSERT(op_data->op_name);
+
+		tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1);
+		if (IS_ERR(tgt))
+			return PTR_ERR(tgt);
+	}
 
 	/* If it is ready to open the file by FID, do not need
 	 * allocate FID at all, otherwise it will confuse MDT
 	 */
-	if ((it->it_op & IT_CREAT) &&
-	    !(it->it_flags & MDS_OPEN_BY_FID)) {
+	if ((it->it_op & IT_CREAT) && !(it->it_flags & MDS_OPEN_BY_FID)) {
 		/*
-		 * For open with IT_CREATE and for IT_CREATE cases allocate new
-		 * fid and setup FLD for it.
+		 * For lookup(IT_CREATE) cases allocate new fid and setup FLD
+		 * for it.
 		 */
-		op_data->op_fid3 = op_data->op_fid2;
-		rc = lmv_fid_alloc(exp, &op_data->op_fid2, op_data);
+		rc = lmv_fid_alloc(NULL, exp, &op_data->op_fid2, op_data);
 		if (rc != 0)
 			return rc;
 	}
 
-	CDEBUG(D_INODE, "OPEN_INTENT with fid1=" DFID ", fid2=" DFID ", name='%s' -> mds #%d\n",
+	CDEBUG(D_INODE, "OPEN_INTENT with fid1=" DFID ", fid2=" DFID ", name='%s' -> mds #%u\n",
 	       PFID(&op_data->op_fid1),
 	       PFID(&op_data->op_fid2), op_data->op_name, tgt->ltd_idx);
 
-	rc = md_intent_lock(tgt->ltd_exp, op_data, lmm, lmmsize, it, flags,
-			    reqp, cb_blocking, extra_lock_flags);
+	rc = md_intent_lock(tgt->ltd_exp, op_data, it, reqp, cb_blocking,
+			    extra_lock_flags);
 	if (rc != 0)
 		return rc;
 	/*
-	 * Nothing is found, do not access body->fid1 as it is zero and thus
+	 * Nothing is found, do not access body->mbo_fid1 as it is zero and thus
 	 * pointless.
 	 */
 	if ((it->it_disposition & DISP_LOOKUP_NEG) &&
@@ -205,31 +336,17 @@ static int lmv_intent_open(struct obd_export *exp, struct md_op_data *op_data,
 	body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY);
 	if (!body)
 		return -EPROTO;
-	/*
-	 * Not cross-ref case, just get out of here.
-	 */
-	if (likely(!(body->valid & OBD_MD_MDS)))
-		return 0;
 
-	/*
-	 * Okay, MDS has returned success. Probably name has been resolved in
-	 * remote inode.
-	 */
-	rc = lmv_intent_remote(exp, lmm, lmmsize, it, &op_data->op_fid1, flags,
-			       reqp, cb_blocking, extra_lock_flags);
-	if (rc != 0) {
-		LASSERT(rc < 0);
-		/*
-		 * This is possible, that some userspace application will try to
-		 * open file as directory and we will have -ENOTDIR here. As
-		 * this is normal situation, we should not print error here,
-		 * only debug info.
-		 */
-		CDEBUG(D_INODE, "Can't handle remote %s: dir " DFID "(" DFID "):%*s: %d\n",
-		       LL_IT2STR(it), PFID(&op_data->op_fid2),
-		       PFID(&op_data->op_fid1), op_data->op_namelen,
-		       op_data->op_name, rc);
-		return rc;
+	/* Not cross-ref case, just get out of here. */
+	if (unlikely((body->mbo_valid & OBD_MD_MDS))) {
+		rc = lmv_intent_remote(exp, it, &op_data->op_fid1, reqp,
+				       cb_blocking, extra_lock_flags);
+		if (rc != 0)
+			return rc;
+
+		body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY);
+		if (!body)
+			return -EPROTO;
 	}
 
 	return rc;
@@ -240,37 +357,102 @@ static int lmv_intent_open(struct obd_export *exp, struct md_op_data *op_data,
  */
 static int lmv_intent_lookup(struct obd_export *exp,
 			     struct md_op_data *op_data,
-			     void *lmm, int lmmsize, struct lookup_intent *it,
-			     int flags, struct ptlrpc_request **reqp,
+			     struct lookup_intent *it,
+			     struct ptlrpc_request **reqp,
 			     ldlm_blocking_callback cb_blocking,
 			     __u64 extra_lock_flags)
 {
+	struct lmv_stripe_md *lsm = op_data->op_mea1;
 	struct obd_device      *obd = exp->exp_obd;
 	struct lmv_obd	 *lmv = &obd->u.lmv;
 	struct lmv_tgt_desc    *tgt = NULL;
 	struct mdt_body	*body;
 	int		     rc = 0;
 
+	/*
+	 * If it returns ERR_PTR(-EBADFD) then it is an unknown hash type
+	 * it will try all stripes to locate the object
+	 */
 	tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1);
-	if (IS_ERR(tgt))
+	if (IS_ERR(tgt) && (PTR_ERR(tgt) != -EBADFD))
 		return PTR_ERR(tgt);
 
+	/*
+	 * Both migrating dir and unknown hash dir need to try
+	 * all of sub-stripes
+	 */
+	if (lsm && !lmv_is_known_hash_type(lsm->lsm_md_hash_type)) {
+		struct lmv_oinfo *oinfo = &lsm->lsm_md_oinfo[0];
+
+		op_data->op_fid1 = oinfo->lmo_fid;
+		op_data->op_mds = oinfo->lmo_mds;
+		tgt = lmv_get_target(lmv, oinfo->lmo_mds, NULL);
+		if (IS_ERR(tgt))
+			return PTR_ERR(tgt);
+	}
+
 	if (!fid_is_sane(&op_data->op_fid2))
 		fid_zero(&op_data->op_fid2);
 
-	CDEBUG(D_INODE, "LOOKUP_INTENT with fid1="DFID", fid2="DFID
-	       ", name='%s' -> mds #%d\n", PFID(&op_data->op_fid1),
-	       PFID(&op_data->op_fid2),
+	CDEBUG(D_INODE, "LOOKUP_INTENT with fid1=" DFID ", fid2=" DFID ", name='%s' -> mds #%u lsm=%p lsm_magic=%x\n",
+	       PFID(&op_data->op_fid1), PFID(&op_data->op_fid2),
 	       op_data->op_name ? op_data->op_name : "<NULL>",
-	       tgt->ltd_idx);
+	       tgt->ltd_idx, lsm, !lsm ? -1 : lsm->lsm_md_magic);
 
 	op_data->op_bias &= ~MDS_CROSS_REF;
 
-	rc = md_intent_lock(tgt->ltd_exp, op_data, lmm, lmmsize, it,
-			    flags, reqp, cb_blocking, extra_lock_flags);
+	rc = md_intent_lock(tgt->ltd_exp, op_data, it, reqp, cb_blocking,
+			    extra_lock_flags);
+	if (rc < 0)
+		return rc;
 
-	if (rc < 0 || !*reqp)
+	if (!*reqp) {
+		/*
+		 * If RPC happens, lsm information will be revalidated
+		 * during update_inode process (see ll_update_lsm_md)
+		 */
+		if (op_data->op_mea2) {
+			rc = lmv_revalidate_slaves(exp, op_data->op_mea2,
+						   cb_blocking,
+						   extra_lock_flags);
+			if (rc != 0)
+				return rc;
+		}
 		return rc;
+	} else if (it_disposition(it, DISP_LOOKUP_NEG) && lsm &&
+		   lmv_need_try_all_stripes(lsm)) {
+		/*
+		 * For migrating and unknown hash type directory, it will
+		 * try to target the entry on other stripes
+		 */
+		int stripe_index;
+
+		for (stripe_index = 1;
+		     stripe_index < lsm->lsm_md_stripe_count &&
+		     it_disposition(it, DISP_LOOKUP_NEG); stripe_index++) {
+			struct lmv_oinfo *oinfo;
+
+			/* release the previous request */
+			ptlrpc_req_finished(*reqp);
+			it->it_request = NULL;
+			*reqp = NULL;
+
+			oinfo = &lsm->lsm_md_oinfo[stripe_index];
+			tgt = lmv_find_target(lmv, &oinfo->lmo_fid);
+			if (IS_ERR(tgt))
+				return PTR_ERR(tgt);
+
+			CDEBUG(D_INODE, "Try other stripes " DFID"\n",
+			       PFID(&oinfo->lmo_fid));
+
+			op_data->op_fid1 = oinfo->lmo_fid;
+			it->it_disposition &= ~DISP_ENQ_COMPLETE;
+			rc = md_intent_lock(tgt->ltd_exp, op_data, it, reqp,
+					    cb_blocking, extra_lock_flags);
+			if (rc)
+				return rc;
+		}
+	}
 
 	/*
 	 * MDS has returned success. Probably name has been resolved in
@@ -279,19 +461,23 @@ static int lmv_intent_lookup(struct obd_export *exp,
 	body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY);
 	if (!body)
 		return -EPROTO;
-	/* Not cross-ref case, just get out of here. */
-	if (likely(!(body->valid & OBD_MD_MDS)))
-		return 0;
 
-	rc = lmv_intent_remote(exp, lmm, lmmsize, it, NULL, flags, reqp,
-			       cb_blocking, extra_lock_flags);
+	/* Not cross-ref case, just get out of here. */
+	if (unlikely((body->mbo_valid & OBD_MD_MDS))) {
+		rc = lmv_intent_remote(exp, it, NULL, reqp, cb_blocking,
+				       extra_lock_flags);
+		if (rc != 0)
+			return rc;
+		body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY);
+		if (!body)
+			return -EPROTO;
+	}
 
 	return rc;
 }
 
 int lmv_intent_lock(struct obd_export *exp, struct md_op_data *op_data,
-		    void *lmm, int lmmsize, struct lookup_intent *it,
-		    int flags, struct ptlrpc_request **reqp,
+		    struct lookup_intent *it, struct ptlrpc_request **reqp,
 		    ldlm_blocking_callback cb_blocking,
 		    __u64 extra_lock_flags)
 {
@@ -300,8 +486,9 @@ int lmv_intent_lock(struct obd_export *exp, struct md_op_data *op_data,
 
 	LASSERT(fid_is_sane(&op_data->op_fid1));
 
-	CDEBUG(D_INODE, "INTENT LOCK '%s' for '%*s' on "DFID"\n",
-	       LL_IT2STR(it), op_data->op_namelen, op_data->op_name,
+	CDEBUG(D_INODE, "INTENT LOCK '%s' for "DFID" '%*s' on "DFID"\n",
+	       LL_IT2STR(it), PFID(&op_data->op_fid2),
+	       (int)op_data->op_namelen, op_data->op_name,
 	       PFID(&op_data->op_fid1));
 
 	rc = lmv_check_connect(obd);
@@ -309,14 +496,34 @@ int lmv_intent_lock(struct obd_export *exp, struct md_op_data *op_data,
 		return rc;
 
 	if (it->it_op & (IT_LOOKUP | IT_GETATTR | IT_LAYOUT))
-		rc = lmv_intent_lookup(exp, op_data, lmm, lmmsize, it,
-				       flags, reqp, cb_blocking,
+		rc = lmv_intent_lookup(exp, op_data, it, reqp, cb_blocking,
 				       extra_lock_flags);
 	else if (it->it_op & IT_OPEN)
-		rc = lmv_intent_open(exp, op_data, lmm, lmmsize, it,
-				     flags, reqp, cb_blocking,
+		rc = lmv_intent_open(exp, op_data, it, reqp, cb_blocking,
 				     extra_lock_flags);
 	else
 		LBUG();
+
+	if (rc < 0) {
+		struct lustre_handle lock_handle;
+
+		if (it->it_lock_mode) {
+			lock_handle.cookie = it->it_lock_handle;
+			ldlm_lock_decref(&lock_handle, it->it_lock_mode);
+		}
+
+		it->it_lock_handle = 0;
+		it->it_lock_mode = 0;
+
+		if (it->it_remote_lock_mode) {
+			lock_handle.cookie = it->it_remote_lock_handle;
+			ldlm_lock_decref(&lock_handle,
+					 it->it_remote_lock_mode);
+		}
+
+		it->it_remote_lock_handle = 0;
+		it->it_remote_lock_mode = 0;
+	}
+
 	return rc;
 }
diff --git a/drivers/staging/lustre/lustre/lmv/lmv_internal.h b/drivers/staging/lustre/lustre/lmv/lmv_internal.h
index 0beafc49b8d2..52b03745ac19 100644
--- a/drivers/staging/lustre/lustre/lmv/lmv_internal.h
+++ b/drivers/staging/lustre/lustre/lmv/lmv_internal.h
@@ -35,6 +35,7 @@
 
 #include "../include/lustre/lustre_idl.h"
 #include "../include/obd.h"
+#include "../include/lustre_lmv.h"
 
 #define LMV_MAX_TGT_COUNT 128
 
@@ -44,77 +45,116 @@
 int lmv_check_connect(struct obd_device *obd);
 
 int lmv_intent_lock(struct obd_export *exp, struct md_op_data *op_data,
-		    void *lmm, int lmmsize, struct lookup_intent *it,
-		    int flags, struct ptlrpc_request **reqp,
+		    struct lookup_intent *it, struct ptlrpc_request **reqp,
 		    ldlm_blocking_callback cb_blocking,
 		    __u64 extra_lock_flags);
 
 int lmv_fld_lookup(struct lmv_obd *lmv, const struct lu_fid *fid, u32 *mds);
 int __lmv_fid_alloc(struct lmv_obd *lmv, struct lu_fid *fid, u32 mds);
-int lmv_fid_alloc(struct obd_export *exp, struct lu_fid *fid,
-		  struct md_op_data *op_data);
+int lmv_fid_alloc(const struct lu_env *env, struct obd_export *exp,
+		  struct lu_fid *fid, struct md_op_data *op_data);
 
-static inline struct lmv_stripe_md *lmv_get_mea(struct ptlrpc_request *req)
-{
-	struct mdt_body	 *body;
-	struct lmv_stripe_md    *mea;
-
-	LASSERT(req);
-
-	body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
-
-	if (!body || !S_ISDIR(body->mode) || !body->eadatasize)
-		return NULL;
+int lmv_unpack_md(struct obd_export *exp, struct lmv_stripe_md **lsmp,
+		  const union lmv_mds_md *lmm, int stripe_count);
 
-	mea = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD,
-					   body->eadatasize);
-	if (mea->mea_count == 0)
-		return NULL;
-	if (mea->mea_magic != MEA_MAGIC_LAST_CHAR &&
-	    mea->mea_magic != MEA_MAGIC_ALL_CHARS &&
-	    mea->mea_magic != MEA_MAGIC_HASH_SEGMENT)
-		return NULL;
+int lmv_revalidate_slaves(struct obd_export *exp,
+			  const struct lmv_stripe_md *lsm,
+			  ldlm_blocking_callback cb_blocking,
+			  int extra_lock_flags);
 
-	return mea;
-}
-
-static inline int lmv_get_easize(struct lmv_obd *lmv)
+static inline struct obd_device *lmv2obd_dev(struct lmv_obd *lmv)
 {
-	return sizeof(struct lmv_stripe_md) +
-		lmv->desc.ld_tgt_count *
-		sizeof(struct lu_fid);
+	return container_of0(lmv, struct obd_device, u.lmv);
 }
 
 static inline struct lmv_tgt_desc *
-lmv_get_target(struct lmv_obd *lmv, u32 mds)
+lmv_get_target(struct lmv_obd *lmv, u32 mdt_idx, int *index)
 {
-	int count = lmv->desc.ld_tgt_count;
 	int i;
 
-	for (i = 0; i < count; i++) {
+	for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
 		if (!lmv->tgts[i])
 			continue;
 
-		if (lmv->tgts[i]->ltd_idx == mds)
+		if (lmv->tgts[i]->ltd_idx == mdt_idx) {
+			if (index)
+				*index = i;
 			return lmv->tgts[i];
+		}
 	}
 
 	return ERR_PTR(-ENODEV);
 }
 
-static inline struct lmv_tgt_desc *
-lmv_find_target(struct lmv_obd *lmv, const struct lu_fid *fid)
+static inline int
+lmv_find_target_index(struct lmv_obd *lmv, const struct lu_fid *fid)
 {
-	u32 mds = 0;
-	int rc;
+	struct lmv_tgt_desc *ltd;
+	u32 mdt_idx = 0;
+	int index = 0;
 
 	if (lmv->desc.ld_tgt_count > 1) {
-		rc = lmv_fld_lookup(lmv, fid, &mds);
-		if (rc)
-			return ERR_PTR(rc);
+		int rc;
+
+		rc = lmv_fld_lookup(lmv, fid, &mdt_idx);
+		if (rc < 0)
+			return rc;
 	}
 
-	return lmv_get_target(lmv, mds);
+	ltd = lmv_get_target(lmv, mdt_idx, &index);
+	if (IS_ERR(ltd))
+		return PTR_ERR(ltd);
+
+	return index;
+}
+
+static inline struct lmv_tgt_desc *
+lmv_find_target(struct lmv_obd *lmv, const struct lu_fid *fid)
+{
+	int index;
+
+	index = lmv_find_target_index(lmv, fid);
+	if (index < 0)
+		return ERR_PTR(index);
+
+	return lmv->tgts[index];
+}
+
+static inline int lmv_stripe_md_size(int stripe_count)
+{
+	struct lmv_stripe_md *lsm;
+
+	return sizeof(*lsm) + stripe_count * sizeof(lsm->lsm_md_oinfo[0]);
+}
+
+int lmv_name_to_stripe_index(enum lmv_hash_type hashtype,
+			     unsigned int max_mdt_index,
+			     const char *name, int namelen);
+
+static inline const struct lmv_oinfo *
+lsm_name_to_stripe_info(const struct lmv_stripe_md *lsm, const char *name,
+			int namelen)
+{
+	int stripe_index;
+
+	stripe_index = lmv_name_to_stripe_index(lsm->lsm_md_hash_type,
+						lsm->lsm_md_stripe_count,
+						name, namelen);
+	if (stripe_index < 0)
+		return ERR_PTR(stripe_index);
+
+	LASSERTF(stripe_index < lsm->lsm_md_stripe_count,
+		 "stripe_index = %d, stripe_count = %d hash_type = %x name = %.*s\n",
+		 stripe_index, lsm->lsm_md_stripe_count,
+		 lsm->lsm_md_hash_type, namelen, name);
+
+	return &lsm->lsm_md_oinfo[stripe_index];
+}
+
+static inline bool lmv_need_try_all_stripes(const struct lmv_stripe_md *lsm)
+{
+	return !lmv_is_known_hash_type(lsm->lsm_md_hash_type) ||
+	       lsm->lsm_md_hash_type & LMV_HASH_FLAG_MIGRATION;
 }
 
 struct lmv_tgt_desc
@@ -123,6 +163,6 @@ struct lmv_tgt_desc
 /* lproc_lmv.c */
 void lprocfs_lmv_init_vars(struct lprocfs_static_vars *lvars);
 
-extern struct file_operations lmv_proc_target_fops;
+extern const struct file_operations lmv_proc_target_fops;
 
 #endif
diff --git a/drivers/staging/lustre/lustre/lmv/lmv_obd.c b/drivers/staging/lustre/lustre/lmv/lmv_obd.c
index 0e1588a43187..7dbb2b946acf 100644
--- a/drivers/staging/lustre/lustre/lmv/lmv_obd.c
+++ b/drivers/staging/lustre/lustre/lmv/lmv_obd.c
@@ -43,12 +43,13 @@
 
 #include "../include/lustre/lustre_idl.h"
 #include "../include/obd_support.h"
-#include "../include/lustre_lib.h"
 #include "../include/lustre_net.h"
 #include "../include/obd_class.h"
+#include "../include/lustre_lmv.h"
 #include "../include/lprocfs_status.h"
-#include "../include/lustre_lite.h"
+#include "../include/cl_object.h"
 #include "../include/lustre_fid.h"
+#include "../include/lustre/lustre_ioctl.h"
 #include "../include/lustre_kernelcomm.h"
 #include "lmv_internal.h"
 
@@ -70,12 +71,12 @@ static void lmv_activate_target(struct lmv_obd *lmv,
  *  -ENOTCONN: The UUID is found, but the target connection is bad (!)
  *  -EBADF   : The UUID is found, but the OBD of the wrong type (!)
  */
-static int lmv_set_mdc_active(struct lmv_obd *lmv, struct obd_uuid *uuid,
+static int lmv_set_mdc_active(struct lmv_obd *lmv, const struct obd_uuid *uuid,
 			      int activate)
 {
 	struct lmv_tgt_desc    *uninitialized_var(tgt);
 	struct obd_device      *obd;
-	int		     i;
+	u32		     i;
 	int		     rc = 0;
 
 	CDEBUG(D_INFO, "Searching in lmv %p for uuid %s (activate=%d)\n",
@@ -244,36 +245,12 @@ static int lmv_connect(const struct lu_env *env,
 	return rc;
 }
 
-static void lmv_set_timeouts(struct obd_device *obd)
-{
-	struct lmv_obd	*lmv;
-	int		    i;
-
-	lmv = &obd->u.lmv;
-	if (lmv->server_timeout == 0)
-		return;
-
-	if (lmv->connected == 0)
-		return;
-
-	for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
-		struct lmv_tgt_desc *tgt = lmv->tgts[i];
-
-		tgt = lmv->tgts[i];
-		if (!tgt || !tgt->ltd_exp || !tgt->ltd_active)
-			continue;
-
-		obd_set_info_async(NULL, tgt->ltd_exp, sizeof(KEY_INTERMDS),
-				   KEY_INTERMDS, 0, NULL, NULL);
-	}
-}
-
-static int lmv_init_ea_size(struct obd_export *exp, int easize,
-			    int def_easize, int cookiesize, int def_cookiesize)
+static int lmv_init_ea_size(struct obd_export *exp, u32 easize, u32 def_easize,
+			    u32 cookiesize, u32 def_cookiesize)
 {
 	struct obd_device   *obd = exp->exp_obd;
 	struct lmv_obd      *lmv = &obd->u.lmv;
-	int		  i;
+	u32 i;
 	int		  rc = 0;
 	int		  change = 0;
 
@@ -420,6 +397,7 @@ static int lmv_add_target(struct obd_device *obd, struct obd_uuid *uuidp,
 {
 	struct lmv_obd      *lmv = &obd->u.lmv;
 	struct lmv_tgt_desc *tgt;
+	int orig_tgt_count = 0;
 	int		  rc = 0;
 
 	CDEBUG(D_CONFIG, "Target uuid: %s. index %d\n", uuidp->uuid, index);
@@ -489,14 +467,17 @@ static int lmv_add_target(struct obd_device *obd, struct obd_uuid *uuidp,
 	tgt->ltd_uuid = *uuidp;
 	tgt->ltd_active = 0;
 	lmv->tgts[index] = tgt;
-	if (index >= lmv->desc.ld_tgt_count)
+	if (index >= lmv->desc.ld_tgt_count) {
+		orig_tgt_count = lmv->desc.ld_tgt_count;
 		lmv->desc.ld_tgt_count = index + 1;
+	}
 
 	if (lmv->connected) {
 		rc = lmv_connect_mdc(obd, tgt);
 		if (rc) {
 			spin_lock(&lmv->lmv_lock);
-			lmv->desc.ld_tgt_count--;
+			if (lmv->desc.ld_tgt_count == index + 1)
+				lmv->desc.ld_tgt_count = orig_tgt_count;
 			memset(tgt, 0, sizeof(*tgt));
 			spin_unlock(&lmv->lmv_lock);
 		} else {
@@ -514,7 +495,7 @@ int lmv_check_connect(struct obd_device *obd)
 {
 	struct lmv_obd       *lmv = &obd->u.lmv;
 	struct lmv_tgt_desc  *tgt;
-	int		   i;
+	u32 i;
 	int		   rc;
 	int		   easize;
 
@@ -554,10 +535,9 @@ int lmv_check_connect(struct obd_device *obd)
 			goto out_disc;
 	}
 
-	lmv_set_timeouts(obd);
 	class_export_put(lmv->exp);
 	lmv->connected = 1;
-	easize = lmv_get_easize(lmv);
+	easize = lmv_mds_md_size(lmv->desc.ld_tgt_count, LMV_MAGIC);
 	lmv_init_ea_size(obd->obd_self_export, easize, 0, 0, 0);
 	mutex_unlock(&lmv->lmv_init_mutex);
 	return 0;
@@ -629,7 +609,7 @@ static int lmv_disconnect(struct obd_export *exp)
 	struct obd_device     *obd = class_exp2obd(exp);
 	struct lmv_obd	*lmv = &obd->u.lmv;
 	int		    rc;
-	int		    i;
+	u32 i;
 
 	if (!lmv->tgts)
 		goto out_local;
@@ -758,7 +738,7 @@ static int lmv_hsm_req_count(struct lmv_obd *lmv,
 			     const struct hsm_user_request *hur,
 			     const struct lmv_tgt_desc *tgt_mds)
 {
-	int			i, nr = 0;
+	u32 i, nr = 0;
 	struct lmv_tgt_desc    *curr_tgt;
 
 	/* count how many requests must be sent to the given target */
@@ -885,10 +865,8 @@ static int lmv_hsm_ct_register(struct lmv_obd *lmv, unsigned int cmd, int len,
 
 	rc = libcfs_kkuc_group_add(filp, lk->lk_uid, lk->lk_group,
 				   &kcd, sizeof(kcd));
-	if (rc) {
-		if (filp)
-			fput(filp);
-	}
+	if (rc)
+		fput(filp);
 
 	return rc;
 }
@@ -899,10 +877,10 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp,
 	struct obd_device    *obddev = class_exp2obd(exp);
 	struct lmv_obd       *lmv = &obddev->u.lmv;
 	struct lmv_tgt_desc *tgt = NULL;
-	int		   i = 0;
+	u32 i = 0;
 	int		   rc = 0;
 	int		   set = 0;
-	int		   count = lmv->desc.ld_tgt_count;
+	u32 count = lmv->desc.ld_tgt_count;
 
 	if (count == 0)
 		return -ENOTTY;
@@ -1011,6 +989,21 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp,
 		rc = obd_iocontrol(cmd, tgt->ltd_exp, len, karg, uarg);
 		break;
 	}
+	case LL_IOC_FID2MDTIDX: {
+		struct lu_fid *fid = karg;
+		int mdt_index;
+
+		rc = lmv_fld_lookup(lmv, fid, &mdt_index);
+		if (rc)
+			return rc;
+
+		/*
+		 * Note: this is from llite(see ll_dir_ioctl()), @uarg does not
+		 * point to user space memory for FID2MDTIDX.
+		 */
+		*(__u32 *)uarg = mdt_index;
+		break;
+	}
 	case OBD_IOC_FID2PATH: {
 		rc = lmv_fid2path(exp, len, karg, uarg);
 		break;
@@ -1169,32 +1162,37 @@ static int lmv_placement_policy(struct obd_device *obd,
 		return 0;
 	}
 
+	if (op_data->op_default_stripe_offset != -1) {
+		*mds = op_data->op_default_stripe_offset;
+		return 0;
+	}
+
 	/**
 	 * If stripe_offset is provided during setdirstripe
 	 * (setdirstripe -i xx), xx MDS will be chosen.
 	 */
-	if (op_data->op_cli_flags & CLI_SET_MEA) {
+	if (op_data->op_cli_flags & CLI_SET_MEA && op_data->op_data) {
 		struct lmv_user_md *lum;
 
-		lum = (struct lmv_user_md *)op_data->op_data;
-		if (lum->lum_type == LMV_STRIPE_TYPE &&
-		    lum->lum_stripe_offset != -1) {
-			if (lum->lum_stripe_offset >= lmv->desc.ld_tgt_count) {
-				CERROR("%s: Stripe_offset %d > MDT count %d: rc = %d\n",
-				       obd->obd_name,
-				       lum->lum_stripe_offset,
-				       lmv->desc.ld_tgt_count, -ERANGE);
-				return -ERANGE;
-			}
-			*mds = lum->lum_stripe_offset;
-			return 0;
+		lum = op_data->op_data;
+		if (le32_to_cpu(lum->lum_stripe_offset) != (__u32)-1) {
+			*mds = le32_to_cpu(lum->lum_stripe_offset);
+		} else {
+			/*
+			 * -1 means default, which will be in the same MDT with
+			 * the stripe
+			 */
+			*mds = op_data->op_mds;
+			lum->lum_stripe_offset = cpu_to_le32(op_data->op_mds);
 		}
+	} else {
+		/*
+		 * Allocate new fid on target according to operation type and
+		 * parent home mds.
+		 */
+		*mds = op_data->op_mds;
 	}
 
-	/* Allocate new fid on target according to operation type and parent
-	 * home mds.
-	 */
-	*mds = op_data->op_mds;
 	return 0;
 }
 
@@ -1203,7 +1201,7 @@ int __lmv_fid_alloc(struct lmv_obd *lmv, struct lu_fid *fid, u32 mds)
 	struct lmv_tgt_desc	*tgt;
 	int			 rc;
 
-	tgt = lmv_get_target(lmv, mds);
+	tgt = lmv_get_target(lmv, mds, NULL);
 	if (IS_ERR(tgt))
 		return PTR_ERR(tgt);
 
@@ -1221,7 +1219,7 @@ int __lmv_fid_alloc(struct lmv_obd *lmv, struct lu_fid *fid, u32 mds)
 	/*
 	 * Asking underlaying tgt layer to allocate new fid.
 	 */
-	rc = obd_fid_alloc(tgt->ltd_exp, fid, NULL);
+	rc = obd_fid_alloc(NULL, tgt->ltd_exp, fid, NULL);
 	if (rc > 0) {
 		LASSERT(fid_is_sane(fid));
 		rc = 0;
@@ -1232,8 +1230,8 @@ out:
 	return rc;
 }
 
-int lmv_fid_alloc(struct obd_export *exp, struct lu_fid *fid,
-		  struct md_op_data *op_data)
+int lmv_fid_alloc(const struct lu_env *env, struct obd_export *exp,
+		  struct lu_fid *fid, struct md_op_data *op_data)
 {
 	struct obd_device     *obd = class_exp2obd(exp);
 	struct lmv_obd	*lmv = &obd->u.lmv;
@@ -1278,10 +1276,10 @@ static int lmv_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
 		return -EINVAL;
 	}
 
-	lmv->tgts = kcalloc(32, sizeof(*lmv->tgts), GFP_NOFS);
+	lmv->tgts_size = 32U;
+	lmv->tgts = kcalloc(lmv->tgts_size, sizeof(*lmv->tgts), GFP_NOFS);
 	if (!lmv->tgts)
 		return -ENOMEM;
-	lmv->tgts_size = 32;
 
 	obd_str2uuid(&lmv->desc.ld_uuid, desc->ld_uuid.uuid);
 	lmv->desc.ld_tgt_count = 0;
@@ -1354,7 +1352,7 @@ static int lmv_process_config(struct obd_device *obd, u32 len, void *buf)
 
 		obd_str2uuid(&obd_uuid,  lustre_cfg_buf(lcfg, 1));
 
-		if (sscanf(lustre_cfg_buf(lcfg, 2), "%d", &index) != 1) {
+		if (sscanf(lustre_cfg_buf(lcfg, 2), "%u", &index) != 1) {
 			rc = -EINVAL;
 			goto out;
 		}
@@ -1380,7 +1378,7 @@ static int lmv_statfs(const struct lu_env *env, struct obd_export *exp,
 	struct lmv_obd	*lmv = &obd->u.lmv;
 	struct obd_statfs     *temp;
 	int		    rc = 0;
-	int		    i;
+	u32 i;
 
 	rc = lmv_check_connect(obd);
 	if (rc)
@@ -1522,7 +1520,7 @@ static int lmv_null_inode(struct obd_export *exp, const struct lu_fid *fid)
 {
 	struct obd_device   *obd = exp->exp_obd;
 	struct lmv_obd      *lmv = &obd->u.lmv;
-	int		  i;
+	u32 i;
 	int		  rc;
 
 	rc = lmv_check_connect(obd);
@@ -1545,36 +1543,6 @@ static int lmv_null_inode(struct obd_export *exp, const struct lu_fid *fid)
 	return 0;
 }
 
-static int lmv_find_cbdata(struct obd_export *exp, const struct lu_fid *fid,
-			   ldlm_iterator_t it, void *data)
-{
-	struct obd_device   *obd = exp->exp_obd;
-	struct lmv_obd      *lmv = &obd->u.lmv;
-	int		  i;
-	int		  rc;
-
-	rc = lmv_check_connect(obd);
-	if (rc)
-		return rc;
-
-	CDEBUG(D_INODE, "CBDATA for "DFID"\n", PFID(fid));
-
-	/*
-	 * With DNE every object can have two locks in different namespaces:
-	 * lookup lock in space of MDT storing direntry and update/open lock in
-	 * space of MDT storing inode.
-	 */
-	for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
-		if (!lmv->tgts[i] || !lmv->tgts[i]->ltd_exp)
-			continue;
-		rc = md_find_cbdata(lmv->tgts[i]->ltd_exp, fid, it, data);
-		if (rc)
-			return rc;
-	}
-
-	return rc;
-}
-
 static int lmv_close(struct obd_export *exp, struct md_op_data *op_data,
 		     struct md_open_data *mod, struct ptlrpc_request **request)
 {
@@ -1596,25 +1564,116 @@ static int lmv_close(struct obd_export *exp, struct md_op_data *op_data,
 	return rc;
 }
 
-struct lmv_tgt_desc
-*lmv_locate_mds(struct lmv_obd *lmv, struct md_op_data *op_data,
-		struct lu_fid *fid)
+/**
+ * Choosing the MDT by name or FID in @op_data.
+ * For non-striped directory, it will locate MDT by fid.
+ * For striped-directory, it will locate MDT by name. And also
+ * it will reset op_fid1 with the FID of the chosen stripe.
+ **/
+static struct lmv_tgt_desc *
+lmv_locate_target_for_name(struct lmv_obd *lmv, struct lmv_stripe_md *lsm,
+			   const char *name, int namelen, struct lu_fid *fid,
+			   u32 *mds)
+{
+	const struct lmv_oinfo *oinfo;
+	struct lmv_tgt_desc *tgt;
+
+	if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_BAD_NAME_HASH)) {
+		if (cfs_fail_val >= lsm->lsm_md_stripe_count)
+			return ERR_PTR(-EBADF);
+		oinfo = &lsm->lsm_md_oinfo[cfs_fail_val];
+	} else {
+		oinfo = lsm_name_to_stripe_info(lsm, name, namelen);
+		if (IS_ERR(oinfo))
+			return ERR_CAST(oinfo);
+	}
+
+	if (fid)
+		*fid = oinfo->lmo_fid;
+	if (mds)
+		*mds = oinfo->lmo_mds;
+
+	tgt = lmv_get_target(lmv, oinfo->lmo_mds, NULL);
+
+	CDEBUG(D_INFO, "locate on mds %u " DFID "\n", oinfo->lmo_mds,
+	       PFID(&oinfo->lmo_fid));
+	return tgt;
+}
+
+/**
+ * Locate mds by fid or name
+ *
+ * For striped directory (lsm != NULL), it will locate the stripe
+ * by name hash (see lsm_name_to_stripe_info()). Note: if the hash_type
+ * is unknown, it will return -EBADFD, and lmv_intent_lookup might need
+ * walk through all of stripes to locate the entry.
+ *
+ * For normal direcotry, it will locate MDS by FID directly.
+ * \param[in] lmv	LMV device
+ * \param[in] op_data	client MD stack parameters, name, namelen
+ *			mds_num etc.
+ * \param[in] fid	object FID used to locate MDS.
+ *
+ * retval		pointer to the lmv_tgt_desc if succeed.
+ *			ERR_PTR(errno) if failed.
+ */
+struct lmv_tgt_desc*
+lmv_locate_mds(struct lmv_obd *lmv, struct md_op_data *op_data,
+	       struct lu_fid *fid)
 {
+	struct lmv_stripe_md *lsm = op_data->op_mea1;
 	struct lmv_tgt_desc *tgt;
 
-	tgt = lmv_find_target(lmv, fid);
-	if (IS_ERR(tgt))
+	/*
+	 * During creating VOLATILE file, it should honor the mdt
+	 * index if the file under striped dir is being restored, see
+	 * ct_restore().
+	 */
+	if (op_data->op_bias & MDS_CREATE_VOLATILE &&
+	    (int)op_data->op_mds != -1 && lsm) {
+		int i;
+
+		tgt = lmv_get_target(lmv, op_data->op_mds, NULL);
+		if (IS_ERR(tgt))
+			return tgt;
+
+		/* refill the right parent fid */
+		for (i = 0; i < lsm->lsm_md_stripe_count; i++) {
+			struct lmv_oinfo *oinfo;
+
+			oinfo = &lsm->lsm_md_oinfo[i];
+			if (oinfo->lmo_mds == op_data->op_mds) {
+				*fid = oinfo->lmo_fid;
+				break;
+			}
+		}
+
+		/* Hmm, can not find the stripe by mdt_index(op_mds) */
+		if (i == lsm->lsm_md_stripe_count)
+			tgt = ERR_PTR(-EINVAL);
+
 		return tgt;
+	}
 
-	op_data->op_mds = tgt->ltd_idx;
+	if (!lsm || !op_data->op_namelen) {
+		tgt = lmv_find_target(lmv, fid);
+		if (IS_ERR(tgt))
+			return tgt;
 
-	return tgt;
+		op_data->op_mds = tgt->ltd_idx;
+
+		return tgt;
+	}
+
+	return lmv_locate_target_for_name(lmv, lsm, op_data->op_name,
+					  op_data->op_namelen, fid,
+					  &op_data->op_mds);
 }
 
 static int lmv_create(struct obd_export *exp, struct md_op_data *op_data,
-		      const void *data, int datalen, int mode, __u32 uid,
-		      __u32 gid, cfs_cap_t cap_effective, __u64 rdev,
-		      struct ptlrpc_request **request)
+		      const void *data, size_t datalen, umode_t mode,
+		      uid_t uid, gid_t gid, cfs_cap_t cap_effective,
+		      __u64 rdev, struct ptlrpc_request **request)
 {
 	struct obd_device       *obd = exp->exp_obd;
 	struct lmv_obd	  *lmv = &obd->u.lmv;
@@ -1632,13 +1691,30 @@ static int lmv_create(struct obd_export *exp, struct md_op_data *op_data,
 	if (IS_ERR(tgt))
 		return PTR_ERR(tgt);
 
-	rc = lmv_fid_alloc(exp, &op_data->op_fid2, op_data);
+	CDEBUG(D_INODE, "CREATE name '%.*s' on "DFID" -> mds #%x\n",
+	       (int)op_data->op_namelen, op_data->op_name,
+	       PFID(&op_data->op_fid1), op_data->op_mds);
+
+	rc = lmv_fid_alloc(NULL, exp, &op_data->op_fid2, op_data);
 	if (rc)
 		return rc;
 
-	CDEBUG(D_INODE, "CREATE '%*s' on "DFID" -> mds #%x\n",
-	       op_data->op_namelen, op_data->op_name, PFID(&op_data->op_fid1),
-	       op_data->op_mds);
+	if (exp_connect_flags(exp) & OBD_CONNECT_DIR_STRIPE) {
+		/*
+		 * Send the create request to the MDT where the object
+		 * will be located
+		 */
+		tgt = lmv_find_target(lmv, &op_data->op_fid2);
+		if (IS_ERR(tgt))
+			return PTR_ERR(tgt);
+
+		op_data->op_mds = tgt->ltd_idx;
+	} else {
+		CDEBUG(D_CONFIG, "Server doesn't support striped dirs\n");
+	}
+
+	CDEBUG(D_INODE, "CREATE obj "DFID" -> mds #%x\n",
+	       PFID(&op_data->op_fid1), op_data->op_mds);
 
 	op_data->op_flags |= MF_MDC_CANCEL_FID1;
 	rc = md_create(tgt->ltd_exp, op_data, data, datalen, mode, uid, gid,
@@ -1674,70 +1750,10 @@ static int lmv_done_writing(struct obd_export *exp,
 }
 
 static int
-lmv_enqueue_remote(struct obd_export *exp, struct ldlm_enqueue_info *einfo,
-		   struct lookup_intent *it, struct md_op_data *op_data,
-		   struct lustre_handle *lockh, void *lmm, int lmmsize,
-		   __u64 extra_lock_flags)
-{
-	struct ptlrpc_request      *req = it->it_request;
-	struct obd_device	  *obd = exp->exp_obd;
-	struct lmv_obd	     *lmv = &obd->u.lmv;
-	struct lustre_handle	plock;
-	struct lmv_tgt_desc	*tgt;
-	struct md_op_data	  *rdata;
-	struct lu_fid	       fid1;
-	struct mdt_body	    *body;
-	int			 rc = 0;
-	int			 pmode;
-
-	body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
-
-	if (!(body->valid & OBD_MD_MDS))
-		return 0;
-
-	CDEBUG(D_INODE, "REMOTE_ENQUEUE '%s' on "DFID" -> "DFID"\n",
-	       LL_IT2STR(it), PFID(&op_data->op_fid1), PFID(&body->fid1));
-
-	/*
-	 * We got LOOKUP lock, but we really need attrs.
-	 */
-	pmode = it->it_lock_mode;
-	LASSERT(pmode != 0);
-	memcpy(&plock, lockh, sizeof(plock));
-	it->it_lock_mode = 0;
-	it->it_request = NULL;
-	fid1 = body->fid1;
-
-	ptlrpc_req_finished(req);
-
-	tgt = lmv_find_target(lmv, &fid1);
-	if (IS_ERR(tgt)) {
-		rc = PTR_ERR(tgt);
-		goto out;
-	}
-
-	rdata = kzalloc(sizeof(*rdata), GFP_NOFS);
-	if (!rdata) {
-		rc = -ENOMEM;
-		goto out;
-	}
-
-	rdata->op_fid1 = fid1;
-	rdata->op_bias = MDS_CROSS_REF;
-
-	rc = md_enqueue(tgt->ltd_exp, einfo, it, rdata, lockh,
-			lmm, lmmsize, NULL, extra_lock_flags);
-	kfree(rdata);
-out:
-	ldlm_lock_decref(&plock, pmode);
-	return rc;
-}
-
-static int
 lmv_enqueue(struct obd_export *exp, struct ldlm_enqueue_info *einfo,
+	    const ldlm_policy_data_t *policy,
 	    struct lookup_intent *it, struct md_op_data *op_data,
-	    struct lustre_handle *lockh, void *lmm, int lmmsize,
-	    struct ptlrpc_request **req, __u64 extra_lock_flags)
+	    struct lustre_handle *lockh, __u64 extra_lock_flags)
 {
 	struct obd_device	*obd = exp->exp_obd;
 	struct lmv_obd	   *lmv = &obd->u.lmv;
@@ -1755,22 +1771,18 @@ lmv_enqueue(struct obd_export *exp, struct ldlm_enqueue_info *einfo,
 	if (IS_ERR(tgt))
 		return PTR_ERR(tgt);
 
-	CDEBUG(D_INODE, "ENQUEUE '%s' on "DFID" -> mds #%d\n",
+	CDEBUG(D_INODE, "ENQUEUE '%s' on " DFID " -> mds #%u\n",
 	       LL_IT2STR(it), PFID(&op_data->op_fid1), tgt->ltd_idx);
 
-	rc = md_enqueue(tgt->ltd_exp, einfo, it, op_data, lockh,
-			lmm, lmmsize, req, extra_lock_flags);
+	rc = md_enqueue(tgt->ltd_exp, einfo, policy, it, op_data, lockh,
+			extra_lock_flags);
 
-	if (rc == 0 && it && it->it_op == IT_OPEN) {
-		rc = lmv_enqueue_remote(exp, einfo, it, op_data, lockh,
-					lmm, lmmsize, extra_lock_flags);
-	}
 	return rc;
 }
 
 static int
 lmv_getattr_name(struct obd_export *exp, struct md_op_data *op_data,
-		 struct ptlrpc_request **request)
+		 struct ptlrpc_request **preq)
 {
 	struct ptlrpc_request   *req = NULL;
 	struct obd_device       *obd = exp->exp_obd;
@@ -1787,26 +1799,25 @@ lmv_getattr_name(struct obd_export *exp, struct md_op_data *op_data,
 	if (IS_ERR(tgt))
 		return PTR_ERR(tgt);
 
-	CDEBUG(D_INODE, "GETATTR_NAME for %*s on "DFID" -> mds #%d\n",
-	       op_data->op_namelen, op_data->op_name, PFID(&op_data->op_fid1),
-	       tgt->ltd_idx);
+	CDEBUG(D_INODE, "GETATTR_NAME for %*s on " DFID " -> mds #%u\n",
+	       (int)op_data->op_namelen, op_data->op_name,
+	       PFID(&op_data->op_fid1), tgt->ltd_idx);
 
-	rc = md_getattr_name(tgt->ltd_exp, op_data, request);
+	rc = md_getattr_name(tgt->ltd_exp, op_data, preq);
 	if (rc != 0)
 		return rc;
 
-	body = req_capsule_server_get(&(*request)->rq_pill,
-				      &RMF_MDT_BODY);
-
-	if (body->valid & OBD_MD_MDS) {
-		struct lu_fid rid = body->fid1;
+	body = req_capsule_server_get(&(*preq)->rq_pill, &RMF_MDT_BODY);
+	if (body->mbo_valid & OBD_MD_MDS) {
+		struct lu_fid rid = body->mbo_fid1;
 
 		CDEBUG(D_INODE, "Request attrs for "DFID"\n",
 		       PFID(&rid));
 
 		tgt = lmv_find_target(lmv, &rid);
 		if (IS_ERR(tgt)) {
-			ptlrpc_req_finished(*request);
+			ptlrpc_req_finished(*preq);
+			*preq = NULL;
 			return PTR_ERR(tgt);
 		}
 
@@ -1815,8 +1826,8 @@ lmv_getattr_name(struct obd_export *exp, struct md_op_data *op_data,
 		op_data->op_namelen = 0;
 		op_data->op_name = NULL;
 		rc = md_getattr_name(tgt->ltd_exp, op_data, &req);
-		ptlrpc_req_finished(*request);
-		*request = req;
+		ptlrpc_req_finished(*preq);
+		*preq = req;
 	}
 
 	return rc;
@@ -1829,23 +1840,24 @@ lmv_getattr_name(struct obd_export *exp, struct md_op_data *op_data,
 	 fl == MF_MDC_CANCEL_FID4 ? &op_data->op_fid4 : \
 	 NULL)
 
-static int lmv_early_cancel(struct obd_export *exp, struct md_op_data *op_data,
-			    int op_tgt, enum ldlm_mode mode, int bits,
-			    int flag)
+static int lmv_early_cancel(struct obd_export *exp, struct lmv_tgt_desc *tgt,
+			    struct md_op_data *op_data, int op_tgt,
+			    enum ldlm_mode mode, int bits, int flag)
 {
 	struct lu_fid	  *fid = md_op_data_fid(op_data, flag);
 	struct obd_device      *obd = exp->exp_obd;
 	struct lmv_obd	 *lmv = &obd->u.lmv;
-	struct lmv_tgt_desc    *tgt;
 	ldlm_policy_data_t      policy = { {0} };
 	int		     rc = 0;
 
 	if (!fid_is_sane(fid))
 		return 0;
 
-	tgt = lmv_find_target(lmv, fid);
-	if (IS_ERR(tgt))
-		return PTR_ERR(tgt);
+	if (!tgt) {
+		tgt = lmv_find_target(lmv, fid);
+		if (IS_ERR(tgt))
+			return PTR_ERR(tgt);
+	}
 
 	if (tgt->ltd_idx != op_tgt) {
 		CDEBUG(D_INODE, "EARLY_CANCEL on "DFID"\n", PFID(fid));
@@ -1882,12 +1894,24 @@ static int lmv_link(struct obd_export *exp, struct md_op_data *op_data,
 	LASSERT(op_data->op_namelen != 0);
 
 	CDEBUG(D_INODE, "LINK "DFID":%*s to "DFID"\n",
-	       PFID(&op_data->op_fid2), op_data->op_namelen,
+	       PFID(&op_data->op_fid2), (int)op_data->op_namelen,
 	       op_data->op_name, PFID(&op_data->op_fid1));
 
 	op_data->op_fsuid = from_kuid(&init_user_ns, current_fsuid());
 	op_data->op_fsgid = from_kgid(&init_user_ns, current_fsgid());
 	op_data->op_cap = cfs_curproc_cap_pack();
+	if (op_data->op_mea2) {
+		struct lmv_stripe_md *lsm = op_data->op_mea2;
+		const struct lmv_oinfo *oinfo;
+
+		oinfo = lsm_name_to_stripe_info(lsm, op_data->op_name,
+						op_data->op_namelen);
+		if (IS_ERR(oinfo))
+			return PTR_ERR(oinfo);
+
+		op_data->op_fid2 = oinfo->lmo_fid;
+	}
+
 	tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid2);
 	if (IS_ERR(tgt))
 		return PTR_ERR(tgt);
@@ -1896,7 +1920,7 @@ static int lmv_link(struct obd_export *exp, struct md_op_data *op_data,
 	 * Cancel UPDATE lock on child (fid1).
 	 */
 	op_data->op_flags |= MF_MDC_CANCEL_FID2;
-	rc = lmv_early_cancel(exp, op_data, tgt->ltd_idx, LCK_EX,
+	rc = lmv_early_cancel(exp, NULL, op_data, tgt->ltd_idx, LCK_EX,
 			      MDS_INODELOCK_UPDATE, MF_MDC_CANCEL_FID1);
 	if (rc != 0)
 		return rc;
@@ -1907,20 +1931,22 @@ static int lmv_link(struct obd_export *exp, struct md_op_data *op_data,
 }
 
 static int lmv_rename(struct obd_export *exp, struct md_op_data *op_data,
-		      const char *old, int oldlen, const char *new, int newlen,
+		      const char *old, size_t oldlen,
+		      const char *new, size_t newlen,
 		      struct ptlrpc_request **request)
 {
 	struct obd_device       *obd = exp->exp_obd;
 	struct lmv_obd	  *lmv = &obd->u.lmv;
 	struct lmv_tgt_desc     *src_tgt;
-	struct lmv_tgt_desc     *tgt_tgt;
 	int			rc;
 
 	LASSERT(oldlen != 0);
 
-	CDEBUG(D_INODE, "RENAME %*s in "DFID" to %*s in "DFID"\n",
-	       oldlen, old, PFID(&op_data->op_fid1),
-	       newlen, new, PFID(&op_data->op_fid2));
+	CDEBUG(D_INODE, "RENAME %.*s in "DFID":%d to %.*s in "DFID":%d\n",
+	       (int)oldlen, old, PFID(&op_data->op_fid1),
+	       op_data->op_mea1 ? op_data->op_mea1->lsm_md_stripe_count : 0,
+	       (int)newlen, new, PFID(&op_data->op_fid2),
+	       op_data->op_mea2 ? op_data->op_mea2->lsm_md_stripe_count : 0);
 
 	rc = lmv_check_connect(obd);
 	if (rc)
@@ -1929,13 +1955,60 @@ static int lmv_rename(struct obd_export *exp, struct md_op_data *op_data,
 	op_data->op_fsuid = from_kuid(&init_user_ns, current_fsuid());
 	op_data->op_fsgid = from_kgid(&init_user_ns, current_fsgid());
 	op_data->op_cap = cfs_curproc_cap_pack();
-	src_tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1);
+
+	if (op_data->op_cli_flags & CLI_MIGRATE) {
+		LASSERTF(fid_is_sane(&op_data->op_fid3), "invalid FID "DFID"\n",
+			 PFID(&op_data->op_fid3));
+
+		if (op_data->op_mea1) {
+			struct lmv_stripe_md *lsm = op_data->op_mea1;
+			struct lmv_tgt_desc *tmp;
+
+			/* Fix the parent fid for striped dir */
+			tmp = lmv_locate_target_for_name(lmv, lsm, old,
+							 oldlen,
+							 &op_data->op_fid1,
+							 NULL);
+			if (IS_ERR(tmp))
+				return PTR_ERR(tmp);
+		}
+
+		rc = lmv_fid_alloc(NULL, exp, &op_data->op_fid2, op_data);
+		if (rc)
+			return rc;
+		src_tgt = lmv_find_target(lmv, &op_data->op_fid3);
+	} else {
+		if (op_data->op_mea1) {
+			struct lmv_stripe_md *lsm = op_data->op_mea1;
+
+			src_tgt = lmv_locate_target_for_name(lmv, lsm, old,
+							     oldlen,
+							     &op_data->op_fid1,
+							     &op_data->op_mds);
+			if (IS_ERR(src_tgt))
+				return PTR_ERR(src_tgt);
+		} else {
+			src_tgt = lmv_find_target(lmv, &op_data->op_fid1);
+			if (IS_ERR(src_tgt))
+				return PTR_ERR(src_tgt);
+
+			op_data->op_mds = src_tgt->ltd_idx;
+		}
+
+		if (op_data->op_mea2) {
+			struct lmv_stripe_md *lsm = op_data->op_mea2;
+			const struct lmv_oinfo *oinfo;
+
+			oinfo = lsm_name_to_stripe_info(lsm, new, newlen);
+			if (IS_ERR(oinfo))
+				return PTR_ERR(oinfo);
+
+			op_data->op_fid2 = oinfo->lmo_fid;
+		}
+	}
 	if (IS_ERR(src_tgt))
 		return PTR_ERR(src_tgt);
 
-	tgt_tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid2);
-	if (IS_ERR(tgt_tgt))
-		return PTR_ERR(tgt_tgt);
 	/*
 	 * LOOKUP lock on src child (fid3) should also be cancelled for
 	 * src_tgt in mdc_rename.
@@ -1946,35 +2019,53 @@ static int lmv_rename(struct obd_export *exp, struct md_op_data *op_data,
 	 * Cancel UPDATE locks on tgt parent (fid2), tgt_tgt is its
 	 * own target.
 	 */
-	rc = lmv_early_cancel(exp, op_data, src_tgt->ltd_idx,
+	rc = lmv_early_cancel(exp, NULL, op_data, src_tgt->ltd_idx,
 			      LCK_EX, MDS_INODELOCK_UPDATE,
 			      MF_MDC_CANCEL_FID2);
-
+	if (rc)
+		return rc;
 	/*
-	 * Cancel LOOKUP locks on tgt child (fid4) for parent tgt_tgt.
+	 * Cancel LOOKUP locks on source child (fid3) for parent tgt_tgt.
 	 */
-	if (rc == 0) {
-		rc = lmv_early_cancel(exp, op_data, src_tgt->ltd_idx,
+	if (fid_is_sane(&op_data->op_fid3)) {
+		struct lmv_tgt_desc *tgt;
+
+		tgt = lmv_find_target(lmv, &op_data->op_fid1);
+		if (IS_ERR(tgt))
+			return PTR_ERR(tgt);
+
+		/* Cancel LOOKUP lock on its parent */
+		rc = lmv_early_cancel(exp, tgt, op_data, src_tgt->ltd_idx,
 				      LCK_EX, MDS_INODELOCK_LOOKUP,
-				      MF_MDC_CANCEL_FID4);
+				      MF_MDC_CANCEL_FID3);
+		if (rc)
+			return rc;
+
+		rc = lmv_early_cancel(exp, NULL, op_data, src_tgt->ltd_idx,
+				      LCK_EX, MDS_INODELOCK_FULL,
+				      MF_MDC_CANCEL_FID3);
+		if (rc)
+			return rc;
 	}
 
 	/*
 	 * Cancel all the locks on tgt child (fid4).
 	 */
-	if (rc == 0)
-		rc = lmv_early_cancel(exp, op_data, src_tgt->ltd_idx,
+	if (fid_is_sane(&op_data->op_fid4))
+		rc = lmv_early_cancel(exp, NULL, op_data, src_tgt->ltd_idx,
 				      LCK_EX, MDS_INODELOCK_FULL,
 				      MF_MDC_CANCEL_FID4);
 
-	if (rc == 0)
-		rc = md_rename(src_tgt->ltd_exp, op_data, old, oldlen,
-			       new, newlen, request);
+	CDEBUG(D_INODE, DFID":m%d to "DFID"\n", PFID(&op_data->op_fid1),
+	       op_data->op_mds, PFID(&op_data->op_fid2));
+
+	rc = md_rename(src_tgt->ltd_exp, op_data, old, oldlen,
+		       new, newlen, request);
 	return rc;
 }
 
 static int lmv_setattr(struct obd_export *exp, struct md_op_data *op_data,
-		       void *ea, int ealen, void *ea2, int ea2len,
+		       void *ea, size_t ealen, void *ea2, size_t ea2len,
 		       struct ptlrpc_request **request,
 		       struct md_open_data **mod)
 {
@@ -2021,169 +2112,419 @@ static int lmv_sync(struct obd_export *exp, const struct lu_fid *fid,
 	return rc;
 }
 
-/*
- * Adjust a set of pages, each page containing an array of lu_dirpages,
- * so that each page can be used as a single logical lu_dirpage.
+/**
+ * Get current minimum entry from striped directory
  *
- * A lu_dirpage is laid out as follows, where s = ldp_hash_start,
- * e = ldp_hash_end, f = ldp_flags, p = padding, and each "ent" is a
- * struct lu_dirent.  It has size up to LU_PAGE_SIZE. The ldp_hash_end
- * value is used as a cookie to request the next lu_dirpage in a
- * directory listing that spans multiple pages (two in this example):
- *   ________
- *  |	|
- * .|--------v-------   -----.
- * |s|e|f|p|ent|ent| ... |ent|
- * '--|--------------   -----'   Each CFS_PAGE contains a single
- *    '------.		   lu_dirpage.
- * .---------v-------   -----.
- * |s|e|f|p|ent| 0 | ... | 0 |
- * '-----------------   -----'
+ * This function will search the dir entry, whose hash value is the
+ * closest(>=) to @hash_offset, from all of sub-stripes, and it is
+ * only being called for striped directory.
  *
- * However, on hosts where the native VM page size (PAGE_SIZE) is
- * larger than LU_PAGE_SIZE, a single host page may contain multiple
- * lu_dirpages. After reading the lu_dirpages from the MDS, the
- * ldp_hash_end of the first lu_dirpage refers to the one immediately
- * after it in the same CFS_PAGE (arrows simplified for brevity, but
- * in general e0==s1, e1==s2, etc.):
+ * \param[in] exp		export of LMV
+ * \param[in] op_data		parameters transferred beween client MD stack
+ *				stripe_information will be included in this
+ *				parameter
+ * \param[in] cb_op		ldlm callback being used in enqueue in
+ *				mdc_read_page
+ * \param[in] hash_offset	the hash value, which is used to locate
+ *				minum(closet) dir entry
+ * \param[in|out] stripe_offset	the caller use this to indicate the stripe
+ *				index of last entry, so to avoid hash conflict
+ *				between stripes. It will also be used to
+ *				return the stripe index of current dir entry.
+ * \param[in|out] entp		the minum entry and it also is being used
+ *				to input the last dir entry to resolve the
+ *				hash conflict
  *
- * .--------------------   -----.
- * |s0|e0|f0|p|ent|ent| ... |ent|
- * |---v----------------   -----|
- * |s1|e1|f1|p|ent|ent| ... |ent|
- * |---v----------------   -----|  Here, each CFS_PAGE contains
- *	     ...		 multiple lu_dirpages.
- * |---v----------------   -----|
- * |s'|e'|f'|p|ent|ent| ... |ent|
- * '---|----------------   -----'
- *     v
- * .----------------------------.
- * |	next CFS_PAGE       |
+ * \param[out] ppage		the page which holds the minum entry
  *
- * This structure is transformed into a single logical lu_dirpage as follows:
+ * \retval			= 0 get the entry successfully
+ *				negative errno (< 0) does not get the entry
+ */
+static int lmv_get_min_striped_entry(struct obd_export *exp,
+				     struct md_op_data *op_data,
+				     struct md_callback *cb_op,
+				     __u64 hash_offset, int *stripe_offset,
+				     struct lu_dirent **entp,
+				     struct page **ppage)
+{
+	struct lmv_stripe_md *lsm = op_data->op_mea1;
+	struct obd_device *obd = exp->exp_obd;
+	struct lmv_obd *lmv = &obd->u.lmv;
+	struct lu_dirent *min_ent = NULL;
+	struct page *min_page = NULL;
+	struct lmv_tgt_desc *tgt;
+	int stripe_count;
+	int min_idx = 0;
+	int rc = 0;
+	int i;
+
+	stripe_count = lsm->lsm_md_stripe_count;
+	for (i = 0; i < stripe_count; i++) {
+		__u64 stripe_hash = hash_offset;
+		struct lu_dirent *ent = NULL;
+		struct page *page = NULL;
+		struct lu_dirpage *dp;
+
+		tgt = lmv_get_target(lmv, lsm->lsm_md_oinfo[i].lmo_mds, NULL);
+		if (IS_ERR(tgt)) {
+			rc = PTR_ERR(tgt);
+			goto out;
+		}
+
+		/*
+		 * op_data will be shared by each stripe, so we need
+		 * reset these value for each stripe
+		 */
+		op_data->op_fid1 = lsm->lsm_md_oinfo[i].lmo_fid;
+		op_data->op_fid2 = lsm->lsm_md_oinfo[i].lmo_fid;
+		op_data->op_data = lsm->lsm_md_oinfo[i].lmo_root;
+next:
+		rc = md_read_page(tgt->ltd_exp, op_data, cb_op, stripe_hash,
+				  &page);
+		if (rc)
+			goto out;
+
+		dp = page_address(page);
+		for (ent = lu_dirent_start(dp); ent;
+		     ent = lu_dirent_next(ent)) {
+			/* Skip dummy entry */
+			if (!le16_to_cpu(ent->lde_namelen))
+				continue;
+
+			if (le64_to_cpu(ent->lde_hash) < hash_offset)
+				continue;
+
+			if (le64_to_cpu(ent->lde_hash) == hash_offset &&
+			    (*entp == ent || i < *stripe_offset))
+				continue;
+
+			/* skip . and .. for other stripes */
+			if (i && (!strncmp(ent->lde_name, ".",
+					   le16_to_cpu(ent->lde_namelen)) ||
+				  !strncmp(ent->lde_name, "..",
+					   le16_to_cpu(ent->lde_namelen))))
+				continue;
+			break;
+		}
+
+		if (!ent) {
+			stripe_hash = le64_to_cpu(dp->ldp_hash_end);
+
+			kunmap(page);
+			put_page(page);
+			page = NULL;
+
+			/*
+			 * reach the end of current stripe, go to next stripe
+			 */
+			if (stripe_hash == MDS_DIR_END_OFF)
+				continue;
+			else
+				goto next;
+		}
+
+		if (min_ent) {
+			if (le64_to_cpu(min_ent->lde_hash) >
+			    le64_to_cpu(ent->lde_hash)) {
+				min_ent = ent;
+				kunmap(min_page);
+				put_page(min_page);
+				min_idx = i;
+				min_page = page;
+			} else {
+				kunmap(page);
+				put_page(page);
+				page = NULL;
+			}
+		} else {
+			min_ent = ent;
+			min_page = page;
+			min_idx = i;
+		}
+	}
+
+out:
+	if (*ppage) {
+		kunmap(*ppage);
+		put_page(*ppage);
+	}
+	*stripe_offset = min_idx;
+	*entp = min_ent;
+	*ppage = min_page;
+	return rc;
+}
+
+/**
+ * Build dir entry page from a striped directory
  *
- * - Replace e0 with e' so the request for the next lu_dirpage gets the page
- *   labeled 'next CFS_PAGE'.
+ * This function gets one entry by @offset from a striped directory. It will
+ * read entries from all of stripes, and choose one closest to the required
+ * offset(&offset). A few notes
+ * 1. skip . and .. for non-zero stripes, because there can only have one .
+ * and .. in a directory.
+ * 2. op_data will be shared by all of stripes, instead of allocating new
+ * one, so need to restore before reusing.
+ * 3. release the entry page if that is not being chosen.
  *
- * - Copy the LDF_COLLIDE flag from f' to f0 to correctly reflect whether
- *   a hash collision with the next page exists.
+ * \param[in] exp	obd export refer to LMV
+ * \param[in] op_data	hold those MD parameters of read_entry
+ * \param[in] cb_op	ldlm callback being used in enqueue in mdc_read_entry
+ * \param[out] ldp	the entry being read
+ * \param[out] ppage	the page holding the entry. Note: because the entry
+ *			will be accessed in upper layer, so we need hold the
+ *			page until the usages of entry is finished, see
+ *			ll_dir_entry_next.
  *
- * - Adjust the lde_reclen of the ending entry of each lu_dirpage to span
- *   to the first entry of the next lu_dirpage.
+ * retval		=0 if get entry successfully
+ *			<0 cannot get entry
  */
-#if PAGE_SIZE > LU_PAGE_SIZE
-static void lmv_adjust_dirpages(struct page **pages, int ncfspgs, int nlupgs)
-{
-	int i;
+static int lmv_read_striped_page(struct obd_export *exp,
+				 struct md_op_data *op_data,
+				 struct md_callback *cb_op,
+				 __u64 offset, struct page **ppage)
+{
+	struct inode *master_inode = op_data->op_data;
+	struct lu_fid master_fid = op_data->op_fid1;
+	struct obd_device *obd = exp->exp_obd;
+	__u64 hash_offset = offset;
+	struct page *min_ent_page = NULL;
+	struct page *ent_page = NULL;
+	struct lu_dirent *min_ent = NULL;
+	struct lu_dirent *last_ent;
+	struct lu_dirent *ent;
+	struct lu_dirpage *dp;
+	size_t left_bytes;
+	int ent_idx = 0;
+	void *area;
+	int rc;
 
-	for (i = 0; i < ncfspgs; i++) {
-		struct lu_dirpage	*dp = kmap(pages[i]);
-		struct lu_dirpage	*first = dp;
-		struct lu_dirent	*end_dirent = NULL;
-		struct lu_dirent	*ent;
-		__u64			hash_end = dp->ldp_hash_end;
-		__u32			flags = dp->ldp_flags;
-
-		while (--nlupgs > 0) {
-			ent = lu_dirent_start(dp);
-			for (end_dirent = ent; ent;
-			     end_dirent = ent, ent = lu_dirent_next(ent))
-				;
-
-			/* Advance dp to next lu_dirpage. */
-			dp = (struct lu_dirpage *)((char *)dp + LU_PAGE_SIZE);
-
-			/* Check if we've reached the end of the CFS_PAGE. */
-			if (!((unsigned long)dp & ~PAGE_MASK))
-				break;
+	rc = lmv_check_connect(obd);
+	if (rc)
+		return rc;
 
-			/* Save the hash and flags of this lu_dirpage. */
-			hash_end = dp->ldp_hash_end;
-			flags = dp->ldp_flags;
+	/*
+	 * Allocate a page and read entries from all of stripes and fill
+	 * the page by hash order
+	 */
+	ent_page = alloc_page(GFP_KERNEL);
+	if (!ent_page)
+		return -ENOMEM;
 
-			/* Check if lu_dirpage contains no entries. */
-			if (!end_dirent)
-				break;
+	/* Initialize the entry page */
+	dp = kmap(ent_page);
+	memset(dp, 0, sizeof(*dp));
+	dp->ldp_hash_start = cpu_to_le64(offset);
+	dp->ldp_flags |= LDF_COLLIDE;
+
+	area = dp + 1;
+	left_bytes = PAGE_SIZE - sizeof(*dp);
+	ent = area;
+	last_ent = ent;
+	do {
+		__u16 ent_size;
+
+		/* Find the minum entry from all sub-stripes */
+		rc = lmv_get_min_striped_entry(exp, op_data, cb_op, hash_offset,
+					       &ent_idx, &min_ent,
+					       &min_ent_page);
+		if (rc)
+			goto out;
 
-			/* Enlarge the end entry lde_reclen from 0 to
-			 * first entry of next lu_dirpage.
-			 */
-			LASSERT(le16_to_cpu(end_dirent->lde_reclen) == 0);
-			end_dirent->lde_reclen =
-				cpu_to_le16((char *)(dp->ldp_entries) -
-					    (char *)end_dirent);
+		/*
+		 * If it can not get minum entry, it means it already reaches
+		 * the end of this directory
+		 */
+		if (!min_ent) {
+			last_ent->lde_reclen = 0;
+			hash_offset = MDS_DIR_END_OFF;
+			goto out;
+		}
+
+		ent_size = le16_to_cpu(min_ent->lde_reclen);
+
+		/*
+		 * the last entry lde_reclen is 0, but it might not
+		 * the end of this entry of this temporay entry
+		 */
+		if (!ent_size)
+			ent_size = lu_dirent_calc_size(
+					le16_to_cpu(min_ent->lde_namelen),
+					le32_to_cpu(min_ent->lde_attrs));
+		if (ent_size > left_bytes) {
+			last_ent->lde_reclen = cpu_to_le16(0);
+			hash_offset = le64_to_cpu(min_ent->lde_hash);
+			goto out;
 		}
 
-		first->ldp_hash_end = hash_end;
-		first->ldp_flags &= ~cpu_to_le32(LDF_COLLIDE);
-		first->ldp_flags |= flags & cpu_to_le32(LDF_COLLIDE);
+		memcpy(ent, min_ent, ent_size);
+
+		/*
+		 * Replace . with master FID and Replace .. with the parent FID
+		 * of master object
+		 */
+		if (!strncmp(ent->lde_name, ".",
+			     le16_to_cpu(ent->lde_namelen)) &&
+		    le16_to_cpu(ent->lde_namelen) == 1)
+			fid_cpu_to_le(&ent->lde_fid, &master_fid);
+		else if (!strncmp(ent->lde_name, "..",
+				  le16_to_cpu(ent->lde_namelen)) &&
+			 le16_to_cpu(ent->lde_namelen) == 2)
+			fid_cpu_to_le(&ent->lde_fid, &op_data->op_fid3);
+
+		left_bytes -= ent_size;
+		ent->lde_reclen = cpu_to_le16(ent_size);
+		last_ent = ent;
+		ent = (void *)ent + ent_size;
+		hash_offset = le64_to_cpu(min_ent->lde_hash);
+		if (hash_offset == MDS_DIR_END_OFF) {
+			last_ent->lde_reclen = 0;
+			break;
+		}
+	} while (1);
+out:
+	if (min_ent_page) {
+		kunmap(min_ent_page);
+		put_page(min_ent_page);
+	}
 
-		kunmap(pages[i]);
+	if (unlikely(rc)) {
+		__free_page(ent_page);
+		ent_page = NULL;
+	} else {
+		if (ent == area)
+			dp->ldp_flags |= LDF_EMPTY;
+		dp->ldp_flags = cpu_to_le32(dp->ldp_flags);
+		dp->ldp_hash_end = cpu_to_le64(hash_offset);
 	}
-	LASSERTF(nlupgs == 0, "left = %d", nlupgs);
+
+	/*
+	 * We do not want to allocate md_op_data during each
+	 * dir entry reading, so op_data will be shared by every stripe,
+	 * then we need to restore it back to original value before
+	 * return to the upper layer
+	 */
+	op_data->op_fid1 = master_fid;
+	op_data->op_fid2 = master_fid;
+	op_data->op_data = master_inode;
+
+	*ppage = ent_page;
+
+	return rc;
 }
-#else
-#define lmv_adjust_dirpages(pages, ncfspgs, nlupgs) do {} while (0)
-#endif	/* PAGE_SIZE > LU_PAGE_SIZE */
 
-static int lmv_readpage(struct obd_export *exp, struct md_op_data *op_data,
-			struct page **pages, struct ptlrpc_request **request)
+static int lmv_read_page(struct obd_export *exp, struct md_op_data *op_data,
+			 struct md_callback *cb_op, __u64 offset,
+			 struct page **ppage)
 {
-	struct obd_device	*obd = exp->exp_obd;
-	struct lmv_obd		*lmv = &obd->u.lmv;
-	__u64			offset = op_data->op_offset;
-	int			rc;
-	int			ncfspgs; /* pages read in PAGE_SIZE */
-	int			nlupgs; /* pages read in LU_PAGE_SIZE */
-	struct lmv_tgt_desc	*tgt;
+	struct lmv_stripe_md *lsm = op_data->op_mea1;
+	struct obd_device *obd = exp->exp_obd;
+	struct lmv_obd *lmv = &obd->u.lmv;
+	struct lmv_tgt_desc *tgt;
+	int rc;
 
 	rc = lmv_check_connect(obd);
 	if (rc)
 		return rc;
 
-	CDEBUG(D_INODE, "READPAGE at %#llx from "DFID"\n",
-	       offset, PFID(&op_data->op_fid1));
+	if (unlikely(lsm)) {
+		rc = lmv_read_striped_page(exp, op_data, cb_op, offset, ppage);
+		return rc;
+	}
 
 	tgt = lmv_find_target(lmv, &op_data->op_fid1);
 	if (IS_ERR(tgt))
 		return PTR_ERR(tgt);
 
-	rc = md_readpage(tgt->ltd_exp, op_data, pages, request);
-	if (rc != 0)
-		return rc;
-
-	ncfspgs = ((*request)->rq_bulk->bd_nob_transferred + PAGE_SIZE - 1)
-		 >> PAGE_SHIFT;
-	nlupgs = (*request)->rq_bulk->bd_nob_transferred >> LU_PAGE_SHIFT;
-	LASSERT(!((*request)->rq_bulk->bd_nob_transferred & ~LU_PAGE_MASK));
-	LASSERT(ncfspgs > 0 && ncfspgs <= op_data->op_npages);
-
-	CDEBUG(D_INODE, "read %d(%d)/%d pages\n", ncfspgs, nlupgs,
-	       op_data->op_npages);
-
-	lmv_adjust_dirpages(pages, ncfspgs, nlupgs);
+	rc = md_read_page(tgt->ltd_exp, op_data, cb_op, offset, ppage);
 
 	return rc;
 }
 
+/**
+ * Unlink a file/directory
+ *
+ * Unlink a file or directory under the parent dir. The unlink request
+ * usually will be sent to the MDT where the child is located, but if
+ * the client does not have the child FID then request will be sent to the
+ * MDT where the parent is located.
+ *
+ * If the parent is a striped directory then it also needs to locate which
+ * stripe the name of the child is located, and replace the parent FID
+ * (@op->op_fid1) with the stripe FID. Note: if the stripe is unknown,
+ * it will walk through all of sub-stripes until the child is being
+ * unlinked finally.
+ *
+ * \param[in] exp	export refer to LMV
+ * \param[in] op_data	different parameters transferred beween client
+ *			MD stacks, name, namelen, FIDs etc.
+ *			op_fid1 is the parent FID, op_fid2 is the child
+ *			FID.
+ * \param[out] request point to the request of unlink.
+ *
+ * retval		0 if succeed
+ *			negative errno if failed.
+ */
 static int lmv_unlink(struct obd_export *exp, struct md_op_data *op_data,
 		      struct ptlrpc_request **request)
 {
-	struct obd_device       *obd = exp->exp_obd;
+	struct lmv_stripe_md *lsm = op_data->op_mea1;
+	struct obd_device    *obd = exp->exp_obd;
 	struct lmv_obd	  *lmv = &obd->u.lmv;
+	struct lmv_tgt_desc *parent_tgt = NULL;
 	struct lmv_tgt_desc     *tgt = NULL;
 	struct mdt_body		*body;
+	int stripe_index = 0;
 	int		     rc;
 
 	rc = lmv_check_connect(obd);
 	if (rc)
 		return rc;
-retry:
+retry_unlink:
+	/* For striped dir, we need to locate the parent as well */
+	if (lsm) {
+		struct lmv_tgt_desc *tmp;
+
+		LASSERT(op_data->op_name && op_data->op_namelen);
+
+		tmp = lmv_locate_target_for_name(lmv, lsm,
+						 op_data->op_name,
+						 op_data->op_namelen,
+						 &op_data->op_fid1,
+						 &op_data->op_mds);
+
+		/*
+		 * return -EBADFD means unknown hash type, might
+		 * need try all sub-stripe here
+		 */
+		if (IS_ERR(tmp) && PTR_ERR(tmp) != -EBADFD)
+			return PTR_ERR(tmp);
+
+		/*
+		 * Note: both migrating dir and unknown hash dir need to
+		 * try all of sub-stripes, so we need start search the
+		 * name from stripe 0, but migrating dir is already handled
+		 * inside lmv_locate_target_for_name(), so we only check
+		 * unknown hash type directory here
+		 */
+		if (!lmv_is_known_hash_type(lsm->lsm_md_hash_type)) {
+			struct lmv_oinfo *oinfo;
+
+			oinfo = &lsm->lsm_md_oinfo[stripe_index];
+
+			op_data->op_fid1 = oinfo->lmo_fid;
+			op_data->op_mds = oinfo->lmo_mds;
+		}
+	}
+
+try_next_stripe:
 	/* Send unlink requests to the MDT where the child is located */
 	if (likely(!fid_is_zero(&op_data->op_fid2)))
-		tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid2);
+		tgt = lmv_find_target(lmv, &op_data->op_fid2);
+	else if (lsm)
+		tgt = lmv_get_target(lmv, op_data->op_mds, NULL);
 	else
 		tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1);
+
 	if (IS_ERR(tgt))
 		return PTR_ERR(tgt);
 
@@ -2203,29 +2544,57 @@ retry:
 	/*
 	 * Cancel FULL locks on child (fid3).
 	 */
-	rc = lmv_early_cancel(exp, op_data, tgt->ltd_idx, LCK_EX,
-			      MDS_INODELOCK_FULL, MF_MDC_CANCEL_FID3);
+	parent_tgt = lmv_find_target(lmv, &op_data->op_fid1);
+	if (IS_ERR(parent_tgt))
+		return PTR_ERR(parent_tgt);
 
+	if (parent_tgt != tgt) {
+		rc = lmv_early_cancel(exp, parent_tgt, op_data, tgt->ltd_idx,
+				      LCK_EX, MDS_INODELOCK_LOOKUP,
+				      MF_MDC_CANCEL_FID3);
+	}
+
+	rc = lmv_early_cancel(exp, NULL, op_data, tgt->ltd_idx, LCK_EX,
+			      MDS_INODELOCK_FULL, MF_MDC_CANCEL_FID3);
 	if (rc != 0)
 		return rc;
 
-	CDEBUG(D_INODE, "unlink with fid="DFID"/"DFID" -> mds #%d\n",
+	CDEBUG(D_INODE, "unlink with fid=" DFID "/" DFID " -> mds #%u\n",
 	       PFID(&op_data->op_fid1), PFID(&op_data->op_fid2), tgt->ltd_idx);
 
 	rc = md_unlink(tgt->ltd_exp, op_data, request);
-	if (rc != 0 && rc != -EREMOTE)
+	if (rc != 0 && rc != -EREMOTE  && rc != -ENOENT)
 		return rc;
 
+	/* Try next stripe if it is needed. */
+	if (rc == -ENOENT && lsm && lmv_need_try_all_stripes(lsm)) {
+		struct lmv_oinfo *oinfo;
+
+		stripe_index++;
+		if (stripe_index >= lsm->lsm_md_stripe_count)
+			return rc;
+
+		oinfo = &lsm->lsm_md_oinfo[stripe_index];
+
+		op_data->op_fid1 = oinfo->lmo_fid;
+		op_data->op_mds = oinfo->lmo_mds;
+
+		ptlrpc_req_finished(*request);
+		*request = NULL;
+
+		goto try_next_stripe;
+	}
+
 	body = req_capsule_server_get(&(*request)->rq_pill, &RMF_MDT_BODY);
 	if (!body)
 		return -EPROTO;
 
 	/* Not cross-ref case, just get out of here. */
-	if (likely(!(body->valid & OBD_MD_MDS)))
-		return 0;
+	if (likely(!(body->mbo_valid & OBD_MD_MDS)))
+		return rc;
 
 	CDEBUG(D_INODE, "%s: try unlink to another MDT for "DFID"\n",
-	       exp->exp_obd->obd_name, PFID(&body->fid1));
+	       exp->exp_obd->obd_name, PFID(&body->mbo_fid1));
 
 	/* This is a remote object, try remote MDT, Note: it may
 	 * try more than 1 time here, Considering following case
@@ -2247,11 +2616,11 @@ retry:
 	 * In theory, it might try unlimited time here, but it should
 	 * be very rare case.
 	 */
-	op_data->op_fid2 = body->fid1;
+	op_data->op_fid2 = body->mbo_fid1;
 	ptlrpc_req_finished(*request);
 	*request = NULL;
 
-	goto retry;
+	goto retry_unlink;
 }
 
 static int lmv_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage)
@@ -2274,6 +2643,22 @@ static int lmv_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage)
 	return 0;
 }
 
+/**
+ * Get by key a value associated with a LMV device.
+ *
+ * Dispatch request to lower-layer devices as needed.
+ *
+ * \param[in]  env	execution environment for this thread
+ * \param[in]  exp	export for the LMV device
+ * \param[in]  keylen	length of key identifier
+ * \param[in]  key	identifier of key to get value for
+ * \param[in]  vallen	size of \a val
+ * \param[out] val	pointer to storage location for value
+ * \param[in]  lsm	optional striping metadata of object
+ *
+ * \retval 0		on success
+ * \retval negative	negated errno on failure
+ */
 static int lmv_get_info(const struct lu_env *env, struct obd_export *exp,
 			__u32 keylen, void *key, __u32 *vallen, void *val,
 			struct lov_stripe_md *lsm)
@@ -2337,6 +2722,22 @@ static int lmv_get_info(const struct lu_env *env, struct obd_export *exp,
 	return -EINVAL;
 }
 
+/**
+ * Asynchronously set by key a value associated with a LMV device.
+ *
+ * Dispatch request to lower-layer devices as needed.
+ *
+ * \param[in] env	execution environment for this thread
+ * \param[in] exp	export for the LMV device
+ * \param[in] keylen	length of key identifier
+ * \param[in] key	identifier of key to store value for
+ * \param[in] vallen	size of value to store
+ * \param[in] val	pointer to data to be stored
+ * \param[in] set	optional list of related ptlrpc requests
+ *
+ * \retval 0		on success
+ * \retval negative	negated errno on failure
+ */
 static int lmv_set_info_async(const struct lu_env *env, struct obd_export *exp,
 			      u32 keylen, void *key, u32 vallen,
 			      void *val, struct ptlrpc_request_set *set)
@@ -2354,7 +2755,8 @@ static int lmv_set_info_async(const struct lu_env *env, struct obd_export *exp,
 	}
 	lmv = &obd->u.lmv;
 
-	if (KEY_IS(KEY_READ_ONLY) || KEY_IS(KEY_FLUSH_CTX)) {
+	if (KEY_IS(KEY_READ_ONLY) || KEY_IS(KEY_FLUSH_CTX) ||
+	    KEY_IS(KEY_DEFAULT_EASIZE)) {
 		int i, err = 0;
 
 		for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
@@ -2375,105 +2777,247 @@ static int lmv_set_info_async(const struct lu_env *env, struct obd_export *exp,
 	return -EINVAL;
 }
 
-static int lmv_packmd(struct obd_export *exp, struct lov_mds_md **lmmp,
-		      struct lov_stripe_md *lsm)
+static int lmv_pack_md_v1(const struct lmv_stripe_md *lsm,
+			  struct lmv_mds_md_v1 *lmm1)
 {
-	struct obd_device	 *obd = class_exp2obd(exp);
-	struct lmv_obd	    *lmv = &obd->u.lmv;
-	struct lmv_stripe_md      *meap;
-	struct lmv_stripe_md      *lsmp;
-	int			mea_size;
-	int			i;
+	int cplen;
+	int i;
+
+	lmm1->lmv_magic = cpu_to_le32(lsm->lsm_md_magic);
+	lmm1->lmv_stripe_count = cpu_to_le32(lsm->lsm_md_stripe_count);
+	lmm1->lmv_master_mdt_index = cpu_to_le32(lsm->lsm_md_master_mdt_index);
+	lmm1->lmv_hash_type = cpu_to_le32(lsm->lsm_md_hash_type);
+	cplen = strlcpy(lmm1->lmv_pool_name, lsm->lsm_md_pool_name,
+			sizeof(lmm1->lmv_pool_name));
+	if (cplen >= sizeof(lmm1->lmv_pool_name))
+		return -E2BIG;
+
+	for (i = 0; i < lsm->lsm_md_stripe_count; i++)
+		fid_cpu_to_le(&lmm1->lmv_stripe_fids[i],
+			      &lsm->lsm_md_oinfo[i].lmo_fid);
+	return 0;
+}
 
-	mea_size = lmv_get_easize(lmv);
-	if (!lmmp)
-		return mea_size;
+static int
+lmv_pack_md(union lmv_mds_md **lmmp, const struct lmv_stripe_md *lsm,
+	    int stripe_count)
+{
+	int lmm_size = 0, rc = 0;
+	bool allocated = false;
 
+	LASSERT(lmmp);
+
+	/* Free lmm */
 	if (*lmmp && !lsm) {
+		int stripe_cnt;
+
+		stripe_cnt = lmv_mds_md_stripe_count_get(*lmmp);
+		lmm_size = lmv_mds_md_size(stripe_cnt,
+					   le32_to_cpu((*lmmp)->lmv_magic));
+		if (!lmm_size)
+			return -EINVAL;
 		kvfree(*lmmp);
 		*lmmp = NULL;
 		return 0;
 	}
 
+	/* Alloc lmm */
+	if (!*lmmp && !lsm) {
+		lmm_size = lmv_mds_md_size(stripe_count, LMV_MAGIC);
+		LASSERT(lmm_size > 0);
+		*lmmp = libcfs_kvzalloc(lmm_size, GFP_NOFS);
+		if (!*lmmp)
+			return -ENOMEM;
+		lmv_mds_md_stripe_count_set(*lmmp, stripe_count);
+		(*lmmp)->lmv_magic = cpu_to_le32(LMV_MAGIC);
+		return lmm_size;
+	}
+
+	/* pack lmm */
+	LASSERT(lsm);
+	lmm_size = lmv_mds_md_size(lsm->lsm_md_stripe_count,
+				   lsm->lsm_md_magic);
 	if (!*lmmp) {
-		*lmmp = libcfs_kvzalloc(mea_size, GFP_NOFS);
+		*lmmp = libcfs_kvzalloc(lmm_size, GFP_NOFS);
 		if (!*lmmp)
 			return -ENOMEM;
+		allocated = true;
 	}
 
-	if (!lsm)
-		return mea_size;
+	switch (lsm->lsm_md_magic) {
+	case LMV_MAGIC_V1:
+		rc = lmv_pack_md_v1(lsm, &(*lmmp)->lmv_md_v1);
+		break;
+	default:
+		rc = -EINVAL;
+		break;
+	}
 
-	lsmp = (struct lmv_stripe_md *)lsm;
-	meap = (struct lmv_stripe_md *)*lmmp;
+	if (rc && allocated) {
+		kvfree(*lmmp);
+		*lmmp = NULL;
+	}
 
-	if (lsmp->mea_magic != MEA_MAGIC_LAST_CHAR &&
-	    lsmp->mea_magic != MEA_MAGIC_ALL_CHARS)
-		return -EINVAL;
+	return lmm_size;
+}
 
-	meap->mea_magic = cpu_to_le32(lsmp->mea_magic);
-	meap->mea_count = cpu_to_le32(lsmp->mea_count);
-	meap->mea_master = cpu_to_le32(lsmp->mea_master);
+static int lmv_unpack_md_v1(struct obd_export *exp, struct lmv_stripe_md *lsm,
+			    const struct lmv_mds_md_v1 *lmm1)
+{
+	struct lmv_obd *lmv = &exp->exp_obd->u.lmv;
+	int stripe_count;
+	int rc = 0;
+	int cplen;
+	int i;
 
-	for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
-		meap->mea_ids[i] = lsmp->mea_ids[i];
-		fid_cpu_to_le(&meap->mea_ids[i], &lsmp->mea_ids[i]);
+	lsm->lsm_md_magic = le32_to_cpu(lmm1->lmv_magic);
+	lsm->lsm_md_stripe_count = le32_to_cpu(lmm1->lmv_stripe_count);
+	lsm->lsm_md_master_mdt_index = le32_to_cpu(lmm1->lmv_master_mdt_index);
+	if (OBD_FAIL_CHECK(OBD_FAIL_UNKNOWN_LMV_STRIPE))
+		lsm->lsm_md_hash_type = LMV_HASH_TYPE_UNKNOWN;
+	else
+		lsm->lsm_md_hash_type = le32_to_cpu(lmm1->lmv_hash_type);
+	lsm->lsm_md_layout_version = le32_to_cpu(lmm1->lmv_layout_version);
+	cplen = strlcpy(lsm->lsm_md_pool_name, lmm1->lmv_pool_name,
+			sizeof(lsm->lsm_md_pool_name));
+
+	if (cplen >= sizeof(lsm->lsm_md_pool_name))
+		return -E2BIG;
+
+	CDEBUG(D_INFO, "unpack lsm count %d, master %d hash_type %d layout_version %d\n",
+	       lsm->lsm_md_stripe_count, lsm->lsm_md_master_mdt_index,
+	       lsm->lsm_md_hash_type, lsm->lsm_md_layout_version);
+
+	stripe_count = le32_to_cpu(lmm1->lmv_stripe_count);
+	for (i = 0; i < stripe_count; i++) {
+		fid_le_to_cpu(&lsm->lsm_md_oinfo[i].lmo_fid,
+			      &lmm1->lmv_stripe_fids[i]);
+		rc = lmv_fld_lookup(lmv, &lsm->lsm_md_oinfo[i].lmo_fid,
+				    &lsm->lsm_md_oinfo[i].lmo_mds);
+		if (rc)
+			return rc;
+		CDEBUG(D_INFO, "unpack fid #%d "DFID"\n", i,
+		       PFID(&lsm->lsm_md_oinfo[i].lmo_fid));
 	}
 
-	return mea_size;
+	return rc;
 }
 
-static int lmv_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsmp,
-			struct lov_mds_md *lmm, int lmm_size)
+int lmv_unpack_md(struct obd_export *exp, struct lmv_stripe_md **lsmp,
+		  const union lmv_mds_md *lmm, int stripe_count)
 {
-	struct obd_device	  *obd = class_exp2obd(exp);
-	struct lmv_stripe_md      **tmea = (struct lmv_stripe_md **)lsmp;
-	struct lmv_stripe_md       *mea = (struct lmv_stripe_md *)lmm;
-	struct lmv_obd	     *lmv = &obd->u.lmv;
-	int			 mea_size;
-	int			 i;
-	__u32		       magic;
+	struct lmv_stripe_md *lsm;
+	bool allocated = false;
+	int lsm_size, rc;
 
-	mea_size = lmv_get_easize(lmv);
-	if (!lsmp)
-		return mea_size;
+	LASSERT(lsmp);
 
-	if (*lsmp && !lmm) {
-		kvfree(*tmea);
+	lsm = *lsmp;
+	/* Free memmd */
+	if (lsm && !lmm) {
+		int i;
+
+		for (i = 1; i < lsm->lsm_md_stripe_count; i++) {
+			/*
+			 * For migrating inode, the master stripe and master
+			 * object will be the same, so do not need iput, see
+			 * ll_update_lsm_md
+			 */
+			if (!(lsm->lsm_md_hash_type & LMV_HASH_FLAG_MIGRATION &&
+			      !i) && lsm->lsm_md_oinfo[i].lmo_root)
+				iput(lsm->lsm_md_oinfo[i].lmo_root);
+		}
+
+		kvfree(lsm);
 		*lsmp = NULL;
 		return 0;
 	}
 
-	LASSERT(mea_size == lmm_size);
+	/* Alloc memmd */
+	if (!lsm && !lmm) {
+		lsm_size = lmv_stripe_md_size(stripe_count);
+		lsm = libcfs_kvzalloc(lsm_size, GFP_NOFS);
+		if (!lsm)
+			return -ENOMEM;
+		lsm->lsm_md_stripe_count = stripe_count;
+		*lsmp = lsm;
+		return 0;
+	}
 
-	*tmea = libcfs_kvzalloc(mea_size, GFP_NOFS);
-	if (!*tmea)
-		return -ENOMEM;
+	if (le32_to_cpu(lmm->lmv_magic) == LMV_MAGIC_STRIPE)
+		return -EPERM;
 
-	if (!lmm)
-		return mea_size;
+	/* Unpack memmd */
+	if (le32_to_cpu(lmm->lmv_magic) != LMV_MAGIC_V1 &&
+	    le32_to_cpu(lmm->lmv_magic) != LMV_USER_MAGIC) {
+		CERROR("%s: invalid lmv magic %x: rc = %d\n",
+		       exp->exp_obd->obd_name, le32_to_cpu(lmm->lmv_magic),
+		       -EIO);
+		return -EIO;
+	}
 
-	if (mea->mea_magic == MEA_MAGIC_LAST_CHAR ||
-	    mea->mea_magic == MEA_MAGIC_ALL_CHARS ||
-	    mea->mea_magic == MEA_MAGIC_HASH_SEGMENT) {
-		magic = le32_to_cpu(mea->mea_magic);
-	} else {
-		/*
-		 * Old mea is not handled here.
+	if (le32_to_cpu(lmm->lmv_magic) == LMV_MAGIC_V1)
+		lsm_size = lmv_stripe_md_size(lmv_mds_md_stripe_count_get(lmm));
+	else
+		/**
+		 * Unpack default dirstripe(lmv_user_md) to lmv_stripe_md,
+		 * stripecount should be 0 then.
 		 */
-		CERROR("Old not supportable EA is found\n");
-		LBUG();
+		lsm_size = lmv_stripe_md_size(0);
+
+	if (!lsm) {
+		lsm = libcfs_kvzalloc(lsm_size, GFP_NOFS);
+		if (!lsm)
+			return -ENOMEM;
+		allocated = true;
+		*lsmp = lsm;
+	}
+
+	switch (le32_to_cpu(lmm->lmv_magic)) {
+	case LMV_MAGIC_V1:
+		rc = lmv_unpack_md_v1(exp, lsm, &lmm->lmv_md_v1);
+		break;
+	default:
+		CERROR("%s: unrecognized magic %x\n", exp->exp_obd->obd_name,
+		       le32_to_cpu(lmm->lmv_magic));
+		rc = -EINVAL;
+		break;
 	}
 
-	(*tmea)->mea_magic = magic;
-	(*tmea)->mea_count = le32_to_cpu(mea->mea_count);
-	(*tmea)->mea_master = le32_to_cpu(mea->mea_master);
+	if (rc && allocated) {
+		kvfree(lsm);
+		*lsmp = NULL;
+		lsm_size = rc;
+	}
+	return lsm_size;
+}
+EXPORT_SYMBOL(lmv_unpack_md);
+
+static int lmv_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsmp,
+			struct lov_mds_md *lmm, int disk_len)
+{
+	return lmv_unpack_md(exp, (struct lmv_stripe_md **)lsmp,
+			     (union lmv_mds_md *)lmm, disk_len);
+}
+
+static int lmv_packmd(struct obd_export *exp, struct lov_mds_md **lmmp,
+		      struct lov_stripe_md *lsm)
+{
+	const struct lmv_stripe_md *lmv = (struct lmv_stripe_md *)lsm;
+	struct obd_device *obd = exp->exp_obd;
+	struct lmv_obd *lmv_obd = &obd->u.lmv;
+	int stripe_count;
 
-	for (i = 0; i < (*tmea)->mea_count; i++) {
-		(*tmea)->mea_ids[i] = mea->mea_ids[i];
-		fid_le_to_cpu(&(*tmea)->mea_ids[i], &(*tmea)->mea_ids[i]);
+	if (!lmmp) {
+		if (lsm)
+			stripe_count = lmv->lsm_md_stripe_count;
+		else
+			stripe_count = lmv_obd->desc.ld_tgt_count;
+
+		return lmv_mds_md_size(stripe_count, LMV_MAGIC_V1);
 	}
-	return mea_size;
+
+	return lmv_pack_md((union lmv_mds_md **)lmmp, lmv, 0);
 }
 
 static int lmv_cancel_unused(struct obd_export *exp, const struct lu_fid *fid,
@@ -2484,7 +3028,7 @@ static int lmv_cancel_unused(struct obd_export *exp, const struct lu_fid *fid,
 	struct lmv_obd	  *lmv = &obd->u.lmv;
 	int		      rc = 0;
 	int		      err;
-	int		      i;
+	u32 i;
 
 	LASSERT(fid);
 
@@ -2502,8 +3046,9 @@ static int lmv_cancel_unused(struct obd_export *exp, const struct lu_fid *fid,
 	return rc;
 }
 
-static int lmv_set_lock_data(struct obd_export *exp, __u64 *lockh, void *data,
-			     __u64 *bits)
+static int lmv_set_lock_data(struct obd_export *exp,
+			     const struct lustre_handle *lockh,
+			     void *data, __u64 *bits)
 {
 	struct lmv_obd	  *lmv = &exp->exp_obd->u.lmv;
 	struct lmv_tgt_desc *tgt = lmv->tgts[0];
@@ -2526,24 +3071,32 @@ static enum ldlm_mode lmv_lock_match(struct obd_export *exp, __u64 flags,
 	struct obd_device       *obd = exp->exp_obd;
 	struct lmv_obd	  *lmv = &obd->u.lmv;
 	enum ldlm_mode	      rc;
-	int		      i;
+	int tgt;
+	u32 i;
 
 	CDEBUG(D_INODE, "Lock match for "DFID"\n", PFID(fid));
 
 	/*
-	 * With CMD every object can have two locks in different namespaces:
-	 * lookup lock in space of mds storing direntry and update/open lock in
-	 * space of mds storing inode. Thus we check all targets, not only that
-	 * one fid was created in.
+	 * With DNE every object can have two locks in different namespaces:
+	 * lookup lock in space of MDT storing direntry and update/open lock in
+	 * space of MDT storing inode.  Try the MDT that the FID maps to first,
+	 * since this can be easily found, and only try others if that fails.
 	 */
-	for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
-		struct lmv_tgt_desc *tgt = lmv->tgts[i];
+	for (i = 0, tgt = lmv_find_target_index(lmv, fid);
+	     i < lmv->desc.ld_tgt_count;
+	     i++, tgt = (tgt + 1) % lmv->desc.ld_tgt_count) {
+		if (tgt < 0) {
+			CDEBUG(D_HA, "%s: "DFID" is inaccessible: rc = %d\n",
+			       obd->obd_name, PFID(fid), tgt);
+			tgt = 0;
+		}
 
-		if (!tgt || !tgt->ltd_exp || !tgt->ltd_active)
+		if (!lmv->tgts[tgt] || !lmv->tgts[tgt]->ltd_exp ||
+		    !lmv->tgts[tgt]->ltd_active)
 			continue;
 
-		rc = md_lock_match(tgt->ltd_exp, flags, fid, type, policy, mode,
-				   lockh);
+		rc = md_lock_match(lmv->tgts[tgt]->ltd_exp, flags, fid,
+				   type, policy, mode, lockh);
 		if (rc)
 			return rc;
 	}
@@ -2571,8 +3124,10 @@ static int lmv_free_lustre_md(struct obd_export *exp, struct lustre_md *md)
 	struct lmv_obd	  *lmv = &obd->u.lmv;
 	struct lmv_tgt_desc *tgt = lmv->tgts[0];
 
-	if (md->mea)
-		obd_free_memmd(exp, (void *)&md->mea);
+	if (md->lmv) {
+		lmv_free_memmd(md->lmv);
+		md->lmv = NULL;
+	}
 	if (!tgt || !tgt->ltd_exp)
 		return -EINVAL;
 	return md_free_lustre_md(tgt->ltd_exp, md);
@@ -2621,7 +3176,7 @@ static int lmv_intent_getattr_async(struct obd_export *exp,
 	if (rc)
 		return rc;
 
-	tgt = lmv_find_target(lmv, &op_data->op_fid1);
+	tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1);
 	if (IS_ERR(tgt))
 		return PTR_ERR(tgt);
 
@@ -2649,6 +3204,23 @@ static int lmv_revalidate_lock(struct obd_export *exp, struct lookup_intent *it,
 	return rc;
 }
 
+static int
+lmv_get_fid_from_lsm(struct obd_export *exp,
+		     const struct lmv_stripe_md *lsm,
+		     const char *name, int namelen, struct lu_fid *fid)
+{
+	const struct lmv_oinfo *oinfo;
+
+	LASSERT(lsm);
+	oinfo = lsm_name_to_stripe_info(lsm, name, namelen);
+	if (IS_ERR(oinfo))
+		return PTR_ERR(oinfo);
+
+	*fid = oinfo->lmo_fid;
+
+	return 0;
+}
+
 /**
  * For lmv, only need to send request to master MDT, and the master MDT will
  * process with other slave MDTs. The only exception is Q_GETOQUOTA for which
@@ -2660,8 +3232,9 @@ static int lmv_quotactl(struct obd_device *unused, struct obd_export *exp,
 	struct obd_device   *obd = class_exp2obd(exp);
 	struct lmv_obd      *lmv = &obd->u.lmv;
 	struct lmv_tgt_desc *tgt = lmv->tgts[0];
-	int		  rc = 0, i;
+	int rc = 0;
 	__u64 curspace = 0, curinodes = 0;
+	u32 i;
 
 	if (!tgt || !tgt->ltd_exp || !tgt->ltd_active ||
 	    !lmv->desc.ld_tgt_count) {
@@ -2704,7 +3277,8 @@ static int lmv_quotacheck(struct obd_device *unused, struct obd_export *exp,
 	struct obd_device   *obd = class_exp2obd(exp);
 	struct lmv_obd      *lmv = &obd->u.lmv;
 	struct lmv_tgt_desc *tgt;
-	int		  i, rc = 0;
+	int rc = 0;
+	u32 i;
 
 	for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
 		int err;
@@ -2723,6 +3297,47 @@ static int lmv_quotacheck(struct obd_device *unused, struct obd_export *exp,
 	return rc;
 }
 
+static int lmv_merge_attr(struct obd_export *exp,
+			  const struct lmv_stripe_md *lsm,
+			  struct cl_attr *attr,
+			  ldlm_blocking_callback cb_blocking)
+{
+	int rc, i;
+
+	rc = lmv_revalidate_slaves(exp, lsm, cb_blocking, 0);
+	if (rc < 0)
+		return rc;
+
+	for (i = 0; i < lsm->lsm_md_stripe_count; i++) {
+		struct inode *inode = lsm->lsm_md_oinfo[i].lmo_root;
+
+		CDEBUG(D_INFO, ""DFID" size %llu, blocks %llu nlink %u, atime %lu ctime %lu, mtime %lu.\n",
+		       PFID(&lsm->lsm_md_oinfo[i].lmo_fid),
+		       i_size_read(inode), (unsigned long long)inode->i_blocks,
+		       inode->i_nlink, LTIME_S(inode->i_atime),
+		       LTIME_S(inode->i_ctime), LTIME_S(inode->i_mtime));
+
+		/* for slave stripe, it needs to subtract nlink for . and .. */
+		if (i)
+			attr->cat_nlink += inode->i_nlink - 2;
+		else
+			attr->cat_nlink = inode->i_nlink;
+
+		attr->cat_size += i_size_read(inode);
+		attr->cat_blocks += inode->i_blocks;
+
+		if (attr->cat_atime < LTIME_S(inode->i_atime))
+			attr->cat_atime = LTIME_S(inode->i_atime);
+
+		if (attr->cat_ctime < LTIME_S(inode->i_ctime))
+			attr->cat_ctime = LTIME_S(inode->i_ctime);
+
+		if (attr->cat_mtime < LTIME_S(inode->i_mtime))
+			attr->cat_mtime = LTIME_S(inode->i_mtime);
+	}
+	return 0;
+}
+
 static struct obd_ops lmv_obd_ops = {
 	.owner		= THIS_MODULE,
 	.setup		= lmv_setup,
@@ -2746,7 +3361,6 @@ static struct obd_ops lmv_obd_ops = {
 static struct md_ops lmv_md_ops = {
 	.getstatus		= lmv_getstatus,
 	.null_inode		= lmv_null_inode,
-	.find_cbdata		= lmv_find_cbdata,
 	.close			= lmv_close,
 	.create			= lmv_create,
 	.done_writing		= lmv_done_writing,
@@ -2760,7 +3374,7 @@ static struct md_ops lmv_md_ops = {
 	.setattr		= lmv_setattr,
 	.setxattr		= lmv_setxattr,
 	.sync			= lmv_sync,
-	.readpage		= lmv_readpage,
+	.read_page		= lmv_read_page,
 	.unlink			= lmv_unlink,
 	.init_ea_size		= lmv_init_ea_size,
 	.cancel_unused		= lmv_cancel_unused,
@@ -2768,10 +3382,12 @@ static struct md_ops lmv_md_ops = {
 	.lock_match		= lmv_lock_match,
 	.get_lustre_md		= lmv_get_lustre_md,
 	.free_lustre_md		= lmv_free_lustre_md,
+	.merge_attr		= lmv_merge_attr,
 	.set_open_replay_data	= lmv_set_open_replay_data,
 	.clear_open_replay_data	= lmv_clear_open_replay_data,
 	.intent_getattr_async	= lmv_intent_getattr_async,
-	.revalidate_lock	= lmv_revalidate_lock
+	.revalidate_lock	= lmv_revalidate_lock,
+	.get_fid_from_lsm	= lmv_get_fid_from_lsm,
 };
 
 static int __init lmv_init(void)
diff --git a/drivers/staging/lustre/lustre/lmv/lproc_lmv.c b/drivers/staging/lustre/lustre/lmv/lproc_lmv.c
index c29c361eb0cc..20bbdfc21d15 100644
--- a/drivers/staging/lustre/lustre/lmv/lproc_lmv.c
+++ b/drivers/staging/lustre/lustre/lmv/lproc_lmv.c
@@ -169,7 +169,7 @@ static int lmv_tgt_seq_show(struct seq_file *p, void *v)
 
 	if (!tgt)
 		return 0;
-	seq_printf(p, "%d: %s %sACTIVE\n",
+	seq_printf(p, "%u: %s %sACTIVE\n",
 		   tgt->ltd_idx, tgt->ltd_uuid.uuid,
 		   tgt->ltd_active ? "" : "IN");
 	return 0;
@@ -202,7 +202,7 @@ static struct lprocfs_vars lprocfs_lmv_obd_vars[] = {
 	{ NULL }
 };
 
-struct file_operations lmv_proc_target_fops = {
+const struct file_operations lmv_proc_target_fops = {
 	.owner		= THIS_MODULE,
 	.open		 = lmv_target_seq_open,
 	.read		 = seq_read,
diff --git a/drivers/staging/lustre/lustre/lov/lov_cl_internal.h b/drivers/staging/lustre/lustre/lov/lov_cl_internal.h
index 9740568d9521..4d2b7d303fea 100644
--- a/drivers/staging/lustre/lustre/lov/lov_cl_internal.h
+++ b/drivers/staging/lustre/lustre/lov/lov_cl_internal.h
@@ -289,8 +289,8 @@ struct lov_lock {
 };
 
 struct lov_page {
-	struct cl_page_slice lps_cl;
-	int		  lps_invalid;
+	struct cl_page_slice	lps_cl;
+	unsigned int		lps_stripe; /* stripe index */
 };
 
 /*
@@ -556,6 +556,8 @@ struct lov_lock_link *lov_lock_link_find(const struct lu_env *env,
 					 struct lovsub_lock *sub);
 struct lov_io_sub *lov_page_subio(const struct lu_env *env, struct lov_io *lio,
 				  const struct cl_page_slice *slice);
+
+struct lov_stripe_md *lov_lsm_addref(struct lov_object *lov);
 int lov_page_stripe(const struct cl_page *page);
 
 #define lov_foreach_target(lov, var)		    \
@@ -742,11 +744,15 @@ static inline struct lov_thread_info *lov_env_info(const struct lu_env *env)
 static inline struct lov_layout_raid0 *lov_r0(struct lov_object *lov)
 {
 	LASSERT(lov->lo_type == LLT_RAID0);
-	LASSERT(lov->lo_lsm->lsm_wire.lw_magic == LOV_MAGIC ||
-		lov->lo_lsm->lsm_wire.lw_magic == LOV_MAGIC_V3);
+	LASSERT(lov->lo_lsm->lsm_magic == LOV_MAGIC ||
+		lov->lo_lsm->lsm_magic == LOV_MAGIC_V3);
 	return &lov->u.raid0;
 }
 
+/* lov_pack.c */
+int lov_getstripe(struct lov_object *obj, struct lov_stripe_md *lsm,
+		  struct lov_user_md __user *lump);
+
 /** @} lov */
 
 #endif
diff --git a/drivers/staging/lustre/lustre/lov/lov_dev.c b/drivers/staging/lustre/lustre/lov/lov_dev.c
index b1f260d43bc7..056ae2ed88e8 100644
--- a/drivers/staging/lustre/lustre/lov/lov_dev.c
+++ b/drivers/staging/lustre/lustre/lov/lov_dev.c
@@ -516,6 +516,5 @@ struct lu_device_type lov_device_type = {
 	.ldt_ops      = &lov_device_type_ops,
 	.ldt_ctx_tags = LCT_CL_THREAD
 };
-EXPORT_SYMBOL(lov_device_type);
 
 /** @} lov */
diff --git a/drivers/staging/lustre/lustre/lov/lov_ea.c b/drivers/staging/lustre/lustre/lov/lov_ea.c
index 5053dead17bb..214c561767e0 100644
--- a/drivers/staging/lustre/lustre/lov/lov_ea.c
+++ b/drivers/staging/lustre/lustre/lov/lov_ea.c
@@ -66,7 +66,8 @@ static int lsm_lmm_verify_common(struct lov_mds_md *lmm, int lmm_bytes,
 	}
 
 	if (lmm->lmm_stripe_size == 0 ||
-	    (le32_to_cpu(lmm->lmm_stripe_size)&(LOV_MIN_STRIPE_SIZE-1)) != 0) {
+	    (le32_to_cpu(lmm->lmm_stripe_size) &
+	     (LOV_MIN_STRIPE_SIZE - 1)) != 0) {
 		CERROR("bad stripe size %u\n",
 		       le32_to_cpu(lmm->lmm_stripe_size));
 		lov_dump_lmm_common(D_WARNING, lmm);
@@ -146,21 +147,15 @@ lsm_stripe_by_offset_plain(struct lov_stripe_md *lsm, int *stripeno,
 		*swidth = (u64)lsm->lsm_stripe_size * lsm->lsm_stripe_count;
 }
 
-static int lsm_destroy_plain(struct lov_stripe_md *lsm, struct obdo *oa,
-			     struct obd_export *md_exp)
-{
-	return 0;
-}
-
 /* Find minimum stripe maxbytes value.  For inactive or
- * reconnecting targets use LUSTRE_STRIPE_MAXBYTES.
+ * reconnecting targets use LUSTRE_EXT3_STRIPE_MAXBYTES.
  */
 static void lov_tgt_maxbytes(struct lov_tgt_desc *tgt, __u64 *stripe_maxbytes)
 {
 	struct obd_import *imp = tgt->ltd_obd->u.cli.cl_import;
 
 	if (!imp || !tgt->ltd_active) {
-		*stripe_maxbytes = LUSTRE_STRIPE_MAXBYTES;
+		*stripe_maxbytes = LUSTRE_EXT3_STRIPE_MAXBYTES;
 		return;
 	}
 
@@ -171,7 +166,7 @@ static void lov_tgt_maxbytes(struct lov_tgt_desc *tgt, __u64 *stripe_maxbytes)
 		if (*stripe_maxbytes > imp->imp_connect_data.ocd_maxbytes)
 			*stripe_maxbytes = imp->imp_connect_data.ocd_maxbytes;
 	} else {
-		*stripe_maxbytes = LUSTRE_STRIPE_MAXBYTES;
+		*stripe_maxbytes = LUSTRE_EXT3_STRIPE_MAXBYTES;
 	}
 	spin_unlock(&imp->imp_lock);
 }
@@ -245,7 +240,6 @@ static int lsm_unpackmd_v1(struct lov_obd *lov, struct lov_stripe_md *lsm,
 
 const struct lsm_operations lsm_v1_ops = {
 	.lsm_free	    = lsm_free_plain,
-	.lsm_destroy	 = lsm_destroy_plain,
 	.lsm_stripe_by_index    = lsm_stripe_by_index_plain,
 	.lsm_stripe_by_offset   = lsm_stripe_by_offset_plain,
 	.lsm_lmm_verify	 = lsm_lmm_verify_v1,
@@ -335,7 +329,6 @@ static int lsm_unpackmd_v3(struct lov_obd *lov, struct lov_stripe_md *lsm,
 
 const struct lsm_operations lsm_v3_ops = {
 	.lsm_free	    = lsm_free_plain,
-	.lsm_destroy	 = lsm_destroy_plain,
 	.lsm_stripe_by_index    = lsm_stripe_by_index_plain,
 	.lsm_stripe_by_offset   = lsm_stripe_by_offset_plain,
 	.lsm_lmm_verify	 = lsm_lmm_verify_v3,
diff --git a/drivers/staging/lustre/lustre/lov/lov_internal.h b/drivers/staging/lustre/lustre/lov/lov_internal.h
index 12bd511e8988..07e5ede3e952 100644
--- a/drivers/staging/lustre/lustre/lov/lov_internal.h
+++ b/drivers/staging/lustre/lustre/lov/lov_internal.h
@@ -134,8 +134,6 @@ static inline void lov_put_reqset(struct lov_request_set *set)
 /* lov_merge.c */
 void lov_merge_attrs(struct obdo *tgt, struct obdo *src, u64 valid,
 		     struct lov_stripe_md *lsm, int stripeno, int *set);
-int lov_adjust_kms(struct obd_export *exp, struct lov_stripe_md *lsm,
-		   u64 size, int shrink);
 int lov_merge_lvb_kms(struct lov_stripe_md *lsm,
 		      struct ost_lvb *lvb, __u64 *kms_place);
 
@@ -157,11 +155,6 @@ int lov_update_common_set(struct lov_request_set *set,
 int lov_prep_getattr_set(struct obd_export *exp, struct obd_info *oinfo,
 			 struct lov_request_set **reqset);
 int lov_fini_getattr_set(struct lov_request_set *set);
-int lov_prep_destroy_set(struct obd_export *exp, struct obd_info *oinfo,
-			 struct obdo *src_oa, struct lov_stripe_md *lsm,
-			 struct obd_trans_info *oti,
-			 struct lov_request_set **reqset);
-int lov_fini_destroy_set(struct lov_request_set *set);
 int lov_prep_setattr_set(struct obd_export *exp, struct obd_info *oinfo,
 			 struct obd_trans_info *oti,
 			 struct lov_request_set **reqset);
@@ -197,8 +190,6 @@ int lov_packmd(struct obd_export *exp, struct lov_mds_md **lmm,
 	       struct lov_stripe_md *lsm);
 int lov_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsmp,
 		 struct lov_mds_md *lmm, int lmm_bytes);
-int lov_getstripe(struct obd_export *exp,
-		  struct lov_stripe_md *lsm, struct lov_user_md __user *lump);
 int lov_alloc_memmd(struct lov_stripe_md **lsmp, __u16 stripe_count,
 		    int pattern, int magic);
 int lov_free_memmd(struct lov_stripe_md **lsmp);
diff --git a/drivers/staging/lustre/lustre/lov/lov_io.c b/drivers/staging/lustre/lustre/lov/lov_io.c
index 84032a510254..d10157985ed9 100644
--- a/drivers/staging/lustre/lustre/lov/lov_io.c
+++ b/drivers/staging/lustre/lustre/lov/lov_io.c
@@ -87,6 +87,9 @@ static void lov_io_sub_inherit(struct cl_io *io, struct lov_io *lio,
 	case CIT_SETATTR: {
 		io->u.ci_setattr.sa_attr = parent->u.ci_setattr.sa_attr;
 		io->u.ci_setattr.sa_valid = parent->u.ci_setattr.sa_valid;
+		io->u.ci_setattr.sa_stripe_index = stripe;
+		io->u.ci_setattr.sa_parent_fid =
+					parent->u.ci_setattr.sa_parent_fid;
 		if (cl_io_is_trunc(io)) {
 			loff_t new_size = parent->u.ci_setattr.sa_attr.lvb_size;
 
@@ -244,14 +247,12 @@ void lov_sub_put(struct lov_io_sub *sub)
 
 int lov_page_stripe(const struct cl_page *page)
 {
-	struct lovsub_object *subobj;
 	const struct cl_page_slice *slice;
 
-	slice = cl_page_at(page, &lovsub_device_type);
+	slice = cl_page_at(page, &lov_device_type);
 	LASSERT(slice->cpl_obj);
 
-	subobj = cl2lovsub(slice->cpl_obj);
-	return subobj->lso_index;
+	return cl2lov_page(slice)->lps_stripe;
 }
 
 struct lov_io_sub *lov_page_subio(const struct lu_env *env, struct lov_io *lio,
@@ -298,8 +299,8 @@ static int lov_io_subio_init(const struct lu_env *env, struct lov_io *lio,
 	return result;
 }
 
-static void lov_io_slice_init(struct lov_io *lio,
-			      struct lov_object *obj, struct cl_io *io)
+static int lov_io_slice_init(struct lov_io *lio, struct lov_object *obj,
+			     struct cl_io *io)
 {
 	io->ci_result = 0;
 	lio->lis_object = obj;
@@ -314,6 +315,15 @@ static void lov_io_slice_init(struct lov_io *lio,
 		lio->lis_io_endpos = lio->lis_endpos;
 		if (cl_io_is_append(io)) {
 			LASSERT(io->ci_type == CIT_WRITE);
+
+			/*
+			 * If there is LOV EA hole, then we may cannot locate
+			 * the current file-tail exactly.
+			 */
+			if (unlikely(obj->lo_lsm->lsm_pattern &
+				     LOV_PATTERN_F_HOLE))
+				return -EIO;
+
 			lio->lis_pos = 0;
 			lio->lis_endpos = OBD_OBJECT_EOF;
 		}
@@ -349,6 +359,7 @@ static void lov_io_slice_init(struct lov_io *lio,
 	default:
 		LBUG();
 	}
+	return 0;
 }
 
 static void lov_io_fini(const struct lu_env *env, const struct cl_io_slice *ios)
@@ -870,7 +881,7 @@ int lov_io_init_raid0(const struct lu_env *env, struct cl_object *obj,
 	struct lov_object   *lov = cl2lov(obj);
 
 	INIT_LIST_HEAD(&lio->lis_active);
-	lov_io_slice_init(lio, lov, io);
+	io->ci_result = lov_io_slice_init(lio, lov, io);
 	if (io->ci_result == 0) {
 		io->ci_result = lov_io_subio_init(env, lio, io);
 		if (io->ci_result == 0) {
diff --git a/drivers/staging/lustre/lustre/lov/lov_merge.c b/drivers/staging/lustre/lustre/lov/lov_merge.c
index b9c90865fdfc..674af106b50b 100644
--- a/drivers/staging/lustre/lustre/lov/lov_merge.c
+++ b/drivers/staging/lustre/lustre/lov/lov_merge.c
@@ -105,45 +105,6 @@ int lov_merge_lvb_kms(struct lov_stripe_md *lsm,
 	return rc;
 }
 
-/* Must be called under the lov_stripe_lock() */
-int lov_adjust_kms(struct obd_export *exp, struct lov_stripe_md *lsm,
-		   u64 size, int shrink)
-{
-	struct lov_oinfo *loi;
-	int stripe = 0;
-	__u64 kms;
-
-	assert_spin_locked(&lsm->lsm_lock);
-	LASSERT(lsm->lsm_lock_owner == current_pid());
-
-	if (shrink) {
-		for (; stripe < lsm->lsm_stripe_count; stripe++) {
-			struct lov_oinfo *loi = lsm->lsm_oinfo[stripe];
-
-			kms = lov_size_to_stripe(lsm, size, stripe);
-			CDEBUG(D_INODE,
-			       "stripe %d KMS %sing %llu->%llu\n",
-			       stripe, kms > loi->loi_kms ? "increase":"shrink",
-			       loi->loi_kms, kms);
-			loi->loi_lvb.lvb_size = kms;
-			loi_kms_set(loi, loi->loi_lvb.lvb_size);
-		}
-		return 0;
-	}
-
-	if (size > 0)
-		stripe = lov_stripe_number(lsm, size - 1);
-	kms = lov_size_to_stripe(lsm, size, stripe);
-	loi = lsm->lsm_oinfo[stripe];
-
-	CDEBUG(D_INODE, "stripe %d KMS %sincreasing %llu->%llu\n",
-	       stripe, kms > loi->loi_kms ? "" : "not ", loi->loi_kms, kms);
-	if (kms > loi->loi_kms)
-		loi_kms_set(loi, kms);
-
-	return 0;
-}
-
 void lov_merge_attrs(struct obdo *tgt, struct obdo *src, u64 valid,
 		     struct lov_stripe_md *lsm, int stripeno, int *set)
 {
diff --git a/drivers/staging/lustre/lustre/lov/lov_obd.c b/drivers/staging/lustre/lustre/lov/lov_obd.c
index 9b92d5522edb..b23016f7ec26 100644
--- a/drivers/staging/lustre/lustre/lov/lov_obd.c
+++ b/drivers/staging/lustre/lustre/lov/lov_obd.c
@@ -41,6 +41,7 @@
 #include "../../include/linux/libcfs/libcfs.h"
 
 #include "../include/obd_support.h"
+#include "../include/lustre/lustre_ioctl.h"
 #include "../include/lustre_lib.h"
 #include "../include/lustre_net.h"
 #include "../include/lustre/lustre_idl.h"
@@ -940,7 +941,7 @@ int lov_process_config_base(struct obd_device *obd, struct lustre_cfg *lcfg,
 	}
 	case LCFG_PARAM: {
 		struct lprocfs_static_vars lvars = { NULL };
-		struct lov_desc *desc = &(obd->u.lov.desc);
+		struct lov_desc *desc = &obd->u.lov.desc;
 
 		if (!desc) {
 			rc = -EINVAL;
@@ -971,92 +972,6 @@ out:
 	return rc;
 }
 
-static int lov_recreate(struct obd_export *exp, struct obdo *src_oa,
-			struct lov_stripe_md **ea, struct obd_trans_info *oti)
-{
-	struct lov_stripe_md *obj_mdp, *lsm;
-	struct lov_obd *lov = &exp->exp_obd->u.lov;
-	unsigned ost_idx;
-	int rc, i;
-
-	LASSERT(src_oa->o_valid & OBD_MD_FLFLAGS &&
-		src_oa->o_flags & OBD_FL_RECREATE_OBJS);
-
-	obj_mdp = kzalloc(sizeof(*obj_mdp), GFP_NOFS);
-	if (!obj_mdp)
-		return -ENOMEM;
-
-	ost_idx = src_oa->o_nlink;
-	lsm = *ea;
-	if (!lsm) {
-		rc = -EINVAL;
-		goto out;
-	}
-	if (ost_idx >= lov->desc.ld_tgt_count ||
-	    !lov->lov_tgts[ost_idx]) {
-		rc = -EINVAL;
-		goto out;
-	}
-
-	for (i = 0; i < lsm->lsm_stripe_count; i++) {
-		struct lov_oinfo *loi = lsm->lsm_oinfo[i];
-
-		if (lov_oinfo_is_dummy(loi))
-			continue;
-
-		if (loi->loi_ost_idx == ost_idx) {
-			if (ostid_id(&loi->loi_oi) != ostid_id(&src_oa->o_oi)) {
-				rc = -EINVAL;
-				goto out;
-			}
-			break;
-		}
-	}
-	if (i == lsm->lsm_stripe_count) {
-		rc = -EINVAL;
-		goto out;
-	}
-
-	rc = obd_create(NULL, lov->lov_tgts[ost_idx]->ltd_exp,
-			src_oa, &obj_mdp, oti);
-out:
-	kfree(obj_mdp);
-	return rc;
-}
-
-/* the LOV expects oa->o_id to be set to the LOV object id */
-static int lov_create(const struct lu_env *env, struct obd_export *exp,
-		      struct obdo *src_oa, struct lov_stripe_md **ea,
-		      struct obd_trans_info *oti)
-{
-	struct lov_obd *lov;
-	int rc = 0;
-
-	LASSERT(ea);
-	if (!exp)
-		return -EINVAL;
-
-	if ((src_oa->o_valid & OBD_MD_FLFLAGS) &&
-	    src_oa->o_flags == OBD_FL_DELORPHAN) {
-		/* should be used with LOV anymore */
-		LBUG();
-	}
-
-	lov = &exp->exp_obd->u.lov;
-	if (!lov->desc.ld_active_tgt_count)
-		return -EIO;
-
-	obd_getref(exp->exp_obd);
-	/* Recreate a specific object id at the given OST index */
-	if ((src_oa->o_valid & OBD_MD_FLFLAGS) &&
-	    (src_oa->o_flags & OBD_FL_RECREATE_OBJS)) {
-		rc = lov_recreate(exp, src_oa, ea, oti);
-	}
-
-	obd_putref(exp->exp_obd);
-	return rc;
-}
-
 #define ASSERT_LSM_MAGIC(lsmp)						  \
 do {									    \
 	LASSERT((lsmp));						\
@@ -1065,59 +980,6 @@ do {									    \
 		 "%p->lsm_magic=%x\n", (lsmp), (lsmp)->lsm_magic);	      \
 } while (0)
 
-static int lov_destroy(const struct lu_env *env, struct obd_export *exp,
-		       struct obdo *oa, struct lov_stripe_md *lsm,
-		       struct obd_trans_info *oti, struct obd_export *md_exp)
-{
-	struct lov_request_set *set;
-	struct obd_info oinfo;
-	struct lov_request *req;
-	struct lov_obd *lov;
-	int rc = 0, err = 0;
-
-	ASSERT_LSM_MAGIC(lsm);
-
-	if (!exp || !exp->exp_obd)
-		return -ENODEV;
-
-	if (oa->o_valid & OBD_MD_FLCOOKIE) {
-		LASSERT(oti);
-		LASSERT(oti->oti_logcookies);
-	}
-
-	lov = &exp->exp_obd->u.lov;
-	obd_getref(exp->exp_obd);
-	rc = lov_prep_destroy_set(exp, &oinfo, oa, lsm, oti, &set);
-	if (rc)
-		goto out;
-
-	list_for_each_entry(req, &set->set_list, rq_link) {
-		if (oa->o_valid & OBD_MD_FLCOOKIE)
-			oti->oti_logcookies = set->set_cookies + req->rq_stripe;
-
-		err = obd_destroy(env, lov->lov_tgts[req->rq_idx]->ltd_exp,
-				  req->rq_oi.oi_oa, NULL, oti, NULL);
-		err = lov_update_common_set(set, req, err);
-		if (err) {
-			CERROR("%s: destroying objid "DOSTID" subobj "
-			       DOSTID" on OST idx %d: rc = %d\n",
-			       exp->exp_obd->obd_name, POSTID(&oa->o_oi),
-			       POSTID(&req->rq_oi.oi_oa->o_oi),
-			       req->rq_idx, err);
-			if (!rc)
-				rc = err;
-		}
-	}
-
-	if (rc == 0)
-		rc = lsm_op_find(lsm->lsm_magic)->lsm_destroy(lsm, oa, md_exp);
-
-	err = lov_fini_destroy_set(set);
-out:
-	obd_putref(exp->exp_obd);
-	return rc ? rc : err;
-}
-
 static int lov_getattr_interpret(struct ptlrpc_request_set *rqset,
 				 void *data, int rc)
 {
@@ -1267,46 +1129,6 @@ static int lov_setattr_async(struct obd_export *exp, struct obd_info *oinfo,
 	return 0;
 }
 
-/* find any ldlm lock of the inode in lov
- * return 0    not find
- *	1    find one
- *      < 0    error
- */
-static int lov_find_cbdata(struct obd_export *exp,
-			   struct lov_stripe_md *lsm, ldlm_iterator_t it,
-			   void *data)
-{
-	struct lov_obd *lov;
-	int rc = 0, i;
-
-	ASSERT_LSM_MAGIC(lsm);
-
-	if (!exp || !exp->exp_obd)
-		return -ENODEV;
-
-	lov = &exp->exp_obd->u.lov;
-	for (i = 0; i < lsm->lsm_stripe_count; i++) {
-		struct lov_stripe_md submd;
-		struct lov_oinfo *loi = lsm->lsm_oinfo[i];
-
-		if (lov_oinfo_is_dummy(loi))
-			continue;
-
-		if (!lov->lov_tgts[loi->loi_ost_idx]) {
-			CDEBUG(D_HA, "lov idx %d NULL\n", loi->loi_ost_idx);
-			continue;
-		}
-
-		submd.lsm_oi = loi->loi_oi;
-		submd.lsm_stripe_count = 0;
-		rc = obd_find_cbdata(lov->lov_tgts[loi->loi_ost_idx]->ltd_exp,
-				     &submd, it, data);
-		if (rc != 0)
-			return rc;
-	}
-	return rc;
-}
-
 int lov_statfs_interpret(struct ptlrpc_request_set *rqset, void *data, int rc)
 {
 	struct lov_request_set *lovset = (struct lov_request_set *)data;
@@ -1460,7 +1282,7 @@ static int lov_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
 		}
 
 		desc = (struct lov_desc *)data->ioc_inlbuf1;
-		memcpy(desc, &(lov->desc), sizeof(*desc));
+		memcpy(desc, &lov->desc, sizeof(*desc));
 
 		uuidp = (struct obd_uuid *)data->ioc_inlbuf2;
 		genp = (__u32 *)data->ioc_inlbuf3;
@@ -1477,9 +1299,6 @@ static int lov_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
 		obd_ioctl_freedata(buf, len);
 		break;
 	}
-	case LL_IOC_LOV_GETSTRIPE:
-		rc = lov_getstripe(exp, karg, uarg);
-		break;
 	case OBD_IOC_QUOTACTL: {
 		struct if_quotactl *qctl = karg;
 		struct lov_tgt_desc *tgt = NULL;
@@ -1726,6 +1545,8 @@ static int lov_fiemap(struct lov_obd *lov, __u32 keylen, void *key,
 	u64 fm_start, fm_end, fm_length, fm_end_offset;
 	u64 curr_loc;
 	int current_extent = 0, rc = 0, i;
+	/* Whether have we collected enough extents */
+	bool enough = false;
 	int ost_eof = 0; /* EOF for object */
 	int ost_done = 0; /* done with required mapping for this OST? */
 	int last_stripe;
@@ -1860,7 +1681,7 @@ static int lov_fiemap(struct lov_obd *lov, __u32 keylen, void *key,
 			lun_start += len_mapped_single_call;
 			fm_local->fm_length = req_fm_len - len_mapped_single_call;
 			req_fm_len = fm_local->fm_length;
-			fm_local->fm_extent_count = count_local;
+			fm_local->fm_extent_count = enough ? 1 : count_local;
 			fm_local->fm_mapped_extents = 0;
 			fm_local->fm_flags = fiemap->fm_flags;
 
@@ -1908,6 +1729,12 @@ inactive_tgt:
 					goto finish;
 				}
 				break;
+			} else if (enough) {
+				/*
+				 * We've collected enough extents and there are
+				 * more extents after it.
+				 */
+				goto finish;
 			}
 
 			/* If we just need num of extents then go to next device */
@@ -1916,8 +1743,9 @@ inactive_tgt:
 				break;
 			}
 
-			len_mapped_single_call = lcl_fm_ext[ext_count-1].fe_logical -
-				  lun_start + lcl_fm_ext[ext_count - 1].fe_length;
+			len_mapped_single_call =
+				lcl_fm_ext[ext_count - 1].fe_logical -
+				lun_start + lcl_fm_ext[ext_count - 1].fe_length;
 
 			/* Have we finished mapping on this device? */
 			if (req_fm_len <= len_mapped_single_call)
@@ -1926,14 +1754,15 @@ inactive_tgt:
 			/* Clear the EXTENT_LAST flag which can be present on
 			 * last extent
 			 */
-			if (lcl_fm_ext[ext_count-1].fe_flags & FIEMAP_EXTENT_LAST)
+			if (lcl_fm_ext[ext_count - 1].fe_flags &
+			    FIEMAP_EXTENT_LAST)
 				lcl_fm_ext[ext_count - 1].fe_flags &=
 							    ~FIEMAP_EXTENT_LAST;
 
 			curr_loc = lov_stripe_size(lsm,
-					   lcl_fm_ext[ext_count - 1].fe_logical+
-					   lcl_fm_ext[ext_count - 1].fe_length,
-					   cur_stripe);
+					lcl_fm_ext[ext_count - 1].fe_logical +
+					lcl_fm_ext[ext_count - 1].fe_length,
+					cur_stripe);
 			if (curr_loc >= fm_key->oa.o_size)
 				ost_eof = 1;
 
@@ -1945,7 +1774,7 @@ inactive_tgt:
 
 			/* Ran out of available extents? */
 			if (current_extent >= fiemap->fm_extent_count)
-				goto finish;
+				enough = true;
 		} while (ost_done == 0 && ost_eof == 0);
 
 		if (cur_stripe_wrap == last_stripe)
@@ -1985,73 +1814,14 @@ static int lov_get_info(const struct lu_env *env, struct obd_export *exp,
 {
 	struct obd_device *obddev = class_exp2obd(exp);
 	struct lov_obd *lov = &obddev->u.lov;
-	int i, rc;
+	int rc;
 
 	if (!vallen || !val)
 		return -EFAULT;
 
 	obd_getref(obddev);
 
-	if (KEY_IS(KEY_LOCK_TO_STRIPE)) {
-		struct {
-			char name[16];
-			struct ldlm_lock *lock;
-		} *data = key;
-		struct ldlm_res_id *res_id = &data->lock->l_resource->lr_name;
-		struct lov_oinfo *loi;
-		__u32 *stripe = val;
-
-		if (*vallen < sizeof(*stripe)) {
-			rc = -EFAULT;
-			goto out;
-		}
-		*vallen = sizeof(*stripe);
-
-		/* XXX This is another one of those bits that will need to
-		 * change if we ever actually support nested LOVs.  It uses
-		 * the lock's export to find out which stripe it is.
-		 */
-		/* XXX - it's assumed all the locks for deleted OSTs have
-		 * been cancelled. Also, the export for deleted OSTs will
-		 * be NULL and won't match the lock's export.
-		 */
-		for (i = 0; i < lsm->lsm_stripe_count; i++) {
-			loi = lsm->lsm_oinfo[i];
-			if (lov_oinfo_is_dummy(loi))
-				continue;
-
-			if (!lov->lov_tgts[loi->loi_ost_idx])
-				continue;
-			if (lov->lov_tgts[loi->loi_ost_idx]->ltd_exp ==
-			    data->lock->l_conn_export &&
-			    ostid_res_name_eq(&loi->loi_oi, res_id)) {
-				*stripe = i;
-				rc = 0;
-				goto out;
-			}
-		}
-		LDLM_ERROR(data->lock, "lock on inode without such object");
-		dump_lsm(D_ERROR, lsm);
-		rc = -ENXIO;
-		goto out;
-	} else if (KEY_IS(KEY_LAST_ID)) {
-		struct obd_id_info *info = val;
-		__u32 size = sizeof(u64);
-		struct lov_tgt_desc *tgt;
-
-		LASSERT(*vallen == sizeof(struct obd_id_info));
-		tgt = lov->lov_tgts[info->idx];
-
-		if (!tgt || !tgt->ltd_active) {
-			rc = -ESRCH;
-			goto out;
-		}
-
-		rc = obd_get_info(env, tgt->ltd_exp, keylen, key,
-				  &size, info->data, NULL);
-		rc = 0;
-		goto out;
-	} else if (KEY_IS(KEY_LOVDESC)) {
+	if (KEY_IS(KEY_LOVDESC)) {
 		struct lov_desc *desc_ret = val;
 		*desc_ret = lov->desc;
 
@@ -2060,22 +1830,6 @@ static int lov_get_info(const struct lu_env *env, struct obd_export *exp,
 	} else if (KEY_IS(KEY_FIEMAP)) {
 		rc = lov_fiemap(lov, keylen, key, vallen, val, lsm);
 		goto out;
-	} else if (KEY_IS(KEY_CONNECT_FLAG)) {
-		struct lov_tgt_desc *tgt;
-		__u64 ost_idx = *((__u64 *)val);
-
-		LASSERT(*vallen == sizeof(__u64));
-		LASSERT(ost_idx < lov->desc.ld_tgt_count);
-		tgt = lov->lov_tgts[ost_idx];
-
-		if (!tgt || !tgt->ltd_exp) {
-			rc = -ESRCH;
-			goto out;
-		}
-
-		*((__u64 *)val) = exp_connect_flags(tgt->ltd_exp);
-		rc = 0;
-		goto out;
 	} else if (KEY_IS(KEY_TGT_COUNT)) {
 		*((int *)val) = lov->desc.ld_tgt_count;
 		rc = 0;
@@ -2098,8 +1852,7 @@ static int lov_set_info_async(const struct lu_env *env, struct obd_export *exp,
 	u32 count;
 	int i, rc = 0, err;
 	struct lov_tgt_desc *tgt;
-	unsigned int incr = 0, check_uuid = 0, do_inactive = 0, no_set = 0;
-	unsigned int next_id = 0, mds_con = 0;
+	int do_inactive = 0, no_set = 0;
 
 	if (!set) {
 		no_set = 1;
@@ -2111,18 +1864,8 @@ static int lov_set_info_async(const struct lu_env *env, struct obd_export *exp,
 	obd_getref(obddev);
 	count = lov->desc.ld_tgt_count;
 
-	if (KEY_IS(KEY_NEXT_ID)) {
-		count = vallen / sizeof(struct obd_id_info);
-		vallen = sizeof(u64);
-		incr = sizeof(struct obd_id_info);
-		do_inactive = 1;
-		next_id = 1;
-	} else if (KEY_IS(KEY_CHECKSUM)) {
+	if (KEY_IS(KEY_CHECKSUM)) {
 		do_inactive = 1;
-	} else if (KEY_IS(KEY_EVICT_BY_NID)) {
-		/* use defaults:  do_inactive = incr = 0; */
-	} else if (KEY_IS(KEY_MDS_CONN)) {
-		mds_con = 1;
 	} else if (KEY_IS(KEY_CACHE_SET)) {
 		LASSERT(!lov->lov_cache);
 		lov->lov_cache = val;
@@ -2130,11 +1873,9 @@ static int lov_set_info_async(const struct lu_env *env, struct obd_export *exp,
 		cl_cache_incref(lov->lov_cache);
 	}
 
-	for (i = 0; i < count; i++, val = (char *)val + incr) {
-		if (next_id)
-			tgt = lov->lov_tgts[((struct obd_id_info *)val)->idx];
-		else
-			tgt = lov->lov_tgts[i];
+	for (i = 0; i < count; i++) {
+		tgt = lov->lov_tgts[i];
+
 		/* OST was disconnected */
 		if (!tgt || !tgt->ltd_exp)
 			continue;
@@ -2143,34 +1884,8 @@ static int lov_set_info_async(const struct lu_env *env, struct obd_export *exp,
 		if (!tgt->ltd_active && !do_inactive)
 			continue;
 
-		if (mds_con) {
-			struct mds_group_info *mgi;
-
-			LASSERT(vallen == sizeof(*mgi));
-			mgi = (struct mds_group_info *)val;
-
-			/* Only want a specific OSC */
-			if (mgi->uuid && !obd_uuid_equals(mgi->uuid,
-							  &tgt->ltd_uuid))
-				continue;
-
-			err = obd_set_info_async(env, tgt->ltd_exp,
-						 keylen, key, sizeof(int),
-						 &mgi->group, set);
-		} else if (next_id) {
-			err = obd_set_info_async(env, tgt->ltd_exp,
-					 keylen, key, vallen,
-					 ((struct obd_id_info *)val)->data, set);
-		} else {
-			/* Only want a specific OSC */
-			if (check_uuid &&
-			    !obd_uuid_equals(val, &tgt->ltd_uuid))
-				continue;
-
-			err = obd_set_info_async(env, tgt->ltd_exp,
-						 keylen, key, vallen, val, set);
-		}
-
+		err = obd_set_info_async(env, tgt->ltd_exp, keylen, key,
+					 vallen, val, set);
 		if (!rc)
 			rc = err;
 	}
@@ -2318,12 +2033,8 @@ static struct obd_ops lov_obd_ops = {
 	.statfs_async   = lov_statfs_async,
 	.packmd         = lov_packmd,
 	.unpackmd       = lov_unpackmd,
-	.create         = lov_create,
-	.destroy        = lov_destroy,
 	.getattr_async  = lov_getattr_async,
 	.setattr_async  = lov_setattr_async,
-	.adjust_kms     = lov_adjust_kms,
-	.find_cbdata    = lov_find_cbdata,
 	.iocontrol      = lov_iocontrol,
 	.get_info       = lov_get_info,
 	.set_info_async = lov_set_info_async,
diff --git a/drivers/staging/lustre/lustre/lov/lov_object.c b/drivers/staging/lustre/lustre/lov/lov_object.c
index f9621b0fd469..52f736338887 100644
--- a/drivers/staging/lustre/lustre/lov/lov_object.c
+++ b/drivers/staging/lustre/lustre/lov/lov_object.c
@@ -75,6 +75,13 @@ struct lov_layout_operations {
 
 static int lov_layout_wait(const struct lu_env *env, struct lov_object *lov);
 
+void lov_lsm_put(struct cl_object *unused, struct lov_stripe_md *lsm)
+{
+	if (lsm)
+		lov_free_memmd(&lsm);
+}
+EXPORT_SYMBOL(lov_lsm_put);
+
 /*****************************************************************************
  *
  * Lov object layout operations.
@@ -195,6 +202,10 @@ static int lov_page_slice_fixup(struct lov_object *lov,
 	struct cl_object_header *hdr = cl_object_header(&lov->lo_cl);
 	struct cl_object *o;
 
+	if (!stripe)
+		return hdr->coh_page_bufsize - lov->lo_cl.co_slice_off -
+		       cfs_size_round(sizeof(struct lov_page));
+
 	cl_object_for_each(o, stripe)
 		o->co_slice_off += hdr->coh_page_bufsize;
 
@@ -224,6 +235,7 @@ static int lov_init_raid0(const struct lu_env *env,
 
 	LASSERT(!lov->lo_lsm);
 	lov->lo_lsm = lsm_addref(lsm);
+	lov->lo_layout_invalid = true;
 	r0->lo_nr  = lsm->lsm_stripe_count;
 	LASSERT(r0->lo_nr <= lov_targets_nr(dev));
 
@@ -719,6 +731,10 @@ static int lov_layout_change(const struct lu_env *unused,
 		LASSERT(atomic_read(&lov->lo_active_ios) == 0);
 
 		lov->lo_type = LLT_EMPTY;
+		/* page bufsize fixup */
+		cl_object_header(&lov->lo_cl)->coh_page_bufsize -=
+			lov_page_slice_fixup(lov, NULL);
+
 		result = new_ops->llo_init(env,
 					lu2lov_dev(lov->lo_cl.co_lu.lo_dev),
 					lov, conf, state);
@@ -878,8 +894,8 @@ static int lov_attr_get(const struct lu_env *env, struct cl_object *obj,
 	return LOV_2DISPATCH_NOLOCK(cl2lov(obj), llo_getattr, env, obj, attr);
 }
 
-static int lov_attr_set(const struct lu_env *env, struct cl_object *obj,
-			const struct cl_attr *attr, unsigned valid)
+static int lov_attr_update(const struct lu_env *env, struct cl_object *obj,
+			   const struct cl_attr *attr, unsigned int valid)
 {
 	/*
 	 * No dispatch is required here, as no layout implements this.
@@ -895,13 +911,30 @@ int lov_lock_init(const struct lu_env *env, struct cl_object *obj,
 				    io);
 }
 
+static int lov_object_getstripe(const struct lu_env *env, struct cl_object *obj,
+				struct lov_user_md __user *lum)
+{
+	struct lov_object *lov = cl2lov(obj);
+	struct lov_stripe_md *lsm;
+	int rc = 0;
+
+	lsm = lov_lsm_addref(lov);
+	if (!lsm)
+		return -ENODATA;
+
+	rc = lov_getstripe(cl2lov(obj), lsm, lum);
+	lov_lsm_put(obj, lsm);
+	return rc;
+}
+
 static const struct cl_object_operations lov_ops = {
 	.coo_page_init = lov_page_init,
 	.coo_lock_init = lov_lock_init,
 	.coo_io_init   = lov_io_init,
 	.coo_attr_get  = lov_attr_get,
-	.coo_attr_set  = lov_attr_set,
-	.coo_conf_set  = lov_conf_set
+	.coo_attr_update = lov_attr_update,
+	.coo_conf_set  = lov_conf_set,
+	.coo_getstripe = lov_object_getstripe
 };
 
 static const struct lu_object_operations lov_lu_obj_ops = {
@@ -938,7 +971,7 @@ struct lu_object *lov_object_alloc(const struct lu_env *env,
 	return obj;
 }
 
-static struct lov_stripe_md *lov_lsm_addref(struct lov_object *lov)
+struct lov_stripe_md *lov_lsm_addref(struct lov_object *lov)
 {
 	struct lov_stripe_md *lsm = NULL;
 
@@ -969,13 +1002,6 @@ struct lov_stripe_md *lov_lsm_get(struct cl_object *clobj)
 }
 EXPORT_SYMBOL(lov_lsm_get);
 
-void lov_lsm_put(struct cl_object *unused, struct lov_stripe_md *lsm)
-{
-	if (lsm)
-		lov_free_memmd(&lsm);
-}
-EXPORT_SYMBOL(lov_lsm_put);
-
 int lov_read_and_clear_async_rc(struct cl_object *clob)
 {
 	struct lu_object *luobj;
diff --git a/drivers/staging/lustre/lustre/lov/lov_pack.c b/drivers/staging/lustre/lustre/lov/lov_pack.c
index 869ef41b13ca..be6e9857ce2a 100644
--- a/drivers/staging/lustre/lustre/lov/lov_pack.c
+++ b/drivers/staging/lustre/lustre/lov/lov_pack.c
@@ -45,6 +45,7 @@
 #include "../include/lustre/lustre_user.h"
 
 #include "lov_internal.h"
+#include "lov_cl_internal.h"
 
 void lov_dump_lmm_common(int level, void *lmmp)
 {
@@ -104,11 +105,9 @@ void lov_dump_lmm_v3(int level, struct lov_mds_md_v3 *lmm)
  *     LOVs properly.  For now lov_mds_md_size() just assumes one u64
  *     per stripe.
  */
-int lov_packmd(struct obd_export *exp, struct lov_mds_md **lmmp,
-	       struct lov_stripe_md *lsm)
+int lov_obd_packmd(struct lov_obd *lov, struct lov_mds_md **lmmp,
+		   struct lov_stripe_md *lsm)
 {
-	struct obd_device *obd = class_exp2obd(exp);
-	struct lov_obd *lov = &obd->u.lov;
 	struct lov_mds_md_v1 *lmmv1;
 	struct lov_mds_md_v3 *lmmv3;
 	__u16 stripe_count;
@@ -148,16 +147,11 @@ int lov_packmd(struct obd_export *exp, struct lov_mds_md **lmmp,
 			stripe_count = 0;
 		}
 	} else {
-		/* No need to allocate more than maximum supported stripes.
-		 * Anyway, this is pretty inaccurate since ld_tgt_count now
-		 * represents max index and we should rely on the actual number
-		 * of OSTs instead
+		/*
+		 * To calculate maximum easize by active targets at present,
+		 * which is exactly the maximum easize to be seen by LOV
 		 */
-		stripe_count = lov_mds_md_max_stripe_count(
-			lov->lov_ocd.ocd_max_easize, lmm_magic);
-
-		if (stripe_count > lov->desc.ld_tgt_count)
-			stripe_count = lov->desc.ld_tgt_count;
+		stripe_count = lov->desc.ld_active_tgt_count;
 	}
 
 	/* XXX LOV STACKING call into osc for sizes */
@@ -225,6 +219,15 @@ int lov_packmd(struct obd_export *exp, struct lov_mds_md **lmmp,
 	return lmm_size;
 }
 
+int lov_packmd(struct obd_export *exp, struct lov_mds_md **lmmp,
+	       struct lov_stripe_md *lsm)
+{
+	struct obd_device *obd = class_exp2obd(exp);
+	struct lov_obd *lov = &obd->u.lov;
+
+	return lov_obd_packmd(lov, lmmp, lsm);
+}
+
 /* Find the max stripecount we should use */
 __u16 lov_get_stripecnt(struct lov_obd *lov, __u32 magic, __u16 stripe_count)
 {
@@ -284,7 +287,7 @@ int lov_alloc_memmd(struct lov_stripe_md **lsmp, __u16 stripe_count,
 	spin_lock_init(&(*lsmp)->lsm_lock);
 	(*lsmp)->lsm_magic = magic;
 	(*lsmp)->lsm_stripe_count = stripe_count;
-	(*lsmp)->lsm_maxbytes = LUSTRE_STRIPE_MAXBYTES * stripe_count;
+	(*lsmp)->lsm_maxbytes = LUSTRE_EXT3_STRIPE_MAXBYTES * stripe_count;
 	(*lsmp)->lsm_pattern = pattern;
 	(*lsmp)->lsm_pool_name[0] = '\0';
 	(*lsmp)->lsm_layout_gen = 0;
@@ -372,16 +375,17 @@ int lov_unpackmd(struct obd_export *exp,  struct lov_stripe_md **lsmp,
  * the maximum number of OST indices which will fit in the user buffer.
  * lmm_magic must be LOV_USER_MAGIC.
  */
-int lov_getstripe(struct obd_export *exp, struct lov_stripe_md *lsm,
+int lov_getstripe(struct lov_object *obj, struct lov_stripe_md *lsm,
 		  struct lov_user_md __user *lump)
 {
 	/*
 	 * XXX huge struct allocated on stack.
 	 */
 	/* we use lov_user_md_v3 because it is larger than lov_user_md_v1 */
+	struct lov_obd *lov;
 	struct lov_user_md_v3 lum;
 	struct lov_mds_md *lmmk = NULL;
-	int rc, lmm_size;
+	int rc, lmmk_size, lmm_size;
 	int lum_size;
 	mm_segment_t seg;
 
@@ -401,12 +405,13 @@ int lov_getstripe(struct obd_export *exp, struct lov_stripe_md *lsm,
 	lum_size = sizeof(struct lov_user_md_v1);
 	if (copy_from_user(&lum, lump, lum_size)) {
 		rc = -EFAULT;
-		goto out_set;
+		goto out;
 	}
-	if ((lum.lmm_magic != LOV_USER_MAGIC) &&
-	    (lum.lmm_magic != LOV_USER_MAGIC_V3)) {
+	if (lum.lmm_magic != LOV_USER_MAGIC_V1 &&
+	    lum.lmm_magic != LOV_USER_MAGIC_V3 &&
+	    lum.lmm_magic != LOV_USER_MAGIC_SPECIFIC) {
 		rc = -EINVAL;
-		goto out_set;
+		goto out;
 	}
 
 	if (lum.lmm_stripe_count &&
@@ -415,11 +420,13 @@ int lov_getstripe(struct obd_export *exp, struct lov_stripe_md *lsm,
 		lum.lmm_stripe_count = lsm->lsm_stripe_count;
 		rc = copy_to_user(lump, &lum, lum_size);
 		rc = -EOVERFLOW;
-		goto out_set;
+		goto out;
 	}
-	rc = lov_packmd(exp, &lmmk, lsm);
+	lov = lu2lov_dev(obj->lo_cl.co_lu.lo_dev)->ld_lov;
+	rc = lov_obd_packmd(lov, &lmmk, lsm);
 	if (rc < 0)
-		goto out_set;
+		goto out;
+	lmmk_size = rc;
 	lmm_size = rc;
 	rc = 0;
 
@@ -455,7 +462,7 @@ int lov_getstripe(struct obd_export *exp, struct lov_stripe_md *lsm,
 		lmm_size = lum_size;
 	} else if (lum.lmm_stripe_count < lmmk->lmm_stripe_count) {
 		rc = -EOVERFLOW;
-		goto out_set;
+		goto out_free;
 	}
 	/*
 	 * Have a difference between lov_mds_md & lov_user_md.
@@ -468,8 +475,9 @@ int lov_getstripe(struct obd_export *exp, struct lov_stripe_md *lsm,
 	if (copy_to_user(lump, lmmk, lmm_size))
 		rc = -EFAULT;
 
-	obd_free_diskmd(exp, &lmmk);
-out_set:
+out_free:
+	kfree(lmmk);
+out:
 	set_fs(seg);
 	return rc;
 }
diff --git a/drivers/staging/lustre/lustre/lov/lov_page.c b/drivers/staging/lustre/lustre/lov/lov_page.c
index c17026f14896..00bfabad78eb 100644
--- a/drivers/staging/lustre/lustre/lov/lov_page.c
+++ b/drivers/staging/lustre/lustre/lov/lov_page.c
@@ -65,7 +65,9 @@ static int lov_raid0_page_is_under_lock(const struct lu_env *env,
 	pgoff_t index = *max_index;
 	unsigned int pps; /* pages per stripe */
 
-	CDEBUG(D_READA, "*max_index = %lu, nr = %d\n", index, r0->lo_nr);
+	CDEBUG(D_READA, DFID "*max_index = %lu, nr = %d\n",
+	       PFID(lu_object_fid(lov2lu(loo))), index, r0->lo_nr);
+
 	if (index == 0) /* the page is not covered by any lock */
 		return 0;
 
@@ -80,7 +82,12 @@ static int lov_raid0_page_is_under_lock(const struct lu_env *env,
 
 	/* calculate the end of current stripe */
 	pps = loo->lo_lsm->lsm_stripe_size >> PAGE_SHIFT;
-	index = ((slice->cpl_index + pps) & ~(pps - 1)) - 1;
+	index = slice->cpl_index + pps - slice->cpl_index % pps - 1;
+
+	CDEBUG(D_READA, DFID "*max_index = %lu, index = %lu, pps = %u, stripe_size = %u, stripe no = %u, page index = %lu\n",
+	       PFID(lu_object_fid(lov2lu(loo))), *max_index, index, pps,
+	       loo->lo_lsm->lsm_stripe_size, lov_page_stripe(slice->cpl_page),
+	       slice->cpl_index);
 
 	/* never exceed the end of the stripe */
 	*max_index = min_t(pgoff_t, *max_index, index);
@@ -122,6 +129,7 @@ int lov_page_init_raid0(const struct lu_env *env, struct cl_object *obj,
 	rc = lov_stripe_offset(loo->lo_lsm, offset, stripe, &suboff);
 	LASSERT(rc == 0);
 
+	lpg->lps_stripe = stripe;
 	cl_page_slice_add(page, &lpg->lps_cl, obj, index, &lov_raid0_page_ops);
 
 	sub = lov_sub_get(env, lio, stripe);
diff --git a/drivers/staging/lustre/lustre/lov/lov_pool.c b/drivers/staging/lustre/lustre/lov/lov_pool.c
index 4c2d21729589..f8c8a361ef79 100644
--- a/drivers/staging/lustre/lustre/lov/lov_pool.c
+++ b/drivers/staging/lustre/lustre/lov/lov_pool.c
@@ -61,7 +61,7 @@ void lov_pool_putref(struct pool_desc *pool)
 		LASSERT(hlist_unhashed(&pool->pool_hash));
 		LASSERT(list_empty(&pool->pool_list));
 		LASSERT(!pool->pool_debugfs_entry);
-		lov_ost_pool_free(&(pool->pool_obds));
+		lov_ost_pool_free(&pool->pool_obds);
 		kfree(pool);
 	}
 }
@@ -92,7 +92,7 @@ static __u32 pool_hashfn(struct cfs_hash *hash_body, const void *key, unsigned m
 	for (i = 0; i < LOV_MAXPOOLNAME; i++) {
 		if (poolname[i] == '\0')
 			break;
-		result = (result << 4)^(result >> 28) ^  poolname[i];
+		result = (result << 4) ^ (result >> 28) ^  poolname[i];
 	}
 	return (result % mask);
 }
@@ -260,7 +260,7 @@ static int pool_proc_show(struct seq_file *s, void *v)
 	tgt = pool_tgt(iter->pool, iter->idx);
 	up_read(&pool_tgt_rw_sem(iter->pool));
 	if (tgt)
-		seq_printf(s, "%s\n", obd_uuid2str(&(tgt->ltd_uuid)));
+		seq_printf(s, "%s\n", obd_uuid2str(&tgt->ltd_uuid));
 
 	return 0;
 }
@@ -400,7 +400,7 @@ int lov_pool_new(struct obd_device *obd, char *poolname)
 	struct pool_desc *new_pool;
 	int rc;
 
-	lov = &(obd->u.lov);
+	lov = &obd->u.lov;
 
 	if (strlen(poolname) > LOV_MAXPOOLNAME)
 		return -ENAMETOOLONG;
@@ -471,7 +471,7 @@ int lov_pool_del(struct obd_device *obd, char *poolname)
 	struct lov_obd *lov;
 	struct pool_desc *pool;
 
-	lov = &(obd->u.lov);
+	lov = &obd->u.lov;
 
 	/* lookup and kill hash reference */
 	pool = cfs_hash_del_key(lov->lov_pools_hash_body, poolname);
@@ -503,7 +503,7 @@ int lov_pool_add(struct obd_device *obd, char *poolname, char *ostname)
 	unsigned int lov_idx;
 	int rc;
 
-	lov = &(obd->u.lov);
+	lov = &obd->u.lov;
 
 	pool = cfs_hash_lookup(lov->lov_pools_hash_body, poolname);
 	if (!pool)
@@ -517,7 +517,7 @@ int lov_pool_add(struct obd_device *obd, char *poolname, char *ostname)
 		if (!lov->lov_tgts[lov_idx])
 			continue;
 		if (obd_uuid_equals(&ost_uuid,
-				    &(lov->lov_tgts[lov_idx]->ltd_uuid)))
+				    &lov->lov_tgts[lov_idx]->ltd_uuid))
 			break;
 	}
 	/* test if ost found in lov */
@@ -547,7 +547,7 @@ int lov_pool_remove(struct obd_device *obd, char *poolname, char *ostname)
 	unsigned int lov_idx;
 	int rc = 0;
 
-	lov = &(obd->u.lov);
+	lov = &obd->u.lov;
 
 	pool = cfs_hash_lookup(lov->lov_pools_hash_body, poolname);
 	if (!pool)
@@ -562,7 +562,7 @@ int lov_pool_remove(struct obd_device *obd, char *poolname, char *ostname)
 			continue;
 
 		if (obd_uuid_equals(&ost_uuid,
-				    &(lov->lov_tgts[lov_idx]->ltd_uuid)))
+				    &lov->lov_tgts[lov_idx]->ltd_uuid))
 			break;
 	}
 
diff --git a/drivers/staging/lustre/lustre/lov/lov_request.c b/drivers/staging/lustre/lustre/lov/lov_request.c
index 4099b51f826e..09dcaf484c89 100644
--- a/drivers/staging/lustre/lustre/lov/lov_request.c
+++ b/drivers/staging/lustre/lustre/lov/lov_request.c
@@ -325,84 +325,6 @@ out_set:
 	return rc;
 }
 
-int lov_fini_destroy_set(struct lov_request_set *set)
-{
-	if (!set)
-		return 0;
-	LASSERT(set->set_exp);
-	if (atomic_read(&set->set_completes)) {
-		/* FIXME update qos data here */
-	}
-
-	lov_put_reqset(set);
-
-	return 0;
-}
-
-int lov_prep_destroy_set(struct obd_export *exp, struct obd_info *oinfo,
-			 struct obdo *src_oa, struct lov_stripe_md *lsm,
-			 struct obd_trans_info *oti,
-			 struct lov_request_set **reqset)
-{
-	struct lov_request_set *set;
-	struct lov_obd *lov = &exp->exp_obd->u.lov;
-	int rc = 0, i;
-
-	set = kzalloc(sizeof(*set), GFP_NOFS);
-	if (!set)
-		return -ENOMEM;
-	lov_init_set(set);
-
-	set->set_exp = exp;
-	set->set_oi = oinfo;
-	set->set_oi->oi_md = lsm;
-	set->set_oi->oi_oa = src_oa;
-	if (oti && src_oa->o_valid & OBD_MD_FLCOOKIE)
-		set->set_cookies = oti->oti_logcookies;
-
-	for (i = 0; i < lsm->lsm_stripe_count; i++) {
-		struct lov_oinfo *loi;
-		struct lov_request *req;
-
-		loi = lsm->lsm_oinfo[i];
-		if (lov_oinfo_is_dummy(loi))
-			continue;
-
-		if (!lov_check_and_wait_active(lov, loi->loi_ost_idx)) {
-			CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
-			continue;
-		}
-
-		req = kzalloc(sizeof(*req), GFP_NOFS);
-		if (!req) {
-			rc = -ENOMEM;
-			goto out_set;
-		}
-
-		req->rq_stripe = i;
-		req->rq_idx = loi->loi_ost_idx;
-
-		req->rq_oi.oi_oa = kmem_cache_zalloc(obdo_cachep, GFP_NOFS);
-		if (!req->rq_oi.oi_oa) {
-			kfree(req);
-			rc = -ENOMEM;
-			goto out_set;
-		}
-		memcpy(req->rq_oi.oi_oa, src_oa, sizeof(*req->rq_oi.oi_oa));
-		req->rq_oi.oi_oa->o_oi = loi->loi_oi;
-		lov_set_add_req(req, set);
-	}
-	if (!set->set_count) {
-		rc = -EIO;
-		goto out_set;
-	}
-	*reqset = set;
-	return rc;
-out_set:
-	lov_fini_destroy_set(set);
-	return rc;
-}
-
 int lov_fini_setattr_set(struct lov_request_set *set)
 {
 	int rc = 0;
diff --git a/drivers/staging/lustre/lustre/lov/lovsub_object.c b/drivers/staging/lustre/lustre/lov/lovsub_object.c
index fb2f2660b3e9..a2bac7a3b71b 100644
--- a/drivers/staging/lustre/lustre/lov/lovsub_object.c
+++ b/drivers/staging/lustre/lustre/lov/lovsub_object.c
@@ -98,8 +98,8 @@ static int lovsub_object_print(const struct lu_env *env, void *cookie,
 	return (*p)(env, cookie, "[%d]", los->lso_index);
 }
 
-static int lovsub_attr_set(const struct lu_env *env, struct cl_object *obj,
-			   const struct cl_attr *attr, unsigned valid)
+static int lovsub_attr_update(const struct lu_env *env, struct cl_object *obj,
+			      const struct cl_attr *attr, unsigned int valid)
 {
 	struct lov_object *lov = cl2lovsub(obj)->lso_super;
 
@@ -119,7 +119,7 @@ static int lovsub_object_glimpse(const struct lu_env *env,
 static const struct cl_object_operations lovsub_ops = {
 	.coo_page_init = lovsub_page_init,
 	.coo_lock_init = lovsub_lock_init,
-	.coo_attr_set  = lovsub_attr_set,
+	.coo_attr_update = lovsub_attr_update,
 	.coo_glimpse   = lovsub_object_glimpse
 };
 
diff --git a/drivers/staging/lustre/lustre/mdc/lproc_mdc.c b/drivers/staging/lustre/lustre/mdc/lproc_mdc.c
index 98d15fb247bc..fca9450de57c 100644
--- a/drivers/staging/lustre/lustre/mdc/lproc_mdc.c
+++ b/drivers/staging/lustre/lustre/mdc/lproc_mdc.c
@@ -43,11 +43,10 @@ static ssize_t max_rpcs_in_flight_show(struct kobject *kobj,
 	int len;
 	struct obd_device *dev = container_of(kobj, struct obd_device,
 					      obd_kobj);
-	struct client_obd *cli = &dev->u.cli;
+	__u32 max;
 
-	spin_lock(&cli->cl_loi_list_lock);
-	len = sprintf(buf, "%u\n", cli->cl_max_rpcs_in_flight);
-	spin_unlock(&cli->cl_loi_list_lock);
+	max = obd_get_max_rpcs_in_flight(&dev->u.cli);
+	len = sprintf(buf, "%u\n", max);
 
 	return len;
 }
@@ -59,7 +58,6 @@ static ssize_t max_rpcs_in_flight_store(struct kobject *kobj,
 {
 	struct obd_device *dev = container_of(kobj, struct obd_device,
 					      obd_kobj);
-	struct client_obd *cli = &dev->u.cli;
 	int rc;
 	unsigned long val;
 
@@ -67,12 +65,9 @@ static ssize_t max_rpcs_in_flight_store(struct kobject *kobj,
 	if (rc)
 		return rc;
 
-	if (val < 1 || val > MDC_MAX_RIF_MAX)
-		return -ERANGE;
-
-	spin_lock(&cli->cl_loi_list_lock);
-	cli->cl_max_rpcs_in_flight = val;
-	spin_unlock(&cli->cl_loi_list_lock);
+	rc = obd_set_max_rpcs_in_flight(&dev->u.cli, val);
+	if (rc)
+		count = rc;
 
 	return count;
 }
diff --git a/drivers/staging/lustre/lustre/mdc/mdc_internal.h b/drivers/staging/lustre/lustre/mdc/mdc_internal.h
index 58f2841cabe4..f446c1c2584b 100644
--- a/drivers/staging/lustre/lustre/mdc/mdc_internal.h
+++ b/drivers/staging/lustre/lustre/mdc/mdc_internal.h
@@ -34,63 +34,57 @@
 #define _MDC_INTERNAL_H
 
 #include "../include/lustre_mdc.h"
-#include "../include/lustre_mds.h"
 
 void lprocfs_mdc_init_vars(struct lprocfs_static_vars *lvars);
 
 void mdc_pack_body(struct ptlrpc_request *req, const struct lu_fid *fid,
-		   __u64 valid, int ea_size, __u32 suppgid, int flags);
-void mdc_is_subdir_pack(struct ptlrpc_request *req, const struct lu_fid *pfid,
-			const struct lu_fid *cfid, int flags);
+		   __u64 valid, size_t ea_size, __u32 suppgid, u32 flags);
 void mdc_swap_layouts_pack(struct ptlrpc_request *req,
 			   struct md_op_data *op_data);
-void mdc_readdir_pack(struct ptlrpc_request *req, __u64 pgoff, __u32 size,
+void mdc_readdir_pack(struct ptlrpc_request *req, __u64 pgoff, size_t size,
 		      const struct lu_fid *fid);
-void mdc_getattr_pack(struct ptlrpc_request *req, __u64 valid, int flags,
-		      struct md_op_data *data, int ea_size);
+void mdc_getattr_pack(struct ptlrpc_request *req, __u64 valid, u32 flags,
+		      struct md_op_data *data, size_t ea_size);
 void mdc_setattr_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
-		      void *ea, int ealen, void *ea2, int ea2len);
+		      void *ea, size_t ealen, void *ea2, size_t ea2len);
 void mdc_create_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
-		     const void *data, int datalen, __u32 mode, __u32 uid,
-		     __u32 gid, cfs_cap_t capability, __u64 rdev);
+		     const void *data, size_t datalen, umode_t mode, uid_t uid,
+		     gid_t gid, cfs_cap_t capability, __u64 rdev);
 void mdc_open_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
-		   __u32 mode, __u64 rdev, __u64 flags, const void *data,
-		   int datalen);
+		   umode_t mode, __u64 rdev, __u64 flags, const void *data,
+		   size_t datalen);
 void mdc_unlink_pack(struct ptlrpc_request *req, struct md_op_data *op_data);
 void mdc_link_pack(struct ptlrpc_request *req, struct md_op_data *op_data);
 void mdc_rename_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
-		     const char *old, int oldlen, const char *new, int newlen);
+		     const char *old, size_t oldlen,
+		     const char *new, size_t newlen);
 void mdc_close_pack(struct ptlrpc_request *req, struct md_op_data *op_data);
-int mdc_enter_request(struct client_obd *cli);
-void mdc_exit_request(struct client_obd *cli);
 
 /* mdc/mdc_locks.c */
 int mdc_set_lock_data(struct obd_export *exp,
-		      __u64 *lockh, void *data, __u64 *bits);
+		      const struct lustre_handle *lockh,
+		      void *data, __u64 *bits);
 
 int mdc_null_inode(struct obd_export *exp, const struct lu_fid *fid);
 
-int mdc_find_cbdata(struct obd_export *exp, const struct lu_fid *fid,
-		    ldlm_iterator_t it, void *data);
-
 int mdc_intent_lock(struct obd_export *exp,
-		    struct md_op_data *,
-		    void *lmm, int lmmsize,
-		    struct lookup_intent *, int,
+		    struct md_op_data *op_data,
+		    struct lookup_intent *it,
 		    struct ptlrpc_request **reqp,
 		    ldlm_blocking_callback cb_blocking,
 		    __u64 extra_lock_flags);
+
 int mdc_enqueue(struct obd_export *exp, struct ldlm_enqueue_info *einfo,
+		const ldlm_policy_data_t *policy,
 		struct lookup_intent *it, struct md_op_data *op_data,
-		struct lustre_handle *lockh, void *lmm, int lmmsize,
-		struct ptlrpc_request **req, __u64 extra_lock_flags);
+		struct lustre_handle *lockh, __u64 extra_lock_flags);
 
 int mdc_resource_get_unused(struct obd_export *exp, const struct lu_fid *fid,
 			    struct list_head *cancels, enum ldlm_mode  mode,
 			    __u64 bits);
 /* mdc/mdc_request.c */
-int mdc_fid_alloc(struct obd_export *exp, struct lu_fid *fid,
-		  struct md_op_data *op_data);
+int mdc_fid_alloc(const struct lu_env *env, struct obd_export *exp,
+		  struct lu_fid *fid, struct md_op_data *op_data);
 struct obd_client_handle;
 
 int mdc_set_open_replay_data(struct obd_export *exp,
@@ -101,16 +95,17 @@ void mdc_commit_open(struct ptlrpc_request *req);
 void mdc_replay_open(struct ptlrpc_request *req);
 
 int mdc_create(struct obd_export *exp, struct md_op_data *op_data,
-	       const void *data, int datalen, int mode, __u32 uid, __u32 gid,
-	       cfs_cap_t capability, __u64 rdev,
+	       const void *data, size_t datalen, umode_t mode, uid_t uid,
+	       gid_t gid, cfs_cap_t capability, __u64 rdev,
 	       struct ptlrpc_request **request);
 int mdc_link(struct obd_export *exp, struct md_op_data *op_data,
 	     struct ptlrpc_request **request);
 int mdc_rename(struct obd_export *exp, struct md_op_data *op_data,
-	       const char *old, int oldlen, const char *new, int newlen,
+	       const char *old, size_t oldlen,
+	       const char *new, size_t newlen,
 	       struct ptlrpc_request **request);
 int mdc_setattr(struct obd_export *exp, struct md_op_data *op_data,
-		void *ea, int ealen, void *ea2, int ea2len,
+		void *ea, size_t ealen, void *ea2, size_t ea2len,
 		struct ptlrpc_request **request, struct md_open_data **mod);
 int mdc_unlink(struct obd_export *exp, struct md_op_data *op_data,
 	       struct ptlrpc_request **request);
@@ -138,4 +133,12 @@ static inline int mdc_prep_elc_req(struct obd_export *exp,
 				 count);
 }
 
+static inline unsigned long hash_x_index(__u64 hash, int hash64)
+{
+	if (BITS_PER_LONG == 32 && hash64)
+		hash >>= 32;
+	/* save hash 0 with hash 1 */
+	return ~0UL - (hash + !hash);
+}
+
 #endif
diff --git a/drivers/staging/lustre/lustre/mdc/mdc_lib.c b/drivers/staging/lustre/lustre/mdc/mdc_lib.c
index 143bd7628572..aac7e04873e2 100644
--- a/drivers/staging/lustre/lustre/mdc/mdc_lib.c
+++ b/drivers/staging/lustre/lustre/mdc/mdc_lib.c
@@ -37,27 +37,12 @@
 
 static void __mdc_pack_body(struct mdt_body *b, __u32 suppgid)
 {
-	b->suppgid = suppgid;
-	b->uid = from_kuid(&init_user_ns, current_uid());
-	b->gid = from_kgid(&init_user_ns, current_gid());
-	b->fsuid = from_kuid(&init_user_ns, current_fsuid());
-	b->fsgid = from_kgid(&init_user_ns, current_fsgid());
-	b->capability = cfs_curproc_cap_pack();
-}
-
-void mdc_is_subdir_pack(struct ptlrpc_request *req, const struct lu_fid *pfid,
-			const struct lu_fid *cfid, int flags)
-{
-	struct mdt_body *b = req_capsule_client_get(&req->rq_pill,
-						    &RMF_MDT_BODY);
-
-	if (pfid) {
-		b->fid1 = *pfid;
-		b->valid = OBD_MD_FLID;
-	}
-	if (cfid)
-		b->fid2 = *cfid;
-	b->flags = flags;
+	b->mbo_suppgid = suppgid;
+	b->mbo_uid = from_kuid(&init_user_ns, current_uid());
+	b->mbo_gid = from_kgid(&init_user_ns, current_gid());
+	b->mbo_fsuid = from_kuid(&init_user_ns, current_fsuid());
+	b->mbo_fsgid = from_kgid(&init_user_ns, current_fsgid());
+	b->mbo_capability = cfs_curproc_cap_pack();
 }
 
 void mdc_swap_layouts_pack(struct ptlrpc_request *req,
@@ -67,43 +52,74 @@ void mdc_swap_layouts_pack(struct ptlrpc_request *req,
 						    &RMF_MDT_BODY);
 
 	__mdc_pack_body(b, op_data->op_suppgids[0]);
-	b->fid1 = op_data->op_fid1;
-	b->fid2 = op_data->op_fid2;
-	b->valid |= OBD_MD_FLID;
+	b->mbo_fid1 = op_data->op_fid1;
+	b->mbo_fid2 = op_data->op_fid2;
+	b->mbo_valid |= OBD_MD_FLID;
 }
 
 void mdc_pack_body(struct ptlrpc_request *req, const struct lu_fid *fid,
-		   __u64 valid, int ea_size, __u32 suppgid, int flags)
+		   __u64 valid, size_t ea_size, __u32 suppgid, u32 flags)
 {
 	struct mdt_body *b = req_capsule_client_get(&req->rq_pill,
 						    &RMF_MDT_BODY);
-	b->valid = valid;
-	b->eadatasize = ea_size;
-	b->flags = flags;
+	b->mbo_valid = valid;
+	b->mbo_eadatasize = ea_size;
+	b->mbo_flags = flags;
 	__mdc_pack_body(b, suppgid);
 	if (fid) {
-		b->fid1 = *fid;
-		b->valid |= OBD_MD_FLID;
+		b->mbo_fid1 = *fid;
+		b->mbo_valid |= OBD_MD_FLID;
 	}
 }
 
-void mdc_readdir_pack(struct ptlrpc_request *req, __u64 pgoff,
-		      __u32 size, const struct lu_fid *fid)
+/**
+ * Pack a name (path component) into a request
+ *
+ * \param[in] req	request
+ * \param[in] field	request field (usually RMF_NAME)
+ * \param[in] name	path component
+ * \param[in] name_len	length of path component
+ *
+ * \a field must be present in \a req and of size \a name_len + 1.
+ *
+ * \a name must be '\0' terminated of length \a name_len and represent
+ * a single path component (not contain '/').
+ */
+static void mdc_pack_name(struct ptlrpc_request *req,
+			  const struct req_msg_field *field,
+			  const char *name, size_t name_len)
+{
+	size_t buf_size;
+	size_t cpy_len;
+	char *buf;
+
+	buf = req_capsule_client_get(&req->rq_pill, field);
+	buf_size = req_capsule_get_size(&req->rq_pill, field, RCL_CLIENT);
+
+	LASSERT(name && name_len && buf && buf_size == name_len + 1);
+
+	cpy_len = strlcpy(buf, name, buf_size);
+
+	LASSERT(cpy_len == name_len && lu_name_is_valid_2(buf, cpy_len));
+}
+
+void mdc_readdir_pack(struct ptlrpc_request *req, __u64 pgoff, size_t size,
+		      const struct lu_fid *fid)
 {
 	struct mdt_body *b = req_capsule_client_get(&req->rq_pill,
 						    &RMF_MDT_BODY);
-	b->fid1 = *fid;
-	b->valid |= OBD_MD_FLID;
-	b->size = pgoff;		       /* !! */
-	b->nlink = size;			/* !! */
+	b->mbo_fid1 = *fid;
+	b->mbo_valid |= OBD_MD_FLID;
+	b->mbo_size = pgoff;		       /* !! */
+	b->mbo_nlink = size;			/* !! */
 	__mdc_pack_body(b, -1);
-	b->mode = LUDA_FID | LUDA_TYPE;
+	b->mbo_mode = LUDA_FID | LUDA_TYPE;
 }
 
 /* packing of MDS records */
 void mdc_create_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
-		     const void *data, int datalen, __u32 mode,
-		     __u32 uid, __u32 gid, cfs_cap_t cap_effective, __u64 rdev)
+		     const void *data, size_t datalen, umode_t mode,
+		     uid_t uid, gid_t gid, cfs_cap_t cap_effective, __u64 rdev)
 {
 	struct mdt_rec_create	*rec;
 	char			*tmp;
@@ -130,22 +146,17 @@ void mdc_create_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
 	rec->cr_bias     = op_data->op_bias;
 	rec->cr_umask    = current_umask();
 
-	tmp = req_capsule_client_get(&req->rq_pill, &RMF_NAME);
-	LOGL0(op_data->op_name, op_data->op_namelen, tmp);
-
+	mdc_pack_name(req, &RMF_NAME, op_data->op_name, op_data->op_namelen);
 	if (data) {
 		tmp = req_capsule_client_get(&req->rq_pill, &RMF_EADATA);
 		memcpy(tmp, data, datalen);
 	}
 }
 
-static __u64 mds_pack_open_flags(__u64 flags, __u32 mode)
+static inline __u64 mds_pack_open_flags(__u64 flags)
 {
 	__u64 cr_flags = (flags & (FMODE_READ | FMODE_WRITE |
-				   MDS_OPEN_HAS_EA | MDS_OPEN_HAS_OBJS |
-				   MDS_OPEN_OWNEROVERRIDE | MDS_OPEN_LOCK |
-				   MDS_OPEN_BY_FID | MDS_OPEN_LEASE |
-				   MDS_OPEN_RELEASE));
+				   MDS_OPEN_FL_INTERNAL));
 	if (flags & O_CREAT)
 		cr_flags |= MDS_OPEN_CREAT;
 	if (flags & O_EXCL)
@@ -171,8 +182,8 @@ static __u64 mds_pack_open_flags(__u64 flags, __u32 mode)
 
 /* packing of MDS records */
 void mdc_open_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
-		   __u32 mode, __u64 rdev, __u64 flags, const void *lmm,
-		   int lmmlen)
+		   umode_t mode, __u64 rdev, __u64 flags, const void *lmm,
+		   size_t lmmlen)
 {
 	struct mdt_rec_create *rec;
 	char *tmp;
@@ -190,7 +201,7 @@ void mdc_open_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
 	rec->cr_fid2 = op_data->op_fid2;
 
 	rec->cr_mode     = mode;
-	cr_flags = mds_pack_open_flags(flags, mode);
+	cr_flags = mds_pack_open_flags(flags);
 	rec->cr_rdev     = rdev;
 	rec->cr_time     = op_data->op_mod_time;
 	rec->cr_suppgid1 = op_data->op_suppgids[0];
@@ -200,8 +211,9 @@ void mdc_open_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
 	rec->cr_old_handle = op_data->op_handle;
 
 	if (op_data->op_name) {
-		tmp = req_capsule_client_get(&req->rq_pill, &RMF_NAME);
-		LOGL0(op_data->op_name, op_data->op_namelen, tmp);
+		mdc_pack_name(req, &RMF_NAME, op_data->op_name,
+			      op_data->op_namelen);
+
 		if (op_data->op_bias & MDS_CREATE_VOLATILE)
 			cr_flags |= MDS_OPEN_VOLATILE;
 	}
@@ -295,7 +307,7 @@ static void mdc_ioepoch_pack(struct mdt_ioepoch *epoch,
 }
 
 void mdc_setattr_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
-		      void *ea, int ealen, void *ea2, int ea2len)
+		      void *ea, size_t ealen, void *ea2, size_t ea2len)
 {
 	struct mdt_rec_setattr *rec;
 	struct mdt_ioepoch *epoch;
@@ -316,7 +328,7 @@ void mdc_setattr_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
 
 	lum = req_capsule_client_get(&req->rq_pill, &RMF_EADATA);
 	if (!ea) { /* Remove LOV EA */
-		lum->lmm_magic = LOV_USER_MAGIC_V1;
+		lum->lmm_magic = cpu_to_le32(LOV_USER_MAGIC_V1);
 		lum->lmm_stripe_size = 0;
 		lum->lmm_stripe_count = 0;
 		lum->lmm_stripe_offset = (typeof(lum->lmm_stripe_offset))(-1);
@@ -334,7 +346,6 @@ void mdc_setattr_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
 void mdc_unlink_pack(struct ptlrpc_request *req, struct md_op_data *op_data)
 {
 	struct mdt_rec_unlink *rec;
-	char *tmp;
 
 	CLASSERT(sizeof(struct mdt_rec_reint) == sizeof(struct mdt_rec_unlink));
 	rec = req_capsule_client_get(&req->rq_pill, &RMF_REC_REINT);
@@ -352,15 +363,12 @@ void mdc_unlink_pack(struct ptlrpc_request *req, struct md_op_data *op_data)
 	rec->ul_time     = op_data->op_mod_time;
 	rec->ul_bias     = op_data->op_bias;
 
-	tmp = req_capsule_client_get(&req->rq_pill, &RMF_NAME);
-	LASSERT(tmp);
-	LOGL0(op_data->op_name, op_data->op_namelen, tmp);
+	mdc_pack_name(req, &RMF_NAME, op_data->op_name, op_data->op_namelen);
 }
 
 void mdc_link_pack(struct ptlrpc_request *req, struct md_op_data *op_data)
 {
 	struct mdt_rec_link *rec;
-	char *tmp;
 
 	CLASSERT(sizeof(struct mdt_rec_reint) == sizeof(struct mdt_rec_link));
 	rec = req_capsule_client_get(&req->rq_pill, &RMF_REC_REINT);
@@ -376,20 +384,21 @@ void mdc_link_pack(struct ptlrpc_request *req, struct md_op_data *op_data)
 	rec->lk_time     = op_data->op_mod_time;
 	rec->lk_bias     = op_data->op_bias;
 
-	tmp = req_capsule_client_get(&req->rq_pill, &RMF_NAME);
-	LOGL0(op_data->op_name, op_data->op_namelen, tmp);
+	mdc_pack_name(req, &RMF_NAME, op_data->op_name, op_data->op_namelen);
 }
 
 void mdc_rename_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
-		     const char *old, int oldlen, const char *new, int newlen)
+		     const char *old, size_t oldlen,
+		     const char *new, size_t newlen)
 {
 	struct mdt_rec_rename *rec;
-	char *tmp;
 
 	CLASSERT(sizeof(struct mdt_rec_reint) == sizeof(struct mdt_rec_rename));
 	rec = req_capsule_client_get(&req->rq_pill, &RMF_REC_REINT);
 
 	/* XXX do something about time, uid, gid */
+	rec->rn_opcode	 = op_data->op_cli_flags & CLI_MIGRATE ?
+				REINT_MIGRATE : REINT_RENAME;
 	rec->rn_opcode   = REINT_RENAME;
 	rec->rn_fsuid    = op_data->op_fsuid;
 	rec->rn_fsgid    = op_data->op_fsgid;
@@ -402,39 +411,34 @@ void mdc_rename_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
 	rec->rn_mode     = op_data->op_mode;
 	rec->rn_bias     = op_data->op_bias;
 
-	tmp = req_capsule_client_get(&req->rq_pill, &RMF_NAME);
-	LOGL0(old, oldlen, tmp);
+	mdc_pack_name(req, &RMF_NAME, old, oldlen);
 
-	if (new) {
-		tmp = req_capsule_client_get(&req->rq_pill, &RMF_SYMTGT);
-		LOGL0(new, newlen, tmp);
-	}
+	if (new)
+		mdc_pack_name(req, &RMF_SYMTGT, new, newlen);
 }
 
-void mdc_getattr_pack(struct ptlrpc_request *req, __u64 valid, int flags,
-		      struct md_op_data *op_data, int ea_size)
+void mdc_getattr_pack(struct ptlrpc_request *req, __u64 valid, u32 flags,
+		      struct md_op_data *op_data, size_t ea_size)
 {
 	struct mdt_body *b = req_capsule_client_get(&req->rq_pill,
 						    &RMF_MDT_BODY);
 
-	b->valid = valid;
+	b->mbo_valid = valid;
 	if (op_data->op_bias & MDS_CHECK_SPLIT)
-		b->valid |= OBD_MD_FLCKSPLIT;
+		b->mbo_valid |= OBD_MD_FLCKSPLIT;
 	if (op_data->op_bias & MDS_CROSS_REF)
-		b->valid |= OBD_MD_FLCROSSREF;
-	b->eadatasize = ea_size;
-	b->flags = flags;
+		b->mbo_valid |= OBD_MD_FLCROSSREF;
+	b->mbo_eadatasize = ea_size;
+	b->mbo_flags = flags;
 	__mdc_pack_body(b, op_data->op_suppgids[0]);
 
-	b->fid1 = op_data->op_fid1;
-	b->fid2 = op_data->op_fid2;
-	b->valid |= OBD_MD_FLID;
-
-	if (op_data->op_name) {
-		char *tmp = req_capsule_client_get(&req->rq_pill, &RMF_NAME);
+	b->mbo_fid1 = op_data->op_fid1;
+	b->mbo_fid2 = op_data->op_fid2;
+	b->mbo_valid |= OBD_MD_FLID;
 
-		LOGL0(op_data->op_name, op_data->op_namelen, tmp);
-	}
+	if (op_data->op_name)
+		mdc_pack_name(req, &RMF_NAME, op_data->op_name,
+			      op_data->op_namelen);
 }
 
 static void mdc_hsm_release_pack(struct ptlrpc_request *req,
@@ -482,67 +486,3 @@ void mdc_close_pack(struct ptlrpc_request *req, struct md_op_data *op_data)
 	mdc_ioepoch_pack(epoch, op_data);
 	mdc_hsm_release_pack(req, op_data);
 }
-
-static int mdc_req_avail(struct client_obd *cli, struct mdc_cache_waiter *mcw)
-{
-	int rc;
-
-	spin_lock(&cli->cl_loi_list_lock);
-	rc = list_empty(&mcw->mcw_entry);
-	spin_unlock(&cli->cl_loi_list_lock);
-	return rc;
-};
-
-/* We record requests in flight in cli->cl_r_in_flight here.
- * There is only one write rpc possible in mdc anyway. If this to change
- * in the future - the code may need to be revisited.
- */
-int mdc_enter_request(struct client_obd *cli)
-{
-	int rc = 0;
-	struct mdc_cache_waiter mcw;
-	struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL);
-
-	spin_lock(&cli->cl_loi_list_lock);
-	if (cli->cl_r_in_flight >= cli->cl_max_rpcs_in_flight) {
-		list_add_tail(&mcw.mcw_entry, &cli->cl_cache_waiters);
-		init_waitqueue_head(&mcw.mcw_waitq);
-		spin_unlock(&cli->cl_loi_list_lock);
-		rc = l_wait_event(mcw.mcw_waitq, mdc_req_avail(cli, &mcw),
-				  &lwi);
-		if (rc) {
-			spin_lock(&cli->cl_loi_list_lock);
-			if (list_empty(&mcw.mcw_entry))
-				cli->cl_r_in_flight--;
-			list_del_init(&mcw.mcw_entry);
-			spin_unlock(&cli->cl_loi_list_lock);
-		}
-	} else {
-		cli->cl_r_in_flight++;
-		spin_unlock(&cli->cl_loi_list_lock);
-	}
-	return rc;
-}
-
-void mdc_exit_request(struct client_obd *cli)
-{
-	struct list_head *l, *tmp;
-	struct mdc_cache_waiter *mcw;
-
-	spin_lock(&cli->cl_loi_list_lock);
-	cli->cl_r_in_flight--;
-	list_for_each_safe(l, tmp, &cli->cl_cache_waiters) {
-		if (cli->cl_r_in_flight >= cli->cl_max_rpcs_in_flight) {
-			/* No free request slots anymore */
-			break;
-		}
-
-		mcw = list_entry(l, struct mdc_cache_waiter, mcw_entry);
-		list_del_init(&mcw->mcw_entry);
-		cli->cl_r_in_flight++;
-		wake_up(&mcw->mcw_waitq);
-	}
-	/* Empty waiting list? Decrease reqs in-flight number */
-
-	spin_unlock(&cli->cl_loi_list_lock);
-}
diff --git a/drivers/staging/lustre/lustre/mdc/mdc_locks.c b/drivers/staging/lustre/lustre/mdc/mdc_locks.c
index f48b58423307..f1f6c082fa42 100644
--- a/drivers/staging/lustre/lustre/mdc/mdc_locks.c
+++ b/drivers/staging/lustre/lustre/mdc/mdc_locks.c
@@ -93,8 +93,8 @@ int it_open_error(int phase, struct lookup_intent *it)
 EXPORT_SYMBOL(it_open_error);
 
 /* this must be called on a lockh that is known to have a referenced lock */
-int mdc_set_lock_data(struct obd_export *exp, __u64 *lockh, void *data,
-		      __u64 *bits)
+int mdc_set_lock_data(struct obd_export *exp, const struct lustre_handle *lockh,
+		      void *data, __u64 *bits)
 {
 	struct ldlm_lock *lock;
 	struct inode *new_inode = data;
@@ -102,10 +102,10 @@ int mdc_set_lock_data(struct obd_export *exp, __u64 *lockh, void *data,
 	if (bits)
 		*bits = 0;
 
-	if (!*lockh)
+	if (!lustre_handle_is_used(lockh))
 		return 0;
 
-	lock = ldlm_handle2lock((struct lustre_handle *)lockh);
+	lock = ldlm_handle2lock(lockh);
 
 	LASSERT(lock);
 	lock_res_and_lock(lock);
@@ -174,7 +174,7 @@ int mdc_null_inode(struct obd_export *exp,
 	fid_build_reg_res_name(fid, &res_id);
 
 	res = ldlm_resource_get(ns, NULL, &res_id, 0, 0);
-	if (!res)
+	if (IS_ERR(res))
 		return 0;
 
 	lock_res(res);
@@ -185,28 +185,6 @@ int mdc_null_inode(struct obd_export *exp,
 	return 0;
 }
 
-/* find any ldlm lock of the inode in mdc
- * return 0    not find
- *	1    find one
- *      < 0    error
- */
-int mdc_find_cbdata(struct obd_export *exp,
-		    const struct lu_fid *fid,
-		    ldlm_iterator_t it, void *data)
-{
-	struct ldlm_res_id res_id;
-	int rc = 0;
-
-	fid_build_reg_res_name((struct lu_fid *)fid, &res_id);
-	rc = ldlm_resource_iterate(class_exp2obd(exp)->obd_namespace, &res_id,
-				   it, data);
-	if (rc == LDLM_ITER_STOP)
-		return 1;
-	else if (rc == LDLM_ITER_CONTINUE)
-		return 0;
-	return rc;
-}
-
 static inline void mdc_clear_replay_flag(struct ptlrpc_request *req, int rc)
 {
 	/* Don't hold error requests for replay. */
@@ -240,24 +218,24 @@ static void mdc_realloc_openmsg(struct ptlrpc_request *req,
 
 	/* FIXME: remove this explicit offset. */
 	rc = sptlrpc_cli_enlarge_reqbuf(req, DLM_INTENT_REC_OFF + 4,
-					body->eadatasize);
+					body->mbo_eadatasize);
 	if (rc) {
 		CERROR("Can't enlarge segment %d size to %d\n",
-		       DLM_INTENT_REC_OFF + 4, body->eadatasize);
-		body->valid &= ~OBD_MD_FLEASIZE;
-		body->eadatasize = 0;
+		       DLM_INTENT_REC_OFF + 4, body->mbo_eadatasize);
+		body->mbo_valid &= ~OBD_MD_FLEASIZE;
+		body->mbo_eadatasize = 0;
 	}
 }
 
-static struct ptlrpc_request *mdc_intent_open_pack(struct obd_export *exp,
-						   struct lookup_intent *it,
-						   struct md_op_data *op_data,
-						   void *lmm, int lmmsize,
-						   void *cb_data)
+static struct ptlrpc_request *
+mdc_intent_open_pack(struct obd_export *exp, struct lookup_intent *it,
+		     struct md_op_data *op_data)
 {
 	struct ptlrpc_request *req;
 	struct obd_device     *obddev = class_exp2obd(exp);
 	struct ldlm_intent    *lit;
+	const void *lmm = op_data->op_data;
+	u32 lmmsize = op_data->op_data_size;
 	LIST_HEAD(cancels);
 	int		    count = 0;
 	int		    mode;
@@ -274,7 +252,7 @@ static struct ptlrpc_request *mdc_intent_open_pack(struct obd_export *exp,
 			else
 				mode = LCK_PR;
 		} else {
-			if (it->it_flags & (FMODE_WRITE|MDS_OPEN_TRUNC))
+			if (it->it_flags & (FMODE_WRITE | MDS_OPEN_TRUNC))
 				mode = LCK_CW;
 			else if (it->it_flags & __FMODE_EXEC)
 				mode = LCK_PR;
@@ -325,6 +303,9 @@ static struct ptlrpc_request *mdc_intent_open_pack(struct obd_export *exp,
 	mdc_open_pack(req, op_data, it->it_create_mode, 0, it->it_flags, lmm,
 		      lmmsize);
 
+	req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER,
+			     obddev->u.cli.cl_max_mds_easize);
+
 	ptlrpc_request_set_replen(req);
 	return req;
 }
@@ -336,7 +317,8 @@ mdc_intent_getxattr_pack(struct obd_export *exp,
 {
 	struct ptlrpc_request	*req;
 	struct ldlm_intent	*lit;
-	int			rc, count = 0, maxdata;
+	int rc, count = 0;
+	u32 maxdata;
 	LIST_HEAD(cancels);
 
 	req = ptlrpc_request_alloc(class_exp2cliimp(exp),
@@ -421,7 +403,7 @@ static struct ptlrpc_request *mdc_intent_getattr_pack(struct obd_export *exp,
 				       OBD_MD_MEA | OBD_MD_FLACL;
 	struct ldlm_intent    *lit;
 	int		    rc;
-	int		    easize;
+	u32 easize;
 
 	req = ptlrpc_request_alloc(class_exp2cliimp(exp),
 				   &RQF_LDLM_INTENT_GETATTR);
@@ -526,7 +508,7 @@ static int mdc_finish_enqueue(struct obd_export *exp,
 	struct ldlm_reply   *lockrep;
 	struct ldlm_lock    *lock;
 	void		*lvb_data = NULL;
-	int		  lvb_len = 0;
+	u32 lvb_len = 0;
 
 	LASSERT(rc >= 0);
 	/* Similarly, if we're going to replay this request, we don't want to
@@ -605,7 +587,7 @@ static int mdc_finish_enqueue(struct obd_export *exp,
 			mdc_set_open_replay_data(NULL, NULL, it);
 		}
 
-		if ((body->valid & (OBD_MD_FLDIREA | OBD_MD_FLEASIZE)) != 0) {
+		if ((body->mbo_valid & (OBD_MD_FLDIREA | OBD_MD_FLEASIZE)) != 0) {
 			void *eadata;
 
 			mdc_update_max_ea_from_body(exp, body);
@@ -615,7 +597,7 @@ static int mdc_finish_enqueue(struct obd_export *exp,
 			 * Eventually, obd_unpackmd() will check the contents.
 			 */
 			eadata = req_capsule_server_sized_get(pill, &RMF_MDT_MD,
-							      body->eadatasize);
+							      body->mbo_eadatasize);
 			if (!eadata)
 				return -EPROTO;
 
@@ -623,7 +605,7 @@ static int mdc_finish_enqueue(struct obd_export *exp,
 			 * lock
 			 */
 			lvb_data = eadata;
-			lvb_len = body->eadatasize;
+			lvb_len = body->mbo_eadatasize;
 
 			/*
 			 * We save the reply LOV EA in case we have to replay a
@@ -639,20 +621,20 @@ static int mdc_finish_enqueue(struct obd_export *exp,
 
 				if (req_capsule_get_size(pill, &RMF_EADATA,
 							 RCL_CLIENT) <
-				    body->eadatasize)
+				    body->mbo_eadatasize)
 					mdc_realloc_openmsg(req, body);
 				else
 					req_capsule_shrink(pill, &RMF_EADATA,
-							   body->eadatasize,
+							   body->mbo_eadatasize,
 							   RCL_CLIENT);
 
 				req_capsule_set_size(pill, &RMF_EADATA,
 						     RCL_CLIENT,
-						     body->eadatasize);
+						     body->mbo_eadatasize);
 
 				lmm = req_capsule_client_get(pill, &RMF_EADATA);
 				if (lmm)
-					memcpy(lmm, eadata, body->eadatasize);
+					memcpy(lmm, eadata, body->mbo_eadatasize);
 			}
 		}
 	} else if (it->it_op & IT_LAYOUT) {
@@ -662,7 +644,8 @@ static int mdc_finish_enqueue(struct obd_export *exp,
 		lvb_len = req_capsule_get_size(pill, &RMF_DLM_LVB, RCL_SERVER);
 		if (lvb_len > 0) {
 			lvb_data = req_capsule_server_sized_get(pill,
-							&RMF_DLM_LVB, lvb_len);
+								&RMF_DLM_LVB,
+								lvb_len);
 			if (!lvb_data)
 				return -EPROTO;
 		}
@@ -705,9 +688,9 @@ static int mdc_finish_enqueue(struct obd_export *exp,
  * we don't know in advance the file type.
  */
 int mdc_enqueue(struct obd_export *exp, struct ldlm_enqueue_info *einfo,
+		const ldlm_policy_data_t *policy,
 		struct lookup_intent *it, struct md_op_data *op_data,
-		struct lustre_handle *lockh, void *lmm, int lmmsize,
-		struct ptlrpc_request **reqp, u64 extra_lock_flags)
+		struct lustre_handle *lockh, u64 extra_lock_flags)
 {
 	static const ldlm_policy_data_t lookup_policy = {
 		.l_inodebits = { MDS_INODELOCK_LOOKUP }
@@ -721,9 +704,8 @@ int mdc_enqueue(struct obd_export *exp, struct ldlm_enqueue_info *einfo,
 	static const ldlm_policy_data_t getxattr_policy = {
 		.l_inodebits = { MDS_INODELOCK_XATTR }
 	};
-	ldlm_policy_data_t const *policy = &lookup_policy;
 	struct obd_device *obddev = class_exp2obd(exp);
-	struct ptlrpc_request *req;
+	struct ptlrpc_request *req = NULL;
 	u64 flags, saved_flags = extra_lock_flags;
 	struct ldlm_res_id res_id;
 	int generation, resends = 0;
@@ -733,40 +715,32 @@ int mdc_enqueue(struct obd_export *exp, struct ldlm_enqueue_info *einfo,
 
 	LASSERTF(!it || einfo->ei_type == LDLM_IBITS, "lock type %d\n",
 		 einfo->ei_type);
-
 	fid_build_reg_res_name(&op_data->op_fid1, &res_id);
 
 	if (it) {
+		LASSERT(!policy);
+
 		saved_flags |= LDLM_FL_HAS_INTENT;
-		if (it->it_op & (IT_UNLINK | IT_GETATTR | IT_READDIR))
+		if (it->it_op & (IT_OPEN | IT_UNLINK | IT_GETATTR | IT_READDIR))
 			policy = &update_policy;
 		else if (it->it_op & IT_LAYOUT)
 			policy = &layout_policy;
 		else if (it->it_op & (IT_GETXATTR | IT_SETXATTR))
 			policy = &getxattr_policy;
+		else
+			policy = &lookup_policy;
 	}
 
-	LASSERT(!reqp);
-
 	generation = obddev->u.cli.cl_import->imp_generation;
 resend:
 	flags = saved_flags;
 	if (!it) {
-		/* The only way right now is FLOCK, in this case we hide flock
-		 * policy as lmm, but lmmsize is 0
-		 */
-		LASSERT(lmm && lmmsize == 0);
+		/* The only way right now is FLOCK. */
 		LASSERTF(einfo->ei_type == LDLM_FLOCK, "lock type %d\n",
 			 einfo->ei_type);
-		policy = lmm;
 		res_id.name[3] = LDLM_FLOCK;
-		req = NULL;
 	} else if (it->it_op & IT_OPEN) {
-		req = mdc_intent_open_pack(exp, it, op_data, lmm, lmmsize,
-					   einfo->ei_cbdata);
-		policy = &update_policy;
-		einfo->ei_cbdata = NULL;
-		lmm = NULL;
+		req = mdc_intent_open_pack(exp, it, op_data);
 	} else if (it->it_op & IT_UNLINK) {
 		req = mdc_intent_unlink_pack(exp, it, op_data);
 	} else if (it->it_op & (IT_GETATTR | IT_LOOKUP)) {
@@ -806,7 +780,7 @@ resend:
 	 */
 	if (it) {
 		mdc_get_rpc_lock(obddev->u.cli.cl_rpc_lock, it);
-		rc = mdc_enter_request(&obddev->u.cli);
+		rc = obd_get_request_slot(&obddev->u.cli);
 		if (rc != 0) {
 			mdc_put_rpc_lock(obddev->u.cli.cl_rpc_lock, it);
 			mdc_clear_replay_flag(req, 0);
@@ -834,13 +808,12 @@ resend:
 		return rc;
 	}
 
-	mdc_exit_request(&obddev->u.cli);
+	obd_put_request_slot(&obddev->u.cli);
 	mdc_put_rpc_lock(obddev->u.cli.cl_rpc_lock, it);
 
 	if (rc < 0) {
-		CDEBUG_LIMIT((rc == -EACCES || rc == -EIDRM) ? D_INFO : D_ERROR,
-			     "%s: ldlm_cli_enqueue failed: rc = %d\n",
-			     obddev->obd_name, rc);
+		CDEBUG(D_INFO, "%s: ldlm_cli_enqueue failed: rc = %d\n",
+		       obddev->obd_name, rc);
 
 		mdc_clear_replay_flag(req, rc);
 		ptlrpc_req_finished(req);
@@ -903,6 +876,9 @@ static int mdc_finish_intent_lock(struct obd_export *exp,
 	LASSERT(request != LP_POISON);
 	LASSERT(request->rq_repmsg != LP_POISON);
 
+	if (it->it_op & IT_READDIR)
+		return 0;
+
 	if (!it_disposition(it, DISP_IT_EXECD)) {
 		/* The server failed before it even started executing the
 		 * intent, i.e. because it couldn't unpack the request.
@@ -917,27 +893,6 @@ static int mdc_finish_intent_lock(struct obd_export *exp,
 	mdt_body = req_capsule_server_get(&request->rq_pill, &RMF_MDT_BODY);
 	LASSERT(mdt_body);      /* mdc_enqueue checked */
 
-	/* If we were revalidating a fid/name pair, mark the intent in
-	 * case we fail and get called again from lookup
-	 */
-	if (fid_is_sane(&op_data->op_fid2) &&
-	    it->it_create_mode & M_CHECK_STALE &&
-	    it->it_op != IT_GETATTR) {
-		/* Also: did we find the same inode? */
-		/* sever can return one of two fids:
-		 * op_fid2 - new allocated fid - if file is created.
-		 * op_fid3 - existent fid - if file only open.
-		 * op_fid3 is saved in lmv_intent_open
-		 */
-		if ((!lu_fid_eq(&op_data->op_fid2, &mdt_body->fid1)) &&
-		    (!lu_fid_eq(&op_data->op_fid3, &mdt_body->fid1))) {
-			CDEBUG(D_DENTRY, "Found stale data "DFID"("DFID")/"DFID
-			       "\n", PFID(&op_data->op_fid2),
-			       PFID(&op_data->op_fid2), PFID(&mdt_body->fid1));
-			return -ESTALE;
-		}
-	}
-
 	rc = it_open_error(DISP_LOOKUP_EXECD, it);
 	if (rc)
 		return rc;
@@ -980,10 +935,10 @@ static int mdc_finish_intent_lock(struct obd_export *exp,
 
 		LDLM_DEBUG(lock, "matching against this");
 
-		LASSERTF(fid_res_name_eq(&mdt_body->fid1,
+		LASSERTF(fid_res_name_eq(&mdt_body->mbo_fid1,
 					 &lock->l_resource->lr_name),
 			 "Lock res_id: "DLDLMRES", fid: "DFID"\n",
-			 PLDLMRES(lock->l_resource), PFID(&mdt_body->fid1));
+			 PLDLMRES(lock->l_resource), PFID(&mdt_body->mbo_fid1));
 		LDLM_LOCK_PUT(lock);
 
 		memcpy(&old_lock, lockh, sizeof(*lockh));
@@ -998,8 +953,8 @@ static int mdc_finish_intent_lock(struct obd_export *exp,
 	}
 	CDEBUG(D_DENTRY,
 	       "D_IT dentry %.*s intent: %s status %d disp %x rc %d\n",
-	       op_data->op_namelen, op_data->op_name, ldlm_it2str(it->it_op),
-	       it->it_status, it->it_disposition, rc);
+	       (int)op_data->op_namelen, op_data->op_name,
+	       ldlm_it2str(it->it_op), it->it_status, it->it_disposition, rc);
 	return rc;
 }
 
@@ -1042,6 +997,9 @@ int mdc_revalidate_lock(struct obd_export *exp, struct lookup_intent *it,
 						  MDS_INODELOCK_LOOKUP |
 						  MDS_INODELOCK_PERM;
 			break;
+		case IT_READDIR:
+			policy.l_inodebits.bits = MDS_INODELOCK_UPDATE;
+			break;
 		case IT_LAYOUT:
 			policy.l_inodebits.bits = MDS_INODELOCK_LAYOUT;
 			break;
@@ -1095,10 +1053,8 @@ int mdc_revalidate_lock(struct obd_export *exp, struct lookup_intent *it,
  * child lookup.
  */
 int mdc_intent_lock(struct obd_export *exp, struct md_op_data *op_data,
-		    void *lmm, int lmmsize, struct lookup_intent *it,
-		    int lookup_flags, struct ptlrpc_request **reqp,
-		    ldlm_blocking_callback cb_blocking,
-		    __u64 extra_lock_flags)
+		    struct lookup_intent *it, struct ptlrpc_request **reqp,
+		    ldlm_blocking_callback cb_blocking, __u64 extra_lock_flags)
 {
 	struct ldlm_enqueue_info einfo = {
 		.ei_type	= LDLM_IBITS,
@@ -1112,14 +1068,14 @@ int mdc_intent_lock(struct obd_export *exp, struct md_op_data *op_data,
 	LASSERT(it);
 
 	CDEBUG(D_DLMTRACE, "(name: %.*s,"DFID") in obj "DFID
-		", intent: %s flags %#Lo\n", op_data->op_namelen,
+		", intent: %s flags %#Lo\n", (int)op_data->op_namelen,
 		op_data->op_name, PFID(&op_data->op_fid2),
 		PFID(&op_data->op_fid1), ldlm_it2str(it->it_op),
 		it->it_flags);
 
 	lockh.cookie = 0;
 	if (fid_is_sane(&op_data->op_fid2) &&
-	    (it->it_op & (IT_LOOKUP | IT_GETATTR))) {
+	    (it->it_op & (IT_LOOKUP | IT_GETATTR | IT_READDIR))) {
 		/* We could just return 1 immediately, but since we should only
 		 * be called in revalidate_it if we already have a lock, let's
 		 * verify that.
@@ -1135,13 +1091,13 @@ int mdc_intent_lock(struct obd_export *exp, struct md_op_data *op_data,
 
 	/* For case if upper layer did not alloc fid, do it now. */
 	if (!fid_is_sane(&op_data->op_fid2) && it->it_op & IT_CREAT) {
-		rc = mdc_fid_alloc(exp, &op_data->op_fid2, op_data);
+		rc = mdc_fid_alloc(NULL, exp, &op_data->op_fid2, op_data);
 		if (rc < 0) {
 			CERROR("Can't alloc new fid, rc %d\n", rc);
 			return rc;
 		}
 	}
-	rc = mdc_enqueue(exp, &einfo, it, op_data, &lockh, lmm, lmmsize, NULL,
+	rc = mdc_enqueue(exp, &einfo, NULL, it, op_data, &lockh,
 			 extra_lock_flags);
 	if (rc < 0)
 		return rc;
@@ -1170,7 +1126,7 @@ static int mdc_intent_getattr_async_interpret(const struct lu_env *env,
 
 	obddev = class_exp2obd(exp);
 
-	mdc_exit_request(&obddev->u.cli);
+	obd_put_request_slot(&obddev->u.cli);
 	if (OBD_FAIL_CHECK(OBD_FAIL_MDC_GETATTR_ENQUEUE))
 		rc = -ETIMEDOUT;
 
@@ -1222,15 +1178,15 @@ int mdc_intent_getattr_async(struct obd_export *exp,
 
 	CDEBUG(D_DLMTRACE,
 	       "name: %.*s in inode " DFID ", intent: %s flags %#Lo\n",
-	       op_data->op_namelen, op_data->op_name, PFID(&op_data->op_fid1),
-	       ldlm_it2str(it->it_op), it->it_flags);
+	       (int)op_data->op_namelen, op_data->op_name,
+	       PFID(&op_data->op_fid1), ldlm_it2str(it->it_op), it->it_flags);
 
 	fid_build_reg_res_name(&op_data->op_fid1, &res_id);
 	req = mdc_intent_getattr_pack(exp, it, op_data);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
-	rc = mdc_enter_request(&obddev->u.cli);
+	rc = obd_get_request_slot(&obddev->u.cli);
 	if (rc != 0) {
 		ptlrpc_req_finished(req);
 		return rc;
@@ -1239,7 +1195,7 @@ int mdc_intent_getattr_async(struct obd_export *exp,
 	rc = ldlm_cli_enqueue(exp, &req, einfo, &res_id, &policy, &flags, NULL,
 			      0, LVB_T_NONE, &minfo->mi_lockh, 1);
 	if (rc < 0) {
-		mdc_exit_request(&obddev->u.cli);
+		obd_put_request_slot(&obddev->u.cli);
 		ptlrpc_req_finished(req);
 		return rc;
 	}
diff --git a/drivers/staging/lustre/lustre/mdc/mdc_reint.c b/drivers/staging/lustre/lustre/mdc/mdc_reint.c
index 5dba2c813857..c921e471fa27 100644
--- a/drivers/staging/lustre/lustre/mdc/mdc_reint.c
+++ b/drivers/staging/lustre/lustre/mdc/mdc_reint.c
@@ -86,7 +86,7 @@ int mdc_resource_get_unused(struct obd_export *exp, const struct lu_fid *fid,
 	fid_build_reg_res_name(fid, &res_id);
 	res = ldlm_resource_get(exp->exp_obd->obd_namespace,
 				NULL, &res_id, 0, 0);
-	if (!res)
+	if (IS_ERR(res))
 		return 0;
 	LDLM_RESOURCE_ADDREF(res);
 	/* Initialize ibits lock policy. */
@@ -99,7 +99,7 @@ int mdc_resource_get_unused(struct obd_export *exp, const struct lu_fid *fid,
 }
 
 int mdc_setattr(struct obd_export *exp, struct md_op_data *op_data,
-		void *ea, int ealen, void *ea2, int ea2len,
+		void *ea, size_t ealen, void *ea2, size_t ea2len,
 		struct ptlrpc_request **request, struct md_open_data **mod)
 {
 	LIST_HEAD(cancels);
@@ -110,11 +110,10 @@ int mdc_setattr(struct obd_export *exp, struct md_op_data *op_data,
 	__u64 bits;
 
 	bits = MDS_INODELOCK_UPDATE;
-	if (op_data->op_attr.ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID))
+	if (op_data->op_attr.ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID))
 		bits |= MDS_INODELOCK_LOOKUP;
 	if ((op_data->op_flags & MF_MDC_CANCEL_FID1) &&
-	    (fid_is_sane(&op_data->op_fid1)) &&
-	    !OBD_FAIL_CHECK(OBD_FAIL_LDLM_BL_CALLBACK_NET))
+	    (fid_is_sane(&op_data->op_fid1)))
 		count = mdc_resource_get_unused(exp, &op_data->op_fid1,
 						&cancels, LCK_EX, bits);
 	req = ptlrpc_request_alloc(class_exp2cliimp(exp),
@@ -177,8 +176,8 @@ int mdc_setattr(struct obd_export *exp, struct md_op_data *op_data,
 
 		epoch = req_capsule_client_get(&req->rq_pill, &RMF_MDT_EPOCH);
 		body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
-		epoch->handle = body->handle;
-		epoch->ioepoch = body->ioepoch;
+		epoch->handle = body->mbo_handle;
+		epoch->ioepoch = body->mbo_ioepoch;
 		req->rq_replay_cb = mdc_replay_open;
 	/** bug 3633, open may be committed and estale answer is not error */
 	} else if (rc == -ESTALE && (op_data->op_flags & MF_SOM_CHANGE)) {
@@ -197,9 +196,9 @@ int mdc_setattr(struct obd_export *exp, struct md_op_data *op_data,
 }
 
 int mdc_create(struct obd_export *exp, struct md_op_data *op_data,
-	       const void *data, int datalen, int mode, __u32 uid, __u32 gid,
-	       cfs_cap_t cap_effective, __u64 rdev,
-	       struct ptlrpc_request **request)
+	       const void *data, size_t datalen, umode_t mode,
+	       uid_t uid, gid_t gid, cfs_cap_t cap_effective,
+	       __u64 rdev, struct ptlrpc_request **request)
 {
 	struct ptlrpc_request *req;
 	int level, rc;
@@ -214,11 +213,9 @@ int mdc_create(struct obd_export *exp, struct md_op_data *op_data,
 		 * mdc_fid_alloc() may return errno 1 in case of switch to new
 		 * sequence, handle this.
 		 */
-		rc = mdc_fid_alloc(exp, &op_data->op_fid2, op_data);
-		if (rc < 0) {
-			CERROR("Can't alloc new fid, rc %d\n", rc);
+		rc = mdc_fid_alloc(NULL, exp, &op_data->op_fid2, op_data);
+		if (rc < 0)
 			return rc;
-		}
 	}
 
 rebuild:
@@ -307,14 +304,12 @@ int mdc_unlink(struct obd_export *exp, struct md_op_data *op_data,
 	LASSERT(!req);
 
 	if ((op_data->op_flags & MF_MDC_CANCEL_FID1) &&
-	    (fid_is_sane(&op_data->op_fid1)) &&
-	    !OBD_FAIL_CHECK(OBD_FAIL_LDLM_BL_CALLBACK_NET))
+	    (fid_is_sane(&op_data->op_fid1)))
 		count = mdc_resource_get_unused(exp, &op_data->op_fid1,
 						&cancels, LCK_EX,
 						MDS_INODELOCK_UPDATE);
 	if ((op_data->op_flags & MF_MDC_CANCEL_FID3) &&
-	    (fid_is_sane(&op_data->op_fid3)) &&
-	    !OBD_FAIL_CHECK(OBD_FAIL_LDLM_BL_CALLBACK_NET))
+	    (fid_is_sane(&op_data->op_fid3)))
 		count += mdc_resource_get_unused(exp, &op_data->op_fid3,
 						 &cancels, LCK_EX,
 						 MDS_INODELOCK_FULL);
@@ -394,7 +389,7 @@ int mdc_link(struct obd_export *exp, struct md_op_data *op_data,
 }
 
 int mdc_rename(struct obd_export *exp, struct md_op_data *op_data,
-	       const char *old, int oldlen, const char *new, int newlen,
+	       const char *old, size_t oldlen, const char *new, size_t newlen,
 	       struct ptlrpc_request **request)
 {
 	LIST_HEAD(cancels);
@@ -431,7 +426,8 @@ int mdc_rename(struct obd_export *exp, struct md_op_data *op_data,
 	}
 
 	req_capsule_set_size(&req->rq_pill, &RMF_NAME, RCL_CLIENT, oldlen + 1);
-	req_capsule_set_size(&req->rq_pill, &RMF_SYMTGT, RCL_CLIENT, newlen+1);
+	req_capsule_set_size(&req->rq_pill, &RMF_SYMTGT, RCL_CLIENT,
+			     newlen + 1);
 
 	rc = mdc_prep_elc_req(exp, req, MDS_REINT, &cancels, count);
 	if (rc) {
diff --git a/drivers/staging/lustre/lustre/mdc/mdc_request.c b/drivers/staging/lustre/lustre/mdc/mdc_request.c
index 542801f04b0d..f56ea643f9bf 100644
--- a/drivers/staging/lustre/lustre/mdc/mdc_request.c
+++ b/drivers/staging/lustre/lustre/mdc/mdc_request.c
@@ -39,7 +39,9 @@
 # include <linux/utsname.h>
 
 #include "../include/lustre_acl.h"
+#include "../include/lustre/lustre_ioctl.h"
 #include "../include/obd_class.h"
+#include "../include/lustre_lmv.h"
 #include "../include/lustre_fid.h"
 #include "../include/lprocfs_status.h"
 #include "../include/lustre_param.h"
@@ -57,16 +59,16 @@ static inline int mdc_queue_wait(struct ptlrpc_request *req)
 	struct client_obd *cli = &req->rq_import->imp_obd->u.cli;
 	int rc;
 
-	/* mdc_enter_request() ensures that this client has no more
+	/* obd_get_request_slot() ensures that this client has no more
 	 * than cl_max_rpcs_in_flight RPCs simultaneously inf light
 	 * against an MDT.
 	 */
-	rc = mdc_enter_request(cli);
+	rc = obd_get_request_slot(cli);
 	if (rc != 0)
 		return rc;
 
 	rc = ptlrpc_queue_wait(req);
-	mdc_exit_request(cli);
+	obd_put_request_slot(cli);
 
 	return rc;
 }
@@ -98,7 +100,7 @@ static int mdc_getstatus(struct obd_export *exp, struct lu_fid *rootfid)
 		goto out;
 	}
 
-	*rootfid = body->fid1;
+	*rootfid = body->mbo_fid1;
 	CDEBUG(D_NET,
 	       "root fid="DFID", last_committed=%llu\n",
 	       PFID(rootfid),
@@ -136,12 +138,12 @@ static int mdc_getattr_common(struct obd_export *exp,
 	if (!body)
 		return -EPROTO;
 
-	CDEBUG(D_NET, "mode: %o\n", body->mode);
+	CDEBUG(D_NET, "mode: %o\n", body->mbo_mode);
 
 	mdc_update_max_ea_from_body(exp, body);
-	if (body->eadatasize != 0) {
+	if (body->mbo_eadatasize != 0) {
 		eadata = req_capsule_server_sized_get(pill, &RMF_MDT_MD,
-						      body->eadatasize);
+						      body->mbo_eadatasize);
 		if (!eadata)
 			return -EPROTO;
 	}
@@ -230,32 +232,6 @@ static int mdc_getattr_name(struct obd_export *exp, struct md_op_data *op_data,
 	return rc;
 }
 
-static int mdc_is_subdir(struct obd_export *exp,
-			 const struct lu_fid *pfid,
-			 const struct lu_fid *cfid,
-			 struct ptlrpc_request **request)
-{
-	struct ptlrpc_request  *req;
-	int		     rc;
-
-	*request = NULL;
-	req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp),
-					&RQF_MDS_IS_SUBDIR, LUSTRE_MDS_VERSION,
-					MDS_IS_SUBDIR);
-	if (!req)
-		return -ENOMEM;
-
-	mdc_is_subdir_pack(req, pfid, cfid, 0);
-	ptlrpc_request_set_replen(req);
-
-	rc = ptlrpc_queue_wait(req);
-	if (rc && rc != -EREMOTE)
-		ptlrpc_req_finished(req);
-	else
-		*request = req;
-	return rc;
-}
-
 static int mdc_xattr_common(struct obd_export *exp,
 			    const struct req_format *fmt,
 			    const struct lu_fid *fid,
@@ -397,15 +373,15 @@ static int mdc_unpack_acl(struct ptlrpc_request *req, struct lustre_md *md)
 	void		   *buf;
 	int		     rc;
 
-	if (!body->aclsize)
+	if (!body->mbo_aclsize)
 		return 0;
 
-	buf = req_capsule_server_sized_get(pill, &RMF_ACL, body->aclsize);
+	buf = req_capsule_server_sized_get(pill, &RMF_ACL, body->mbo_aclsize);
 
 	if (!buf)
 		return -EPROTO;
 
-	acl = posix_acl_from_xattr(&init_user_ns, buf, body->aclsize);
+	acl = posix_acl_from_xattr(&init_user_ns, buf, body->mbo_aclsize);
 	if (!acl)
 		return 0;
 
@@ -443,24 +419,24 @@ static int mdc_get_lustre_md(struct obd_export *exp,
 
 	md->body = req_capsule_server_get(pill, &RMF_MDT_BODY);
 
-	if (md->body->valid & OBD_MD_FLEASIZE) {
+	if (md->body->mbo_valid & OBD_MD_FLEASIZE) {
 		int lmmsize;
 		struct lov_mds_md *lmm;
 
-		if (!S_ISREG(md->body->mode)) {
+		if (!S_ISREG(md->body->mbo_mode)) {
 			CDEBUG(D_INFO,
 			       "OBD_MD_FLEASIZE set, should be a regular file, but is not\n");
 			rc = -EPROTO;
 			goto out;
 		}
 
-		if (md->body->eadatasize == 0) {
+		if (md->body->mbo_eadatasize == 0) {
 			CDEBUG(D_INFO,
 			       "OBD_MD_FLEASIZE set, but eadatasize 0\n");
 			rc = -EPROTO;
 			goto out;
 		}
-		lmmsize = md->body->eadatasize;
+		lmmsize = md->body->mbo_eadatasize;
 		lmm = req_capsule_server_sized_get(pill, &RMF_MDT_MD, lmmsize);
 		if (!lmm) {
 			rc = -EPROTO;
@@ -471,7 +447,7 @@ static int mdc_get_lustre_md(struct obd_export *exp,
 		if (rc < 0)
 			goto out;
 
-		if (rc < sizeof(*md->lsm)) {
+		if (rc < (typeof(rc))sizeof(*md->lsm)) {
 			CDEBUG(D_INFO,
 			       "lsm size too small: rc < sizeof (*md->lsm) (%d < %d)\n",
 			       rc, (int)sizeof(*md->lsm));
@@ -479,24 +455,24 @@ static int mdc_get_lustre_md(struct obd_export *exp,
 			goto out;
 		}
 
-	} else if (md->body->valid & OBD_MD_FLDIREA) {
+	} else if (md->body->mbo_valid & OBD_MD_FLDIREA) {
 		int lmvsize;
 		struct lov_mds_md *lmv;
 
-		if (!S_ISDIR(md->body->mode)) {
+		if (!S_ISDIR(md->body->mbo_mode)) {
 			CDEBUG(D_INFO,
 			       "OBD_MD_FLDIREA set, should be a directory, but is not\n");
 			rc = -EPROTO;
 			goto out;
 		}
 
-		if (md->body->eadatasize == 0) {
+		if (md->body->mbo_eadatasize == 0) {
 			CDEBUG(D_INFO,
 			       "OBD_MD_FLDIREA is set, but eadatasize 0\n");
 			return -EPROTO;
 		}
-		if (md->body->valid & OBD_MD_MEA) {
-			lmvsize = md->body->eadatasize;
+		if (md->body->mbo_valid & OBD_MD_MEA) {
+			lmvsize = md->body->mbo_eadatasize;
 			lmv = req_capsule_server_sized_get(pill, &RMF_MDT_MD,
 							   lmvsize);
 			if (!lmv) {
@@ -504,15 +480,15 @@ static int mdc_get_lustre_md(struct obd_export *exp,
 				goto out;
 			}
 
-			rc = obd_unpackmd(md_exp, (void *)&md->mea, lmv,
+			rc = obd_unpackmd(md_exp, (void *)&md->lmv, lmv,
 					  lmvsize);
 			if (rc < 0)
 				goto out;
 
-			if (rc < sizeof(*md->mea)) {
+			if (rc < (typeof(rc))sizeof(*md->lmv)) {
 				CDEBUG(D_INFO,
-				       "size too small: rc < sizeof(*md->mea) (%d < %d)\n",
-					rc, (int)sizeof(*md->mea));
+				       "size too small: rc < sizeof(*md->lmv) (%d < %d)\n",
+					rc, (int)sizeof(*md->lmv));
 				rc = -EPROTO;
 				goto out;
 			}
@@ -520,12 +496,12 @@ static int mdc_get_lustre_md(struct obd_export *exp,
 	}
 	rc = 0;
 
-	if (md->body->valid & OBD_MD_FLACL) {
+	if (md->body->mbo_valid & OBD_MD_FLACL) {
 		/* for ACL, it's possible that FLACL is set but aclsize is zero.
 		 * only when aclsize != 0 there's an actual segment for ACL
 		 * in reply buffer.
 		 */
-		if (md->body->aclsize) {
+		if (md->body->mbo_aclsize) {
 			rc = mdc_unpack_acl(req, md);
 			if (rc)
 				goto out;
@@ -580,9 +556,9 @@ void mdc_replay_open(struct ptlrpc_request *req)
 
 		file_fh = &och->och_fh;
 		CDEBUG(D_HA, "updating handle from %#llx to %#llx\n",
-		       file_fh->cookie, body->handle.cookie);
+		       file_fh->cookie, body->mbo_handle.cookie);
 		old = *file_fh;
-		*file_fh = body->handle;
+		*file_fh = body->mbo_handle;
 	}
 	close_req = mod->mod_close_req;
 	if (close_req) {
@@ -597,7 +573,7 @@ void mdc_replay_open(struct ptlrpc_request *req)
 		if (och)
 			LASSERT(!memcmp(&old, &epoch->handle, sizeof(old)));
 		DEBUG_REQ(D_HA, close_req, "updating close body with new fh");
-		epoch->handle = body->handle;
+		epoch->handle = body->mbo_handle;
 	}
 }
 
@@ -679,11 +655,11 @@ int mdc_set_open_replay_data(struct obd_export *exp,
 		spin_unlock(&open_req->rq_lock);
 	}
 
-	rec->cr_fid2 = body->fid1;
-	rec->cr_ioepoch = body->ioepoch;
-	rec->cr_old_handle.cookie = body->handle.cookie;
+	rec->cr_fid2 = body->mbo_fid1;
+	rec->cr_ioepoch = body->mbo_ioepoch;
+	rec->cr_old_handle.cookie = body->mbo_handle.cookie;
 	open_req->rq_replay_cb = mdc_replay_open;
-	if (!fid_is_sane(&body->fid1)) {
+	if (!fid_is_sane(&body->mbo_fid1)) {
 		DEBUG_REQ(D_ERROR, open_req,
 			  "Saving replay request with insane fid");
 		LBUG();
@@ -701,9 +677,15 @@ static void mdc_free_open(struct md_open_data *mod)
 	    imp_connect_disp_stripe(mod->mod_open_req->rq_import))
 		committed = 1;
 
-	LASSERT(mod->mod_open_req->rq_replay == 0);
-
-	DEBUG_REQ(D_RPCTRACE, mod->mod_open_req, "free open request\n");
+	/*
+	 * No reason to asssert here if the open request has
+	 * rq_replay == 1. It means that mdc_close failed, and
+	 * close request wasn`t sent. It is not fatal to client.
+	 * The worst thing is eviction if the client gets open lock
+	 */
+	DEBUG_REQ(D_RPCTRACE, mod->mod_open_req,
+		  "free open request rq_replay = %d\n",
+		   mod->mod_open_req->rq_replay);
 
 	ptlrpc_request_committed(mod->mod_open_req, committed);
 	if (mod->mod_close_req)
@@ -744,7 +726,7 @@ static void mdc_close_handle_reply(struct ptlrpc_request *req,
 		epoch = req_capsule_client_get(&req->rq_pill, &RMF_MDT_EPOCH);
 
 		epoch->flags |= MF_SOM_AU;
-		if (repbody->valid & OBD_MD_FLGETATTRLOCK)
+		if (repbody->mbo_valid & OBD_MD_FLGETATTRLOCK)
 			op_data->op_flags |= MF_GETATTR_LOCK;
 	}
 }
@@ -763,7 +745,7 @@ static int mdc_close(struct obd_export *exp, struct md_op_data *op_data,
 		req_fmt = &RQF_MDS_RELEASE_CLOSE;
 
 		/* allocate a FID for volatile file */
-		rc = mdc_fid_alloc(exp, &op_data->op_fid2, op_data);
+		rc = mdc_fid_alloc(NULL, exp, &op_data->op_fid2, op_data);
 		if (rc < 0) {
 			CERROR("%s: "DFID" failed to allocate FID: %d\n",
 			       obd->obd_name, PFID(&op_data->op_fid1), rc);
@@ -773,22 +755,10 @@ static int mdc_close(struct obd_export *exp, struct md_op_data *op_data,
 	}
 
 	*request = NULL;
-	req = ptlrpc_request_alloc(class_exp2cliimp(exp), req_fmt);
-	if (!req)
-		return -ENOMEM;
-
-	rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_CLOSE);
-	if (rc) {
-		ptlrpc_request_free(req);
-		return rc;
-	}
-
-	/* To avoid a livelock (bug 7034), we need to send CLOSE RPCs to a
-	 * portal whose threads are not taking any DLM locks and are therefore
-	 * always progressing
-	 */
-	req->rq_request_portal = MDS_READPAGE_PORTAL;
-	ptlrpc_at_set_req_timeout(req);
+	if (OBD_FAIL_CHECK(OBD_FAIL_MDC_CLOSE))
+		req = NULL;
+	else
+		req = ptlrpc_request_alloc(class_exp2cliimp(exp), req_fmt);
 
 	/* Ensure that this close's handle is fixed up during replay. */
 	if (likely(mod)) {
@@ -809,6 +779,29 @@ static int mdc_close(struct obd_export *exp, struct md_op_data *op_data,
 		 CDEBUG(D_HA,
 			"couldn't find open req; expecting close error\n");
 	}
+	if (!req) {
+		/*
+		 * TODO: repeat close after errors
+		 */
+		CWARN("%s: close of FID "DFID" failed, file reference will be dropped when this client unmounts or is evicted\n",
+		      obd->obd_name, PFID(&op_data->op_fid1));
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_CLOSE);
+	if (rc) {
+		ptlrpc_request_free(req);
+		goto out;
+	}
+
+	/*
+	 * To avoid a livelock (bug 7034), we need to send CLOSE RPCs to a
+	 * portal whose threads are not taking any DLM locks and are therefore
+	 * always progressing
+	 */
+	req->rq_request_portal = MDS_READPAGE_PORTAL;
+	ptlrpc_at_set_req_timeout(req);
 
 	mdc_close_pack(req, op_data);
 
@@ -854,6 +847,7 @@ static int mdc_close(struct obd_export *exp, struct md_op_data *op_data,
 		}
 	}
 
+out:
 	if (mod) {
 		if (rc != 0)
 			mod->mod_close_req = NULL;
@@ -936,16 +930,17 @@ static int mdc_done_writing(struct obd_export *exp, struct md_op_data *op_data,
 	return rc;
 }
 
-static int mdc_readpage(struct obd_export *exp, struct md_op_data *op_data,
-			struct page **pages, struct ptlrpc_request **request)
+static int mdc_getpage(struct obd_export *exp, const struct lu_fid *fid,
+		       u64 offset, struct page **pages, int npages,
+		       struct ptlrpc_request **request)
 {
-	struct ptlrpc_request   *req;
 	struct ptlrpc_bulk_desc *desc;
-	int		      i;
-	wait_queue_head_t	      waitq;
-	int		      resends = 0;
-	struct l_wait_info       lwi;
-	int		      rc;
+	struct ptlrpc_request *req;
+	wait_queue_head_t waitq;
+	struct l_wait_info lwi;
+	int resends = 0;
+	int rc;
+	int i;
 
 	*request = NULL;
 	init_waitqueue_head(&waitq);
@@ -964,7 +959,7 @@ restart_bulk:
 	req->rq_request_portal = MDS_READPAGE_PORTAL;
 	ptlrpc_at_set_req_timeout(req);
 
-	desc = ptlrpc_prep_bulk_imp(req, op_data->op_npages, 1, BULK_PUT_SINK,
+	desc = ptlrpc_prep_bulk_imp(req, npages, 1, BULK_PUT_SINK,
 				    MDS_BULK_PORTAL);
 	if (!desc) {
 		ptlrpc_request_free(req);
@@ -972,12 +967,10 @@ restart_bulk:
 	}
 
 	/* NB req now owns desc and will free it when it gets freed */
-	for (i = 0; i < op_data->op_npages; i++)
+	for (i = 0; i < npages; i++)
 		ptlrpc_prep_bulk_page_pin(desc, pages[i], 0, PAGE_SIZE);
 
-	mdc_readdir_pack(req, op_data->op_offset,
-			 PAGE_SIZE * op_data->op_npages,
-			 &op_data->op_fid1);
+	mdc_readdir_pack(req, offset, PAGE_SIZE * npages, fid);
 
 	ptlrpc_request_set_replen(req);
 	rc = ptlrpc_queue_wait(req);
@@ -988,11 +981,12 @@ restart_bulk:
 
 		resends++;
 		if (!client_should_resend(resends, &exp->exp_obd->u.cli)) {
-			CERROR("too many resend retries, returning error\n");
+			CERROR("%s: too many resend retries: rc = %d\n",
+			       exp->exp_obd->obd_name, -EIO);
 			return -EIO;
 		}
-		lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(resends),
-				       NULL, NULL, NULL);
+		lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(resends), NULL, NULL,
+				       NULL);
 		l_wait_event(waitq, 0, &lwi);
 
 		goto restart_bulk;
@@ -1006,9 +1000,9 @@ restart_bulk:
 	}
 
 	if (req->rq_bulk->bd_nob_transferred & ~LU_PAGE_MASK) {
-		CERROR("Unexpected # bytes transferred: %d (%ld expected)\n",
-		       req->rq_bulk->bd_nob_transferred,
-		       PAGE_SIZE * op_data->op_npages);
+		CERROR("%s: unexpected bytes transferred: %d (%ld expected)\n",
+		       exp->exp_obd->obd_name, req->rq_bulk->bd_nob_transferred,
+		       PAGE_SIZE * npages);
 		ptlrpc_req_finished(req);
 		return -EPROTO;
 	}
@@ -1017,6 +1011,453 @@ restart_bulk:
 	return 0;
 }
 
+static void mdc_release_page(struct page *page, int remove)
+{
+	if (remove) {
+		lock_page(page);
+		if (likely(page->mapping))
+			truncate_complete_page(page->mapping, page);
+		unlock_page(page);
+	}
+	put_page(page);
+}
+
+static struct page *mdc_page_locate(struct address_space *mapping, __u64 *hash,
+				    __u64 *start, __u64 *end, int hash64)
+{
+	/*
+	 * Complement of hash is used as an index so that
+	 * radix_tree_gang_lookup() can be used to find a page with starting
+	 * hash _smaller_ than one we are looking for.
+	 */
+	unsigned long offset = hash_x_index(*hash, hash64);
+	struct page *page;
+	int found;
+
+	spin_lock_irq(&mapping->tree_lock);
+	found = radix_tree_gang_lookup(&mapping->page_tree,
+				       (void **)&page, offset, 1);
+	if (found > 0 && !radix_tree_exceptional_entry(page)) {
+		struct lu_dirpage *dp;
+
+		get_page(page);
+		spin_unlock_irq(&mapping->tree_lock);
+		/*
+		 * In contrast to find_lock_page() we are sure that directory
+		 * page cannot be truncated (while DLM lock is held) and,
+		 * hence, can avoid restart.
+		 *
+		 * In fact, page cannot be locked here at all, because
+		 * mdc_read_page_remote does synchronous io.
+		 */
+		wait_on_page_locked(page);
+		if (PageUptodate(page)) {
+			dp = kmap(page);
+			if (BITS_PER_LONG == 32 && hash64) {
+				*start = le64_to_cpu(dp->ldp_hash_start) >> 32;
+				*end   = le64_to_cpu(dp->ldp_hash_end) >> 32;
+				*hash  = *hash >> 32;
+			} else {
+				*start = le64_to_cpu(dp->ldp_hash_start);
+				*end   = le64_to_cpu(dp->ldp_hash_end);
+			}
+			if (unlikely(*start == 1 && *hash == 0))
+				*hash = *start;
+			else
+				LASSERTF(*start <= *hash, "start = %#llx,end = %#llx,hash = %#llx\n",
+					 *start, *end, *hash);
+			CDEBUG(D_VFSTRACE, "offset %lx [%#llx %#llx], hash %#llx\n",
+			       offset, *start, *end, *hash);
+			if (*hash > *end) {
+				kunmap(page);
+				mdc_release_page(page, 0);
+				page = NULL;
+			} else if (*end != *start && *hash == *end) {
+				/*
+				 * upon hash collision, remove this page,
+				 * otherwise put page reference, and
+				 * mdc_read_page_remote() will issue RPC to
+				 * fetch the page we want.
+				 */
+				kunmap(page);
+				mdc_release_page(page,
+						 le32_to_cpu(dp->ldp_flags) & LDF_COLLIDE);
+				page = NULL;
+			}
+		} else {
+			put_page(page);
+			page = ERR_PTR(-EIO);
+		}
+	} else {
+		spin_unlock_irq(&mapping->tree_lock);
+		page = NULL;
+	}
+	return page;
+}
+
+/*
+ * Adjust a set of pages, each page containing an array of lu_dirpages,
+ * so that each page can be used as a single logical lu_dirpage.
+ *
+ * A lu_dirpage is laid out as follows, where s = ldp_hash_start,
+ * e = ldp_hash_end, f = ldp_flags, p = padding, and each "ent" is a
+ * struct lu_dirent.  It has size up to LU_PAGE_SIZE. The ldp_hash_end
+ * value is used as a cookie to request the next lu_dirpage in a
+ * directory listing that spans multiple pages (two in this example):
+ *   ________
+ *  |        |
+ * .|--------v-------   -----.
+ * |s|e|f|p|ent|ent| ... |ent|
+ * '--|--------------   -----'   Each PAGE contains a single
+ *    '------.                   lu_dirpage.
+ * .---------v-------   -----.
+ * |s|e|f|p|ent| 0 | ... | 0 |
+ * '-----------------   -----'
+ *
+ * However, on hosts where the native VM page size (PAGE_SIZE) is
+ * larger than LU_PAGE_SIZE, a single host page may contain multiple
+ * lu_dirpages. After reading the lu_dirpages from the MDS, the
+ * ldp_hash_end of the first lu_dirpage refers to the one immediately
+ * after it in the same PAGE (arrows simplified for brevity, but
+ * in general e0==s1, e1==s2, etc.):
+ *
+ * .--------------------   -----.
+ * |s0|e0|f0|p|ent|ent| ... |ent|
+ * |---v----------------   -----|
+ * |s1|e1|f1|p|ent|ent| ... |ent|
+ * |---v----------------   -----|  Here, each PAGE contains
+ *             ...                 multiple lu_dirpages.
+ * |---v----------------   -----|
+ * |s'|e'|f'|p|ent|ent| ... |ent|
+ * '---|----------------   -----'
+ *     v
+ * .----------------------------.
+ * |        next PAGE           |
+ *
+ * This structure is transformed into a single logical lu_dirpage as follows:
+ *
+ * - Replace e0 with e' so the request for the next lu_dirpage gets the page
+ *   labeled 'next PAGE'.
+ *
+ * - Copy the LDF_COLLIDE flag from f' to f0 to correctly reflect whether
+ *   a hash collision with the next page exists.
+ *
+ * - Adjust the lde_reclen of the ending entry of each lu_dirpage to span
+ *   to the first entry of the next lu_dirpage.
+ */
+#if PAGE_SIZE > LU_PAGE_SIZE
+static void mdc_adjust_dirpages(struct page **pages, int cfs_pgs, int lu_pgs)
+{
+	int i;
+
+	for (i = 0; i < cfs_pgs; i++) {
+		struct lu_dirpage *dp = kmap(pages[i]);
+		__u64 hash_end = le64_to_cpu(dp->ldp_hash_end);
+		__u32 flags = le32_to_cpu(dp->ldp_flags);
+		struct lu_dirpage *first = dp;
+		struct lu_dirent *end_dirent = NULL;
+		struct lu_dirent *ent;
+
+		while (--lu_pgs > 0) {
+			ent = lu_dirent_start(dp);
+			for (end_dirent = ent; ent;
+			     end_dirent = ent, ent = lu_dirent_next(ent));
+
+			/* Advance dp to next lu_dirpage. */
+			dp = (struct lu_dirpage *)((char *)dp + LU_PAGE_SIZE);
+
+			/* Check if we've reached the end of the CFS_PAGE. */
+			if (!((unsigned long)dp & ~PAGE_MASK))
+				break;
+
+			/* Save the hash and flags of this lu_dirpage. */
+			hash_end = le64_to_cpu(dp->ldp_hash_end);
+			flags = le32_to_cpu(dp->ldp_flags);
+
+			/* Check if lu_dirpage contains no entries. */
+			if (!end_dirent)
+				break;
+
+			/*
+			 * Enlarge the end entry lde_reclen from 0 to
+			 * first entry of next lu_dirpage.
+			 */
+			LASSERT(!le16_to_cpu(end_dirent->lde_reclen));
+			end_dirent->lde_reclen =
+				cpu_to_le16((char *)(dp->ldp_entries) -
+					    (char *)end_dirent);
+		}
+
+		first->ldp_hash_end = hash_end;
+		first->ldp_flags &= ~cpu_to_le32(LDF_COLLIDE);
+		first->ldp_flags |= flags & cpu_to_le32(LDF_COLLIDE);
+
+		kunmap(pages[i]);
+	}
+	LASSERTF(lu_pgs == 0, "left = %d", lu_pgs);
+}
+#else
+#define mdc_adjust_dirpages(pages, cfs_pgs, lu_pgs) do {} while (0)
+#endif  /* PAGE_SIZE > LU_PAGE_SIZE */
+
+/* parameters for readdir page */
+struct readpage_param {
+	struct md_op_data	*rp_mod;
+	__u64			rp_off;
+	int			rp_hash64;
+	struct obd_export	*rp_exp;
+	struct md_callback	*rp_cb;
+};
+
+/**
+ * Read pages from server.
+ *
+ * Page in MDS_READPAGE RPC is packed in LU_PAGE_SIZE, and each page contains
+ * a header lu_dirpage which describes the start/end hash, and whether this
+ * page is empty (contains no dir entry) or hash collide with next page.
+ * After client receives reply, several pages will be integrated into dir page
+ * in PAGE_SIZE (if PAGE_SIZE greater than LU_PAGE_SIZE), and the
+ * lu_dirpage for this integrated page will be adjusted.
+ **/
+static int mdc_read_page_remote(void *data, struct page *page0)
+{
+	struct readpage_param *rp = data;
+	struct page **page_pool;
+	struct page *page;
+	struct lu_dirpage *dp;
+	int rd_pgs = 0; /* number of pages read actually */
+	int npages;
+	struct md_op_data *op_data = rp->rp_mod;
+	struct ptlrpc_request *req;
+	int max_pages = op_data->op_max_pages;
+	struct inode *inode;
+	struct lu_fid *fid;
+	int i;
+	int rc;
+
+	LASSERT(max_pages > 0 && max_pages <= PTLRPC_MAX_BRW_PAGES);
+	inode = op_data->op_data;
+	fid = &op_data->op_fid1;
+	LASSERT(inode);
+
+	page_pool = kcalloc(max_pages, sizeof(page), GFP_NOFS);
+	if (page_pool) {
+		page_pool[0] = page0;
+	} else {
+		page_pool = &page0;
+		max_pages = 1;
+	}
+
+	for (npages = 1; npages < max_pages; npages++) {
+		page = page_cache_alloc_cold(inode->i_mapping);
+		if (!page)
+			break;
+		page_pool[npages] = page;
+	}
+
+	rc = mdc_getpage(rp->rp_exp, fid, rp->rp_off, page_pool, npages, &req);
+	if (!rc) {
+		int lu_pgs = req->rq_bulk->bd_nob_transferred;
+
+		rd_pgs = (req->rq_bulk->bd_nob_transferred +
+			  PAGE_SIZE - 1) >> PAGE_SHIFT;
+		lu_pgs >>= LU_PAGE_SHIFT;
+		LASSERT(!(req->rq_bulk->bd_nob_transferred & ~LU_PAGE_MASK));
+
+		CDEBUG(D_INODE, "read %d(%d) pages\n", rd_pgs, lu_pgs);
+
+		mdc_adjust_dirpages(page_pool, rd_pgs, lu_pgs);
+
+		SetPageUptodate(page0);
+	}
+
+	unlock_page(page0);
+	ptlrpc_req_finished(req);
+	CDEBUG(D_CACHE, "read %d/%d pages\n", rd_pgs, npages);
+	for (i = 1; i < npages; i++) {
+		unsigned long offset;
+		__u64 hash;
+		int ret;
+
+		page = page_pool[i];
+
+		if (rc < 0 || i >= rd_pgs) {
+			put_page(page);
+			continue;
+		}
+
+		SetPageUptodate(page);
+
+		dp = kmap(page);
+		hash = le64_to_cpu(dp->ldp_hash_start);
+		kunmap(page);
+
+		offset = hash_x_index(hash, rp->rp_hash64);
+
+		prefetchw(&page->flags);
+		ret = add_to_page_cache_lru(page, inode->i_mapping, offset,
+					    GFP_KERNEL);
+		if (!ret)
+			unlock_page(page);
+		else
+			CDEBUG(D_VFSTRACE, "page %lu add to page cache failed: rc = %d\n",
+			       offset, ret);
+		put_page(page);
+	}
+
+	if (page_pool != &page0)
+		kfree(page_pool);
+
+	return rc;
+}
+
+/**
+ * Read dir page from cache first, if it can not find it, read it from
+ * server and add into the cache.
+ *
+ * \param[in] exp	MDC export
+ * \param[in] op_data	client MD stack parameters, transferring parameters
+ *			between different layers on client MD stack.
+ * \param[in] cb_op	callback required for ldlm lock enqueue during
+ *			read page
+ * \param[in] hash_offset the hash offset of the page to be read
+ * \param[in] ppage	the page to be read
+ *
+ * retval		= 0 get the page successfully
+ *			errno(<0) get the page failed
+ */
+static int mdc_read_page(struct obd_export *exp, struct md_op_data *op_data,
+			 struct md_callback *cb_op, __u64 hash_offset,
+			 struct page **ppage)
+{
+	struct lookup_intent it = { .it_op = IT_READDIR };
+	struct page *page;
+	struct inode *dir = op_data->op_data;
+	struct address_space *mapping;
+	struct lu_dirpage *dp;
+	__u64 start = 0;
+	__u64 end = 0;
+	struct lustre_handle lockh;
+	struct ptlrpc_request *enq_req = NULL;
+	struct readpage_param rp_param;
+	int rc;
+
+	*ppage = NULL;
+
+	LASSERT(dir);
+	mapping = dir->i_mapping;
+
+	rc = mdc_intent_lock(exp, op_data, &it, &enq_req,
+			     cb_op->md_blocking_ast, 0);
+	if (enq_req)
+		ptlrpc_req_finished(enq_req);
+
+	if (rc < 0) {
+		CERROR("%s: "DFID" lock enqueue fails: rc = %d\n",
+		       exp->exp_obd->obd_name, PFID(&op_data->op_fid1), rc);
+		return rc;
+	}
+
+	rc = 0;
+	lockh.cookie = it.it_lock_handle;
+	mdc_set_lock_data(exp, &lockh, dir, NULL);
+
+	rp_param.rp_off = hash_offset;
+	rp_param.rp_hash64 = op_data->op_cli_flags & CLI_HASH64;
+	page = mdc_page_locate(mapping, &rp_param.rp_off, &start, &end,
+			       rp_param.rp_hash64);
+	if (IS_ERR(page)) {
+		CDEBUG(D_INFO, "%s: dir page locate: " DFID " at %llu: rc %ld\n",
+		       exp->exp_obd->obd_name, PFID(&op_data->op_fid1),
+		       rp_param.rp_off, PTR_ERR(page));
+		rc = PTR_ERR(page);
+		goto out_unlock;
+	} else if (page) {
+		/*
+		 * XXX nikita: not entirely correct handling of a corner case:
+		 * suppose hash chain of entries with hash value HASH crosses
+		 * border between pages P0 and P1. First both P0 and P1 are
+		 * cached, seekdir() is called for some entry from the P0 part
+		 * of the chain. Later P0 goes out of cache. telldir(HASH)
+		 * happens and finds P1, as it starts with matching hash
+		 * value. Remaining entries from P0 part of the chain are
+		 * skipped. (Is that really a bug?)
+		 *
+		 * Possible solutions: 0. don't cache P1 is such case, handle
+		 * it as an "overflow" page. 1. invalidate all pages at
+		 * once. 2. use HASH|1 as an index for P1.
+		 */
+		goto hash_collision;
+	}
+
+	rp_param.rp_exp = exp;
+	rp_param.rp_mod = op_data;
+	page = read_cache_page(mapping,
+			       hash_x_index(rp_param.rp_off,
+					    rp_param.rp_hash64),
+			       mdc_read_page_remote, &rp_param);
+	if (IS_ERR(page)) {
+		CERROR("%s: read cache page: "DFID" at %llu: rc %ld\n",
+		       exp->exp_obd->obd_name, PFID(&op_data->op_fid1),
+		       rp_param.rp_off, PTR_ERR(page));
+		rc = PTR_ERR(page);
+		goto out_unlock;
+	}
+
+	wait_on_page_locked(page);
+	(void)kmap(page);
+	if (!PageUptodate(page)) {
+		CERROR("%s: page not updated: "DFID" at %llu: rc %d\n",
+		       exp->exp_obd->obd_name, PFID(&op_data->op_fid1),
+		       rp_param.rp_off, -5);
+		goto fail;
+	}
+	if (!PageChecked(page))
+		SetPageChecked(page);
+	if (PageError(page)) {
+		CERROR("%s: page error: "DFID" at %llu: rc %d\n",
+		       exp->exp_obd->obd_name, PFID(&op_data->op_fid1),
+		       rp_param.rp_off, -5);
+		goto fail;
+	}
+
+hash_collision:
+	dp = page_address(page);
+	if (BITS_PER_LONG == 32 && rp_param.rp_hash64) {
+		start = le64_to_cpu(dp->ldp_hash_start) >> 32;
+		end = le64_to_cpu(dp->ldp_hash_end) >> 32;
+		rp_param.rp_off = hash_offset >> 32;
+	} else {
+		start = le64_to_cpu(dp->ldp_hash_start);
+		end = le64_to_cpu(dp->ldp_hash_end);
+		rp_param.rp_off = hash_offset;
+	}
+	if (end == start) {
+		LASSERT(start == rp_param.rp_off);
+		CWARN("Page-wide hash collision: %#lx\n", (unsigned long)end);
+#if BITS_PER_LONG == 32
+		CWARN("Real page-wide hash collision at [%llu %llu] with hash %llu\n",
+		      le64_to_cpu(dp->ldp_hash_start),
+		      le64_to_cpu(dp->ldp_hash_end), hash_offset);
+#endif
+		/*
+		 * Fetch whole overflow chain...
+		 *
+		 * XXX not yet.
+		 */
+		goto fail;
+	}
+	*ppage = page;
+out_unlock:
+	ldlm_lock_decref(&lockh, it.it_lock_mode);
+	return rc;
+fail:
+	kunmap(page);
+	mdc_release_page(page, 1);
+	rc = -EIO;
+	goto out_unlock;
+}
+
 static int mdc_statfs(const struct lu_env *env,
 		      struct obd_export *exp, struct obd_statfs *osfs,
 		      __u64 max_age, __u32 flags)
@@ -1401,7 +1842,7 @@ out:
 	return rc;
 }
 
-static struct kuc_hdr *changelog_kuc_hdr(char *buf, int len, int flags)
+static struct kuc_hdr *changelog_kuc_hdr(char *buf, size_t len, u32 flags)
 {
 	struct kuc_hdr *lh = (struct kuc_hdr *)buf;
 
@@ -1415,40 +1856,44 @@ static struct kuc_hdr *changelog_kuc_hdr(char *buf, int len, int flags)
 	return lh;
 }
 
-#define D_CHANGELOG 0
-
 struct changelog_show {
 	__u64		cs_startrec;
-	__u32		cs_flags;
+	enum changelog_send_flag	cs_flags;
 	struct file	*cs_fp;
 	char		*cs_buf;
 	struct obd_device *cs_obd;
 };
 
+static inline char *cs_obd_name(struct changelog_show *cs)
+{
+	return cs->cs_obd->obd_name;
+}
+
 static int changelog_kkuc_cb(const struct lu_env *env, struct llog_handle *llh,
 			     struct llog_rec_hdr *hdr, void *data)
 {
 	struct changelog_show *cs = data;
 	struct llog_changelog_rec *rec = (struct llog_changelog_rec *)hdr;
 	struct kuc_hdr *lh;
-	int len, rc;
+	size_t len;
+	int rc;
 
 	if (rec->cr_hdr.lrh_type != CHANGELOG_REC) {
 		rc = -EINVAL;
 		CERROR("%s: not a changelog rec %x/%d: rc = %d\n",
-		       cs->cs_obd->obd_name, rec->cr_hdr.lrh_type,
+		       cs_obd_name(cs), rec->cr_hdr.lrh_type,
 		       rec->cr.cr_type, rc);
 		return rc;
 	}
 
 	if (rec->cr.cr_index < cs->cs_startrec) {
 		/* Skip entries earlier than what we are interested in */
-		CDEBUG(D_CHANGELOG, "rec=%llu start=%llu\n",
+		CDEBUG(D_HSM, "rec=%llu start=%llu\n",
 		       rec->cr.cr_index, cs->cs_startrec);
 		return 0;
 	}
 
-	CDEBUG(D_CHANGELOG, "%llu %02d%-5s %llu 0x%x t="DFID" p="DFID
+	CDEBUG(D_HSM, "%llu %02d%-5s %llu 0x%x t=" DFID " p=" DFID
 		" %.*s\n", rec->cr.cr_index, rec->cr.cr_type,
 		changelog_type2str(rec->cr.cr_type), rec->cr.cr_time,
 		rec->cr.cr_flags & CLF_FLAGMASK,
@@ -1462,20 +1907,21 @@ static int changelog_kkuc_cb(const struct lu_env *env, struct llog_handle *llh,
 	memcpy(lh + 1, &rec->cr, len - sizeof(*lh));
 
 	rc = libcfs_kkuc_msg_put(cs->cs_fp, lh);
-	CDEBUG(D_CHANGELOG, "kucmsg fp %p len %d rc %d\n", cs->cs_fp, len, rc);
+	CDEBUG(D_HSM, "kucmsg fp %p len %zu rc %d\n", cs->cs_fp, len, rc);
 
 	return rc;
 }
 
 static int mdc_changelog_send_thread(void *csdata)
 {
+	enum llog_flag flags = LLOG_F_IS_CAT;
 	struct changelog_show *cs = csdata;
 	struct llog_ctxt *ctxt = NULL;
 	struct llog_handle *llh = NULL;
 	struct kuc_hdr *kuch;
 	int rc;
 
-	CDEBUG(D_CHANGELOG, "changelog to fp=%p start %llu\n",
+	CDEBUG(D_HSM, "changelog to fp=%p start %llu\n",
 	       cs->cs_fp, cs->cs_startrec);
 
 	cs->cs_buf = kzalloc(KUC_CHANGELOG_MSG_MAXSIZE, GFP_NOFS);
@@ -1494,10 +1940,14 @@ static int mdc_changelog_send_thread(void *csdata)
 		       LLOG_OPEN_EXISTS);
 	if (rc) {
 		CERROR("%s: fail to open changelog catalog: rc = %d\n",
-		       cs->cs_obd->obd_name, rc);
+		       cs_obd_name(cs), rc);
 		goto out;
 	}
-	rc = llog_init_handle(NULL, llh, LLOG_F_IS_CAT, NULL);
+
+	if (cs->cs_flags & CHANGELOG_FLAG_JOBID)
+		flags |= LLOG_F_EXT_JOBID;
+
+	rc = llog_init_handle(NULL, llh, flags, NULL);
 	if (rc) {
 		CERROR("llog_init_handle failed %d\n", rc);
 		goto out;
@@ -1550,12 +2000,12 @@ static int mdc_ioc_changelog_send(struct obd_device *obd,
 	if (IS_ERR(task)) {
 		rc = PTR_ERR(task);
 		CERROR("%s: can't start changelog thread: rc = %d\n",
-		       obd->obd_name, rc);
+		       cs_obd_name(cs), rc);
 		kfree(cs);
 	} else {
 		rc = 0;
-		CDEBUG(D_CHANGELOG, "%s: started changelog thread\n",
-		       obd->obd_name);
+		CDEBUG(D_HSM, "%s: started changelog thread\n",
+		       cs_obd_name(cs));
 	}
 
 	CERROR("Failed to start changelog thread: %d\n", rc);
@@ -1669,9 +2119,11 @@ static int mdc_ioc_swap_layouts(struct obd_export *exp,
 	 * with the request RPC to avoid extra RPC round trips
 	 */
 	count = mdc_resource_get_unused(exp, &op_data->op_fid1, &cancels,
-					LCK_CR, MDS_INODELOCK_LAYOUT);
+					LCK_CR, MDS_INODELOCK_LAYOUT |
+					MDS_INODELOCK_XATTR);
 	count += mdc_resource_get_unused(exp, &op_data->op_fid2, &cancels,
-					 LCK_CR, MDS_INODELOCK_LAYOUT);
+					 LCK_CR, MDS_INODELOCK_LAYOUT |
+					 MDS_INODELOCK_XATTR);
 
 	req = ptlrpc_request_alloc(class_exp2cliimp(exp),
 				   &RQF_MDS_SWAP_LAYOUTS);
@@ -1917,7 +2369,7 @@ static void lustre_swab_hai(struct hsm_action_item *h)
 static void lustre_swab_hal(struct hsm_action_list *h)
 {
 	struct hsm_action_item	*hai;
-	int			 i;
+	u32 i;
 
 	__swab32s(&h->hal_version);
 	__swab32s(&h->hal_count);
@@ -1966,14 +2418,14 @@ static int mdc_ioc_hsm_ct_start(struct obd_export *exp,
  * @param val KUC message (kuc_hdr + hsm_action_list)
  * @param len total length of message
  */
-static int mdc_hsm_copytool_send(int len, void *val)
+static int mdc_hsm_copytool_send(size_t len, void *val)
 {
 	struct kuc_hdr		*lh = (struct kuc_hdr *)val;
 	struct hsm_action_list	*hal = (struct hsm_action_list *)(lh + 1);
 
 	if (len < sizeof(*lh) + sizeof(*hal)) {
-		CERROR("Short HSM message %d < %d\n", len,
-		       (int)(sizeof(*lh) + sizeof(*hal)));
+		CERROR("Short HSM message %zu < %zu\n", len,
+		       sizeof(*lh) + sizeof(*hal));
 		return -EPROTO;
 	}
 	if (lh->kuc_magic == __swab16(KUC_MAGIC)) {
@@ -2044,9 +2496,8 @@ static int mdc_set_info_async(const struct lu_env *env,
 		}
 		spin_unlock(&imp->imp_lock);
 
-		rc = do_set_info_async(imp, MDS_SET_INFO, LUSTRE_MDS_VERSION,
-				       keylen, key, vallen, val, set);
-		return rc;
+		return do_set_info_async(imp, MDS_SET_INFO, LUSTRE_MDS_VERSION,
+					 keylen, key, vallen, val, set);
 	}
 	if (KEY_IS(KEY_SPTLRPC_CONF)) {
 		sptlrpc_conf_client_adapt(exp->exp_obd);
@@ -2065,6 +2516,12 @@ static int mdc_set_info_async(const struct lu_env *env,
 		rc = mdc_hsm_copytool_send(vallen, val);
 		return rc;
 	}
+	if (KEY_IS(KEY_DEFAULT_EASIZE)) {
+		u32 *default_easize = val;
+
+		exp->exp_obd->u.cli.cl_default_mds_easize = *default_easize;
+		return 0;
+	}
 
 	CERROR("Unknown key %s\n", (char *)key);
 	return -EINVAL;
@@ -2077,18 +2534,18 @@ static int mdc_get_info(const struct lu_env *env, struct obd_export *exp,
 	int rc = -EINVAL;
 
 	if (KEY_IS(KEY_MAX_EASIZE)) {
-		int mdsize, *max_easize;
+		u32 mdsize, *max_easize;
 
 		if (*vallen != sizeof(int))
 			return -EINVAL;
-		mdsize = *(int *)val;
+		mdsize = *(u32 *)val;
 		if (mdsize > exp->exp_obd->u.cli.cl_max_mds_easize)
 			exp->exp_obd->u.cli.cl_max_mds_easize = mdsize;
 		max_easize = val;
 		*max_easize = exp->exp_obd->u.cli.cl_max_mds_easize;
 		return 0;
 	} else if (KEY_IS(KEY_DEFAULT_EASIZE)) {
-		int *default_easize;
+		u32 *default_easize;
 
 		if (*vallen != sizeof(int))
 			return -EINVAL;
@@ -2105,7 +2562,7 @@ static int mdc_get_info(const struct lu_env *env, struct obd_export *exp,
 		*data = imp->imp_connect_data;
 		return 0;
 	} else if (KEY_IS(KEY_TGT_COUNT)) {
-		*((int *)val) = 1;
+		*((u32 *)val) = 1;
 		return 0;
 	}
 
@@ -2199,13 +2656,13 @@ static int mdc_import_event(struct obd_device *obd, struct obd_import *imp,
 	return rc;
 }
 
-int mdc_fid_alloc(struct obd_export *exp, struct lu_fid *fid,
-		  struct md_op_data *op_data)
+int mdc_fid_alloc(const struct lu_env *env, struct obd_export *exp,
+		  struct lu_fid *fid, struct md_op_data *op_data)
 {
 	struct client_obd *cli = &exp->exp_obd->u.cli;
 	struct lu_client_seq *seq = cli->cl_seq;
 
-	return seq_client_alloc_fid(NULL, seq, fid);
+	return seq_client_alloc_fid(env, seq, fid);
 }
 
 static struct obd_uuid *mdc_get_uuid(struct obd_export *exp)
@@ -2333,8 +2790,8 @@ err_rpc_lock:
  * a large number of stripes is possible.  If a larger reply buffer is
  * required it will be reallocated in the ptlrpc layer due to overflow.
  */
-static int mdc_init_ea_size(struct obd_export *exp, int easize,
-			    int def_easize, int cookiesize, int def_cookiesize)
+static int mdc_init_ea_size(struct obd_export *exp, u32 easize, u32 def_easize,
+			    u32 cookiesize, u32 def_cookiesize)
 {
 	struct obd_device *obd = exp->exp_obd;
 	struct client_obd *cli = &obd->u.cli;
@@ -2430,7 +2887,6 @@ static struct obd_ops mdc_obd_ops = {
 static struct md_ops mdc_md_ops = {
 	.getstatus		= mdc_getstatus,
 	.null_inode		= mdc_null_inode,
-	.find_cbdata		= mdc_find_cbdata,
 	.close			= mdc_close,
 	.create			= mdc_create,
 	.done_writing		= mdc_done_writing,
@@ -2439,13 +2895,12 @@ static struct md_ops mdc_md_ops = {
 	.getattr_name		= mdc_getattr_name,
 	.intent_lock		= mdc_intent_lock,
 	.link			= mdc_link,
-	.is_subdir		= mdc_is_subdir,
 	.rename			= mdc_rename,
 	.setattr		= mdc_setattr,
 	.setxattr		= mdc_setxattr,
 	.getxattr		= mdc_getxattr,
 	.sync			= mdc_sync,
-	.readpage		= mdc_readpage,
+	.read_page		= mdc_read_page,
 	.unlink			= mdc_unlink,
 	.cancel_unused		= mdc_cancel_unused,
 	.init_ea_size		= mdc_init_ea_size,
diff --git a/drivers/staging/lustre/lustre/mgc/mgc_request.c b/drivers/staging/lustre/lustre/mgc/mgc_request.c
index 9d0bd4745865..23374cae5133 100644
--- a/drivers/staging/lustre/lustre/mgc/mgc_request.c
+++ b/drivers/staging/lustre/lustre/mgc/mgc_request.c
@@ -549,8 +549,9 @@ static int mgc_requeue_thread(void *data)
 		 * caused the lock revocation to finish its setup, plus some
 		 * random so everyone doesn't try to reconnect at once.
 		 */
-		to = MGC_TIMEOUT_MIN_SECONDS * HZ;
-		to += rand * HZ / 100; /* rand is centi-seconds */
+		to = msecs_to_jiffies(MGC_TIMEOUT_MIN_SECONDS * MSEC_PER_SEC);
+		/* rand is centi-seconds */
+		to += msecs_to_jiffies(rand * MSEC_PER_SEC / 100);
 		lwi = LWI_TIMEOUT(to, NULL, NULL);
 		l_wait_event(rq_waitq, rq_state & (RQ_STOP | RQ_PRECLEANUP),
 			     &lwi);
@@ -1158,7 +1159,7 @@ static int mgc_apply_recover_logs(struct obd_device *mgc,
 
 	while (datalen > 0) {
 		int   entry_len = sizeof(*entry);
-		int   is_ost;
+		int is_ost, i;
 		struct obd_device *obd;
 		char *obdname;
 		char *cname;
@@ -1264,11 +1265,17 @@ static int mgc_apply_recover_logs(struct obd_device *mgc,
 			continue;
 		}
 
-		/* TODO: iterate all nids to find one */
+		/* iterate all nids to find one */
 		/* find uuid by nid */
-		rc = client_import_find_conn(obd->u.cli.cl_import,
-					     entry->u.nids[0],
-					     (struct obd_uuid *)uuid);
+		rc = -ENOENT;
+		for (i = 0; i < entry->mne_nid_count; i++) {
+			rc = client_import_find_conn(obd->u.cli.cl_import,
+						     entry->u.nids[0],
+						     (struct obd_uuid *)uuid);
+			if (!rc)
+				break;
+		}
+
 		up_read(&obd->u.cli.cl_sem);
 		if (rc < 0) {
 			CERROR("mgc: cannot find uuid by nid %s\n",
@@ -1428,14 +1435,12 @@ again:
 	}
 
 	mne_swab = !!ptlrpc_rep_need_swab(req);
-#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 2, 50, 0)
+#if OBD_OCD_VERSION(3, 0, 53, 0) > LUSTRE_VERSION_CODE
 	/* This import flag means the server did an extra swab of IR MNE
 	 * records (fixed in LU-1252), reverse it here if needed. LU-1644
 	 */
 	if (unlikely(req->rq_import->imp_need_mne_swab))
 		mne_swab = !mne_swab;
-#else
-#warning "LU-1644: Remove old OBD_CONNECT_MNE_SWAB fixup and imp_need_mne_swab"
 #endif
 
 	for (i = 0; i < nrpages && ealen > 0; i++) {
@@ -1740,8 +1745,6 @@ static struct obd_ops mgc_obd_ops = {
 	.del_conn       = client_import_del_conn,
 	.connect        = client_connect_import,
 	.disconnect     = client_disconnect_export,
-	/* .enqueue     = mgc_enqueue, */
-	/* .iocontrol   = mgc_iocontrol, */
 	.set_info_async = mgc_set_info_async,
 	.get_info       = mgc_get_info,
 	.import_event   = mgc_import_event,
diff --git a/drivers/staging/lustre/lustre/obdclass/Makefile b/drivers/staging/lustre/lustre/obdclass/Makefile
index df7e47f35a66..b42e109b30e0 100644
--- a/drivers/staging/lustre/lustre/obdclass/Makefile
+++ b/drivers/staging/lustre/lustre/obdclass/Makefile
@@ -3,6 +3,6 @@ obj-$(CONFIG_LUSTRE_FS) += obdclass.o
 obdclass-y := linux/linux-module.o linux/linux-obdo.o linux/linux-sysctl.o \
 	      llog.o llog_cat.o llog_obd.o llog_swab.o class_obd.o debug.o \
 	      genops.o uuid.o lprocfs_status.o lprocfs_counters.o \
-	      lustre_handles.o lustre_peer.o statfs_pack.o \
+	      lustre_handles.o lustre_peer.o statfs_pack.o linkea.o \
 	      obdo.o obd_config.o obd_mount.o lu_object.o lu_ref.o \
 	      cl_object.o cl_page.o cl_lock.o cl_io.o kernelcomm.o
diff --git a/drivers/staging/lustre/lustre/obdclass/cl_io.c b/drivers/staging/lustre/lustre/obdclass/cl_io.c
index e72f1fc00a13..bc4b7b6b9a20 100644
--- a/drivers/staging/lustre/lustre/obdclass/cl_io.c
+++ b/drivers/staging/lustre/lustre/obdclass/cl_io.c
@@ -73,7 +73,6 @@ int cl_io_is_going(const struct lu_env *env)
 {
 	return cl_env_info(env)->clt_current_io != NULL;
 }
-EXPORT_SYMBOL(cl_io_is_going);
 
 /**
  * cl_io invariant that holds at all times when exported cl_io_*() functions
@@ -859,9 +858,6 @@ void cl_page_list_add(struct cl_page_list *plist, struct cl_page *page)
 	LASSERT(page->cp_owner);
 	LINVRNT(plist->pl_owner == current);
 
-	lockdep_off();
-	mutex_lock(&page->cp_mutex);
-	lockdep_on();
 	LASSERT(list_empty(&page->cp_batch));
 	list_add_tail(&page->cp_batch, &plist->pl_pages);
 	++plist->pl_nr;
@@ -877,12 +873,10 @@ void cl_page_list_del(const struct lu_env *env, struct cl_page_list *plist,
 		      struct cl_page *page)
 {
 	LASSERT(plist->pl_nr > 0);
+	LASSERT(cl_page_is_vmlocked(env, page));
 	LINVRNT(plist->pl_owner == current);
 
 	list_del_init(&page->cp_batch);
-	lockdep_off();
-	mutex_unlock(&page->cp_mutex);
-	lockdep_on();
 	--plist->pl_nr;
 	lu_ref_del_at(&page->cp_reference, &page->cp_queue_ref, "queue", plist);
 	cl_page_put(env, page);
@@ -941,8 +935,6 @@ void cl_page_list_splice(struct cl_page_list *list, struct cl_page_list *head)
 }
 EXPORT_SYMBOL(cl_page_list_splice);
 
-void cl_page_disown0(const struct lu_env *env,
-		     struct cl_io *io, struct cl_page *pg);
 
 /**
  * Disowns pages in a queue.
@@ -959,9 +951,6 @@ void cl_page_list_disown(const struct lu_env *env,
 		LASSERT(plist->pl_nr > 0);
 
 		list_del_init(&page->cp_batch);
-		lockdep_off();
-		mutex_unlock(&page->cp_mutex);
-		lockdep_on();
 		--plist->pl_nr;
 		/*
 		 * cl_page_disown0 rather than usual cl_page_disown() is used,
@@ -1221,7 +1210,7 @@ void cl_req_page_add(const struct lu_env *env,
 {
 	struct cl_object  *obj;
 	struct cl_req_obj *rqo;
-	int i;
+	unsigned int i;
 
 	LASSERT(list_empty(&page->cp_flight));
 	LASSERT(!page->cp_req);
@@ -1268,7 +1257,7 @@ EXPORT_SYMBOL(cl_req_page_done);
  */
 int cl_req_prep(const struct lu_env *env, struct cl_req *req)
 {
-	int i;
+	unsigned int i;
 	int result;
 	const struct cl_req_slice *slice;
 
@@ -1301,7 +1290,7 @@ void cl_req_attr_set(const struct lu_env *env, struct cl_req *req,
 {
 	const struct cl_req_slice *slice;
 	struct cl_page	    *page;
-	int i;
+	unsigned int i;
 
 	LASSERT(!list_empty(&req->crq_pages));
 
diff --git a/drivers/staging/lustre/lustre/obdclass/cl_object.c b/drivers/staging/lustre/lustre/obdclass/cl_object.c
index 91a5806d0239..3199dd4a3b72 100644
--- a/drivers/staging/lustre/lustre/obdclass/cl_object.c
+++ b/drivers/staging/lustre/lustre/obdclass/cl_object.c
@@ -163,7 +163,7 @@ static spinlock_t *cl_object_attr_guard(struct cl_object *o)
  *
  * Prevents data-attributes from changing, until lock is released by
  * cl_object_attr_unlock(). This has to be called before calls to
- * cl_object_attr_get(), cl_object_attr_set().
+ * cl_object_attr_get(), cl_object_attr_update().
  */
 void cl_object_attr_lock(struct cl_object *o)
 	__acquires(cl_object_attr_guard(o))
@@ -217,11 +217,11 @@ EXPORT_SYMBOL(cl_object_attr_get);
  * Updates data-attributes of an object \a obj.
  *
  * Only attributes, mentioned in a validness bit-mask \a v are
- * updated. Calls cl_object_operations::coo_attr_set() on every layer, bottom
- * to top.
+ * updated. Calls cl_object_operations::coo_attr_update() on every layer,
+ * bottom to top.
  */
-int cl_object_attr_set(const struct lu_env *env, struct cl_object *obj,
-		       const struct cl_attr *attr, unsigned v)
+int cl_object_attr_update(const struct lu_env *env, struct cl_object *obj,
+			  const struct cl_attr *attr, unsigned int v)
 {
 	struct lu_object_header *top;
 	int result;
@@ -231,8 +231,9 @@ int cl_object_attr_set(const struct lu_env *env, struct cl_object *obj,
 	top = obj->co_lu.lo_header;
 	result = 0;
 	list_for_each_entry_reverse(obj, &top->loh_layers, co_lu.lo_linkage) {
-		if (obj->co_ops->coo_attr_set) {
-			result = obj->co_ops->coo_attr_set(env, obj, attr, v);
+		if (obj->co_ops->coo_attr_update) {
+			result = obj->co_ops->coo_attr_update(env, obj, attr,
+							      v);
 			if (result != 0) {
 				if (result > 0)
 					result = 0;
@@ -242,7 +243,7 @@ int cl_object_attr_set(const struct lu_env *env, struct cl_object *obj,
 	}
 	return result;
 }
-EXPORT_SYMBOL(cl_object_attr_set);
+EXPORT_SYMBOL(cl_object_attr_update);
 
 /**
  * Notifies layers (bottom-to-top) that glimpse AST was received.
@@ -321,6 +322,27 @@ int cl_object_prune(const struct lu_env *env, struct cl_object *obj)
 EXPORT_SYMBOL(cl_object_prune);
 
 /**
+ * Get stripe information of this object.
+ */
+int cl_object_getstripe(const struct lu_env *env, struct cl_object *obj,
+			struct lov_user_md __user *uarg)
+{
+	struct lu_object_header *top;
+	int result = 0;
+
+	top = obj->co_lu.lo_header;
+	list_for_each_entry(obj, &top->loh_layers, co_lu.lo_linkage) {
+		if (obj->co_ops->coo_getstripe) {
+			result = obj->co_ops->coo_getstripe(env, obj, uarg);
+			if (result)
+			break;
+		}
+	}
+	return result;
+}
+EXPORT_SYMBOL(cl_object_getstripe);
+
+/**
  * Helper function removing all object locks, and marking object for
  * deletion. All object pages must have been deleted at this point.
  *
@@ -377,7 +399,7 @@ static void cl_env_percpu_refill(void);
  */
 int cl_site_init(struct cl_site *s, struct cl_device *d)
 {
-	int i;
+	size_t i;
 	int result;
 
 	result = lu_site_init(&s->cs_lu, &d->cd_lu_dev);
@@ -411,7 +433,7 @@ static struct cache_stats cl_env_stats = {
  */
 int cl_site_stats_print(const struct cl_site *site, struct seq_file *m)
 {
-	int i;
+	size_t i;
 	static const char *pstate[] = {
 		[CPS_CACHED]  = "c",
 		[CPS_OWNED]   = "o",
@@ -1000,7 +1022,7 @@ static int cl_env_percpu_init(void)
 		 * thus we must uninitialize up to i, the rest are undefined.
 		 */
 		for (j = 0; j < i; j++) {
-			cle = &cl_env_percpu[i];
+			cle = &cl_env_percpu[j];
 			lu_context_exit(&cle->ce_ses);
 			lu_context_fini(&cle->ce_ses);
 			lu_env_fini(&cle->ce_lu);
@@ -1126,7 +1148,7 @@ static void *cl_key_init(const struct lu_context *ctx,
 
 	info = cl0_key_init(ctx, key);
 	if (!IS_ERR(info)) {
-		int i;
+		size_t i;
 
 		for (i = 0; i < ARRAY_SIZE(info->clt_counters); ++i)
 			lu_ref_init(&info->clt_counters[i].ctc_locks_locked);
@@ -1138,7 +1160,7 @@ static void cl_key_fini(const struct lu_context *ctx,
 			struct lu_context_key *key, void *data)
 {
 	struct cl_thread_info *info;
-	int i;
+	size_t i;
 
 	info = data;
 	for (i = 0; i < ARRAY_SIZE(info->clt_counters); ++i)
@@ -1150,7 +1172,7 @@ static void cl_key_exit(const struct lu_context *ctx,
 			struct lu_context_key *key, void *data)
 {
 	struct cl_thread_info *info = data;
-	int i;
+	size_t i;
 
 	for (i = 0; i < ARRAY_SIZE(info->clt_counters); ++i) {
 		LASSERT(info->clt_counters[i].ctc_nr_held == 0);
diff --git a/drivers/staging/lustre/lustre/obdclass/cl_page.c b/drivers/staging/lustre/lustre/obdclass/cl_page.c
index db2dc6b39073..63973ba096da 100644
--- a/drivers/staging/lustre/lustre/obdclass/cl_page.c
+++ b/drivers/staging/lustre/lustre/obdclass/cl_page.c
@@ -151,7 +151,6 @@ struct cl_page *cl_page_alloc(const struct lu_env *env,
 		INIT_LIST_HEAD(&page->cp_layers);
 		INIT_LIST_HEAD(&page->cp_batch);
 		INIT_LIST_HEAD(&page->cp_flight);
-		mutex_init(&page->cp_mutex);
 		lu_ref_init(&page->cp_reference);
 		head = o->co_lu.lo_header;
 		list_for_each_entry(o, &head->loh_layers, co_lu.lo_linkage) {
@@ -171,7 +170,6 @@ struct cl_page *cl_page_alloc(const struct lu_env *env,
 	}
 	return page;
 }
-EXPORT_SYMBOL(cl_page_alloc);
 
 /**
  * Returns a cl_page with index \a idx at the object \a o, and associated with
@@ -229,11 +227,6 @@ EXPORT_SYMBOL(cl_page_find);
 
 static inline int cl_page_invariant(const struct cl_page *pg)
 {
-	/*
-	 * Page invariant is protected by a VM lock.
-	 */
-	LINVRNT(cl_page_is_vmlocked(NULL, pg));
-
 	return cl_page_in_use_noref(pg);
 }
 
@@ -478,7 +471,6 @@ static void cl_page_owner_clear(struct cl_page *page)
 		LASSERT(page->cp_owner->ci_owned_nr > 0);
 		page->cp_owner->ci_owned_nr--;
 		page->cp_owner = NULL;
-		page->cp_task = NULL;
 	}
 }
 
@@ -562,7 +554,6 @@ static int cl_page_own0(const struct lu_env *env, struct cl_io *io,
 			PASSERT(env, pg, !pg->cp_owner);
 			PASSERT(env, pg, !pg->cp_req);
 			pg->cp_owner = cl_io_top(io);
-			pg->cp_task  = current;
 			cl_page_owner_set(pg);
 			if (pg->cp_state != CPS_FREEING) {
 				cl_page_state_set(env, pg, CPS_OWNED);
@@ -619,7 +610,6 @@ void cl_page_assume(const struct lu_env *env,
 	cl_page_invoid(env, io, pg, CL_PAGE_OP(cpo_assume));
 	PASSERT(env, pg, !pg->cp_owner);
 	pg->cp_owner = cl_io_top(io);
-	pg->cp_task = current;
 	cl_page_owner_set(pg);
 	cl_page_state_set(env, pg, CPS_OWNED);
 }
@@ -860,10 +850,6 @@ void cl_page_completion(const struct lu_env *env,
 	PASSERT(env, pg, pg->cp_state == cl_req_type_state(crt));
 
 	CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, ioret);
-	if (crt == CRT_READ && ioret == 0) {
-		PASSERT(env, pg, !(pg->cp_flags & CPF_READ_COMPLETED));
-		pg->cp_flags |= CPF_READ_COMPLETED;
-	}
 
 	cl_page_state_set(env, pg, CPS_CACHED);
 	if (crt >= CRT_NR)
@@ -872,7 +858,6 @@ void cl_page_completion(const struct lu_env *env,
 			       (const struct lu_env *,
 				const struct cl_page_slice *, int), ioret);
 	if (anchor) {
-		LASSERT(cl_page_is_vmlocked(env, pg));
 		LASSERT(pg->cp_sync_io == anchor);
 		pg->cp_sync_io = NULL;
 	}
@@ -989,10 +974,10 @@ void cl_page_header_print(const struct lu_env *env, void *cookie,
 			  lu_printer_t printer, const struct cl_page *pg)
 {
 	(*printer)(env, cookie,
-		   "page@%p[%d %p %d %d %d %p %p %#x]\n",
+		   "page@%p[%d %p %d %d %p %p]\n",
 		   pg, atomic_read(&pg->cp_ref), pg->cp_obj,
-		   pg->cp_state, pg->cp_error, pg->cp_type,
-		   pg->cp_owner, pg->cp_req, pg->cp_flags);
+		   pg->cp_state, pg->cp_type,
+		   pg->cp_owner, pg->cp_req);
 }
 EXPORT_SYMBOL(cl_page_header_print);
 
@@ -1020,7 +1005,6 @@ int cl_page_cancel(const struct lu_env *env, struct cl_page *page)
 			      (const struct lu_env *,
 			       const struct cl_page_slice *));
 }
-EXPORT_SYMBOL(cl_page_cancel);
 
 /**
  * Converts a byte offset within object \a obj into a page index.
@@ -1046,9 +1030,9 @@ pgoff_t cl_index(const struct cl_object *obj, loff_t offset)
 }
 EXPORT_SYMBOL(cl_index);
 
-int cl_page_size(const struct cl_object *obj)
+size_t cl_page_size(const struct cl_object *obj)
 {
-	return 1 << PAGE_SHIFT;
+	return 1UL << PAGE_SHIFT;
 }
 EXPORT_SYMBOL(cl_page_size);
 
@@ -1087,11 +1071,11 @@ struct cl_client_cache *cl_cache_init(unsigned long lru_page_max)
 	/* Initialize cache data */
 	atomic_set(&cache->ccc_users, 1);
 	cache->ccc_lru_max = lru_page_max;
-	atomic_set(&cache->ccc_lru_left, lru_page_max);
+	atomic_long_set(&cache->ccc_lru_left, lru_page_max);
 	spin_lock_init(&cache->ccc_lru_lock);
 	INIT_LIST_HEAD(&cache->ccc_lru);
 
-	atomic_set(&cache->ccc_unstable_nr, 0);
+	atomic_long_set(&cache->ccc_unstable_nr, 0);
 	init_waitqueue_head(&cache->ccc_unstable_waitq);
 
 	return cache;
diff --git a/drivers/staging/lustre/lustre/obdclass/class_obd.c b/drivers/staging/lustre/lustre/obdclass/class_obd.c
index d9d2a1952b8b..76e1ee83a723 100644
--- a/drivers/staging/lustre/lustre/obdclass/class_obd.c
+++ b/drivers/staging/lustre/lustre/obdclass/class_obd.c
@@ -40,10 +40,10 @@
 #include "../include/lprocfs_status.h"
 #include <linux/list.h>
 #include "../include/cl_object.h"
+#include "../include/lustre/lustre_ioctl.h"
 #include "llog_internal.h"
 
 struct obd_device *obd_devs[MAX_OBD_DEVICES];
-EXPORT_SYMBOL(obd_devs);
 struct list_head obd_types;
 DEFINE_RWLOCK(obd_dev_lock);
 
@@ -54,11 +54,9 @@ unsigned int obd_dump_on_timeout;
 EXPORT_SYMBOL(obd_dump_on_timeout);
 unsigned int obd_dump_on_eviction;
 EXPORT_SYMBOL(obd_dump_on_eviction);
-unsigned int obd_max_dirty_pages = 256;
+unsigned long obd_max_dirty_pages;
 EXPORT_SYMBOL(obd_max_dirty_pages);
-atomic_t obd_unstable_pages;
-EXPORT_SYMBOL(obd_unstable_pages);
-atomic_t obd_dirty_pages;
+atomic_long_t obd_dirty_pages;
 EXPORT_SYMBOL(obd_dirty_pages);
 unsigned int obd_timeout = OBD_TIMEOUT_DEFAULT;   /* seconds */
 EXPORT_SYMBOL(obd_timeout);
@@ -76,13 +74,11 @@ EXPORT_SYMBOL(at_early_margin);
 int at_extra = 30;
 EXPORT_SYMBOL(at_extra);
 
-atomic_t obd_dirty_transit_pages;
+atomic_long_t obd_dirty_transit_pages;
 EXPORT_SYMBOL(obd_dirty_transit_pages);
 
 char obd_jobid_var[JOBSTATS_JOBID_VAR_MAX_LEN + 1] = JOBSTATS_DISABLE;
-EXPORT_SYMBOL(obd_jobid_var);
-
-char obd_jobid_node[JOBSTATS_JOBID_SIZE + 1];
+char obd_jobid_node[LUSTRE_JOBID_SIZE + 1];
 
 /* Get jobid of current process from stored variable or calculate
  * it from pid and user_id.
@@ -93,14 +89,14 @@ char obd_jobid_node[JOBSTATS_JOBID_SIZE + 1];
  */
 int lustre_get_jobid(char *jobid)
 {
-	memset(jobid, 0, JOBSTATS_JOBID_SIZE);
+	memset(jobid, 0, LUSTRE_JOBID_SIZE);
 	/* Jobstats isn't enabled */
 	if (strcmp(obd_jobid_var, JOBSTATS_DISABLE) == 0)
 		return 0;
 
 	/* Use process name + fsuid as jobid */
 	if (strcmp(obd_jobid_var, JOBSTATS_PROCNAME_UID) == 0) {
-		snprintf(jobid, JOBSTATS_JOBID_SIZE, "%s.%u",
+		snprintf(jobid, LUSTRE_JOBID_SIZE, "%s.%u",
 			 current_comm(),
 			 from_kuid(&init_user_ns, current_fsuid()));
 		return 0;
@@ -116,19 +112,6 @@ int lustre_get_jobid(char *jobid)
 }
 EXPORT_SYMBOL(lustre_get_jobid);
 
-static inline void obd_data2conn(struct lustre_handle *conn,
-				 struct obd_ioctl_data *data)
-{
-	memset(conn, 0, sizeof(*conn));
-	conn->cookie = data->ioc_cookie;
-}
-
-static inline void obd_conn2data(struct obd_ioctl_data *data,
-				 struct lustre_handle *conn)
-{
-	data->ioc_cookie = conn->cookie;
-}
-
 static int class_resolve_dev_name(__u32 len, const char *name)
 {
 	int rc;
@@ -287,13 +270,6 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg)
 		goto out;
 	}
 
-	case OBD_IOC_CLOSE_UUID: {
-		CDEBUG(D_IOCTL, "closing all connections to uuid %s (NOOP)\n",
-		       data->ioc_inlbuf1);
-		err = 0;
-		goto out;
-	}
-
 	case OBD_IOC_GETDEVICE: {
 		int     index = data->ioc_count;
 		char    *status, *str;
@@ -467,15 +443,10 @@ static int obd_init_checks(void)
 	return ret;
 }
 
-extern int class_procfs_init(void);
-extern int class_procfs_clean(void);
-
 static int __init obdclass_init(void)
 {
 	int i, err;
 
-	int lustre_register_fs(void);
-
 	LCONSOLE_INFO("Lustre: Build Version: " LUSTRE_VERSION_STRING "\n");
 
 	spin_lock_init(&obd_types_lock);
@@ -542,23 +513,9 @@ static int __init obdclass_init(void)
 
 static void obdclass_exit(void)
 {
-	int i;
-
-	int lustre_unregister_fs(void);
-
 	lustre_unregister_fs();
 
 	misc_deregister(&obd_psdev);
-	for (i = 0; i < class_devno_max(); i++) {
-		struct obd_device *obd = class_num2obd(i);
-
-		if (obd && obd->obd_set_up &&
-		    OBT(obd) && OBP(obd, detach)) {
-			/* XXX should this call generic detach otherwise? */
-			LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC);
-			OBP(obd, detach)(obd);
-		}
-	}
 	llog_info_fini();
 	cl_global_fini();
 	lu_global_fini();
diff --git a/drivers/staging/lustre/lustre/obdclass/debug.c b/drivers/staging/lustre/lustre/obdclass/debug.c
index 8acf67239fa8..0bd4ad20aba7 100644
--- a/drivers/staging/lustre/lustre/obdclass/debug.c
+++ b/drivers/staging/lustre/lustre/obdclass/debug.c
@@ -48,10 +48,10 @@ int block_debug_setup(void *addr, int len, __u64 off, __u64 id)
 	LASSERT(addr);
 
 	put_unaligned_le64(off, addr);
-	put_unaligned_le64(id, addr+LPDS);
+	put_unaligned_le64(id, addr + LPDS);
 	addr += len - LPDS - LPDS;
 	put_unaligned_le64(off, addr);
-	put_unaligned_le64(id, addr+LPDS);
+	put_unaligned_le64(id, addr + LPDS);
 
 	return 0;
 }
diff --git a/drivers/staging/lustre/lustre/obdclass/genops.c b/drivers/staging/lustre/lustre/obdclass/genops.c
index 99c2da632b51..cf8bb2a2f40b 100644
--- a/drivers/staging/lustre/lustre/obdclass/genops.c
+++ b/drivers/staging/lustre/lustre/obdclass/genops.c
@@ -133,7 +133,6 @@ void class_put_type(struct obd_type *type)
 	module_put(type->typ_dt_ops->owner);
 	spin_unlock(&type->obd_type_lock);
 }
-EXPORT_SYMBOL(class_put_type);
 
 #define CLASS_MAX_NAME 1024
 
@@ -166,10 +165,10 @@ int class_register_type(struct obd_ops *dt_ops, struct md_ops *md_ops,
 	    !type->typ_name)
 		goto failed;
 
-	*(type->typ_dt_ops) = *dt_ops;
+	*type->typ_dt_ops = *dt_ops;
 	/* md_ops is optional */
 	if (md_ops)
-		*(type->typ_md_ops) = *md_ops;
+		*type->typ_md_ops = *md_ops;
 	strcpy(type->typ_name, name);
 	spin_lock_init(&type->obd_type_lock);
 
@@ -391,7 +390,6 @@ int class_name2dev(const char *name)
 
 	return -1;
 }
-EXPORT_SYMBOL(class_name2dev);
 
 struct obd_device *class_name2obd(const char *name)
 {
@@ -421,7 +419,6 @@ int class_uuid2dev(struct obd_uuid *uuid)
 
 	return -1;
 }
-EXPORT_SYMBOL(class_uuid2dev);
 
 /**
  * Get obd device from ::obd_devs[]
@@ -450,7 +447,6 @@ struct obd_device *class_num2obd(int num)
 
 	return obd;
 }
-EXPORT_SYMBOL(class_num2obd);
 
 /* Search for a client OBD connected to tgt_uuid.  If grp_uuid is
  * specified, then only the client with that uuid is returned,
@@ -509,7 +505,7 @@ struct obd_device *class_devices_in_group(struct obd_uuid *grp_uuid, int *next)
 			continue;
 		if (obd_uuid_equals(grp_uuid, &obd->obd_uuid)) {
 			if (next)
-				*next = i+1;
+				*next = i + 1;
 			read_unlock(&obd_dev_lock);
 			return obd;
 		}
@@ -618,7 +614,7 @@ struct obd_export *class_conn2export(struct lustre_handle *conn)
 	}
 
 	CDEBUG(D_INFO, "looking for export cookie %#llx\n", conn->cookie);
-	export = class_handle2object(conn->cookie);
+	export = class_handle2object(conn->cookie, NULL);
 	return export;
 }
 EXPORT_SYMBOL(class_conn2export);
@@ -817,7 +813,6 @@ void class_unlink_export(struct obd_export *exp)
 	spin_unlock(&exp->exp_obd->obd_dev_lock);
 	class_export_put(exp);
 }
-EXPORT_SYMBOL(class_unlink_export);
 
 /* Import management functions */
 static void class_import_destroy(struct obd_import *imp)
@@ -973,7 +968,6 @@ void __class_export_add_lock_ref(struct obd_export *exp, struct ldlm_lock *lock)
 	       lock, exp, lock->l_exp_refs_nr);
 	spin_unlock(&exp->exp_locks_list_guard);
 }
-EXPORT_SYMBOL(__class_export_add_lock_ref);
 
 void __class_export_del_lock_ref(struct obd_export *exp, struct ldlm_lock *lock)
 {
@@ -991,7 +985,6 @@ void __class_export_del_lock_ref(struct obd_export *exp, struct ldlm_lock *lock)
 	       lock, exp, lock->l_exp_refs_nr);
 	spin_unlock(&exp->exp_locks_list_guard);
 }
-EXPORT_SYMBOL(__class_export_del_lock_ref);
 #endif
 
 /* A connection defines an export context in which preallocation can
@@ -1100,7 +1093,6 @@ EXPORT_SYMBOL(class_fail_export);
 
 #if LUSTRE_TRACKS_LOCK_EXP_REFS
 void (*class_export_dump_hook)(struct obd_export *) = NULL;
-EXPORT_SYMBOL(class_export_dump_hook);
 #endif
 
 /* Total amount of zombies to be destroyed */
@@ -1312,3 +1304,135 @@ void obd_zombie_impexp_stop(void)
 	obd_zombie_impexp_notify();
 	wait_for_completion(&obd_zombie_stop);
 }
+
+struct obd_request_slot_waiter {
+	struct list_head	orsw_entry;
+	wait_queue_head_t	orsw_waitq;
+	bool			orsw_signaled;
+};
+
+static bool obd_request_slot_avail(struct client_obd *cli,
+				   struct obd_request_slot_waiter *orsw)
+{
+	bool avail;
+
+	spin_lock(&cli->cl_loi_list_lock);
+	avail = !!list_empty(&orsw->orsw_entry);
+	spin_unlock(&cli->cl_loi_list_lock);
+
+	return avail;
+};
+
+/*
+ * For network flow control, the RPC sponsor needs to acquire a credit
+ * before sending the RPC. The credits count for a connection is defined
+ * by the "cl_max_rpcs_in_flight". If all the credits are occpuied, then
+ * the subsequent RPC sponsors need to wait until others released their
+ * credits, or the administrator increased the "cl_max_rpcs_in_flight".
+ */
+int obd_get_request_slot(struct client_obd *cli)
+{
+	struct obd_request_slot_waiter orsw;
+	struct l_wait_info lwi;
+	int rc;
+
+	spin_lock(&cli->cl_loi_list_lock);
+	if (cli->cl_r_in_flight < cli->cl_max_rpcs_in_flight) {
+		cli->cl_r_in_flight++;
+		spin_unlock(&cli->cl_loi_list_lock);
+		return 0;
+	}
+
+	init_waitqueue_head(&orsw.orsw_waitq);
+	list_add_tail(&orsw.orsw_entry, &cli->cl_loi_read_list);
+	orsw.orsw_signaled = false;
+	spin_unlock(&cli->cl_loi_list_lock);
+
+	lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL);
+	rc = l_wait_event(orsw.orsw_waitq,
+			  obd_request_slot_avail(cli, &orsw) ||
+			  orsw.orsw_signaled,
+			  &lwi);
+
+	/*
+	 * Here, we must take the lock to avoid the on-stack 'orsw' to be
+	 * freed but other (such as obd_put_request_slot) is using it.
+	 */
+	spin_lock(&cli->cl_loi_list_lock);
+	if (rc) {
+		if (!orsw.orsw_signaled) {
+			if (list_empty(&orsw.orsw_entry))
+				cli->cl_r_in_flight--;
+			else
+				list_del(&orsw.orsw_entry);
+		}
+	}
+
+	if (orsw.orsw_signaled) {
+		LASSERT(list_empty(&orsw.orsw_entry));
+
+		rc = -EINTR;
+	}
+	spin_unlock(&cli->cl_loi_list_lock);
+
+	return rc;
+}
+EXPORT_SYMBOL(obd_get_request_slot);
+
+void obd_put_request_slot(struct client_obd *cli)
+{
+	struct obd_request_slot_waiter *orsw;
+
+	spin_lock(&cli->cl_loi_list_lock);
+	cli->cl_r_in_flight--;
+
+	/* If there is free slot, wakeup the first waiter. */
+	if (!list_empty(&cli->cl_loi_read_list) &&
+	    likely(cli->cl_r_in_flight < cli->cl_max_rpcs_in_flight)) {
+		orsw = list_entry(cli->cl_loi_read_list.next,
+				  struct obd_request_slot_waiter, orsw_entry);
+		list_del_init(&orsw->orsw_entry);
+		cli->cl_r_in_flight++;
+		wake_up(&orsw->orsw_waitq);
+	}
+	spin_unlock(&cli->cl_loi_list_lock);
+}
+EXPORT_SYMBOL(obd_put_request_slot);
+
+__u32 obd_get_max_rpcs_in_flight(struct client_obd *cli)
+{
+	return cli->cl_max_rpcs_in_flight;
+}
+EXPORT_SYMBOL(obd_get_max_rpcs_in_flight);
+
+int obd_set_max_rpcs_in_flight(struct client_obd *cli, __u32 max)
+{
+	struct obd_request_slot_waiter *orsw;
+	__u32 old;
+	int diff;
+	int i;
+
+	if (max > OBD_MAX_RIF_MAX || max < 1)
+		return -ERANGE;
+
+	spin_lock(&cli->cl_loi_list_lock);
+	old = cli->cl_max_rpcs_in_flight;
+	cli->cl_max_rpcs_in_flight = max;
+	diff = max - old;
+
+	/* We increase the max_rpcs_in_flight, then wakeup some waiters. */
+	for (i = 0; i < diff; i++) {
+		if (list_empty(&cli->cl_loi_read_list))
+			break;
+
+		orsw = list_entry(cli->cl_loi_read_list.next,
+				  struct obd_request_slot_waiter, orsw_entry);
+		list_del_init(&orsw->orsw_entry);
+		cli->cl_r_in_flight++;
+		wake_up(&orsw->orsw_waitq);
+	}
+	spin_unlock(&cli->cl_loi_list_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL(obd_set_max_rpcs_in_flight);
diff --git a/drivers/staging/lustre/lustre/obdclass/linkea.c b/drivers/staging/lustre/lustre/obdclass/linkea.c
new file mode 100644
index 000000000000..0b1d2f0a422c
--- /dev/null
+++ b/drivers/staging/lustre/lustre/obdclass/linkea.c
@@ -0,0 +1,201 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.gnu.org/licenses/gpl-2.0.html
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2013, 2014, Intel Corporation.
+ * Use is subject to license terms.
+ *
+ * Author: Di Wang <di.wang@intel.com>
+ */
+
+#include "../include/lustre/lustre_idl.h"
+#include "../include/obd.h"
+#include "../include/lustre_linkea.h"
+
+int linkea_data_new(struct linkea_data *ldata, struct lu_buf *buf)
+{
+	ldata->ld_buf = lu_buf_check_and_alloc(buf, PAGE_SIZE);
+	if (!ldata->ld_buf->lb_buf)
+		return -ENOMEM;
+	ldata->ld_leh = ldata->ld_buf->lb_buf;
+	ldata->ld_leh->leh_magic = LINK_EA_MAGIC;
+	ldata->ld_leh->leh_len = sizeof(struct link_ea_header);
+	ldata->ld_leh->leh_reccount = 0;
+	return 0;
+}
+EXPORT_SYMBOL(linkea_data_new);
+
+int linkea_init(struct linkea_data *ldata)
+{
+	struct link_ea_header *leh;
+
+	LASSERT(ldata->ld_buf);
+	leh = ldata->ld_buf->lb_buf;
+	if (leh->leh_magic == __swab32(LINK_EA_MAGIC)) {
+		leh->leh_magic = LINK_EA_MAGIC;
+		leh->leh_reccount = __swab32(leh->leh_reccount);
+		leh->leh_len = __swab64(leh->leh_len);
+		/* entries are swabbed by linkea_entry_unpack */
+	}
+	if (leh->leh_magic != LINK_EA_MAGIC)
+		return -EINVAL;
+	if (leh->leh_reccount == 0)
+		return -ENODATA;
+
+	ldata->ld_leh = leh;
+	return 0;
+}
+EXPORT_SYMBOL(linkea_init);
+
+/**
+ * Pack a link_ea_entry.
+ * All elements are stored as chars to avoid alignment issues.
+ * Numbers are always big-endian
+ * \retval record length
+ */
+int linkea_entry_pack(struct link_ea_entry *lee, const struct lu_name *lname,
+		      const struct lu_fid *pfid)
+{
+	struct lu_fid   tmpfid;
+	int             reclen;
+
+	tmpfid = *pfid;
+	if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_LINKEA_CRASH))
+		tmpfid.f_ver = ~0;
+	fid_cpu_to_be(&tmpfid, &tmpfid);
+	memcpy(&lee->lee_parent_fid, &tmpfid, sizeof(tmpfid));
+	memcpy(lee->lee_name, lname->ln_name, lname->ln_namelen);
+	reclen = sizeof(struct link_ea_entry) + lname->ln_namelen;
+
+	lee->lee_reclen[0] = (reclen >> 8) & 0xff;
+	lee->lee_reclen[1] = reclen & 0xff;
+	return reclen;
+}
+EXPORT_SYMBOL(linkea_entry_pack);
+
+void linkea_entry_unpack(const struct link_ea_entry *lee, int *reclen,
+			 struct lu_name *lname, struct lu_fid *pfid)
+{
+	*reclen = (lee->lee_reclen[0] << 8) | lee->lee_reclen[1];
+	memcpy(pfid, &lee->lee_parent_fid, sizeof(*pfid));
+	fid_be_to_cpu(pfid, pfid);
+	if (lname) {
+		lname->ln_name = lee->lee_name;
+		lname->ln_namelen = *reclen - sizeof(struct link_ea_entry);
+	}
+}
+EXPORT_SYMBOL(linkea_entry_unpack);
+
+/**
+ * Add a record to the end of link ea buf
+ **/
+int linkea_add_buf(struct linkea_data *ldata, const struct lu_name *lname,
+		   const struct lu_fid *pfid)
+{
+	LASSERT(ldata->ld_leh);
+
+	if (!lname || !pfid)
+		return -EINVAL;
+
+	ldata->ld_reclen = lname->ln_namelen + sizeof(struct link_ea_entry);
+	if (ldata->ld_leh->leh_len + ldata->ld_reclen >
+	    ldata->ld_buf->lb_len) {
+		if (lu_buf_check_and_grow(ldata->ld_buf,
+					  ldata->ld_leh->leh_len +
+					  ldata->ld_reclen) < 0)
+			return -ENOMEM;
+	}
+
+	ldata->ld_leh = ldata->ld_buf->lb_buf;
+	ldata->ld_lee = ldata->ld_buf->lb_buf + ldata->ld_leh->leh_len;
+	ldata->ld_reclen = linkea_entry_pack(ldata->ld_lee, lname, pfid);
+	ldata->ld_leh->leh_len += ldata->ld_reclen;
+	ldata->ld_leh->leh_reccount++;
+	CDEBUG(D_INODE, "New link_ea name '" DFID ":%.*s' is added\n",
+	       PFID(pfid), lname->ln_namelen, lname->ln_name);
+	return 0;
+}
+EXPORT_SYMBOL(linkea_add_buf);
+
+/** Del the current record from the link ea buf */
+void linkea_del_buf(struct linkea_data *ldata, const struct lu_name *lname)
+{
+	LASSERT(ldata->ld_leh && ldata->ld_lee);
+
+	ldata->ld_leh->leh_reccount--;
+	ldata->ld_leh->leh_len -= ldata->ld_reclen;
+	memmove(ldata->ld_lee, (char *)ldata->ld_lee + ldata->ld_reclen,
+		(char *)ldata->ld_leh + ldata->ld_leh->leh_len -
+		(char *)ldata->ld_lee);
+	CDEBUG(D_INODE, "Old link_ea name '%.*s' is removed\n",
+	       lname->ln_namelen, lname->ln_name);
+
+	if ((char *)ldata->ld_lee >= ((char *)ldata->ld_leh +
+				      ldata->ld_leh->leh_len))
+		ldata->ld_lee = NULL;
+}
+EXPORT_SYMBOL(linkea_del_buf);
+
+/**
+ * Check if such a link exists in linkEA.
+ *
+ * \param ldata link data the search to be done on
+ * \param lname name in the parent's directory entry pointing to this object
+ * \param pfid parent fid the link to be found for
+ *
+ * \retval   0 success
+ * \retval -ENOENT link does not exist
+ * \retval -ve on error
+ */
+int linkea_links_find(struct linkea_data *ldata, const struct lu_name *lname,
+		      const struct lu_fid  *pfid)
+{
+	struct lu_name tmpname;
+	struct lu_fid  tmpfid;
+	int count;
+
+	LASSERT(ldata->ld_leh);
+
+	/* link #0 */
+	ldata->ld_lee = (struct link_ea_entry *)(ldata->ld_leh + 1);
+
+	for (count = 0; count < ldata->ld_leh->leh_reccount; count++) {
+		linkea_entry_unpack(ldata->ld_lee, &ldata->ld_reclen,
+				    &tmpname, &tmpfid);
+		if (tmpname.ln_namelen == lname->ln_namelen &&
+		    lu_fid_eq(&tmpfid, pfid) &&
+		    (strncmp(tmpname.ln_name, lname->ln_name,
+			     tmpname.ln_namelen) == 0))
+			break;
+		ldata->ld_lee = (struct link_ea_entry *)((char *)ldata->ld_lee +
+							 ldata->ld_reclen);
+	}
+
+	if (count == ldata->ld_leh->leh_reccount) {
+		CDEBUG(D_INODE, "Old link_ea name '%.*s' not found\n",
+		       lname->ln_namelen, lname->ln_name);
+		ldata->ld_lee = NULL;
+		ldata->ld_reclen = 0;
+		return -ENOENT;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(linkea_links_find);
diff --git a/drivers/staging/lustre/lustre/obdclass/linux/linux-module.c b/drivers/staging/lustre/lustre/obdclass/linux/linux-module.c
index 33342bfcc90e..be09e04b042f 100644
--- a/drivers/staging/lustre/lustre/obdclass/linux/linux-module.c
+++ b/drivers/staging/lustre/lustre/obdclass/linux/linux-module.c
@@ -65,6 +65,7 @@
 #include "../../include/obd_support.h"
 #include "../../include/obd_class.h"
 #include "../../include/lprocfs_status.h"
+#include "../../include/lustre/lustre_ioctl.h"
 #include "../../include/lustre_ver.h"
 
 /* buffer MUST be at least the size of obd_ioctl_hdr */
@@ -157,7 +158,6 @@ int obd_ioctl_popdata(void __user *arg, void *data, int len)
 	err = copy_to_user(arg, data, len) ? -EFAULT : 0;
 	return err;
 }
-EXPORT_SYMBOL(obd_ioctl_popdata);
 
 /*  opening /dev/obd */
 static int obd_class_open(struct inode *inode, struct file *file)
@@ -191,7 +191,7 @@ static long obd_class_ioctl(struct file *filp, unsigned int cmd,
 }
 
 /* declare character device */
-static struct file_operations obd_psdev_fops = {
+static const struct file_operations obd_psdev_fops = {
 	.owner	  = THIS_MODULE,
 	.unlocked_ioctl = obd_class_ioctl, /* unlocked_ioctl */
 	.open	   = obd_class_open,      /* open */
@@ -291,7 +291,7 @@ static ssize_t jobid_name_store(struct kobject *kobj, struct attribute *attr,
 				const char *buffer,
 				size_t count)
 {
-	if (!count || count > JOBSTATS_JOBID_SIZE)
+	if (!count || count > LUSTRE_JOBID_SIZE)
 		return -EINVAL;
 
 	memcpy(obd_jobid_node, buffer, count);
diff --git a/drivers/staging/lustre/lustre/obdclass/linux/linux-obdo.c b/drivers/staging/lustre/lustre/obdclass/linux/linux-obdo.c
index c6cc6a7666e3..41b77a30feb3 100644
--- a/drivers/staging/lustre/lustre/obdclass/linux/linux-obdo.c
+++ b/drivers/staging/lustre/lustre/obdclass/linux/linux-obdo.c
@@ -44,7 +44,7 @@
 
 #include <linux/fs.h>
 
-void obdo_refresh_inode(struct inode *dst, struct obdo *src, u32 valid)
+void obdo_refresh_inode(struct inode *dst, const struct obdo *src, u32 valid)
 {
 	valid &= src->o_valid;
 
diff --git a/drivers/staging/lustre/lustre/obdclass/linux/linux-sysctl.c b/drivers/staging/lustre/lustre/obdclass/linux/linux-sysctl.c
index 8f70dd2686f9..e6c785afceba 100644
--- a/drivers/staging/lustre/lustre/obdclass/linux/linux-sysctl.c
+++ b/drivers/staging/lustre/lustre/obdclass/linux/linux-sysctl.c
@@ -45,6 +45,7 @@
 
 #include "../../include/obd_support.h"
 #include "../../include/lprocfs_status.h"
+#include "../../include/obd_class.h"
 
 struct static_lustre_uintvalue_attr {
 	struct {
@@ -95,8 +96,8 @@ LUSTRE_STATIC_UINT_ATTR(timeout, &obd_timeout);
 static ssize_t max_dirty_mb_show(struct kobject *kobj, struct attribute *attr,
 				 char *buf)
 {
-	return sprintf(buf, "%ul\n",
-			obd_max_dirty_pages / (1 << (20 - PAGE_SHIFT)));
+	return sprintf(buf, "%lu\n",
+		       obd_max_dirty_pages / (1 << (20 - PAGE_SHIFT)));
 }
 
 static ssize_t max_dirty_mb_store(struct kobject *kobj, struct attribute *attr,
diff --git a/drivers/staging/lustre/lustre/obdclass/llog.c b/drivers/staging/lustre/lustre/obdclass/llog.c
index 1784ca063428..43797f106745 100644
--- a/drivers/staging/lustre/lustre/obdclass/llog.c
+++ b/drivers/staging/lustre/lustre/obdclass/llog.c
@@ -80,7 +80,7 @@ static void llog_free_handle(struct llog_handle *loghandle)
 		LASSERT(list_empty(&loghandle->u.phd.phd_entry));
 	else if (loghandle->lgh_hdr->llh_flags & LLOG_F_IS_CAT)
 		LASSERT(list_empty(&loghandle->u.chd.chd_head));
-	LASSERT(sizeof(*(loghandle->lgh_hdr)) == LLOG_CHUNK_SIZE);
+	LASSERT(sizeof(*loghandle->lgh_hdr) == LLOG_CHUNK_SIZE);
 	kfree(loghandle->lgh_hdr);
 out:
 	kfree(loghandle);
@@ -137,6 +137,7 @@ static int llog_read_header(const struct lu_env *env,
 int llog_init_handle(const struct lu_env *env, struct llog_handle *handle,
 		     int flags, struct obd_uuid *uuid)
 {
+	enum llog_flag fmt = flags & LLOG_F_EXT_MASK;
 	struct llog_log_hdr	*llh;
 	int			 rc;
 
@@ -194,6 +195,7 @@ int llog_init_handle(const struct lu_env *env, struct llog_handle *handle,
 		       flags, LLOG_F_IS_CAT, LLOG_F_IS_PLAIN);
 		rc = -EINVAL;
 	}
+	llh->llh_flags |= fmt;
 out:
 	if (rc) {
 		kfree(llh);
@@ -233,6 +235,10 @@ static int llog_process_thread(void *arg)
 	else
 		last_index = LLOG_BITMAP_BYTES * 8 - 1;
 
+	/* Record is not in this buffer. */
+	if (index > last_index)
+		goto out;
+
 	while (rc == 0) {
 		struct llog_rec_hdr *rec;
 
@@ -262,7 +268,7 @@ repeat:
 		 */
 		for (rec = (struct llog_rec_hdr *)buf;
 		     (char *)rec < buf + LLOG_CHUNK_SIZE;
-		     rec = (struct llog_rec_hdr *)((char *)rec + rec->lrh_len)) {
+		     rec = llog_rec_hdr_next(rec)) {
 			CDEBUG(D_OTHER, "processing rec 0x%p type %#x\n",
 			       rec, rec->lrh_type);
 
diff --git a/drivers/staging/lustre/lustre/obdclass/llog_cat.c b/drivers/staging/lustre/lustre/obdclass/llog_cat.c
index a82a2950295a..ce8e2f6f002a 100644
--- a/drivers/staging/lustre/lustre/obdclass/llog_cat.c
+++ b/drivers/staging/lustre/lustre/obdclass/llog_cat.c
@@ -63,11 +63,13 @@ static int llog_cat_id2handle(const struct lu_env *env,
 			      struct llog_logid *logid)
 {
 	struct llog_handle	*loghandle;
+	enum llog_flag fmt;
 	int			 rc = 0;
 
 	if (!cathandle)
 		return -EBADF;
 
+	fmt = cathandle->lgh_hdr->llh_flags & LLOG_F_EXT_MASK;
 	down_write(&cathandle->lgh_lock);
 	list_for_each_entry(loghandle, &cathandle->u.chd.chd_head,
 			    u.phd.phd_entry) {
@@ -99,7 +101,7 @@ static int llog_cat_id2handle(const struct lu_env *env,
 		return rc;
 	}
 
-	rc = llog_init_handle(env, loghandle, LLOG_F_IS_PLAIN, NULL);
+	rc = llog_init_handle(env, loghandle, fmt | LLOG_F_IS_PLAIN, NULL);
 	if (rc < 0) {
 		llog_close(env, loghandle);
 		loghandle = NULL;
@@ -107,7 +109,7 @@ static int llog_cat_id2handle(const struct lu_env *env,
 	}
 
 	down_write(&cathandle->lgh_lock);
-	list_add(&loghandle->u.phd.phd_entry, &cathandle->u.chd.chd_head);
+	list_add_tail(&loghandle->u.phd.phd_entry, &cathandle->u.chd.chd_head);
 	up_write(&cathandle->lgh_lock);
 
 	loghandle->u.phd.phd_cat_handle = cathandle;
@@ -123,7 +125,6 @@ out:
 int llog_cat_close(const struct lu_env *env, struct llog_handle *cathandle)
 {
 	struct llog_handle	*loghandle, *n;
-	int			 rc;
 
 	list_for_each_entry_safe(loghandle, n, &cathandle->u.chd.chd_head,
 				 u.phd.phd_entry) {
@@ -134,8 +135,7 @@ int llog_cat_close(const struct lu_env *env, struct llog_handle *cathandle)
 	/* if handle was stored in ctxt, remove it too */
 	if (cathandle->lgh_ctxt->loc_handle == cathandle)
 		cathandle->lgh_ctxt->loc_handle = NULL;
-	rc = llog_close(env, cathandle);
-	return rc;
+	return llog_close(env, cathandle);
 }
 EXPORT_SYMBOL(llog_cat_close);
 
diff --git a/drivers/staging/lustre/lustre/obdclass/llog_internal.h b/drivers/staging/lustre/lustre/obdclass/llog_internal.h
index f7949525d952..21a93c73756a 100644
--- a/drivers/staging/lustre/lustre/obdclass/llog_internal.h
+++ b/drivers/staging/lustre/lustre/obdclass/llog_internal.h
@@ -70,4 +70,9 @@ int llog_process_or_fork(const struct lu_env *env,
 			 llog_cb_t cb, void *data, void *catdata, bool fork);
 int llog_cat_cleanup(const struct lu_env *env, struct llog_handle *cathandle,
 		     struct llog_handle *loghandle, int index);
+
+static inline struct llog_rec_hdr *llog_rec_hdr_next(struct llog_rec_hdr *rec)
+{
+	return (struct llog_rec_hdr *)((char *)rec + rec->lrh_len);
+}
 #endif
diff --git a/drivers/staging/lustre/lustre/obdclass/llog_obd.c b/drivers/staging/lustre/lustre/obdclass/llog_obd.c
index 6ace7e097859..a4277d684614 100644
--- a/drivers/staging/lustre/lustre/obdclass/llog_obd.c
+++ b/drivers/staging/lustre/lustre/obdclass/llog_obd.c
@@ -210,7 +210,6 @@ LU_KEY_INIT_FINI(llog, struct llog_thread_info);
 /* context key: llog_thread_key */
 LU_CONTEXT_KEY_DEFINE(llog, LCT_MD_THREAD | LCT_MG_THREAD | LCT_LOCAL);
 LU_KEY_INIT_GENERIC(llog);
-EXPORT_SYMBOL(llog_thread_key);
 
 int llog_info_init(void)
 {
diff --git a/drivers/staging/lustre/lustre/obdclass/llog_swab.c b/drivers/staging/lustre/lustre/obdclass/llog_swab.c
index f7b9b190350c..8c4c1b3f1b45 100644
--- a/drivers/staging/lustre/lustre/obdclass/llog_swab.c
+++ b/drivers/staging/lustre/lustre/obdclass/llog_swab.c
@@ -172,20 +172,23 @@ void lustre_swab_llog_rec(struct llog_rec_hdr *rec)
 		__swab64s(&cr->cr.cr_time);
 		lustre_swab_lu_fid(&cr->cr.cr_tfid);
 		lustre_swab_lu_fid(&cr->cr.cr_pfid);
-		if (CHANGELOG_REC_EXTENDED(&cr->cr)) {
-			struct llog_changelog_ext_rec *ext =
-				(struct llog_changelog_ext_rec *)rec;
-
-			lustre_swab_lu_fid(&ext->cr.cr_sfid);
-			lustre_swab_lu_fid(&ext->cr.cr_spfid);
-			tail = &ext->cr_tail;
-		} else {
-			tail = &cr->cr_tail;
+		if (cr->cr.cr_flags & CLF_RENAME) {
+			struct changelog_ext_rename *rnm =
+				changelog_rec_rename(&cr->cr);
+
+			lustre_swab_lu_fid(&rnm->cr_sfid);
+			lustre_swab_lu_fid(&rnm->cr_spfid);
 		}
-		tail = (struct llog_rec_tail *)((char *)tail +
+		/*
+		 * Because the tail follows a variable-length structure we need
+		 * to compute its location at runtime
+		 */
+		tail = (struct llog_rec_tail *)((char *)&cr->cr +
+						changelog_rec_size(&cr->cr) +
 						cr->cr.cr_namelen);
 		break;
 	}
+
 	case CHANGELOG_USER_REC:
 	{
 		struct llog_changelog_user_rec *cur =
@@ -224,6 +227,7 @@ void lustre_swab_llog_rec(struct llog_rec_hdr *rec)
 		__swab32s(&lsr->lsr_uid_h);
 		__swab32s(&lsr->lsr_gid);
 		__swab32s(&lsr->lsr_gid_h);
+		__swab64s(&lsr->lsr_valid);
 		tail = &lsr->lsr_tail;
 		break;
 	}
@@ -343,7 +347,6 @@ void lustre_swab_lustre_cfg(struct lustre_cfg *lcfg)
 
 	print_lustre_cfg(lcfg);
 }
-EXPORT_SYMBOL(lustre_swab_lustre_cfg);
 
 /* used only for compatibility with old on-disk cfg_marker data */
 struct cfg_marker32 {
@@ -403,4 +406,3 @@ void lustre_swab_cfg_marker(struct cfg_marker *marker, int swab, int size)
 		__swab64s(&marker->cm_canceltime);
 	}
 }
-EXPORT_SYMBOL(lustre_swab_cfg_marker);
diff --git a/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c b/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c
index 279b625f1afe..852a5acfefab 100644
--- a/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c
+++ b/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c
@@ -96,6 +96,12 @@ static const char * const obd_connect_names[] = {
 	"pingless",
 	"flock_deadlock",
 	"disp_stripe",
+	"open_by_fid",
+	"lfsck",
+	"unknown",
+	"unlink_close",
+	"unknown",
+	"dir_stripe",
 	"unknown",
 	NULL
 };
@@ -309,7 +315,7 @@ struct dentry *ldebugfs_add_simple(struct dentry *root,
 }
 EXPORT_SYMBOL_GPL(ldebugfs_add_simple);
 
-static struct file_operations lprocfs_generic_fops = { };
+static const struct file_operations lprocfs_generic_fops = { };
 
 int ldebugfs_add_vars(struct dentry *parent,
 		      struct lprocfs_vars *list,
@@ -615,7 +621,6 @@ void lprocfs_stats_collect(struct lprocfs_stats *stats, int idx,
 
 	lprocfs_stats_unlock(stats, LPROCFS_GET_NUM_CPU, &flags);
 }
-EXPORT_SYMBOL(lprocfs_stats_collect);
 
 /**
  * Append a space separated list of current set flags to str.
@@ -1043,7 +1048,6 @@ int lprocfs_stats_alloc_one(struct lprocfs_stats *stats, unsigned int cpuid)
 	}
 	return rc;
 }
-EXPORT_SYMBOL(lprocfs_stats_alloc_one);
 
 struct lprocfs_stats *lprocfs_alloc_stats(unsigned int num,
 					  enum lprocfs_stats_flags flags)
@@ -1547,6 +1551,146 @@ void lprocfs_oh_clear(struct obd_histogram *oh)
 }
 EXPORT_SYMBOL(lprocfs_oh_clear);
 
+int lprocfs_wr_root_squash(const char __user *buffer, unsigned long count,
+			   struct root_squash_info *squash, char *name)
+{
+	char kernbuf[64], *tmp, *errmsg;
+	unsigned long uid, gid;
+	int rc;
+
+	if (count >= sizeof(kernbuf)) {
+		errmsg = "string too long";
+		rc = -EINVAL;
+		goto failed_noprint;
+	}
+	if (copy_from_user(kernbuf, buffer, count)) {
+		errmsg = "bad address";
+		rc = -EFAULT;
+		goto failed_noprint;
+	}
+	kernbuf[count] = '\0';
+
+	/* look for uid gid separator */
+	tmp = strchr(kernbuf, ':');
+	if (!tmp) {
+		errmsg = "needs uid:gid format";
+		rc = -EINVAL;
+		goto failed;
+	}
+	*tmp = '\0';
+	tmp++;
+
+	/* parse uid */
+	if (kstrtoul(kernbuf, 0, &uid) != 0) {
+		errmsg = "bad uid";
+		rc = -EINVAL;
+		goto failed;
+	}
+	/* parse gid */
+	if (kstrtoul(tmp, 0, &gid) != 0) {
+		errmsg = "bad gid";
+		rc = -EINVAL;
+		goto failed;
+	}
+
+	squash->rsi_uid = uid;
+	squash->rsi_gid = gid;
+
+	LCONSOLE_INFO("%s: root_squash is set to %u:%u\n",
+		      name, squash->rsi_uid, squash->rsi_gid);
+	return count;
+
+failed:
+	if (tmp) {
+		tmp--;
+		*tmp = ':';
+	}
+	CWARN("%s: failed to set root_squash to \"%s\", %s, rc = %d\n",
+	      name, kernbuf, errmsg, rc);
+	return rc;
+failed_noprint:
+	CWARN("%s: failed to set root_squash due to %s, rc = %d\n",
+	      name, errmsg, rc);
+	return rc;
+}
+EXPORT_SYMBOL(lprocfs_wr_root_squash);
+
+int lprocfs_wr_nosquash_nids(const char __user *buffer, unsigned long count,
+			     struct root_squash_info *squash, char *name)
+{
+	char *kernbuf = NULL, *errmsg;
+	struct list_head tmp;
+	int len = count;
+	int rc;
+
+	if (count > 4096) {
+		errmsg = "string too long";
+		rc = -EINVAL;
+		goto failed;
+	}
+
+	kernbuf = kzalloc(count + 1, GFP_NOFS);
+	if (!kernbuf) {
+		errmsg = "no memory";
+		rc = -ENOMEM;
+		goto failed;
+	}
+
+	if (copy_from_user(kernbuf, buffer, count)) {
+		errmsg = "bad address";
+		rc = -EFAULT;
+		goto failed;
+	}
+	kernbuf[count] = '\0';
+
+	if (count > 0 && kernbuf[count - 1] == '\n')
+		len = count - 1;
+
+	if ((len == 4 && !strncmp(kernbuf, "NONE", len)) ||
+	    (len == 5 && !strncmp(kernbuf, "clear", len))) {
+		/* empty string is special case */
+		down_write(&squash->rsi_sem);
+		if (!list_empty(&squash->rsi_nosquash_nids))
+			cfs_free_nidlist(&squash->rsi_nosquash_nids);
+		up_write(&squash->rsi_sem);
+		LCONSOLE_INFO("%s: nosquash_nids is cleared\n", name);
+		kfree(kernbuf);
+		return count;
+	}
+
+	INIT_LIST_HEAD(&tmp);
+	if (cfs_parse_nidlist(kernbuf, count, &tmp) <= 0) {
+		errmsg = "can't parse";
+		rc = -EINVAL;
+		goto failed;
+	}
+	LCONSOLE_INFO("%s: nosquash_nids set to %s\n",
+		      name, kernbuf);
+	kfree(kernbuf);
+	kernbuf = NULL;
+
+	down_write(&squash->rsi_sem);
+	if (!list_empty(&squash->rsi_nosquash_nids))
+		cfs_free_nidlist(&squash->rsi_nosquash_nids);
+	list_splice(&tmp, &squash->rsi_nosquash_nids);
+	up_write(&squash->rsi_sem);
+
+	return count;
+
+failed:
+	if (kernbuf) {
+		CWARN("%s: failed to set nosquash_nids to \"%s\", %s rc = %d\n",
+		      name, kernbuf, errmsg, rc);
+		kfree(kernbuf);
+		kernbuf = NULL;
+	} else {
+		CWARN("%s: failed to set nosquash_nids due to %s rc = %d\n",
+		      name, errmsg, rc);
+	}
+	return rc;
+}
+EXPORT_SYMBOL(lprocfs_wr_nosquash_nids);
+
 static ssize_t lustre_attr_show(struct kobject *kobj,
 				struct attribute *attr, char *buf)
 {
diff --git a/drivers/staging/lustre/lustre/obdclass/lu_object.c b/drivers/staging/lustre/lustre/obdclass/lu_object.c
index 9b03059f34d6..054e567e6c8d 100644
--- a/drivers/staging/lustre/lustre/obdclass/lu_object.c
+++ b/drivers/staging/lustre/lustre/obdclass/lu_object.c
@@ -55,6 +55,34 @@
 #include "../include/lu_ref.h"
 #include <linux/list.h>
 
+enum {
+	LU_CACHE_PERCENT_MAX	 = 50,
+	LU_CACHE_PERCENT_DEFAULT = 20
+};
+
+#define LU_CACHE_NR_MAX_ADJUST		128
+#define LU_CACHE_NR_UNLIMITED		-1
+#define LU_CACHE_NR_DEFAULT		LU_CACHE_NR_UNLIMITED
+#define LU_CACHE_NR_LDISKFS_LIMIT	LU_CACHE_NR_UNLIMITED
+#define LU_CACHE_NR_ZFS_LIMIT		256
+
+#define LU_SITE_BITS_MIN	12
+#define LU_SITE_BITS_MAX	24
+/**
+ * total 256 buckets, we don't want too many buckets because:
+ * - consume too much memory
+ * - avoid unbalanced LRU list
+ */
+#define LU_SITE_BKT_BITS	8
+
+static unsigned int lu_cache_percent = LU_CACHE_PERCENT_DEFAULT;
+module_param(lu_cache_percent, int, 0644);
+MODULE_PARM_DESC(lu_cache_percent, "Percentage of memory to be used as lu_object cache");
+
+static long lu_cache_nr = LU_CACHE_NR_DEFAULT;
+module_param(lu_cache_nr, long, 0644);
+MODULE_PARM_DESC(lu_cache_nr, "Maximum number of objects in lu_object cache");
+
 static void lu_object_free(const struct lu_env *env, struct lu_object *o);
 static __u32 ls_stats_read(struct lprocfs_stats *stats, int idx);
 
@@ -310,10 +338,10 @@ int lu_site_purge(const struct lu_env *env, struct lu_site *s, int nr)
 	struct cfs_hash_bd	    bd2;
 	struct list_head	       dispose;
 	int		      did_sth;
-	int		      start;
+	unsigned int start;
 	int		      count;
 	int		      bnr;
-	int		      i;
+	unsigned int i;
 
 	if (OBD_FAIL_CHECK(OBD_FAIL_OBD_NO_LRU))
 		return 0;
@@ -324,8 +352,13 @@ int lu_site_purge(const struct lu_env *env, struct lu_site *s, int nr)
 	 * the dispose list, removing them from LRU and hash table.
 	 */
 	start = s->ls_purge_start;
-	bnr = (nr == ~0) ? -1 : nr / CFS_HASH_NBKT(s->ls_obj_hash) + 1;
+	bnr = (nr == ~0) ? -1 : nr / (int)CFS_HASH_NBKT(s->ls_obj_hash) + 1;
  again:
+	/*
+	 * It doesn't make any sense to make purge threads parallel, that can
+	 * only bring troubles to us. See LU-5331.
+	 */
+	mutex_lock(&s->ls_purge_mutex);
 	did_sth = 0;
 	cfs_hash_for_each_bucket(s->ls_obj_hash, &bd, i) {
 		if (i < start)
@@ -371,6 +404,7 @@ int lu_site_purge(const struct lu_env *env, struct lu_site *s, int nr)
 		if (nr == 0)
 			break;
 	}
+	mutex_unlock(&s->ls_purge_mutex);
 
 	if (nr != 0 && did_sth && start != 0) {
 		start = 0; /* restart from the first bucket */
@@ -573,6 +607,27 @@ static struct lu_object *lu_object_find(const struct lu_env *env,
 	return lu_object_find_at(env, dev->ld_site->ls_top_dev, f, conf);
 }
 
+/*
+ * Limit the lu_object cache to a maximum of lu_cache_nr objects.  Because
+ * the calculation for the number of objects to reclaim is not covered by
+ * a lock the maximum number of objects is capped by LU_CACHE_MAX_ADJUST.
+ * This ensures that many concurrent threads will not accidentally purge
+ * the entire cache.
+ */
+static void lu_object_limit(const struct lu_env *env, struct lu_device *dev)
+{
+	__u64 size, nr;
+
+	if (lu_cache_nr == LU_CACHE_NR_UNLIMITED)
+		return;
+
+	size = cfs_hash_size_get(dev->ld_site->ls_obj_hash);
+	nr = (__u64)lu_cache_nr;
+	if (size > nr)
+		lu_site_purge(env, dev->ld_site,
+			      min_t(__u64, size - nr, LU_CACHE_NR_MAX_ADJUST));
+}
+
 static struct lu_object *lu_object_new(const struct lu_env *env,
 				       struct lu_device *dev,
 				       const struct lu_fid *f,
@@ -590,6 +645,9 @@ static struct lu_object *lu_object_new(const struct lu_env *env,
 	cfs_hash_bd_get_and_lock(hs, (void *)f, &bd, 1);
 	cfs_hash_bd_add_locked(hs, &bd, &o->lo_header->loh_hash);
 	cfs_hash_bd_unlock(hs, &bd, 1);
+
+	lu_object_limit(env, dev);
+
 	return o;
 }
 
@@ -656,6 +714,9 @@ static struct lu_object *lu_object_find_try(const struct lu_env *env,
 	if (likely(PTR_ERR(shadow) == -ENOENT)) {
 		cfs_hash_bd_add_locked(hs, &bd, &o->lo_header->loh_hash);
 		cfs_hash_bd_unlock(hs, &bd, 1);
+
+		lu_object_limit(env, dev);
+
 		return o;
 	}
 
@@ -706,13 +767,15 @@ struct lu_object *lu_object_find_slice(const struct lu_env *env,
 	struct lu_object *obj;
 
 	top = lu_object_find(env, dev, f, conf);
-	if (!IS_ERR(top)) {
-		obj = lu_object_locate(top->lo_header, dev->ld_type);
-		if (!obj)
-			lu_object_put(env, top);
-	} else {
-		obj = top;
+	if (IS_ERR(top))
+		return top;
+
+	obj = lu_object_locate(top->lo_header, dev->ld_type);
+	if (unlikely(!obj)) {
+		lu_object_put(env, top);
+		obj = ERR_PTR(-ENOENT);
 	}
+
 	return obj;
 }
 EXPORT_SYMBOL(lu_object_find_slice);
@@ -726,34 +789,31 @@ int lu_device_type_init(struct lu_device_type *ldt)
 {
 	int result = 0;
 
+	atomic_set(&ldt->ldt_device_nr, 0);
 	INIT_LIST_HEAD(&ldt->ldt_linkage);
 	if (ldt->ldt_ops->ldto_init)
 		result = ldt->ldt_ops->ldto_init(ldt);
-	if (result == 0)
+
+	if (!result) {
+		spin_lock(&obd_types_lock);
 		list_add(&ldt->ldt_linkage, &lu_device_types);
+		spin_unlock(&obd_types_lock);
+	}
+
 	return result;
 }
 EXPORT_SYMBOL(lu_device_type_init);
 
 void lu_device_type_fini(struct lu_device_type *ldt)
 {
+	spin_lock(&obd_types_lock);
 	list_del_init(&ldt->ldt_linkage);
+	spin_unlock(&obd_types_lock);
 	if (ldt->ldt_ops->ldto_fini)
 		ldt->ldt_ops->ldto_fini(ldt);
 }
 EXPORT_SYMBOL(lu_device_type_fini);
 
-void lu_types_stop(void)
-{
-	struct lu_device_type *ldt;
-
-	list_for_each_entry(ldt, &lu_device_types, ldt_linkage) {
-		if (ldt->ldt_device_nr == 0 && ldt->ldt_ops->ldto_stop)
-			ldt->ldt_ops->ldto_stop(ldt);
-	}
-}
-EXPORT_SYMBOL(lu_types_stop);
-
 /**
  * Global list of all sites on this node
  */
@@ -808,22 +868,14 @@ void lu_site_print(const struct lu_env *env, struct lu_site *s, void *cookie,
 }
 EXPORT_SYMBOL(lu_site_print);
 
-enum {
-	LU_CACHE_PERCENT_MAX     = 50,
-	LU_CACHE_PERCENT_DEFAULT = 20
-};
-
-static unsigned int lu_cache_percent = LU_CACHE_PERCENT_DEFAULT;
-module_param(lu_cache_percent, int, 0644);
-MODULE_PARM_DESC(lu_cache_percent, "Percentage of memory to be used as lu_object cache");
-
 /**
  * Return desired hash table order.
  */
-static int lu_htable_order(void)
+static unsigned long lu_htable_order(struct lu_device *top)
 {
+	unsigned long bits_max = LU_SITE_BITS_MAX;
 	unsigned long cache_size;
-	int bits;
+	unsigned long bits;
 
 	/*
 	 * Calculate hash table size, assuming that we want reasonable
@@ -854,7 +906,7 @@ static int lu_htable_order(void)
 	for (bits = 1; (1 << bits) < cache_size; ++bits) {
 		;
 	}
-	return bits;
+	return clamp_t(typeof(bits), bits, LU_SITE_BITS_MIN, bits_max);
 }
 
 static unsigned lu_obj_hop_hash(struct cfs_hash *hs,
@@ -930,28 +982,18 @@ static void lu_dev_add_linkage(struct lu_site *s, struct lu_device *d)
 /**
  * Initialize site \a s, with \a d as the top level device.
  */
-#define LU_SITE_BITS_MIN    12
-#define LU_SITE_BITS_MAX    19
-/**
- * total 256 buckets, we don't want too many buckets because:
- * - consume too much memory
- * - avoid unbalanced LRU list
- */
-#define LU_SITE_BKT_BITS    8
-
 int lu_site_init(struct lu_site *s, struct lu_device *top)
 {
 	struct lu_site_bkt_data *bkt;
 	struct cfs_hash_bd bd;
+	unsigned long bits;
+	unsigned long i;
 	char name[16];
-	int bits;
-	int i;
 
 	memset(s, 0, sizeof(*s));
-	bits = lu_htable_order();
-	snprintf(name, 16, "lu_site_%s", top->ld_type->ldt_name);
-	for (bits = min(max(LU_SITE_BITS_MIN, bits), LU_SITE_BITS_MAX);
-	     bits >= LU_SITE_BITS_MIN; bits--) {
+	mutex_init(&s->ls_purge_mutex);
+	snprintf(name, sizeof(name), "lu_site_%s", top->ld_type->ldt_name);
+	for (bits = lu_htable_order(top); bits >= LU_SITE_BITS_MIN; bits--) {
 		s->ls_obj_hash = cfs_hash_create(name, bits, bits,
 						 bits - LU_SITE_BKT_BITS,
 						 sizeof(*bkt), 0, 0,
@@ -959,13 +1001,14 @@ int lu_site_init(struct lu_site *s, struct lu_device *top)
 						 CFS_HASH_SPIN_BKTLOCK |
 						 CFS_HASH_NO_ITEMREF |
 						 CFS_HASH_DEPTH |
-						 CFS_HASH_ASSERT_EMPTY);
+						 CFS_HASH_ASSERT_EMPTY |
+						 CFS_HASH_COUNTER);
 		if (s->ls_obj_hash)
 			break;
 	}
 
 	if (!s->ls_obj_hash) {
-		CERROR("failed to create lu_site hash with bits: %d\n", bits);
+		CERROR("failed to create lu_site hash with bits: %lu\n", bits);
 		return -ENOMEM;
 	}
 
@@ -1082,8 +1125,10 @@ EXPORT_SYMBOL(lu_device_put);
  */
 int lu_device_init(struct lu_device *d, struct lu_device_type *t)
 {
-	if (t->ldt_device_nr++ == 0 && t->ldt_ops->ldto_start)
+	if (atomic_inc_return(&t->ldt_device_nr) == 1 &&
+	    t->ldt_ops->ldto_start)
 		t->ldt_ops->ldto_start(t);
+
 	memset(d, 0, sizeof(*d));
 	atomic_set(&d->ld_ref, 0);
 	d->ld_type = t;
@@ -1098,9 +1143,8 @@ EXPORT_SYMBOL(lu_device_init);
  */
 void lu_device_fini(struct lu_device *d)
 {
-	struct lu_device_type *t;
+	struct lu_device_type *t = d->ld_type;
 
-	t = d->ld_type;
 	if (d->ld_obd) {
 		d->ld_obd->obd_lu_dev = NULL;
 		d->ld_obd = NULL;
@@ -1109,8 +1153,10 @@ void lu_device_fini(struct lu_device *d)
 	lu_ref_fini(&d->ld_reference);
 	LASSERTF(atomic_read(&d->ld_ref) == 0,
 		 "Refcount is %u\n", atomic_read(&d->ld_ref));
-	LASSERT(t->ldt_device_nr > 0);
-	if (--t->ldt_device_nr == 0 && t->ldt_ops->ldto_stop)
+	LASSERT(atomic_read(&t->ldt_device_nr) > 0);
+
+	if (atomic_dec_and_test(&t->ldt_device_nr) &&
+	    t->ldt_ops->ldto_stop)
 		t->ldt_ops->ldto_stop(t);
 }
 EXPORT_SYMBOL(lu_device_fini);
@@ -1254,7 +1300,6 @@ void lu_stack_fini(const struct lu_env *env, struct lu_device *top)
 		}
 	}
 }
-EXPORT_SYMBOL(lu_stack_fini);
 
 enum {
 	/**
@@ -1281,7 +1326,7 @@ static unsigned key_set_version;
 int lu_context_key_register(struct lu_context_key *key)
 {
 	int result;
-	int i;
+	unsigned int i;
 
 	LASSERT(key->lct_init);
 	LASSERT(key->lct_fini);
@@ -1476,18 +1521,16 @@ void lu_context_key_quiesce(struct lu_context_key *key)
 		++key_set_version;
 	}
 }
-EXPORT_SYMBOL(lu_context_key_quiesce);
 
 void lu_context_key_revive(struct lu_context_key *key)
 {
 	key->lct_tags &= ~LCT_QUIESCENT;
 	++key_set_version;
 }
-EXPORT_SYMBOL(lu_context_key_revive);
 
 static void keys_fini(struct lu_context *ctx)
 {
-	int	i;
+	unsigned int i;
 
 	if (!ctx->lc_value)
 		return;
@@ -1501,7 +1544,7 @@ static void keys_fini(struct lu_context *ctx)
 
 static int keys_fill(struct lu_context *ctx)
 {
-	int i;
+	unsigned int i;
 
 	LINVRNT(ctx->lc_value);
 	for (i = 0; i < ARRAY_SIZE(lu_keys); ++i) {
@@ -1614,7 +1657,7 @@ EXPORT_SYMBOL(lu_context_enter);
  */
 void lu_context_exit(struct lu_context *ctx)
 {
-	int i;
+	unsigned int i;
 
 	LINVRNT(ctx->lc_state == LCS_ENTERED);
 	ctx->lc_state = LCS_LEFT;
@@ -1642,7 +1685,6 @@ int lu_context_refill(struct lu_context *ctx)
 {
 	return likely(ctx->lc_version == key_set_version) ? 0 : keys_fill(ctx);
 }
-EXPORT_SYMBOL(lu_context_refill);
 
 /**
  * lu_ctx_tags/lu_ses_tags will be updated if there are new types of
@@ -1696,7 +1738,7 @@ static void lu_site_stats_get(struct cfs_hash *hs,
 			      struct lu_site_stats *stats, int populated)
 {
 	struct cfs_hash_bd bd;
-	int	   i;
+	unsigned int i;
 
 	cfs_hash_for_each_bucket(hs, &bd, i) {
 		struct lu_site_bkt_data *bkt = cfs_hash_bd_extra_get(hs, &bd);
@@ -1940,3 +1982,73 @@ void lu_kmem_fini(struct lu_kmem_descr *caches)
 	}
 }
 EXPORT_SYMBOL(lu_kmem_fini);
+
+void lu_buf_free(struct lu_buf *buf)
+{
+	LASSERT(buf);
+	if (buf->lb_buf) {
+		LASSERT(buf->lb_len > 0);
+		kvfree(buf->lb_buf);
+		buf->lb_buf = NULL;
+		buf->lb_len = 0;
+	}
+}
+EXPORT_SYMBOL(lu_buf_free);
+
+void lu_buf_alloc(struct lu_buf *buf, size_t size)
+{
+	LASSERT(buf);
+	LASSERT(!buf->lb_buf);
+	LASSERT(!buf->lb_len);
+	buf->lb_buf = libcfs_kvzalloc(size, GFP_NOFS);
+	if (likely(buf->lb_buf))
+		buf->lb_len = size;
+}
+EXPORT_SYMBOL(lu_buf_alloc);
+
+void lu_buf_realloc(struct lu_buf *buf, size_t size)
+{
+	lu_buf_free(buf);
+	lu_buf_alloc(buf, size);
+}
+EXPORT_SYMBOL(lu_buf_realloc);
+
+struct lu_buf *lu_buf_check_and_alloc(struct lu_buf *buf, size_t len)
+{
+	if (!buf->lb_buf && !buf->lb_len)
+		lu_buf_alloc(buf, len);
+
+	if ((len > buf->lb_len) && buf->lb_buf)
+		lu_buf_realloc(buf, len);
+
+	return buf;
+}
+EXPORT_SYMBOL(lu_buf_check_and_alloc);
+
+/**
+ * Increase the size of the \a buf.
+ * preserves old data in buffer
+ * old buffer remains unchanged on error
+ * \retval 0 or -ENOMEM
+ */
+int lu_buf_check_and_grow(struct lu_buf *buf, size_t len)
+{
+	char *ptr;
+
+	if (len <= buf->lb_len)
+		return 0;
+
+	ptr = libcfs_kvzalloc(len, GFP_NOFS);
+	if (!ptr)
+		return -ENOMEM;
+
+	/* Free the old buf */
+	if (buf->lb_buf) {
+		memcpy(ptr, buf->lb_buf, buf->lb_len);
+		kvfree(buf->lb_buf);
+	}
+
+	buf->lb_buf = ptr;
+	buf->lb_len = len;
+	return 0;
+}
diff --git a/drivers/staging/lustre/lustre/obdclass/lustre_handles.c b/drivers/staging/lustre/lustre/obdclass/lustre_handles.c
index 082f530c527c..c9445e5ec271 100644
--- a/drivers/staging/lustre/lustre/obdclass/lustre_handles.c
+++ b/drivers/staging/lustre/lustre/obdclass/lustre_handles.c
@@ -130,7 +130,7 @@ void class_handle_unhash(struct portals_handle *h)
 }
 EXPORT_SYMBOL(class_handle_unhash);
 
-void *class_handle2object(__u64 cookie)
+void *class_handle2object(__u64 cookie, const void *owner)
 {
 	struct handle_bucket *bucket;
 	struct portals_handle *h;
@@ -145,7 +145,7 @@ void *class_handle2object(__u64 cookie)
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(h, &bucket->head, h_link) {
-		if (h->h_cookie != cookie)
+		if (h->h_cookie != cookie || h->h_owner != owner)
 			continue;
 
 		spin_lock(&h->h_lock);
@@ -164,8 +164,11 @@ EXPORT_SYMBOL(class_handle2object);
 
 void class_handle_free_cb(struct rcu_head *rcu)
 {
-	struct portals_handle *h = RCU2HANDLE(rcu);
-	void *ptr = (void *)(unsigned long)h->h_cookie;
+	struct portals_handle *h;
+	void *ptr;
+
+	h = container_of(rcu, struct portals_handle, h_rcu);
+	ptr = (void *)(unsigned long)h->h_cookie;
 
 	if (h->h_ops->hop_free)
 		h->h_ops->hop_free(ptr, h->h_size);
@@ -214,7 +217,7 @@ static int cleanup_all_handles(void)
 		struct portals_handle *h;
 
 		spin_lock(&handle_hash[i].lock);
-		list_for_each_entry_rcu(h, &(handle_hash[i].head), h_link) {
+		list_for_each_entry_rcu(h, &handle_hash[i].head, h_link) {
 			CERROR("force clean handle %#llx addr %p ops %p\n",
 			       h->h_cookie, h, h->h_ops);
 
diff --git a/drivers/staging/lustre/lustre/obdclass/lustre_peer.c b/drivers/staging/lustre/lustre/obdclass/lustre_peer.c
index 5974a9bf77c0..ffa740aa861c 100644
--- a/drivers/staging/lustre/lustre/obdclass/lustre_peer.c
+++ b/drivers/staging/lustre/lustre/obdclass/lustre_peer.c
@@ -139,7 +139,6 @@ int class_add_uuid(const char *uuid, __u64 nid)
 	}
 	return 0;
 }
-EXPORT_SYMBOL(class_add_uuid);
 
 /* Delete the nids for one uuid if specified, otherwise delete all */
 int class_del_uuid(const char *uuid)
diff --git a/drivers/staging/lustre/lustre/obdclass/obd_config.c b/drivers/staging/lustre/lustre/obdclass/obd_config.c
index 0eab1236501b..bbed1b72d52e 100644
--- a/drivers/staging/lustre/lustre/obdclass/obd_config.c
+++ b/drivers/staging/lustre/lustre/obdclass/obd_config.c
@@ -37,6 +37,7 @@
 #define DEBUG_SUBSYSTEM S_CLASS
 #include "../include/obd_class.h"
 #include <linux/string.h>
+#include "../include/lustre/lustre_ioctl.h"
 #include "../include/lustre_log.h"
 #include "../include/lprocfs_status.h"
 #include "../include/lustre_param.h"
@@ -237,7 +238,7 @@ static int class_attach(struct lustre_cfg *lcfg)
 	/* recovery data */
 	init_waitqueue_head(&obd->obd_evict_inprogress_waitq);
 
-	llog_group_init(&obd->obd_olg, FID_SEQ_LLOG);
+	llog_group_init(&obd->obd_olg);
 
 	obd->obd_conn_inprogress = 0;
 
@@ -250,15 +251,6 @@ static int class_attach(struct lustre_cfg *lcfg)
 	}
 	memcpy(obd->obd_uuid.uuid, uuid, len);
 
-	/* do the attach */
-	if (OBP(obd, attach)) {
-		rc = OBP(obd, attach)(obd, sizeof(*lcfg), lcfg);
-		if (rc) {
-			rc = -EINVAL;
-			goto out;
-		}
-	}
-
 	/* Detach drops this */
 	spin_lock(&obd->obd_dev_lock);
 	atomic_set(&obd->obd_refcount, 1);
@@ -422,17 +414,12 @@ static int class_cleanup(struct obd_device *obd, struct lustre_cfg *lcfg)
 	}
 	/* Leave this on forever */
 	obd->obd_stopping = 1;
-
-	/* wait for already-arrived-connections to finish. */
-	while (obd->obd_conn_inprogress > 0) {
-		spin_unlock(&obd->obd_dev_lock);
-
-		cond_resched();
-
-		spin_lock(&obd->obd_dev_lock);
-	}
 	spin_unlock(&obd->obd_dev_lock);
 
+	while (obd->obd_conn_inprogress > 0)
+		yield();
+	smp_rmb();
+
 	if (lcfg->lcfg_bufcount >= 2 && LUSTRE_CFG_BUFLEN(lcfg, 1) > 0) {
 		for (flag = lustre_cfg_string(lcfg, 1); *flag != 0; flag++)
 			switch (*flag) {
@@ -526,11 +513,6 @@ void class_decref(struct obd_device *obd, const char *scope, const void *source)
 				CERROR("Cleanup %s returned %d\n",
 				       obd->obd_name, err);
 		}
-		if (OBP(obd, detach)) {
-			err = OBP(obd, detach)(obd);
-			if (err)
-				CERROR("Detach returned %d\n", err);
-		}
 		class_release_dev(obd);
 	}
 }
@@ -756,7 +738,7 @@ static int process_param2_config(struct lustre_cfg *lcfg)
 	}
 
 	start = ktime_get();
-	rc = call_usermodehelper(argv[0], argv, NULL, 1);
+	rc = call_usermodehelper(argv[0], argv, NULL, UMH_WAIT_PROC);
 	end = ktime_get();
 
 	if (rc < 0) {
@@ -1026,7 +1008,7 @@ int class_process_proc_param(char *prefix, struct lprocfs_vars *lvars,
 
 					oldfs = get_fs();
 					set_fs(KERNEL_DS);
-					rc = (var->fops->write)(&fakefile, sval,
+					rc = var->fops->write(&fakefile, sval,
 								vallen, NULL);
 					set_fs(oldfs);
 				}
@@ -1060,8 +1042,6 @@ int class_process_proc_param(char *prefix, struct lprocfs_vars *lvars,
 }
 EXPORT_SYMBOL(class_process_proc_param);
 
-extern int lustre_check_exclusion(struct super_block *sb, char *svname);
-
 /** Parse a configuration llog, doing various manipulations on them
  * for various reasons, (modifications for compatibility, skip obsolete
  * records, change uuids, etc), then class_process_config() resulting
@@ -1317,33 +1297,33 @@ static int class_config_parse_rec(struct llog_rec_hdr *rec, char *buf,
 	if (rc < 0)
 		return rc;
 
-	ptr += snprintf(ptr, end-ptr, "cmd=%05x ", lcfg->lcfg_command);
+	ptr += snprintf(ptr, end - ptr, "cmd=%05x ", lcfg->lcfg_command);
 	if (lcfg->lcfg_flags)
-		ptr += snprintf(ptr, end-ptr, "flags=%#08x ",
+		ptr += snprintf(ptr, end - ptr, "flags=%#08x ",
 				lcfg->lcfg_flags);
 
 	if (lcfg->lcfg_num)
-		ptr += snprintf(ptr, end-ptr, "num=%#08x ", lcfg->lcfg_num);
+		ptr += snprintf(ptr, end - ptr, "num=%#08x ", lcfg->lcfg_num);
 
 	if (lcfg->lcfg_nid) {
 		char nidstr[LNET_NIDSTR_SIZE];
 
 		libcfs_nid2str_r(lcfg->lcfg_nid, nidstr, sizeof(nidstr));
-		ptr += snprintf(ptr, end-ptr, "nid=%s(%#llx)\n     ",
+		ptr += snprintf(ptr, end - ptr, "nid=%s(%#llx)\n     ",
 				nidstr, lcfg->lcfg_nid);
 	}
 
 	if (lcfg->lcfg_command == LCFG_MARKER) {
 		struct cfg_marker *marker = lustre_cfg_buf(lcfg, 1);
 
-		ptr += snprintf(ptr, end-ptr, "marker=%d(%#x)%s '%s'",
+		ptr += snprintf(ptr, end - ptr, "marker=%d(%#x)%s '%s'",
 				marker->cm_step, marker->cm_flags,
 				marker->cm_tgtname, marker->cm_comment);
 	} else {
 		int i;
 
 		for (i = 0; i <  lcfg->lcfg_bufcount; i++) {
-			ptr += snprintf(ptr, end-ptr, "%d:%s  ", i,
+			ptr += snprintf(ptr, end - ptr, "%d:%s  ", i,
 					lustre_cfg_string(lcfg, i));
 		}
 	}
diff --git a/drivers/staging/lustre/lustre/obdclass/obd_mount.c b/drivers/staging/lustre/lustre/obdclass/obd_mount.c
index aa84a50e9904..0d3a3b05a637 100644
--- a/drivers/staging/lustre/lustre/obdclass/obd_mount.c
+++ b/drivers/staging/lustre/lustre/obdclass/obd_mount.c
@@ -37,11 +37,11 @@
  */
 
 #define DEBUG_SUBSYSTEM S_CLASS
-#define D_MOUNT (D_SUPER|D_CONFIG/*|D_WARNING */)
+#define D_MOUNT (D_SUPER | D_CONFIG/*|D_WARNING */)
 #define PRINT_CMD CDEBUG
 
 #include "../include/obd.h"
-#include "../include/linux/lustre_compat25.h"
+#include "../include/lustre_compat.h"
 #include "../include/obd_class.h"
 #include "../include/lustre/lustre_user.h"
 #include "../include/lustre_log.h"
@@ -68,7 +68,7 @@ static void (*kill_super_cb)(struct super_block *sb);
  *   this log, and is added to the mgc's list of logs to follow.
  */
 int lustre_process_log(struct super_block *sb, char *logname,
-		      struct config_llog_instance *cfg)
+		       struct config_llog_instance *cfg)
 {
 	struct lustre_cfg *lcfg;
 	struct lustre_cfg_bufs *bufs;
@@ -384,17 +384,15 @@ int lustre_start_mgc(struct super_block *sb)
 				  OBD_CONNECT_FULL20 | OBD_CONNECT_IMP_RECOV |
 				  OBD_CONNECT_LVB_TYPE;
 
-#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 2, 50, 0)
+#if OBD_OCD_VERSION(3, 0, 53, 0) > LUSTRE_VERSION_CODE
 	data->ocd_connect_flags |= OBD_CONNECT_MNE_SWAB;
-#else
-#warning "LU-1644: Remove old OBD_CONNECT_MNE_SWAB fixup and imp_need_mne_swab"
 #endif
 
 	if (lmd_is_client(lsi->lsi_lmd) &&
 	    lsi->lsi_lmd->lmd_flags & LMD_FLG_NOIR)
 		data->ocd_connect_flags &= ~OBD_CONNECT_IMP_RECOV;
 	data->ocd_version = LUSTRE_VERSION_CODE;
-	rc = obd_connect(NULL, &exp, obd, &(obd->obd_uuid), data, NULL);
+	rc = obd_connect(NULL, &exp, obd, &obd->obd_uuid, data, NULL);
 	if (rc) {
 		CERROR("connect failed %d\n", rc);
 		goto out;
@@ -670,7 +668,6 @@ int lustre_common_put_super(struct super_block *sb)
 	}
 	/* Drop a ref to the mounted disk */
 	lustre_put_lsi(sb);
-	lu_types_stop();
 	return rc;
 }
 EXPORT_SYMBOL(lustre_common_put_super);
@@ -731,7 +728,7 @@ int lustre_check_exclusion(struct super_block *sb, char *svname)
 static int lmd_make_exclusion(struct lustre_mount_data *lmd, const char *ptr)
 {
 	const char *s1 = ptr, *s2;
-	__u32 index, *exclude_list;
+	__u32 index = 0, *exclude_list;
 	int rc = 0, devmax;
 
 	/* The shortest an ost name can be is 8 chars: -OST0000.
@@ -758,7 +755,7 @@ static int lmd_make_exclusion(struct lustre_mount_data *lmd, const char *ptr)
 			exclude_list[lmd->lmd_exclude_count++] = index;
 		else
 			CDEBUG(D_MOUNT, "ignoring exclude %.*s: type = %#x\n",
-			       (uint)(s2-s1), s1, rc);
+			       (uint)(s2 - s1), s1, rc);
 		s1 = s2;
 		/* now we are pointing at ':' (next exclude)
 		 * or ',' (end of excludes)
@@ -880,7 +877,7 @@ static int lmd_parse_mgs(struct lustre_mount_data *lmd, char **ptr)
  */
 static int lmd_parse(char *options, struct lustre_mount_data *lmd)
 {
-	char *s1, *s2, *devname = NULL;
+	char *s1, *s2, *s3, *devname = NULL;
 	struct lustre_mount_data *raw = (struct lustre_mount_data *)options;
 	int rc = 0;
 
@@ -913,6 +910,7 @@ static int lmd_parse(char *options, struct lustre_mount_data *lmd)
 		/* Skip whitespace and extra commas */
 		while (*s1 == ' ' || *s1 == ',')
 			s1++;
+		s3 = s1;
 
 		/* Client options are parsed in ll_options: eg. flock,
 		 * user_xattr, acl
@@ -970,6 +968,7 @@ static int lmd_parse(char *options, struct lustre_mount_data *lmd)
 			rc = lmd_parse_mgssec(lmd, s1 + 7);
 			if (rc)
 				goto invalid;
+			s3 = s2;
 			clear++;
 		/* ost exclusion list */
 		} else if (strncmp(s1, "exclude=", 8) == 0) {
@@ -990,10 +989,19 @@ static int lmd_parse(char *options, struct lustre_mount_data *lmd)
 			size_t length, params_length;
 			char *tail = strchr(s1 + 6, ',');
 
-			if (!tail)
+			if (!tail) {
 				length = strlen(s1);
-			else
-				length = tail - s1;
+			} else {
+				lnet_nid_t nid;
+				char *param_str = tail + 1;
+				int supplementary = 1;
+
+				while (!class_parse_nid_quiet(param_str, &nid,
+							      &param_str)) {
+					supplementary = 0;
+				}
+				length = param_str - s1 - supplementary;
+			}
 			length -= 6;
 			params_length = strlen(lmd->lmd_params);
 			if (params_length + length + 1 >= LMD_PARAMS_MAXLEN)
@@ -1001,6 +1009,7 @@ static int lmd_parse(char *options, struct lustre_mount_data *lmd)
 			strncat(lmd->lmd_params, s1 + 6, length);
 			lmd->lmd_params[params_length + length] = '\0';
 			strlcat(lmd->lmd_params, " ", LMD_PARAMS_MAXLEN);
+			s3 = s1 + 6 + length;
 			clear++;
 		} else if (strncmp(s1, "osd=", 4) == 0) {
 			rc = lmd_parse_string(&lmd->lmd_osd_type, s1 + 4);
@@ -1097,7 +1106,7 @@ static int lustre_fill_super(struct super_block *sb, void *data, int silent)
 	struct lustre_sb_info *lsi;
 	int rc;
 
-	CDEBUG(D_MOUNT|D_VFSTRACE, "VFS Op: sb %p\n", sb);
+	CDEBUG(D_MOUNT | D_VFSTRACE, "VFS Op: sb %p\n", sb);
 
 	lsi = lustre_init_lsi(sb);
 	if (!lsi)
@@ -1133,7 +1142,7 @@ static int lustre_fill_super(struct super_block *sb, void *data, int silent)
 		} else {
 			rc = lustre_start_mgc(sb);
 			if (rc) {
-				lustre_put_lsi(sb);
+				lustre_common_put_super(sb);
 				goto out;
 			}
 			/* Connect and start */
diff --git a/drivers/staging/lustre/lustre/obdclass/obdo.c b/drivers/staging/lustre/lustre/obdclass/obdo.c
index 8583a4a8c206..79104a66da96 100644
--- a/drivers/staging/lustre/lustre/obdclass/obdo.c
+++ b/drivers/staging/lustre/lustre/obdclass/obdo.c
@@ -112,7 +112,7 @@ void obdo_from_inode(struct obdo *dst, struct inode *src, u32 valid)
 }
 EXPORT_SYMBOL(obdo_from_inode);
 
-void obdo_to_ioobj(struct obdo *oa, struct obd_ioobj *ioobj)
+void obdo_to_ioobj(const struct obdo *oa, struct obd_ioobj *ioobj)
 {
 	ioobj->ioo_oid = oa->o_oi;
 	if (unlikely(!(oa->o_valid & OBD_MD_FLGROUP)))
@@ -125,7 +125,8 @@ void obdo_to_ioobj(struct obdo *oa, struct obd_ioobj *ioobj)
 }
 EXPORT_SYMBOL(obdo_to_ioobj);
 
-static void iattr_from_obdo(struct iattr *attr, struct obdo *oa, u32 valid)
+static void iattr_from_obdo(struct iattr *attr, const struct obdo *oa,
+			    u32 valid)
 {
 	valid &= oa->o_valid;
 
@@ -152,12 +153,14 @@ static void iattr_from_obdo(struct iattr *attr, struct obdo *oa, u32 valid)
 	}
 #if 0   /* you shouldn't be able to change a file's type with setattr */
 	if (valid & OBD_MD_FLTYPE) {
-		attr->ia_mode = (attr->ia_mode & ~S_IFMT)|(oa->o_mode & S_IFMT);
+		attr->ia_mode = (attr->ia_mode & ~S_IFMT) |
+				(oa->o_mode & S_IFMT);
 		attr->ia_valid |= ATTR_MODE;
 	}
 #endif
 	if (valid & OBD_MD_FLMODE) {
-		attr->ia_mode = (attr->ia_mode & S_IFMT)|(oa->o_mode & ~S_IFMT);
+		attr->ia_mode = (attr->ia_mode & S_IFMT) |
+				(oa->o_mode & ~S_IFMT);
 		attr->ia_valid |= ATTR_MODE;
 		if (!in_group_p(make_kgid(&init_user_ns, oa->o_gid)) &&
 		    !capable(CFS_CAP_FSETID))
@@ -173,7 +176,7 @@ static void iattr_from_obdo(struct iattr *attr, struct obdo *oa, u32 valid)
 	}
 }
 
-void md_from_obdo(struct md_op_data *op_data, struct obdo *oa, u32 valid)
+void md_from_obdo(struct md_op_data *op_data, const struct obdo *oa, u32 valid)
 {
 	iattr_from_obdo(&op_data->op_attr, oa, valid);
 	if (valid & OBD_MD_FLBLOCKS) {
diff --git a/drivers/staging/lustre/lustre/obdecho/echo_client.c b/drivers/staging/lustre/lustre/obdecho/echo_client.c
index 5b29c4a44fe5..505582ff4d1e 100644
--- a/drivers/staging/lustre/lustre/obdecho/echo_client.c
+++ b/drivers/staging/lustre/lustre/obdecho/echo_client.c
@@ -41,6 +41,7 @@
 #include "../include/cl_object.h"
 #include "../include/lustre_fid.h"
 #include "../include/lustre_acl.h"
+#include "../include/lustre/lustre_ioctl.h"
 #include "../include/lustre_net.h"
 
 #include "echo_internal.h"
@@ -64,14 +65,14 @@ struct echo_object {
 
 	struct echo_device     *eo_dev;
 	struct list_head	      eo_obj_chain;
-	struct lov_stripe_md   *eo_lsm;
+	struct lov_oinfo       *eo_oinfo;
 	atomic_t	    eo_npages;
 	int		     eo_deleted;
 };
 
 struct echo_object_conf {
 	struct cl_object_conf  eoc_cl;
-	struct lov_stripe_md **eoc_md;
+	struct lov_oinfo      **eoc_oinfo;
 };
 
 struct echo_page {
@@ -152,9 +153,6 @@ struct echo_object_conf *cl2echo_conf(const struct cl_object_conf *c)
 }
 
 /** @} echo_helpers */
-
-static struct echo_object *cl_echo_object_find(struct echo_device *d,
-					       struct lov_stripe_md **lsm);
 static int cl_echo_object_put(struct echo_object *eco);
 static int cl_echo_object_brw(struct echo_object *eco, int rw, u64 offset,
 			      struct page **pages, int npages, int async);
@@ -413,10 +411,13 @@ static int echo_object_init(const struct lu_env *env, struct lu_object *obj,
 	cconf = lu2cl_conf(conf);
 	econf = cl2echo_conf(cconf);
 
-	LASSERT(econf->eoc_md);
-	eco->eo_lsm = *econf->eoc_md;
-	/* clear the lsm pointer so that it won't get freed. */
-	*econf->eoc_md = NULL;
+	LASSERT(econf->eoc_oinfo);
+	/*
+	 * Transfer the oinfo pointer to eco that it won't be
+	 * freed.
+	 */
+	eco->eo_oinfo = *econf->eoc_oinfo;
+	*econf->eoc_oinfo = NULL;
 
 	eco->eo_dev = ed;
 	atomic_set(&eco->eo_npages, 0);
@@ -429,52 +430,6 @@ static int echo_object_init(const struct lu_env *env, struct lu_object *obj,
 	return 0;
 }
 
-/* taken from osc_unpackmd() */
-static int echo_alloc_memmd(struct echo_device *ed,
-			    struct lov_stripe_md **lsmp)
-{
-	int lsm_size;
-
-	/* If export is lov/osc then use their obd method */
-	if (ed->ed_next)
-		return obd_alloc_memmd(ed->ed_ec->ec_exp, lsmp);
-	/* OFD has no unpackmd method, do everything here */
-	lsm_size = lov_stripe_md_size(1);
-
-	LASSERT(!*lsmp);
-	*lsmp = kzalloc(lsm_size, GFP_NOFS);
-	if (!*lsmp)
-		return -ENOMEM;
-
-	(*lsmp)->lsm_oinfo[0] = kzalloc(sizeof(struct lov_oinfo), GFP_NOFS);
-	if (!(*lsmp)->lsm_oinfo[0]) {
-		kfree(*lsmp);
-		return -ENOMEM;
-	}
-
-	loi_init((*lsmp)->lsm_oinfo[0]);
-	(*lsmp)->lsm_maxbytes = LUSTRE_STRIPE_MAXBYTES;
-	ostid_set_seq_echo(&(*lsmp)->lsm_oi);
-
-	return lsm_size;
-}
-
-static int echo_free_memmd(struct echo_device *ed, struct lov_stripe_md **lsmp)
-{
-	int lsm_size;
-
-	/* If export is lov/osc then use their obd method */
-	if (ed->ed_next)
-		return obd_free_memmd(ed->ed_ec->ec_exp, lsmp);
-	/* OFD has no unpackmd method, do everything here */
-	lsm_size = lov_stripe_md_size(1);
-
-	kfree((*lsmp)->lsm_oinfo[0]);
-	kfree(*lsmp);
-	*lsmp = NULL;
-	return 0;
-}
-
 static void echo_object_free(const struct lu_env *env, struct lu_object *obj)
 {
 	struct echo_object *eco    = cl2echo_obj(lu2cl(obj));
@@ -489,8 +444,7 @@ static void echo_object_free(const struct lu_env *env, struct lu_object *obj)
 	lu_object_fini(obj);
 	lu_object_header_fini(obj->lo_header);
 
-	if (eco->eo_lsm)
-		echo_free_memmd(eco->eo_dev, &eco->eo_lsm);
+	kfree(eco->eo_oinfo);
 	kmem_cache_free(echo_object_kmem, eco);
 }
 
@@ -864,25 +818,21 @@ static struct lu_device_type echo_device_type = {
  */
 
 /* Interfaces to echo client obd device */
-static struct echo_object *cl_echo_object_find(struct echo_device *d,
-					       struct lov_stripe_md **lsmp)
+static struct echo_object *
+cl_echo_object_find(struct echo_device *d, const struct ost_id *oi)
 {
 	struct lu_env *env;
 	struct echo_thread_info *info;
 	struct echo_object_conf *conf;
-	struct lov_stripe_md    *lsm;
+	struct lov_oinfo *oinfo = NULL;
 	struct echo_object *eco;
 	struct cl_object   *obj;
 	struct lu_fid *fid;
 	int refcheck;
 	int rc;
 
-	LASSERT(lsmp);
-	lsm = *lsmp;
-	LASSERT(lsm);
-	LASSERTF(ostid_id(&lsm->lsm_oi) != 0, DOSTID"\n", POSTID(&lsm->lsm_oi));
-	LASSERTF(ostid_seq(&lsm->lsm_oi) == FID_SEQ_ECHO, DOSTID"\n",
-		 POSTID(&lsm->lsm_oi));
+	LASSERTF(ostid_id(oi), DOSTID "\n", POSTID(oi));
+	LASSERTF(ostid_seq(oi) == FID_SEQ_ECHO, DOSTID "\n", POSTID(oi));
 
 	/* Never return an object if the obd is to be freed. */
 	if (echo_dev2cl(d)->cd_lu_dev.ld_obd->obd_stopping)
@@ -895,16 +845,24 @@ static struct echo_object *cl_echo_object_find(struct echo_device *d,
 	info = echo_env_info(env);
 	conf = &info->eti_conf;
 	if (d->ed_next) {
-		struct lov_oinfo *oinfo = lsm->lsm_oinfo[0];
+		oinfo = kzalloc(sizeof(*oinfo), GFP_NOFS);
+		if (!oinfo) {
+			eco = ERR_PTR(-ENOMEM);
+			goto out;
+		}
 
-		LASSERT(oinfo);
-		oinfo->loi_oi = lsm->lsm_oi;
+		oinfo->loi_oi = *oi;
 		conf->eoc_cl.u.coc_oinfo = oinfo;
 	}
-	conf->eoc_md = lsmp;
+
+	/*
+	 * If echo_object_init() is successful then ownership of oinfo
+	 * is transferred to the object.
+	 */
+	conf->eoc_oinfo = &oinfo;
 
 	fid  = &info->eti_fid;
-	rc = ostid_to_fid(fid, &lsm->lsm_oi, 0);
+	rc = ostid_to_fid(fid, (struct ost_id *)oi, 0);
 	if (rc != 0) {
 		eco = ERR_PTR(rc);
 		goto out;
@@ -927,6 +885,7 @@ static struct echo_object *cl_echo_object_find(struct echo_device *d,
 	}
 
 out:
+	kfree(oinfo);
 	cl_env_put(env, &refcheck);
 	return eco;
 }
@@ -1051,7 +1010,7 @@ static int cl_echo_object_brw(struct echo_object *eco, int rw, u64 offset,
 	struct cl_io	    *io;
 	struct cl_page	  *clp;
 	struct lustre_handle    lh = { 0 };
-	int page_size = cl_page_size(obj);
+	size_t page_size = cl_page_size(obj);
 	int refcheck;
 	int rc;
 	int i;
@@ -1145,7 +1104,6 @@ static int echo_create_object(const struct lu_env *env, struct echo_device *ed,
 {
 	struct echo_object     *eco;
 	struct echo_client_obd *ec = ed->ed_ec;
-	struct lov_stripe_md   *lsm = NULL;
 	int		     rc;
 	int		     created = 0;
 
@@ -1156,30 +1114,19 @@ static int echo_create_object(const struct lu_env *env, struct echo_device *ed,
 		return -EINVAL;
 	}
 
-	rc = echo_alloc_memmd(ed, &lsm);
-	if (rc < 0) {
-		CERROR("Cannot allocate md: rc = %d\n", rc);
-		goto failed;
-	}
-
-	/* setup object ID here */
-	lsm->lsm_oi = oa->o_oi;
+	if (!ostid_id(&oa->o_oi))
+		ostid_set_id(&oa->o_oi, ++last_object_id);
 
-	if (ostid_id(&lsm->lsm_oi) == 0)
-		ostid_set_id(&lsm->lsm_oi, ++last_object_id);
-
-	rc = obd_create(env, ec->ec_exp, oa, &lsm, oti);
+	rc = obd_create(env, ec->ec_exp, oa, oti);
 	if (rc != 0) {
 		CERROR("Cannot create objects: rc = %d\n", rc);
 		goto failed;
 	}
 	created = 1;
 
-	/* See what object ID we were given */
-	oa->o_oi = lsm->lsm_oi;
 	oa->o_valid |= OBD_MD_FLID;
 
-	eco = cl_echo_object_find(ed, &lsm);
+	eco = cl_echo_object_find(ed, &oa->o_oi);
 	if (IS_ERR(eco)) {
 		rc = PTR_ERR(eco);
 		goto failed;
@@ -1190,9 +1137,7 @@ static int echo_create_object(const struct lu_env *env, struct echo_device *ed,
 
  failed:
 	if (created && rc)
-		obd_destroy(env, ec->ec_exp, oa, lsm, oti, NULL);
-	if (lsm)
-		echo_free_memmd(ed, &lsm);
+		obd_destroy(env, ec->ec_exp, oa, oti);
 	if (rc)
 		CERROR("create object failed with: rc = %d\n", rc);
 	return rc;
@@ -1201,32 +1146,21 @@ static int echo_create_object(const struct lu_env *env, struct echo_device *ed,
 static int echo_get_object(struct echo_object **ecop, struct echo_device *ed,
 			   struct obdo *oa)
 {
-	struct lov_stripe_md   *lsm = NULL;
 	struct echo_object     *eco;
 	int		     rc;
 
-	if ((oa->o_valid & OBD_MD_FLID) == 0 || ostid_id(&oa->o_oi) == 0) {
-		/* disallow use of object id 0 */
-		CERROR("No valid oid\n");
+	if (!(oa->o_valid & OBD_MD_FLID) || !(oa->o_valid & OBD_MD_FLGROUP) ||
+	    !ostid_id(&oa->o_oi)) {
+		CERROR("invalid oid " DOSTID "\n", POSTID(&oa->o_oi));
 		return -EINVAL;
 	}
 
-	rc = echo_alloc_memmd(ed, &lsm);
-	if (rc < 0)
-		return rc;
-
-	lsm->lsm_oi = oa->o_oi;
-	if (!(oa->o_valid & OBD_MD_FLGROUP))
-		ostid_set_seq_echo(&lsm->lsm_oi);
-
 	rc = 0;
-	eco = cl_echo_object_find(ed, &lsm);
+	eco = cl_echo_object_find(ed, &oa->o_oi);
 	if (!IS_ERR(eco))
 		*ecop = eco;
 	else
 		rc = PTR_ERR(eco);
-	if (lsm)
-		echo_free_memmd(ed, &lsm);
 	return rc;
 }
 
@@ -1436,13 +1370,12 @@ static int echo_client_prep_commit(const struct lu_env *env,
 			npages = tot_pages;
 
 		for (i = 0; i < npages; i++, off += PAGE_SIZE) {
-			rnb[i].offset = off;
-			rnb[i].len = PAGE_SIZE;
-			rnb[i].flags = brw_flags;
+			rnb[i].rnb_offset = off;
+			rnb[i].rnb_len = PAGE_SIZE;
+			rnb[i].rnb_flags = brw_flags;
 		}
 
 		ioo.ioo_bufcnt = npages;
-		oti->oti_transno = 0;
 
 		lpages = npages;
 		ret = obd_preprw(env, rw, exp, oa, 1, &ioo, rnb, &lpages,
@@ -1452,14 +1385,14 @@ static int echo_client_prep_commit(const struct lu_env *env,
 		LASSERT(lpages == npages);
 
 		for (i = 0; i < lpages; i++) {
-			struct page *page = lnb[i].page;
+			struct page *page = lnb[i].lnb_page;
 
 			/* read past eof? */
-			if (!page  && lnb[i].rc == 0)
+			if (!page  && lnb[i].lnb_rc == 0)
 				continue;
 
 			if (async)
-				lnb[i].flags |= OBD_BRW_ASYNC;
+				lnb[i].lnb_flags |= OBD_BRW_ASYNC;
 
 			if (ostid_id(&oa->o_oi) == ECHO_PERSISTENT_OBJID ||
 			    (oa->o_valid & OBD_MD_FLFLAGS) == 0 ||
@@ -1469,13 +1402,13 @@ static int echo_client_prep_commit(const struct lu_env *env,
 			if (rw == OBD_BRW_WRITE)
 				echo_client_page_debug_setup(page, rw,
 							    ostid_id(&oa->o_oi),
-							     rnb[i].offset,
-							     rnb[i].len);
+							     rnb[i].rnb_offset,
+							     rnb[i].rnb_len);
 			else
 				echo_client_page_debug_check(page,
 							    ostid_id(&oa->o_oi),
-							     rnb[i].offset,
-							     rnb[i].len);
+							     rnb[i].rnb_offset,
+							     rnb[i].rnb_len);
 		}
 
 		ret = obd_commitrw(env, rw, exp, oa, 1, &ioo,
@@ -1613,8 +1546,7 @@ echo_client_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
 
 		rc = echo_get_object(&eco, ed, oa);
 		if (rc == 0) {
-			rc = obd_destroy(env, ec->ec_exp, oa, NULL,
-					 &dummy_oti, NULL);
+			rc = obd_destroy(env, ec->ec_exp, oa, &dummy_oti);
 			if (rc == 0)
 				eco->eo_deleted = 1;
 			echo_put_object(eco);
diff --git a/drivers/staging/lustre/lustre/obdecho/echo_internal.h b/drivers/staging/lustre/lustre/obdecho/echo_internal.h
index f5034a253f6d..966414fd5424 100644
--- a/drivers/staging/lustre/lustre/obdecho/echo_internal.h
+++ b/drivers/staging/lustre/lustre/obdecho/echo_internal.h
@@ -33,9 +33,9 @@
 
 /* The persistent object (i.e. actually stores stuff!) */
 #define ECHO_PERSISTENT_OBJID    1ULL
-#define ECHO_PERSISTENT_SIZE     ((__u64)(1<<20))
+#define ECHO_PERSISTENT_SIZE     ((__u64)(1 << 20))
 
 /* block size to use for data verification */
-#define OBD_ECHO_BLOCK_SIZE	(4<<10)
+#define OBD_ECHO_BLOCK_SIZE	(4 << 10)
 
 #endif
diff --git a/drivers/staging/lustre/lustre/osc/lproc_osc.c b/drivers/staging/lustre/lustre/osc/lproc_osc.c
index 7e83d395b998..f0062d44ee03 100644
--- a/drivers/staging/lustre/lustre/osc/lproc_osc.c
+++ b/drivers/staging/lustre/lustre/osc/lproc_osc.c
@@ -119,6 +119,7 @@ static ssize_t max_rpcs_in_flight_store(struct kobject *kobj,
 
 	spin_lock(&cli->cl_loi_list_lock);
 	cli->cl_max_rpcs_in_flight = val;
+	client_adjust_max_dirty(cli);
 	spin_unlock(&cli->cl_loi_list_lock);
 
 	return count;
@@ -136,10 +137,10 @@ static ssize_t max_dirty_mb_show(struct kobject *kobj,
 	int mult;
 
 	spin_lock(&cli->cl_loi_list_lock);
-	val = cli->cl_dirty_max;
+	val = cli->cl_dirty_max_pages;
 	spin_unlock(&cli->cl_loi_list_lock);
 
-	mult = 1 << 20;
+	mult = 1 << (20 - PAGE_SHIFT);
 	return lprocfs_read_frac_helper(buf, PAGE_SIZE, val, mult);
 }
 
@@ -166,7 +167,7 @@ static ssize_t max_dirty_mb_store(struct kobject *kobj,
 		return -ERANGE;
 
 	spin_lock(&cli->cl_loi_list_lock);
-	cli->cl_dirty_max = (u32)(pages_number << PAGE_SHIFT);
+	cli->cl_dirty_max_pages = pages_number;
 	osc_wake_cache_waiters(cli);
 	spin_unlock(&cli->cl_loi_list_lock);
 
@@ -181,11 +182,11 @@ static int osc_cached_mb_seq_show(struct seq_file *m, void *v)
 	int shift = 20 - PAGE_SHIFT;
 
 	seq_printf(m,
-		   "used_mb: %d\n"
-		   "busy_cnt: %d\n",
-		   (atomic_read(&cli->cl_lru_in_list) +
-		    atomic_read(&cli->cl_lru_busy)) >> shift,
-		   atomic_read(&cli->cl_lru_busy));
+		   "used_mb: %ld\n"
+		   "busy_cnt: %ld\n",
+		   (atomic_long_read(&cli->cl_lru_in_list) +
+		    atomic_long_read(&cli->cl_lru_busy)) >> shift,
+		   atomic_long_read(&cli->cl_lru_busy));
 
 	return 0;
 }
@@ -197,8 +198,10 @@ static ssize_t osc_cached_mb_seq_write(struct file *file,
 {
 	struct obd_device *dev = ((struct seq_file *)file->private_data)->private;
 	struct client_obd *cli = &dev->u.cli;
-	int pages_number, mult, rc;
+	long pages_number, rc;
 	char kernbuf[128];
+	int mult;
+	u64 val;
 
 	if (count >= sizeof(kernbuf))
 		return -EINVAL;
@@ -210,14 +213,18 @@ static ssize_t osc_cached_mb_seq_write(struct file *file,
 	mult = 1 << (20 - PAGE_SHIFT);
 	buffer += lprocfs_find_named_value(kernbuf, "used_mb:", &count) -
 		  kernbuf;
-	rc = lprocfs_write_frac_helper(buffer, count, &pages_number, mult);
+	rc = lprocfs_write_frac_u64_helper(buffer, count, &val, mult);
 	if (rc)
 		return rc;
 
+	if (val > LONG_MAX)
+		return -ERANGE;
+	pages_number = (long)val;
+
 	if (pages_number < 0)
 		return -ERANGE;
 
-	rc = atomic_read(&cli->cl_lru_in_list) - pages_number;
+	rc = atomic_long_read(&cli->cl_lru_in_list) - pages_number;
 	if (rc > 0) {
 		struct lu_env *env;
 		int refcheck;
@@ -244,7 +251,7 @@ static ssize_t cur_dirty_bytes_show(struct kobject *kobj,
 	int len;
 
 	spin_lock(&cli->cl_loi_list_lock);
-	len = sprintf(buf, "%lu\n", cli->cl_dirty);
+	len = sprintf(buf, "%lu\n", cli->cl_dirty_pages << PAGE_SHIFT);
 	spin_unlock(&cli->cl_loi_list_lock);
 
 	return len;
@@ -583,6 +590,7 @@ static ssize_t max_pages_per_rpc_store(struct kobject *kobj,
 	}
 	spin_lock(&cli->cl_loi_list_lock);
 	cli->cl_max_pages_per_rpc = val;
+	client_adjust_max_dirty(cli);
 	spin_unlock(&cli->cl_loi_list_lock);
 
 	return count;
@@ -596,13 +604,14 @@ static ssize_t unstable_stats_show(struct kobject *kobj,
 	struct obd_device *dev = container_of(kobj, struct obd_device,
 					      obd_kobj);
 	struct client_obd *cli = &dev->u.cli;
-	int pages, mb;
+	long pages;
+	int mb;
 
-	pages = atomic_read(&cli->cl_unstable_count);
+	pages = atomic_long_read(&cli->cl_unstable_count);
 	mb = (pages * PAGE_SIZE) >> 20;
 
-	return sprintf(buf, "unstable_pages: %8d\n"
-		       "unstable_mb:    %8d\n", pages, mb);
+	return sprintf(buf, "unstable_pages: %20ld\n"
+		       "unstable_mb:              %10d\n", pages, mb);
 }
 LUSTRE_RO_ATTR(unstable_stats);
 
diff --git a/drivers/staging/lustre/lustre/osc/osc_cache.c b/drivers/staging/lustre/lustre/osc/osc_cache.c
index d011135802d5..4bbe219add98 100644
--- a/drivers/staging/lustre/lustre/osc/osc_cache.c
+++ b/drivers/staging/lustre/lustre/osc/osc_cache.c
@@ -44,7 +44,7 @@ static int extent_debug; /* set it to be true for more debug */
 
 static void osc_update_pending(struct osc_object *obj, int cmd, int delta);
 static int osc_extent_wait(const struct lu_env *env, struct osc_extent *ext,
-			   int state);
+			   enum osc_extent_state state);
 static void osc_ap_completion(const struct lu_env *env, struct client_obd *cli,
 			      struct osc_async_page *oap, int sent, int rc);
 static int osc_make_ready(const struct lu_env *env, struct osc_async_page *oap,
@@ -177,7 +177,7 @@ static int osc_extent_sanity_check0(struct osc_extent *ext,
 {
 	struct osc_object *obj = ext->oe_obj;
 	struct osc_async_page *oap;
-	int page_count;
+	size_t page_count;
 	int rc = 0;
 
 	if (!osc_object_is_locked(obj)) {
@@ -632,7 +632,7 @@ static inline int overlapped(struct osc_extent *ex1, struct osc_extent *ex2)
  */
 static struct osc_extent *osc_extent_find(const struct lu_env *env,
 					  struct osc_object *obj, pgoff_t index,
-					  int *grants)
+					  unsigned int *grants)
 {
 	struct client_obd *cli = osc_cli(obj);
 	struct osc_lock   *olck;
@@ -643,10 +643,10 @@ static struct osc_extent *osc_extent_find(const struct lu_env *env,
 	struct osc_extent *found = NULL;
 	pgoff_t chunk;
 	pgoff_t max_end;
-	int max_pages; /* max_pages_per_rpc */
-	int chunksize;
+	unsigned int max_pages; /* max_pages_per_rpc */
+	unsigned int chunksize;
 	int ppc_bits; /* pages per chunk bits */
-	int chunk_mask;
+	pgoff_t chunk_mask;
 	int rc;
 
 	cur = osc_extent_alloc(obj);
@@ -700,8 +700,8 @@ restart:
 	if (!ext)
 		ext = first_extent(obj);
 	while (ext) {
-		loff_t ext_chk_start = ext->oe_start >> ppc_bits;
-		loff_t ext_chk_end = ext->oe_end >> ppc_bits;
+		pgoff_t ext_chk_start = ext->oe_start >> ppc_bits;
+		pgoff_t ext_chk_end = ext->oe_end >> ppc_bits;
 
 		LASSERT(sanity_check_nolock(ext) == 0);
 		if (chunk > ext_chk_end + 1)
@@ -913,7 +913,7 @@ int osc_extent_finish(const struct lu_env *env, struct osc_extent *ext,
 	return 0;
 }
 
-static int extent_wait_cb(struct osc_extent *ext, int state)
+static int extent_wait_cb(struct osc_extent *ext, enum osc_extent_state state)
 {
 	int ret;
 
@@ -928,7 +928,7 @@ static int extent_wait_cb(struct osc_extent *ext, int state)
  * Wait for the extent's state to become @state.
  */
 static int osc_extent_wait(const struct lu_env *env, struct osc_extent *ext,
-			   int state)
+			   enum osc_extent_state state)
 {
 	struct osc_object *obj = ext->oe_obj;
 	struct l_wait_info lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(600), NULL,
@@ -958,8 +958,8 @@ static int osc_extent_wait(const struct lu_env *env, struct osc_extent *ext,
 	rc = l_wait_event(ext->oe_waitq, extent_wait_cb(ext, state), &lwi);
 	if (rc == -ETIMEDOUT) {
 		OSC_EXTENT_DUMP(D_ERROR, ext,
-			"%s: wait ext to %d timedout, recovery in progress?\n",
-			osc_export(obj)->exp_obd->obd_name, state);
+				"%s: wait ext to %u timedout, recovery in progress?\n",
+				osc_export(obj)->exp_obd->obd_name, state);
 
 		lwi = LWI_INTR(NULL, NULL);
 		rc = l_wait_event(ext->oe_waitq, extent_wait_cb(ext, state),
@@ -1099,7 +1099,7 @@ static int osc_extent_make_ready(const struct lu_env *env,
 	struct osc_async_page *oap;
 	struct osc_async_page *last = NULL;
 	struct osc_object *obj = ext->oe_obj;
-	int page_count = 0;
+	unsigned int page_count = 0;
 	int rc;
 
 	/* we're going to grab page lock, so object lock must not be taken. */
@@ -1140,9 +1140,11 @@ static int osc_extent_make_ready(const struct lu_env *env,
 	 * the size of file.
 	 */
 	if (!(last->oap_async_flags & ASYNC_COUNT_STABLE)) {
-		last->oap_count = osc_refresh_count(env, last, OBD_BRW_WRITE);
-		LASSERT(last->oap_count > 0);
-		LASSERT(last->oap_page_off + last->oap_count <= PAGE_SIZE);
+		int last_oap_count = osc_refresh_count(env, last, OBD_BRW_WRITE);
+
+		LASSERT(last_oap_count > 0);
+		LASSERT(last->oap_page_off + last_oap_count <= PAGE_SIZE);
+		last->oap_count = last_oap_count;
 		spin_lock(&last->oap_lock);
 		last->oap_async_flags |= ASYNC_COUNT_STABLE;
 		spin_unlock(&last->oap_lock);
@@ -1174,7 +1176,8 @@ static int osc_extent_make_ready(const struct lu_env *env,
  * called to expand the extent for the same IO. To expand the extent, the
  * page index must be in the same or next chunk of ext->oe_end.
  */
-static int osc_extent_expand(struct osc_extent *ext, pgoff_t index, int *grants)
+static int osc_extent_expand(struct osc_extent *ext, pgoff_t index,
+			     unsigned int *grants)
 {
 	struct osc_object *obj = ext->oe_obj;
 	struct client_obd *cli = osc_cli(obj);
@@ -1183,7 +1186,7 @@ static int osc_extent_expand(struct osc_extent *ext, pgoff_t index, int *grants)
 	pgoff_t chunk = index >> ppc_bits;
 	pgoff_t end_chunk;
 	pgoff_t end_index;
-	int chunksize = 1 << cli->cl_chunkbits;
+	unsigned int chunksize = 1 << cli->cl_chunkbits;
 	int rc = 0;
 
 	LASSERT(ext->oe_max_end >= index && ext->oe_start <= index);
@@ -1361,7 +1364,7 @@ static int osc_completion(const struct lu_env *env, struct osc_async_page *oap,
 	if (rc == 0 && srvlock) {
 		struct lu_device *ld = opg->ops_cl.cpl_obj->co_lu.lo_dev;
 		struct osc_stats *stats = &lu2osc_dev(ld)->od_stats;
-		int bytes = oap->oap_count;
+		size_t bytes = oap->oap_count;
 
 		if (crt == CRT_READ)
 			stats->os_lockless_reads += bytes;
@@ -1383,18 +1386,16 @@ static int osc_completion(const struct lu_env *env, struct osc_async_page *oap,
 
 #define OSC_DUMP_GRANT(lvl, cli, fmt, args...) do {			      \
 	struct client_obd *__tmp = (cli);				      \
-	CDEBUG(lvl, "%s: grant { dirty: %ld/%ld dirty_pages: %d/%d "	      \
-	       "unstable_pages: %d/%d dropped: %ld avail: %ld, "	      \
-	       "reserved: %ld, flight: %d } lru {in list: %d, "		      \
-	       "left: %d, waiters: %d }" fmt,                                 \
+	CDEBUG(lvl, "%s: grant { dirty: %ld/%ld dirty_pages: %ld/%lu "	      \
+	       "dropped: %ld avail: %ld, reserved: %ld, flight: %d }"	      \
+	       "lru {in list: %ld, left: %ld, waiters: %d }" fmt "\n",	      \
 	       __tmp->cl_import->imp_obd->obd_name,			      \
-	       __tmp->cl_dirty, __tmp->cl_dirty_max,			      \
-	       atomic_read(&obd_dirty_pages), obd_max_dirty_pages,	      \
-	       atomic_read(&obd_unstable_pages), obd_max_dirty_pages,	      \
+	       __tmp->cl_dirty_pages, __tmp->cl_dirty_max_pages,	      \
+	       atomic_long_read(&obd_dirty_pages), obd_max_dirty_pages,	      \
 	       __tmp->cl_lost_grant, __tmp->cl_avail_grant,		      \
 	       __tmp->cl_reserved_grant, __tmp->cl_w_in_flight,		      \
-	       atomic_read(&__tmp->cl_lru_in_list),			      \
-	       atomic_read(&__tmp->cl_lru_busy),			      \
+	       atomic_long_read(&__tmp->cl_lru_in_list),		      \
+	       atomic_long_read(&__tmp->cl_lru_busy),			      \
 	       atomic_read(&__tmp->cl_lru_shrinkers), ##args);		      \
 } while (0)
 
@@ -1404,8 +1405,8 @@ static void osc_consume_write_grant(struct client_obd *cli,
 {
 	assert_spin_locked(&cli->cl_loi_list_lock);
 	LASSERT(!(pga->flag & OBD_BRW_FROM_GRANT));
-	atomic_inc(&obd_dirty_pages);
-	cli->cl_dirty += PAGE_SIZE;
+	atomic_long_inc(&obd_dirty_pages);
+	cli->cl_dirty_pages++;
 	pga->flag |= OBD_BRW_FROM_GRANT;
 	CDEBUG(D_CACHE, "using %lu grant credits for brw %p page %p\n",
 	       PAGE_SIZE, pga, pga->pg);
@@ -1424,12 +1425,12 @@ static void osc_release_write_grant(struct client_obd *cli,
 	}
 
 	pga->flag &= ~OBD_BRW_FROM_GRANT;
-	atomic_dec(&obd_dirty_pages);
-	cli->cl_dirty -= PAGE_SIZE;
+	atomic_long_dec(&obd_dirty_pages);
+	cli->cl_dirty_pages--;
 	if (pga->flag & OBD_BRW_NOCACHE) {
 		pga->flag &= ~OBD_BRW_NOCACHE;
-		atomic_dec(&obd_dirty_transit_pages);
-		cli->cl_dirty_transit -= PAGE_SIZE;
+		atomic_long_dec(&obd_dirty_transit_pages);
+		cli->cl_dirty_transit--;
 	}
 }
 
@@ -1494,11 +1495,11 @@ static void osc_unreserve_grant(struct client_obd *cli,
 static void osc_free_grant(struct client_obd *cli, unsigned int nr_pages,
 			   unsigned int lost_grant)
 {
-	int grant = (1 << cli->cl_chunkbits) + cli->cl_extent_tax;
+	unsigned long grant = (1 << cli->cl_chunkbits) + cli->cl_extent_tax;
 
 	spin_lock(&cli->cl_loi_list_lock);
-	atomic_sub(nr_pages, &obd_dirty_pages);
-	cli->cl_dirty -= nr_pages << PAGE_SHIFT;
+	atomic_long_sub(nr_pages, &obd_dirty_pages);
+	cli->cl_dirty_pages -= nr_pages;
 	cli->cl_lost_grant += lost_grant;
 	if (cli->cl_avail_grant < grant && cli->cl_lost_grant >= grant) {
 		/* borrow some grant from truncate to avoid the case that
@@ -1511,7 +1512,7 @@ static void osc_free_grant(struct client_obd *cli, unsigned int nr_pages,
 	spin_unlock(&cli->cl_loi_list_lock);
 	CDEBUG(D_CACHE, "lost %u grant: %lu avail: %lu dirty: %lu\n",
 	       lost_grant, cli->cl_lost_grant,
-	       cli->cl_avail_grant, cli->cl_dirty);
+	       cli->cl_avail_grant, cli->cl_dirty_pages << PAGE_SHIFT);
 }
 
 /**
@@ -1535,19 +1536,18 @@ static int osc_enter_cache_try(struct client_obd *cli,
 {
 	int rc;
 
-	OSC_DUMP_GRANT(D_CACHE, cli, "need:%d.\n", bytes);
+	OSC_DUMP_GRANT(D_CACHE, cli, "need:%d\n", bytes);
 
 	rc = osc_reserve_grant(cli, bytes);
 	if (rc < 0)
 		return 0;
 
-	if (cli->cl_dirty + PAGE_SIZE <= cli->cl_dirty_max &&
-	    atomic_read(&obd_unstable_pages) + 1 +
-	    atomic_read(&obd_dirty_pages) <= obd_max_dirty_pages) {
+	if (cli->cl_dirty_pages <= cli->cl_dirty_max_pages &&
+	    atomic_long_read(&obd_dirty_pages) + 1 <= obd_max_dirty_pages) {
 		osc_consume_write_grant(cli, &oap->oap_brw_page);
 		if (transient) {
-			cli->cl_dirty_transit += PAGE_SIZE;
-			atomic_inc(&obd_dirty_transit_pages);
+			cli->cl_dirty_transit++;
+			atomic_long_inc(&obd_dirty_transit_pages);
 			oap->oap_brw_flags |= OBD_BRW_NOCACHE;
 		}
 		rc = 1;
@@ -1581,11 +1581,13 @@ static int osc_enter_cache(const struct lu_env *env, struct client_obd *cli,
 	struct osc_object *osc = oap->oap_obj;
 	struct lov_oinfo *loi = osc->oo_oinfo;
 	struct osc_cache_waiter ocw;
-	struct l_wait_info lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(600), NULL,
-						  LWI_ON_SIGNAL_NOOP, NULL);
+	struct l_wait_info lwi;
 	int rc = -EDQUOT;
 
-	OSC_DUMP_GRANT(D_CACHE, cli, "need:%d.\n", bytes);
+	lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(AT_OFF ? obd_timeout : at_max),
+			       NULL, LWI_ON_SIGNAL_NOOP, NULL);
+
+	OSC_DUMP_GRANT(D_CACHE, cli, "need:%d\n", bytes);
 
 	spin_lock(&cli->cl_loi_list_lock);
 
@@ -1593,14 +1595,16 @@ static int osc_enter_cache(const struct lu_env *env, struct client_obd *cli,
 	 * of queued writes and create a discontiguous rpc stream
 	 */
 	if (OBD_FAIL_CHECK(OBD_FAIL_OSC_NO_GRANT) ||
-	    cli->cl_dirty_max < PAGE_SIZE     ||
-	    cli->cl_ar.ar_force_sync || loi->loi_ar.ar_force_sync) {
+	    !cli->cl_dirty_max_pages || cli->cl_ar.ar_force_sync ||
+	    loi->loi_ar.ar_force_sync) {
+		OSC_DUMP_GRANT(D_CACHE, cli, "forced sync i/o\n");
 		rc = -EDQUOT;
 		goto out;
 	}
 
 	/* Hopefully normal case - cache space and write credits available */
 	if (osc_enter_cache_try(cli, oap, bytes, 0)) {
+		OSC_DUMP_GRANT(D_CACHE, cli, "granted from cache\n");
 		rc = 0;
 		goto out;
 	}
@@ -1615,7 +1619,7 @@ static int osc_enter_cache(const struct lu_env *env, struct client_obd *cli,
 	init_waitqueue_head(&ocw.ocw_waitq);
 	ocw.ocw_oap   = oap;
 	ocw.ocw_grant = bytes;
-	while (cli->cl_dirty > 0 || cli->cl_w_in_flight > 0) {
+	while (cli->cl_dirty_pages > 0 || cli->cl_w_in_flight > 0) {
 		list_add_tail(&ocw.ocw_entry, &cli->cl_cache_waiters);
 		ocw.ocw_rc = 0;
 		spin_unlock(&cli->cl_loi_list_lock);
@@ -1629,32 +1633,49 @@ static int osc_enter_cache(const struct lu_env *env, struct client_obd *cli,
 
 		spin_lock(&cli->cl_loi_list_lock);
 
-		/* l_wait_event is interrupted by signal, or timed out */
 		if (rc < 0) {
-			if (rc == -ETIMEDOUT) {
-				OSC_DUMP_GRANT(D_ERROR, cli,
-					       "try to reserve %d.\n", bytes);
-				osc_extent_tree_dump(D_ERROR, osc);
-				rc = -EDQUOT;
-			}
-
+			/* l_wait_event is interrupted by signal, or timed out */
 			list_del_init(&ocw.ocw_entry);
-			goto out;
+			break;
 		}
-
 		LASSERT(list_empty(&ocw.ocw_entry));
 		rc = ocw.ocw_rc;
 
 		if (rc != -EDQUOT)
-			goto out;
+			break;
 		if (osc_enter_cache_try(cli, oap, bytes, 0)) {
 			rc = 0;
-			goto out;
+			break;
 		}
 	}
+
+	switch (rc) {
+	case 0:
+		OSC_DUMP_GRANT(D_CACHE, cli, "finally got grant space\n");
+		break;
+	case -ETIMEDOUT:
+		OSC_DUMP_GRANT(D_CACHE, cli,
+			       "timeout, fall back to sync i/o\n");
+		osc_extent_tree_dump(D_CACHE, osc);
+		/* fall back to synchronous I/O */
+		rc = -EDQUOT;
+		break;
+	case -EINTR:
+		/* Ensures restartability - LU-3581 */
+		OSC_DUMP_GRANT(D_CACHE, cli, "interrupted\n");
+		rc = -ERESTARTSYS;
+		break;
+	case -EDQUOT:
+		OSC_DUMP_GRANT(D_CACHE, cli,
+			       "no grant space, fall back to sync i/o\n");
+		break;
+	default:
+		CDEBUG(D_CACHE, "%s: event for cache space @ %p never arrived due to %d, fall back to sync i/o\n",
+		       cli->cl_import->imp_obd->obd_name, &ocw, rc);
+		break;
+	}
 out:
 	spin_unlock(&cli->cl_loi_list_lock);
-	OSC_DUMP_GRANT(D_CACHE, cli, "returned %d.\n", rc);
 	return rc;
 }
 
@@ -1670,19 +1691,17 @@ void osc_wake_cache_waiters(struct client_obd *cli)
 
 		ocw->ocw_rc = -EDQUOT;
 		/* we can't dirty more */
-		if ((cli->cl_dirty + PAGE_SIZE > cli->cl_dirty_max) ||
-		    (atomic_read(&obd_unstable_pages) + 1 +
-		     atomic_read(&obd_dirty_pages) > obd_max_dirty_pages)) {
-			CDEBUG(D_CACHE, "no dirty room: dirty: %ld osc max %ld, sys max %d\n",
-			       cli->cl_dirty,
-			       cli->cl_dirty_max, obd_max_dirty_pages);
+		if ((cli->cl_dirty_pages > cli->cl_dirty_max_pages) ||
+		    (atomic_long_read(&obd_dirty_pages) + 1 >
+		     obd_max_dirty_pages)) {
+			CDEBUG(D_CACHE, "no dirty room: dirty: %ld osc max %ld, sys max %ld\n",
+			       cli->cl_dirty_pages, cli->cl_dirty_max_pages,
+			       obd_max_dirty_pages);
 			goto wakeup;
 		}
 
-		ocw->ocw_rc = 0;
-		if (!osc_enter_cache_try(cli, ocw->ocw_oap, ocw->ocw_grant, 0))
-			ocw->ocw_rc = -EDQUOT;
-
+		if (osc_enter_cache_try(cli, ocw->ocw_oap, ocw->ocw_grant, 0))
+			ocw->ocw_rc = 0;
 wakeup:
 		CDEBUG(D_CACHE, "wake up %p for oap %p, avail grant %ld, %d\n",
 		       ocw, ocw->ocw_oap, cli->cl_avail_grant, ocw->ocw_rc);
@@ -1843,97 +1862,6 @@ static void osc_process_ar(struct osc_async_rc *ar, __u64 xid,
 		ar->ar_force_sync = 0;
 }
 
-/**
- * Performs "unstable" page accounting. This function balances the
- * increment operations performed in osc_inc_unstable_pages. It is
- * registered as the RPC request callback, and is executed when the
- * bulk RPC is committed on the server. Thus at this point, the pages
- * involved in the bulk transfer are no longer considered unstable.
- */
-void osc_dec_unstable_pages(struct ptlrpc_request *req)
-{
-	struct client_obd *cli = &req->rq_import->imp_obd->u.cli;
-	struct ptlrpc_bulk_desc *desc = req->rq_bulk;
-	int page_count = desc->bd_iov_count;
-	int i;
-
-	/* No unstable page tracking */
-	if (!cli->cl_cache)
-		return;
-
-	LASSERT(page_count >= 0);
-
-	for (i = 0; i < page_count; i++)
-		dec_node_page_state(desc->bd_iov[i].kiov_page,
-							NR_UNSTABLE_NFS);
-
-	atomic_sub(page_count, &cli->cl_cache->ccc_unstable_nr);
-	LASSERT(atomic_read(&cli->cl_cache->ccc_unstable_nr) >= 0);
-
-	atomic_sub(page_count, &cli->cl_unstable_count);
-	LASSERT(atomic_read(&cli->cl_unstable_count) >= 0);
-
-	atomic_sub(page_count, &obd_unstable_pages);
-	LASSERT(atomic_read(&obd_unstable_pages) >= 0);
-
-	spin_lock(&req->rq_lock);
-	req->rq_committed = 1;
-	req->rq_unstable  = 0;
-	spin_unlock(&req->rq_lock);
-
-	wake_up_all(&cli->cl_cache->ccc_unstable_waitq);
-}
-
-/* "unstable" page accounting. See: osc_dec_unstable_pages. */
-void osc_inc_unstable_pages(struct ptlrpc_request *req)
-{
-	struct client_obd *cli = &req->rq_import->imp_obd->u.cli;
-	struct ptlrpc_bulk_desc *desc = req->rq_bulk;
-	long page_count = desc->bd_iov_count;
-	int i;
-
-	/* No unstable page tracking */
-	if (!cli->cl_cache)
-		return;
-
-	LASSERT(page_count >= 0);
-
-	for (i = 0; i < page_count; i++)
-		inc_node_page_state(desc->bd_iov[i].kiov_page,
-							NR_UNSTABLE_NFS);
-
-	LASSERT(atomic_read(&cli->cl_cache->ccc_unstable_nr) >= 0);
-	atomic_add(page_count, &cli->cl_cache->ccc_unstable_nr);
-
-	LASSERT(atomic_read(&cli->cl_unstable_count) >= 0);
-	atomic_add(page_count, &cli->cl_unstable_count);
-
-	LASSERT(atomic_read(&obd_unstable_pages) >= 0);
-	atomic_add(page_count, &obd_unstable_pages);
-
-	spin_lock(&req->rq_lock);
-
-	/*
-	 * If the request has already been committed (i.e. brw_commit
-	 * called via rq_commit_cb), we need to undo the unstable page
-	 * increments we just performed because rq_commit_cb wont be
-	 * called again. Otherwise, just set the commit callback so the
-	 * unstable page accounting is properly updated when the request
-	 * is committed
-	 */
-	if (req->rq_committed) {
-		/* Drop lock before calling osc_dec_unstable_pages */
-		spin_unlock(&req->rq_lock);
-		osc_dec_unstable_pages(req);
-		spin_lock(&req->rq_lock);
-	} else {
-		req->rq_unstable = 1;
-		req->rq_commit_cb = osc_dec_unstable_pages;
-	}
-
-	spin_unlock(&req->rq_lock);
-}
-
 /* this must be called holding the loi list lock to give coverage to exit_cache,
  * async_flag maintenance, and oap_request
  */
@@ -1945,9 +1873,6 @@ static void osc_ap_completion(const struct lu_env *env, struct client_obd *cli,
 	__u64 xid = 0;
 
 	if (oap->oap_request) {
-		if (!rc)
-			osc_inc_unstable_pages(oap->oap_request);
-
 		xid = ptlrpc_req_xid(oap->oap_request);
 		ptlrpc_req_finished(oap->oap_request);
 		oap->oap_request = NULL;
@@ -1979,7 +1904,7 @@ static void osc_ap_completion(const struct lu_env *env, struct client_obd *cli,
  */
 static int try_to_add_extent_for_io(struct client_obd *cli,
 				    struct osc_extent *ext, struct list_head *rpclist,
-				    int *pc, unsigned int *max_pages)
+				    unsigned int *pc, unsigned int *max_pages)
 {
 	struct osc_extent *tmp;
 	struct osc_async_page *oap = list_first_entry(&ext->oe_pages,
@@ -2032,12 +1957,13 @@ static int try_to_add_extent_for_io(struct client_obd *cli,
  * 5. Traverse the extent tree from the 1st extent;
  * 6. Above steps exit if there is no space in this RPC.
  */
-static int get_write_extents(struct osc_object *obj, struct list_head *rpclist)
+static unsigned int get_write_extents(struct osc_object *obj,
+				      struct list_head *rpclist)
 {
 	struct client_obd *cli = osc_cli(obj);
 	struct osc_extent *ext;
 	struct osc_extent *temp;
-	int page_count = 0;
+	unsigned int page_count = 0;
 	unsigned int max_pages = cli->cl_max_pages_per_rpc;
 
 	LASSERT(osc_object_is_locked(obj));
@@ -2175,7 +2101,7 @@ osc_send_read_rpc(const struct lu_env *env, struct client_obd *cli,
 	struct osc_extent *ext;
 	struct osc_extent *next;
 	LIST_HEAD(rpclist);
-	int page_count = 0;
+	unsigned int page_count = 0;
 	unsigned int max_pages = cli->cl_max_pages_per_rpc;
 	int rc = 0;
 
@@ -2390,7 +2316,7 @@ int osc_queue_async_io(const struct lu_env *env, struct cl_io *io,
 	struct client_obd *cli = oap->oap_cli;
 	struct osc_object *osc = oap->oap_obj;
 	pgoff_t index;
-	int grants = 0;
+	unsigned int grants = 0, tmp;
 	int brw_flags = OBD_BRW_ASYNC;
 	int cmd = OBD_BRW_WRITE;
 	int need_release = 0;
@@ -2434,9 +2360,6 @@ int osc_queue_async_io(const struct lu_env *env, struct cl_io *io,
 			return rc;
 	}
 
-	if (osc_over_unstable_soft_limit(cli))
-		brw_flags |= OBD_BRW_SOFT_SYNC;
-
 	oap->oap_cmd = cmd;
 	oap->oap_page_off = ops->ops_from;
 	oap->oap_count = ops->ops_to - ops->ops_from;
@@ -2476,7 +2399,7 @@ int osc_queue_async_io(const struct lu_env *env, struct cl_io *io,
 			grants = 0;
 			need_release = 1;
 		} else if (ext->oe_end < index) {
-			int tmp = grants;
+			tmp = grants;
 			/* try to expand this extent */
 			rc = osc_extent_expand(ext, index, &tmp);
 			if (rc < 0) {
@@ -2501,7 +2424,7 @@ int osc_queue_async_io(const struct lu_env *env, struct cl_io *io,
 	}
 
 	if (!ext) {
-		int tmp = (1 << cli->cl_chunkbits) + cli->cl_extent_tax;
+		tmp = (1 << cli->cl_chunkbits) + cli->cl_extent_tax;
 
 		/* try to find new extent to cover this page */
 		LASSERT(!oio->oi_active);
@@ -2645,7 +2568,7 @@ int osc_flush_async_page(const struct lu_env *env, struct cl_io *io,
 		goto out;
 
 	spin_lock(&oap->oap_lock);
-	oap->oap_async_flags |= ASYNC_READY|ASYNC_URGENT;
+	oap->oap_async_flags |= ASYNC_READY | ASYNC_URGENT;
 	spin_unlock(&oap->oap_lock);
 
 	if (memory_pressure_get())
diff --git a/drivers/staging/lustre/lustre/osc/osc_cl_internal.h b/drivers/staging/lustre/lustre/osc/osc_cl_internal.h
index c8c3f1ca77be..9c8de15c309c 100644
--- a/drivers/staging/lustre/lustre/osc/osc_cl_internal.h
+++ b/drivers/staging/lustre/lustre/osc/osc_cl_internal.h
@@ -64,7 +64,7 @@ struct osc_io {
 	/** true if this io is lockless. */
 	unsigned int		oi_lockless;
 	/** how many LRU pages are reserved for this IO */
-	int oi_lru_reserved;
+	unsigned long		oi_lru_reserved;
 
 	/** active extents, we know how many bytes is going to be written,
 	 * so having an active extent will prevent it from being fragmented
@@ -389,7 +389,7 @@ extern struct lu_device_type osc_device_type;
 extern struct lu_context_key osc_key;
 extern struct lu_context_key osc_session_key;
 
-#define OSC_FLAGS (ASYNC_URGENT|ASYNC_READY)
+#define OSC_FLAGS (ASYNC_URGENT | ASYNC_READY)
 
 int osc_lock_init(const struct lu_env *env,
 		  struct cl_object *obj, struct cl_lock *lock,
@@ -608,7 +608,7 @@ struct osc_extent {
 	/** link list of osc_object's oo_{hp|urgent|locking}_exts. */
 	struct list_head	 oe_link;
 	/** state of this extent */
-	unsigned int       oe_state;
+	enum osc_extent_state	oe_state;
 	/** flags for this extent. */
 	unsigned int       oe_intree:1,
 	/** 0 is write, 1 is read */
diff --git a/drivers/staging/lustre/lustre/osc/osc_internal.h b/drivers/staging/lustre/lustre/osc/osc_internal.h
index 7a27f0961955..67fe0a254991 100644
--- a/drivers/staging/lustre/lustre/osc/osc_internal.h
+++ b/drivers/staging/lustre/lustre/osc/osc_internal.h
@@ -71,7 +71,6 @@ struct osc_async_page {
 	struct client_obd       *oap_cli;
 	struct osc_object       *oap_obj;
 
-	struct ldlm_lock	*oap_ldlm_lock;
 	spinlock_t		 oap_lock;
 };
 
@@ -134,9 +133,9 @@ int osc_sync_base(struct obd_export *exp, struct obd_info *oinfo,
 int osc_process_config_base(struct obd_device *obd, struct lustre_cfg *cfg);
 int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
 		  struct list_head *ext_list, int cmd);
-int osc_lru_shrink(const struct lu_env *env, struct client_obd *cli,
-		   int target, bool force);
-int osc_lru_reclaim(struct client_obd *cli);
+long osc_lru_shrink(const struct lu_env *env, struct client_obd *cli,
+		    long target, bool force);
+long osc_lru_reclaim(struct client_obd *cli);
 
 unsigned long osc_ldlm_weigh_ast(struct ldlm_lock *dlmlock);
 
@@ -198,7 +197,7 @@ int osc_quotacheck(struct obd_device *unused, struct obd_export *exp,
 int osc_quota_poll_check(struct obd_export *exp, struct if_quotacheck *qchk);
 void osc_inc_unstable_pages(struct ptlrpc_request *req);
 void osc_dec_unstable_pages(struct ptlrpc_request *req);
-int  osc_over_unstable_soft_limit(struct client_obd *cli);
+bool osc_over_unstable_soft_limit(struct client_obd *cli);
 
 struct ldlm_lock *osc_dlmlock_at_pgoff(const struct lu_env *env,
 				       struct osc_object *obj, pgoff_t index,
diff --git a/drivers/staging/lustre/lustre/osc/osc_io.c b/drivers/staging/lustre/lustre/osc/osc_io.c
index 6e3dcd38913f..8a559cbcdd0c 100644
--- a/drivers/staging/lustre/lustre/osc/osc_io.c
+++ b/drivers/staging/lustre/lustre/osc/osc_io.c
@@ -109,11 +109,11 @@ static int osc_io_submit(const struct lu_env *env,
 
 	struct cl_page_list *qin = &queue->c2_qin;
 	struct cl_page_list *qout = &queue->c2_qout;
-	int queued = 0;
+	unsigned int queued = 0;
 	int result = 0;
 	int cmd;
 	int brw_flags;
-	int max_pages;
+	unsigned int max_pages;
 
 	LASSERT(qin->pl_nr > 0);
 
@@ -163,14 +163,19 @@ static int osc_io_submit(const struct lu_env *env,
 			continue;
 		}
 
-		cl_page_list_move(qout, qin, page);
 		spin_lock(&oap->oap_lock);
-		oap->oap_async_flags = ASYNC_URGENT|ASYNC_READY;
+		oap->oap_async_flags = ASYNC_URGENT | ASYNC_READY;
 		oap->oap_async_flags |= ASYNC_COUNT_STABLE;
 		spin_unlock(&oap->oap_lock);
 
 		osc_page_submit(env, opg, crt, brw_flags);
 		list_add_tail(&oap->oap_pending_item, &list);
+
+		if (page->cp_sync_io)
+			cl_page_list_move(qout, qin, page);
+		else /* async IO */
+			cl_page_list_del(env, qin, page);
+
 		if (++queued == max_pages) {
 			queued = 0;
 			result = osc_queue_sync_pages(env, osc, &list, cmd,
@@ -195,7 +200,7 @@ static int osc_io_submit(const struct lu_env *env,
  * Expand stripe KMS if necessary.
  */
 static void osc_page_touch_at(const struct lu_env *env,
-			      struct cl_object *obj, pgoff_t idx, unsigned to)
+			      struct cl_object *obj, pgoff_t idx, size_t to)
 {
 	struct lov_oinfo *loi = cl2osc(obj)->oo_oinfo;
 	struct cl_attr *attr = &osc_env_info(env)->oti_attr;
@@ -228,7 +233,7 @@ static void osc_page_touch_at(const struct lu_env *env,
 		attr->cat_size = kms;
 		valid |= CAT_SIZE;
 	}
-	cl_object_attr_set(env, obj, attr, valid);
+	cl_object_attr_update(env, obj, attr, valid);
 	cl_object_attr_unlock(obj);
 }
 
@@ -314,8 +319,8 @@ static int osc_io_rw_iter_init(const struct lu_env *env,
 	struct osc_object *osc = cl2osc(ios->cis_obj);
 	struct client_obd *cli = osc_cli(osc);
 	unsigned long c;
-	unsigned int npages;
-	unsigned int max_pages;
+	unsigned long npages;
+	unsigned long max_pages;
 
 	if (cl_io_is_append(io))
 		return 0;
@@ -328,15 +333,15 @@ static int osc_io_rw_iter_init(const struct lu_env *env,
 	if (npages > max_pages)
 		npages = max_pages;
 
-	c = atomic_read(cli->cl_lru_left);
+	c = atomic_long_read(cli->cl_lru_left);
 	if (c < npages && osc_lru_reclaim(cli) > 0)
-		c = atomic_read(cli->cl_lru_left);
+		c = atomic_long_read(cli->cl_lru_left);
 	while (c >= npages) {
-		if (c == atomic_cmpxchg(cli->cl_lru_left, c, c - npages)) {
+		if (c == atomic_long_cmpxchg(cli->cl_lru_left, c, c - npages)) {
 			oio->oi_lru_reserved = npages;
 			break;
 		}
-		c = atomic_read(cli->cl_lru_left);
+		c = atomic_long_read(cli->cl_lru_left);
 	}
 
 	return 0;
@@ -350,7 +355,7 @@ static void osc_io_rw_iter_fini(const struct lu_env *env,
 	struct client_obd *cli = osc_cli(osc);
 
 	if (oio->oi_lru_reserved > 0) {
-		atomic_add(oio->oi_lru_reserved, cli->cl_lru_left);
+		atomic_long_add(oio->oi_lru_reserved, cli->cl_lru_left);
 		oio->oi_lru_reserved = 0;
 	}
 	oio->oi_write_osclock = NULL;
@@ -364,7 +369,7 @@ static int osc_io_fault_start(const struct lu_env *env,
 
 	io = ios->cis_io;
 	fio = &io->u.ci_fault;
-	CDEBUG(D_INFO, "%lu %d %d\n",
+	CDEBUG(D_INFO, "%lu %d %zu\n",
 	       fio->ft_index, fio->ft_writable, fio->ft_nob);
 	/*
 	 * If mapping is writeable, adjust kms to cover this page,
@@ -471,18 +476,21 @@ static int osc_io_setattr_start(const struct lu_env *env,
 				attr->cat_ctime = lvb->lvb_ctime;
 				cl_valid |= CAT_CTIME;
 			}
-			result = cl_object_attr_set(env, obj, attr, cl_valid);
+			result = cl_object_attr_update(env, obj, attr,
+						       cl_valid);
 		}
 		cl_object_attr_unlock(obj);
 	}
 	memset(oa, 0, sizeof(*oa));
 	if (result == 0) {
 		oa->o_oi = loi->loi_oi;
+		obdo_set_parent_fid(oa, io->u.ci_setattr.sa_parent_fid);
+		oa->o_stripe_idx = io->u.ci_setattr.sa_stripe_index;
 		oa->o_mtime = attr->cat_mtime;
 		oa->o_atime = attr->cat_atime;
 		oa->o_ctime = attr->cat_ctime;
-		oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP | OBD_MD_FLATIME |
-			OBD_MD_FLCTIME | OBD_MD_FLMTIME;
+		oa->o_valid |= OBD_MD_FLID | OBD_MD_FLGROUP | OBD_MD_FLATIME |
+			       OBD_MD_FLCTIME | OBD_MD_FLMTIME;
 		if (ia_valid & ATTR_SIZE) {
 			oa->o_size = size;
 			oa->o_blocks = OBD_OBJECT_EOF;
@@ -559,7 +567,7 @@ static int osc_io_read_start(const struct lu_env *env,
 	if (!slice->cis_io->ci_noatime) {
 		cl_object_attr_lock(obj);
 		attr->cat_atime = ktime_get_real_seconds();
-		rc = cl_object_attr_set(env, obj, attr, CAT_ATIME);
+		rc = cl_object_attr_update(env, obj, attr, CAT_ATIME);
 		cl_object_attr_unlock(obj);
 	}
 	return rc;
@@ -576,7 +584,7 @@ static int osc_io_write_start(const struct lu_env *env,
 	cl_object_attr_lock(obj);
 	attr->cat_ctime = ktime_get_real_seconds();
 	attr->cat_mtime = attr->cat_ctime;
-	rc = cl_object_attr_set(env, obj, attr, CAT_MTIME | CAT_CTIME);
+	rc = cl_object_attr_update(env, obj, attr, CAT_MTIME | CAT_CTIME);
 	cl_object_attr_unlock(obj);
 
 	return rc;
diff --git a/drivers/staging/lustre/lustre/osc/osc_lock.c b/drivers/staging/lustre/lustre/osc/osc_lock.c
index 717d3ffb6789..39a8a5851603 100644
--- a/drivers/staging/lustre/lustre/osc/osc_lock.c
+++ b/drivers/staging/lustre/lustre/osc/osc_lock.c
@@ -222,7 +222,7 @@ static void osc_lock_lvb_update(const struct lu_env *env,
 		ldlm_lock_allow_match_locked(dlmlock);
 	}
 
-	cl_object_attr_set(env, obj, attr, valid);
+	cl_object_attr_update(env, obj, attr, valid);
 	cl_object_attr_unlock(obj);
 }
 
@@ -467,7 +467,7 @@ static int osc_dlm_blocking_ast0(const struct lu_env *env,
 		 */
 		attr->cat_kms = ldlm_extent_shift_kms(dlmlock, old_kms);
 
-		cl_object_attr_set(env, obj, attr, CAT_KMS);
+		cl_object_attr_update(env, obj, attr, CAT_KMS);
 		cl_object_attr_unlock(obj);
 		unlock_res_and_lock(dlmlock);
 
diff --git a/drivers/staging/lustre/lustre/osc/osc_object.c b/drivers/staging/lustre/lustre/osc/osc_object.c
index d211d1905e83..aae3a2d4243f 100644
--- a/drivers/staging/lustre/lustre/osc/osc_object.c
+++ b/drivers/staging/lustre/lustre/osc/osc_object.c
@@ -159,8 +159,8 @@ static int osc_attr_get(const struct lu_env *env, struct cl_object *obj,
 	return 0;
 }
 
-static int osc_attr_set(const struct lu_env *env, struct cl_object *obj,
-			const struct cl_attr *attr, unsigned valid)
+static int osc_attr_update(const struct lu_env *env, struct cl_object *obj,
+			   const struct cl_attr *attr, unsigned int valid)
 {
 	struct lov_oinfo *oinfo = cl2osc(obj)->oo_oinfo;
 	struct ost_lvb *lvb = &oinfo->loi_lvb;
@@ -195,7 +195,6 @@ static int osc_object_glimpse(const struct lu_env *env,
 
 static int osc_object_ast_clear(struct ldlm_lock *lock, void *data)
 {
-	LASSERT(lock->l_granted_mode == lock->l_req_mode);
 	if (lock->l_ast_data == data)
 		lock->l_ast_data = NULL;
 	return LDLM_ITER_CONTINUE;
@@ -262,7 +261,7 @@ static const struct cl_object_operations osc_ops = {
 	.coo_lock_init = osc_lock_init,
 	.coo_io_init   = osc_io_init,
 	.coo_attr_get  = osc_attr_get,
-	.coo_attr_set  = osc_attr_set,
+	.coo_attr_update = osc_attr_update,
 	.coo_glimpse   = osc_object_glimpse,
 	.coo_prune     = osc_object_prune
 };
diff --git a/drivers/staging/lustre/lustre/osc/osc_page.c b/drivers/staging/lustre/lustre/osc/osc_page.c
index 355f496a2093..2a7a70aa9e80 100644
--- a/drivers/staging/lustre/lustre/osc/osc_page.c
+++ b/drivers/staging/lustre/lustre/osc/osc_page.c
@@ -323,32 +323,6 @@ int osc_page_init(const struct lu_env *env, struct cl_object *obj,
 	return result;
 }
 
-int osc_over_unstable_soft_limit(struct client_obd *cli)
-{
-	long obd_upages, obd_dpages, osc_upages;
-
-	/* Can't check cli->cl_unstable_count, therefore, no soft limit */
-	if (!cli)
-		return 0;
-
-	obd_upages = atomic_read(&obd_unstable_pages);
-	obd_dpages = atomic_read(&obd_dirty_pages);
-
-	osc_upages = atomic_read(&cli->cl_unstable_count);
-
-	/*
-	 * obd_max_dirty_pages is the max number of (dirty + unstable)
-	 * pages allowed at any given time. To simulate an unstable page
-	 * only limit, we subtract the current number of dirty pages
-	 * from this max. This difference is roughly the amount of pages
-	 * currently available for unstable pages. Thus, the soft limit
-	 * is half of that difference. Check osc_upages to ensure we don't
-	 * set SOFT_SYNC for OSCs without any outstanding unstable pages.
-	 */
-	return osc_upages &&
-	       obd_upages >= (obd_max_dirty_pages - obd_dpages) / 2;
-}
-
 /**
  * Helper function called by osc_io_submit() for every page in an immediate
  * transfer (i.e., transferred synchronously).
@@ -368,9 +342,6 @@ void osc_page_submit(const struct lu_env *env, struct osc_page *opg,
 	oap->oap_count = opg->ops_to - opg->ops_from;
 	oap->oap_brw_flags = brw_flags | OBD_BRW_SYNC;
 
-	if (osc_over_unstable_soft_limit(oap->oap_cli))
-		oap->oap_brw_flags |= OBD_BRW_SOFT_SYNC;
-
 	if (capable(CFS_CAP_SYS_RESOURCE)) {
 		oap->oap_brw_flags |= OBD_BRW_NOQUOTA;
 		oap->oap_cmd |= OBD_BRW_NOQUOTA;
@@ -409,7 +380,7 @@ static const int lru_shrink_max = 8 << (20 - PAGE_SHIFT); /* 8M */
 static int osc_cache_too_much(struct client_obd *cli)
 {
 	struct cl_client_cache *cache = cli->cl_cache;
-	int pages = atomic_read(&cli->cl_lru_in_list);
+	long pages = atomic_long_read(&cli->cl_lru_in_list);
 	unsigned long budget;
 
 	budget = cache->ccc_lru_max / (atomic_read(&cache->ccc_users) - 2);
@@ -417,7 +388,7 @@ static int osc_cache_too_much(struct client_obd *cli)
 	/* if it's going to run out LRU slots, we should free some, but not
 	 * too much to maintain fairness among OSCs.
 	 */
-	if (atomic_read(cli->cl_lru_left) < cache->ccc_lru_max >> 4) {
+	if (atomic_long_read(cli->cl_lru_left) < cache->ccc_lru_max >> 4) {
 		if (pages >= budget)
 			return lru_shrink_max;
 		else if (pages >= budget / 2)
@@ -444,7 +415,7 @@ void osc_lru_add_batch(struct client_obd *cli, struct list_head *plist)
 {
 	LIST_HEAD(lru);
 	struct osc_async_page *oap;
-	int npages = 0;
+	long npages = 0;
 
 	list_for_each_entry(oap, plist, oap_pending_item) {
 		struct osc_page *opg = oap2osc_page(oap);
@@ -460,8 +431,8 @@ void osc_lru_add_batch(struct client_obd *cli, struct list_head *plist)
 	if (npages > 0) {
 		spin_lock(&cli->cl_lru_list_lock);
 		list_splice_tail(&lru, &cli->cl_lru_list);
-		atomic_sub(npages, &cli->cl_lru_busy);
-		atomic_add(npages, &cli->cl_lru_in_list);
+		atomic_long_sub(npages, &cli->cl_lru_busy);
+		atomic_long_add(npages, &cli->cl_lru_in_list);
 		spin_unlock(&cli->cl_lru_list_lock);
 
 		/* XXX: May set force to be true for better performance */
@@ -472,9 +443,9 @@ void osc_lru_add_batch(struct client_obd *cli, struct list_head *plist)
 
 static void __osc_lru_del(struct client_obd *cli, struct osc_page *opg)
 {
-	LASSERT(atomic_read(&cli->cl_lru_in_list) > 0);
+	LASSERT(atomic_long_read(&cli->cl_lru_in_list) > 0);
 	list_del_init(&opg->ops_lru);
-	atomic_dec(&cli->cl_lru_in_list);
+	atomic_long_dec(&cli->cl_lru_in_list);
 }
 
 /**
@@ -488,12 +459,12 @@ static void osc_lru_del(struct client_obd *cli, struct osc_page *opg)
 		if (!list_empty(&opg->ops_lru)) {
 			__osc_lru_del(cli, opg);
 		} else {
-			LASSERT(atomic_read(&cli->cl_lru_busy) > 0);
-			atomic_dec(&cli->cl_lru_busy);
+			LASSERT(atomic_long_read(&cli->cl_lru_busy) > 0);
+			atomic_long_dec(&cli->cl_lru_busy);
 		}
 		spin_unlock(&cli->cl_lru_list_lock);
 
-		atomic_inc(cli->cl_lru_left);
+		atomic_long_inc(cli->cl_lru_left);
 		/* this is a great place to release more LRU pages if
 		 * this osc occupies too many LRU pages and kernel is
 		 * stealing one of them.
@@ -518,7 +489,7 @@ static void osc_lru_use(struct client_obd *cli, struct osc_page *opg)
 		spin_lock(&cli->cl_lru_list_lock);
 		__osc_lru_del(cli, opg);
 		spin_unlock(&cli->cl_lru_list_lock);
-		atomic_inc(&cli->cl_lru_busy);
+		atomic_long_inc(&cli->cl_lru_busy);
 	}
 }
 
@@ -540,10 +511,32 @@ static void discard_pagevec(const struct lu_env *env, struct cl_io *io,
 }
 
 /**
+ * Check if a cl_page can be released, i.e, it's not being used.
+ *
+ * If unstable account is turned on, bulk transfer may hold one refcount
+ * for recovery so we need to check vmpage refcount as well; otherwise,
+ * even we can destroy cl_page but the corresponding vmpage can't be reused.
+ */
+static inline bool lru_page_busy(struct client_obd *cli, struct cl_page *page)
+{
+	if (cl_page_in_use_noref(page))
+		return true;
+
+	if (cli->cl_cache->ccc_unstable_check) {
+		struct page *vmpage = cl_page_vmpage(page);
+
+		/* vmpage have two known users: cl_page and VM page cache */
+		if (page_count(vmpage) - page_mapcount(vmpage) > 2)
+			return true;
+	}
+	return false;
+}
+
+/**
  * Drop @target of pages from LRU at most.
  */
-int osc_lru_shrink(const struct lu_env *env, struct client_obd *cli,
-		   int target, bool force)
+long osc_lru_shrink(const struct lu_env *env, struct client_obd *cli,
+		    long target, bool force)
 {
 	struct cl_io *io;
 	struct cl_object *clobj = NULL;
@@ -551,12 +544,12 @@ int osc_lru_shrink(const struct lu_env *env, struct client_obd *cli,
 	struct osc_page *opg;
 	struct osc_page *temp;
 	int maxscan = 0;
-	int count = 0;
+	long count = 0;
 	int index = 0;
 	int rc = 0;
 
-	LASSERT(atomic_read(&cli->cl_lru_in_list) >= 0);
-	if (atomic_read(&cli->cl_lru_in_list) == 0 || target <= 0)
+	LASSERT(atomic_long_read(&cli->cl_lru_in_list) >= 0);
+	if (atomic_long_read(&cli->cl_lru_in_list) == 0 || target <= 0)
 		return 0;
 
 	if (!force) {
@@ -575,7 +568,7 @@ int osc_lru_shrink(const struct lu_env *env, struct client_obd *cli,
 	io = &osc_env_info(env)->oti_io;
 
 	spin_lock(&cli->cl_lru_list_lock);
-	maxscan = min(target << 1, atomic_read(&cli->cl_lru_in_list));
+	maxscan = min(target << 1, atomic_long_read(&cli->cl_lru_in_list));
 	list_for_each_entry_safe(opg, temp, &cli->cl_lru_list, ops_lru) {
 		struct cl_page *page;
 		bool will_free = false;
@@ -584,7 +577,7 @@ int osc_lru_shrink(const struct lu_env *env, struct client_obd *cli,
 			break;
 
 		page = opg->ops_cl.cpl_page;
-		if (cl_page_in_use_noref(page)) {
+		if (lru_page_busy(cli, page)) {
 			list_move_tail(&opg->ops_lru, &cli->cl_lru_list);
 			continue;
 		}
@@ -620,7 +613,7 @@ int osc_lru_shrink(const struct lu_env *env, struct client_obd *cli,
 		}
 
 		if (cl_page_own_try(env, io, page) == 0) {
-			if (!cl_page_in_use_noref(page)) {
+			if (!lru_page_busy(cli, page)) {
 				/* remove it from lru list earlier to avoid
 				 * lock contention
 				 */
@@ -663,24 +656,19 @@ int osc_lru_shrink(const struct lu_env *env, struct client_obd *cli,
 
 	atomic_dec(&cli->cl_lru_shrinkers);
 	if (count > 0) {
-		atomic_add(count, cli->cl_lru_left);
+		atomic_long_add(count, cli->cl_lru_left);
 		wake_up_all(&osc_lru_waitq);
 	}
 	return count > 0 ? count : rc;
 }
 
-static inline int max_to_shrink(struct client_obd *cli)
-{
-	return min(atomic_read(&cli->cl_lru_in_list) >> 1, lru_shrink_max);
-}
-
-int osc_lru_reclaim(struct client_obd *cli)
+long osc_lru_reclaim(struct client_obd *cli)
 {
 	struct cl_env_nest nest;
 	struct lu_env *env;
 	struct cl_client_cache *cache = cli->cl_cache;
 	int max_scans;
-	int rc = 0;
+	long rc = 0;
 
 	LASSERT(cache);
 
@@ -693,15 +681,15 @@ int osc_lru_reclaim(struct client_obd *cli)
 		if (rc == -EBUSY)
 			rc = 0;
 
-		CDEBUG(D_CACHE, "%s: Free %d pages from own LRU: %p.\n",
+		CDEBUG(D_CACHE, "%s: Free %ld pages from own LRU: %p.\n",
 		       cli->cl_import->imp_obd->obd_name, rc, cli);
 		goto out;
 	}
 
-	CDEBUG(D_CACHE, "%s: cli %p no free slots, pages: %d, busy: %d.\n",
+	CDEBUG(D_CACHE, "%s: cli %p no free slots, pages: %ld, busy: %ld.\n",
 	       cli->cl_import->imp_obd->obd_name, cli,
-	       atomic_read(&cli->cl_lru_in_list),
-	       atomic_read(&cli->cl_lru_busy));
+	       atomic_long_read(&cli->cl_lru_in_list),
+	       atomic_long_read(&cli->cl_lru_busy));
 
 	/* Reclaim LRU slots from other client_obd as it can't free enough
 	 * from its own. This should rarely happen.
@@ -717,10 +705,10 @@ int osc_lru_reclaim(struct client_obd *cli)
 		cli = list_entry(cache->ccc_lru.next, struct client_obd,
 				 cl_lru_osc);
 
-		CDEBUG(D_CACHE, "%s: cli %p LRU pages: %d, busy: %d.\n",
+		CDEBUG(D_CACHE, "%s: cli %p LRU pages: %ld, busy: %ld.\n",
 		       cli->cl_import->imp_obd->obd_name, cli,
-		       atomic_read(&cli->cl_lru_in_list),
-		       atomic_read(&cli->cl_lru_busy));
+		       atomic_long_read(&cli->cl_lru_in_list),
+		       atomic_long_read(&cli->cl_lru_busy));
 
 		list_move_tail(&cli->cl_lru_osc, &cache->ccc_lru);
 		if (osc_cache_too_much(cli) > 0) {
@@ -737,11 +725,18 @@ int osc_lru_reclaim(struct client_obd *cli)
 
 out:
 	cl_env_nested_put(&nest, env);
-	CDEBUG(D_CACHE, "%s: cli %p freed %d pages.\n",
+	CDEBUG(D_CACHE, "%s: cli %p freed %ld pages.\n",
 	       cli->cl_import->imp_obd->obd_name, cli, rc);
 	return rc;
 }
 
+/**
+ * osc_lru_reserve() is called to reserve an LRU slot for a cl_page.
+ *
+ * Usually the LRU slots are reserved in osc_io_iter_rw_init().
+ * Only in the case that the LRU slots are in extreme shortage, it should
+ * have reserved enough slots for an IO.
+ */
 static int osc_lru_reserve(const struct lu_env *env, struct osc_object *obj,
 			   struct osc_page *opg)
 {
@@ -758,8 +753,8 @@ static int osc_lru_reserve(const struct lu_env *env, struct osc_object *obj,
 		goto out;
 	}
 
-	LASSERT(atomic_read(cli->cl_lru_left) >= 0);
-	while (!atomic_add_unless(cli->cl_lru_left, -1, 0)) {
+	LASSERT(atomic_long_read(cli->cl_lru_left) >= 0);
+	while (!atomic_long_add_unless(cli->cl_lru_left, -1, 0)) {
 		/* run out of LRU spaces, try to drop some by itself */
 		rc = osc_lru_reclaim(cli);
 		if (rc < 0)
@@ -770,7 +765,7 @@ static int osc_lru_reserve(const struct lu_env *env, struct osc_object *obj,
 		cond_resched();
 
 		rc = l_wait_event(osc_lru_waitq,
-				  atomic_read(cli->cl_lru_left) > 0,
+				  atomic_long_read(cli->cl_lru_left) > 0,
 				  &lwi);
 
 		if (rc < 0)
@@ -779,7 +774,7 @@ static int osc_lru_reserve(const struct lu_env *env, struct osc_object *obj,
 
 out:
 	if (rc >= 0) {
-		atomic_inc(&cli->cl_lru_busy);
+		atomic_long_inc(&cli->cl_lru_busy);
 		opg->ops_in_lru = 1;
 		rc = 0;
 	}
@@ -787,4 +782,151 @@ out:
 	return rc;
 }
 
+/**
+ * Atomic operations are expensive. We accumulate the accounting for the
+ * same page pgdat to get better performance.
+ * In practice this can work pretty good because the pages in the same RPC
+ * are likely from the same page zone.
+ */
+static inline void unstable_page_accounting(struct ptlrpc_bulk_desc *desc,
+					    int factor)
+{
+	int page_count = desc->bd_iov_count;
+	pg_data_t *last = NULL;
+	int count = 0;
+	int i;
+
+	for (i = 0; i < page_count; i++) {
+		pg_data_t *pgdat = page_pgdat(desc->bd_iov[i].bv_page);
+
+		if (likely(pgdat == last)) {
+			++count;
+			continue;
+		}
+
+		if (count > 0) {
+			mod_node_page_state(pgdat, NR_UNSTABLE_NFS,
+					    factor * count);
+			count = 0;
+		}
+		last = pgdat;
+		++count;
+	}
+	if (count > 0)
+		mod_node_page_state(last, NR_UNSTABLE_NFS, factor * count);
+}
+
+static inline void add_unstable_page_accounting(struct ptlrpc_bulk_desc *desc)
+{
+	unstable_page_accounting(desc, 1);
+}
+
+static inline void dec_unstable_page_accounting(struct ptlrpc_bulk_desc *desc)
+{
+	unstable_page_accounting(desc, -1);
+}
+
+/**
+ * Performs "unstable" page accounting. This function balances the
+ * increment operations performed in osc_inc_unstable_pages. It is
+ * registered as the RPC request callback, and is executed when the
+ * bulk RPC is committed on the server. Thus at this point, the pages
+ * involved in the bulk transfer are no longer considered unstable.
+ *
+ * If this function is called, the request should have been committed
+ * or req:rq_unstable must have been set; it implies that the unstable
+ * statistic have been added.
+ */
+void osc_dec_unstable_pages(struct ptlrpc_request *req)
+{
+	struct client_obd *cli = &req->rq_import->imp_obd->u.cli;
+	struct ptlrpc_bulk_desc *desc = req->rq_bulk;
+	int page_count = desc->bd_iov_count;
+	long unstable_count;
+
+	LASSERT(page_count >= 0);
+	dec_unstable_page_accounting(desc);
+
+	unstable_count = atomic_long_sub_return(page_count,
+						&cli->cl_unstable_count);
+	LASSERT(unstable_count >= 0);
+
+	unstable_count = atomic_long_sub_return(page_count,
+						&cli->cl_cache->ccc_unstable_nr);
+	LASSERT(unstable_count >= 0);
+	if (!unstable_count)
+		wake_up_all(&cli->cl_cache->ccc_unstable_waitq);
+
+	if (osc_cache_too_much(cli))
+		(void)ptlrpcd_queue_work(cli->cl_lru_work);
+}
+
+/**
+ * "unstable" page accounting. See: osc_dec_unstable_pages.
+ */
+void osc_inc_unstable_pages(struct ptlrpc_request *req)
+{
+	struct client_obd *cli  = &req->rq_import->imp_obd->u.cli;
+	struct ptlrpc_bulk_desc *desc = req->rq_bulk;
+	long page_count = desc->bd_iov_count;
+
+	/* No unstable page tracking */
+	if (!cli->cl_cache || !cli->cl_cache->ccc_unstable_check)
+		return;
+
+	add_unstable_page_accounting(desc);
+	atomic_long_add(page_count, &cli->cl_unstable_count);
+	atomic_long_add(page_count, &cli->cl_cache->ccc_unstable_nr);
+
+	/*
+	 * If the request has already been committed (i.e. brw_commit
+	 * called via rq_commit_cb), we need to undo the unstable page
+	 * increments we just performed because rq_commit_cb wont be
+	 * called again.
+	 */
+	spin_lock(&req->rq_lock);
+	if (unlikely(req->rq_committed)) {
+		spin_unlock(&req->rq_lock);
+
+		osc_dec_unstable_pages(req);
+	} else {
+		req->rq_unstable = 1;
+		spin_unlock(&req->rq_lock);
+	}
+}
+
+/**
+ * Check if it piggybacks SOFT_SYNC flag to OST from this OSC.
+ * This function will be called by every BRW RPC so it's critical
+ * to make this function fast.
+ */
+bool osc_over_unstable_soft_limit(struct client_obd *cli)
+{
+	long unstable_nr, osc_unstable_count;
+
+	/* Can't check cli->cl_unstable_count, therefore, no soft limit */
+	if (!cli->cl_cache || !cli->cl_cache->ccc_unstable_check)
+		return false;
+
+	osc_unstable_count = atomic_long_read(&cli->cl_unstable_count);
+	unstable_nr = atomic_long_read(&cli->cl_cache->ccc_unstable_nr);
+
+	CDEBUG(D_CACHE,
+	       "%s: cli: %p unstable pages: %lu, osc unstable pages: %lu\n",
+	       cli->cl_import->imp_obd->obd_name, cli,
+	       unstable_nr, osc_unstable_count);
+
+	/*
+	 * If the LRU slots are in shortage - 25% remaining AND this OSC
+	 * has one full RPC window of unstable pages, it's a good chance
+	 * to piggyback a SOFT_SYNC flag.
+	 * Please notice that the OST won't take immediate response for the
+	 * SOFT_SYNC request so active OSCs will have more chance to carry
+	 * the flag, this is reasonable.
+	 */
+	return unstable_nr > cli->cl_cache->ccc_lru_max >> 2 &&
+	       osc_unstable_count > cli->cl_max_pages_per_rpc *
+				    cli->cl_max_rpcs_in_flight;
+}
+
 /** @} osc */
diff --git a/drivers/staging/lustre/lustre/osc/osc_request.c b/drivers/staging/lustre/lustre/osc/osc_request.c
index 536b868ff776..749781f022e2 100644
--- a/drivers/staging/lustre/lustre/osc/osc_request.c
+++ b/drivers/staging/lustre/lustre/osc/osc_request.c
@@ -41,6 +41,7 @@
 
 #include "../include/lustre_ha.h"
 #include "../include/lprocfs_status.h"
+#include "../include/lustre/lustre_ioctl.h"
 #include "../include/lustre_debug.h"
 #include "../include/lustre_param.h"
 #include "../include/lustre_fid.h"
@@ -102,36 +103,6 @@ static void osc_release_ppga(struct brw_page **ppga, u32 count);
 static int brw_interpret(const struct lu_env *env,
 			 struct ptlrpc_request *req, void *data, int rc);
 
-/* Pack OSC object metadata for disk storage (LE byte order). */
-static int osc_packmd(struct obd_export *exp, struct lov_mds_md **lmmp,
-		      struct lov_stripe_md *lsm)
-{
-	int lmm_size;
-
-	lmm_size = sizeof(**lmmp);
-	if (!lmmp)
-		return lmm_size;
-
-	if (*lmmp && !lsm) {
-		kfree(*lmmp);
-		*lmmp = NULL;
-		return 0;
-	} else if (unlikely(lsm && ostid_id(&lsm->lsm_oi) == 0)) {
-		return -EBADF;
-	}
-
-	if (!*lmmp) {
-		*lmmp = kzalloc(lmm_size, GFP_NOFS);
-		if (!*lmmp)
-			return -ENOMEM;
-	}
-
-	if (lsm)
-		ostid_cpu_to_le(&lsm->lsm_oi, &(*lmmp)->lmm_oi);
-
-	return lmm_size;
-}
-
 /* Unpack OSC object metadata from disk storage (LE byte order). */
 static int osc_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsmp,
 			struct lov_mds_md *lmm, int lmm_bytes)
@@ -189,7 +160,7 @@ static int osc_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsmp,
 	    (imp->imp_connect_data.ocd_connect_flags & OBD_CONNECT_MAXBYTES))
 		(*lsmp)->lsm_maxbytes = imp->imp_connect_data.ocd_maxbytes;
 	else
-		(*lsmp)->lsm_maxbytes = LUSTRE_STRIPE_MAXBYTES;
+		(*lsmp)->lsm_maxbytes = LUSTRE_EXT3_STRIPE_MAXBYTES;
 
 	return lsm_size;
 }
@@ -427,24 +398,16 @@ static int osc_setattr_async(struct obd_export *exp, struct obd_info *oinfo,
 				      oinfo->oi_cb_up, oinfo, rqset);
 }
 
-static int osc_real_create(struct obd_export *exp, struct obdo *oa,
-			   struct lov_stripe_md **ea,
-			   struct obd_trans_info *oti)
+static int osc_create(const struct lu_env *env, struct obd_export *exp,
+		      struct obdo *oa, struct obd_trans_info *oti)
 {
 	struct ptlrpc_request *req;
 	struct ost_body *body;
-	struct lov_stripe_md *lsm;
 	int rc;
 
 	LASSERT(oa);
-	LASSERT(ea);
-
-	lsm = *ea;
-	if (!lsm) {
-		rc = obd_alloc_memmd(exp, &lsm);
-		if (rc < 0)
-			return rc;
-	}
+	LASSERT(oa->o_valid & OBD_MD_FLGROUP);
+	LASSERT(fid_seq_is_echo(ostid_seq(&oa->o_oi)));
 
 	req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_OST_CREATE);
 	if (!req) {
@@ -490,21 +453,10 @@ static int osc_real_create(struct obd_export *exp, struct obdo *oa,
 	oa->o_blksize = cli_brw_size(exp->exp_obd);
 	oa->o_valid |= OBD_MD_FLBLKSZ;
 
-	/* XXX LOV STACKING: the lsm that is passed to us from LOV does not
-	 * have valid lsm_oinfo data structs, so don't go touching that.
-	 * This needs to be fixed in a big way.
-	 */
-	lsm->lsm_oi = oa->o_oi;
-	*ea = lsm;
-
-	if (oti) {
-		oti->oti_transno = lustre_msg_get_transno(req->rq_repmsg);
-
-		if (oa->o_valid & OBD_MD_FLCOOKIE) {
-			if (!oti->oti_logcookies)
-				oti_alloc_cookies(oti, 1);
-			*oti->oti_logcookies = oa->o_lcookie;
-		}
+	if (oti && oa->o_valid & OBD_MD_FLCOOKIE) {
+		if (!oti->oti_logcookies)
+			oti->oti_logcookies = &oti->oti_onecookie;
+		*oti->oti_logcookies = oa->o_lcookie;
 	}
 
 	CDEBUG(D_HA, "transno: %lld\n",
@@ -512,8 +464,6 @@ static int osc_real_create(struct obd_export *exp, struct obdo *oa,
 out_req:
 	ptlrpc_req_finished(req);
 out:
-	if (rc && !*ea)
-		obd_free_memmd(exp, &lsm);
 	return rc;
 }
 
@@ -649,7 +599,7 @@ static int osc_resource_get_unused(struct obd_export *exp, struct obdo *oa,
 
 	ostid_build_res_name(&oa->o_oi, &res_id);
 	res = ldlm_resource_get(ns, NULL, &res_id, 0, 0);
-	if (!res)
+	if (IS_ERR(res))
 		return 0;
 
 	LDLM_RESOURCE_ADDREF(res);
@@ -689,30 +639,6 @@ static int osc_can_send_destroy(struct client_obd *cli)
 	return 0;
 }
 
-static int osc_create(const struct lu_env *env, struct obd_export *exp,
-		      struct obdo *oa, struct lov_stripe_md **ea,
-		      struct obd_trans_info *oti)
-{
-	int rc = 0;
-
-	LASSERT(oa);
-	LASSERT(ea);
-	LASSERT(oa->o_valid & OBD_MD_FLGROUP);
-
-	if ((oa->o_valid & OBD_MD_FLFLAGS) &&
-	    oa->o_flags == OBD_FL_RECREATE_OBJS) {
-		return osc_real_create(exp, oa, ea, oti);
-	}
-
-	if (!fid_seq_is_mdt(ostid_seq(&oa->o_oi)))
-		return osc_real_create(exp, oa, ea, oti);
-
-	/* we should not get here anymore */
-	LBUG();
-
-	return rc;
-}
-
 /* Destroy requests can be async always on the client, and we don't even really
  * care about the return code since the client cannot do anything at all about
  * a destroy failure.
@@ -725,8 +651,7 @@ static int osc_create(const struct lu_env *env, struct obd_export *exp,
  * cookies to the MDS after committing destroy transactions.
  */
 static int osc_destroy(const struct lu_env *env, struct obd_export *exp,
-		       struct obdo *oa, struct lov_stripe_md *ea,
-		       struct obd_trans_info *oti, struct obd_export *md_export)
+		       struct obdo *oa, struct obd_trans_info *oti)
 {
 	struct client_obd *cli = &exp->exp_obd->u.cli;
 	struct ptlrpc_request *req;
@@ -794,42 +719,44 @@ static int osc_destroy(const struct lu_env *env, struct obd_export *exp,
 static void osc_announce_cached(struct client_obd *cli, struct obdo *oa,
 				long writing_bytes)
 {
-	u32 bits = OBD_MD_FLBLOCKS|OBD_MD_FLGRANT;
+	u32 bits = OBD_MD_FLBLOCKS | OBD_MD_FLGRANT;
 
 	LASSERT(!(oa->o_valid & bits));
 
 	oa->o_valid |= bits;
 	spin_lock(&cli->cl_loi_list_lock);
-	oa->o_dirty = cli->cl_dirty;
-	if (unlikely(cli->cl_dirty - cli->cl_dirty_transit >
-		     cli->cl_dirty_max)) {
+	oa->o_dirty = cli->cl_dirty_pages << PAGE_SHIFT;
+	if (unlikely(cli->cl_dirty_pages - cli->cl_dirty_transit >
+		     cli->cl_dirty_max_pages)) {
 		CERROR("dirty %lu - %lu > dirty_max %lu\n",
-		       cli->cl_dirty, cli->cl_dirty_transit, cli->cl_dirty_max);
+		       cli->cl_dirty_pages, cli->cl_dirty_transit,
+		       cli->cl_dirty_max_pages);
 		oa->o_undirty = 0;
-	} else if (unlikely(atomic_read(&obd_unstable_pages) +
-			    atomic_read(&obd_dirty_pages) -
-			    atomic_read(&obd_dirty_transit_pages) >
-			    (long)(obd_max_dirty_pages + 1))) {
+	} else if (unlikely(atomic_long_read(&obd_dirty_pages) -
+			    atomic_long_read(&obd_dirty_transit_pages) >
+			    (obd_max_dirty_pages + 1))) {
 		/* The atomic_read() allowing the atomic_inc() are
 		 * not covered by a lock thus they may safely race and trip
 		 * this CERROR() unless we add in a small fudge factor (+1).
 		 */
-		CERROR("%s: dirty %d + %d - %d > system dirty_max %d\n",
+		CERROR("%s: dirty %ld + %ld > system dirty_max %lu\n",
 		       cli->cl_import->imp_obd->obd_name,
-		       atomic_read(&obd_unstable_pages),
-		       atomic_read(&obd_dirty_pages),
-		       atomic_read(&obd_dirty_transit_pages),
+		       atomic_long_read(&obd_dirty_pages),
+		       atomic_long_read(&obd_dirty_transit_pages),
 		       obd_max_dirty_pages);
 		oa->o_undirty = 0;
-	} else if (unlikely(cli->cl_dirty_max - cli->cl_dirty > 0x7fffffff)) {
+	} else if (unlikely(cli->cl_dirty_max_pages - cli->cl_dirty_pages >
+		   0x7fffffff)) {
 		CERROR("dirty %lu - dirty_max %lu too big???\n",
-		       cli->cl_dirty, cli->cl_dirty_max);
+		       cli->cl_dirty_pages, cli->cl_dirty_max_pages);
 		oa->o_undirty = 0;
 	} else {
-		long max_in_flight = (cli->cl_max_pages_per_rpc <<
-				      PAGE_SHIFT)*
-				     (cli->cl_max_rpcs_in_flight + 1);
-		oa->o_undirty = max(cli->cl_dirty_max, max_in_flight);
+		unsigned long max_in_flight;
+
+		max_in_flight = (cli->cl_max_pages_per_rpc << PAGE_SHIFT) *
+				(cli->cl_max_rpcs_in_flight + 1);
+		oa->o_undirty = max(cli->cl_dirty_max_pages << PAGE_SHIFT,
+				    max_in_flight);
 	}
 	oa->o_grant = cli->cl_avail_grant + cli->cl_reserved_grant;
 	oa->o_dropped = cli->cl_lost_grant;
@@ -1029,22 +956,24 @@ static void osc_init_grant(struct client_obd *cli, struct obd_connect_data *ocd)
 {
 	/*
 	 * ocd_grant is the total grant amount we're expect to hold: if we've
-	 * been evicted, it's the new avail_grant amount, cl_dirty will drop
-	 * to 0 as inflight RPCs fail out; otherwise, it's avail_grant + dirty.
+	 * been evicted, it's the new avail_grant amount, cl_dirty_pages will
+	 * drop to 0 as inflight RPCs fail out; otherwise, it's avail_grant +
+	 * dirty.
 	 *
 	 * race is tolerable here: if we're evicted, but imp_state already
-	 * left EVICTED state, then cl_dirty must be 0 already.
+	 * left EVICTED state, then cl_dirty_pages must be 0 already.
 	 */
 	spin_lock(&cli->cl_loi_list_lock);
 	if (cli->cl_import->imp_state == LUSTRE_IMP_EVICTED)
 		cli->cl_avail_grant = ocd->ocd_grant;
 	else
-		cli->cl_avail_grant = ocd->ocd_grant - cli->cl_dirty;
+		cli->cl_avail_grant = ocd->ocd_grant -
+				      (cli->cl_dirty_pages << PAGE_SHIFT);
 
 	if (cli->cl_avail_grant < 0) {
 		CWARN("%s: available grant < 0: avail/ocd/dirty %ld/%u/%ld\n",
 		      cli->cl_import->imp_obd->obd_name, cli->cl_avail_grant,
-		      ocd->ocd_grant, cli->cl_dirty);
+		      ocd->ocd_grant, cli->cl_dirty_pages << PAGE_SHIFT);
 		/* workaround for servers which do not have the patch from
 		 * LU-2679
 		 */
@@ -1181,7 +1110,7 @@ static u32 osc_checksum_bulk(int nob, u32 pg_count,
 	}
 
 	while (nob > 0 && pg_count > 0) {
-		int count = pga[i]->count > nob ? nob : pga[i]->count;
+		unsigned int count = pga[i]->count > nob ? nob : pga[i]->count;
 
 		/* corrupt the data before we compute the checksum, to
 		 * simulate an OST->client data error
@@ -1191,7 +1120,7 @@ static u32 osc_checksum_bulk(int nob, u32 pg_count,
 			unsigned char *ptr = kmap(pga[i]->pg);
 			int off = pga[i]->off & ~PAGE_MASK;
 
-			memcpy(ptr + off, "bad1", min(4, nob));
+			memcpy(ptr + off, "bad1", min_t(typeof(nob), 4, nob));
 			kunmap(pga[i]->pg);
 		}
 		cfs_crypto_hash_update_page(hdesc, pga[i]->pg,
@@ -1335,11 +1264,11 @@ static int osc_brw_prep_request(int cmd, struct client_obd *cli,
 
 		if (i > 0 && can_merge_pages(pg_prev, pg)) {
 			niobuf--;
-			niobuf->len += pg->count;
+			niobuf->rnb_len += pg->count;
 		} else {
-			niobuf->offset = pg->off;
-			niobuf->len = pg->count;
-			niobuf->flags = pg->flag;
+			niobuf->rnb_offset = pg->off;
+			niobuf->rnb_len = pg->count;
+			niobuf->rnb_flags = pg->flag;
 		}
 		pg_prev = pg;
 	}
@@ -1418,6 +1347,11 @@ static int osc_brw_prep_request(int cmd, struct client_obd *cli,
 	INIT_LIST_HEAD(&aa->aa_oaps);
 
 	*reqp = req;
+	niobuf = req_capsule_client_get(pill, &RMF_NIOBUF_REMOTE);
+	CDEBUG(D_RPCTRACE, "brw rpc %p - object " DOSTID " offset %lld<>%lld\n",
+	       req, POSTID(&oa->o_oi), niobuf[0].rnb_offset,
+	       niobuf[niocount - 1].rnb_offset + niobuf[niocount - 1].rnb_len);
+
 	return 0;
 
  out:
@@ -1463,7 +1397,8 @@ static int check_write_checksum(struct obdo *oa, const lnet_process_id_t *peer,
 			   oa->o_valid & OBD_MD_FLFID ? oa->o_parent_oid : 0,
 			   oa->o_valid & OBD_MD_FLFID ? oa->o_parent_ver : 0,
 			   POSTID(&oa->o_oi), pga[0]->off,
-			   pga[page_count-1]->off + pga[page_count-1]->count - 1);
+			   pga[page_count - 1]->off +
+			   pga[page_count - 1]->count - 1);
 	CERROR("original client csum %x (type %x), server csum %x (type %x), client csum now %x\n",
 	       client_cksum, client_cksum_type,
 	       server_cksum, cksum_type, new_cksum);
@@ -1565,7 +1500,8 @@ static int osc_brw_fini_request(struct ptlrpc_request *req, int rc)
 		char *router = "";
 		enum cksum_type cksum_type;
 
-		cksum_type = cksum_type_unpack(body->oa.o_valid&OBD_MD_FLFLAGS ?
+		cksum_type = cksum_type_unpack(body->oa.o_valid &
+					       OBD_MD_FLFLAGS ?
 					       body->oa.o_flags : 0);
 		client_cksum = osc_checksum_bulk(rc, aa->aa_page_count,
 						 aa->aa_ppga, OST_READ,
@@ -1794,7 +1730,8 @@ static int brw_interpret(const struct lu_env *env,
 
 		if (lustre_msg_get_opc(req->rq_reqmsg) == OST_WRITE) {
 			struct lov_oinfo *loi = cl2osc(obj)->oo_oinfo;
-			loff_t last_off = last->oap_count + last->oap_obj_off;
+			loff_t last_off = last->oap_count + last->oap_obj_off +
+					  last->oap_page_off;
 
 			/* Change file size if this is an out of quota or
 			 * direct IO write and it extends the file size
@@ -1812,11 +1749,14 @@ static int brw_interpret(const struct lu_env *env,
 		}
 
 		if (valid != 0)
-			cl_object_attr_set(env, obj, attr, valid);
+			cl_object_attr_update(env, obj, attr, valid);
 		cl_object_attr_unlock(obj);
 	}
 	kmem_cache_free(obdo_cachep, aa->aa_oa);
 
+	if (lustre_msg_get_opc(req->rq_reqmsg) == OST_WRITE && rc == 0)
+		osc_inc_unstable_pages(req);
+
 	list_for_each_entry_safe(ext, tmp, &aa->aa_exts, oe_link) {
 		list_del_init(&ext->oe_link);
 		osc_extent_finish(env, ext, 1, rc);
@@ -1847,21 +1787,21 @@ static int brw_interpret(const struct lu_env *env,
 
 static void brw_commit(struct ptlrpc_request *req)
 {
-	spin_lock(&req->rq_lock);
 	/*
 	 * If osc_inc_unstable_pages (via osc_extent_finish) races with
 	 * this called via the rq_commit_cb, I need to ensure
 	 * osc_dec_unstable_pages is still called. Otherwise unstable
 	 * pages may be leaked.
 	 */
-	if (req->rq_unstable) {
+	spin_lock(&req->rq_lock);
+	if (unlikely(req->rq_unstable)) {
+		req->rq_unstable = 0;
 		spin_unlock(&req->rq_lock);
 		osc_dec_unstable_pages(req);
-		spin_lock(&req->rq_lock);
 	} else {
 		req->rq_committed = 1;
+		spin_unlock(&req->rq_lock);
 	}
-	spin_unlock(&req->rq_lock);
 }
 
 /**
@@ -1881,13 +1821,13 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
 	struct osc_async_page *tmp;
 	struct cl_req *clerq = NULL;
 	enum cl_req_type crt = (cmd & OBD_BRW_WRITE) ? CRT_WRITE : CRT_READ;
-	struct ldlm_lock *lock = NULL;
 	struct cl_req_attr *crattr = NULL;
 	u64 starting_offset = OBD_OBJECT_EOF;
 	u64 ending_offset = 0;
 	int mpflag = 0;
 	int mem_tight = 0;
 	int page_count = 0;
+	bool soft_sync = false;
 	int i;
 	int rc;
 	struct ost_body *body;
@@ -1915,6 +1855,7 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
 		}
 	}
 
+	soft_sync = osc_over_unstable_soft_limit(cli);
 	if (mem_tight)
 		mpflag = cfs_memory_pressure_get_and_set();
 
@@ -1947,10 +1888,11 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
 				rc = PTR_ERR(clerq);
 				goto out;
 			}
-			lock = oap->oap_ldlm_lock;
 		}
 		if (mem_tight)
 			oap->oap_brw_flags |= OBD_BRW_MEMALLOC;
+		if (soft_sync)
+			oap->oap_brw_flags |= OBD_BRW_SOFT_SYNC;
 		pga[i] = &oap->oap_brw_page;
 		pga[i]->off = oap->oap_obj_off + oap->oap_page_off;
 		CDEBUG(0, "put page %p index %lu oap %p flg %x to pga\n",
@@ -1964,10 +1906,6 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
 	LASSERT(clerq);
 	crattr->cra_oa = oa;
 	cl_req_attr_set(env, clerq, crattr, ~0ULL);
-	if (lock) {
-		oa->o_handle = lock->l_remote_handle;
-		oa->o_valid |= OBD_MD_FLHANDLE;
-	}
 
 	rc = cl_req_prep(env, clerq);
 	if (rc != 0) {
@@ -1998,7 +1936,7 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
 	body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY);
 	crattr->cra_oa = &body->oa;
 	cl_req_attr_set(env, clerq, crattr,
-			OBD_MD_FLMTIME|OBD_MD_FLCTIME|OBD_MD_FLATIME);
+			OBD_MD_FLMTIME | OBD_MD_FLCTIME | OBD_MD_FLATIME);
 
 	lustre_msg_set_jobid(req->rq_reqmsg, crattr->cra_jobid);
 
@@ -2044,7 +1982,7 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
 	}
 	spin_unlock(&cli->cl_loi_list_lock);
 
-	DEBUG_REQ(D_INODE, req, "%d pages, aa %p. now %dr/%dw in flight",
+	DEBUG_REQ(D_INODE, req, "%d pages, aa %p. now %ur/%dw in flight",
 		  page_count, aa, cli->cl_r_in_flight,
 		  cli->cl_w_in_flight);
 
@@ -2116,27 +2054,6 @@ static int osc_set_data_with_check(struct lustre_handle *lockh,
 	return set;
 }
 
-/* find any ldlm lock of the inode in osc
- * return 0    not find
- *	1    find one
- *      < 0    error
- */
-static int osc_find_cbdata(struct obd_export *exp, struct lov_stripe_md *lsm,
-			   ldlm_iterator_t replace, void *data)
-{
-	struct ldlm_res_id res_id;
-	struct obd_device *obd = class_exp2obd(exp);
-	int rc = 0;
-
-	ostid_build_res_name(&lsm->lsm_oi, &res_id);
-	rc = ldlm_resource_iterate(obd->obd_namespace, &res_id, replace, data);
-	if (rc == LDLM_ITER_STOP)
-		return 1;
-	if (rc == LDLM_ITER_CONTINUE)
-		return 0;
-	return rc;
-}
-
 static int osc_enqueue_fini(struct ptlrpc_request *req,
 			    osc_enqueue_upcall_f upcall, void *cookie,
 			    struct lustre_handle *lockh, enum ldlm_mode mode,
@@ -2586,71 +2503,6 @@ static int osc_statfs(const struct lu_env *env, struct obd_export *exp,
 	return rc;
 }
 
-/* Retrieve object striping information.
- *
- * @lmmu is a pointer to an in-core struct with lmm_ost_count indicating
- * the maximum number of OST indices which will fit in the user buffer.
- * lmm_magic must be LOV_MAGIC (we only use 1 slot here).
- */
-static int osc_getstripe(struct lov_stripe_md *lsm,
-			 struct lov_user_md __user *lump)
-{
-	/* we use lov_user_md_v3 because it is larger than lov_user_md_v1 */
-	struct lov_user_md_v3 lum, *lumk;
-	struct lov_user_ost_data_v1 *lmm_objects;
-	int rc = 0, lum_size;
-
-	if (!lsm)
-		return -ENODATA;
-
-	/* we only need the header part from user space to get lmm_magic and
-	 * lmm_stripe_count, (the header part is common to v1 and v3)
-	 */
-	lum_size = sizeof(struct lov_user_md_v1);
-	if (copy_from_user(&lum, lump, lum_size))
-		return -EFAULT;
-
-	if ((lum.lmm_magic != LOV_USER_MAGIC_V1) &&
-	    (lum.lmm_magic != LOV_USER_MAGIC_V3))
-		return -EINVAL;
-
-	/* lov_user_md_vX and lov_mds_md_vX must have the same size */
-	LASSERT(sizeof(struct lov_user_md_v1) == sizeof(struct lov_mds_md_v1));
-	LASSERT(sizeof(struct lov_user_md_v3) == sizeof(struct lov_mds_md_v3));
-	LASSERT(sizeof(lum.lmm_objects[0]) == sizeof(lumk->lmm_objects[0]));
-
-	/* we can use lov_mds_md_size() to compute lum_size
-	 * because lov_user_md_vX and lov_mds_md_vX have the same size
-	 */
-	if (lum.lmm_stripe_count > 0) {
-		lum_size = lov_mds_md_size(lum.lmm_stripe_count, lum.lmm_magic);
-		lumk = kzalloc(lum_size, GFP_NOFS);
-		if (!lumk)
-			return -ENOMEM;
-
-		if (lum.lmm_magic == LOV_USER_MAGIC_V1)
-			lmm_objects =
-			    &(((struct lov_user_md_v1 *)lumk)->lmm_objects[0]);
-		else
-			lmm_objects = &(lumk->lmm_objects[0]);
-		lmm_objects->l_ost_oi = lsm->lsm_oi;
-	} else {
-		lum_size = lov_mds_md_size(0, lum.lmm_magic);
-		lumk = &lum;
-	}
-
-	lumk->lmm_oi = lsm->lsm_oi;
-	lumk->lmm_stripe_count = 1;
-
-	if (copy_to_user(lump, lumk, lum_size))
-		rc = -EFAULT;
-
-	if (lumk != &lum)
-		kfree(lumk);
-
-	return rc;
-}
-
 static int osc_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
 			 void *karg, void __user *uarg)
 {
@@ -2664,57 +2516,6 @@ static int osc_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
 		return -EINVAL;
 	}
 	switch (cmd) {
-	case OBD_IOC_LOV_GET_CONFIG: {
-		char *buf;
-		struct lov_desc *desc;
-		struct obd_uuid uuid;
-
-		buf = NULL;
-		len = 0;
-		if (obd_ioctl_getdata(&buf, &len, uarg)) {
-			err = -EINVAL;
-			goto out;
-		}
-
-		data = (struct obd_ioctl_data *)buf;
-
-		if (sizeof(*desc) > data->ioc_inllen1) {
-			obd_ioctl_freedata(buf, len);
-			err = -EINVAL;
-			goto out;
-		}
-
-		if (data->ioc_inllen2 < sizeof(uuid)) {
-			obd_ioctl_freedata(buf, len);
-			err = -EINVAL;
-			goto out;
-		}
-
-		desc = (struct lov_desc *)data->ioc_inlbuf1;
-		desc->ld_tgt_count = 1;
-		desc->ld_active_tgt_count = 1;
-		desc->ld_default_stripe_count = 1;
-		desc->ld_default_stripe_size = 0;
-		desc->ld_default_stripe_offset = 0;
-		desc->ld_pattern = 0;
-		memcpy(&desc->ld_uuid, &obd->obd_uuid, sizeof(uuid));
-
-		memcpy(data->ioc_inlbuf2, &obd->obd_uuid, sizeof(uuid));
-
-		err = copy_to_user(uarg, buf, len);
-		if (err)
-			err = -EFAULT;
-		obd_ioctl_freedata(buf, len);
-		goto out;
-	}
-	case LL_IOC_LOV_SETSTRIPE:
-		err = obd_alloc_memmd(exp, karg);
-		if (err > 0)
-			err = 0;
-		goto out;
-	case LL_IOC_LOV_GETSTRIPE:
-		err = osc_getstripe(karg, uarg);
-		goto out;
 	case OBD_IOC_CLIENT_RECOVER:
 		err = ptlrpc_recover_import(obd->u.cli.cl_import,
 					    data->ioc_inlbuf1, 0);
@@ -2749,51 +2550,7 @@ static int osc_get_info(const struct lu_env *env, struct obd_export *exp,
 	if (!vallen || !val)
 		return -EFAULT;
 
-	if (KEY_IS(KEY_LOCK_TO_STRIPE)) {
-		__u32 *stripe = val;
-		*vallen = sizeof(*stripe);
-		*stripe = 0;
-		return 0;
-	} else if (KEY_IS(KEY_LAST_ID)) {
-		struct ptlrpc_request *req;
-		u64 *reply;
-		char *tmp;
-		int rc;
-
-		req = ptlrpc_request_alloc(class_exp2cliimp(exp),
-					   &RQF_OST_GET_INFO_LAST_ID);
-		if (!req)
-			return -ENOMEM;
-
-		req_capsule_set_size(&req->rq_pill, &RMF_SETINFO_KEY,
-				     RCL_CLIENT, keylen);
-		rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_GET_INFO);
-		if (rc) {
-			ptlrpc_request_free(req);
-			return rc;
-		}
-
-		tmp = req_capsule_client_get(&req->rq_pill, &RMF_SETINFO_KEY);
-		memcpy(tmp, key, keylen);
-
-		req->rq_no_delay = 1;
-		req->rq_no_resend = 1;
-		ptlrpc_request_set_replen(req);
-		rc = ptlrpc_queue_wait(req);
-		if (rc)
-			goto out;
-
-		reply = req_capsule_server_get(&req->rq_pill, &RMF_OBD_ID);
-		if (!reply) {
-			rc = -EPROTO;
-			goto out;
-		}
-
-		*((u64 *)val) = *reply;
-out:
-		ptlrpc_req_finished(req);
-		return rc;
-	} else if (KEY_IS(KEY_FIEMAP)) {
+	if (KEY_IS(KEY_FIEMAP)) {
 		struct ll_fiemap_info_key *fm_key = key;
 		struct ldlm_res_id res_id;
 		ldlm_policy_data_t policy;
@@ -2931,11 +2688,11 @@ static int osc_set_info_async(const struct lu_env *env, struct obd_export *exp,
 
 	if (KEY_IS(KEY_CACHE_LRU_SHRINK)) {
 		struct client_obd *cli = &obd->u.cli;
-		int nr = atomic_read(&cli->cl_lru_in_list) >> 1;
-		int target = *(int *)val;
+		long nr = atomic_long_read(&cli->cl_lru_in_list) >> 1;
+		long target = *(long *)val;
 
 		nr = osc_lru_shrink(env, cli, min(nr, target), true);
-		*(int *)val -= nr;
+		*(long *)val -= nr;
 		return 0;
 	}
 
@@ -3014,8 +2771,9 @@ static int osc_reconnect(const struct lu_env *env,
 		long lost_grant;
 
 		spin_lock(&cli->cl_loi_list_lock);
-		data->ocd_grant = (cli->cl_avail_grant + cli->cl_dirty) ?:
-				2 * cli_brw_size(obd);
+		data->ocd_grant = (cli->cl_avail_grant +
+				   (cli->cl_dirty_pages << PAGE_SHIFT)) ?:
+				   2 * cli_brw_size(obd);
 		lost_grant = cli->cl_lost_grant;
 		cli->cl_lost_grant = 0;
 		spin_unlock(&cli->cl_loi_list_lock);
@@ -3346,7 +3104,6 @@ static struct obd_ops osc_obd_ops = {
 	.disconnect     = osc_disconnect,
 	.statfs         = osc_statfs,
 	.statfs_async   = osc_statfs_async,
-	.packmd         = osc_packmd,
 	.unpackmd       = osc_unpackmd,
 	.create         = osc_create,
 	.destroy        = osc_destroy,
@@ -3354,7 +3111,6 @@ static struct obd_ops osc_obd_ops = {
 	.getattr_async  = osc_getattr_async,
 	.setattr        = osc_setattr,
 	.setattr_async  = osc_setattr_async,
-	.find_cbdata    = osc_find_cbdata,
 	.iocontrol      = osc_iocontrol,
 	.get_info       = osc_get_info,
 	.set_info_async = osc_set_info_async,
diff --git a/drivers/staging/lustre/lustre/ptlrpc/client.c b/drivers/staging/lustre/lustre/ptlrpc/client.c
index d4463d7c81d2..8c51d51a678b 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/client.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/client.c
@@ -45,6 +45,7 @@
 
 static int ptlrpc_send_new_req(struct ptlrpc_request *req);
 static int ptlrpcd_check_work(struct ptlrpc_request *req);
+static int ptlrpc_unregister_reply(struct ptlrpc_request *request, int async);
 
 /**
  * Initialize passed in client structure \a cl.
@@ -89,7 +90,6 @@ struct ptlrpc_connection *ptlrpc_uuid_to_connection(struct obd_uuid *uuid)
 
 	return c;
 }
-EXPORT_SYMBOL(ptlrpc_uuid_to_connection);
 
 /**
  * Allocate and initialize new bulk descriptor on the sender.
@@ -202,7 +202,7 @@ void __ptlrpc_free_bulk(struct ptlrpc_bulk_desc *desc, int unpin)
 
 	if (unpin) {
 		for (i = 0; i < desc->bd_iov_count; i++)
-			put_page(desc->bd_iov[i].kiov_page);
+			put_page(desc->bd_iov[i].bv_page);
 	}
 
 	kfree(desc);
@@ -283,8 +283,8 @@ int ptlrpc_at_get_net_latency(struct ptlrpc_request *req)
 }
 
 /* Adjust expected network latency */
-static void ptlrpc_at_adj_net_latency(struct ptlrpc_request *req,
-				      unsigned int service_time)
+void ptlrpc_at_adj_net_latency(struct ptlrpc_request *req,
+			       unsigned int service_time)
 {
 	unsigned int nl, oldnl;
 	struct imp_at *at;
@@ -364,31 +364,37 @@ static int ptlrpc_at_recv_early_reply(struct ptlrpc_request *req)
 	}
 
 	rc = unpack_reply(early_req);
-	if (rc == 0) {
-		/* Expecting to increase the service time estimate here */
-		ptlrpc_at_adj_service(req,
-			lustre_msg_get_timeout(early_req->rq_repmsg));
-		ptlrpc_at_adj_net_latency(req,
-			lustre_msg_get_service_time(early_req->rq_repmsg));
-	}
-
-	sptlrpc_cli_finish_early_reply(early_req);
-
-	if (rc != 0) {
+	if (rc) {
+		sptlrpc_cli_finish_early_reply(early_req);
 		spin_lock(&req->rq_lock);
 		return rc;
 	}
 
-	/* Adjust the local timeout for this req */
-	ptlrpc_at_set_req_timeout(req);
+	/*
+	 * Use new timeout value just to adjust the local value for this
+	 * request, don't include it into at_history. It is unclear yet why
+	 * service time increased and should it be counted or skipped, e.g.
+	 * that can be recovery case or some error or server, the real reply
+	 * will add all new data if it is worth to add.
+	 */
+	req->rq_timeout = lustre_msg_get_timeout(early_req->rq_repmsg);
+	lustre_msg_set_timeout(req->rq_reqmsg, req->rq_timeout);
+
+	/* Network latency can be adjusted, it is pure network delays */
+	ptlrpc_at_adj_net_latency(req,
+				  lustre_msg_get_service_time(early_req->rq_repmsg));
+
+	sptlrpc_cli_finish_early_reply(early_req);
 
 	spin_lock(&req->rq_lock);
 	olddl = req->rq_deadline;
 	/*
-	 * server assumes it now has rq_timeout from when it sent the
-	 * early reply, so client should give it at least that long.
+	 * server assumes it now has rq_timeout from when the request
+	 * arrived, so the client should give it at least that long.
+	 * since we don't know the arrival time we'll use the original
+	 * sent time
 	 */
-	req->rq_deadline = ktime_get_real_seconds() + req->rq_timeout +
+	req->rq_deadline = req->rq_sent + req->rq_timeout +
 			   ptlrpc_at_get_net_latency(req);
 
 	DEBUG_REQ(D_ADAPTTO, req,
@@ -884,7 +890,6 @@ struct ptlrpc_request_set *ptlrpc_prep_fcset(int max, set_producer_func func,
 
 	return set;
 }
-EXPORT_SYMBOL(ptlrpc_prep_fcset);
 
 /**
  * Wind down and free request set structure previously allocated with
@@ -1004,7 +1009,6 @@ void ptlrpc_set_add_new_req(struct ptlrpcd_ctl *pc,
 			wake_up(&pc->pc_partners[i]->pc_set->set_waitq);
 	}
 }
-EXPORT_SYMBOL(ptlrpc_set_add_new_req);
 
 /**
  * Based on the current state of the import, determine if the request
@@ -1035,8 +1039,8 @@ static int ptlrpc_import_delay_req(struct obd_import *imp,
 		*status = -EIO;
 	} else if (ptlrpc_send_limit_expired(req)) {
 		/* probably doesn't need to be a D_ERROR after initial testing */
-		DEBUG_REQ(D_ERROR, req, "send limit expired ");
-		*status = -EIO;
+		DEBUG_REQ(D_HA, req, "send limit expired ");
+		*status = -ETIMEDOUT;
 	} else if (req->rq_send_state == LUSTRE_IMP_CONNECTING &&
 		   imp->imp_state == LUSTRE_IMP_CONNECTING) {
 		/* allow CONNECT even if import is invalid */
@@ -1073,36 +1077,42 @@ static int ptlrpc_import_delay_req(struct obd_import *imp,
 }
 
 /**
- * Decide if the error message regarding provided request \a req
- * should be printed to the console or not.
- * Makes it's decision on request status and other properties.
- * Returns 1 to print error on the system console or 0 if not.
+ * Decide if the error message should be printed to the console or not.
+ * Makes its decision based on request type, status, and failure frequency.
+ *
+ * \param[in] req  request that failed and may need a console message
+ *
+ * \retval false if no message should be printed
+ * \retval true  if console message should be printed
  */
-static int ptlrpc_console_allow(struct ptlrpc_request *req)
+static bool ptlrpc_console_allow(struct ptlrpc_request *req)
 {
 	__u32 opc;
-	int err;
 
 	LASSERT(req->rq_reqmsg);
 	opc = lustre_msg_get_opc(req->rq_reqmsg);
 
-	/*
-	 * Suppress particular reconnect errors which are to be expected.  No
-	 * errors are suppressed for the initial connection on an import
-	 */
-	if ((lustre_handle_is_used(&req->rq_import->imp_remote_handle)) &&
-	    (opc == OST_CONNECT || opc == MDS_CONNECT || opc == MGS_CONNECT)) {
+	/* Suppress particular reconnect errors which are to be expected. */
+	if (opc == OST_CONNECT || opc == MDS_CONNECT || opc == MGS_CONNECT) {
+		int err;
+
 		/* Suppress timed out reconnect requests */
-		if (req->rq_timedout)
-			return 0;
+		if (lustre_handle_is_used(&req->rq_import->imp_remote_handle) ||
+		    req->rq_timedout)
+			return false;
 
-		/* Suppress unavailable/again reconnect requests */
+		/*
+		 * Suppress most unavailable/again reconnect requests, but
+		 * print occasionally so it is clear client is trying to
+		 * connect to a server where no target is running.
+		 */
 		err = lustre_msg_get_status(req->rq_repmsg);
-		if (err == -ENODEV || err == -EAGAIN)
-			return 0;
+		if ((err == -ENODEV || err == -EAGAIN) &&
+		    req->rq_import->imp_conn_cnt % 30 != 20)
+			return false;
 	}
 
-	return 1;
+	return true;
 }
 
 /**
@@ -1116,14 +1126,14 @@ static int ptlrpc_check_status(struct ptlrpc_request *req)
 	err = lustre_msg_get_status(req->rq_repmsg);
 	if (lustre_msg_get_type(req->rq_repmsg) == PTL_RPC_MSG_ERR) {
 		struct obd_import *imp = req->rq_import;
+		lnet_nid_t nid = imp->imp_connection->c_peer.nid;
 		__u32 opc = lustre_msg_get_opc(req->rq_reqmsg);
 
 		if (ptlrpc_console_allow(req))
-			LCONSOLE_ERROR_MSG(0x011, "%s: Communicating with %s, operation %s failed with %d.\n",
+			LCONSOLE_ERROR_MSG(0x011, "%s: operation %s to node %s failed: rc = %d\n",
 					   imp->imp_obd->obd_name,
-					   libcfs_nid2str(
-						   imp->imp_connection->c_peer.nid),
-					   ll_opcode2str(opc), err);
+					   ll_opcode2str(opc),
+					   libcfs_nid2str(nid), err);
 		return err < 0 ? err : -EINVAL;
 	}
 
@@ -1280,7 +1290,7 @@ static int after_reply(struct ptlrpc_request *req)
 		 * some reason. Try to reconnect, and if that fails, punt to
 		 * the upcall.
 		 */
-		if (ll_rpc_recoverable_error(rc)) {
+		if (ptlrpc_recoverable_error(rc)) {
 			if (req->rq_send_state != LUSTRE_IMP_FULL ||
 			    imp->imp_obd->obd_no_recov || imp->imp_dlm_fake) {
 				return rc;
@@ -1628,8 +1638,10 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set)
 			    req->rq_waiting || req->rq_wait_ctx) {
 				int status;
 
-				if (!ptlrpc_unregister_reply(req, 1))
+				if (!ptlrpc_unregister_reply(req, 1)) {
+					ptlrpc_unregister_bulk(req, 1);
 					continue;
+				}
 
 				spin_lock(&imp->imp_lock);
 				if (ptlrpc_import_delay_req(imp, req,
@@ -1995,7 +2007,6 @@ int ptlrpc_expired_set(void *data)
 	 */
 	return 1;
 }
-EXPORT_SYMBOL(ptlrpc_expired_set);
 
 /**
  * Sets rq_intr flag in \a req under spinlock.
@@ -2012,7 +2023,7 @@ EXPORT_SYMBOL(ptlrpc_mark_interrupted);
  * Interrupts (sets interrupted flag) all uncompleted requests in
  * a set \a data. Callback for l_wait_event for interruptible waits.
  */
-void ptlrpc_interrupted_set(void *data)
+static void ptlrpc_interrupted_set(void *data)
 {
 	struct ptlrpc_request_set *set = data;
 	struct list_head *tmp;
@@ -2030,7 +2041,6 @@ void ptlrpc_interrupted_set(void *data)
 		ptlrpc_mark_interrupted(req);
 	}
 }
-EXPORT_SYMBOL(ptlrpc_interrupted_set);
 
 /**
  * Get the smallest timeout in the set; this does NOT set a timeout.
@@ -2074,7 +2084,6 @@ int ptlrpc_set_next_timeout(struct ptlrpc_request_set *set)
 	}
 	return timeout;
 }
-EXPORT_SYMBOL(ptlrpc_set_next_timeout);
 
 /**
  * Send all unset request from the set and then wait until all
@@ -2325,7 +2334,7 @@ EXPORT_SYMBOL(ptlrpc_req_xid);
  * The request owner (i.e. the thread doing the I/O) must call...
  * Returns 0 on success or 1 if unregistering cannot be made.
  */
-int ptlrpc_unregister_reply(struct ptlrpc_request *request, int async)
+static int ptlrpc_unregister_reply(struct ptlrpc_request *request, int async)
 {
 	int rc;
 	wait_queue_head_t *wq;
@@ -2390,7 +2399,6 @@ int ptlrpc_unregister_reply(struct ptlrpc_request *request, int async)
 	}
 	return 0;
 }
-EXPORT_SYMBOL(ptlrpc_unregister_reply);
 
 static void ptlrpc_free_request(struct ptlrpc_request *req)
 {
@@ -2451,7 +2459,8 @@ void ptlrpc_free_committed(struct obd_import *imp)
 	       imp->imp_obd->obd_name, imp->imp_peer_committed_transno,
 	       imp->imp_generation);
 
-	if (imp->imp_generation != imp->imp_last_generation_checked)
+	if (imp->imp_generation != imp->imp_last_generation_checked ||
+	    !imp->imp_last_transno_checked)
 		skip_committed_list = false;
 
 	imp->imp_last_transno_checked = imp->imp_peer_committed_transno;
@@ -2499,6 +2508,9 @@ free_req:
 		if (req->rq_import_generation < imp->imp_generation) {
 			DEBUG_REQ(D_RPCTRACE, req, "free stale open request");
 			ptlrpc_free_request(req);
+		} else if (!req->rq_replay) {
+			DEBUG_REQ(D_RPCTRACE, req, "free closed open request");
+			ptlrpc_free_request(req);
 		}
 	}
 }
@@ -2541,7 +2553,6 @@ void ptlrpc_resend_req(struct ptlrpc_request *req)
 	ptlrpc_client_wake_req(req);
 	spin_unlock(&req->rq_lock);
 }
-EXPORT_SYMBOL(ptlrpc_resend_req);
 
 /**
  * Grab additional reference on a request \a req
@@ -2610,7 +2621,6 @@ void ptlrpc_retain_replayable_request(struct ptlrpc_request *req,
 
 	list_add(&req->rq_replay_list, &imp->imp_replay_list);
 }
-EXPORT_SYMBOL(ptlrpc_retain_replayable_request);
 
 /**
  * Send request and wait until it completes.
@@ -2783,7 +2793,6 @@ int ptlrpc_replay_req(struct ptlrpc_request *req)
 	ptlrpcd_add_req(req);
 	return 0;
 }
-EXPORT_SYMBOL(ptlrpc_replay_req);
 
 /**
  * Aborts all in-flight request on import \a imp sending and delayed lists
@@ -2843,7 +2852,6 @@ void ptlrpc_abort_inflight(struct obd_import *imp)
 
 	spin_unlock(&imp->imp_lock);
 }
-EXPORT_SYMBOL(ptlrpc_abort_inflight);
 
 /**
  * Abort all uncompleted requests in request set \a set
@@ -2929,7 +2937,6 @@ __u64 ptlrpc_next_xid(void)
 
 	return next;
 }
-EXPORT_SYMBOL(ptlrpc_next_xid);
 
 /**
  * Get a glimpse at what next xid value might have been.
diff --git a/drivers/staging/lustre/lustre/ptlrpc/connection.c b/drivers/staging/lustre/lustre/ptlrpc/connection.c
index 177a379da9fa..7b020d60c9e5 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/connection.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/connection.c
@@ -82,7 +82,6 @@ out:
 	       libcfs_nid2str(conn->c_peer.nid));
 	return conn;
 }
-EXPORT_SYMBOL(ptlrpc_connection_get);
 
 int ptlrpc_connection_put(struct ptlrpc_connection *conn)
 {
@@ -118,7 +117,6 @@ int ptlrpc_connection_put(struct ptlrpc_connection *conn)
 
 	return rc;
 }
-EXPORT_SYMBOL(ptlrpc_connection_put);
 
 struct ptlrpc_connection *
 ptlrpc_connection_addref(struct ptlrpc_connection *conn)
@@ -130,7 +128,6 @@ ptlrpc_connection_addref(struct ptlrpc_connection *conn)
 
 	return conn;
 }
-EXPORT_SYMBOL(ptlrpc_connection_addref);
 
 int ptlrpc_connection_init(void)
 {
@@ -146,13 +143,11 @@ int ptlrpc_connection_init(void)
 
 	return 0;
 }
-EXPORT_SYMBOL(ptlrpc_connection_init);
 
 void ptlrpc_connection_fini(void)
 {
 	cfs_hash_putref(conn_hash);
 }
-EXPORT_SYMBOL(ptlrpc_connection_fini);
 
 /*
  * Hash operations for net_peer<->connection
diff --git a/drivers/staging/lustre/lustre/ptlrpc/events.c b/drivers/staging/lustre/lustre/ptlrpc/events.c
index b1ce72511509..283dfb296d35 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/events.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/events.c
@@ -543,7 +543,7 @@ static int ptlrpc_ni_init(void)
 	rc = LNetNIInit(pid);
 	if (rc < 0) {
 		CDEBUG(D_NET, "Can't init network interface: %d\n", rc);
-		return -ENOENT;
+		return rc;
 	}
 
 	/* CAVEAT EMPTOR: how we process portals events is _radically_
@@ -561,7 +561,7 @@ static int ptlrpc_ni_init(void)
 	CERROR("Failed to allocate event queue: %d\n", rc);
 	LNetNIFini();
 
-	return -ENOMEM;
+	return rc;
 }
 
 int ptlrpc_init_portals(void)
@@ -570,7 +570,7 @@ int ptlrpc_init_portals(void)
 
 	if (rc != 0) {
 		CERROR("network initialisation failed\n");
-		return -EIO;
+		return rc;
 	}
 	rc = ptlrpcd_addref();
 	if (rc == 0)
diff --git a/drivers/staging/lustre/lustre/ptlrpc/import.c b/drivers/staging/lustre/lustre/ptlrpc/import.c
index 3292e6ea0102..a23d0a05b574 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/import.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/import.c
@@ -307,7 +307,8 @@ void ptlrpc_invalidate_import(struct obd_import *imp)
 		 */
 		lwi = LWI_TIMEOUT_INTERVAL(
 			cfs_timeout_cap(cfs_time_seconds(timeout)),
-			(timeout > 1)?cfs_time_seconds(1):cfs_time_seconds(1)/2,
+			(timeout > 1) ? cfs_time_seconds(1) :
+			cfs_time_seconds(1) / 2,
 			NULL, NULL);
 		rc = l_wait_event(imp->imp_recovery_waitq,
 				  (atomic_read(&imp->imp_inflight) == 0),
@@ -424,7 +425,6 @@ void ptlrpc_fail_import(struct obd_import *imp, __u32 conn_cnt)
 		ptlrpc_pinger_force(imp);
 	}
 }
-EXPORT_SYMBOL(ptlrpc_fail_import);
 
 int ptlrpc_reconnect_import(struct obd_import *imp)
 {
@@ -698,7 +698,8 @@ int ptlrpc_connect_import(struct obd_import *imp)
 	request->rq_send_state = LUSTRE_IMP_CONNECTING;
 	/* Allow a slightly larger reply for future growth compatibility */
 	req_capsule_set_size(&request->rq_pill, &RMF_CONNECT_DATA, RCL_SERVER,
-			     sizeof(struct obd_connect_data)+16*sizeof(__u64));
+			     sizeof(struct obd_connect_data) +
+			     16 * sizeof(__u64));
 	ptlrpc_request_set_replen(request);
 	request->rq_interpret_reply = ptlrpc_connect_interpret;
 
@@ -750,6 +751,153 @@ static int ptlrpc_busy_reconnect(int rc)
 	return (rc == -EBUSY) || (rc == -EAGAIN);
 }
 
+static int ptlrpc_connect_set_flags(struct obd_import *imp,
+				    struct obd_connect_data *ocd,
+				    u64 old_connect_flags,
+				    struct obd_export *exp, int init_connect)
+{
+	struct client_obd *cli = &imp->imp_obd->u.cli;
+	static bool warned;
+
+	if ((imp->imp_connect_flags_orig & OBD_CONNECT_IBITS) &&
+	    !(ocd->ocd_connect_flags & OBD_CONNECT_IBITS)) {
+		LCONSOLE_WARN("%s: MDS %s does not support ibits lock, either very old or invalid: requested %#llx, replied %#llx\n",
+			      imp->imp_obd->obd_name,
+			      imp->imp_connection->c_remote_uuid.uuid,
+			      imp->imp_connect_flags_orig,
+			      ocd->ocd_connect_flags);
+		return -EPROTO;
+	}
+
+	spin_lock(&imp->imp_lock);
+	list_del(&imp->imp_conn_current->oic_item);
+	list_add(&imp->imp_conn_current->oic_item, &imp->imp_conn_list);
+	imp->imp_last_success_conn = imp->imp_conn_current->oic_last_attempt;
+
+	spin_unlock(&imp->imp_lock);
+
+	if (!warned && (ocd->ocd_connect_flags & OBD_CONNECT_VERSION) &&
+	    (ocd->ocd_version > LUSTRE_VERSION_CODE +
+				LUSTRE_VERSION_OFFSET_WARN ||
+	     ocd->ocd_version < LUSTRE_VERSION_CODE -
+				LUSTRE_VERSION_OFFSET_WARN)) {
+		/*
+		 * Sigh, some compilers do not like #ifdef in the middle
+		 * of macro arguments
+		 */
+		const char *older = "older than client. Consider upgrading server";
+		const char *newer = "newer than client. Consider recompiling application";
+
+		LCONSOLE_WARN("Server %s version (%d.%d.%d.%d) is much %s (%s)\n",
+			      obd2cli_tgt(imp->imp_obd),
+			      OBD_OCD_VERSION_MAJOR(ocd->ocd_version),
+			      OBD_OCD_VERSION_MINOR(ocd->ocd_version),
+			      OBD_OCD_VERSION_PATCH(ocd->ocd_version),
+			      OBD_OCD_VERSION_FIX(ocd->ocd_version),
+			      ocd->ocd_version > LUSTRE_VERSION_CODE ?
+			      newer : older, LUSTRE_VERSION_STRING);
+		warned = true;
+	}
+
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 0, 53, 0)
+	/*
+	 * Check if server has LU-1252 fix applied to not always swab
+	 * the IR MNE entries. Do this only once per connection.  This
+	 * fixup is version-limited, because we don't want to carry the
+	 * OBD_CONNECT_MNE_SWAB flag around forever, just so long as we
+	 * need interop with unpatched 2.2 servers.  For newer servers,
+	 * the client will do MNE swabbing only as needed.  LU-1644
+	 */
+	if (unlikely((ocd->ocd_connect_flags & OBD_CONNECT_VERSION) &&
+		     !(ocd->ocd_connect_flags & OBD_CONNECT_MNE_SWAB) &&
+		     OBD_OCD_VERSION_MAJOR(ocd->ocd_version) == 2 &&
+		     OBD_OCD_VERSION_MINOR(ocd->ocd_version) == 2 &&
+		     OBD_OCD_VERSION_PATCH(ocd->ocd_version) < 55 &&
+		     !strcmp(imp->imp_obd->obd_type->typ_name,
+			     LUSTRE_MGC_NAME)))
+		imp->imp_need_mne_swab = 1;
+	else /* clear if server was upgraded since last connect */
+		imp->imp_need_mne_swab = 0;
+#endif
+
+	if (ocd->ocd_connect_flags & OBD_CONNECT_CKSUM) {
+		/*
+		 * We sent to the server ocd_cksum_types with bits set
+		 * for algorithms we understand. The server masked off
+		 * the checksum types it doesn't support
+		 */
+		if (!(ocd->ocd_cksum_types & cksum_types_supported_client())) {
+			LCONSOLE_WARN("The negotiation of the checksum algorithm to use with server %s failed (%x/%x), disabling checksums\n",
+				      obd2cli_tgt(imp->imp_obd),
+				      ocd->ocd_cksum_types,
+				      cksum_types_supported_client());
+			cli->cl_checksum = 0;
+			cli->cl_supp_cksum_types = OBD_CKSUM_ADLER;
+		} else {
+			cli->cl_supp_cksum_types = ocd->ocd_cksum_types;
+		}
+	} else {
+		/*
+		 * The server does not support OBD_CONNECT_CKSUM.
+		 * Enforce ADLER for backward compatibility
+		 */
+		cli->cl_supp_cksum_types = OBD_CKSUM_ADLER;
+	}
+	cli->cl_cksum_type = cksum_type_select(cli->cl_supp_cksum_types);
+
+	if (ocd->ocd_connect_flags & OBD_CONNECT_BRW_SIZE)
+		cli->cl_max_pages_per_rpc =
+			min(ocd->ocd_brw_size >> PAGE_SHIFT,
+			    cli->cl_max_pages_per_rpc);
+	else if (imp->imp_connect_op == MDS_CONNECT ||
+		 imp->imp_connect_op == MGS_CONNECT)
+		cli->cl_max_pages_per_rpc = 1;
+
+	LASSERT((cli->cl_max_pages_per_rpc <= PTLRPC_MAX_BRW_PAGES) &&
+		(cli->cl_max_pages_per_rpc > 0));
+
+	client_adjust_max_dirty(cli);
+
+	/*
+	 * Reset ns_connect_flags only for initial connect. It might be
+	 * changed in while using FS and if we reset it in reconnect
+	 * this leads to losing user settings done before such as
+	 * disable lru_resize, etc.
+	 */
+	if (old_connect_flags != exp_connect_flags(exp) || init_connect) {
+		CDEBUG(D_HA, "%s: Resetting ns_connect_flags to server flags: %#llx\n",
+		       imp->imp_obd->obd_name, ocd->ocd_connect_flags);
+		imp->imp_obd->obd_namespace->ns_connect_flags =
+			ocd->ocd_connect_flags;
+		imp->imp_obd->obd_namespace->ns_orig_connect_flags =
+			ocd->ocd_connect_flags;
+	}
+
+	if ((ocd->ocd_connect_flags & OBD_CONNECT_AT) &&
+	    (imp->imp_msg_magic == LUSTRE_MSG_MAGIC_V2))
+		/*
+		 * We need a per-message support flag, because
+		 * a. we don't know if the incoming connect reply
+		 *    supports AT or not (in reply_in_callback)
+		 *    until we unpack it.
+		 * b. failovered server means export and flags are gone
+		 *    (in ptlrpc_send_reply).
+		 *    Can only be set when we know AT is supported at
+		 *    both ends
+		 */
+		imp->imp_msghdr_flags |= MSGHDR_AT_SUPPORT;
+	else
+		imp->imp_msghdr_flags &= ~MSGHDR_AT_SUPPORT;
+
+	if ((ocd->ocd_connect_flags & OBD_CONNECT_FULL20) &&
+	    (imp->imp_msg_magic == LUSTRE_MSG_MAGIC_V2))
+		imp->imp_msghdr_flags |= MSGHDR_CKSUM_INCOMPAT18;
+	else
+		imp->imp_msghdr_flags &= ~MSGHDR_CKSUM_INCOMPAT18;
+
+	return 0;
+}
+
 /**
  * interpret_reply callback for connect RPCs.
  * Looks into returned status of connect operation and decides
@@ -762,7 +910,6 @@ static int ptlrpc_connect_interpret(const struct lu_env *env,
 {
 	struct ptlrpc_connect_async_args *aa = data;
 	struct obd_import *imp = request->rq_import;
-	struct client_obd *cli = &imp->imp_obd->u.cli;
 	struct lustre_handle old_hdl;
 	__u64 old_connect_flags;
 	int msg_flags;
@@ -842,7 +989,21 @@ static int ptlrpc_connect_interpret(const struct lu_env *env,
 	old_connect_flags = exp_connect_flags(exp);
 	exp->exp_connect_data = *ocd;
 	imp->imp_obd->obd_self_export->exp_connect_data = *ocd;
+
+	/*
+	 * The net statistics after (re-)connect is not valid anymore,
+	 * because may reflect other routing, etc.
+	 */
+	at_init(&imp->imp_at.iat_net_latency, 0, 0);
+	ptlrpc_at_adj_net_latency(request,
+				  lustre_msg_get_service_time(request->rq_repmsg));
+
+	/* Import flags should be updated before waking import at FULL state */
+	rc = ptlrpc_connect_set_flags(imp, ocd, old_connect_flags, exp,
+				      aa->pcaa_initial_connect);
 	class_export_put(exp);
+	if (rc)
+		goto out;
 
 	obd_import_event(imp->imp_obd, imp, IMP_EVENT_OCD);
 
@@ -987,151 +1148,13 @@ static int ptlrpc_connect_interpret(const struct lu_env *env,
 
 finish:
 	rc = ptlrpc_import_recovery_state_machine(imp);
-	if (rc != 0) {
-		if (rc == -ENOTCONN) {
-			CDEBUG(D_HA, "evicted/aborted by %s@%s during recovery; invalidating and reconnecting\n",
-			       obd2cli_tgt(imp->imp_obd),
-			       imp->imp_connection->c_remote_uuid.uuid);
-			ptlrpc_connect_import(imp);
-			imp->imp_connect_tried = 1;
-			return 0;
-		}
-	} else {
-		static bool warned;
-
-		spin_lock(&imp->imp_lock);
-		list_del(&imp->imp_conn_current->oic_item);
-		list_add(&imp->imp_conn_current->oic_item, &imp->imp_conn_list);
-		imp->imp_last_success_conn =
-			imp->imp_conn_current->oic_last_attempt;
-
-		spin_unlock(&imp->imp_lock);
-
-		if ((imp->imp_connect_flags_orig & OBD_CONNECT_IBITS) &&
-		    !(ocd->ocd_connect_flags & OBD_CONNECT_IBITS)) {
-			LCONSOLE_WARN("%s: MDS %s does not support ibits lock, either very old or invalid: requested %llx, replied %llx\n",
-				      imp->imp_obd->obd_name,
-				      imp->imp_connection->c_remote_uuid.uuid,
-				      imp->imp_connect_flags_orig,
-				      ocd->ocd_connect_flags);
-			rc = -EPROTO;
-			goto out;
-		}
-
-		if (!warned && (ocd->ocd_connect_flags & OBD_CONNECT_VERSION) &&
-		    (ocd->ocd_version > LUSTRE_VERSION_CODE +
-					LUSTRE_VERSION_OFFSET_WARN ||
-		     ocd->ocd_version < LUSTRE_VERSION_CODE -
-					LUSTRE_VERSION_OFFSET_WARN)) {
-			/* Sigh, some compilers do not like #ifdef in the middle
-			 * of macro arguments
-			 */
-			const char *older = "older than client. Consider upgrading server";
-			const char *newer = "newer than client. Consider recompiling application";
-
-			LCONSOLE_WARN("Server %s version (%d.%d.%d.%d) is much %s (%s)\n",
-				      obd2cli_tgt(imp->imp_obd),
-				      OBD_OCD_VERSION_MAJOR(ocd->ocd_version),
-				      OBD_OCD_VERSION_MINOR(ocd->ocd_version),
-				      OBD_OCD_VERSION_PATCH(ocd->ocd_version),
-				      OBD_OCD_VERSION_FIX(ocd->ocd_version),
-				      ocd->ocd_version > LUSTRE_VERSION_CODE ?
-				      newer : older, LUSTRE_VERSION_STRING);
-			warned = true;
-		}
-
-#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 2, 50, 0)
-		/* Check if server has LU-1252 fix applied to not always swab
-		 * the IR MNE entries. Do this only once per connection.  This
-		 * fixup is version-limited, because we don't want to carry the
-		 * OBD_CONNECT_MNE_SWAB flag around forever, just so long as we
-		 * need interop with unpatched 2.2 servers.  For newer servers,
-		 * the client will do MNE swabbing only as needed.  LU-1644
-		 */
-		if (unlikely((ocd->ocd_connect_flags & OBD_CONNECT_VERSION) &&
-			     !(ocd->ocd_connect_flags & OBD_CONNECT_MNE_SWAB) &&
-			     OBD_OCD_VERSION_MAJOR(ocd->ocd_version) == 2 &&
-			     OBD_OCD_VERSION_MINOR(ocd->ocd_version) == 2 &&
-			     OBD_OCD_VERSION_PATCH(ocd->ocd_version) < 55 &&
-			     strcmp(imp->imp_obd->obd_type->typ_name,
-				    LUSTRE_MGC_NAME) == 0))
-			imp->imp_need_mne_swab = 1;
-		else /* clear if server was upgraded since last connect */
-			imp->imp_need_mne_swab = 0;
-#else
-#warning "LU-1644: Remove old OBD_CONNECT_MNE_SWAB fixup and imp_need_mne_swab"
-#endif
-
-		if (ocd->ocd_connect_flags & OBD_CONNECT_CKSUM) {
-			/* We sent to the server ocd_cksum_types with bits set
-			 * for algorithms we understand. The server masked off
-			 * the checksum types it doesn't support
-			 */
-			if ((ocd->ocd_cksum_types &
-			     cksum_types_supported_client()) == 0) {
-				LCONSOLE_WARN("The negotiation of the checksum algorithm to use with server %s failed (%x/%x), disabling checksums\n",
-					      obd2cli_tgt(imp->imp_obd),
-					      ocd->ocd_cksum_types,
-					      cksum_types_supported_client());
-				cli->cl_checksum = 0;
-				cli->cl_supp_cksum_types = OBD_CKSUM_ADLER;
-			} else {
-				cli->cl_supp_cksum_types = ocd->ocd_cksum_types;
-			}
-		} else {
-			/* The server does not support OBD_CONNECT_CKSUM.
-			 * Enforce ADLER for backward compatibility
-			 */
-			cli->cl_supp_cksum_types = OBD_CKSUM_ADLER;
-		}
-		cli->cl_cksum_type = cksum_type_select(cli->cl_supp_cksum_types);
-
-		if (ocd->ocd_connect_flags & OBD_CONNECT_BRW_SIZE)
-			cli->cl_max_pages_per_rpc =
-				min(ocd->ocd_brw_size >> PAGE_SHIFT,
-				    cli->cl_max_pages_per_rpc);
-		else if (imp->imp_connect_op == MDS_CONNECT ||
-			 imp->imp_connect_op == MGS_CONNECT)
-			cli->cl_max_pages_per_rpc = 1;
-
-		/* Reset ns_connect_flags only for initial connect. It might be
-		 * changed in while using FS and if we reset it in reconnect
-		 * this leads to losing user settings done before such as
-		 * disable lru_resize, etc.
-		 */
-		if (old_connect_flags != exp_connect_flags(exp) ||
-		    aa->pcaa_initial_connect) {
-			CDEBUG(D_HA, "%s: Resetting ns_connect_flags to server flags: %#llx\n",
-			       imp->imp_obd->obd_name, ocd->ocd_connect_flags);
-			imp->imp_obd->obd_namespace->ns_connect_flags =
-				ocd->ocd_connect_flags;
-			imp->imp_obd->obd_namespace->ns_orig_connect_flags =
-				ocd->ocd_connect_flags;
-		}
-
-		if ((ocd->ocd_connect_flags & OBD_CONNECT_AT) &&
-		    (imp->imp_msg_magic == LUSTRE_MSG_MAGIC_V2))
-			/* We need a per-message support flag, because
-			 * a. we don't know if the incoming connect reply
-			 *    supports AT or not (in reply_in_callback)
-			 *    until we unpack it.
-			 * b. failovered server means export and flags are gone
-			 *    (in ptlrpc_send_reply).
-			 * Can only be set when we know AT is supported at
-			 * both ends
-			 */
-			imp->imp_msghdr_flags |= MSGHDR_AT_SUPPORT;
-		else
-			imp->imp_msghdr_flags &= ~MSGHDR_AT_SUPPORT;
-
-		if ((ocd->ocd_connect_flags & OBD_CONNECT_FULL20) &&
-		    (imp->imp_msg_magic == LUSTRE_MSG_MAGIC_V2))
-			imp->imp_msghdr_flags |= MSGHDR_CKSUM_INCOMPAT18;
-		else
-			imp->imp_msghdr_flags &= ~MSGHDR_CKSUM_INCOMPAT18;
-
-		LASSERT((cli->cl_max_pages_per_rpc <= PTLRPC_MAX_BRW_PAGES) &&
-			(cli->cl_max_pages_per_rpc > 0));
+	if (rc == -ENOTCONN) {
+		CDEBUG(D_HA, "evicted/aborted by %s@%s during recovery; invalidating and reconnecting\n",
+		       obd2cli_tgt(imp->imp_obd),
+		       imp->imp_connection->c_remote_uuid.uuid);
+		ptlrpc_connect_import(imp);
+		imp->imp_connect_tried = 1;
+		return 0;
 	}
 
 out:
@@ -1497,10 +1520,13 @@ EXPORT_SYMBOL(ptlrpc_disconnect_import);
 /* Adaptive Timeout utils */
 extern unsigned int at_min, at_max, at_history;
 
-/* Bin into timeslices using AT_BINS bins.
- * This gives us a max of the last binlimit*AT_BINS secs without the storage,
- * but still smoothing out a return to normalcy from a slow response.
- * (E.g. remember the maximum latency in each minute of the last 4 minutes.)
+/*
+ *Update at_current with the specified value (bounded by at_min and at_max),
+ * as well as the AT history "bins".
+ *  - Bin into timeslices using AT_BINS bins.
+ *  - This gives us a max of the last at_history seconds without the storage,
+ *    but still smoothing out a return to normalcy from a slow response.
+ *  - (E.g. remember the maximum latency in each minute of the last 4 minutes.)
  */
 int at_measured(struct adaptive_timeout *at, unsigned int val)
 {
diff --git a/drivers/staging/lustre/lustre/ptlrpc/layout.c b/drivers/staging/lustre/lustre/ptlrpc/layout.c
index ab5d85174245..839ef3e80c1a 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/layout.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/layout.c
@@ -667,11 +667,8 @@ static struct req_format *req_formats[] = {
 	&RQF_MDS_SYNC,
 	&RQF_MDS_CLOSE,
 	&RQF_MDS_RELEASE_CLOSE,
-	&RQF_MDS_PIN,
-	&RQF_MDS_UNPIN,
 	&RQF_MDS_READPAGE,
 	&RQF_MDS_WRITEPAGE,
-	&RQF_MDS_IS_SUBDIR,
 	&RQF_MDS_DONE_WRITING,
 	&RQF_MDS_REINT,
 	&RQF_MDS_REINT_CREATE,
@@ -1116,9 +1113,9 @@ EXPORT_SYMBOL(RMF_SWAP_LAYOUTS);
 
 struct req_format {
 	const char *rf_name;
-	int rf_idx;
+	size_t rf_idx;
 	struct {
-		int nr;
+		size_t nr;
 		const struct req_msg_field **d;
 	} rf_fields[RCL_NR];
 };
@@ -1389,15 +1386,6 @@ struct req_format RQF_MDS_RELEASE_CLOSE =
 			mdt_release_close_client, mds_last_unlink_server);
 EXPORT_SYMBOL(RQF_MDS_RELEASE_CLOSE);
 
-struct req_format RQF_MDS_PIN =
-	DEFINE_REQ_FMT0("MDS_PIN",
-			mdt_body_capa, mdt_body_only);
-EXPORT_SYMBOL(RQF_MDS_PIN);
-
-struct req_format RQF_MDS_UNPIN =
-	DEFINE_REQ_FMT0("MDS_UNPIN", mdt_body_only, empty);
-EXPORT_SYMBOL(RQF_MDS_UNPIN);
-
 struct req_format RQF_MDS_DONE_WRITING =
 	DEFINE_REQ_FMT0("MDS_DONE_WRITING",
 			mdt_close_client, mdt_body_only);
@@ -1448,11 +1436,6 @@ struct req_format RQF_MDS_WRITEPAGE =
 			mdt_body_capa, mdt_body_only);
 EXPORT_SYMBOL(RQF_MDS_WRITEPAGE);
 
-struct req_format RQF_MDS_IS_SUBDIR =
-	DEFINE_REQ_FMT0("MDS_IS_SUBDIR",
-			mdt_body_only, mdt_body_only);
-EXPORT_SYMBOL(RQF_MDS_IS_SUBDIR);
-
 struct req_format RQF_LLOG_ORIGIN_HANDLE_CREATE =
 	DEFINE_REQ_FMT0("LLOG_ORIGIN_HANDLE_CREATE",
 			llog_origin_handle_create_client, llogd_body_only);
@@ -1572,9 +1555,9 @@ EXPORT_SYMBOL(RQF_OST_GET_INFO_FIEMAP);
  */
 int req_layout_init(void)
 {
-	int i;
-	int j;
-	int k;
+	size_t i;
+	size_t j;
+	size_t k;
 	struct req_format *rf = NULL;
 
 	for (i = 0; i < ARRAY_SIZE(req_formats); ++i) {
@@ -1616,7 +1599,7 @@ EXPORT_SYMBOL(req_layout_fini);
  */
 static void req_capsule_init_area(struct req_capsule *pill)
 {
-	int i;
+	size_t i;
 
 	for (i = 0; i < ARRAY_SIZE(pill->rc_area[RCL_CLIENT]); i++) {
 		pill->rc_area[RCL_CLIENT][i] = -1;
@@ -1667,8 +1650,7 @@ EXPORT_SYMBOL(req_capsule_fini);
 
 static int __req_format_is_sane(const struct req_format *fmt)
 {
-	return
-		0 <= fmt->rf_idx && fmt->rf_idx < ARRAY_SIZE(req_formats) &&
+	return fmt->rf_idx < ARRAY_SIZE(req_formats) &&
 		req_formats[fmt->rf_idx] == fmt;
 }
 
@@ -1702,11 +1684,11 @@ EXPORT_SYMBOL(req_capsule_set);
  * variable-sized fields.  The field sizes come from the declared \a rmf_size
  * field of a \a pill's \a rc_fmt's RMF's.
  */
-int req_capsule_filled_sizes(struct req_capsule *pill,
-			     enum req_location loc)
+size_t req_capsule_filled_sizes(struct req_capsule *pill,
+				enum req_location loc)
 {
 	const struct req_format *fmt = pill->rc_fmt;
-	int i;
+	size_t i;
 
 	for (i = 0; i < fmt->rf_fields[loc].nr; ++i) {
 		if (pill->rc_area[loc][i] == -1) {
@@ -1761,11 +1743,11 @@ EXPORT_SYMBOL(req_capsule_server_pack);
  * Returns the PTLRPC request or reply (\a loc) buffer offset of a \a pill
  * corresponding to the given RMF (\a field).
  */
-static int __req_capsule_offset(const struct req_capsule *pill,
+static u32 __req_capsule_offset(const struct req_capsule *pill,
 				const struct req_msg_field *field,
 				enum req_location loc)
 {
-	int offset;
+	u32 offset;
 
 	offset = field->rmf_offset[pill->rc_fmt->rf_idx][loc];
 	LASSERTF(offset > 0, "%s:%s, off=%d, loc=%d\n", pill->rc_fmt->rf_name,
@@ -1869,10 +1851,10 @@ static void *__req_capsule_get(struct req_capsule *pill,
 	const struct req_format *fmt;
 	struct lustre_msg *msg;
 	void *value;
-	int len;
-	int offset;
+	u32 len;
+	u32 offset;
 
-	void *(*getter)(struct lustre_msg *m, int n, int minlen);
+	void *(*getter)(struct lustre_msg *m, u32 n, u32 minlen);
 
 	static const char *rcl_names[RCL_NR] = {
 		[RCL_CLIENT] = "client",
@@ -1899,20 +1881,20 @@ static void *__req_capsule_get(struct req_capsule *pill,
 		 */
 		len = lustre_msg_buflen(msg, offset);
 		if ((len % field->rmf_size) != 0) {
-			CERROR("%s: array field size mismatch %d modulo %d != 0 (%d)\n",
+			CERROR("%s: array field size mismatch %d modulo %u != 0 (%d)\n",
 			       field->rmf_name, len, field->rmf_size, loc);
 			return NULL;
 		}
 	} else if (pill->rc_area[loc][offset] != -1) {
 		len = pill->rc_area[loc][offset];
 	} else {
-		len = max(field->rmf_size, 0);
+		len = max_t(typeof(field->rmf_size), field->rmf_size, 0);
 	}
 	value = getter(msg, offset, len);
 
 	if (!value) {
 		DEBUG_REQ(D_ERROR, pill->rc_req,
-			  "Wrong buffer for field `%s' (%d of %d) in format `%s': %d vs. %d (%s)\n",
+			  "Wrong buffer for field `%s' (%u of %u) in format `%s': %u vs. %u (%s)\n",
 			  field->rmf_name, offset, lustre_msg_bufcount(msg),
 			  fmt->rf_name, lustre_msg_buflen(msg, offset), len,
 			  rcl_names[loc]);
@@ -1958,7 +1940,7 @@ EXPORT_SYMBOL(req_capsule_client_swab_get);
  */
 void *req_capsule_client_sized_get(struct req_capsule *pill,
 				   const struct req_msg_field *field,
-				   int len)
+				   u32 len)
 {
 	req_capsule_set_size(pill, field, RCL_CLIENT, len);
 	return __req_capsule_get(pill, field, RCL_CLIENT, NULL, 0);
@@ -1999,7 +1981,7 @@ EXPORT_SYMBOL(req_capsule_server_swab_get);
  */
 void *req_capsule_server_sized_get(struct req_capsule *pill,
 				   const struct req_msg_field *field,
-				   int len)
+				   u32 len)
 {
 	req_capsule_set_size(pill, field, RCL_SERVER, len);
 	return __req_capsule_get(pill, field, RCL_SERVER, NULL, 0);
@@ -2008,7 +1990,7 @@ EXPORT_SYMBOL(req_capsule_server_sized_get);
 
 void *req_capsule_server_sized_swab_get(struct req_capsule *pill,
 					const struct req_msg_field *field,
-					int len, void *swabber)
+					u32 len, void *swabber)
 {
 	req_capsule_set_size(pill, field, RCL_SERVER, len);
 	return __req_capsule_get(pill, field, RCL_SERVER, swabber, 0);
@@ -2024,23 +2006,25 @@ EXPORT_SYMBOL(req_capsule_server_sized_swab_get);
  */
 void req_capsule_set_size(struct req_capsule *pill,
 			  const struct req_msg_field *field,
-			  enum req_location loc, int size)
+			  enum req_location loc, u32 size)
 {
 	LASSERT(loc == RCL_SERVER || loc == RCL_CLIENT);
 
-	if ((size != field->rmf_size) &&
+	if ((size != (u32)field->rmf_size) &&
 	    (field->rmf_size != -1) &&
 	    !(field->rmf_flags & RMF_F_NO_SIZE_CHECK) &&
 	    (size > 0)) {
+		u32 rmf_size = (u32)field->rmf_size;
+
 		if ((field->rmf_flags & RMF_F_STRUCT_ARRAY) &&
-		    (size % field->rmf_size != 0)) {
-			CERROR("%s: array field size mismatch %d %% %d != 0 (%d)\n",
-			       field->rmf_name, size, field->rmf_size, loc);
+		    (size % rmf_size != 0)) {
+			CERROR("%s: array field size mismatch %u %% %u != 0 (%d)\n",
+			       field->rmf_name, size, rmf_size, loc);
 			LBUG();
 		} else if (!(field->rmf_flags & RMF_F_STRUCT_ARRAY) &&
-		    size < field->rmf_size) {
-			CERROR("%s: field size mismatch %d != %d (%d)\n",
-			       field->rmf_name, size, field->rmf_size, loc);
+			   size < rmf_size) {
+			CERROR("%s: field size mismatch %u != %u (%d)\n",
+			       field->rmf_name, size, rmf_size, loc);
 			LBUG();
 		}
 	}
@@ -2057,7 +2041,7 @@ EXPORT_SYMBOL(req_capsule_set_size);
  * actually sets the size in pill.rc_area[loc][offset], but this function
  * returns the message buflen[offset], maybe we should use another name.
  */
-int req_capsule_get_size(const struct req_capsule *pill,
+u32 req_capsule_get_size(const struct req_capsule *pill,
 			 const struct req_msg_field *field,
 			 enum req_location loc)
 {
@@ -2075,7 +2059,7 @@ EXPORT_SYMBOL(req_capsule_get_size);
  *
  * See also req_capsule_set_size().
  */
-int req_capsule_msg_size(struct req_capsule *pill, enum req_location loc)
+u32 req_capsule_msg_size(struct req_capsule *pill, enum req_location loc)
 {
 	return lustre_msg_size(pill->rc_req->rq_import->imp_msg_magic,
 			       pill->rc_fmt->rf_fields[loc].nr,
@@ -2090,10 +2074,11 @@ int req_capsule_msg_size(struct req_capsule *pill, enum req_location loc)
  * This function should not be used for formats which contain variable size
  * fields.
  */
-int req_capsule_fmt_size(__u32 magic, const struct req_format *fmt,
+u32 req_capsule_fmt_size(__u32 magic, const struct req_format *fmt,
 			 enum req_location loc)
 {
-	int size, i = 0;
+	size_t i = 0;
+	u32 size;
 
 	/*
 	 * This function should probably LASSERT() that fmt has no fields with
@@ -2103,7 +2088,7 @@ int req_capsule_fmt_size(__u32 magic, const struct req_format *fmt,
 	 * we do.
 	 */
 	size = lustre_msg_hdr_size(magic, fmt->rf_fields[loc].nr);
-	if (size < 0)
+	if (!size)
 		return size;
 
 	for (; i < fmt->rf_fields[loc].nr; ++i)
@@ -2135,7 +2120,7 @@ int req_capsule_fmt_size(__u32 magic, const struct req_format *fmt,
 void req_capsule_extend(struct req_capsule *pill, const struct req_format *fmt)
 {
 	int i;
-	int j;
+	size_t j;
 
 	const struct req_format *old;
 
@@ -2193,7 +2178,7 @@ static int req_capsule_field_present(const struct req_capsule *pill,
 				     const struct req_msg_field *field,
 				     enum req_location loc)
 {
-	int offset;
+	u32 offset;
 
 	LASSERT(loc == RCL_SERVER || loc == RCL_CLIENT);
 	LASSERT(req_capsule_has_field(pill, field, loc));
@@ -2210,12 +2195,11 @@ static int req_capsule_field_present(const struct req_capsule *pill,
  */
 void req_capsule_shrink(struct req_capsule *pill,
 			const struct req_msg_field *field,
-			unsigned int newlen,
-			enum req_location loc)
+			u32 newlen, enum req_location loc)
 {
 	const struct req_format *fmt;
 	struct lustre_msg *msg;
-	int len;
+	u32 len;
 	int offset;
 
 	fmt = pill->rc_fmt;
@@ -2228,7 +2212,7 @@ void req_capsule_shrink(struct req_capsule *pill,
 
 	msg = __req_msg(pill, loc);
 	len = lustre_msg_buflen(msg, offset);
-	LASSERTF(newlen <= len, "%s:%s, oldlen=%d, newlen=%d\n",
+	LASSERTF(newlen <= len, "%s:%s, oldlen=%u, newlen=%u\n",
 		 fmt->rf_name, field->rmf_name, len, newlen);
 
 	if (loc == RCL_CLIENT)
diff --git a/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c b/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c
index bc93b75744e1..9bad57d65db4 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/lproc_ptlrpc.c
@@ -191,7 +191,7 @@ ptlrpc_ldebugfs_register(struct dentry *root, char *dir,
 	LASSERT(!*debugfs_root_ret);
 	LASSERT(!*stats_ret);
 
-	svc_stats = lprocfs_alloc_stats(EXTRA_MAX_OPCODES+LUSTRE_MAX_OPCODES,
+	svc_stats = lprocfs_alloc_stats(EXTRA_MAX_OPCODES + LUSTRE_MAX_OPCODES,
 					0);
 	if (!svc_stats)
 		return;
@@ -937,7 +937,7 @@ static int ptlrpc_lprocfs_svc_req_history_show(struct seq_file *s, void *iter)
 static int
 ptlrpc_lprocfs_svc_req_history_open(struct inode *inode, struct file *file)
 {
-	static struct seq_operations sops = {
+	static const struct seq_operations sops = {
 		.start = ptlrpc_lprocfs_svc_req_history_start,
 		.stop  = ptlrpc_lprocfs_svc_req_history_stop,
 		.next  = ptlrpc_lprocfs_svc_req_history_next,
diff --git a/drivers/staging/lustre/lustre/ptlrpc/niobuf.c b/drivers/staging/lustre/lustre/ptlrpc/niobuf.c
index 11ec82545347..9c937398a085 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/niobuf.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/niobuf.c
@@ -295,7 +295,6 @@ int ptlrpc_unregister_bulk(struct ptlrpc_request *req, int async)
 	}
 	return 0;
 }
-EXPORT_SYMBOL(ptlrpc_unregister_bulk);
 
 static void ptlrpc_at_set_reply(struct ptlrpc_request *req, int flags)
 {
@@ -398,7 +397,8 @@ int ptlrpc_send_reply(struct ptlrpc_request *req, int flags)
 	lustre_msg_set_status(req->rq_repmsg,
 			      ptlrpc_status_hton(req->rq_status));
 	lustre_msg_set_opc(req->rq_repmsg,
-		req->rq_reqmsg ? lustre_msg_get_opc(req->rq_reqmsg) : 0);
+			   req->rq_reqmsg ?
+			   lustre_msg_get_opc(req->rq_reqmsg) : 0);
 
 	target_pack_pool_reply(req);
 
@@ -433,7 +433,6 @@ out:
 	ptlrpc_connection_put(conn);
 	return rc;
 }
-EXPORT_SYMBOL(ptlrpc_send_reply);
 
 int ptlrpc_reply(struct ptlrpc_request *req)
 {
@@ -441,7 +440,6 @@ int ptlrpc_reply(struct ptlrpc_request *req)
 		return 0;
 	return ptlrpc_send_reply(req, 0);
 }
-EXPORT_SYMBOL(ptlrpc_reply);
 
 /**
  * For request \a req send an error reply back. Create empty
@@ -468,13 +466,11 @@ int ptlrpc_send_error(struct ptlrpc_request *req, int may_be_difficult)
 	rc = ptlrpc_send_reply(req, may_be_difficult);
 	return rc;
 }
-EXPORT_SYMBOL(ptlrpc_send_error);
 
 int ptlrpc_error(struct ptlrpc_request *req)
 {
 	return ptlrpc_send_error(req, 0);
 }
-EXPORT_SYMBOL(ptlrpc_error);
 
 /**
  * Send request \a request.
@@ -490,7 +486,8 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
 	struct ptlrpc_connection *connection;
 	lnet_handle_me_t reply_me_h;
 	lnet_md_t reply_md;
-	struct obd_device *obd = request->rq_import->imp_obd;
+	struct obd_import *imp = request->rq_import;
+	struct obd_device *obd = imp->imp_obd;
 
 	if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_DROP_RPC))
 		return 0;
@@ -503,7 +500,7 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
 	 */
 	LASSERT(!request->rq_receiving_reply);
 	LASSERT(!((lustre_msg_get_flags(request->rq_reqmsg) & MSG_REPLAY) &&
-		  (request->rq_import->imp_state == LUSTRE_IMP_FULL)));
+		  (imp->imp_state == LUSTRE_IMP_FULL)));
 
 	if (unlikely(obd && obd->obd_fail)) {
 		CDEBUG(D_HA, "muting rpc for failed imp obd %s\n",
@@ -516,15 +513,22 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
 		return -ENODEV;
 	}
 
-	connection = request->rq_import->imp_connection;
+	connection = imp->imp_connection;
 
 	lustre_msg_set_handle(request->rq_reqmsg,
-			      &request->rq_import->imp_remote_handle);
+			      &imp->imp_remote_handle);
 	lustre_msg_set_type(request->rq_reqmsg, PTL_RPC_MSG_REQUEST);
-	lustre_msg_set_conn_cnt(request->rq_reqmsg,
-				request->rq_import->imp_conn_cnt);
-	lustre_msghdr_set_flags(request->rq_reqmsg,
-				request->rq_import->imp_msghdr_flags);
+	lustre_msg_set_conn_cnt(request->rq_reqmsg, imp->imp_conn_cnt);
+	lustre_msghdr_set_flags(request->rq_reqmsg, imp->imp_msghdr_flags);
+
+	/**
+	 * For enabled AT all request should have AT_SUPPORT in the
+	 * FULL import state when OBD_CONNECT_AT is set
+	 */
+	LASSERT(AT_OFF || imp->imp_state != LUSTRE_IMP_FULL ||
+		(imp->imp_msghdr_flags & MSGHDR_AT_SUPPORT) ||
+		!(imp->imp_connect_data.ocd_connect_flags &
+		OBD_CONNECT_AT));
 
 	if (request->rq_resend)
 		lustre_msg_add_flags(request->rq_reqmsg, MSG_RESENT);
@@ -628,7 +632,7 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
 	ptlrpc_request_addref(request);
 	if (obd && obd->obd_svc_stats)
 		lprocfs_counter_add(obd->obd_svc_stats, PTLRPC_REQACTIVE_CNTR,
-			atomic_read(&request->rq_import->imp_inflight));
+			atomic_read(&imp->imp_inflight));
 
 	OBD_FAIL_TIMEOUT(OBD_FAIL_PTLRPC_DELAY_SEND, request->rq_timeout + 5);
 
@@ -640,7 +644,7 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
 	request->rq_deadline = request->rq_sent + request->rq_timeout +
 		ptlrpc_at_get_net_latency(request);
 
-	ptlrpc_pinger_sending_on_import(request->rq_import);
+	ptlrpc_pinger_sending_on_import(imp);
 
 	DEBUG_REQ(D_INFO, request, "send flg=%x",
 		  lustre_msg_get_flags(request->rq_reqmsg));
diff --git a/drivers/staging/lustre/lustre/ptlrpc/pack_generic.c b/drivers/staging/lustre/lustre/ptlrpc/pack_generic.c
index b514f18fae50..871768511e8c 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/pack_generic.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/pack_generic.c
@@ -50,36 +50,34 @@
 
 #include "ptlrpc_internal.h"
 
-static inline int lustre_msg_hdr_size_v2(int count)
+static inline u32 lustre_msg_hdr_size_v2(u32 count)
 {
 	return cfs_size_round(offsetof(struct lustre_msg_v2,
 				       lm_buflens[count]));
 }
 
-int lustre_msg_hdr_size(__u32 magic, int count)
+u32 lustre_msg_hdr_size(__u32 magic, u32 count)
 {
 	switch (magic) {
 	case LUSTRE_MSG_MAGIC_V2:
 		return lustre_msg_hdr_size_v2(count);
 	default:
 		LASSERTF(0, "incorrect message magic: %08x\n", magic);
-		return -EINVAL;
+		return 0;
 	}
 }
-EXPORT_SYMBOL(lustre_msg_hdr_size);
 
 void ptlrpc_buf_set_swabbed(struct ptlrpc_request *req, const int inout,
-			    int index)
+			    u32 index)
 {
 	if (inout)
 		lustre_set_req_swabbed(req, index);
 	else
 		lustre_set_rep_swabbed(req, index);
 }
-EXPORT_SYMBOL(ptlrpc_buf_set_swabbed);
 
 int ptlrpc_buf_need_swab(struct ptlrpc_request *req, const int inout,
-			 int index)
+			 u32 index)
 {
 	if (inout)
 		return (ptlrpc_req_need_swab(req) &&
@@ -88,12 +86,11 @@ int ptlrpc_buf_need_swab(struct ptlrpc_request *req, const int inout,
 		return (ptlrpc_rep_need_swab(req) &&
 			!lustre_rep_swabbed(req, index));
 }
-EXPORT_SYMBOL(ptlrpc_buf_need_swab);
 
 /* early reply size */
-int lustre_msg_early_size(void)
+u32 lustre_msg_early_size(void)
 {
-	static int size;
+	static u32 size;
 
 	if (!size) {
 		/* Always reply old ptlrpc_body_v2 to keep interoperability
@@ -111,9 +108,9 @@ int lustre_msg_early_size(void)
 }
 EXPORT_SYMBOL(lustre_msg_early_size);
 
-int lustre_msg_size_v2(int count, __u32 *lengths)
+u32 lustre_msg_size_v2(int count, __u32 *lengths)
 {
-	int size;
+	u32 size;
 	int i;
 
 	size = lustre_msg_hdr_size_v2(count);
@@ -131,7 +128,7 @@ EXPORT_SYMBOL(lustre_msg_size_v2);
  *       target then the first buffer will be stripped because the ptlrpc
  *       data is part of the lustre_msg_v1 header. b=14043
  */
-int lustre_msg_size(__u32 magic, int count, __u32 *lens)
+u32 lustre_msg_size(__u32 magic, int count, __u32 *lens)
 {
 	__u32 size[] = { sizeof(struct ptlrpc_body) };
 
@@ -148,15 +145,14 @@ int lustre_msg_size(__u32 magic, int count, __u32 *lens)
 		return lustre_msg_size_v2(count, lens);
 	default:
 		LASSERTF(0, "incorrect message magic: %08x\n", magic);
-		return -EINVAL;
+		return 0;
 	}
 }
-EXPORT_SYMBOL(lustre_msg_size);
 
 /* This is used to determine the size of a buffer that was already packed
  * and will correctly handle the different message formats.
  */
-int lustre_packed_msg_size(struct lustre_msg *msg)
+u32 lustre_packed_msg_size(struct lustre_msg *msg)
 {
 	switch (msg->lm_magic) {
 	case LUSTRE_MSG_MAGIC_V2:
@@ -166,7 +162,6 @@ int lustre_packed_msg_size(struct lustre_msg *msg)
 		return 0;
 	}
 }
-EXPORT_SYMBOL(lustre_packed_msg_size);
 
 void lustre_init_msg_v2(struct lustre_msg_v2 *msg, int count, __u32 *lens,
 			char **bufs)
@@ -227,7 +222,6 @@ int lustre_pack_request(struct ptlrpc_request *req, __u32 magic, int count,
 	/* only use new format, we don't need to be compatible with 1.4 */
 	return lustre_pack_request_v2(req, count, lens, bufs);
 }
-EXPORT_SYMBOL(lustre_pack_request);
 
 #if RS_DEBUG
 LIST_HEAD(ptlrpc_rs_debug_lru);
@@ -369,7 +363,6 @@ int lustre_pack_reply_flags(struct ptlrpc_request *req, int count, __u32 *lens,
 		       lustre_msg_size(req->rq_reqmsg->lm_magic, count, lens));
 	return rc;
 }
-EXPORT_SYMBOL(lustre_pack_reply_flags);
 
 int lustre_pack_reply(struct ptlrpc_request *req, int count, __u32 *lens,
 		      char **bufs)
@@ -378,11 +371,9 @@ int lustre_pack_reply(struct ptlrpc_request *req, int count, __u32 *lens,
 }
 EXPORT_SYMBOL(lustre_pack_reply);
 
-void *lustre_msg_buf_v2(struct lustre_msg_v2 *m, int n, int min_size)
+void *lustre_msg_buf_v2(struct lustre_msg_v2 *m, u32 n, u32 min_size)
 {
-	int i, offset, buflen, bufcount;
-
-	LASSERT(n >= 0);
+	u32 i, offset, buflen, bufcount;
 
 	bufcount = m->lm_bufcount;
 	if (unlikely(n >= bufcount)) {
@@ -406,7 +397,7 @@ void *lustre_msg_buf_v2(struct lustre_msg_v2 *m, int n, int min_size)
 	return (char *)m + offset;
 }
 
-void *lustre_msg_buf(struct lustre_msg *m, int n, int min_size)
+void *lustre_msg_buf(struct lustre_msg *m, u32 n, u32 min_size)
 {
 	switch (m->lm_magic) {
 	case LUSTRE_MSG_MAGIC_V2:
@@ -419,7 +410,7 @@ void *lustre_msg_buf(struct lustre_msg *m, int n, int min_size)
 }
 EXPORT_SYMBOL(lustre_msg_buf);
 
-static int lustre_shrink_msg_v2(struct lustre_msg_v2 *msg, int segment,
+static int lustre_shrink_msg_v2(struct lustre_msg_v2 *msg, u32 segment,
 				unsigned int newlen, int move_data)
 {
 	char *tail = NULL, *newpos;
@@ -493,7 +484,6 @@ void lustre_free_reply_state(struct ptlrpc_reply_state *rs)
 
 	sptlrpc_svc_free_rs(rs);
 }
-EXPORT_SYMBOL(lustre_free_reply_state);
 
 static int lustre_unpack_msg_v2(struct lustre_msg_v2 *m, int len)
 {
@@ -581,7 +571,6 @@ int ptlrpc_unpack_req_msg(struct ptlrpc_request *req, int len)
 	}
 	return rc;
 }
-EXPORT_SYMBOL(ptlrpc_unpack_req_msg);
 
 int ptlrpc_unpack_rep_msg(struct ptlrpc_request *req, int len)
 {
@@ -594,7 +583,6 @@ int ptlrpc_unpack_rep_msg(struct ptlrpc_request *req, int len)
 	}
 	return rc;
 }
-EXPORT_SYMBOL(ptlrpc_unpack_rep_msg);
 
 static inline int lustre_unpack_ptlrpc_body_v2(struct ptlrpc_request *req,
 					       const int inout, int offset)
@@ -647,7 +635,7 @@ int lustre_unpack_rep_ptlrpc_body(struct ptlrpc_request *req, int offset)
 	}
 }
 
-static inline int lustre_msg_buflen_v2(struct lustre_msg_v2 *m, int n)
+static inline u32 lustre_msg_buflen_v2(struct lustre_msg_v2 *m, u32 n)
 {
 	if (n >= m->lm_bufcount)
 		return 0;
@@ -662,14 +650,14 @@ static inline int lustre_msg_buflen_v2(struct lustre_msg_v2 *m, int n)
  *
  * returns zero for non-existent message indices
  */
-int lustre_msg_buflen(struct lustre_msg *m, int n)
+u32 lustre_msg_buflen(struct lustre_msg *m, u32 n)
 {
 	switch (m->lm_magic) {
 	case LUSTRE_MSG_MAGIC_V2:
 		return lustre_msg_buflen_v2(m, n);
 	default:
 		CERROR("incorrect message magic: %08x\n", m->lm_magic);
-		return -EINVAL;
+		return 0;
 	}
 }
 EXPORT_SYMBOL(lustre_msg_buflen);
@@ -677,23 +665,22 @@ EXPORT_SYMBOL(lustre_msg_buflen);
 /* NB return the bufcount for lustre_msg_v2 format, so if message is packed
  * in V1 format, the result is one bigger. (add struct ptlrpc_body).
  */
-int lustre_msg_bufcount(struct lustre_msg *m)
+u32 lustre_msg_bufcount(struct lustre_msg *m)
 {
 	switch (m->lm_magic) {
 	case LUSTRE_MSG_MAGIC_V2:
 		return m->lm_bufcount;
 	default:
 		CERROR("incorrect message magic: %08x\n", m->lm_magic);
-		return -EINVAL;
+		return 0;
 	}
 }
-EXPORT_SYMBOL(lustre_msg_bufcount);
 
-char *lustre_msg_string(struct lustre_msg *m, int index, int max_len)
+char *lustre_msg_string(struct lustre_msg *m, u32 index, u32 max_len)
 {
 	/* max_len == 0 means the string should fill the buffer */
 	char *str;
-	int slen, blen;
+	u32 slen, blen;
 
 	switch (m->lm_magic) {
 	case LUSTRE_MSG_MAGIC_V2:
@@ -731,11 +718,10 @@ char *lustre_msg_string(struct lustre_msg *m, int index, int max_len)
 
 	return str;
 }
-EXPORT_SYMBOL(lustre_msg_string);
 
 /* Wrap up the normal fixed length cases */
-static inline void *__lustre_swab_buf(struct lustre_msg *msg, int index,
-				      int min_size, void *swabber)
+static inline void *__lustre_swab_buf(struct lustre_msg *msg, u32 index,
+				      u32 min_size, void *swabber)
 {
 	void *ptr = NULL;
 
@@ -804,7 +790,7 @@ __u32 lustre_msg_get_flags(struct lustre_msg *msg)
 }
 EXPORT_SYMBOL(lustre_msg_get_flags);
 
-void lustre_msg_add_flags(struct lustre_msg *msg, int flags)
+void lustre_msg_add_flags(struct lustre_msg *msg, u32 flags)
 {
 	switch (msg->lm_magic) {
 	case LUSTRE_MSG_MAGIC_V2: {
@@ -820,7 +806,7 @@ void lustre_msg_add_flags(struct lustre_msg *msg, int flags)
 }
 EXPORT_SYMBOL(lustre_msg_add_flags);
 
-void lustre_msg_set_flags(struct lustre_msg *msg, int flags)
+void lustre_msg_set_flags(struct lustre_msg *msg, u32 flags)
 {
 	switch (msg->lm_magic) {
 	case LUSTRE_MSG_MAGIC_V2: {
@@ -834,9 +820,8 @@ void lustre_msg_set_flags(struct lustre_msg *msg, int flags)
 		LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
 	}
 }
-EXPORT_SYMBOL(lustre_msg_set_flags);
 
-void lustre_msg_clear_flags(struct lustre_msg *msg, int flags)
+void lustre_msg_clear_flags(struct lustre_msg *msg, u32 flags)
 {
 	switch (msg->lm_magic) {
 	case LUSTRE_MSG_MAGIC_V2: {
@@ -868,9 +853,8 @@ __u32 lustre_msg_get_op_flags(struct lustre_msg *msg)
 		return 0;
 	}
 }
-EXPORT_SYMBOL(lustre_msg_get_op_flags);
 
-void lustre_msg_add_op_flags(struct lustre_msg *msg, int flags)
+void lustre_msg_add_op_flags(struct lustre_msg *msg, u32 flags)
 {
 	switch (msg->lm_magic) {
 	case LUSTRE_MSG_MAGIC_V2: {
@@ -903,7 +887,6 @@ struct lustre_handle *lustre_msg_get_handle(struct lustre_msg *msg)
 		return NULL;
 	}
 }
-EXPORT_SYMBOL(lustre_msg_get_handle);
 
 __u32 lustre_msg_get_type(struct lustre_msg *msg)
 {
@@ -924,7 +907,7 @@ __u32 lustre_msg_get_type(struct lustre_msg *msg)
 }
 EXPORT_SYMBOL(lustre_msg_get_type);
 
-void lustre_msg_add_version(struct lustre_msg *msg, int version)
+void lustre_msg_add_version(struct lustre_msg *msg, u32 version)
 {
 	switch (msg->lm_magic) {
 	case LUSTRE_MSG_MAGIC_V2: {
@@ -938,7 +921,6 @@ void lustre_msg_add_version(struct lustre_msg *msg, int version)
 		LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
 	}
 }
-EXPORT_SYMBOL(lustre_msg_add_version);
 
 __u32 lustre_msg_get_opc(struct lustre_msg *msg)
 {
@@ -1055,7 +1037,6 @@ __u64 lustre_msg_get_slv(struct lustre_msg *msg)
 		return -EINVAL;
 	}
 }
-EXPORT_SYMBOL(lustre_msg_get_slv);
 
 void lustre_msg_set_slv(struct lustre_msg *msg, __u64 slv)
 {
@@ -1075,7 +1056,6 @@ void lustre_msg_set_slv(struct lustre_msg *msg, __u64 slv)
 		return;
 	}
 }
-EXPORT_SYMBOL(lustre_msg_set_slv);
 
 __u32 lustre_msg_get_limit(struct lustre_msg *msg)
 {
@@ -1094,7 +1074,6 @@ __u32 lustre_msg_get_limit(struct lustre_msg *msg)
 		return -EINVAL;
 	}
 }
-EXPORT_SYMBOL(lustre_msg_get_limit);
 
 void lustre_msg_set_limit(struct lustre_msg *msg, __u64 limit)
 {
@@ -1114,7 +1093,6 @@ void lustre_msg_set_limit(struct lustre_msg *msg, __u64 limit)
 		return;
 	}
 }
-EXPORT_SYMBOL(lustre_msg_set_limit);
 
 __u32 lustre_msg_get_conn_cnt(struct lustre_msg *msg)
 {
@@ -1145,7 +1123,6 @@ __u32 lustre_msg_get_magic(struct lustre_msg *msg)
 		return 0;
 	}
 }
-EXPORT_SYMBOL(lustre_msg_get_magic);
 
 __u32 lustre_msg_get_timeout(struct lustre_msg *msg)
 {
@@ -1203,8 +1180,9 @@ __u32 lustre_msg_calc_cksum(struct lustre_msg *msg)
 		unsigned int hsize = 4;
 
 		cfs_crypto_hash_digest(CFS_HASH_ALG_CRC32, (unsigned char *)pb,
-				   lustre_msg_buflen(msg, MSG_PTLRPC_BODY_OFF),
-				   NULL, 0, (unsigned char *)&crc, &hsize);
+				       lustre_msg_buflen(msg,
+							 MSG_PTLRPC_BODY_OFF),
+				       NULL, 0, (unsigned char *)&crc, &hsize);
 		return crc;
 	}
 	default:
@@ -1227,7 +1205,6 @@ void lustre_msg_set_handle(struct lustre_msg *msg, struct lustre_handle *handle)
 		LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
 	}
 }
-EXPORT_SYMBOL(lustre_msg_set_handle);
 
 void lustre_msg_set_type(struct lustre_msg *msg, __u32 type)
 {
@@ -1243,7 +1220,6 @@ void lustre_msg_set_type(struct lustre_msg *msg, __u32 type)
 		LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
 	}
 }
-EXPORT_SYMBOL(lustre_msg_set_type);
 
 void lustre_msg_set_opc(struct lustre_msg *msg, __u32 opc)
 {
@@ -1259,7 +1235,6 @@ void lustre_msg_set_opc(struct lustre_msg *msg, __u32 opc)
 		LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
 	}
 }
-EXPORT_SYMBOL(lustre_msg_set_opc);
 
 void lustre_msg_set_versions(struct lustre_msg *msg, __u64 *versions)
 {
@@ -1326,7 +1301,6 @@ void lustre_msg_set_conn_cnt(struct lustre_msg *msg, __u32 conn_cnt)
 		LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
 	}
 }
-EXPORT_SYMBOL(lustre_msg_set_conn_cnt);
 
 void lustre_msg_set_timeout(struct lustre_msg *msg, __u32 timeout)
 {
@@ -1377,7 +1351,7 @@ void lustre_msg_set_jobid(struct lustre_msg *msg, char *jobid)
 		LASSERTF(pb, "invalid msg %p: no ptlrpc body!\n", msg);
 
 		if (jobid)
-			memcpy(pb->pb_jobid, jobid, JOBSTATS_JOBID_SIZE);
+			memcpy(pb->pb_jobid, jobid, LUSTRE_JOBID_SIZE);
 		else if (pb->pb_jobid[0] == '\0')
 			lustre_get_jobid(pb->pb_jobid);
 		return;
@@ -1491,7 +1465,6 @@ void lustre_swab_ptlrpc_body(struct ptlrpc_body *b)
 	 */
 	CLASSERT(offsetof(typeof(*b), pb_jobid) != 0);
 }
-EXPORT_SYMBOL(lustre_swab_ptlrpc_body);
 
 void lustre_swab_connect(struct obd_connect_data *ocd)
 {
@@ -1591,7 +1564,6 @@ void lustre_swab_obd_statfs(struct obd_statfs *os)
 	CLASSERT(offsetof(typeof(*os), os_spare8) != 0);
 	CLASSERT(offsetof(typeof(*os), os_spare9) != 0);
 }
-EXPORT_SYMBOL(lustre_swab_obd_statfs);
 
 void lustre_swab_obd_ioobj(struct obd_ioobj *ioo)
 {
@@ -1599,33 +1571,28 @@ void lustre_swab_obd_ioobj(struct obd_ioobj *ioo)
 	__swab32s(&ioo->ioo_max_brw);
 	__swab32s(&ioo->ioo_bufcnt);
 }
-EXPORT_SYMBOL(lustre_swab_obd_ioobj);
 
 void lustre_swab_niobuf_remote(struct niobuf_remote *nbr)
 {
-	__swab64s(&nbr->offset);
-	__swab32s(&nbr->len);
-	__swab32s(&nbr->flags);
+	__swab64s(&nbr->rnb_offset);
+	__swab32s(&nbr->rnb_len);
+	__swab32s(&nbr->rnb_flags);
 }
-EXPORT_SYMBOL(lustre_swab_niobuf_remote);
 
 void lustre_swab_ost_body(struct ost_body *b)
 {
 	lustre_swab_obdo(&b->oa);
 }
-EXPORT_SYMBOL(lustre_swab_ost_body);
 
 void lustre_swab_ost_last_id(u64 *id)
 {
 	__swab64s(id);
 }
-EXPORT_SYMBOL(lustre_swab_ost_last_id);
 
 void lustre_swab_generic_32s(__u32 *val)
 {
 	__swab32s(val);
 }
-EXPORT_SYMBOL(lustre_swab_generic_32s);
 
 void lustre_swab_gl_desc(union ldlm_gl_desc *desc)
 {
@@ -1674,37 +1641,36 @@ EXPORT_SYMBOL(lustre_swab_lquota_lvb);
 
 void lustre_swab_mdt_body(struct mdt_body *b)
 {
-	lustre_swab_lu_fid(&b->fid1);
-	lustre_swab_lu_fid(&b->fid2);
+	lustre_swab_lu_fid(&b->mbo_fid1);
+	lustre_swab_lu_fid(&b->mbo_fid2);
 	/* handle is opaque */
-	__swab64s(&b->valid);
-	__swab64s(&b->size);
-	__swab64s(&b->mtime);
-	__swab64s(&b->atime);
-	__swab64s(&b->ctime);
-	__swab64s(&b->blocks);
-	__swab64s(&b->ioepoch);
-	__swab64s(&b->t_state);
-	__swab32s(&b->fsuid);
-	__swab32s(&b->fsgid);
-	__swab32s(&b->capability);
-	__swab32s(&b->mode);
-	__swab32s(&b->uid);
-	__swab32s(&b->gid);
-	__swab32s(&b->flags);
-	__swab32s(&b->rdev);
-	__swab32s(&b->nlink);
-	CLASSERT(offsetof(typeof(*b), unused2) != 0);
-	__swab32s(&b->suppgid);
-	__swab32s(&b->eadatasize);
-	__swab32s(&b->aclsize);
-	__swab32s(&b->max_mdsize);
-	__swab32s(&b->max_cookiesize);
-	__swab32s(&b->uid_h);
-	__swab32s(&b->gid_h);
-	CLASSERT(offsetof(typeof(*b), padding_5) != 0);
-}
-EXPORT_SYMBOL(lustre_swab_mdt_body);
+	__swab64s(&b->mbo_valid);
+	__swab64s(&b->mbo_size);
+	__swab64s(&b->mbo_mtime);
+	__swab64s(&b->mbo_atime);
+	__swab64s(&b->mbo_ctime);
+	__swab64s(&b->mbo_blocks);
+	__swab64s(&b->mbo_ioepoch);
+	__swab64s(&b->mbo_t_state);
+	__swab32s(&b->mbo_fsuid);
+	__swab32s(&b->mbo_fsgid);
+	__swab32s(&b->mbo_capability);
+	__swab32s(&b->mbo_mode);
+	__swab32s(&b->mbo_uid);
+	__swab32s(&b->mbo_gid);
+	__swab32s(&b->mbo_flags);
+	__swab32s(&b->mbo_rdev);
+	__swab32s(&b->mbo_nlink);
+	CLASSERT(offsetof(typeof(*b), mbo_unused2) != 0);
+	__swab32s(&b->mbo_suppgid);
+	__swab32s(&b->mbo_eadatasize);
+	__swab32s(&b->mbo_aclsize);
+	__swab32s(&b->mbo_max_mdsize);
+	__swab32s(&b->mbo_max_cookiesize);
+	__swab32s(&b->mbo_uid_h);
+	__swab32s(&b->mbo_gid_h);
+	CLASSERT(offsetof(typeof(*b), mbo_padding_5) != 0);
+}
 
 void lustre_swab_mdt_ioepoch(struct mdt_ioepoch *b)
 {
@@ -1713,7 +1679,6 @@ void lustre_swab_mdt_ioepoch(struct mdt_ioepoch *b)
 	 __swab32s(&b->flags);
 	 CLASSERT(offsetof(typeof(*b), padding) != 0);
 }
-EXPORT_SYMBOL(lustre_swab_mdt_ioepoch);
 
 void lustre_swab_mgs_target_info(struct mgs_target_info *mti)
 {
@@ -1729,11 +1694,10 @@ void lustre_swab_mgs_target_info(struct mgs_target_info *mti)
 	for (i = 0; i < MTI_NIDS_MAX; i++)
 		__swab64s(&mti->mti_nids[i]);
 }
-EXPORT_SYMBOL(lustre_swab_mgs_target_info);
 
 void lustre_swab_mgs_nidtbl_entry(struct mgs_nidtbl_entry *entry)
 {
-	int i;
+	__u8 i;
 
 	__swab64s(&entry->mne_version);
 	__swab32s(&entry->mne_instance);
@@ -1760,14 +1724,12 @@ void lustre_swab_mgs_config_body(struct mgs_config_body *body)
 	__swab32s(&body->mcb_units);
 	__swab16s(&body->mcb_type);
 }
-EXPORT_SYMBOL(lustre_swab_mgs_config_body);
 
 void lustre_swab_mgs_config_res(struct mgs_config_res *body)
 {
 	__swab64s(&body->mcr_offset);
 	__swab64s(&body->mcr_size);
 }
-EXPORT_SYMBOL(lustre_swab_mgs_config_res);
 
 static void lustre_swab_obd_dqinfo(struct obd_dqinfo *i)
 {
@@ -1800,7 +1762,6 @@ void lustre_swab_obd_quotactl(struct obd_quotactl *q)
 	lustre_swab_obd_dqinfo(&q->qc_dqinfo);
 	lustre_swab_obd_dqblk(&q->qc_dqblk);
 }
-EXPORT_SYMBOL(lustre_swab_obd_quotactl);
 
 void lustre_swab_fid2path(struct getinfo_fid2path *gf)
 {
@@ -1822,7 +1783,7 @@ static void lustre_swab_fiemap_extent(struct ll_fiemap_extent *fm_extent)
 
 void lustre_swab_fiemap(struct ll_user_fiemap *fiemap)
 {
-	int i;
+	__u32 i;
 
 	__swab64s(&fiemap->fm_start);
 	__swab64s(&fiemap->fm_length);
@@ -1834,7 +1795,6 @@ void lustre_swab_fiemap(struct ll_user_fiemap *fiemap)
 	for (i = 0; i < fiemap->fm_mapped_extents; i++)
 		lustre_swab_fiemap_extent(&fiemap->fm_extents[i]);
 }
-EXPORT_SYMBOL(lustre_swab_fiemap);
 
 void lustre_swab_mdt_rec_reint (struct mdt_rec_reint *rr)
 {
@@ -1863,7 +1823,6 @@ void lustre_swab_mdt_rec_reint (struct mdt_rec_reint *rr)
 
 	CLASSERT(offsetof(typeof(*rr), rr_padding_4) != 0);
 };
-EXPORT_SYMBOL(lustre_swab_mdt_rec_reint);
 
 void lustre_swab_lov_desc(struct lov_desc *ld)
 {
@@ -1878,18 +1837,42 @@ void lustre_swab_lov_desc(struct lov_desc *ld)
 }
 EXPORT_SYMBOL(lustre_swab_lov_desc);
 
-static void print_lum(struct lov_user_md *lum)
+/* This structure is always in little-endian */
+static void lustre_swab_lmv_mds_md_v1(struct lmv_mds_md_v1 *lmm1)
+{
+	int i;
+
+	__swab32s(&lmm1->lmv_magic);
+	__swab32s(&lmm1->lmv_stripe_count);
+	__swab32s(&lmm1->lmv_master_mdt_index);
+	__swab32s(&lmm1->lmv_hash_type);
+	__swab32s(&lmm1->lmv_layout_version);
+	for (i = 0; i < lmm1->lmv_stripe_count; i++)
+		lustre_swab_lu_fid(&lmm1->lmv_stripe_fids[i]);
+}
+
+void lustre_swab_lmv_mds_md(union lmv_mds_md *lmm)
+{
+	switch (lmm->lmv_magic) {
+	case LMV_MAGIC_V1:
+		lustre_swab_lmv_mds_md_v1(&lmm->lmv_md_v1);
+		break;
+	default:
+		break;
+	}
+}
+EXPORT_SYMBOL(lustre_swab_lmv_mds_md);
+
+void lustre_swab_lmv_user_md(struct lmv_user_md *lum)
 {
-	CDEBUG(D_OTHER, "lov_user_md %p:\n", lum);
-	CDEBUG(D_OTHER, "\tlmm_magic: %#x\n", lum->lmm_magic);
-	CDEBUG(D_OTHER, "\tlmm_pattern: %#x\n", lum->lmm_pattern);
-	CDEBUG(D_OTHER, "\tlmm_object_id: %llu\n", lmm_oi_id(&lum->lmm_oi));
-	CDEBUG(D_OTHER, "\tlmm_object_gr: %llu\n", lmm_oi_seq(&lum->lmm_oi));
-	CDEBUG(D_OTHER, "\tlmm_stripe_size: %#x\n", lum->lmm_stripe_size);
-	CDEBUG(D_OTHER, "\tlmm_stripe_count: %#x\n", lum->lmm_stripe_count);
-	CDEBUG(D_OTHER, "\tlmm_stripe_offset/lmm_layout_gen: %#x\n",
-	       lum->lmm_stripe_offset);
+	__swab32s(&lum->lum_magic);
+	__swab32s(&lum->lum_stripe_count);
+	__swab32s(&lum->lum_stripe_offset);
+	__swab32s(&lum->lum_hash_type);
+	__swab32s(&lum->lum_type);
+	CLASSERT(offsetof(typeof(*lum), lum_padding1));
 }
+EXPORT_SYMBOL(lustre_swab_lmv_user_md);
 
 static void lustre_swab_lmm_oi(struct ost_id *oi)
 {
@@ -1905,7 +1888,6 @@ static void lustre_swab_lov_user_md_common(struct lov_user_md_v1 *lum)
 	__swab32s(&lum->lmm_stripe_size);
 	__swab16s(&lum->lmm_stripe_count);
 	__swab16s(&lum->lmm_stripe_offset);
-	print_lum(lum);
 }
 
 void lustre_swab_lov_user_md_v1(struct lov_user_md_v1 *lum)
@@ -1941,9 +1923,9 @@ void lustre_swab_lov_user_md_objects(struct lov_user_ost_data *lod,
 	int i;
 
 	for (i = 0; i < stripe_count; i++) {
-		lustre_swab_ost_id(&(lod[i].l_ost_oi));
-		__swab32s(&(lod[i].l_ost_gen));
-		__swab32s(&(lod[i].l_ost_idx));
+		lustre_swab_ost_id(&lod[i].l_ost_oi);
+		__swab32s(&lod[i].l_ost_gen);
+		__swab32s(&lod[i].l_ost_idx);
 	}
 }
 EXPORT_SYMBOL(lustre_swab_lov_user_md_objects);
@@ -1973,7 +1955,6 @@ void lustre_swab_ldlm_intent(struct ldlm_intent *i)
 {
 	__swab64s(&i->opc);
 }
-EXPORT_SYMBOL(lustre_swab_ldlm_intent);
 
 static void lustre_swab_ldlm_resource_desc(struct ldlm_resource_desc *r)
 {
@@ -1997,7 +1978,6 @@ void lustre_swab_ldlm_request(struct ldlm_request *rq)
 	__swab32s(&rq->lock_count);
 	/* lock_handle[] opaque */
 }
-EXPORT_SYMBOL(lustre_swab_ldlm_request);
 
 void lustre_swab_ldlm_reply(struct ldlm_reply *r)
 {
@@ -2008,7 +1988,6 @@ void lustre_swab_ldlm_reply(struct ldlm_reply *r)
 	__swab64s(&r->lock_policy_res1);
 	__swab64s(&r->lock_policy_res2);
 }
-EXPORT_SYMBOL(lustre_swab_ldlm_reply);
 
 /* Dump functions */
 void dump_ioo(struct obd_ioobj *ioo)
@@ -2018,14 +1997,12 @@ void dump_ioo(struct obd_ioobj *ioo)
 	       POSTID(&ioo->ioo_oid), ioo->ioo_max_brw,
 	       ioo->ioo_bufcnt);
 }
-EXPORT_SYMBOL(dump_ioo);
 
 void dump_rniobuf(struct niobuf_remote *nb)
 {
 	CDEBUG(D_RPCTRACE, "niobuf_remote: offset=%llu, len=%d, flags=%x\n",
-	       nb->offset, nb->len, nb->flags);
+	       nb->rnb_offset, nb->rnb_len, nb->rnb_flags);
 }
-EXPORT_SYMBOL(dump_rniobuf);
 
 static void dump_obdo(struct obdo *oa)
 {
@@ -2093,13 +2070,11 @@ void dump_ost_body(struct ost_body *ob)
 {
 	dump_obdo(&ob->oa);
 }
-EXPORT_SYMBOL(dump_ost_body);
 
 void dump_rcs(__u32 *rc)
 {
 	CDEBUG(D_RPCTRACE, "rmf_rcs: %d\n", *rc);
 }
-EXPORT_SYMBOL(dump_rcs);
 
 static inline int req_ptlrpc_body_swabbed(struct ptlrpc_request *req)
 {
@@ -2184,14 +2159,12 @@ void lustre_swab_lustre_capa(struct lustre_capa *c)
 	__swab32s(&c->lc_timeout);
 	__swab32s(&c->lc_expiry);
 }
-EXPORT_SYMBOL(lustre_swab_lustre_capa);
 
 void lustre_swab_hsm_user_state(struct hsm_user_state *state)
 {
 	__swab32s(&state->hus_states);
 	__swab32s(&state->hus_archive_id);
 }
-EXPORT_SYMBOL(lustre_swab_hsm_user_state);
 
 void lustre_swab_hsm_state_set(struct hsm_state_set *hss)
 {
@@ -2214,14 +2187,12 @@ void lustre_swab_hsm_current_action(struct hsm_current_action *action)
 	__swab32s(&action->hca_action);
 	lustre_swab_hsm_extent(&action->hca_location);
 }
-EXPORT_SYMBOL(lustre_swab_hsm_current_action);
 
 void lustre_swab_hsm_user_item(struct hsm_user_item *hui)
 {
 	lustre_swab_lu_fid(&hui->hui_fid);
 	lustre_swab_hsm_extent(&hui->hui_extent);
 }
-EXPORT_SYMBOL(lustre_swab_hsm_user_item);
 
 void lustre_swab_layout_intent(struct layout_intent *li)
 {
@@ -2230,7 +2201,6 @@ void lustre_swab_layout_intent(struct layout_intent *li)
 	__swab64s(&li->li_start);
 	__swab64s(&li->li_end);
 }
-EXPORT_SYMBOL(lustre_swab_layout_intent);
 
 void lustre_swab_hsm_progress_kernel(struct hsm_progress_kernel *hpk)
 {
@@ -2241,7 +2211,6 @@ void lustre_swab_hsm_progress_kernel(struct hsm_progress_kernel *hpk)
 	__swab16s(&hpk->hpk_flags);
 	__swab16s(&hpk->hpk_errval);
 }
-EXPORT_SYMBOL(lustre_swab_hsm_progress_kernel);
 
 void lustre_swab_hsm_request(struct hsm_request *hr)
 {
@@ -2251,7 +2220,6 @@ void lustre_swab_hsm_request(struct hsm_request *hr)
 	__swab32s(&hr->hr_itemcount);
 	__swab32s(&hr->hr_data_len);
 }
-EXPORT_SYMBOL(lustre_swab_hsm_request);
 
 void lustre_swab_swap_layouts(struct mdc_swap_layouts *msl)
 {
@@ -2264,4 +2232,3 @@ void lustre_swab_close_data(struct close_data *cd)
 	lustre_swab_lu_fid(&cd->cd_fid);
 	__swab64s(&cd->cd_data_version);
 }
-EXPORT_SYMBOL(lustre_swab_close_data);
diff --git a/drivers/staging/lustre/lustre/ptlrpc/pers.c b/drivers/staging/lustre/lustre/ptlrpc/pers.c
index 6c820e944171..5b9fb11c0b6b 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/pers.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/pers.c
@@ -64,9 +64,9 @@ void ptlrpc_add_bulk_page(struct ptlrpc_bulk_desc *desc, struct page *page,
 {
 	lnet_kiov_t *kiov = &desc->bd_iov[desc->bd_iov_count];
 
-	kiov->kiov_page = page;
-	kiov->kiov_offset = pageoffset;
-	kiov->kiov_len = len;
+	kiov->bv_page = page;
+	kiov->bv_offset = pageoffset;
+	kiov->bv_len = len;
 
 	desc->bd_iov_count++;
 }
diff --git a/drivers/staging/lustre/lustre/ptlrpc/pinger.c b/drivers/staging/lustre/lustre/ptlrpc/pinger.c
index c0529d808d81..5504fc2363ac 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/pinger.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/pinger.c
@@ -340,7 +340,6 @@ void ptlrpc_pinger_sending_on_import(struct obd_import *imp)
 {
 	ptlrpc_update_next_ping(imp, 0);
 }
-EXPORT_SYMBOL(ptlrpc_pinger_sending_on_import);
 
 void ptlrpc_pinger_commit_expected(struct obd_import *imp)
 {
diff --git a/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_internal.h b/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_internal.h
index a9831fab80f3..f14d193287da 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_internal.h
+++ b/drivers/staging/lustre/lustre/ptlrpc/ptlrpc_internal.h
@@ -53,6 +53,8 @@ int ptlrpc_start_thread(struct ptlrpc_service_part *svcpt, int wait);
 int ptlrpcd_start(struct ptlrpcd_ctl *pc);
 
 /* client.c */
+void ptlrpc_at_adj_net_latency(struct ptlrpc_request *req,
+			       unsigned int service_time);
 struct ptlrpc_bulk_desc *ptlrpc_new_bulk(unsigned npages, unsigned max_brw,
 					 unsigned type, unsigned portal);
 int ptlrpc_request_cache_init(void);
@@ -60,6 +62,11 @@ void ptlrpc_request_cache_fini(void);
 struct ptlrpc_request *ptlrpc_request_cache_alloc(gfp_t flags);
 void ptlrpc_request_cache_free(struct ptlrpc_request *req);
 void ptlrpc_init_xid(void);
+void ptlrpc_set_add_new_req(struct ptlrpcd_ctl *pc,
+			    struct ptlrpc_request *req);
+int ptlrpc_expired_set(void *data);
+int ptlrpc_set_next_timeout(struct ptlrpc_request_set *);
+void ptlrpc_resend_req(struct ptlrpc_request *request);
 
 /* events.c */
 int ptlrpc_init_portals(void);
@@ -268,7 +275,7 @@ void sptlrpc_conf_fini(void);
 int  sptlrpc_init(void);
 void sptlrpc_fini(void);
 
-static inline int ll_rpc_recoverable_error(int rc)
+static inline bool ptlrpc_recoverable_error(int rc)
 {
 	return (rc == -ENOTCONN || rc == -ENODEV);
 }
diff --git a/drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c b/drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c
index 0a374b6c2f71..1f55d642aa75 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/ptlrpcd.c
@@ -412,7 +412,7 @@ static int ptlrpcd(void *arg)
 	 * an argument, describing its "scope".
 	 */
 	rc = lu_context_init(&env.le_ctx,
-			     LCT_CL_THREAD|LCT_REMEMBER|LCT_NOREF);
+			     LCT_CL_THREAD | LCT_REMEMBER | LCT_NOREF);
 	if (rc == 0) {
 		rc = lu_context_init(env.le_ses,
 				     LCT_SESSION | LCT_REMEMBER | LCT_NOREF);
@@ -567,7 +567,7 @@ int ptlrpcd_start(struct ptlrpcd_ctl *pc)
 	 * ptlrpcd thread (or a thread-set) has to be given an argument,
 	 * describing its "scope".
 	 */
-	rc = lu_context_init(&pc->pc_env.le_ctx, LCT_CL_THREAD|LCT_REMEMBER);
+	rc = lu_context_init(&pc->pc_env.le_ctx, LCT_CL_THREAD | LCT_REMEMBER);
 	if (rc != 0)
 		goto out;
 
diff --git a/drivers/staging/lustre/lustre/ptlrpc/recover.c b/drivers/staging/lustre/lustre/ptlrpc/recover.c
index 718b3a8d61c6..405faf0dc9fc 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/recover.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/recover.c
@@ -201,7 +201,6 @@ int ptlrpc_resend(struct obd_import *imp)
 
 	return 0;
 }
-EXPORT_SYMBOL(ptlrpc_resend);
 
 /**
  * Go through all requests in delayed list and wake their threads
@@ -221,7 +220,6 @@ void ptlrpc_wake_delayed(struct obd_import *imp)
 	}
 	spin_unlock(&imp->imp_lock);
 }
-EXPORT_SYMBOL(ptlrpc_wake_delayed);
 
 void ptlrpc_request_handle_notconn(struct ptlrpc_request *failed_req)
 {
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec.c b/drivers/staging/lustre/lustre/ptlrpc/sec.c
index dbd819fa6b75..5d3995d5c69a 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/sec.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/sec.c
@@ -311,6 +311,19 @@ static int import_sec_check_expire(struct obd_import *imp)
 	return sptlrpc_import_sec_adapt(imp, NULL, NULL);
 }
 
+/**
+ * Get and validate the client side ptlrpc security facilities from
+ * \a imp. There is a race condition on client reconnect when the import is
+ * being destroyed while there are outstanding client bound requests. In
+ * this case do not output any error messages if import secuity is not
+ * found.
+ *
+ * \param[in] imp obd import associated with client
+ * \param[out] sec client side ptlrpc security
+ *
+ * \retval 0 if security retrieved successfully
+ * \retval -ve errno if there was a problem
+ */
 static int import_sec_validate_get(struct obd_import *imp,
 				   struct ptlrpc_sec **sec)
 {
@@ -323,9 +336,11 @@ static int import_sec_validate_get(struct obd_import *imp,
 	}
 
 	*sec = sptlrpc_import_sec_ref(imp);
+	/* Only output an error when the import is still active */
 	if (!*sec) {
-		CERROR("import %p (%s) with no sec\n",
-		       imp, ptlrpc_import_state_name(imp->imp_state));
+		if (list_empty(&imp->imp_zombie_chain))
+			CERROR("import %p (%s) with no sec\n",
+			       imp, ptlrpc_import_state_name(imp->imp_state));
 		return -EACCES;
 	}
 
@@ -499,7 +514,7 @@ static int sptlrpc_req_replace_dead_ctx(struct ptlrpc_request *req)
 		       newctx, newctx->cc_flags);
 
 		set_current_state(TASK_INTERRUPTIBLE);
-		schedule_timeout(HZ);
+		schedule_timeout(msecs_to_jiffies(MSEC_PER_SEC));
 	} else {
 		/*
 		 * it's possible newctx == oldctx if we're switching
@@ -718,8 +733,9 @@ again:
 	req->rq_restart = 0;
 	spin_unlock(&req->rq_lock);
 
-	lwi = LWI_TIMEOUT_INTR(timeout * HZ, ctx_refresh_timeout,
-			       ctx_refresh_interrupt, req);
+	lwi = LWI_TIMEOUT_INTR(msecs_to_jiffies(timeout * MSEC_PER_SEC),
+			       ctx_refresh_timeout, ctx_refresh_interrupt,
+			       req);
 	rc = l_wait_event(req->rq_reply_waitq, ctx_check_refresh(ctx), &lwi);
 
 	/*
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c b/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c
index 5f4d79718589..b2cc5ea6cb93 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c
@@ -139,7 +139,7 @@ int sptlrpc_proc_enc_pool_seq_show(struct seq_file *m, void *v)
 		   "cache missing:	   %lu\n"
 		   "low free mark:	   %lu\n"
 		   "max waitqueue depth:     %u\n"
-		   "max wait time:	   %ld/%u\n",
+		   "max wait time:	   %ld/%lu\n",
 		   totalram_pages,
 		   PAGES_PER_POOL,
 		   page_pools.epp_max_pages,
@@ -158,7 +158,7 @@ int sptlrpc_proc_enc_pool_seq_show(struct seq_file *m, void *v)
 		   page_pools.epp_st_lowfree,
 		   page_pools.epp_st_max_wqlen,
 		   page_pools.epp_st_max_wait,
-		   HZ);
+		   msecs_to_jiffies(MSEC_PER_SEC));
 
 	spin_unlock(&page_pools.epp_lock);
 
@@ -326,12 +326,12 @@ void sptlrpc_enc_pool_put_pages(struct ptlrpc_bulk_desc *desc)
 	LASSERT(page_pools.epp_pools[p_idx]);
 
 	for (i = 0; i < desc->bd_iov_count; i++) {
-		LASSERT(desc->bd_enc_iov[i].kiov_page);
+		LASSERT(desc->bd_enc_iov[i].bv_page);
 		LASSERT(g_idx != 0 || page_pools.epp_pools[p_idx]);
 		LASSERT(!page_pools.epp_pools[p_idx][g_idx]);
 
 		page_pools.epp_pools[p_idx][g_idx] =
-					desc->bd_enc_iov[i].kiov_page;
+					desc->bd_enc_iov[i].bv_page;
 
 		if (++g_idx == PAGES_PER_POOL) {
 			p_idx++;
@@ -348,7 +348,6 @@ void sptlrpc_enc_pool_put_pages(struct ptlrpc_bulk_desc *desc)
 	kfree(desc->bd_enc_iov);
 	desc->bd_enc_iov = NULL;
 }
-EXPORT_SYMBOL(sptlrpc_enc_pool_put_pages);
 
 static inline void enc_pools_alloc(void)
 {
@@ -432,12 +431,13 @@ void sptlrpc_enc_pool_fini(void)
 
 	if (page_pools.epp_st_access > 0) {
 		CDEBUG(D_SEC,
-		       "max pages %lu, grows %u, grow fails %u, shrinks %u, access %lu, missing %lu, max qlen %u, max wait %ld/%d\n",
+		       "max pages %lu, grows %u, grow fails %u, shrinks %u, access %lu, missing %lu, max qlen %u, max wait %ld/%ld\n",
 		       page_pools.epp_st_max_pages, page_pools.epp_st_grows,
 		       page_pools.epp_st_grow_fails,
 		       page_pools.epp_st_shrinks, page_pools.epp_st_access,
 		       page_pools.epp_st_missings, page_pools.epp_st_max_wqlen,
-		       page_pools.epp_st_max_wait, HZ);
+		       page_pools.epp_st_max_wait,
+		       msecs_to_jiffies(MSEC_PER_SEC));
 	}
 }
 
@@ -456,13 +456,11 @@ const char *sptlrpc_get_hash_name(__u8 hash_alg)
 {
 	return cfs_crypto_hash_name(cfs_hash_alg_id[hash_alg]);
 }
-EXPORT_SYMBOL(sptlrpc_get_hash_name);
 
 __u8 sptlrpc_get_hash_alg(const char *algname)
 {
 	return cfs_crypto_hash_alg(algname);
 }
-EXPORT_SYMBOL(sptlrpc_get_hash_alg);
 
 int bulk_sec_desc_unpack(struct lustre_msg *msg, int offset, int swabbed)
 {
@@ -522,9 +520,10 @@ int sptlrpc_get_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u8 alg,
 	hashsize = cfs_crypto_hash_digestsize(cfs_hash_alg_id[alg]);
 
 	for (i = 0; i < desc->bd_iov_count; i++) {
-		cfs_crypto_hash_update_page(hdesc, desc->bd_iov[i].kiov_page,
-				  desc->bd_iov[i].kiov_offset & ~PAGE_MASK,
-				  desc->bd_iov[i].kiov_len);
+		cfs_crypto_hash_update_page(hdesc, desc->bd_iov[i].bv_page,
+					    desc->bd_iov[i].bv_offset &
+					    ~PAGE_MASK,
+					    desc->bd_iov[i].bv_len);
 	}
 
 	if (hashsize > buflen) {
@@ -542,4 +541,3 @@ int sptlrpc_get_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u8 alg,
 
 	return err;
 }
-EXPORT_SYMBOL(sptlrpc_get_bulk_checksum);
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_config.c b/drivers/staging/lustre/lustre/ptlrpc/sec_config.c
index c14035479c5f..2181a85efd49 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/sec_config.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/sec_config.c
@@ -58,7 +58,6 @@ enum lustre_sec_part sptlrpc_target_sec_part(struct obd_device *obd)
 	CERROR("unknown target %p(%s)\n", obd, type);
 	return LUSTRE_SP_ANY;
 }
-EXPORT_SYMBOL(sptlrpc_target_sec_part);
 
 /****************************************
  * user supplied flavor string parsing  *
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_gc.c b/drivers/staging/lustre/lustre/ptlrpc/sec_gc.c
index 9b9801ece582..8ffd000eafac 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/sec_gc.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/sec_gc.c
@@ -71,7 +71,6 @@ void sptlrpc_gc_add_sec(struct ptlrpc_sec *sec)
 
 	CDEBUG(D_SEC, "added sec %p(%s)\n", sec, sec->ps_policy->sp_name);
 }
-EXPORT_SYMBOL(sptlrpc_gc_add_sec);
 
 void sptlrpc_gc_del_sec(struct ptlrpc_sec *sec)
 {
@@ -95,7 +94,6 @@ void sptlrpc_gc_del_sec(struct ptlrpc_sec *sec)
 
 	CDEBUG(D_SEC, "del sec %p(%s)\n", sec, sec->ps_policy->sp_name);
 }
-EXPORT_SYMBOL(sptlrpc_gc_del_sec);
 
 static void sec_process_ctx_list(void)
 {
@@ -182,7 +180,8 @@ again:
 		/* check ctx list again before sleep */
 		sec_process_ctx_list();
 
-		lwi = LWI_TIMEOUT(SEC_GC_INTERVAL * HZ, NULL, NULL);
+		lwi = LWI_TIMEOUT(msecs_to_jiffies(SEC_GC_INTERVAL * MSEC_PER_SEC),
+				  NULL, NULL);
 		l_wait_event(thread->t_ctl_waitq,
 			     thread_is_stopping(thread) ||
 			     thread_is_signal(thread),
diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c b/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c
index 5c4590b0c521..cd305bcb334a 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c
@@ -154,13 +154,13 @@ static void corrupt_bulk_data(struct ptlrpc_bulk_desc *desc)
 	unsigned int off, i;
 
 	for (i = 0; i < desc->bd_iov_count; i++) {
-		if (desc->bd_iov[i].kiov_len == 0)
+		if (desc->bd_iov[i].bv_len == 0)
 			continue;
 
-		ptr = kmap(desc->bd_iov[i].kiov_page);
-		off = desc->bd_iov[i].kiov_offset & ~PAGE_MASK;
+		ptr = kmap(desc->bd_iov[i].bv_page);
+		off = desc->bd_iov[i].bv_offset & ~PAGE_MASK;
 		ptr[off] ^= 0x1;
-		kunmap(desc->bd_iov[i].kiov_page);
+		kunmap(desc->bd_iov[i].bv_page);
 		return;
 	}
 }
@@ -249,9 +249,12 @@ int plain_ctx_verify(struct ptlrpc_cli_ctx *ctx, struct ptlrpc_request *req)
 		unsigned int hsize = 4;
 
 		cfs_crypto_hash_digest(CFS_HASH_ALG_CRC32,
-				lustre_msg_buf(msg, PLAIN_PACK_MSG_OFF, 0),
-				lustre_msg_buflen(msg, PLAIN_PACK_MSG_OFF),
-				NULL, 0, (unsigned char *)&cksum, &hsize);
+				       lustre_msg_buf(msg, PLAIN_PACK_MSG_OFF,
+						      0),
+				       lustre_msg_buflen(msg,
+							 PLAIN_PACK_MSG_OFF),
+				       NULL, 0, (unsigned char *)&cksum,
+				       &hsize);
 		if (cksum != msg->lm_cksum) {
 			CDEBUG(D_SEC,
 			       "early reply checksum mismatch: %08x != %08x\n",
@@ -349,11 +352,11 @@ int plain_cli_unwrap_bulk(struct ptlrpc_cli_ctx *ctx,
 
 	/* fix the actual data size */
 	for (i = 0, nob = 0; i < desc->bd_iov_count; i++) {
-		if (desc->bd_iov[i].kiov_len + nob > desc->bd_nob_transferred) {
-			desc->bd_iov[i].kiov_len =
+		if (desc->bd_iov[i].bv_len + nob > desc->bd_nob_transferred) {
+			desc->bd_iov[i].bv_len =
 				desc->bd_nob_transferred - nob;
 		}
-		nob += desc->bd_iov[i].kiov_len;
+		nob += desc->bd_iov[i].bv_len;
 	}
 
 	rc = plain_verify_bulk_csum(desc, req->rq_flvr.u_bulk.hash.hash_alg,
@@ -869,9 +872,12 @@ int plain_authorize(struct ptlrpc_request *req)
 		unsigned int hsize = 4;
 
 		cfs_crypto_hash_digest(CFS_HASH_ALG_CRC32,
-			lustre_msg_buf(msg, PLAIN_PACK_MSG_OFF, 0),
-			lustre_msg_buflen(msg, PLAIN_PACK_MSG_OFF),
-			NULL, 0, (unsigned char *)&msg->lm_cksum, &hsize);
+				       lustre_msg_buf(msg, PLAIN_PACK_MSG_OFF,
+						      0),
+				       lustre_msg_buflen(msg,
+							 PLAIN_PACK_MSG_OFF),
+				       NULL, 0, (unsigned char *)&msg->lm_cksum,
+				       &hsize);
 		req->rq_reply_off = 0;
 	}
 
diff --git a/drivers/staging/lustre/lustre/ptlrpc/service.c b/drivers/staging/lustre/lustre/ptlrpc/service.c
index 4788c4940c2a..72f39308eebb 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/service.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/service.c
@@ -1005,6 +1005,10 @@ ptlrpc_at_remove_timed(struct ptlrpc_request *req)
 	array->paa_count--;
 }
 
+/*
+ * Attempt to extend the request deadline by sending an early reply to the
+ * client.
+ */
 static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req)
 {
 	struct ptlrpc_service_part *svcpt = req->rq_rqbd->rqbd_svcpt;
@@ -1039,24 +1043,26 @@ static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req)
 		return -ENOSYS;
 	}
 
-	/* Fake our processing time into the future to ask the clients
-	 * for some extra amount of time
+	/*
+	 * We want to extend the request deadline by at_extra seconds,
+	 * so we set our service estimate to reflect how much time has
+	 * passed since this request arrived plus an additional
+	 * at_extra seconds. The client will calculate the new deadline
+	 * based on this service estimate (plus some additional time to
+	 * account for network latency). See ptlrpc_at_recv_early_reply
 	 */
 	at_measured(&svcpt->scp_at_estimate, at_extra +
 		    ktime_get_real_seconds() - req->rq_arrival_time.tv_sec);
+	newdl = req->rq_arrival_time.tv_sec + at_get(&svcpt->scp_at_estimate);
 
 	/* Check to see if we've actually increased the deadline -
 	 * we may be past adaptive_max
 	 */
-	if (req->rq_deadline >= req->rq_arrival_time.tv_sec +
-	    at_get(&svcpt->scp_at_estimate)) {
+	if (req->rq_deadline >= newdl) {
 		DEBUG_REQ(D_WARNING, req, "Couldn't add any time (%ld/%lld), not sending early reply\n",
-			  olddl, req->rq_arrival_time.tv_sec +
-			  at_get(&svcpt->scp_at_estimate) -
-			  ktime_get_real_seconds());
+			  olddl, newdl - ktime_get_real_seconds());
 		return -ETIMEDOUT;
 	}
-	newdl = ktime_get_real_seconds() + at_get(&svcpt->scp_at_estimate);
 
 	reqcopy = ptlrpc_request_cache_alloc(GFP_NOFS);
 	if (!reqcopy)
@@ -1982,11 +1988,12 @@ ptlrpc_wait_event(struct ptlrpc_service_part *svcpt,
 	cond_resched();
 
 	l_wait_event_exclusive_head(svcpt->scp_waitq,
-				ptlrpc_thread_stopping(thread) ||
-				ptlrpc_server_request_incoming(svcpt) ||
-				ptlrpc_server_request_pending(svcpt, false) ||
-				ptlrpc_rqbd_pending(svcpt) ||
-				ptlrpc_at_check(svcpt), &lwi);
+				    ptlrpc_thread_stopping(thread) ||
+				    ptlrpc_server_request_incoming(svcpt) ||
+				    ptlrpc_server_request_pending(svcpt,
+								  false) ||
+				    ptlrpc_rqbd_pending(svcpt) ||
+				    ptlrpc_at_check(svcpt), &lwi);
 
 	if (ptlrpc_thread_stopping(thread))
 		return -EINTR;
@@ -2049,7 +2056,7 @@ static int ptlrpc_main(void *arg)
 	}
 
 	rc = lu_context_init(&env->le_ctx,
-			     svc->srv_ctx_tags|LCT_REMEMBER|LCT_NOREF);
+			     svc->srv_ctx_tags | LCT_REMEMBER | LCT_NOREF);
 	if (rc)
 		goto out_srv_fini;
 
@@ -2349,7 +2356,7 @@ static void ptlrpc_svcpt_stop_threads(struct ptlrpc_service_part *svcpt)
 
 	while (!list_empty(&zombie)) {
 		thread = list_entry(zombie.next,
-					struct ptlrpc_thread, t_link);
+				    struct ptlrpc_thread, t_link);
 		list_del(&thread->t_link);
 		kfree(thread);
 	}
@@ -2398,7 +2405,6 @@ int ptlrpc_start_threads(struct ptlrpc_service *svc)
 	ptlrpc_stop_all_threads(svc);
 	return rc;
 }
-EXPORT_SYMBOL(ptlrpc_start_threads);
 
 int ptlrpc_start_thread(struct ptlrpc_service_part *svcpt, int wait)
 {
@@ -2539,8 +2545,8 @@ int ptlrpc_hr_init(void)
 		LASSERT(hrp->hrp_nthrs > 0);
 		hrp->hrp_thrs =
 			kzalloc_node(hrp->hrp_nthrs * sizeof(*hrt), GFP_NOFS,
-				cfs_cpt_spread_node(ptlrpc_hr.hr_cpt_table,
-						    i));
+				     cfs_cpt_spread_node(ptlrpc_hr.hr_cpt_table,
+							 i));
 		if (!hrp->hrp_thrs) {
 			rc = -ENOMEM;
 			goto out;
@@ -2593,7 +2599,8 @@ static void ptlrpc_wait_replies(struct ptlrpc_service_part *svcpt)
 						     NULL, NULL);
 
 		rc = l_wait_event(svcpt->scp_waitq,
-		     atomic_read(&svcpt->scp_nreps_difficult) == 0, &lwi);
+				  atomic_read(&svcpt->scp_nreps_difficult) == 0,
+				  &lwi);
 		if (rc == 0)
 			break;
 		CWARN("Unexpectedly long timeout %s %p\n",
@@ -2639,7 +2646,7 @@ ptlrpc_service_unlink_rqbd(struct ptlrpc_service *svc)
 		 * event with its 'unlink' flag set for each posted rqbd
 		 */
 		list_for_each_entry(rqbd, &svcpt->scp_rqbd_posted,
-					rqbd_list) {
+				    rqbd_list) {
 			rc = LNetMDUnlink(rqbd->rqbd_md_h);
 			LASSERT(rc == 0 || rc == -ENOENT);
 		}
diff --git a/drivers/staging/lustre/lustre/ptlrpc/wiretest.c b/drivers/staging/lustre/lustre/ptlrpc/wiretest.c
index 6cc2b2edf3fc..e5945e2ccc49 100644
--- a/drivers/staging/lustre/lustre/ptlrpc/wiretest.c
+++ b/drivers/staging/lustre/lustre/ptlrpc/wiretest.c
@@ -190,28 +190,30 @@ void lustre_assert_wire_constants(void)
 		 (long long)REINT_SETXATTR);
 	LASSERTF(REINT_RMENTRY == 8, "found %lld\n",
 		 (long long)REINT_RMENTRY);
-	LASSERTF(REINT_MAX == 9, "found %lld\n",
+	LASSERTF(REINT_MIGRATE == 9, "found %lld\n",
+		 (long long)REINT_MIGRATE);
+	LASSERTF(REINT_MAX == 10, "found %lld\n",
 		 (long long)REINT_MAX);
 	LASSERTF(DISP_IT_EXECD == 0x00000001UL, "found 0x%.8xUL\n",
-		(unsigned)DISP_IT_EXECD);
+		 (unsigned)DISP_IT_EXECD);
 	LASSERTF(DISP_LOOKUP_EXECD == 0x00000002UL, "found 0x%.8xUL\n",
-		(unsigned)DISP_LOOKUP_EXECD);
+		 (unsigned)DISP_LOOKUP_EXECD);
 	LASSERTF(DISP_LOOKUP_NEG == 0x00000004UL, "found 0x%.8xUL\n",
-		(unsigned)DISP_LOOKUP_NEG);
+		 (unsigned)DISP_LOOKUP_NEG);
 	LASSERTF(DISP_LOOKUP_POS == 0x00000008UL, "found 0x%.8xUL\n",
-		(unsigned)DISP_LOOKUP_POS);
+		 (unsigned)DISP_LOOKUP_POS);
 	LASSERTF(DISP_OPEN_CREATE == 0x00000010UL, "found 0x%.8xUL\n",
-		(unsigned)DISP_OPEN_CREATE);
+		 (unsigned)DISP_OPEN_CREATE);
 	LASSERTF(DISP_OPEN_OPEN == 0x00000020UL, "found 0x%.8xUL\n",
-		(unsigned)DISP_OPEN_OPEN);
+		 (unsigned)DISP_OPEN_OPEN);
 	LASSERTF(DISP_ENQ_COMPLETE == 0x00400000UL, "found 0x%.8xUL\n",
-		(unsigned)DISP_ENQ_COMPLETE);
+		 (unsigned)DISP_ENQ_COMPLETE);
 	LASSERTF(DISP_ENQ_OPEN_REF == 0x00800000UL, "found 0x%.8xUL\n",
-		(unsigned)DISP_ENQ_OPEN_REF);
+		 (unsigned)DISP_ENQ_OPEN_REF);
 	LASSERTF(DISP_ENQ_CREATE_REF == 0x01000000UL, "found 0x%.8xUL\n",
-		(unsigned)DISP_ENQ_CREATE_REF);
+		 (unsigned)DISP_ENQ_CREATE_REF);
 	LASSERTF(DISP_OPEN_LOCK == 0x02000000UL, "found 0x%.8xUL\n",
-		(unsigned)DISP_OPEN_LOCK);
+		 (unsigned)DISP_OPEN_LOCK);
 	LASSERTF(MDS_STATUS_CONN == 1, "found %lld\n",
 		 (long long)MDS_STATUS_CONN);
 	LASSERTF(MDS_STATUS_LOV == 2, "found %lld\n",
@@ -219,55 +221,55 @@ void lustre_assert_wire_constants(void)
 	LASSERTF(LUSTRE_BFLAG_UNCOMMITTED_WRITES == 1, "found %lld\n",
 		 (long long)LUSTRE_BFLAG_UNCOMMITTED_WRITES);
 	LASSERTF(MF_SOM_CHANGE == 0x00000001UL, "found 0x%.8xUL\n",
-		(unsigned)MF_SOM_CHANGE);
+		 (unsigned)MF_SOM_CHANGE);
 	LASSERTF(MF_EPOCH_OPEN == 0x00000002UL, "found 0x%.8xUL\n",
-		(unsigned)MF_EPOCH_OPEN);
+		 (unsigned)MF_EPOCH_OPEN);
 	LASSERTF(MF_EPOCH_CLOSE == 0x00000004UL, "found 0x%.8xUL\n",
-		(unsigned)MF_EPOCH_CLOSE);
+		 (unsigned)MF_EPOCH_CLOSE);
 	LASSERTF(MF_MDC_CANCEL_FID1 == 0x00000008UL, "found 0x%.8xUL\n",
-		(unsigned)MF_MDC_CANCEL_FID1);
+		 (unsigned)MF_MDC_CANCEL_FID1);
 	LASSERTF(MF_MDC_CANCEL_FID2 == 0x00000010UL, "found 0x%.8xUL\n",
-		(unsigned)MF_MDC_CANCEL_FID2);
+		 (unsigned)MF_MDC_CANCEL_FID2);
 	LASSERTF(MF_MDC_CANCEL_FID3 == 0x00000020UL, "found 0x%.8xUL\n",
-		(unsigned)MF_MDC_CANCEL_FID3);
+		 (unsigned)MF_MDC_CANCEL_FID3);
 	LASSERTF(MF_MDC_CANCEL_FID4 == 0x00000040UL, "found 0x%.8xUL\n",
-		(unsigned)MF_MDC_CANCEL_FID4);
+		 (unsigned)MF_MDC_CANCEL_FID4);
 	LASSERTF(MF_SOM_AU == 0x00000080UL, "found 0x%.8xUL\n",
-		(unsigned)MF_SOM_AU);
+		 (unsigned)MF_SOM_AU);
 	LASSERTF(MF_GETATTR_LOCK == 0x00000100UL, "found 0x%.8xUL\n",
-		(unsigned)MF_GETATTR_LOCK);
+		 (unsigned)MF_GETATTR_LOCK);
 	LASSERTF(MDS_ATTR_MODE == 0x0000000000000001ULL, "found 0x%.16llxULL\n",
-			(long long)MDS_ATTR_MODE);
+		 (long long)MDS_ATTR_MODE);
 	LASSERTF(MDS_ATTR_UID == 0x0000000000000002ULL, "found 0x%.16llxULL\n",
-			(long long)MDS_ATTR_UID);
+		 (long long)MDS_ATTR_UID);
 	LASSERTF(MDS_ATTR_GID == 0x0000000000000004ULL, "found 0x%.16llxULL\n",
-			(long long)MDS_ATTR_GID);
+		 (long long)MDS_ATTR_GID);
 	LASSERTF(MDS_ATTR_SIZE == 0x0000000000000008ULL, "found 0x%.16llxULL\n",
-			(long long)MDS_ATTR_SIZE);
+		 (long long)MDS_ATTR_SIZE);
 	LASSERTF(MDS_ATTR_ATIME == 0x0000000000000010ULL, "found 0x%.16llxULL\n",
-			(long long)MDS_ATTR_ATIME);
+		 (long long)MDS_ATTR_ATIME);
 	LASSERTF(MDS_ATTR_MTIME == 0x0000000000000020ULL, "found 0x%.16llxULL\n",
-			(long long)MDS_ATTR_MTIME);
+		 (long long)MDS_ATTR_MTIME);
 	LASSERTF(MDS_ATTR_CTIME == 0x0000000000000040ULL, "found 0x%.16llxULL\n",
-			(long long)MDS_ATTR_CTIME);
+		 (long long)MDS_ATTR_CTIME);
 	LASSERTF(MDS_ATTR_ATIME_SET == 0x0000000000000080ULL, "found 0x%.16llxULL\n",
-			(long long)MDS_ATTR_ATIME_SET);
+		 (long long)MDS_ATTR_ATIME_SET);
 	LASSERTF(MDS_ATTR_MTIME_SET == 0x0000000000000100ULL, "found 0x%.16llxULL\n",
-			(long long)MDS_ATTR_MTIME_SET);
+		 (long long)MDS_ATTR_MTIME_SET);
 	LASSERTF(MDS_ATTR_FORCE == 0x0000000000000200ULL, "found 0x%.16llxULL\n",
-			(long long)MDS_ATTR_FORCE);
+		 (long long)MDS_ATTR_FORCE);
 	LASSERTF(MDS_ATTR_ATTR_FLAG == 0x0000000000000400ULL, "found 0x%.16llxULL\n",
-			(long long)MDS_ATTR_ATTR_FLAG);
+		 (long long)MDS_ATTR_ATTR_FLAG);
 	LASSERTF(MDS_ATTR_KILL_SUID == 0x0000000000000800ULL, "found 0x%.16llxULL\n",
-			(long long)MDS_ATTR_KILL_SUID);
+		 (long long)MDS_ATTR_KILL_SUID);
 	LASSERTF(MDS_ATTR_KILL_SGID == 0x0000000000001000ULL, "found 0x%.16llxULL\n",
-			(long long)MDS_ATTR_KILL_SGID);
+		 (long long)MDS_ATTR_KILL_SGID);
 	LASSERTF(MDS_ATTR_CTIME_SET == 0x0000000000002000ULL, "found 0x%.16llxULL\n",
-			(long long)MDS_ATTR_CTIME_SET);
+		 (long long)MDS_ATTR_CTIME_SET);
 	LASSERTF(MDS_ATTR_FROM_OPEN == 0x0000000000004000ULL, "found 0x%.16llxULL\n",
-			(long long)MDS_ATTR_FROM_OPEN);
+		 (long long)MDS_ATTR_FROM_OPEN);
 	LASSERTF(MDS_ATTR_BLOCKS == 0x0000000000008000ULL, "found 0x%.16llxULL\n",
-			(long long)MDS_ATTR_BLOCKS);
+		 (long long)MDS_ATTR_BLOCKS);
 	LASSERTF(FLD_QUERY == 900, "found %lld\n",
 		 (long long)FLD_QUERY);
 	LASSERTF(FLD_FIRST_OPC == 900, "found %lld\n",
@@ -418,15 +420,15 @@ void lustre_assert_wire_constants(void)
 	LASSERTF((int)sizeof(((struct lustre_mdt_attrs *)0)->lma_self_fid) == 16, "found %lld\n",
 		 (long long)(int)sizeof(((struct lustre_mdt_attrs *)0)->lma_self_fid));
 	LASSERTF(LMAI_RELEASED == 0x00000001UL, "found 0x%.8xUL\n",
-		(unsigned)LMAI_RELEASED);
+		 (unsigned)LMAI_RELEASED);
 	LASSERTF(LMAC_HSM == 0x00000001UL, "found 0x%.8xUL\n",
-		(unsigned)LMAC_HSM);
+		 (unsigned)LMAC_HSM);
 	LASSERTF(LMAC_SOM == 0x00000002UL, "found 0x%.8xUL\n",
-		(unsigned)LMAC_SOM);
+		 (unsigned)LMAC_SOM);
 	LASSERTF(LMAC_NOT_IN_OI == 0x00000004UL, "found 0x%.8xUL\n",
-		(unsigned)LMAC_NOT_IN_OI);
+		 (unsigned)LMAC_NOT_IN_OI);
 	LASSERTF(LMAC_FID_ON_OST == 0x00000008UL, "found 0x%.8xUL\n",
-		(unsigned)LMAC_FID_ON_OST);
+		 (unsigned)LMAC_FID_ON_OST);
 
 	/* Checks for struct ost_id */
 	LASSERTF((int)sizeof(struct ost_id) == 16, "found %lld\n",
@@ -452,35 +454,35 @@ void lustre_assert_wire_constants(void)
 	LASSERTF(FID_SEQ_IGIF == 12, "found %lld\n",
 		 (long long)FID_SEQ_IGIF);
 	LASSERTF(FID_SEQ_IGIF_MAX == 0x00000000ffffffffULL, "found 0x%.16llxULL\n",
-			(long long)FID_SEQ_IGIF_MAX);
+		 (long long)FID_SEQ_IGIF_MAX);
 	LASSERTF(FID_SEQ_IDIF == 0x0000000100000000ULL, "found 0x%.16llxULL\n",
-			(long long)FID_SEQ_IDIF);
+		 (long long)FID_SEQ_IDIF);
 	LASSERTF(FID_SEQ_IDIF_MAX == 0x00000001ffffffffULL, "found 0x%.16llxULL\n",
-			(long long)FID_SEQ_IDIF_MAX);
+		 (long long)FID_SEQ_IDIF_MAX);
 	LASSERTF(FID_SEQ_START == 0x0000000200000000ULL, "found 0x%.16llxULL\n",
-			(long long)FID_SEQ_START);
+		 (long long)FID_SEQ_START);
 	LASSERTF(FID_SEQ_LOCAL_FILE == 0x0000000200000001ULL, "found 0x%.16llxULL\n",
-			(long long)FID_SEQ_LOCAL_FILE);
+		 (long long)FID_SEQ_LOCAL_FILE);
 	LASSERTF(FID_SEQ_DOT_LUSTRE == 0x0000000200000002ULL, "found 0x%.16llxULL\n",
-			(long long)FID_SEQ_DOT_LUSTRE);
+		 (long long)FID_SEQ_DOT_LUSTRE);
 	LASSERTF(FID_SEQ_SPECIAL == 0x0000000200000004ULL, "found 0x%.16llxULL\n",
-			(long long)FID_SEQ_SPECIAL);
+		 (long long)FID_SEQ_SPECIAL);
 	LASSERTF(FID_SEQ_QUOTA == 0x0000000200000005ULL, "found 0x%.16llxULL\n",
-			(long long)FID_SEQ_QUOTA);
+		 (long long)FID_SEQ_QUOTA);
 	LASSERTF(FID_SEQ_QUOTA_GLB == 0x0000000200000006ULL, "found 0x%.16llxULL\n",
-			(long long)FID_SEQ_QUOTA_GLB);
+		 (long long)FID_SEQ_QUOTA_GLB);
 	LASSERTF(FID_SEQ_ROOT == 0x0000000200000007ULL, "found 0x%.16llxULL\n",
-			(long long)FID_SEQ_ROOT);
+		 (long long)FID_SEQ_ROOT);
 	LASSERTF(FID_SEQ_NORMAL == 0x0000000200000400ULL, "found 0x%.16llxULL\n",
-			(long long)FID_SEQ_NORMAL);
+		 (long long)FID_SEQ_NORMAL);
 	LASSERTF(FID_SEQ_LOV_DEFAULT == 0xffffffffffffffffULL, "found 0x%.16llxULL\n",
-			(long long)FID_SEQ_LOV_DEFAULT);
+		 (long long)FID_SEQ_LOV_DEFAULT);
 	LASSERTF(FID_OID_SPECIAL_BFL == 0x00000001UL, "found 0x%.8xUL\n",
-		(unsigned)FID_OID_SPECIAL_BFL);
+		 (unsigned)FID_OID_SPECIAL_BFL);
 	LASSERTF(FID_OID_DOT_LUSTRE == 0x00000001UL, "found 0x%.8xUL\n",
-		(unsigned)FID_OID_DOT_LUSTRE);
+		 (unsigned)FID_OID_DOT_LUSTRE);
 	LASSERTF(FID_OID_DOT_LUSTRE_OBF == 0x00000002UL, "found 0x%.8xUL\n",
-		(unsigned)FID_OID_DOT_LUSTRE_OBF);
+		 (unsigned)FID_OID_DOT_LUSTRE_OBF);
 
 	/* Checks for struct lu_dirent */
 	LASSERTF((int)sizeof(struct lu_dirent) == 32, "found %lld\n",
@@ -510,11 +512,11 @@ void lustre_assert_wire_constants(void)
 	LASSERTF((int)sizeof(((struct lu_dirent *)0)->lde_name[0]) == 1, "found %lld\n",
 		 (long long)(int)sizeof(((struct lu_dirent *)0)->lde_name[0]));
 	LASSERTF(LUDA_FID == 0x00000001UL, "found 0x%.8xUL\n",
-		(unsigned)LUDA_FID);
+		 (unsigned)LUDA_FID);
 	LASSERTF(LUDA_TYPE == 0x00000002UL, "found 0x%.8xUL\n",
-		(unsigned)LUDA_TYPE);
+		 (unsigned)LUDA_TYPE);
 	LASSERTF(LUDA_64BITHASH == 0x00000004UL, "found 0x%.8xUL\n",
-		(unsigned)LUDA_64BITHASH);
+		 (unsigned)LUDA_64BITHASH);
 
 	/* Checks for struct luda_type */
 	LASSERTF((int)sizeof(struct luda_type) == 2, "found %lld\n",
@@ -602,9 +604,9 @@ void lustre_assert_wire_constants(void)
 	LASSERTF((int)sizeof(((struct lustre_msg_v2 *)0)->lm_buflens[0]) == 4, "found %lld\n",
 		 (long long)(int)sizeof(((struct lustre_msg_v2 *)0)->lm_buflens[0]));
 	LASSERTF(LUSTRE_MSG_MAGIC_V2 == 0x0BD00BD3, "found 0x%.8x\n",
-		LUSTRE_MSG_MAGIC_V2);
+		 LUSTRE_MSG_MAGIC_V2);
 	LASSERTF(LUSTRE_MSG_MAGIC_V2_SWABBED == 0xD30BD00B, "found 0x%.8x\n",
-		LUSTRE_MSG_MAGIC_V2_SWABBED);
+		 LUSTRE_MSG_MAGIC_V2_SWABBED);
 
 	/* Checks for struct ptlrpc_body */
 	LASSERTF((int)sizeof(struct ptlrpc_body_v3) == 184, "found %lld\n",
@@ -682,7 +684,7 @@ void lustre_assert_wire_constants(void)
 		 (long long)(int)offsetof(struct ptlrpc_body_v3, pb_padding));
 	LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding) == 32, "found %lld\n",
 		 (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding));
-	CLASSERT(JOBSTATS_JOBID_SIZE == 32);
+	CLASSERT(LUSTRE_JOBID_SIZE == 32);
 	LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_jobid) == 152, "found %lld\n",
 		 (long long)(int)offsetof(struct ptlrpc_body_v3, pb_jobid));
 	LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_jobid) == 32, "found %lld\n",
@@ -780,61 +782,61 @@ void lustre_assert_wire_constants(void)
 	LASSERTF(MSG_PTLRPC_HEADER_OFF == 31, "found %lld\n",
 		 (long long)MSG_PTLRPC_HEADER_OFF);
 	LASSERTF(PTLRPC_MSG_VERSION == 0x00000003, "found 0x%.8x\n",
-		PTLRPC_MSG_VERSION);
+		 PTLRPC_MSG_VERSION);
 	LASSERTF(LUSTRE_VERSION_MASK == 0xffff0000, "found 0x%.8x\n",
-		LUSTRE_VERSION_MASK);
+		 LUSTRE_VERSION_MASK);
 	LASSERTF(LUSTRE_OBD_VERSION == 0x00010000, "found 0x%.8x\n",
-		LUSTRE_OBD_VERSION);
+		 LUSTRE_OBD_VERSION);
 	LASSERTF(LUSTRE_MDS_VERSION == 0x00020000, "found 0x%.8x\n",
-		LUSTRE_MDS_VERSION);
+		 LUSTRE_MDS_VERSION);
 	LASSERTF(LUSTRE_OST_VERSION == 0x00030000, "found 0x%.8x\n",
-		LUSTRE_OST_VERSION);
+		 LUSTRE_OST_VERSION);
 	LASSERTF(LUSTRE_DLM_VERSION == 0x00040000, "found 0x%.8x\n",
-		LUSTRE_DLM_VERSION);
+		 LUSTRE_DLM_VERSION);
 	LASSERTF(LUSTRE_LOG_VERSION == 0x00050000, "found 0x%.8x\n",
-		LUSTRE_LOG_VERSION);
+		 LUSTRE_LOG_VERSION);
 	LASSERTF(LUSTRE_MGS_VERSION == 0x00060000, "found 0x%.8x\n",
-		LUSTRE_MGS_VERSION);
+		 LUSTRE_MGS_VERSION);
 	LASSERTF(MSGHDR_AT_SUPPORT == 1, "found %lld\n",
 		 (long long)MSGHDR_AT_SUPPORT);
 	LASSERTF(MSGHDR_CKSUM_INCOMPAT18 == 2, "found %lld\n",
 		 (long long)MSGHDR_CKSUM_INCOMPAT18);
 	LASSERTF(MSG_OP_FLAG_MASK == 0xffff0000UL, "found 0x%.8xUL\n",
-		(unsigned)MSG_OP_FLAG_MASK);
+		 (unsigned)MSG_OP_FLAG_MASK);
 	LASSERTF(MSG_OP_FLAG_SHIFT == 16, "found %lld\n",
 		 (long long)MSG_OP_FLAG_SHIFT);
 	LASSERTF(MSG_GEN_FLAG_MASK == 0x0000ffffUL, "found 0x%.8xUL\n",
-		(unsigned)MSG_GEN_FLAG_MASK);
+		 (unsigned)MSG_GEN_FLAG_MASK);
 	LASSERTF(MSG_LAST_REPLAY == 0x00000001UL, "found 0x%.8xUL\n",
-		(unsigned)MSG_LAST_REPLAY);
+		 (unsigned)MSG_LAST_REPLAY);
 	LASSERTF(MSG_RESENT == 0x00000002UL, "found 0x%.8xUL\n",
-		(unsigned)MSG_RESENT);
+		 (unsigned)MSG_RESENT);
 	LASSERTF(MSG_REPLAY == 0x00000004UL, "found 0x%.8xUL\n",
-		(unsigned)MSG_REPLAY);
+		 (unsigned)MSG_REPLAY);
 	LASSERTF(MSG_DELAY_REPLAY == 0x00000010UL, "found 0x%.8xUL\n",
-		(unsigned)MSG_DELAY_REPLAY);
+		 (unsigned)MSG_DELAY_REPLAY);
 	LASSERTF(MSG_VERSION_REPLAY == 0x00000020UL, "found 0x%.8xUL\n",
-		(unsigned)MSG_VERSION_REPLAY);
+		 (unsigned)MSG_VERSION_REPLAY);
 	LASSERTF(MSG_REQ_REPLAY_DONE == 0x00000040UL, "found 0x%.8xUL\n",
-		(unsigned)MSG_REQ_REPLAY_DONE);
+		 (unsigned)MSG_REQ_REPLAY_DONE);
 	LASSERTF(MSG_LOCK_REPLAY_DONE == 0x00000080UL, "found 0x%.8xUL\n",
-		(unsigned)MSG_LOCK_REPLAY_DONE);
+		 (unsigned)MSG_LOCK_REPLAY_DONE);
 	LASSERTF(MSG_CONNECT_RECOVERING == 0x00000001UL, "found 0x%.8xUL\n",
-		(unsigned)MSG_CONNECT_RECOVERING);
+		 (unsigned)MSG_CONNECT_RECOVERING);
 	LASSERTF(MSG_CONNECT_RECONNECT == 0x00000002UL, "found 0x%.8xUL\n",
-		(unsigned)MSG_CONNECT_RECONNECT);
+		 (unsigned)MSG_CONNECT_RECONNECT);
 	LASSERTF(MSG_CONNECT_REPLAYABLE == 0x00000004UL, "found 0x%.8xUL\n",
-		(unsigned)MSG_CONNECT_REPLAYABLE);
+		 (unsigned)MSG_CONNECT_REPLAYABLE);
 	LASSERTF(MSG_CONNECT_LIBCLIENT == 0x00000010UL, "found 0x%.8xUL\n",
-		(unsigned)MSG_CONNECT_LIBCLIENT);
+		 (unsigned)MSG_CONNECT_LIBCLIENT);
 	LASSERTF(MSG_CONNECT_INITIAL == 0x00000020UL, "found 0x%.8xUL\n",
-		(unsigned)MSG_CONNECT_INITIAL);
+		 (unsigned)MSG_CONNECT_INITIAL);
 	LASSERTF(MSG_CONNECT_ASYNC == 0x00000040UL, "found 0x%.8xUL\n",
-		(unsigned)MSG_CONNECT_ASYNC);
+		 (unsigned)MSG_CONNECT_ASYNC);
 	LASSERTF(MSG_CONNECT_NEXT_VER == 0x00000080UL, "found 0x%.8xUL\n",
-		(unsigned)MSG_CONNECT_NEXT_VER);
+		 (unsigned)MSG_CONNECT_NEXT_VER);
 	LASSERTF(MSG_CONNECT_TRANSNO == 0x00000100UL, "found 0x%.8xUL\n",
-		(unsigned)MSG_CONNECT_TRANSNO);
+		 (unsigned)MSG_CONNECT_TRANSNO);
 
 	/* Checks for struct obd_connect_data */
 	LASSERTF((int)sizeof(struct obd_connect_data) == 192, "found %lld\n",
@@ -1069,12 +1071,18 @@ void lustre_assert_wire_constants(void)
 		 "found 0x%.16llxULL\n", OBD_CONNECT_FLOCK_DEAD);
 	LASSERTF(OBD_CONNECT_OPEN_BY_FID == 0x20000000000000ULL,
 		 "found 0x%.16llxULL\n", OBD_CONNECT_OPEN_BY_FID);
+	LASSERTF(OBD_CONNECT_LFSCK == 0x40000000000000ULL, "found 0x%.16llxULL\n",
+		 OBD_CONNECT_LFSCK);
+	LASSERTF(OBD_CONNECT_UNLINK_CLOSE == 0x100000000000000ULL, "found 0x%.16llxULL\n",
+		 OBD_CONNECT_UNLINK_CLOSE);
+	LASSERTF(OBD_CONNECT_DIR_STRIPE == 0x400000000000000ULL, "found 0x%.16llxULL\n",
+		 OBD_CONNECT_DIR_STRIPE);
 	LASSERTF(OBD_CKSUM_CRC32 == 0x00000001UL, "found 0x%.8xUL\n",
-		(unsigned)OBD_CKSUM_CRC32);
+		 (unsigned)OBD_CKSUM_CRC32);
 	LASSERTF(OBD_CKSUM_ADLER == 0x00000002UL, "found 0x%.8xUL\n",
-		(unsigned)OBD_CKSUM_ADLER);
+		 (unsigned)OBD_CKSUM_ADLER);
 	LASSERTF(OBD_CKSUM_CRC32C == 0x00000004UL, "found 0x%.8xUL\n",
-		(unsigned)OBD_CKSUM_CRC32C);
+		 (unsigned)OBD_CKSUM_CRC32C);
 
 	/* Checks for struct obdo */
 	LASSERTF((int)sizeof(struct obdo) == 208, "found %lld\n",
@@ -1346,7 +1354,7 @@ void lustre_assert_wire_constants(void)
 		 (long long)(int)offsetof(struct lov_mds_md_v1, lmm_objects[0]));
 	LASSERTF((int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_objects[0]) == 24, "found %lld\n",
 		 (long long)(int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_objects[0]));
-	CLASSERT(LOV_MAGIC_V1 == 0x0BD10BD0);
+	CLASSERT(LOV_MAGIC_V1 == (0x0BD10000 | 0x0BD0));
 
 	/* Checks for struct lov_mds_md_v3 */
 	LASSERTF((int)sizeof(struct lov_mds_md_v3) == 48, "found %lld\n",
@@ -1375,7 +1383,7 @@ void lustre_assert_wire_constants(void)
 		 (long long)(int)offsetof(struct lov_mds_md_v3, lmm_layout_gen));
 	LASSERTF((int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_layout_gen) == 2, "found %lld\n",
 		 (long long)(int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_layout_gen));
-	CLASSERT(LOV_MAXPOOLNAME == 16);
+	CLASSERT(LOV_MAXPOOLNAME == 15);
 	LASSERTF((int)offsetof(struct lov_mds_md_v3, lmm_pool_name[16]) == 48, "found %lld\n",
 		 (long long)(int)offsetof(struct lov_mds_md_v3, lmm_pool_name[16]));
 	LASSERTF((int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_pool_name[16]) == 1, "found %lld\n",
@@ -1384,15 +1392,64 @@ void lustre_assert_wire_constants(void)
 		 (long long)(int)offsetof(struct lov_mds_md_v3, lmm_objects[0]));
 	LASSERTF((int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_objects[0]) == 24, "found %lld\n",
 		 (long long)(int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_objects[0]));
-	CLASSERT(LOV_MAGIC_V3 == 0x0BD30BD0);
+	CLASSERT(LOV_MAGIC_V3 == (0x0BD30000 | 0x0BD0));
 	LASSERTF(LOV_PATTERN_RAID0 == 0x00000001UL, "found 0x%.8xUL\n",
-		(unsigned)LOV_PATTERN_RAID0);
+		 (unsigned)LOV_PATTERN_RAID0);
 	LASSERTF(LOV_PATTERN_RAID1 == 0x00000002UL, "found 0x%.8xUL\n",
-		(unsigned)LOV_PATTERN_RAID1);
+		 (unsigned)LOV_PATTERN_RAID1);
 	LASSERTF(LOV_PATTERN_FIRST == 0x00000100UL, "found 0x%.8xUL\n",
-		(unsigned)LOV_PATTERN_FIRST);
+		 (unsigned)LOV_PATTERN_FIRST);
 	LASSERTF(LOV_PATTERN_CMOBD == 0x00000200UL, "found 0x%.8xUL\n",
-		(unsigned)LOV_PATTERN_CMOBD);
+		 (unsigned)LOV_PATTERN_CMOBD);
+
+	/* Checks for struct lmv_mds_md_v1 */
+	LASSERTF((int)sizeof(struct lmv_mds_md_v1) == 56, "found %lld\n",
+		 (long long)(int)sizeof(struct lmv_mds_md_v1));
+	LASSERTF((int)offsetof(struct lmv_mds_md_v1, lmv_magic) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct lmv_mds_md_v1, lmv_magic));
+	LASSERTF((int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_magic) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_magic));
+	LASSERTF((int)offsetof(struct lmv_mds_md_v1, lmv_stripe_count) == 4, "found %lld\n",
+		 (long long)(int)offsetof(struct lmv_mds_md_v1, lmv_stripe_count));
+	LASSERTF((int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_stripe_count) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_stripe_count));
+	LASSERTF((int)offsetof(struct lmv_mds_md_v1, lmv_master_mdt_index) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct lmv_mds_md_v1, lmv_master_mdt_index));
+	LASSERTF((int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_master_mdt_index) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_master_mdt_index));
+	LASSERTF((int)offsetof(struct lmv_mds_md_v1, lmv_hash_type) == 12, "found %lld\n",
+		 (long long)(int)offsetof(struct lmv_mds_md_v1, lmv_hash_type));
+	LASSERTF((int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_hash_type) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_hash_type));
+	LASSERTF((int)offsetof(struct lmv_mds_md_v1, lmv_layout_version) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct lmv_mds_md_v1, lmv_layout_version));
+	LASSERTF((int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_layout_version) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_layout_version));
+	LASSERTF((int)offsetof(struct lmv_mds_md_v1, lmv_padding1) == 20, "found %lld\n",
+		 (long long)(int)offsetof(struct lmv_mds_md_v1, lmv_padding1));
+	LASSERTF((int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_padding1) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_padding1));
+	LASSERTF((int)offsetof(struct lmv_mds_md_v1, lmv_padding2) == 24, "found %lld\n",
+		 (long long)(int)offsetof(struct lmv_mds_md_v1, lmv_padding2));
+	LASSERTF((int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_padding2) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_padding2));
+	LASSERTF((int)offsetof(struct lmv_mds_md_v1, lmv_padding3) == 32, "found %lld\n",
+		 (long long)(int)offsetof(struct lmv_mds_md_v1, lmv_padding3));
+	LASSERTF((int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_padding3) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_padding3));
+	LASSERTF((int)offsetof(struct lmv_mds_md_v1, lmv_pool_name[16]) == 56, "found %lld\n",
+		 (long long)(int)offsetof(struct lmv_mds_md_v1, lmv_pool_name[16]));
+	LASSERTF((int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_pool_name[16]) == 1, "found %lld\n",
+		 (long long)(int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_pool_name[16]));
+	LASSERTF((int)offsetof(struct lmv_mds_md_v1, lmv_stripe_fids[0]) == 56, "found %lld\n",
+		 (long long)(int)offsetof(struct lmv_mds_md_v1, lmv_stripe_fids[0]));
+	LASSERTF((int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_stripe_fids[0]) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_stripe_fids[0]));
+	CLASSERT(LMV_MAGIC_V1 == 0x0CD20CD0);
+	CLASSERT(LMV_MAGIC_STRIPE == 0x0CD40CD0);
+	CLASSERT(LMV_HASH_TYPE_MASK == 0x0000ffff);
+	CLASSERT(LMV_HASH_FLAG_MIGRATION == 0x80000000);
+	CLASSERT(LMV_HASH_FLAG_DEAD == 0x40000000);
 
 	/* Checks for struct obd_statfs */
 	LASSERTF((int)sizeof(struct obd_statfs) == 144, "found %lld\n",
@@ -1582,53 +1639,53 @@ void lustre_assert_wire_constants(void)
 	LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_padding) == 4, "found %lld\n",
 		 (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_padding));
 	LASSERTF(Q_QUOTACHECK == 0x800100, "found 0x%.8x\n",
-		Q_QUOTACHECK);
+		 Q_QUOTACHECK);
 	LASSERTF(Q_INITQUOTA == 0x800101, "found 0x%.8x\n",
-		Q_INITQUOTA);
+		 Q_INITQUOTA);
 	LASSERTF(Q_GETOINFO == 0x800102, "found 0x%.8x\n",
-		Q_GETOINFO);
+		 Q_GETOINFO);
 	LASSERTF(Q_GETOQUOTA == 0x800103, "found 0x%.8x\n",
-		Q_GETOQUOTA);
+		 Q_GETOQUOTA);
 	LASSERTF(Q_FINVALIDATE == 0x800104, "found 0x%.8x\n",
-		Q_FINVALIDATE);
+		 Q_FINVALIDATE);
 
 	/* Checks for struct niobuf_remote */
 	LASSERTF((int)sizeof(struct niobuf_remote) == 16, "found %lld\n",
 		 (long long)(int)sizeof(struct niobuf_remote));
-	LASSERTF((int)offsetof(struct niobuf_remote, offset) == 0, "found %lld\n",
-		 (long long)(int)offsetof(struct niobuf_remote, offset));
-	LASSERTF((int)sizeof(((struct niobuf_remote *)0)->offset) == 8, "found %lld\n",
-		 (long long)(int)sizeof(((struct niobuf_remote *)0)->offset));
-	LASSERTF((int)offsetof(struct niobuf_remote, len) == 8, "found %lld\n",
-		 (long long)(int)offsetof(struct niobuf_remote, len));
-	LASSERTF((int)sizeof(((struct niobuf_remote *)0)->len) == 4, "found %lld\n",
-		 (long long)(int)sizeof(((struct niobuf_remote *)0)->len));
-	LASSERTF((int)offsetof(struct niobuf_remote, flags) == 12, "found %lld\n",
-		 (long long)(int)offsetof(struct niobuf_remote, flags));
-	LASSERTF((int)sizeof(((struct niobuf_remote *)0)->flags) == 4, "found %lld\n",
-		 (long long)(int)sizeof(((struct niobuf_remote *)0)->flags));
+	LASSERTF((int)offsetof(struct niobuf_remote, rnb_offset) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct niobuf_remote, rnb_offset));
+	LASSERTF((int)sizeof(((struct niobuf_remote *)0)->rnb_offset) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct niobuf_remote *)0)->rnb_offset));
+	LASSERTF((int)offsetof(struct niobuf_remote, rnb_len) == 8, "found %lld\n",
+		 (long long)(int)offsetof(struct niobuf_remote, rnb_len));
+	LASSERTF((int)sizeof(((struct niobuf_remote *)0)->rnb_len) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct niobuf_remote *)0)->rnb_len));
+	LASSERTF((int)offsetof(struct niobuf_remote, rnb_flags) == 12, "found %lld\n",
+		 (long long)(int)offsetof(struct niobuf_remote, rnb_flags));
+	LASSERTF((int)sizeof(((struct niobuf_remote *)0)->rnb_flags) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct niobuf_remote *)0)->rnb_flags));
 	LASSERTF(OBD_BRW_READ == 0x01, "found 0x%.8x\n",
-		OBD_BRW_READ);
+		 OBD_BRW_READ);
 	LASSERTF(OBD_BRW_WRITE == 0x02, "found 0x%.8x\n",
-		OBD_BRW_WRITE);
+		 OBD_BRW_WRITE);
 	LASSERTF(OBD_BRW_SYNC == 0x08, "found 0x%.8x\n",
-		OBD_BRW_SYNC);
+		 OBD_BRW_SYNC);
 	LASSERTF(OBD_BRW_CHECK == 0x10, "found 0x%.8x\n",
-		OBD_BRW_CHECK);
+		 OBD_BRW_CHECK);
 	LASSERTF(OBD_BRW_FROM_GRANT == 0x20, "found 0x%.8x\n",
-		OBD_BRW_FROM_GRANT);
+		 OBD_BRW_FROM_GRANT);
 	LASSERTF(OBD_BRW_GRANTED == 0x40, "found 0x%.8x\n",
-		OBD_BRW_GRANTED);
+		 OBD_BRW_GRANTED);
 	LASSERTF(OBD_BRW_NOCACHE == 0x80, "found 0x%.8x\n",
-		OBD_BRW_NOCACHE);
+		 OBD_BRW_NOCACHE);
 	LASSERTF(OBD_BRW_NOQUOTA == 0x100, "found 0x%.8x\n",
-		OBD_BRW_NOQUOTA);
+		 OBD_BRW_NOQUOTA);
 	LASSERTF(OBD_BRW_SRVLOCK == 0x200, "found 0x%.8x\n",
-		OBD_BRW_SRVLOCK);
+		 OBD_BRW_SRVLOCK);
 	LASSERTF(OBD_BRW_ASYNC == 0x400, "found 0x%.8x\n",
-		OBD_BRW_ASYNC);
+		 OBD_BRW_ASYNC);
 	LASSERTF(OBD_BRW_MEMALLOC == 0x800, "found 0x%.8x\n",
-		OBD_BRW_MEMALLOC);
+		 OBD_BRW_MEMALLOC);
 	LASSERTF(OBD_BRW_OVER_USRQUOTA == 0x1000, "found 0x%.8x\n",
 		 OBD_BRW_OVER_USRQUOTA);
 	LASSERTF(OBD_BRW_OVER_GRPQUOTA == 0x2000, "found 0x%.8x\n",
@@ -1663,203 +1720,203 @@ void lustre_assert_wire_constants(void)
 	/* Checks for struct mdt_body */
 	LASSERTF((int)sizeof(struct mdt_body) == 216, "found %lld\n",
 		 (long long)(int)sizeof(struct mdt_body));
-	LASSERTF((int)offsetof(struct mdt_body, fid1) == 0, "found %lld\n",
-		 (long long)(int)offsetof(struct mdt_body, fid1));
-	LASSERTF((int)sizeof(((struct mdt_body *)0)->fid1) == 16, "found %lld\n",
-		 (long long)(int)sizeof(((struct mdt_body *)0)->fid1));
-	LASSERTF((int)offsetof(struct mdt_body, fid2) == 16, "found %lld\n",
-		 (long long)(int)offsetof(struct mdt_body, fid2));
-	LASSERTF((int)sizeof(((struct mdt_body *)0)->fid2) == 16, "found %lld\n",
-		 (long long)(int)sizeof(((struct mdt_body *)0)->fid2));
-	LASSERTF((int)offsetof(struct mdt_body, handle) == 32, "found %lld\n",
-		 (long long)(int)offsetof(struct mdt_body, handle));
-	LASSERTF((int)sizeof(((struct mdt_body *)0)->handle) == 8, "found %lld\n",
-		 (long long)(int)sizeof(((struct mdt_body *)0)->handle));
-	LASSERTF((int)offsetof(struct mdt_body, valid) == 40, "found %lld\n",
-		 (long long)(int)offsetof(struct mdt_body, valid));
-	LASSERTF((int)sizeof(((struct mdt_body *)0)->valid) == 8, "found %lld\n",
-		 (long long)(int)sizeof(((struct mdt_body *)0)->valid));
-	LASSERTF((int)offsetof(struct mdt_body, size) == 48, "found %lld\n",
-		 (long long)(int)offsetof(struct mdt_body, size));
-	LASSERTF((int)sizeof(((struct mdt_body *)0)->size) == 8, "found %lld\n",
-		 (long long)(int)sizeof(((struct mdt_body *)0)->size));
-	LASSERTF((int)offsetof(struct mdt_body, mtime) == 56, "found %lld\n",
-		 (long long)(int)offsetof(struct mdt_body, mtime));
-	LASSERTF((int)sizeof(((struct mdt_body *)0)->mtime) == 8, "found %lld\n",
-		 (long long)(int)sizeof(((struct mdt_body *)0)->mtime));
-	LASSERTF((int)offsetof(struct mdt_body, atime) == 64, "found %lld\n",
-		 (long long)(int)offsetof(struct mdt_body, atime));
-	LASSERTF((int)sizeof(((struct mdt_body *)0)->atime) == 8, "found %lld\n",
-		 (long long)(int)sizeof(((struct mdt_body *)0)->atime));
-	LASSERTF((int)offsetof(struct mdt_body, ctime) == 72, "found %lld\n",
-		 (long long)(int)offsetof(struct mdt_body, ctime));
-	LASSERTF((int)sizeof(((struct mdt_body *)0)->ctime) == 8, "found %lld\n",
-		 (long long)(int)sizeof(((struct mdt_body *)0)->ctime));
-	LASSERTF((int)offsetof(struct mdt_body, blocks) == 80, "found %lld\n",
-		 (long long)(int)offsetof(struct mdt_body, blocks));
-	LASSERTF((int)sizeof(((struct mdt_body *)0)->blocks) == 8, "found %lld\n",
-		 (long long)(int)sizeof(((struct mdt_body *)0)->blocks));
-	LASSERTF((int)offsetof(struct mdt_body, t_state) == 96, "found %lld\n",
-		 (long long)(int)offsetof(struct mdt_body, t_state));
-	LASSERTF((int)sizeof(((struct mdt_body *)0)->t_state) == 8,
+	LASSERTF((int)offsetof(struct mdt_body, mbo_fid1) == 0, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, mbo_fid1));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_fid1) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->mbo_fid1));
+	LASSERTF((int)offsetof(struct mdt_body, mbo_fid2) == 16, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, mbo_fid2));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_fid2) == 16, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->mbo_fid2));
+	LASSERTF((int)offsetof(struct mdt_body, mbo_handle) == 32, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, mbo_handle));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_handle) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->mbo_handle));
+	LASSERTF((int)offsetof(struct mdt_body, mbo_valid) == 40, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, mbo_valid));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_valid) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->mbo_valid));
+	LASSERTF((int)offsetof(struct mdt_body, mbo_size) == 48, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, mbo_size));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_size) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->mbo_size));
+	LASSERTF((int)offsetof(struct mdt_body, mbo_mtime) == 56, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, mbo_mtime));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_mtime) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->mbo_mtime));
+	LASSERTF((int)offsetof(struct mdt_body, mbo_atime) == 64, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, mbo_atime));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_atime) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->mbo_atime));
+	LASSERTF((int)offsetof(struct mdt_body, mbo_ctime) == 72, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, mbo_ctime));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_ctime) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->mbo_ctime));
+	LASSERTF((int)offsetof(struct mdt_body, mbo_blocks) == 80, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, mbo_blocks));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_blocks) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->mbo_blocks));
+	LASSERTF((int)offsetof(struct mdt_body, mbo_t_state) == 96, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, mbo_t_state));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_t_state) == 8,
 		 "found %lld\n",
-		 (long long)(int)sizeof(((struct mdt_body *)0)->t_state));
-	LASSERTF((int)offsetof(struct mdt_body, fsuid) == 104, "found %lld\n",
-		 (long long)(int)offsetof(struct mdt_body, fsuid));
-	LASSERTF((int)sizeof(((struct mdt_body *)0)->fsuid) == 4, "found %lld\n",
-		 (long long)(int)sizeof(((struct mdt_body *)0)->fsuid));
-	LASSERTF((int)offsetof(struct mdt_body, fsgid) == 108, "found %lld\n",
-		 (long long)(int)offsetof(struct mdt_body, fsgid));
-	LASSERTF((int)sizeof(((struct mdt_body *)0)->fsgid) == 4, "found %lld\n",
-		 (long long)(int)sizeof(((struct mdt_body *)0)->fsgid));
-	LASSERTF((int)offsetof(struct mdt_body, capability) == 112, "found %lld\n",
-		 (long long)(int)offsetof(struct mdt_body, capability));
-	LASSERTF((int)sizeof(((struct mdt_body *)0)->capability) == 4, "found %lld\n",
-		 (long long)(int)sizeof(((struct mdt_body *)0)->capability));
-	LASSERTF((int)offsetof(struct mdt_body, mode) == 116, "found %lld\n",
-		 (long long)(int)offsetof(struct mdt_body, mode));
-	LASSERTF((int)sizeof(((struct mdt_body *)0)->mode) == 4, "found %lld\n",
-		 (long long)(int)sizeof(((struct mdt_body *)0)->mode));
-	LASSERTF((int)offsetof(struct mdt_body, uid) == 120, "found %lld\n",
-		 (long long)(int)offsetof(struct mdt_body, uid));
-	LASSERTF((int)sizeof(((struct mdt_body *)0)->uid) == 4, "found %lld\n",
-		 (long long)(int)sizeof(((struct mdt_body *)0)->uid));
-	LASSERTF((int)offsetof(struct mdt_body, gid) == 124, "found %lld\n",
-		 (long long)(int)offsetof(struct mdt_body, gid));
-	LASSERTF((int)sizeof(((struct mdt_body *)0)->gid) == 4, "found %lld\n",
-		 (long long)(int)sizeof(((struct mdt_body *)0)->gid));
-	LASSERTF((int)offsetof(struct mdt_body, flags) == 128, "found %lld\n",
-		 (long long)(int)offsetof(struct mdt_body, flags));
-	LASSERTF((int)sizeof(((struct mdt_body *)0)->flags) == 4, "found %lld\n",
-		 (long long)(int)sizeof(((struct mdt_body *)0)->flags));
-	LASSERTF((int)offsetof(struct mdt_body, rdev) == 132, "found %lld\n",
-		 (long long)(int)offsetof(struct mdt_body, rdev));
-	LASSERTF((int)sizeof(((struct mdt_body *)0)->rdev) == 4, "found %lld\n",
-		 (long long)(int)sizeof(((struct mdt_body *)0)->rdev));
-	LASSERTF((int)offsetof(struct mdt_body, nlink) == 136, "found %lld\n",
-		 (long long)(int)offsetof(struct mdt_body, nlink));
-	LASSERTF((int)sizeof(((struct mdt_body *)0)->nlink) == 4, "found %lld\n",
-		 (long long)(int)sizeof(((struct mdt_body *)0)->nlink));
-	LASSERTF((int)offsetof(struct mdt_body, unused2) == 140, "found %lld\n",
-		 (long long)(int)offsetof(struct mdt_body, unused2));
-	LASSERTF((int)sizeof(((struct mdt_body *)0)->unused2) == 4, "found %lld\n",
-		 (long long)(int)sizeof(((struct mdt_body *)0)->unused2));
-	LASSERTF((int)offsetof(struct mdt_body, suppgid) == 144, "found %lld\n",
-		 (long long)(int)offsetof(struct mdt_body, suppgid));
-	LASSERTF((int)sizeof(((struct mdt_body *)0)->suppgid) == 4, "found %lld\n",
-		 (long long)(int)sizeof(((struct mdt_body *)0)->suppgid));
-	LASSERTF((int)offsetof(struct mdt_body, eadatasize) == 148, "found %lld\n",
-		 (long long)(int)offsetof(struct mdt_body, eadatasize));
-	LASSERTF((int)sizeof(((struct mdt_body *)0)->eadatasize) == 4, "found %lld\n",
-		 (long long)(int)sizeof(((struct mdt_body *)0)->eadatasize));
-	LASSERTF((int)offsetof(struct mdt_body, aclsize) == 152, "found %lld\n",
-		 (long long)(int)offsetof(struct mdt_body, aclsize));
-	LASSERTF((int)sizeof(((struct mdt_body *)0)->aclsize) == 4, "found %lld\n",
-		 (long long)(int)sizeof(((struct mdt_body *)0)->aclsize));
-	LASSERTF((int)offsetof(struct mdt_body, max_mdsize) == 156, "found %lld\n",
-		 (long long)(int)offsetof(struct mdt_body, max_mdsize));
-	LASSERTF((int)sizeof(((struct mdt_body *)0)->max_mdsize) == 4, "found %lld\n",
-		 (long long)(int)sizeof(((struct mdt_body *)0)->max_mdsize));
-	LASSERTF((int)offsetof(struct mdt_body, max_cookiesize) == 160, "found %lld\n",
-		 (long long)(int)offsetof(struct mdt_body, max_cookiesize));
-	LASSERTF((int)sizeof(((struct mdt_body *)0)->max_cookiesize) == 4, "found %lld\n",
-		 (long long)(int)sizeof(((struct mdt_body *)0)->max_cookiesize));
-	LASSERTF((int)offsetof(struct mdt_body, uid_h) == 164, "found %lld\n",
-		 (long long)(int)offsetof(struct mdt_body, uid_h));
-	LASSERTF((int)sizeof(((struct mdt_body *)0)->uid_h) == 4, "found %lld\n",
-		 (long long)(int)sizeof(((struct mdt_body *)0)->uid_h));
-	LASSERTF((int)offsetof(struct mdt_body, gid_h) == 168, "found %lld\n",
-		 (long long)(int)offsetof(struct mdt_body, gid_h));
-	LASSERTF((int)sizeof(((struct mdt_body *)0)->gid_h) == 4, "found %lld\n",
-		 (long long)(int)sizeof(((struct mdt_body *)0)->gid_h));
-	LASSERTF((int)offsetof(struct mdt_body, padding_5) == 172, "found %lld\n",
-		 (long long)(int)offsetof(struct mdt_body, padding_5));
-	LASSERTF((int)sizeof(((struct mdt_body *)0)->padding_5) == 4, "found %lld\n",
-		 (long long)(int)sizeof(((struct mdt_body *)0)->padding_5));
-	LASSERTF((int)offsetof(struct mdt_body, padding_6) == 176, "found %lld\n",
-		 (long long)(int)offsetof(struct mdt_body, padding_6));
-	LASSERTF((int)sizeof(((struct mdt_body *)0)->padding_6) == 8, "found %lld\n",
-		 (long long)(int)sizeof(((struct mdt_body *)0)->padding_6));
-	LASSERTF((int)offsetof(struct mdt_body, padding_7) == 184, "found %lld\n",
-		 (long long)(int)offsetof(struct mdt_body, padding_7));
-	LASSERTF((int)sizeof(((struct mdt_body *)0)->padding_7) == 8, "found %lld\n",
-		 (long long)(int)sizeof(((struct mdt_body *)0)->padding_7));
-	LASSERTF((int)offsetof(struct mdt_body, padding_8) == 192, "found %lld\n",
-		 (long long)(int)offsetof(struct mdt_body, padding_8));
-	LASSERTF((int)sizeof(((struct mdt_body *)0)->padding_8) == 8, "found %lld\n",
-		 (long long)(int)sizeof(((struct mdt_body *)0)->padding_8));
-	LASSERTF((int)offsetof(struct mdt_body, padding_9) == 200, "found %lld\n",
-		 (long long)(int)offsetof(struct mdt_body, padding_9));
-	LASSERTF((int)sizeof(((struct mdt_body *)0)->padding_9) == 8, "found %lld\n",
-		 (long long)(int)sizeof(((struct mdt_body *)0)->padding_9));
-	LASSERTF((int)offsetof(struct mdt_body, padding_10) == 208, "found %lld\n",
-		 (long long)(int)offsetof(struct mdt_body, padding_10));
-	LASSERTF((int)sizeof(((struct mdt_body *)0)->padding_10) == 8, "found %lld\n",
-		 (long long)(int)sizeof(((struct mdt_body *)0)->padding_10));
+		 (long long)(int)sizeof(((struct mdt_body *)0)->mbo_t_state));
+	LASSERTF((int)offsetof(struct mdt_body, mbo_fsuid) == 104, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, mbo_fsuid));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_fsuid) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->mbo_fsuid));
+	LASSERTF((int)offsetof(struct mdt_body, mbo_fsgid) == 108, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, mbo_fsgid));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_fsgid) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->mbo_fsgid));
+	LASSERTF((int)offsetof(struct mdt_body, mbo_capability) == 112, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, mbo_capability));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_capability) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->mbo_capability));
+	LASSERTF((int)offsetof(struct mdt_body, mbo_mode) == 116, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, mbo_mode));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_mode) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->mbo_mode));
+	LASSERTF((int)offsetof(struct mdt_body, mbo_uid) == 120, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, mbo_uid));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_uid) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->mbo_uid));
+	LASSERTF((int)offsetof(struct mdt_body, mbo_gid) == 124, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, mbo_gid));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_gid) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->mbo_gid));
+	LASSERTF((int)offsetof(struct mdt_body, mbo_flags) == 128, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, mbo_flags));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_flags) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->mbo_flags));
+	LASSERTF((int)offsetof(struct mdt_body, mbo_rdev) == 132, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, mbo_rdev));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_rdev) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->mbo_rdev));
+	LASSERTF((int)offsetof(struct mdt_body, mbo_nlink) == 136, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, mbo_nlink));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_nlink) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->mbo_nlink));
+	LASSERTF((int)offsetof(struct mdt_body, mbo_unused2) == 140, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, mbo_unused2));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_unused2) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->mbo_unused2));
+	LASSERTF((int)offsetof(struct mdt_body, mbo_suppgid) == 144, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, mbo_suppgid));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_suppgid) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->mbo_suppgid));
+	LASSERTF((int)offsetof(struct mdt_body, mbo_eadatasize) == 148, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, mbo_eadatasize));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_eadatasize) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->mbo_eadatasize));
+	LASSERTF((int)offsetof(struct mdt_body, mbo_aclsize) == 152, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, mbo_aclsize));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_aclsize) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->mbo_aclsize));
+	LASSERTF((int)offsetof(struct mdt_body, mbo_max_mdsize) == 156, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, mbo_max_mdsize));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_max_mdsize) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->mbo_max_mdsize));
+	LASSERTF((int)offsetof(struct mdt_body, mbo_max_cookiesize) == 160, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, mbo_max_cookiesize));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_max_cookiesize) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->mbo_max_cookiesize));
+	LASSERTF((int)offsetof(struct mdt_body, mbo_uid_h) == 164, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, mbo_uid_h));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_uid_h) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->mbo_uid_h));
+	LASSERTF((int)offsetof(struct mdt_body, mbo_gid_h) == 168, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, mbo_gid_h));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_gid_h) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->mbo_gid_h));
+	LASSERTF((int)offsetof(struct mdt_body, mbo_padding_5) == 172, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, mbo_padding_5));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_padding_5) == 4, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->mbo_padding_5));
+	LASSERTF((int)offsetof(struct mdt_body, mbo_padding_6) == 176, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, mbo_padding_6));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_padding_6) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->mbo_padding_6));
+	LASSERTF((int)offsetof(struct mdt_body, mbo_padding_7) == 184, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, mbo_padding_7));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_padding_7) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->mbo_padding_7));
+	LASSERTF((int)offsetof(struct mdt_body, mbo_padding_8) == 192, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, mbo_padding_8));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_padding_8) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->mbo_padding_8));
+	LASSERTF((int)offsetof(struct mdt_body, mbo_padding_9) == 200, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, mbo_padding_9));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_padding_9) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->mbo_padding_9));
+	LASSERTF((int)offsetof(struct mdt_body, mbo_padding_10) == 208, "found %lld\n",
+		 (long long)(int)offsetof(struct mdt_body, mbo_padding_10));
+	LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_padding_10) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct mdt_body *)0)->mbo_padding_10));
 	LASSERTF(MDS_FMODE_CLOSED == 000000000000UL, "found 0%.11oUL\n",
-		MDS_FMODE_CLOSED);
+		 MDS_FMODE_CLOSED);
 	LASSERTF(MDS_FMODE_EXEC == 000000000004UL, "found 0%.11oUL\n",
-		MDS_FMODE_EXEC);
+		 MDS_FMODE_EXEC);
 	LASSERTF(MDS_FMODE_EPOCH == 000001000000UL, "found 0%.11oUL\n",
-		MDS_FMODE_EPOCH);
+		 MDS_FMODE_EPOCH);
 	LASSERTF(MDS_FMODE_TRUNC == 000002000000UL, "found 0%.11oUL\n",
-		MDS_FMODE_TRUNC);
+		 MDS_FMODE_TRUNC);
 	LASSERTF(MDS_FMODE_SOM == 000004000000UL, "found 0%.11oUL\n",
-		MDS_FMODE_SOM);
+		 MDS_FMODE_SOM);
 	LASSERTF(MDS_OPEN_CREATED == 000000000010UL, "found 0%.11oUL\n",
-		MDS_OPEN_CREATED);
+		 MDS_OPEN_CREATED);
 	LASSERTF(MDS_OPEN_CROSS == 000000000020UL, "found 0%.11oUL\n",
-		MDS_OPEN_CROSS);
+		 MDS_OPEN_CROSS);
 	LASSERTF(MDS_OPEN_CREAT == 000000000100UL, "found 0%.11oUL\n",
-		MDS_OPEN_CREAT);
+		 MDS_OPEN_CREAT);
 	LASSERTF(MDS_OPEN_EXCL == 000000000200UL, "found 0%.11oUL\n",
-		MDS_OPEN_EXCL);
+		 MDS_OPEN_EXCL);
 	LASSERTF(MDS_OPEN_TRUNC == 000000001000UL, "found 0%.11oUL\n",
-		MDS_OPEN_TRUNC);
+		 MDS_OPEN_TRUNC);
 	LASSERTF(MDS_OPEN_APPEND == 000000002000UL, "found 0%.11oUL\n",
-		MDS_OPEN_APPEND);
+		 MDS_OPEN_APPEND);
 	LASSERTF(MDS_OPEN_SYNC == 000000010000UL, "found 0%.11oUL\n",
-		MDS_OPEN_SYNC);
+		 MDS_OPEN_SYNC);
 	LASSERTF(MDS_OPEN_DIRECTORY == 000000200000UL, "found 0%.11oUL\n",
-		MDS_OPEN_DIRECTORY);
+		 MDS_OPEN_DIRECTORY);
 	LASSERTF(MDS_OPEN_BY_FID == 000040000000UL, "found 0%.11oUL\n",
-		MDS_OPEN_BY_FID);
+		 MDS_OPEN_BY_FID);
 	LASSERTF(MDS_OPEN_DELAY_CREATE == 000100000000UL, "found 0%.11oUL\n",
-		MDS_OPEN_DELAY_CREATE);
+		 MDS_OPEN_DELAY_CREATE);
 	LASSERTF(MDS_OPEN_OWNEROVERRIDE == 000200000000UL, "found 0%.11oUL\n",
-		MDS_OPEN_OWNEROVERRIDE);
+		 MDS_OPEN_OWNEROVERRIDE);
 	LASSERTF(MDS_OPEN_JOIN_FILE == 000400000000UL, "found 0%.11oUL\n",
-		MDS_OPEN_JOIN_FILE);
+		 MDS_OPEN_JOIN_FILE);
 	LASSERTF(MDS_OPEN_LOCK == 004000000000UL, "found 0%.11oUL\n",
-		MDS_OPEN_LOCK);
+		 MDS_OPEN_LOCK);
 	LASSERTF(MDS_OPEN_HAS_EA == 010000000000UL, "found 0%.11oUL\n",
-		MDS_OPEN_HAS_EA);
+		 MDS_OPEN_HAS_EA);
 	LASSERTF(MDS_OPEN_HAS_OBJS == 020000000000UL, "found 0%.11oUL\n",
-		MDS_OPEN_HAS_OBJS);
+		 MDS_OPEN_HAS_OBJS);
 	LASSERTF(MDS_OPEN_NORESTORE == 00000000000100000000000ULL, "found 0%.22lloULL\n",
-			(long long)MDS_OPEN_NORESTORE);
+		 (long long)MDS_OPEN_NORESTORE);
 	LASSERTF(MDS_OPEN_NEWSTRIPE == 00000000000200000000000ULL, "found 0%.22lloULL\n",
-			(long long)MDS_OPEN_NEWSTRIPE);
+		 (long long)MDS_OPEN_NEWSTRIPE);
 	LASSERTF(MDS_OPEN_VOLATILE == 00000000000400000000000ULL, "found 0%.22lloULL\n",
-			(long long)MDS_OPEN_VOLATILE);
+		 (long long)MDS_OPEN_VOLATILE);
 	LASSERTF(LUSTRE_SYNC_FL == 0x00000008, "found 0x%.8x\n",
-		LUSTRE_SYNC_FL);
+		 LUSTRE_SYNC_FL);
 	LASSERTF(LUSTRE_IMMUTABLE_FL == 0x00000010, "found 0x%.8x\n",
-		LUSTRE_IMMUTABLE_FL);
+		 LUSTRE_IMMUTABLE_FL);
 	LASSERTF(LUSTRE_APPEND_FL == 0x00000020, "found 0x%.8x\n",
-		LUSTRE_APPEND_FL);
+		 LUSTRE_APPEND_FL);
 	LASSERTF(LUSTRE_NOATIME_FL == 0x00000080, "found 0x%.8x\n",
-		LUSTRE_NOATIME_FL);
+		 LUSTRE_NOATIME_FL);
 	LASSERTF(LUSTRE_DIRSYNC_FL == 0x00010000, "found 0x%.8x\n",
-		LUSTRE_DIRSYNC_FL);
+		 LUSTRE_DIRSYNC_FL);
 	LASSERTF(MDS_INODELOCK_LOOKUP == 0x000001, "found 0x%.8x\n",
-		MDS_INODELOCK_LOOKUP);
+		 MDS_INODELOCK_LOOKUP);
 	LASSERTF(MDS_INODELOCK_UPDATE == 0x000002, "found 0x%.8x\n",
-		MDS_INODELOCK_UPDATE);
+		 MDS_INODELOCK_UPDATE);
 	LASSERTF(MDS_INODELOCK_OPEN == 0x000004, "found 0x%.8x\n",
-		MDS_INODELOCK_OPEN);
+		 MDS_INODELOCK_OPEN);
 	LASSERTF(MDS_INODELOCK_LAYOUT == 0x000008, "found 0x%.8x\n",
-		MDS_INODELOCK_LAYOUT);
+		 MDS_INODELOCK_LAYOUT);
 
 	/* Checks for struct mdt_ioepoch */
 	LASSERTF((int)sizeof(struct mdt_ioepoch) == 24, "found %lld\n",
@@ -2617,35 +2674,6 @@ void lustre_assert_wire_constants(void)
 	LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_uuid) == 40, "found %lld\n",
 		 (long long)(int)sizeof(((struct lmv_desc *)0)->ld_uuid));
 
-	/* Checks for struct lmv_stripe_md */
-	LASSERTF((int)sizeof(struct lmv_stripe_md) == 32, "found %lld\n",
-		 (long long)(int)sizeof(struct lmv_stripe_md));
-	LASSERTF((int)offsetof(struct lmv_stripe_md, mea_magic) == 0, "found %lld\n",
-		 (long long)(int)offsetof(struct lmv_stripe_md, mea_magic));
-	LASSERTF((int)sizeof(((struct lmv_stripe_md *)0)->mea_magic) == 4, "found %lld\n",
-		 (long long)(int)sizeof(((struct lmv_stripe_md *)0)->mea_magic));
-	LASSERTF((int)offsetof(struct lmv_stripe_md, mea_count) == 4, "found %lld\n",
-		 (long long)(int)offsetof(struct lmv_stripe_md, mea_count));
-	LASSERTF((int)sizeof(((struct lmv_stripe_md *)0)->mea_count) == 4, "found %lld\n",
-		 (long long)(int)sizeof(((struct lmv_stripe_md *)0)->mea_count));
-	LASSERTF((int)offsetof(struct lmv_stripe_md, mea_master) == 8, "found %lld\n",
-		 (long long)(int)offsetof(struct lmv_stripe_md, mea_master));
-	LASSERTF((int)sizeof(((struct lmv_stripe_md *)0)->mea_master) == 4, "found %lld\n",
-		 (long long)(int)sizeof(((struct lmv_stripe_md *)0)->mea_master));
-	LASSERTF((int)offsetof(struct lmv_stripe_md, mea_padding) == 12, "found %lld\n",
-		 (long long)(int)offsetof(struct lmv_stripe_md, mea_padding));
-	LASSERTF((int)sizeof(((struct lmv_stripe_md *)0)->mea_padding) == 4, "found %lld\n",
-		 (long long)(int)sizeof(((struct lmv_stripe_md *)0)->mea_padding));
-	CLASSERT(LOV_MAXPOOLNAME == 16);
-	LASSERTF((int)offsetof(struct lmv_stripe_md, mea_pool_name[16]) == 32, "found %lld\n",
-		 (long long)(int)offsetof(struct lmv_stripe_md, mea_pool_name[16]));
-	LASSERTF((int)sizeof(((struct lmv_stripe_md *)0)->mea_pool_name[16]) == 1, "found %lld\n",
-		 (long long)(int)sizeof(((struct lmv_stripe_md *)0)->mea_pool_name[16]));
-	LASSERTF((int)offsetof(struct lmv_stripe_md, mea_ids[0]) == 32, "found %lld\n",
-		 (long long)(int)offsetof(struct lmv_stripe_md, mea_ids[0]));
-	LASSERTF((int)sizeof(((struct lmv_stripe_md *)0)->mea_ids[0]) == 16, "found %lld\n",
-		 (long long)(int)sizeof(((struct lmv_stripe_md *)0)->mea_ids[0]));
-
 	/* Checks for struct lov_desc */
 	LASSERTF((int)sizeof(struct lov_desc) == 88, "found %lld\n",
 		 (long long)(int)sizeof(struct lov_desc));
@@ -3195,10 +3223,10 @@ void lustre_assert_wire_constants(void)
 		 (long long)(int)offsetof(struct llog_setattr64_rec, lsr_gid_h));
 	LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_gid_h) == 4, "found %lld\n",
 		 (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_gid_h));
-	LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_padding) == 48, "found %lld\n",
-		 (long long)(int)offsetof(struct llog_setattr64_rec, lsr_padding));
-	LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_padding) == 8, "found %lld\n",
-		 (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_padding));
+	LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_valid) == 48, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_setattr64_rec, lsr_valid));
+	LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_valid) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_valid));
 	LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_tail) == 56, "found %lld\n",
 		 (long long)(int)offsetof(struct llog_setattr64_rec, lsr_tail));
 	LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_tail) == 8, "found %lld\n",
@@ -3272,50 +3300,6 @@ void lustre_assert_wire_constants(void)
 	LASSERTF((int)sizeof(((struct changelog_rec *)0)->cr_pfid) == 16, "found %lld\n",
 		 (long long)(int)sizeof(((struct changelog_rec *)0)->cr_pfid));
 
-	/* Checks for struct changelog_ext_rec */
-	LASSERTF((int)sizeof(struct changelog_ext_rec) == 96, "found %lld\n",
-		 (long long)(int)sizeof(struct changelog_ext_rec));
-	LASSERTF((int)offsetof(struct changelog_ext_rec, cr_namelen) == 0, "found %lld\n",
-		 (long long)(int)offsetof(struct changelog_ext_rec, cr_namelen));
-	LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_namelen) == 2, "found %lld\n",
-		 (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_namelen));
-	LASSERTF((int)offsetof(struct changelog_ext_rec, cr_flags) == 2, "found %lld\n",
-		 (long long)(int)offsetof(struct changelog_ext_rec, cr_flags));
-	LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_flags) == 2, "found %lld\n",
-		 (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_flags));
-	LASSERTF((int)offsetof(struct changelog_ext_rec, cr_type) == 4, "found %lld\n",
-		 (long long)(int)offsetof(struct changelog_ext_rec, cr_type));
-	LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_type) == 4, "found %lld\n",
-		 (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_type));
-	LASSERTF((int)offsetof(struct changelog_ext_rec, cr_index) == 8, "found %lld\n",
-		 (long long)(int)offsetof(struct changelog_ext_rec, cr_index));
-	LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_index) == 8, "found %lld\n",
-		 (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_index));
-	LASSERTF((int)offsetof(struct changelog_ext_rec, cr_prev) == 16, "found %lld\n",
-		 (long long)(int)offsetof(struct changelog_ext_rec, cr_prev));
-	LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_prev) == 8, "found %lld\n",
-		 (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_prev));
-	LASSERTF((int)offsetof(struct changelog_ext_rec, cr_time) == 24, "found %lld\n",
-		 (long long)(int)offsetof(struct changelog_ext_rec, cr_time));
-	LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_time) == 8, "found %lld\n",
-		 (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_time));
-	LASSERTF((int)offsetof(struct changelog_ext_rec, cr_tfid) == 32, "found %lld\n",
-		 (long long)(int)offsetof(struct changelog_ext_rec, cr_tfid));
-	LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_tfid) == 16, "found %lld\n",
-		 (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_tfid));
-	LASSERTF((int)offsetof(struct changelog_ext_rec, cr_pfid) == 48, "found %lld\n",
-		 (long long)(int)offsetof(struct changelog_ext_rec, cr_pfid));
-	LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_pfid) == 16, "found %lld\n",
-		 (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_pfid));
-	LASSERTF((int)offsetof(struct changelog_ext_rec, cr_sfid) == 64, "found %lld\n",
-		 (long long)(int)offsetof(struct changelog_ext_rec, cr_sfid));
-	LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_sfid) == 16, "found %lld\n",
-		 (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_sfid));
-	LASSERTF((int)offsetof(struct changelog_ext_rec, cr_spfid) == 80, "found %lld\n",
-		 (long long)(int)offsetof(struct changelog_ext_rec, cr_spfid));
-	LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_spfid) == 16, "found %lld\n",
-		 (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_spfid));
-
 	/* Checks for struct changelog_setinfo */
 	LASSERTF((int)sizeof(struct changelog_setinfo) == 12, "found %lld\n",
 		 (long long)(int)sizeof(struct changelog_setinfo));
@@ -3339,10 +3323,10 @@ void lustre_assert_wire_constants(void)
 		 (long long)(int)offsetof(struct llog_changelog_rec, cr));
 	LASSERTF((int)sizeof(((struct llog_changelog_rec *)0)->cr) == 64, "found %lld\n",
 		 (long long)(int)sizeof(((struct llog_changelog_rec *)0)->cr));
-	LASSERTF((int)offsetof(struct llog_changelog_rec, cr_tail) == 80, "found %lld\n",
-		 (long long)(int)offsetof(struct llog_changelog_rec, cr_tail));
-	LASSERTF((int)sizeof(((struct llog_changelog_rec *)0)->cr_tail) == 8, "found %lld\n",
-		 (long long)(int)sizeof(((struct llog_changelog_rec *)0)->cr_tail));
+	LASSERTF((int)offsetof(struct llog_changelog_rec, cr_do_not_use) == 80, "found %lld\n",
+		 (long long)(int)offsetof(struct llog_changelog_rec, cr_do_not_use));
+	LASSERTF((int)sizeof(((struct llog_changelog_rec *)0)->cr_do_not_use) == 8, "found %lld\n",
+		 (long long)(int)sizeof(((struct llog_changelog_rec *)0)->cr_do_not_use));
 
 	/* Checks for struct llog_changelog_user_rec */
 	LASSERTF((int)sizeof(struct llog_changelog_user_rec) == 40, "found %lld\n",
@@ -3506,6 +3490,19 @@ void lustre_assert_wire_constants(void)
 	CLASSERT(LLOG_ORIGIN_HANDLE_DESTROY == 509);
 	CLASSERT(LLOG_FIRST_OPC == 501);
 	CLASSERT(LLOG_LAST_OPC == 510);
+	CLASSERT(LLOG_CONFIG_ORIG_CTXT == 0);
+	CLASSERT(LLOG_CONFIG_REPL_CTXT == 1);
+	CLASSERT(LLOG_MDS_OST_ORIG_CTXT == 2);
+	CLASSERT(LLOG_MDS_OST_REPL_CTXT == 3);
+	CLASSERT(LLOG_SIZE_ORIG_CTXT == 4);
+	CLASSERT(LLOG_SIZE_REPL_CTXT == 5);
+	CLASSERT(LLOG_TEST_ORIG_CTXT == 8);
+	CLASSERT(LLOG_TEST_REPL_CTXT == 9);
+	CLASSERT(LLOG_CHANGELOG_ORIG_CTXT == 12);
+	CLASSERT(LLOG_CHANGELOG_REPL_CTXT == 13);
+	CLASSERT(LLOG_CHANGELOG_USER_ORIG_CTXT == 14);
+	CLASSERT(LLOG_AGENT_ORIG_CTXT == 15);
+	CLASSERT(LLOG_MAX_CTXTS == 16);
 
 	/* Checks for struct llogd_conn_body */
 	LASSERTF((int)sizeof(struct llogd_conn_body) == 40, "found %lld\n",
@@ -3943,9 +3940,9 @@ void lustre_assert_wire_constants(void)
 	LASSERTF((int)sizeof(((struct hsm_progress *)0)->padding) == 4, "found %lld\n",
 		 (long long)(int)sizeof(((struct hsm_progress *)0)->padding));
 	LASSERTF(HP_FLAG_COMPLETED == 0x01, "found 0x%.8x\n",
-		HP_FLAG_COMPLETED);
+		 HP_FLAG_COMPLETED);
 	LASSERTF(HP_FLAG_RETRY == 0x02, "found 0x%.8x\n",
-		HP_FLAG_RETRY);
+		 HP_FLAG_RETRY);
 
 	LASSERTF((int)offsetof(struct hsm_copy, hc_data_version) == 0, "found %lld\n",
 		 (long long)(int)offsetof(struct hsm_copy, hc_data_version));
@@ -4100,9 +4097,9 @@ void lustre_assert_wire_constants(void)
 	LASSERTF((int)sizeof(((struct hsm_request *)0)->hr_data_len) == 4, "found %lld\n",
 		 (long long)(int)sizeof(((struct hsm_request *)0)->hr_data_len));
 	LASSERTF(HSM_FORCE_ACTION == 0x00000001UL, "found 0x%.8xUL\n",
-		(unsigned)HSM_FORCE_ACTION);
+		 (unsigned)HSM_FORCE_ACTION);
 	LASSERTF(HSM_GHOST_COPY == 0x00000002UL, "found 0x%.8xUL\n",
-		(unsigned)HSM_GHOST_COPY);
+		 (unsigned)HSM_GHOST_COPY);
 
 	/* Checks for struct hsm_user_request */
 	LASSERTF((int)sizeof(struct hsm_user_request) == 24, "found %lld\n",