Diffstat (limited to 'fs/smb/client/transport.c')
| -rw-r--r-- | fs/smb/client/transport.c | 1290 |
1 file changed, 1290 insertions(+), 0 deletions(-)
diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c new file mode 100644 index 000000000000..3b34c3f4da2d --- /dev/null +++ b/fs/smb/client/transport.c @@ -0,0 +1,1290 @@ +// SPDX-License-Identifier: LGPL-2.1 +/* + * + * Copyright (C) International Business Machines Corp., 2002,2008 + * Author(s): Steve French (sfrench@us.ibm.com) + * Jeremy Allison (jra@samba.org) 2006. + * + */ + +#include <linux/fs.h> +#include <linux/list.h> +#include <linux/gfp.h> +#include <linux/wait.h> +#include <linux/net.h> +#include <linux/delay.h> +#include <linux/freezer.h> +#include <linux/tcp.h> +#include <linux/bvec.h> +#include <linux/highmem.h> +#include <linux/uaccess.h> +#include <linux/processor.h> +#include <linux/mempool.h> +#include <linux/sched/signal.h> +#include <linux/task_io_accounting_ops.h> +#include <linux/task_work.h> +#include "cifspdu.h" +#include "cifsglob.h" +#include "cifsproto.h" +#include "cifs_debug.h" +#include "smb2proto.h" +#include "smbdirect.h" +#include "compress.h" + +void +cifs_wake_up_task(struct TCP_Server_Info *server, struct mid_q_entry *mid) +{ + if (mid->mid_state == MID_RESPONSE_RECEIVED) + mid->mid_state = MID_RESPONSE_READY; + wake_up_process(mid->callback_data); +} + +void __release_mid(struct TCP_Server_Info *server, struct mid_q_entry *midEntry) +{ +#ifdef CONFIG_CIFS_STATS2 + __le16 command = server->vals->lock_cmd; + __u16 smb_cmd = le16_to_cpu(midEntry->command); + unsigned long now; + unsigned long roundtrip_time; +#endif + + if (midEntry->resp_buf && (midEntry->wait_cancelled) && + (midEntry->mid_state == MID_RESPONSE_RECEIVED || + midEntry->mid_state == MID_RESPONSE_READY) && + server->ops->handle_cancelled_mid) + server->ops->handle_cancelled_mid(midEntry, server); + + midEntry->mid_state = MID_FREE; + atomic_dec(&mid_count); + if (midEntry->large_buf) + cifs_buf_release(midEntry->resp_buf); + else + cifs_small_buf_release(midEntry->resp_buf); +#ifdef CONFIG_CIFS_STATS2 + now = jiffies; + if (now < midEntry->when_alloc) + cifs_server_dbg(VFS, "Invalid mid allocation time\n"); + roundtrip_time = now - midEntry->when_alloc; + + if (smb_cmd < NUMBER_OF_SMB2_COMMANDS) { + if (atomic_read(&server->num_cmds[smb_cmd]) == 0) { + server->slowest_cmd[smb_cmd] = roundtrip_time; + server->fastest_cmd[smb_cmd] = roundtrip_time; + } else { + if (server->slowest_cmd[smb_cmd] < roundtrip_time) + server->slowest_cmd[smb_cmd] = roundtrip_time; + else if (server->fastest_cmd[smb_cmd] > roundtrip_time) + server->fastest_cmd[smb_cmd] = roundtrip_time; + } + cifs_stats_inc(&server->num_cmds[smb_cmd]); + server->time_per_cmd[smb_cmd] += roundtrip_time; + } + /* + * commands taking longer than one second (default) can be indications + * that something is wrong, unless it is quite a slow link or a very + * busy server. 
Note that this calc is unlikely or impossible to wrap + * as long as slow_rsp_threshold is not set way above recommended max + * value (32767 ie 9 hours) and is generally harmless even if wrong + * since only affects debug counters - so leaving the calc as simple + * comparison rather than doing multiple conversions and overflow + * checks + */ + if ((slow_rsp_threshold != 0) && + time_after(now, midEntry->when_alloc + (slow_rsp_threshold * HZ)) && + (midEntry->command != command)) { + /* + * smb2slowcmd[NUMBER_OF_SMB2_COMMANDS] counts by command + * NB: le16_to_cpu returns unsigned so can not be negative below + */ + if (smb_cmd < NUMBER_OF_SMB2_COMMANDS) + cifs_stats_inc(&server->smb2slowcmd[smb_cmd]); + + trace_smb3_slow_rsp(smb_cmd, midEntry->mid, midEntry->pid, + midEntry->when_sent, midEntry->when_received); + if (cifsFYI & CIFS_TIMER) { + pr_debug("slow rsp: cmd %d mid %llu", + midEntry->command, midEntry->mid); + cifs_info("A: 0x%lx S: 0x%lx R: 0x%lx\n", + now - midEntry->when_alloc, + now - midEntry->when_sent, + now - midEntry->when_received); + } + } +#endif + put_task_struct(midEntry->creator); + + mempool_free(midEntry, &cifs_mid_pool); +} + +void +delete_mid(struct TCP_Server_Info *server, struct mid_q_entry *mid) +{ + spin_lock(&server->mid_queue_lock); + + if (!mid->deleted_from_q) { + list_del_init(&mid->qhead); + mid->deleted_from_q = true; + } + spin_unlock(&server->mid_queue_lock); + + release_mid(server, mid); +} + +/* + * smb_send_kvec - send an array of kvecs to the server + * @server: Server to send the data to + * @smb_msg: Message to send + * @sent: amount of data sent on socket is stored here + * + * Our basic "send data to server" function. Should be called with srv_mutex + * held. The caller is responsible for handling the results. + */ +int +smb_send_kvec(struct TCP_Server_Info *server, struct msghdr *smb_msg, + size_t *sent) +{ + int rc = 0; + int retries = 0; + struct socket *ssocket = server->ssocket; + + *sent = 0; + + if (server->noblocksnd) + smb_msg->msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL; + else + smb_msg->msg_flags = MSG_NOSIGNAL; + + while (msg_data_left(smb_msg)) { + /* + * If blocking send, we try 3 times, since each can block + * for 5 seconds. For nonblocking we have to try more + * but wait increasing amounts of time allowing time for + * socket to clear. The overall time we wait in either + * case to send on the socket is about 15 seconds. + * Similarly we wait for 15 seconds for a response from + * the server in SendReceive[2] for the server to send + * a response back for most types of requests (except + * SMB Write past end of file which can be slow, and + * blocking lock operations). NFS waits slightly longer + * than CIFS, but this can make it take longer for + * nonresponsive servers to be detected and 15 seconds + * is more than enough time for modern networks to + * send a packet. In most cases if we fail to send + * after the retries we will kill the socket and + * reconnect which may clear the network problem. + * + * Even if regular signals are masked, EINTR might be + * propagated from sk_stream_wait_memory() to here when + * TIF_NOTIFY_SIGNAL is used for task work. For example, + * certain io_uring completions will use that. Treat + * having EINTR with pending task work the same as EAGAIN + * to avoid unnecessary reconnects. 
+ */ + rc = sock_sendmsg(ssocket, smb_msg); + if (rc == -EAGAIN || unlikely(rc == -EINTR && task_work_pending(current))) { + retries++; + if (retries >= 14 || + (!server->noblocksnd && (retries > 2))) { + cifs_server_dbg(VFS, "sends on sock %p stuck for 15 seconds\n", + ssocket); + return -EAGAIN; + } + msleep(1 << retries); + continue; + } + + if (rc < 0) + return rc; + + if (rc == 0) { + /* should never happen, letting socket clear before + retrying is our only obvious option here */ + cifs_server_dbg(VFS, "tcp sent no data\n"); + msleep(500); + continue; + } + + /* send was at least partially successful */ + *sent += rc; + retries = 0; /* in case we get ENOSPC on the next send */ + } + return 0; +} + +unsigned long +smb_rqst_len(struct TCP_Server_Info *server, struct smb_rqst *rqst) +{ + unsigned int i; + struct kvec *iov; + int nvec; + unsigned long buflen = 0; + + if (!is_smb1(server) && rqst->rq_nvec >= 2 && + rqst->rq_iov[0].iov_len == 4) { + iov = &rqst->rq_iov[1]; + nvec = rqst->rq_nvec - 1; + } else { + iov = rqst->rq_iov; + nvec = rqst->rq_nvec; + } + + /* total up iov array first */ + for (i = 0; i < nvec; i++) + buflen += iov[i].iov_len; + + buflen += iov_iter_count(&rqst->rq_iter); + return buflen; +} + +int __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, + struct smb_rqst *rqst) +{ + int rc; + struct kvec *iov; + int n_vec; + unsigned int send_length = 0; + unsigned int i, j; + sigset_t mask, oldmask; + size_t total_len = 0, sent, size; + struct socket *ssocket = server->ssocket; + struct msghdr smb_msg = {}; + __be32 rfc1002_marker; + + cifs_in_send_inc(server); + if (cifs_rdma_enabled(server)) { + /* return -EAGAIN when connecting or reconnecting */ + rc = -EAGAIN; + if (server->smbd_conn) + rc = smbd_send(server, num_rqst, rqst); + goto smbd_done; + } + + rc = -EAGAIN; + if (ssocket == NULL) + goto out; + + rc = -ERESTARTSYS; + if (fatal_signal_pending(current)) { + cifs_dbg(FYI, "signal pending before send request\n"); + goto out; + } + + rc = 0; + /* cork the socket */ + tcp_sock_set_cork(ssocket->sk, true); + + for (j = 0; j < num_rqst; j++) + send_length += smb_rqst_len(server, &rqst[j]); + rfc1002_marker = cpu_to_be32(send_length); + + /* + * We should not allow signals to interrupt the network send because + * any partial send will cause session reconnects thus increasing + * latency of system calls and overload a server with unnecessary + * requests. 
+ */ + + sigfillset(&mask); + sigprocmask(SIG_BLOCK, &mask, &oldmask); + + /* Generate a rfc1002 marker */ + { + struct kvec hiov = { + .iov_base = &rfc1002_marker, + .iov_len = 4 + }; + iov_iter_kvec(&smb_msg.msg_iter, ITER_SOURCE, &hiov, 1, 4); + rc = smb_send_kvec(server, &smb_msg, &sent); + if (rc < 0) + goto unmask; + + total_len += sent; + send_length += 4; + } + + cifs_dbg(FYI, "Sending smb: smb_len=%u\n", send_length); + + for (j = 0; j < num_rqst; j++) { + iov = rqst[j].rq_iov; + n_vec = rqst[j].rq_nvec; + + size = 0; + for (i = 0; i < n_vec; i++) { + dump_smb(iov[i].iov_base, iov[i].iov_len); + size += iov[i].iov_len; + } + + iov_iter_kvec(&smb_msg.msg_iter, ITER_SOURCE, iov, n_vec, size); + + rc = smb_send_kvec(server, &smb_msg, &sent); + if (rc < 0) + goto unmask; + + total_len += sent; + + if (iov_iter_count(&rqst[j].rq_iter) > 0) { + smb_msg.msg_iter = rqst[j].rq_iter; + rc = smb_send_kvec(server, &smb_msg, &sent); + if (rc < 0) + break; + total_len += sent; + } + } + +unmask: + sigprocmask(SIG_SETMASK, &oldmask, NULL); + + /* + * If signal is pending but we have already sent the whole packet to + * the server we need to return success status to allow a corresponding + * mid entry to be kept in the pending requests queue thus allowing + * to handle responses from the server by the client. + * + * If only part of the packet has been sent there is no need to hide + * interrupt because the session will be reconnected anyway, so there + * won't be any response from the server to handle. + */ + + if (signal_pending(current) && (total_len != send_length)) { + cifs_dbg(FYI, "signal is pending after attempt to send\n"); + rc = -ERESTARTSYS; + } + + /* uncork it */ + tcp_sock_set_cork(ssocket->sk, false); + + if ((total_len > 0) && (total_len != send_length)) { + cifs_dbg(FYI, "partial send (wanted=%u sent=%zu): terminating session\n", + send_length, total_len); + /* + * If we have only sent part of an SMB then the next SMB could + * be taken as the remainder of this one. We need to kill the + * socket so the server throws away the partial SMB + */ + cifs_signal_cifsd_for_reconnect(server, false); + trace_smb3_partial_send_reconnect(server->current_mid, + server->conn_id, server->hostname); + } +smbd_done: + /* + * there's hardly any use for the layers above to know the + * actual error code here. All they should do at this point is + * to retry the connection and hope it goes away. 
+ */ + if (rc < 0 && rc != -EINTR && rc != -EAGAIN) { + cifs_server_dbg(VFS, "Error %d sending data on socket to server\n", + rc); + rc = -ECONNABORTED; + cifs_signal_cifsd_for_reconnect(server, false); + } else if (rc > 0) + rc = 0; +out: + cifs_in_send_dec(server); + return rc; +} + +static int +smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, + struct smb_rqst *rqst, int flags) +{ + struct smb2_transform_hdr tr_hdr; + struct smb_rqst new_rqst[MAX_COMPOUND] = {}; + struct kvec iov = { + .iov_base = &tr_hdr, + .iov_len = sizeof(tr_hdr), + }; + int rc; + + if (flags & CIFS_COMPRESS_REQ) + return smb_compress(server, &rqst[0], __smb_send_rqst); + + if (!(flags & CIFS_TRANSFORM_REQ)) + return __smb_send_rqst(server, num_rqst, rqst); + + if (WARN_ON_ONCE(num_rqst > MAX_COMPOUND - 1)) + return smb_EIO1(smb_eio_trace_tx_max_compound, num_rqst); + + if (!server->ops->init_transform_rq) { + cifs_server_dbg(VFS, "Encryption requested but transform callback is missing\n"); + return smb_EIO(smb_eio_trace_tx_need_transform); + } + + new_rqst[0].rq_iov = &iov; + new_rqst[0].rq_nvec = 1; + + rc = server->ops->init_transform_rq(server, num_rqst + 1, + new_rqst, rqst); + if (!rc) { + rc = __smb_send_rqst(server, num_rqst + 1, new_rqst); + smb3_free_compound_rqst(num_rqst, &new_rqst[1]); + } + return rc; +} + +static int +wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits, + const int timeout, const int flags, + unsigned int *instance) +{ + long rc; + int *credits; + int optype; + long int t; + int scredits, in_flight; + + if (timeout < 0) + t = MAX_JIFFY_OFFSET; + else + t = msecs_to_jiffies(timeout); + + optype = flags & CIFS_OP_MASK; + + *instance = 0; + + credits = server->ops->get_credits_field(server, optype); + /* Since an echo is already inflight, no need to wait to send another */ + if (*credits <= 0 && optype == CIFS_ECHO_OP) + return -EAGAIN; + + spin_lock(&server->req_lock); + if ((flags & CIFS_TIMEOUT_MASK) == CIFS_NON_BLOCKING) { + /* oplock breaks must not be held up */ + server->in_flight++; + if (server->in_flight > server->max_in_flight) + server->max_in_flight = server->in_flight; + *credits -= 1; + *instance = server->reconnect_instance; + scredits = *credits; + in_flight = server->in_flight; + spin_unlock(&server->req_lock); + + trace_smb3_nblk_credits(server->current_mid, + server->conn_id, server->hostname, scredits, -1, in_flight); + cifs_dbg(FYI, "%s: remove %u credits total=%d\n", + __func__, 1, scredits); + + return 0; + } + + while (1) { + spin_unlock(&server->req_lock); + + spin_lock(&server->srv_lock); + if (server->tcpStatus == CifsExiting) { + spin_unlock(&server->srv_lock); + return -ENOENT; + } + spin_unlock(&server->srv_lock); + + spin_lock(&server->req_lock); + if (*credits < num_credits) { + scredits = *credits; + spin_unlock(&server->req_lock); + + cifs_num_waiters_inc(server); + rc = wait_event_killable_timeout(server->request_q, + has_credits(server, credits, num_credits), t); + cifs_num_waiters_dec(server); + if (!rc) { + spin_lock(&server->req_lock); + scredits = *credits; + in_flight = server->in_flight; + spin_unlock(&server->req_lock); + + trace_smb3_credit_timeout(server->current_mid, + server->conn_id, server->hostname, scredits, + num_credits, in_flight); + cifs_server_dbg(VFS, "wait timed out after %d ms\n", + timeout); + return -EBUSY; + } + if (rc == -ERESTARTSYS) + return -ERESTARTSYS; + spin_lock(&server->req_lock); + } else { + /* + * For normal commands, reserve the last MAX_COMPOUND + * credits to compound 
requests. + * Otherwise these compounds could be permanently + * starved for credits by single-credit requests. + * + * To prevent spinning CPU, block this thread until + * there are >MAX_COMPOUND credits available. + * But only do this is we already have a lot of + * credits in flight to avoid triggering this check + * for servers that are slow to hand out credits on + * new sessions. + */ + if (!optype && num_credits == 1 && + server->in_flight > 2 * MAX_COMPOUND && + *credits <= MAX_COMPOUND) { + spin_unlock(&server->req_lock); + + cifs_num_waiters_inc(server); + rc = wait_event_killable_timeout( + server->request_q, + has_credits(server, credits, + MAX_COMPOUND + 1), + t); + cifs_num_waiters_dec(server); + if (!rc) { + spin_lock(&server->req_lock); + scredits = *credits; + in_flight = server->in_flight; + spin_unlock(&server->req_lock); + + trace_smb3_credit_timeout( + server->current_mid, + server->conn_id, server->hostname, + scredits, num_credits, in_flight); + cifs_server_dbg(VFS, "wait timed out after %d ms\n", + timeout); + return -EBUSY; + } + if (rc == -ERESTARTSYS) + return -ERESTARTSYS; + spin_lock(&server->req_lock); + continue; + } + + /* + * Can not count locking commands against total + * as they are allowed to block on server. + */ + + /* update # of requests on the wire to server */ + if ((flags & CIFS_TIMEOUT_MASK) != CIFS_BLOCKING_OP) { + *credits -= num_credits; + server->in_flight += num_credits; + if (server->in_flight > server->max_in_flight) + server->max_in_flight = server->in_flight; + *instance = server->reconnect_instance; + } + scredits = *credits; + in_flight = server->in_flight; + spin_unlock(&server->req_lock); + + trace_smb3_waitff_credits(server->current_mid, + server->conn_id, server->hostname, scredits, + -(num_credits), in_flight); + cifs_dbg(FYI, "%s: remove %u credits total=%d\n", + __func__, num_credits, scredits); + break; + } + } + return 0; +} + +int wait_for_free_request(struct TCP_Server_Info *server, const int flags, + unsigned int *instance) +{ + return wait_for_free_credits(server, 1, -1, flags, + instance); +} + +static int +wait_for_compound_request(struct TCP_Server_Info *server, int num, + const int flags, unsigned int *instance) +{ + int *credits; + int scredits, in_flight; + + credits = server->ops->get_credits_field(server, flags & CIFS_OP_MASK); + + spin_lock(&server->req_lock); + scredits = *credits; + in_flight = server->in_flight; + + if (*credits < num) { + /* + * If the server is tight on resources or just gives us less + * credits for other reasons (e.g. requests are coming out of + * order and the server delays granting more credits until it + * processes a missing mid) and we exhausted most available + * credits there may be situations when we try to send + * a compound request but we don't have enough credits. At this + * point the client needs to decide if it should wait for + * additional credits or fail the request. If at least one + * request is in flight there is a high probability that the + * server will return enough credits to satisfy this compound + * request. + * + * Return immediately if no requests in flight since we will be + * stuck on waiting for credits. 
+ */ + if (server->in_flight == 0) { + spin_unlock(&server->req_lock); + trace_smb3_insufficient_credits(server->current_mid, + server->conn_id, server->hostname, scredits, + num, in_flight); + cifs_dbg(FYI, "%s: %d requests in flight, needed %d total=%d\n", + __func__, in_flight, num, scredits); + return -EDEADLK; + } + } + spin_unlock(&server->req_lock); + + return wait_for_free_credits(server, num, 60000, flags, + instance); +} + +int +cifs_wait_mtu_credits(struct TCP_Server_Info *server, size_t size, + size_t *num, struct cifs_credits *credits) +{ + *num = size; + credits->value = 0; + credits->instance = server->reconnect_instance; + return 0; +} + +int wait_for_response(struct TCP_Server_Info *server, struct mid_q_entry *mid) +{ + unsigned int sleep_state = TASK_KILLABLE; + int error; + + if (mid->sr_flags & CIFS_INTERRUPTIBLE_WAIT) + sleep_state = TASK_INTERRUPTIBLE; + + error = wait_event_state(server->response_q, + mid->mid_state != MID_REQUEST_SUBMITTED && + mid->mid_state != MID_RESPONSE_RECEIVED, + (sleep_state | TASK_FREEZABLE_UNSAFE)); + if (error < 0) + return -ERESTARTSYS; + + return 0; +} + +/* + * Send a SMB request and set the callback function in the mid to handle + * the result. Caller is responsible for dealing with timeouts. + */ +int +cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst, + mid_receive_t receive, mid_callback_t callback, + mid_handle_t handle, void *cbdata, const int flags, + const struct cifs_credits *exist_credits) +{ + int rc; + struct mid_q_entry *mid; + struct cifs_credits credits = { .value = 0, .instance = 0 }; + unsigned int instance; + int optype; + + optype = flags & CIFS_OP_MASK; + + if ((flags & CIFS_HAS_CREDITS) == 0) { + rc = wait_for_free_request(server, flags, &instance); + if (rc) + return rc; + credits.value = 1; + credits.instance = instance; + } else + instance = exist_credits->instance; + + cifs_server_lock(server); + + /* + * We can't use credits obtained from the previous session to send this + * request. Check if there were reconnects after we obtained credits and + * return -EAGAIN in such cases to let callers handle it. + */ + if (instance != server->reconnect_instance) { + cifs_server_unlock(server); + add_credits_and_wake_if(server, &credits, optype); + return -EAGAIN; + } + + mid = server->ops->setup_async_request(server, rqst); + if (IS_ERR(mid)) { + cifs_server_unlock(server); + add_credits_and_wake_if(server, &credits, optype); + return PTR_ERR(mid); + } + + mid->sr_flags = flags; + mid->receive = receive; + mid->callback = callback; + mid->callback_data = cbdata; + mid->handle = handle; + mid->mid_state = MID_REQUEST_SUBMITTED; + + /* put it on the pending_mid_q */ + spin_lock(&server->mid_queue_lock); + list_add_tail(&mid->qhead, &server->pending_mid_q); + spin_unlock(&server->mid_queue_lock); + + /* + * Need to store the time in mid before calling I/O. For call_async, + * I/O response may come back and free the mid entry on another thread. 
+ */ + cifs_save_when_sent(mid); + rc = smb_send_rqst(server, 1, rqst, flags); + + if (rc < 0) { + revert_current_mid(server, mid->credits); + server->sequence_number -= 2; + delete_mid(server, mid); + } + + cifs_server_unlock(server); + + if (rc == 0) + return 0; + + add_credits_and_wake_if(server, &credits, optype); + return rc; +} + +int cifs_sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server) +{ + int rc = 0; + + cifs_dbg(FYI, "%s: cmd=%d mid=%llu state=%d\n", + __func__, le16_to_cpu(mid->command), mid->mid, mid->mid_state); + + spin_lock(&server->mid_queue_lock); + switch (mid->mid_state) { + case MID_RESPONSE_READY: + spin_unlock(&server->mid_queue_lock); + return rc; + case MID_RETRY_NEEDED: + rc = -EAGAIN; + break; + case MID_RESPONSE_MALFORMED: + rc = smb_EIO(smb_eio_trace_rx_sync_mid_malformed); + break; + case MID_SHUTDOWN: + rc = -EHOSTDOWN; + break; + case MID_RC: + rc = mid->mid_rc; + break; + default: + if (mid->deleted_from_q == false) { + list_del_init(&mid->qhead); + mid->deleted_from_q = true; + } + spin_unlock(&server->mid_queue_lock); + cifs_server_dbg(VFS, "%s: invalid mid state mid=%llu state=%d\n", + __func__, mid->mid, mid->mid_state); + rc = smb_EIO1(smb_eio_trace_rx_sync_mid_invalid, mid->mid_state); + goto sync_mid_done; + } + spin_unlock(&server->mid_queue_lock); + +sync_mid_done: + release_mid(server, mid); + return rc; +} + +static void +cifs_compound_callback(struct TCP_Server_Info *server, struct mid_q_entry *mid) +{ + struct cifs_credits credits = { + .value = server->ops->get_credits(mid), + .instance = server->reconnect_instance, + }; + + add_credits(server, &credits, mid->optype); + + if (mid->mid_state == MID_RESPONSE_RECEIVED) + mid->mid_state = MID_RESPONSE_READY; +} + +static void +cifs_compound_last_callback(struct TCP_Server_Info *server, struct mid_q_entry *mid) +{ + cifs_compound_callback(server, mid); + cifs_wake_up_task(server, mid); +} + +static void +cifs_cancelled_callback(struct TCP_Server_Info *server, struct mid_q_entry *mid) +{ + cifs_compound_callback(server, mid); + release_mid(server, mid); +} + +/* + * Return a channel (master if none) of @ses that can be used to send + * regular requests. + * + * If we are currently binding a new channel (negprot/sess.setup), + * return the new incomplete channel. + */ +struct TCP_Server_Info *cifs_pick_channel(struct cifs_ses *ses) +{ + uint index = 0; + unsigned int min_in_flight = UINT_MAX, max_in_flight = 0; + struct TCP_Server_Info *server = NULL; + int i, start, cur; + + if (!ses) + return NULL; + + spin_lock(&ses->chan_lock); + start = atomic_inc_return(&ses->chan_seq); + for (i = 0; i < ses->chan_count; i++) { + cur = (start + i) % ses->chan_count; + server = ses->chans[cur].server; + if (!server || server->terminate) + continue; + + if (CIFS_CHAN_NEEDS_RECONNECT(ses, cur)) + continue; + + /* + * strictly speaking, we should pick up req_lock to read + * server->in_flight. But it shouldn't matter much here if we + * race while reading this data. The worst that can happen is + * that we could use a channel that's not least loaded. 
Avoiding + * taking the lock could help reduce wait time, which is + * important for this function + */ + if (server->in_flight < min_in_flight) { + min_in_flight = server->in_flight; + index = cur; + } + if (server->in_flight > max_in_flight) + max_in_flight = server->in_flight; + } + + /* if all channels are equally loaded, fall back to round-robin */ + if (min_in_flight == max_in_flight) + index = (uint)start % ses->chan_count; + + server = ses->chans[index].server; + spin_unlock(&ses->chan_lock); + + return server; +} + +int +compound_send_recv(const unsigned int xid, struct cifs_ses *ses, + struct TCP_Server_Info *server, + const int flags, const int num_rqst, struct smb_rqst *rqst, + int *resp_buf_type, struct kvec *resp_iov) +{ + int i, j, optype, rc = 0; + struct mid_q_entry *mid[MAX_COMPOUND]; + bool cancelled_mid[MAX_COMPOUND] = {false}; + struct cifs_credits credits[MAX_COMPOUND] = { + { .value = 0, .instance = 0 } + }; + unsigned int instance; + char *buf; + + optype = flags & CIFS_OP_MASK; + + for (i = 0; i < num_rqst; i++) + resp_buf_type[i] = CIFS_NO_BUFFER; /* no response buf yet */ + + if (!ses || !ses->server || !server) { + cifs_dbg(VFS, "Null session\n"); + return smb_EIO(smb_eio_trace_null_pointers); + } + + spin_lock(&server->srv_lock); + if (server->tcpStatus == CifsExiting) { + spin_unlock(&server->srv_lock); + return -ENOENT; + } + spin_unlock(&server->srv_lock); + + /* + * Wait for all the requests to become available. + * This approach still leaves the possibility to be stuck waiting for + * credits if the server doesn't grant credits to the outstanding + * requests and if the client is completely idle, not generating any + * other requests. + * This can be handled by the eventual session reconnect. + */ + rc = wait_for_compound_request(server, num_rqst, flags, + &instance); + if (rc) + return rc; + + for (i = 0; i < num_rqst; i++) { + credits[i].value = 1; + credits[i].instance = instance; + } + + /* + * Make sure that we sign in the same order that we send on this socket + * and avoid races inside tcp sendmsg code that could cause corruption + * of smb data. + */ + + cifs_server_lock(server); + + /* + * All the parts of the compound chain belong obtained credits from the + * same session. We can not use credits obtained from the previous + * session to send this request. Check if there were reconnects after + * we obtained credits and return -EAGAIN in such cases to let callers + * handle it. + */ + if (instance != server->reconnect_instance) { + cifs_server_unlock(server); + for (j = 0; j < num_rqst; j++) + add_credits(server, &credits[j], optype); + return -EAGAIN; + } + + for (i = 0; i < num_rqst; i++) { + mid[i] = server->ops->setup_request(ses, server, &rqst[i]); + if (IS_ERR(mid[i])) { + revert_current_mid(server, i); + for (j = 0; j < i; j++) + delete_mid(server, mid[j]); + cifs_server_unlock(server); + + /* Update # of requests on wire to server */ + for (j = 0; j < num_rqst; j++) + add_credits(server, &credits[j], optype); + return PTR_ERR(mid[i]); + } + + mid[i]->sr_flags = flags; + mid[i]->mid_state = MID_REQUEST_SUBMITTED; + mid[i]->optype = optype; + /* + * Invoke callback for every part of the compound chain + * to calculate credits properly. Wake up this thread only when + * the last element is received. 
+ */ + if (i < num_rqst - 1) + mid[i]->callback = cifs_compound_callback; + else + mid[i]->callback = cifs_compound_last_callback; + } + rc = smb_send_rqst(server, num_rqst, rqst, flags); + + for (i = 0; i < num_rqst; i++) + cifs_save_when_sent(mid[i]); + + if (rc < 0) { + revert_current_mid(server, num_rqst); + server->sequence_number -= 2; + } + + cifs_server_unlock(server); + + /* + * If sending failed for some reason or it is an oplock break that we + * will not receive a response to - return credits back + */ + if (rc < 0 || (flags & CIFS_NO_SRV_RSP)) { + for (i = 0; i < num_rqst; i++) + add_credits(server, &credits[i], optype); + goto out; + } + + /* + * At this point the request is passed to the network stack - we assume + * that any credits taken from the server structure on the client have + * been spent and we can't return them back. Once we receive responses + * we will collect credits granted by the server in the mid callbacks + * and add those credits to the server structure. + */ + + /* + * Compounding is never used during session establish. + */ + spin_lock(&ses->ses_lock); + if ((ses->ses_status == SES_NEW) || (optype & CIFS_NEG_OP) || (optype & CIFS_SESS_OP)) { + spin_unlock(&ses->ses_lock); + + if (WARN_ON_ONCE(num_rqst != 1 || !resp_iov)) + return -EINVAL; + + cifs_server_lock(server); + smb311_update_preauth_hash(ses, server, rqst[0].rq_iov, rqst[0].rq_nvec); + cifs_server_unlock(server); + + spin_lock(&ses->ses_lock); + } + spin_unlock(&ses->ses_lock); + + for (i = 0; i < num_rqst; i++) { + rc = wait_for_response(server, mid[i]); + if (rc != 0) + break; + } + if (rc != 0) { + for (; i < num_rqst; i++) { + cifs_server_dbg(FYI, "Cancelling wait for mid %llu cmd: %d\n", + mid[i]->mid, le16_to_cpu(mid[i]->command)); + send_cancel(ses, server, &rqst[i], mid[i], xid); + spin_lock(&mid[i]->mid_lock); + mid[i]->wait_cancelled = true; + if (mid[i]->mid_state == MID_REQUEST_SUBMITTED || + mid[i]->mid_state == MID_RESPONSE_RECEIVED) { + mid[i]->callback = cifs_cancelled_callback; + cancelled_mid[i] = true; + credits[i].value = 0; + } + spin_unlock(&mid[i]->mid_lock); + } + } + + for (i = 0; i < num_rqst; i++) { + if (rc < 0) + goto out; + + rc = cifs_sync_mid_result(mid[i], server); + if (rc != 0) { + /* mark this mid as cancelled to not free it below */ + cancelled_mid[i] = true; + goto out; + } + + if (!mid[i]->resp_buf || + mid[i]->mid_state != MID_RESPONSE_READY) { + rc = smb_EIO1(smb_eio_trace_rx_mid_unready, mid[i]->mid_state); + cifs_dbg(FYI, "Bad MID state?\n"); + goto out; + } + + rc = server->ops->check_receive(mid[i], server, + flags & CIFS_LOG_ERROR); + + if (resp_iov) { + buf = (char *)mid[i]->resp_buf; + resp_iov[i].iov_base = buf; + resp_iov[i].iov_len = mid[i]->resp_buf_size; + + if (mid[i]->large_buf) + resp_buf_type[i] = CIFS_LARGE_BUFFER; + else + resp_buf_type[i] = CIFS_SMALL_BUFFER; + + /* mark it so buf will not be freed by delete_mid */ + if ((flags & CIFS_NO_RSP_BUF) == 0) + mid[i]->resp_buf = NULL; + } + } + + /* + * Compounding is never used during session establish. + */ + spin_lock(&ses->ses_lock); + if ((ses->ses_status == SES_NEW) || (optype & CIFS_NEG_OP) || (optype & CIFS_SESS_OP)) { + struct kvec iov = { + .iov_base = resp_iov[0].iov_base, + .iov_len = resp_iov[0].iov_len + }; + spin_unlock(&ses->ses_lock); + cifs_server_lock(server); + smb311_update_preauth_hash(ses, server, &iov, 1); + cifs_server_unlock(server); + spin_lock(&ses->ses_lock); + } + spin_unlock(&ses->ses_lock); + +out: + /* + * This will dequeue all mids. 
After this it is important that the + * demultiplex_thread will not process any of these mids any further. + * This is prevented above by using a noop callback that will not + * wake this thread except for the very last PDU. + */ + for (i = 0; i < num_rqst; i++) { + if (!cancelled_mid[i]) + delete_mid(server, mid[i]); + } + + return rc; +} + +int +cifs_send_recv(const unsigned int xid, struct cifs_ses *ses, + struct TCP_Server_Info *server, + struct smb_rqst *rqst, int *resp_buf_type, const int flags, + struct kvec *resp_iov) +{ + return compound_send_recv(xid, ses, server, flags, 1, + rqst, resp_buf_type, resp_iov); +} + + +/* + * Discard any remaining data in the current SMB. To do this, we borrow the + * current bigbuf. + */ +int +cifs_discard_remaining_data(struct TCP_Server_Info *server) +{ + unsigned int rfclen = server->pdu_size; + size_t remaining = rfclen - server->total_read; + + while (remaining > 0) { + ssize_t length; + + length = cifs_discard_from_socket(server, + min_t(size_t, remaining, + CIFSMaxBufSize + MAX_HEADER_SIZE(server))); + if (length < 0) + return length; + server->total_read += length; + remaining -= length; + } + + return 0; +} + +static int +__cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid, + bool malformed) +{ + int length; + + length = cifs_discard_remaining_data(server); + dequeue_mid(server, mid, malformed); + mid->resp_buf = server->smallbuf; + server->smallbuf = NULL; + return length; +} + +static int +cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid) +{ + struct cifs_io_subrequest *rdata = mid->callback_data; + + return __cifs_readv_discard(server, mid, rdata->result); +} + +int +cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) +{ + int length, len; + unsigned int data_offset, data_len; + struct cifs_io_subrequest *rdata = mid->callback_data; + char *buf = server->smallbuf; + unsigned int buflen = server->pdu_size; + bool use_rdma_mr = false; + + cifs_dbg(FYI, "%s: mid=%llu offset=%llu bytes=%zu\n", + __func__, mid->mid, rdata->subreq.start, rdata->subreq.len); + + /* + * read the rest of READ_RSP header (sans Data array), or whatever we + * can if there's not enough data. At this point, we've read down to + * the Mid. + */ + len = min_t(unsigned int, buflen, server->vals->read_rsp_size) - + HEADER_SIZE(server) + 1; + + length = cifs_read_from_socket(server, + buf + HEADER_SIZE(server) - 1, len); + if (length < 0) + return length; + server->total_read += length; + + if (server->ops->is_session_expired && + server->ops->is_session_expired(buf)) { + cifs_reconnect(server, true); + return -1; + } + + if (server->ops->is_status_pending && + server->ops->is_status_pending(buf, server)) { + cifs_discard_remaining_data(server); + return -1; + } + + /* set up first two iov for signature check and to get credits */ + rdata->iov[0].iov_base = buf; + rdata->iov[0].iov_len = server->total_read; + cifs_dbg(FYI, "0: iov_base=%p iov_len=%zu\n", + rdata->iov[0].iov_base, rdata->iov[0].iov_len); + + /* Was the SMB read successful? */ + rdata->result = server->ops->map_error(buf, false); + if (rdata->result != 0) { + cifs_dbg(FYI, "%s: server returned error %d\n", + __func__, rdata->result); + /* normal error on read response */ + return __cifs_readv_discard(server, mid, false); + } + + /* Is there enough to get to the rest of the READ_RSP header? */ + if (server->total_read < server->vals->read_rsp_size) { + cifs_dbg(FYI, "%s: server returned short header. 
got=%u expected=%zu\n", + __func__, server->total_read, + server->vals->read_rsp_size); + rdata->result = smb_EIO2(smb_eio_trace_read_rsp_short, + server->total_read, server->vals->read_rsp_size); + return cifs_readv_discard(server, mid); + } + + data_offset = server->ops->read_data_offset(buf); + if (data_offset < server->total_read) { + /* + * win2k8 sometimes sends an offset of 0 when the read + * is beyond the EOF. Treat it as if the data starts just after + * the header. + */ + cifs_dbg(FYI, "%s: data offset (%u) inside read response header\n", + __func__, data_offset); + data_offset = server->total_read; + } else if (data_offset > MAX_CIFS_SMALL_BUFFER_SIZE) { + /* data_offset is beyond the end of smallbuf */ + cifs_dbg(FYI, "%s: data offset (%u) beyond end of smallbuf\n", + __func__, data_offset); + rdata->result = smb_EIO1(smb_eio_trace_read_overlarge, + data_offset); + return cifs_readv_discard(server, mid); + } + + cifs_dbg(FYI, "%s: total_read=%u data_offset=%u\n", + __func__, server->total_read, data_offset); + + len = data_offset - server->total_read; + if (len > 0) { + /* read any junk before data into the rest of smallbuf */ + length = cifs_read_from_socket(server, + buf + server->total_read, len); + if (length < 0) + return length; + server->total_read += length; + rdata->iov[0].iov_len = server->total_read; + } + + /* how much data is in the response? */ +#ifdef CONFIG_CIFS_SMB_DIRECT + use_rdma_mr = rdata->mr; +#endif + data_len = server->ops->read_data_length(buf, use_rdma_mr); + if (!use_rdma_mr && (data_offset + data_len > buflen)) { + /* data_len is corrupt -- discard frame */ + rdata->result = smb_EIO2(smb_eio_trace_read_rsp_malformed, + data_offset + data_len, buflen); + return cifs_readv_discard(server, mid); + } + +#ifdef CONFIG_CIFS_SMB_DIRECT + if (rdata->mr) + length = data_len; /* An RDMA read is already done. */ + else +#endif + length = cifs_read_iter_from_socket(server, &rdata->subreq.io_iter, + data_len); + if (length > 0) + rdata->got_bytes += length; + server->total_read += length; + + cifs_dbg(FYI, "total_read=%u buflen=%u remaining=%u\n", + server->total_read, buflen, data_len); + + /* discard anything left over */ + if (server->total_read < buflen) + return cifs_readv_discard(server, mid); + + dequeue_mid(server, mid, false); + mid->resp_buf = server->smallbuf; + server->smallbuf = NULL; + return length; +} |
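For reference, a minimal standalone sketch (not part of the patch above) of the retry budget that the comment in smb_send_kvec() refers to: on a nonblocking socket, each -EAGAIN from sock_sendmsg() is followed by msleep(1 << retries), and the function gives up once retries reaches 14. The names below are illustrative only; the arithmetic matches the "about 15 seconds" figure mentioned in that comment.

```c
/* Illustrative only -- worst-case backoff budget for the nonblocking path. */
#include <stdio.h>

int main(void)
{
	unsigned long total_ms = 0;
	int retries;

	/* msleep(1 << retries) runs for retries 1..13; at 14 smb_send_kvec() returns -EAGAIN */
	for (retries = 1; retries < 14; retries++)
		total_ms += 1UL << retries;

	/* prints 16382 ms, i.e. roughly the 15 seconds cited in the comment */
	printf("worst-case backoff: %lu ms\n", total_ms);
	return 0;
}
```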

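Similarly, a simplified restatement (again not part of the patch) of the throttling check in wait_for_free_credits(): when the link is already busy and only the last MAX_COMPOUND credits remain, a regular single-credit request is made to wait so that compound requests are not starved of credits. The helper name and the constant value (5) are stand-ins chosen for illustration.

```c
/* Illustrative only -- mirrors the starvation-avoidance condition in wait_for_free_credits(). */
#include <stdbool.h>

#define MAX_COMPOUND 5	/* stand-in for the value defined in cifsglob.h */

static bool should_wait_for_more_credits(int optype, int num_credits,
					 int in_flight, int credits_left)
{
	return optype == 0 &&			/* regular command, no special op type */
	       num_credits == 1 &&		/* single-credit request */
	       in_flight > 2 * MAX_COMPOUND &&	/* connection is already busy */
	       credits_left <= MAX_COMPOUND;	/* only the compound reserve remains */
}
```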