summaryrefslogtreecommitdiff
path: root/net/smc/af_smc.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-01-31 14:31:10 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2018-01-31 14:31:10 -0800
commitb2fe5fa68642860e7de76167c3111623aa0d5de1 (patch)
treeb7f9b89b7039ecefbc35fe3c8e73a6ff972641dd /net/smc/af_smc.c
parenta103950e0dd2058df5e8a8d4a915707bdcf205f0 (diff)
parenta54667f6728c2714a400f3c884727da74b6d1717 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: 1) Significantly shrink the core networking routing structures. Result of http://vger.kernel.org/~davem/seoul2017_netdev_keynote.pdf 2) Add netdevsim driver for testing various offloads, from Jakub Kicinski. 3) Support cross-chip FDB operations in DSA, from Vivien Didelot. 4) Add a 2nd listener hash table for TCP, similar to what was done for UDP. From Martin KaFai Lau. 5) Add eBPF based queue selection to tun, from Jason Wang. 6) Lockless qdisc support, from John Fastabend. 7) SCTP stream interleave support, from Xin Long. 8) Smoother TCP receive autotuning, from Eric Dumazet. 9) Lots of erspan tunneling enhancements, from William Tu. 10) Add true function call support to BPF, from Alexei Starovoitov. 11) Add explicit support for GRO HW offloading, from Michael Chan. 12) Support extack generation in more netlink subsystems. From Alexander Aring, Quentin Monnet, and Jakub Kicinski. 13) Add 1000BaseX, flow control, and EEE support to mvneta driver. From Russell King. 14) Add flow table abstraction to netfilter, from Pablo Neira Ayuso. 15) Many improvements and simplifications to the NFP driver bpf JIT, from Jakub Kicinski. 16) Support for ipv6 non-equal cost multipath routing, from Ido Schimmel. 17) Add resource abstration to devlink, from Arkadi Sharshevsky. 18) Packet scheduler classifier shared filter block support, from Jiri Pirko. 19) Avoid locking in act_csum, from Davide Caratti. 20) devinet_ioctl() simplifications from Al viro. 21) More TCP bpf improvements from Lawrence Brakmo. 22) Add support for onlink ipv6 route flag, similar to ipv4, from David Ahern. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1925 commits) tls: Add support for encryption using async offload accelerator ip6mr: fix stale iterator net/sched: kconfig: Remove blank help texts openvswitch: meter: Use 64-bit arithmetic instead of 32-bit tcp_nv: fix potential integer overflow in tcpnv_acked r8169: fix RTL8168EP take too long to complete driver initialization. qmi_wwan: Add support for Quectel EP06 rtnetlink: enable IFLA_IF_NETNSID for RTM_NEWLINK ipmr: Fix ptrdiff_t print formatting ibmvnic: Wait for device response when changing MAC qlcnic: fix deadlock bug tcp: release sk_frag.page in tcp_disconnect ipv4: Get the address of interface correctly. net_sched: gen_estimator: fix lockdep splat net: macb: Handle HRESP error net/mlx5e: IPoIB, Fix copy-paste bug in flow steering refactoring ipv6: addrconf: break critical section in addrconf_verify_rtnl() ipv6: change route cache aging logic i40e/i40evf: Update DESC_NEEDED value to reflect larger value bnxt_en: cleanup DIM work on device shutdown ...
Diffstat (limited to 'net/smc/af_smc.c')
-rw-r--r--net/smc/af_smc.c231
1 files changed, 134 insertions, 97 deletions
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 449f62e1e270..3583c8ab1bae 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -115,7 +115,6 @@ static int smc_release(struct socket *sock)
goto out;
smc = smc_sk(sk);
- sock_hold(sk);
if (sk->sk_state == SMC_LISTEN)
/* smc_close_non_accepted() is called and acquires
* sock lock for child sockets again
@@ -124,10 +123,7 @@ static int smc_release(struct socket *sock)
else
lock_sock(sk);
- if (smc->use_fallback) {
- sk->sk_state = SMC_CLOSED;
- sk->sk_state_change(sk);
- } else {
+ if (!smc->use_fallback) {
rc = smc_close_active(smc);
sock_set_flag(sk, SOCK_DEAD);
sk->sk_shutdown |= SHUTDOWN_MASK;
@@ -136,20 +132,21 @@ static int smc_release(struct socket *sock)
sock_release(smc->clcsock);
smc->clcsock = NULL;
}
+ if (smc->use_fallback) {
+ sock_put(sk); /* passive closing */
+ sk->sk_state = SMC_CLOSED;
+ sk->sk_state_change(sk);
+ }
/* detach socket */
sock_orphan(sk);
sock->sk = NULL;
- if (smc->use_fallback) {
- schedule_delayed_work(&smc->sock_put_work, TCP_TIMEWAIT_LEN);
- } else if (sk->sk_state == SMC_CLOSED) {
+ if (!smc->use_fallback && sk->sk_state == SMC_CLOSED)
smc_conn_free(&smc->conn);
- schedule_delayed_work(&smc->sock_put_work,
- SMC_CLOSE_SOCK_PUT_DELAY);
- }
release_sock(sk);
- sock_put(sk);
+ sk->sk_prot->unhash(sk);
+ sock_put(sk); /* final sock_put */
out:
return rc;
}
@@ -181,7 +178,6 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock)
INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
INIT_LIST_HEAD(&smc->accept_q);
spin_lock_init(&smc->accept_q_lock);
- INIT_DELAYED_WORK(&smc->sock_put_work, smc_close_sock_put_work);
sk->sk_prot->hash(sk);
sk_refcnt_debug_inc(sk);
@@ -377,6 +373,15 @@ static void smc_link_save_peer_info(struct smc_link *link,
link->peer_mtu = clc->qp_mtu;
}
+static void smc_lgr_forget(struct smc_link_group *lgr)
+{
+ spin_lock_bh(&smc_lgr_list.lock);
+ /* do not use this link group for new connections */
+ if (!list_empty(&lgr->list))
+ list_del_init(&lgr->list);
+ spin_unlock_bh(&smc_lgr_list.lock);
+}
+
/* setup for RDMA connection of client */
static int smc_connect_rdma(struct smc_sock *smc)
{
@@ -390,6 +395,8 @@ static int smc_connect_rdma(struct smc_sock *smc)
int rc = 0;
u8 ibport;
+ sock_hold(&smc->sk); /* sock put in passive closing */
+
if (!tcp_sk(smc->clcsock->sk)->syn_smc) {
/* peer has not signalled SMC-capability */
smc->use_fallback = true;
@@ -513,6 +520,8 @@ out_connected:
return rc ? rc : local_contact;
decline_rdma_unlock:
+ if (local_contact == SMC_FIRST_CONTACT)
+ smc_lgr_forget(smc->conn.lgr);
mutex_unlock(&smc_create_lgr_pending);
smc_conn_free(&smc->conn);
decline_rdma:
@@ -520,15 +529,19 @@ decline_rdma:
smc->use_fallback = true;
if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) {
rc = smc_clc_send_decline(smc, reason_code);
- if (rc < sizeof(struct smc_clc_msg_decline))
+ if (rc < 0)
goto out_err;
}
goto out_connected;
out_err_unlock:
+ if (local_contact == SMC_FIRST_CONTACT)
+ smc_lgr_forget(smc->conn.lgr);
mutex_unlock(&smc_create_lgr_pending);
smc_conn_free(&smc->conn);
out_err:
+ if (smc->sk.sk_state == SMC_INIT)
+ sock_put(&smc->sk); /* passive closing */
return rc;
}
@@ -581,40 +594,33 @@ out_err:
static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
{
- struct sock *sk = &lsmc->sk;
- struct socket *new_clcsock;
+ struct socket *new_clcsock = NULL;
+ struct sock *lsk = &lsmc->sk;
struct sock *new_sk;
int rc;
- release_sock(&lsmc->sk);
- new_sk = smc_sock_alloc(sock_net(sk), NULL);
+ release_sock(lsk);
+ new_sk = smc_sock_alloc(sock_net(lsk), NULL);
if (!new_sk) {
rc = -ENOMEM;
- lsmc->sk.sk_err = ENOMEM;
+ lsk->sk_err = ENOMEM;
*new_smc = NULL;
- lock_sock(&lsmc->sk);
+ lock_sock(lsk);
goto out;
}
*new_smc = smc_sk(new_sk);
rc = kernel_accept(lsmc->clcsock, &new_clcsock, 0);
- lock_sock(&lsmc->sk);
- if (rc < 0) {
- lsmc->sk.sk_err = -rc;
- new_sk->sk_state = SMC_CLOSED;
- sock_set_flag(new_sk, SOCK_DEAD);
- sk->sk_prot->unhash(new_sk);
- sock_put(new_sk);
- *new_smc = NULL;
- goto out;
- }
- if (lsmc->sk.sk_state == SMC_CLOSED) {
+ lock_sock(lsk);
+ if (rc < 0)
+ lsk->sk_err = -rc;
+ if (rc < 0 || lsk->sk_state == SMC_CLOSED) {
if (new_clcsock)
sock_release(new_clcsock);
new_sk->sk_state = SMC_CLOSED;
sock_set_flag(new_sk, SOCK_DEAD);
- sk->sk_prot->unhash(new_sk);
- sock_put(new_sk);
+ new_sk->sk_prot->unhash(new_sk);
+ sock_put(new_sk); /* final */
*new_smc = NULL;
goto out;
}
@@ -631,7 +637,7 @@ static void smc_accept_enqueue(struct sock *parent, struct sock *sk)
{
struct smc_sock *par = smc_sk(parent);
- sock_hold(sk);
+ sock_hold(sk); /* sock_put in smc_accept_unlink () */
spin_lock(&par->accept_q_lock);
list_add_tail(&smc_sk(sk)->accept_q, &par->accept_q);
spin_unlock(&par->accept_q_lock);
@@ -647,7 +653,7 @@ static void smc_accept_unlink(struct sock *sk)
list_del_init(&smc_sk(sk)->accept_q);
spin_unlock(&par->accept_q_lock);
sk_acceptq_removed(&smc_sk(sk)->listen_smc->sk);
- sock_put(sk);
+ sock_put(sk); /* sock_hold in smc_accept_enqueue */
}
/* remove a sock from the accept queue to bind it to a new socket created
@@ -664,8 +670,12 @@ struct sock *smc_accept_dequeue(struct sock *parent,
smc_accept_unlink(new_sk);
if (new_sk->sk_state == SMC_CLOSED) {
+ if (isk->clcsock) {
+ sock_release(isk->clcsock);
+ isk->clcsock = NULL;
+ }
new_sk->sk_prot->unhash(new_sk);
- sock_put(new_sk);
+ sock_put(new_sk); /* final */
continue;
}
if (new_sock)
@@ -680,14 +690,11 @@ void smc_close_non_accepted(struct sock *sk)
{
struct smc_sock *smc = smc_sk(sk);
- sock_hold(sk);
lock_sock(sk);
if (!sk->sk_lingertime)
/* wait for peer closing */
sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT;
- if (smc->use_fallback) {
- sk->sk_state = SMC_CLOSED;
- } else {
+ if (!smc->use_fallback) {
smc_close_active(smc);
sock_set_flag(sk, SOCK_DEAD);
sk->sk_shutdown |= SHUTDOWN_MASK;
@@ -700,14 +707,15 @@ void smc_close_non_accepted(struct sock *sk)
sock_release(tcp);
}
if (smc->use_fallback) {
- schedule_delayed_work(&smc->sock_put_work, TCP_TIMEWAIT_LEN);
- } else if (sk->sk_state == SMC_CLOSED) {
- smc_conn_free(&smc->conn);
- schedule_delayed_work(&smc->sock_put_work,
- SMC_CLOSE_SOCK_PUT_DELAY);
+ sock_put(sk); /* passive closing */
+ sk->sk_state = SMC_CLOSED;
+ } else {
+ if (sk->sk_state == SMC_CLOSED)
+ smc_conn_free(&smc->conn);
}
release_sock(sk);
- sock_put(sk);
+ sk->sk_prot->unhash(sk);
+ sock_put(sk); /* final sock_put */
}
static int smc_serv_conf_first_link(struct smc_sock *smc)
@@ -751,14 +759,16 @@ static void smc_listen_work(struct work_struct *work)
{
struct smc_sock *new_smc = container_of(work, struct smc_sock,
smc_listen_work);
+ struct smc_clc_msg_proposal_prefix *pclc_prfx;
struct socket *newclcsock = new_smc->clcsock;
struct smc_sock *lsmc = new_smc->listen_smc;
struct smc_clc_msg_accept_confirm cclc;
int local_contact = SMC_REUSE_CONTACT;
struct sock *newsmcsk = &new_smc->sk;
- struct smc_clc_msg_proposal pclc;
+ struct smc_clc_msg_proposal *pclc;
struct smc_ib_device *smcibdev;
struct sockaddr_in peeraddr;
+ u8 buf[SMC_CLC_MAX_LEN];
struct smc_link *link;
int reason_code = 0;
int rc = 0, len;
@@ -775,7 +785,7 @@ static void smc_listen_work(struct work_struct *work)
/* do inband token exchange -
*wait for and receive SMC Proposal CLC message
*/
- reason_code = smc_clc_wait_msg(new_smc, &pclc, sizeof(pclc),
+ reason_code = smc_clc_wait_msg(new_smc, &buf, sizeof(buf),
SMC_CLC_PROPOSAL);
if (reason_code < 0)
goto out_err;
@@ -804,8 +814,11 @@ static void smc_listen_work(struct work_struct *work)
reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
goto decline_rdma;
}
- if ((pclc.outgoing_subnet != subnet) ||
- (pclc.prefix_len != prefix_len)) {
+
+ pclc = (struct smc_clc_msg_proposal *)&buf;
+ pclc_prfx = smc_clc_proposal_get_prefix(pclc);
+ if (pclc_prfx->outgoing_subnet != subnet ||
+ pclc_prfx->prefix_len != prefix_len) {
reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
goto decline_rdma;
}
@@ -816,7 +829,7 @@ static void smc_listen_work(struct work_struct *work)
/* allocate connection / link group */
mutex_lock(&smc_create_lgr_pending);
local_contact = smc_conn_create(new_smc, peeraddr.sin_addr.s_addr,
- smcibdev, ibport, &pclc.lcl, 0);
+ smcibdev, ibport, &pclc->lcl, 0);
if (local_contact < 0) {
rc = local_contact;
if (rc == -ENOMEM)
@@ -879,11 +892,9 @@ static void smc_listen_work(struct work_struct *work)
}
/* QP confirmation over RoCE fabric */
reason_code = smc_serv_conf_first_link(new_smc);
- if (reason_code < 0) {
+ if (reason_code < 0)
/* peer is not aware of a problem */
- rc = reason_code;
goto out_err_unlock;
- }
if (reason_code > 0)
goto decline_rdma_unlock;
}
@@ -910,21 +921,26 @@ enqueue:
return;
decline_rdma_unlock:
+ if (local_contact == SMC_FIRST_CONTACT)
+ smc_lgr_forget(new_smc->conn.lgr);
mutex_unlock(&smc_create_lgr_pending);
decline_rdma:
/* RDMA setup failed, switch back to TCP */
smc_conn_free(&new_smc->conn);
new_smc->use_fallback = true;
if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) {
- rc = smc_clc_send_decline(new_smc, reason_code);
- if (rc < sizeof(struct smc_clc_msg_decline))
+ if (smc_clc_send_decline(new_smc, reason_code) < 0)
goto out_err;
}
goto out_connected;
out_err_unlock:
+ if (local_contact == SMC_FIRST_CONTACT)
+ smc_lgr_forget(new_smc->conn.lgr);
mutex_unlock(&smc_create_lgr_pending);
out_err:
+ if (newsmcsk->sk_state == SMC_INIT)
+ sock_put(&new_smc->sk); /* passive closing */
newsmcsk->sk_state = SMC_CLOSED;
smc_conn_free(&new_smc->conn);
goto enqueue; /* queue new sock with sk_err set */
@@ -934,11 +950,12 @@ static void smc_tcp_listen_work(struct work_struct *work)
{
struct smc_sock *lsmc = container_of(work, struct smc_sock,
tcp_listen_work);
+ struct sock *lsk = &lsmc->sk;
struct smc_sock *new_smc;
int rc = 0;
- lock_sock(&lsmc->sk);
- while (lsmc->sk.sk_state == SMC_LISTEN) {
+ lock_sock(lsk);
+ while (lsk->sk_state == SMC_LISTEN) {
rc = smc_clcsock_accept(lsmc, &new_smc);
if (rc)
goto out;
@@ -947,15 +964,25 @@ static void smc_tcp_listen_work(struct work_struct *work)
new_smc->listen_smc = lsmc;
new_smc->use_fallback = false; /* assume rdma capability first*/
- sock_hold(&lsmc->sk); /* sock_put in smc_listen_work */
+ sock_hold(lsk); /* sock_put in smc_listen_work */
INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);
smc_copy_sock_settings_to_smc(new_smc);
- schedule_work(&new_smc->smc_listen_work);
+ sock_hold(&new_smc->sk); /* sock_put in passive closing */
+ if (!schedule_work(&new_smc->smc_listen_work))
+ sock_put(&new_smc->sk);
}
out:
- release_sock(&lsmc->sk);
- lsmc->sk.sk_data_ready(&lsmc->sk); /* no more listening, wake accept */
+ if (lsmc->clcsock) {
+ sock_release(lsmc->clcsock);
+ lsmc->clcsock = NULL;
+ }
+ release_sock(lsk);
+ /* no more listening, wake up smc_close_wait_listen_clcsock and
+ * accept
+ */
+ lsk->sk_state_change(lsk);
+ sock_put(&lsmc->sk); /* sock_hold in smc_listen */
}
static int smc_listen(struct socket *sock, int backlog)
@@ -989,7 +1016,9 @@ static int smc_listen(struct socket *sock, int backlog)
sk->sk_ack_backlog = 0;
sk->sk_state = SMC_LISTEN;
INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
- schedule_work(&smc->tcp_listen_work);
+ sock_hold(sk); /* sock_hold in tcp_listen_worker */
+ if (!schedule_work(&smc->tcp_listen_work))
+ sock_put(sk);
out:
release_sock(sk);
@@ -1006,6 +1035,7 @@ static int smc_accept(struct socket *sock, struct socket *new_sock,
int rc = 0;
lsmc = smc_sk(sk);
+ sock_hold(sk); /* sock_put below */
lock_sock(sk);
if (lsmc->sk.sk_state != SMC_LISTEN) {
@@ -1040,6 +1070,7 @@ static int smc_accept(struct socket *sock, struct socket *new_sock,
out:
release_sock(sk);
+ sock_put(sk); /* sock_hold above */
return rc;
}
@@ -1109,21 +1140,15 @@ out:
static __poll_t smc_accept_poll(struct sock *parent)
{
- struct smc_sock *isk;
- struct sock *sk;
+ struct smc_sock *isk = smc_sk(parent);
+ int mask = 0;
- lock_sock(parent);
- list_for_each_entry(isk, &smc_sk(parent)->accept_q, accept_q) {
- sk = (struct sock *)isk;
+ spin_lock(&isk->accept_q_lock);
+ if (!list_empty(&isk->accept_q))
+ mask = POLLIN | POLLRDNORM;
+ spin_unlock(&isk->accept_q_lock);
- if (sk->sk_state == SMC_ACTIVE) {
- release_sock(parent);
- return POLLIN | POLLRDNORM;
- }
- }
- release_sock(parent);
-
- return 0;
+ return mask;
}
static __poll_t smc_poll(struct file *file, struct socket *sock,
@@ -1134,9 +1159,15 @@ static __poll_t smc_poll(struct file *file, struct socket *sock,
struct smc_sock *smc;
int rc;
+ if (!sk)
+ return POLLNVAL;
+
smc = smc_sk(sock->sk);
+ sock_hold(sk);
+ lock_sock(sk);
if ((sk->sk_state == SMC_INIT) || smc->use_fallback) {
/* delegate to CLC child sock */
+ release_sock(sk);
mask = smc->clcsock->ops->poll(file, smc->clcsock, wait);
/* if non-blocking connect finished ... */
lock_sock(sk);
@@ -1148,37 +1179,43 @@ static __poll_t smc_poll(struct file *file, struct socket *sock,
rc = smc_connect_rdma(smc);
if (rc < 0)
mask |= POLLERR;
- else
- /* success cases including fallback */
- mask |= POLLOUT | POLLWRNORM;
+ /* success cases including fallback */
+ mask |= POLLOUT | POLLWRNORM;
}
}
- release_sock(sk);
} else {
- sock_poll_wait(file, sk_sleep(sk), wait);
- if (sk->sk_state == SMC_LISTEN)
- /* woken up by sk_data_ready in smc_listen_work() */
- mask |= smc_accept_poll(sk);
+ if (sk->sk_state != SMC_CLOSED) {
+ release_sock(sk);
+ sock_poll_wait(file, sk_sleep(sk), wait);
+ lock_sock(sk);
+ }
if (sk->sk_err)
mask |= POLLERR;
- if (atomic_read(&smc->conn.sndbuf_space) ||
- (sk->sk_shutdown & SEND_SHUTDOWN)) {
- mask |= POLLOUT | POLLWRNORM;
- } else {
- sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
- set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
- }
- if (atomic_read(&smc->conn.bytes_to_rcv))
- mask |= POLLIN | POLLRDNORM;
if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
(sk->sk_state == SMC_CLOSED))
mask |= POLLHUP;
- if (sk->sk_shutdown & RCV_SHUTDOWN)
- mask |= POLLIN | POLLRDNORM | POLLRDHUP;
- if (sk->sk_state == SMC_APPCLOSEWAIT1)
- mask |= POLLIN;
+ if (sk->sk_state == SMC_LISTEN) {
+ /* woken up by sk_data_ready in smc_listen_work() */
+ mask = smc_accept_poll(sk);
+ } else {
+ if (atomic_read(&smc->conn.sndbuf_space) ||
+ sk->sk_shutdown & SEND_SHUTDOWN) {
+ mask |= POLLOUT | POLLWRNORM;
+ } else {
+ sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
+ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+ }
+ if (atomic_read(&smc->conn.bytes_to_rcv))
+ mask |= POLLIN | POLLRDNORM;
+ if (sk->sk_shutdown & RCV_SHUTDOWN)
+ mask |= POLLIN | POLLRDNORM | POLLRDHUP;
+ if (sk->sk_state == SMC_APPCLOSEWAIT1)
+ mask |= POLLIN;
+ }
}
+ release_sock(sk);
+ sock_put(sk);
return mask;
}