summaryrefslogtreecommitdiff
path: root/net/smc
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-09-23 05:41:27 -1000
committerLinus Torvalds <torvalds@linux-foundation.org>2017-09-23 05:41:27 -1000
commit71aa60f67f032dffe58999bd8ae4b2f38a9ea05e (patch)
tree4d755f640b3135b14bf482215609b22379b62554 /net/smc
parent79444df4e7f03843be78e4b9188d095931648842 (diff)
parent4e683f499a15cd777d3cb51aaebe48d72334c852 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Pull networking fixes from David Miller: 1) Fix NAPI poll list corruption in enic driver, from Christian Lamparter. 2) Fix route use after free, from Eric Dumazet. 3) Fix regression in reuseaddr handling, from Josef Bacik. 4) Assert the size of control messages in compat handling since we copy it in from userspace twice. From Meng Xu. 5) SMC layer bug fixes (missing RCU locking, bad refcounting, etc.) from Ursula Braun. 6) Fix races in AF_PACKET fanout handling, from Willem de Bruijn. 7) Don't use ARRAY_SIZE on spinlock array which might have zero entries, from Geert Uytterhoeven. 8) Fix miscomputation of checksum in ipv6 udp code, from Subash Abhinov Kasiviswanathan. 9) Push the ipv6 header properly in ipv6 GRE tunnel driver, from Xin Long. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (75 commits) inet: fix improper empty comparison net: use inet6_rcv_saddr to compare sockets net: set tb->fast_sk_family net: orphan frags on stand-alone ptype in dev_queue_xmit_nit MAINTAINERS: update git tree locations for ieee802154 subsystem net: prevent dst uses after free net: phy: Fix truncation of large IRQ numbers in phy_attached_print() net/smc: no close wait in case of process shut down net/smc: introduce a delay net/smc: terminate link group if out-of-sync is received net/smc: longer delay for client link group removal net/smc: adapt send request completion notification net/smc: adjust net_device refcount net/smc: take RCU read lock for routing cache lookup net/smc: add receive timeout check net/smc: add missing dev_put net: stmmac: Cocci spatch "of_table" lan78xx: Use default values loaded from EEPROM/OTP after reset lan78xx: Allow EEPROM write for less than MAX_EEPROM_SIZE lan78xx: Fix for eeprom read/write when device auto suspend ...
Diffstat (limited to 'net/smc')
-rw-r--r--net/smc/af_smc.c16
-rw-r--r--net/smc/smc.h2
-rw-r--r--net/smc/smc_clc.c10
-rw-r--r--net/smc/smc_clc.h3
-rw-r--r--net/smc/smc_close.c27
-rw-r--r--net/smc/smc_core.c16
-rw-r--r--net/smc/smc_ib.c1
-rw-r--r--net/smc/smc_pnet.c4
-rw-r--r--net/smc/smc_rx.c2
-rw-r--r--net/smc/smc_tx.c12
-rw-r--r--net/smc/smc_wr.c2
11 files changed, 58 insertions, 37 deletions
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 8c6d24b2995d..745f145d4c4d 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -282,6 +282,7 @@ int smc_netinfo_by_tcpsk(struct socket *clcsock,
__be32 *subnet, u8 *prefix_len)
{
struct dst_entry *dst = sk_dst_get(clcsock->sk);
+ struct in_device *in_dev;
struct sockaddr_in addr;
int rc = -ENOENT;
int len;
@@ -298,14 +299,17 @@ int smc_netinfo_by_tcpsk(struct socket *clcsock,
/* get address to which the internal TCP socket is bound */
kernel_getsockname(clcsock, (struct sockaddr *)&addr, &len);
/* analyze IPv4 specific data of net_device belonging to TCP socket */
- for_ifa(dst->dev->ip_ptr) {
- if (ifa->ifa_address != addr.sin_addr.s_addr)
+ rcu_read_lock();
+ in_dev = __in_dev_get_rcu(dst->dev);
+ for_ifa(in_dev) {
+ if (!inet_ifa_match(addr.sin_addr.s_addr, ifa))
continue;
*prefix_len = inet_mask_len(ifa->ifa_mask);
*subnet = ifa->ifa_address & ifa->ifa_mask;
rc = 0;
break;
- } endfor_ifa(dst->dev->ip_ptr);
+ } endfor_ifa(in_dev);
+ rcu_read_unlock();
out_rel:
dst_release(dst);
@@ -509,7 +513,7 @@ decline_rdma:
/* RDMA setup failed, switch back to TCP */
smc->use_fallback = true;
if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) {
- rc = smc_clc_send_decline(smc, reason_code, 0);
+ rc = smc_clc_send_decline(smc, reason_code);
if (rc < sizeof(struct smc_clc_msg_decline))
goto out_err;
}
@@ -804,8 +808,6 @@ static void smc_listen_work(struct work_struct *work)
rc = local_contact;
if (rc == -ENOMEM)
reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/
- else if (rc == -ENOLINK)
- reason_code = SMC_CLC_DECL_SYNCERR; /* synchr. error */
goto decline_rdma;
}
link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];
@@ -899,7 +901,7 @@ decline_rdma:
smc_conn_free(&new_smc->conn);
new_smc->use_fallback = true;
if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) {
- rc = smc_clc_send_decline(new_smc, reason_code, 0);
+ rc = smc_clc_send_decline(new_smc, reason_code);
if (rc < sizeof(struct smc_clc_msg_decline))
goto out_err;
}
diff --git a/net/smc/smc.h b/net/smc/smc.h
index 6e44313e4467..0ccd6fa387ad 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -149,7 +149,7 @@ struct smc_connection {
atomic_t sndbuf_space; /* remaining space in sndbuf */
u16 tx_cdc_seq; /* sequence # for CDC send */
spinlock_t send_lock; /* protect wr_sends */
- struct work_struct tx_work; /* retry of smc_cdc_msg_send */
+ struct delayed_work tx_work; /* retry of smc_cdc_msg_send */
struct smc_host_cdc_msg local_rx_ctrl; /* filled during event_handl.
* .prod cf. TCP rcv_nxt
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index 3934913ab835..b7dd2743fb5c 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -95,9 +95,10 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
}
if (clcm->type == SMC_CLC_DECLINE) {
reason_code = SMC_CLC_DECL_REPLY;
- if (ntohl(((struct smc_clc_msg_decline *)buf)->peer_diagnosis)
- == SMC_CLC_DECL_SYNCERR)
+ if (((struct smc_clc_msg_decline *)buf)->hdr.flag) {
smc->conn.lgr->sync_err = true;
+ smc_lgr_terminate(smc->conn.lgr);
+ }
}
out:
@@ -105,8 +106,7 @@ out:
}
/* send CLC DECLINE message across internal TCP socket */
-int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info,
- u8 out_of_sync)
+int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info)
{
struct smc_clc_msg_decline dclc;
struct msghdr msg;
@@ -118,7 +118,7 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info,
dclc.hdr.type = SMC_CLC_DECLINE;
dclc.hdr.length = htons(sizeof(struct smc_clc_msg_decline));
dclc.hdr.version = SMC_CLC_V1;
- dclc.hdr.flag = out_of_sync ? 1 : 0;
+ dclc.hdr.flag = (peer_diag_info == SMC_CLC_DECL_SYNCERR) ? 1 : 0;
memcpy(dclc.id_for_peer, local_systemid, sizeof(local_systemid));
dclc.peer_diagnosis = htonl(peer_diag_info);
memcpy(dclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index 13db8ce177c9..1c55414041d4 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -106,8 +106,7 @@ struct smc_ib_device;
int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
u8 expected_type);
-int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info,
- u8 out_of_sync);
+int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info);
int smc_clc_send_proposal(struct smc_sock *smc, struct smc_ib_device *smcibdev,
u8 ibport);
int smc_clc_send_confirm(struct smc_sock *smc);
diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c
index 3c2e166b5d22..f0d16fb825f7 100644
--- a/net/smc/smc_close.c
+++ b/net/smc/smc_close.c
@@ -174,15 +174,15 @@ int smc_close_active(struct smc_sock *smc)
{
struct smc_cdc_conn_state_flags *txflags =
&smc->conn.local_tx_ctrl.conn_state_flags;
- long timeout = SMC_MAX_STREAM_WAIT_TIMEOUT;
struct smc_connection *conn = &smc->conn;
struct sock *sk = &smc->sk;
int old_state;
+ long timeout;
int rc = 0;
- if (sock_flag(sk, SOCK_LINGER) &&
- !(current->flags & PF_EXITING))
- timeout = sk->sk_lingertime;
+ timeout = current->flags & PF_EXITING ?
+ 0 : sock_flag(sk, SOCK_LINGER) ?
+ sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;
again:
old_state = sk->sk_state;
@@ -208,7 +208,7 @@ again:
case SMC_ACTIVE:
smc_close_stream_wait(smc, timeout);
release_sock(sk);
- cancel_work_sync(&conn->tx_work);
+ cancel_delayed_work_sync(&conn->tx_work);
lock_sock(sk);
if (sk->sk_state == SMC_ACTIVE) {
/* send close request */
@@ -234,7 +234,7 @@ again:
if (!smc_cdc_rxed_any_close(conn))
smc_close_stream_wait(smc, timeout);
release_sock(sk);
- cancel_work_sync(&conn->tx_work);
+ cancel_delayed_work_sync(&conn->tx_work);
lock_sock(sk);
if (sk->sk_err != ECONNABORTED) {
/* confirm close from peer */
@@ -263,7 +263,9 @@ again:
/* peer sending PeerConnectionClosed will cause transition */
break;
case SMC_PROCESSABORT:
- cancel_work_sync(&conn->tx_work);
+ release_sock(sk);
+ cancel_delayed_work_sync(&conn->tx_work);
+ lock_sock(sk);
smc_close_abort(conn);
sk->sk_state = SMC_CLOSED;
smc_close_wait_tx_pends(smc);
@@ -411,13 +413,14 @@ void smc_close_sock_put_work(struct work_struct *work)
int smc_close_shutdown_write(struct smc_sock *smc)
{
struct smc_connection *conn = &smc->conn;
- long timeout = SMC_MAX_STREAM_WAIT_TIMEOUT;
struct sock *sk = &smc->sk;
int old_state;
+ long timeout;
int rc = 0;
- if (sock_flag(sk, SOCK_LINGER))
- timeout = sk->sk_lingertime;
+ timeout = current->flags & PF_EXITING ?
+ 0 : sock_flag(sk, SOCK_LINGER) ?
+ sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;
again:
old_state = sk->sk_state;
@@ -425,7 +428,7 @@ again:
case SMC_ACTIVE:
smc_close_stream_wait(smc, timeout);
release_sock(sk);
- cancel_work_sync(&conn->tx_work);
+ cancel_delayed_work_sync(&conn->tx_work);
lock_sock(sk);
/* send close wr request */
rc = smc_close_wr(conn);
@@ -439,7 +442,7 @@ again:
if (!smc_cdc_rxed_any_close(conn))
smc_close_stream_wait(smc, timeout);
release_sock(sk);
- cancel_work_sync(&conn->tx_work);
+ cancel_delayed_work_sync(&conn->tx_work);
lock_sock(sk);
/* confirm close from peer */
rc = smc_close_wr(conn);
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 1a16d51e2330..20b66e79c5d6 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -25,8 +25,9 @@
#include "smc_cdc.h"
#include "smc_close.h"
-#define SMC_LGR_NUM_INCR 256
-#define SMC_LGR_FREE_DELAY (600 * HZ)
+#define SMC_LGR_NUM_INCR 256
+#define SMC_LGR_FREE_DELAY_SERV (600 * HZ)
+#define SMC_LGR_FREE_DELAY_CLNT (SMC_LGR_FREE_DELAY_SERV + 10)
static u32 smc_lgr_num; /* unique link group number */
@@ -107,8 +108,15 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn)
__smc_lgr_unregister_conn(conn);
}
write_unlock_bh(&lgr->conns_lock);
- if (reduced && !lgr->conns_num)
- schedule_delayed_work(&lgr->free_work, SMC_LGR_FREE_DELAY);
+ if (!reduced || lgr->conns_num)
+ return;
+ /* client link group creation always follows the server link group
+ * creation. For client use a somewhat higher removal delay time,
+ * otherwise there is a risk of out-of-sync link groups.
+ */
+ mod_delayed_work(system_wq, &lgr->free_work,
+ lgr->role == SMC_CLNT ? SMC_LGR_FREE_DELAY_CLNT :
+ SMC_LGR_FREE_DELAY_SERV);
}
static void smc_lgr_free_work(struct work_struct *work)
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index 547e0e113b17..0b5852299158 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -380,6 +380,7 @@ static int smc_ib_fill_gid_and_mac(struct smc_ib_device *smcibdev, u8 ibport)
ndev = smcibdev->ibdev->get_netdev(smcibdev->ibdev, ibport);
if (ndev) {
memcpy(&smcibdev->mac, ndev->dev_addr, ETH_ALEN);
+ dev_put(ndev);
} else if (!rc) {
memcpy(&smcibdev->mac[ibport - 1][0],
&smcibdev->gid[ibport - 1].raw[8], 3);
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index 78f7af28ae4f..31f8453c25c5 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -181,8 +181,10 @@ static int smc_pnet_enter(struct smc_pnetentry *new_pnetelem)
sizeof(new_pnetelem->ndev->name)) ||
smc_pnet_same_ibname(pnetelem,
new_pnetelem->smcibdev->ibdev->name,
- new_pnetelem->ib_port))
+ new_pnetelem->ib_port)) {
+ dev_put(pnetelem->ndev);
goto found;
+ }
}
list_add_tail(&new_pnetelem->list, &smc_pnettable.pnetlist);
rc = 0;
diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c
index b17a333e9bb0..3e631ae4b6b6 100644
--- a/net/smc/smc_rx.c
+++ b/net/smc/smc_rx.c
@@ -148,6 +148,8 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg, size_t len,
read_done = sock_intr_errno(timeo);
break;
}
+ if (!timeo)
+ return -EAGAIN;
}
if (!atomic_read(&conn->bytes_to_rcv)) {
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index 3c656beb8820..3866573288dd 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -24,6 +24,8 @@
#include "smc_cdc.h"
#include "smc_tx.h"
+#define SMC_TX_WORK_DELAY HZ
+
/***************************** sndbuf producer *******************************/
/* callback implementation for sk.sk_write_space()
@@ -406,7 +408,8 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
goto out_unlock;
}
rc = 0;
- schedule_work(&conn->tx_work);
+ schedule_delayed_work(&conn->tx_work,
+ SMC_TX_WORK_DELAY);
}
goto out_unlock;
}
@@ -430,7 +433,7 @@ out_unlock:
*/
static void smc_tx_work(struct work_struct *work)
{
- struct smc_connection *conn = container_of(work,
+ struct smc_connection *conn = container_of(to_delayed_work(work),
struct smc_connection,
tx_work);
struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
@@ -468,7 +471,8 @@ void smc_tx_consumer_update(struct smc_connection *conn)
if (!rc)
rc = smc_cdc_msg_send(conn, wr_buf, pend);
if (rc < 0) {
- schedule_work(&conn->tx_work);
+ schedule_delayed_work(&conn->tx_work,
+ SMC_TX_WORK_DELAY);
return;
}
smc_curs_write(&conn->rx_curs_confirmed,
@@ -487,6 +491,6 @@ void smc_tx_consumer_update(struct smc_connection *conn)
void smc_tx_init(struct smc_sock *smc)
{
smc->sk.sk_write_space = smc_tx_write_space;
- INIT_WORK(&smc->conn.tx_work, smc_tx_work);
+ INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work);
spin_lock_init(&smc->conn.send_lock);
}
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index ab56bda66783..525d91e0d57e 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -244,7 +244,7 @@ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv)
int rc;
ib_req_notify_cq(link->smcibdev->roce_cq_send,
- IB_CQ_SOLICITED_MASK | IB_CQ_REPORT_MISSED_EVENTS);
+ IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
pend = container_of(priv, struct smc_wr_tx_pend, priv);
rc = ib_post_send(link->roce_qp, &link->wr_tx_ibs[pend->idx],
&failed_wr);