summaryrefslogtreecommitdiff
path: root/net/smc/af_smc.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/smc/af_smc.c')
-rw-r--r--net/smc/af_smc.c131
1 files changed, 80 insertions, 51 deletions
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 9d76e902fd77..f97f77b041d9 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -16,8 +16,7 @@
* based on prototype from Frank Blaschka
*/
-#define KMSG_COMPONENT "smc"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#define pr_fmt(fmt) "smc: " fmt
#include <linux/module.h>
#include <linux/socket.h>
@@ -30,6 +29,10 @@
#include <linux/splice.h>
#include <net/sock.h>
+#include <net/inet_common.h>
+#if IS_ENABLED(CONFIG_IPV6)
+#include <net/ipv6.h>
+#endif
#include <net/tcp.h>
#include <net/smc.h>
#include <asm/ioctls.h>
@@ -53,8 +56,8 @@
#include "smc_stats.h"
#include "smc_tracepoint.h"
#include "smc_sysctl.h"
-#include "smc_loopback.h"
#include "smc_inet.h"
+#include "smc_hs_bpf.h"
static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link group
* creation on server
@@ -360,8 +363,21 @@ static void smc_destruct(struct sock *sk)
return;
if (!sock_flag(sk, SOCK_DEAD))
return;
+ switch (sk->sk_family) {
+ case AF_INET:
+ inet_sock_destruct(sk);
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case AF_INET6:
+ inet6_sock_destruct(sk);
+ break;
+#endif
+ }
}
+static struct lock_class_key smc_key;
+static struct lock_class_key smc_slock_key;
+
void smc_sk_init(struct net *net, struct sock *sk, int protocol)
{
struct smc_sock *smc = smc_sk(sk);
@@ -375,6 +391,8 @@ void smc_sk_init(struct net *net, struct sock *sk, int protocol)
INIT_WORK(&smc->connect_work, smc_connect_work);
INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work);
INIT_LIST_HEAD(&smc->accept_q);
+ sock_lock_init_class_and_name(sk, "slock-AF_SMC", &smc_slock_key,
+ "sk_lock-AF_SMC", &smc_key);
spin_lock_init(&smc->accept_q_lock);
spin_lock_init(&smc->conn.send_lock);
sk->sk_prot->hash(sk);
@@ -383,6 +401,7 @@ void smc_sk_init(struct net *net, struct sock *sk, int protocol)
smc->limit_smc_hs = net->smc.limit_smc_hs;
smc->use_fallback = false; /* assume rdma capability first */
smc->fallback_rsn = 0;
+ smc_close_init(smc);
}
static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
@@ -402,7 +421,7 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
return sk;
}
-int smc_bind(struct socket *sock, struct sockaddr *uaddr,
+int smc_bind(struct socket *sock, struct sockaddr_unsized *uaddr,
int addr_len)
{
struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
@@ -480,8 +499,8 @@ static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
{
/* options we don't get control via setsockopt for */
nsk->sk_type = osk->sk_type;
- nsk->sk_sndtimeo = osk->sk_sndtimeo;
- nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
+ nsk->sk_sndtimeo = READ_ONCE(osk->sk_sndtimeo);
+ nsk->sk_rcvtimeo = READ_ONCE(osk->sk_rcvtimeo);
nsk->sk_mark = READ_ONCE(osk->sk_mark);
nsk->sk_priority = READ_ONCE(osk->sk_priority);
nsk->sk_rcvlowat = osk->sk_rcvlowat;
@@ -1077,8 +1096,7 @@ static int smc_find_ism_v2_device_clnt(struct smc_sock *smc,
}
/* Check for VLAN ID and register it on ISM device just for CLC handshake */
-static int smc_connect_ism_vlan_setup(struct smc_sock *smc,
- struct smc_init_info *ini)
+static int smc_connect_ism_vlan_setup(struct smc_init_info *ini)
{
if (ini->vlan_id && smc_ism_get_vlan(ini->ism_dev[0], ini->vlan_id))
return SMC_CLC_DECL_ISMVLANERR;
@@ -1093,7 +1111,7 @@ static int smc_find_proposal_devices(struct smc_sock *smc,
/* check if there is an ism device available */
if (!(ini->smcd_version & SMC_V1) ||
smc_find_ism_device(smc, ini) ||
- smc_connect_ism_vlan_setup(smc, ini))
+ smc_connect_ism_vlan_setup(ini))
ini->smcd_version &= ~SMC_V1;
/* else ISM V1 is supported for this connection */
@@ -1116,7 +1134,10 @@ static int smc_find_proposal_devices(struct smc_sock *smc,
ini->check_smcrv2 = true;
ini->smcrv2.saddr = smc->clcsock->sk->sk_rcv_saddr;
if (!(ini->smcr_version & SMC_V2) ||
- smc->clcsock->sk->sk_family != AF_INET ||
+#if IS_ENABLED(CONFIG_IPV6)
+ (smc->clcsock->sk->sk_family == AF_INET6 &&
+ !ipv6_addr_v4mapped(&smc->clcsock->sk->sk_v6_rcv_saddr)) ||
+#endif
!smc_clc_ueid_count() ||
smc_find_rdma_device(smc, ini))
ini->smcr_version &= ~SMC_V2;
@@ -1135,8 +1156,7 @@ static int smc_find_proposal_devices(struct smc_sock *smc,
/* cleanup temporary VLAN ID registration used for CLC handshake. If ISM is
* used, the VLAN ID will be registered again during the connection setup.
*/
-static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc,
- struct smc_init_info *ini)
+static int smc_connect_ism_vlan_cleanup(struct smc_init_info *ini)
{
if (!smcd_indicated(ini->smc_type_v1))
return 0;
@@ -1299,7 +1319,6 @@ static int smc_connect_rdma(struct smc_sock *smc,
goto connect_abort;
}
- smc_close_init(smc);
smc_rx_init(smc);
if (ini->first_contact_local) {
@@ -1435,7 +1454,6 @@ static int smc_connect_ism(struct smc_sock *smc,
goto connect_abort;
}
}
- smc_close_init(smc);
smc_rx_init(smc);
smc_tx_init(smc);
@@ -1561,13 +1579,13 @@ static int __smc_connect(struct smc_sock *smc)
goto vlan_cleanup;
SMC_STAT_CLNT_SUCC_INC(sock_net(smc->clcsock->sk), aclc);
- smc_connect_ism_vlan_cleanup(smc, ini);
+ smc_connect_ism_vlan_cleanup(ini);
kfree(buf);
kfree(ini);
return 0;
vlan_cleanup:
- smc_connect_ism_vlan_cleanup(smc, ini);
+ smc_connect_ism_vlan_cleanup(ini);
kfree(buf);
fallback:
kfree(ini);
@@ -1578,7 +1596,7 @@ static void smc_connect_work(struct work_struct *work)
{
struct smc_sock *smc = container_of(work, struct smc_sock,
connect_work);
- long timeo = smc->sk.sk_sndtimeo;
+ long timeo = READ_ONCE(smc->sk.sk_sndtimeo);
int rc = 0;
if (!timeo)
@@ -1624,7 +1642,7 @@ out:
release_sock(&smc->sk);
}
-int smc_connect(struct socket *sock, struct sockaddr *addr,
+int smc_connect(struct socket *sock, struct sockaddr_unsized *addr,
int alen, int flags)
{
struct sock *sk = sock->sk;
@@ -1676,7 +1694,7 @@ int smc_connect(struct socket *sock, struct sockaddr *addr,
rc = -EALREADY;
goto out;
}
- rc = kernel_connect(smc->clcsock, addr, alen, flags);
+ rc = kernel_connect(smc->clcsock, (struct sockaddr_unsized *)addr, alen, flags);
if (rc && rc != -EINPROGRESS)
goto out;
@@ -1901,6 +1919,7 @@ static void smc_listen_out(struct smc_sock *new_smc)
if (tcp_sk(new_smc->clcsock->sk)->syn_smc)
atomic_dec(&lsmc->queued_smc_hs);
+ release_sock(newsmcsk); /* lock in smc_listen_work() */
if (lsmc->sk.sk_state == SMC_LISTEN) {
lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
smc_accept_enqueue(&lsmc->sk, newsmcsk);
@@ -2032,6 +2051,8 @@ static int smc_listen_prfx_check(struct smc_sock *new_smc,
if (pclc->hdr.typev1 == SMC_TYPE_N)
return 0;
pclc_prfx = smc_clc_proposal_get_prefix(pclc);
+ if (!pclc_prfx)
+ return -EPROTO;
if (smc_clc_prfx_match(newclcsock, pclc_prfx))
return SMC_CLC_DECL_DIFFPREFIX;
@@ -2119,7 +2140,7 @@ static void smc_check_ism_v2_match(struct smc_init_info *ini,
}
}
-static void smc_find_ism_store_rc(u32 rc, struct smc_init_info *ini)
+static void smc_init_info_store_rc(u32 rc, struct smc_init_info *ini)
{
if (!ini->rc)
ini->rc = rc;
@@ -2145,6 +2166,8 @@ static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc,
pclc_smcd = smc_get_clc_msg_smcd(pclc);
smc_v2_ext = smc_get_clc_v2_ext(pclc);
smcd_v2_ext = smc_get_clc_smcd_v2_ext(smc_v2_ext);
+ if (!pclc_smcd || !smc_v2_ext || !smcd_v2_ext)
+ goto not_found;
mutex_lock(&smcd_dev_list.mutex);
if (pclc_smcd->ism.chid) {
@@ -2180,7 +2203,7 @@ static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc,
mutex_unlock(&smcd_dev_list.mutex);
if (!ini->ism_dev[0]) {
- smc_find_ism_store_rc(SMC_CLC_DECL_NOSMCD2DEV, ini);
+ smc_init_info_store_rc(SMC_CLC_DECL_NOSMCD2DEV, ini);
goto not_found;
}
@@ -2197,7 +2220,7 @@ static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc,
ini->ism_selected = i;
rc = smc_listen_ism_init(new_smc, ini);
if (rc) {
- smc_find_ism_store_rc(rc, ini);
+ smc_init_info_store_rc(rc, ini);
/* try next active ISM device */
continue;
}
@@ -2221,7 +2244,9 @@ static void smc_find_ism_v1_device_serv(struct smc_sock *new_smc,
int rc = 0;
/* check if ISM V1 is available */
- if (!(ini->smcd_version & SMC_V1) || !smcd_indicated(ini->smc_type_v1))
+ if (!(ini->smcd_version & SMC_V1) ||
+ !smcd_indicated(ini->smc_type_v1) ||
+ !pclc_smcd)
goto not_found;
ini->is_smcd = true; /* prepare ISM check */
ini->ism_peer_gid[0].gid = ntohll(pclc_smcd->ism.gid);
@@ -2235,7 +2260,7 @@ static void smc_find_ism_v1_device_serv(struct smc_sock *new_smc,
return; /* V1 ISM device found */
not_found:
- smc_find_ism_store_rc(rc, ini);
+ smc_init_info_store_rc(rc, ini);
ini->smcd_version &= ~SMC_V1;
ini->ism_dev[0] = NULL;
ini->is_smcd = false;
@@ -2272,7 +2297,8 @@ static void smc_find_rdma_v2_device_serv(struct smc_sock *new_smc,
goto not_found;
smc_v2_ext = smc_get_clc_v2_ext(pclc);
- if (!smc_clc_match_eid(ini->negotiated_eid, smc_v2_ext, NULL, NULL))
+ if (!smc_v2_ext ||
+ !smc_clc_match_eid(ini->negotiated_eid, smc_v2_ext, NULL, NULL))
goto not_found;
/* prepare RDMA check */
@@ -2285,7 +2311,7 @@ static void smc_find_rdma_v2_device_serv(struct smc_sock *new_smc,
ini->smcrv2.daddr = smc_ib_gid_to_ipv4(smc_v2_ext->roce);
rc = smc_find_rdma_device(new_smc, ini);
if (rc) {
- smc_find_ism_store_rc(rc, ini);
+ smc_init_info_store_rc(rc, ini);
goto not_found;
}
if (!ini->smcrv2.uses_gateway)
@@ -2302,7 +2328,7 @@ static void smc_find_rdma_v2_device_serv(struct smc_sock *new_smc,
if (!rc)
return;
ini->smcr_version = smcr_version;
- smc_find_ism_store_rc(rc, ini);
+ smc_init_info_store_rc(rc, ini);
not_found:
ini->smcr_version &= ~SMC_V2;
@@ -2349,7 +2375,7 @@ static int smc_listen_find_device(struct smc_sock *new_smc,
/* check for matching IP prefix and subnet length (V1) */
prfx_rc = smc_listen_prfx_check(new_smc, pclc);
if (prfx_rc)
- smc_find_ism_store_rc(prfx_rc, ini);
+ smc_init_info_store_rc(prfx_rc, ini);
/* get vlan id from IP device */
if (smc_vlan_by_tcpsk(new_smc->clcsock, ini))
@@ -2376,7 +2402,7 @@ static int smc_listen_find_device(struct smc_sock *new_smc,
int rc;
rc = smc_find_rdma_v1_device_serv(new_smc, pclc, ini);
- smc_find_ism_store_rc(rc, ini);
+ smc_init_info_store_rc(rc, ini);
return (!rc) ? 0 : ini->rc;
}
return prfx_rc;
@@ -2422,6 +2448,7 @@ static void smc_listen_work(struct work_struct *work)
u8 accept_version;
int rc = 0;
+ lock_sock(&new_smc->sk); /* release in smc_listen_out() */
if (new_smc->listen_smc->sk.sk_state != SMC_LISTEN)
return smc_listen_out_err(new_smc);
@@ -2479,7 +2506,6 @@ static void smc_listen_work(struct work_struct *work)
goto out_decl;
mutex_lock(&smc_server_lgr_pending);
- smc_close_init(new_smc);
smc_rx_init(new_smc);
smc_tx_init(new_smc);
@@ -2539,8 +2565,9 @@ static void smc_listen_work(struct work_struct *work)
goto out_decl;
}
- smc_listen_out_connected(new_smc);
SMC_STAT_SERV_SUCC_INC(sock_net(newclcsock->sk), ini);
+ /* smc_listen_out() will release smcsk */
+ smc_listen_out_connected(new_smc);
goto out_free;
out_unlock:
@@ -2720,8 +2747,7 @@ int smc_accept(struct socket *sock, struct socket *new_sock,
if (lsmc->sockopt_defer_accept && !(arg->flags & O_NONBLOCK)) {
/* wait till data arrives on the socket */
- timeo = msecs_to_jiffies(lsmc->sockopt_defer_accept *
- MSEC_PER_SEC);
+ timeo = secs_to_jiffies(lsmc->sockopt_defer_accept);
if (smc_sk(nsk)->use_fallback) {
struct sock *clcsk = smc_sk(nsk)->clcsock->sk;
@@ -2731,7 +2757,7 @@ int smc_accept(struct socket *sock, struct socket *new_sock,
release_sock(clcsk);
} else if (!atomic_read(&smc_sk(nsk)->conn.bytes_to_rcv)) {
lock_sock(nsk);
- smc_rx_wait(smc_sk(nsk), &timeo, smc_rx_data_available);
+ smc_rx_wait(smc_sk(nsk), &timeo, 0, smc_rx_data_available);
release_sock(nsk);
}
}
@@ -2881,6 +2907,13 @@ __poll_t smc_poll(struct file *file, struct socket *sock,
} else {
sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+
+ if (sk->sk_state != SMC_INIT) {
+ /* Race breaker the same way as tcp_poll(). */
+ smp_mb__after_atomic();
+ if (atomic_read(&smc->conn.sndbuf_space))
+ mask |= EPOLLOUT | EPOLLWRNORM;
+ }
}
if (atomic_read(&smc->conn.bytes_to_rcv))
mask |= EPOLLIN | EPOLLRDNORM;
@@ -3320,10 +3353,7 @@ int smc_create_clcsk(struct net *net, struct sock *sk, int family)
* which need net ref.
*/
sk = smc->clcsock->sk;
- __netns_tracker_free(net, &sk->ns_tracker, false);
- sk->sk_net_refcnt = 1;
- get_net_track(net, &sk->ns_tracker, GFP_KERNEL);
- sock_inuse_add(net, 1);
+ sk_net_refcnt_upgrade(sk);
return 0;
}
@@ -3504,15 +3534,15 @@ static int __init smc_init(void)
rc = -ENOMEM;
- smc_tcp_ls_wq = alloc_workqueue("smc_tcp_ls_wq", 0, 0);
+ smc_tcp_ls_wq = alloc_workqueue("smc_tcp_ls_wq", WQ_PERCPU, 0);
if (!smc_tcp_ls_wq)
goto out_pnet;
- smc_hs_wq = alloc_workqueue("smc_hs_wq", 0, 0);
+ smc_hs_wq = alloc_workqueue("smc_hs_wq", WQ_PERCPU, 0);
if (!smc_hs_wq)
goto out_alloc_tcp_ls_wq;
- smc_close_wq = alloc_workqueue("smc_close_wq", 0, 0);
+ smc_close_wq = alloc_workqueue("smc_close_wq", WQ_PERCPU, 0);
if (!smc_close_wq)
goto out_alloc_hs_wq;
@@ -3560,28 +3590,28 @@ static int __init smc_init(void)
goto out_sock;
}
- rc = smc_loopback_init();
- if (rc) {
- pr_err("%s: smc_loopback_init fails with %d\n", __func__, rc);
- goto out_ib;
- }
-
rc = tcp_register_ulp(&smc_ulp_ops);
if (rc) {
pr_err("%s: tcp_ulp_register fails with %d\n", __func__, rc);
- goto out_lo;
+ goto out_ib;
}
rc = smc_inet_init();
if (rc) {
pr_err("%s: smc_inet_init fails with %d\n", __func__, rc);
goto out_ulp;
}
+ rc = bpf_smc_hs_ctrl_init();
+ if (rc) {
+ pr_err("%s: bpf_smc_hs_ctrl_init fails with %d\n", __func__,
+ rc);
+ goto out_inet;
+ }
static_branch_enable(&tcp_have_smc);
return 0;
+out_inet:
+ smc_inet_exit();
out_ulp:
tcp_unregister_ulp(&smc_ulp_ops);
-out_lo:
- smc_loopback_exit();
out_ib:
smc_ib_unregister_client();
out_sock:
@@ -3620,7 +3650,6 @@ static void __exit smc_exit(void)
tcp_unregister_ulp(&smc_ulp_ops);
sock_unregister(PF_SMC);
smc_core_exit();
- smc_loopback_exit();
smc_ib_unregister_client();
smc_ism_exit();
destroy_workqueue(smc_close_wq);