diff options
Diffstat (limited to 'net/smc/af_smc.c')
| -rw-r--r-- | net/smc/af_smc.c | 131 |
1 files changed, 80 insertions, 51 deletions
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 9d76e902fd77..f97f77b041d9 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -16,8 +16,7 @@ * based on prototype from Frank Blaschka */ -#define KMSG_COMPONENT "smc" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#define pr_fmt(fmt) "smc: " fmt #include <linux/module.h> #include <linux/socket.h> @@ -30,6 +29,10 @@ #include <linux/splice.h> #include <net/sock.h> +#include <net/inet_common.h> +#if IS_ENABLED(CONFIG_IPV6) +#include <net/ipv6.h> +#endif #include <net/tcp.h> #include <net/smc.h> #include <asm/ioctls.h> @@ -53,8 +56,8 @@ #include "smc_stats.h" #include "smc_tracepoint.h" #include "smc_sysctl.h" -#include "smc_loopback.h" #include "smc_inet.h" +#include "smc_hs_bpf.h" static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link group * creation on server @@ -360,8 +363,21 @@ static void smc_destruct(struct sock *sk) return; if (!sock_flag(sk, SOCK_DEAD)) return; + switch (sk->sk_family) { + case AF_INET: + inet_sock_destruct(sk); + break; +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: + inet6_sock_destruct(sk); + break; +#endif + } } +static struct lock_class_key smc_key; +static struct lock_class_key smc_slock_key; + void smc_sk_init(struct net *net, struct sock *sk, int protocol) { struct smc_sock *smc = smc_sk(sk); @@ -375,6 +391,8 @@ void smc_sk_init(struct net *net, struct sock *sk, int protocol) INIT_WORK(&smc->connect_work, smc_connect_work); INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work); INIT_LIST_HEAD(&smc->accept_q); + sock_lock_init_class_and_name(sk, "slock-AF_SMC", &smc_slock_key, + "sk_lock-AF_SMC", &smc_key); spin_lock_init(&smc->accept_q_lock); spin_lock_init(&smc->conn.send_lock); sk->sk_prot->hash(sk); @@ -383,6 +401,7 @@ void smc_sk_init(struct net *net, struct sock *sk, int protocol) smc->limit_smc_hs = net->smc.limit_smc_hs; smc->use_fallback = false; /* assume rdma capability first */ smc->fallback_rsn = 0; + smc_close_init(smc); } static struct sock *smc_sock_alloc(struct net *net, struct socket *sock, @@ -402,7 +421,7 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock, return sk; } -int smc_bind(struct socket *sock, struct sockaddr *uaddr, +int smc_bind(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len) { struct sockaddr_in *addr = (struct sockaddr_in *)uaddr; @@ -480,8 +499,8 @@ static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk, { /* options we don't get control via setsockopt for */ nsk->sk_type = osk->sk_type; - nsk->sk_sndtimeo = osk->sk_sndtimeo; - nsk->sk_rcvtimeo = osk->sk_rcvtimeo; + nsk->sk_sndtimeo = READ_ONCE(osk->sk_sndtimeo); + nsk->sk_rcvtimeo = READ_ONCE(osk->sk_rcvtimeo); nsk->sk_mark = READ_ONCE(osk->sk_mark); nsk->sk_priority = READ_ONCE(osk->sk_priority); nsk->sk_rcvlowat = osk->sk_rcvlowat; @@ -1077,8 +1096,7 @@ static int smc_find_ism_v2_device_clnt(struct smc_sock *smc, } /* Check for VLAN ID and register it on ISM device just for CLC handshake */ -static int smc_connect_ism_vlan_setup(struct smc_sock *smc, - struct smc_init_info *ini) +static int smc_connect_ism_vlan_setup(struct smc_init_info *ini) { if (ini->vlan_id && smc_ism_get_vlan(ini->ism_dev[0], ini->vlan_id)) return SMC_CLC_DECL_ISMVLANERR; @@ -1093,7 +1111,7 @@ static int smc_find_proposal_devices(struct smc_sock *smc, /* check if there is an ism device available */ if (!(ini->smcd_version & SMC_V1) || smc_find_ism_device(smc, ini) || - smc_connect_ism_vlan_setup(smc, ini)) + smc_connect_ism_vlan_setup(ini)) ini->smcd_version &= ~SMC_V1; /* else ISM V1 is supported for this connection */ @@ -1116,7 +1134,10 @@ static int smc_find_proposal_devices(struct smc_sock *smc, ini->check_smcrv2 = true; ini->smcrv2.saddr = smc->clcsock->sk->sk_rcv_saddr; if (!(ini->smcr_version & SMC_V2) || - smc->clcsock->sk->sk_family != AF_INET || +#if IS_ENABLED(CONFIG_IPV6) + (smc->clcsock->sk->sk_family == AF_INET6 && + !ipv6_addr_v4mapped(&smc->clcsock->sk->sk_v6_rcv_saddr)) || +#endif !smc_clc_ueid_count() || smc_find_rdma_device(smc, ini)) ini->smcr_version &= ~SMC_V2; @@ -1135,8 +1156,7 @@ static int smc_find_proposal_devices(struct smc_sock *smc, /* cleanup temporary VLAN ID registration used for CLC handshake. If ISM is * used, the VLAN ID will be registered again during the connection setup. */ -static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc, - struct smc_init_info *ini) +static int smc_connect_ism_vlan_cleanup(struct smc_init_info *ini) { if (!smcd_indicated(ini->smc_type_v1)) return 0; @@ -1299,7 +1319,6 @@ static int smc_connect_rdma(struct smc_sock *smc, goto connect_abort; } - smc_close_init(smc); smc_rx_init(smc); if (ini->first_contact_local) { @@ -1435,7 +1454,6 @@ static int smc_connect_ism(struct smc_sock *smc, goto connect_abort; } } - smc_close_init(smc); smc_rx_init(smc); smc_tx_init(smc); @@ -1561,13 +1579,13 @@ static int __smc_connect(struct smc_sock *smc) goto vlan_cleanup; SMC_STAT_CLNT_SUCC_INC(sock_net(smc->clcsock->sk), aclc); - smc_connect_ism_vlan_cleanup(smc, ini); + smc_connect_ism_vlan_cleanup(ini); kfree(buf); kfree(ini); return 0; vlan_cleanup: - smc_connect_ism_vlan_cleanup(smc, ini); + smc_connect_ism_vlan_cleanup(ini); kfree(buf); fallback: kfree(ini); @@ -1578,7 +1596,7 @@ static void smc_connect_work(struct work_struct *work) { struct smc_sock *smc = container_of(work, struct smc_sock, connect_work); - long timeo = smc->sk.sk_sndtimeo; + long timeo = READ_ONCE(smc->sk.sk_sndtimeo); int rc = 0; if (!timeo) @@ -1624,7 +1642,7 @@ out: release_sock(&smc->sk); } -int smc_connect(struct socket *sock, struct sockaddr *addr, +int smc_connect(struct socket *sock, struct sockaddr_unsized *addr, int alen, int flags) { struct sock *sk = sock->sk; @@ -1676,7 +1694,7 @@ int smc_connect(struct socket *sock, struct sockaddr *addr, rc = -EALREADY; goto out; } - rc = kernel_connect(smc->clcsock, addr, alen, flags); + rc = kernel_connect(smc->clcsock, (struct sockaddr_unsized *)addr, alen, flags); if (rc && rc != -EINPROGRESS) goto out; @@ -1901,6 +1919,7 @@ static void smc_listen_out(struct smc_sock *new_smc) if (tcp_sk(new_smc->clcsock->sk)->syn_smc) atomic_dec(&lsmc->queued_smc_hs); + release_sock(newsmcsk); /* lock in smc_listen_work() */ if (lsmc->sk.sk_state == SMC_LISTEN) { lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING); smc_accept_enqueue(&lsmc->sk, newsmcsk); @@ -2032,6 +2051,8 @@ static int smc_listen_prfx_check(struct smc_sock *new_smc, if (pclc->hdr.typev1 == SMC_TYPE_N) return 0; pclc_prfx = smc_clc_proposal_get_prefix(pclc); + if (!pclc_prfx) + return -EPROTO; if (smc_clc_prfx_match(newclcsock, pclc_prfx)) return SMC_CLC_DECL_DIFFPREFIX; @@ -2119,7 +2140,7 @@ static void smc_check_ism_v2_match(struct smc_init_info *ini, } } -static void smc_find_ism_store_rc(u32 rc, struct smc_init_info *ini) +static void smc_init_info_store_rc(u32 rc, struct smc_init_info *ini) { if (!ini->rc) ini->rc = rc; @@ -2145,6 +2166,8 @@ static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc, pclc_smcd = smc_get_clc_msg_smcd(pclc); smc_v2_ext = smc_get_clc_v2_ext(pclc); smcd_v2_ext = smc_get_clc_smcd_v2_ext(smc_v2_ext); + if (!pclc_smcd || !smc_v2_ext || !smcd_v2_ext) + goto not_found; mutex_lock(&smcd_dev_list.mutex); if (pclc_smcd->ism.chid) { @@ -2180,7 +2203,7 @@ static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc, mutex_unlock(&smcd_dev_list.mutex); if (!ini->ism_dev[0]) { - smc_find_ism_store_rc(SMC_CLC_DECL_NOSMCD2DEV, ini); + smc_init_info_store_rc(SMC_CLC_DECL_NOSMCD2DEV, ini); goto not_found; } @@ -2197,7 +2220,7 @@ static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc, ini->ism_selected = i; rc = smc_listen_ism_init(new_smc, ini); if (rc) { - smc_find_ism_store_rc(rc, ini); + smc_init_info_store_rc(rc, ini); /* try next active ISM device */ continue; } @@ -2221,7 +2244,9 @@ static void smc_find_ism_v1_device_serv(struct smc_sock *new_smc, int rc = 0; /* check if ISM V1 is available */ - if (!(ini->smcd_version & SMC_V1) || !smcd_indicated(ini->smc_type_v1)) + if (!(ini->smcd_version & SMC_V1) || + !smcd_indicated(ini->smc_type_v1) || + !pclc_smcd) goto not_found; ini->is_smcd = true; /* prepare ISM check */ ini->ism_peer_gid[0].gid = ntohll(pclc_smcd->ism.gid); @@ -2235,7 +2260,7 @@ static void smc_find_ism_v1_device_serv(struct smc_sock *new_smc, return; /* V1 ISM device found */ not_found: - smc_find_ism_store_rc(rc, ini); + smc_init_info_store_rc(rc, ini); ini->smcd_version &= ~SMC_V1; ini->ism_dev[0] = NULL; ini->is_smcd = false; @@ -2272,7 +2297,8 @@ static void smc_find_rdma_v2_device_serv(struct smc_sock *new_smc, goto not_found; smc_v2_ext = smc_get_clc_v2_ext(pclc); - if (!smc_clc_match_eid(ini->negotiated_eid, smc_v2_ext, NULL, NULL)) + if (!smc_v2_ext || + !smc_clc_match_eid(ini->negotiated_eid, smc_v2_ext, NULL, NULL)) goto not_found; /* prepare RDMA check */ @@ -2285,7 +2311,7 @@ static void smc_find_rdma_v2_device_serv(struct smc_sock *new_smc, ini->smcrv2.daddr = smc_ib_gid_to_ipv4(smc_v2_ext->roce); rc = smc_find_rdma_device(new_smc, ini); if (rc) { - smc_find_ism_store_rc(rc, ini); + smc_init_info_store_rc(rc, ini); goto not_found; } if (!ini->smcrv2.uses_gateway) @@ -2302,7 +2328,7 @@ static void smc_find_rdma_v2_device_serv(struct smc_sock *new_smc, if (!rc) return; ini->smcr_version = smcr_version; - smc_find_ism_store_rc(rc, ini); + smc_init_info_store_rc(rc, ini); not_found: ini->smcr_version &= ~SMC_V2; @@ -2349,7 +2375,7 @@ static int smc_listen_find_device(struct smc_sock *new_smc, /* check for matching IP prefix and subnet length (V1) */ prfx_rc = smc_listen_prfx_check(new_smc, pclc); if (prfx_rc) - smc_find_ism_store_rc(prfx_rc, ini); + smc_init_info_store_rc(prfx_rc, ini); /* get vlan id from IP device */ if (smc_vlan_by_tcpsk(new_smc->clcsock, ini)) @@ -2376,7 +2402,7 @@ static int smc_listen_find_device(struct smc_sock *new_smc, int rc; rc = smc_find_rdma_v1_device_serv(new_smc, pclc, ini); - smc_find_ism_store_rc(rc, ini); + smc_init_info_store_rc(rc, ini); return (!rc) ? 0 : ini->rc; } return prfx_rc; @@ -2422,6 +2448,7 @@ static void smc_listen_work(struct work_struct *work) u8 accept_version; int rc = 0; + lock_sock(&new_smc->sk); /* release in smc_listen_out() */ if (new_smc->listen_smc->sk.sk_state != SMC_LISTEN) return smc_listen_out_err(new_smc); @@ -2479,7 +2506,6 @@ static void smc_listen_work(struct work_struct *work) goto out_decl; mutex_lock(&smc_server_lgr_pending); - smc_close_init(new_smc); smc_rx_init(new_smc); smc_tx_init(new_smc); @@ -2539,8 +2565,9 @@ static void smc_listen_work(struct work_struct *work) goto out_decl; } - smc_listen_out_connected(new_smc); SMC_STAT_SERV_SUCC_INC(sock_net(newclcsock->sk), ini); + /* smc_listen_out() will release smcsk */ + smc_listen_out_connected(new_smc); goto out_free; out_unlock: @@ -2720,8 +2747,7 @@ int smc_accept(struct socket *sock, struct socket *new_sock, if (lsmc->sockopt_defer_accept && !(arg->flags & O_NONBLOCK)) { /* wait till data arrives on the socket */ - timeo = msecs_to_jiffies(lsmc->sockopt_defer_accept * - MSEC_PER_SEC); + timeo = secs_to_jiffies(lsmc->sockopt_defer_accept); if (smc_sk(nsk)->use_fallback) { struct sock *clcsk = smc_sk(nsk)->clcsock->sk; @@ -2731,7 +2757,7 @@ int smc_accept(struct socket *sock, struct socket *new_sock, release_sock(clcsk); } else if (!atomic_read(&smc_sk(nsk)->conn.bytes_to_rcv)) { lock_sock(nsk); - smc_rx_wait(smc_sk(nsk), &timeo, smc_rx_data_available); + smc_rx_wait(smc_sk(nsk), &timeo, 0, smc_rx_data_available); release_sock(nsk); } } @@ -2881,6 +2907,13 @@ __poll_t smc_poll(struct file *file, struct socket *sock, } else { sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); + + if (sk->sk_state != SMC_INIT) { + /* Race breaker the same way as tcp_poll(). */ + smp_mb__after_atomic(); + if (atomic_read(&smc->conn.sndbuf_space)) + mask |= EPOLLOUT | EPOLLWRNORM; + } } if (atomic_read(&smc->conn.bytes_to_rcv)) mask |= EPOLLIN | EPOLLRDNORM; @@ -3320,10 +3353,7 @@ int smc_create_clcsk(struct net *net, struct sock *sk, int family) * which need net ref. */ sk = smc->clcsock->sk; - __netns_tracker_free(net, &sk->ns_tracker, false); - sk->sk_net_refcnt = 1; - get_net_track(net, &sk->ns_tracker, GFP_KERNEL); - sock_inuse_add(net, 1); + sk_net_refcnt_upgrade(sk); return 0; } @@ -3504,15 +3534,15 @@ static int __init smc_init(void) rc = -ENOMEM; - smc_tcp_ls_wq = alloc_workqueue("smc_tcp_ls_wq", 0, 0); + smc_tcp_ls_wq = alloc_workqueue("smc_tcp_ls_wq", WQ_PERCPU, 0); if (!smc_tcp_ls_wq) goto out_pnet; - smc_hs_wq = alloc_workqueue("smc_hs_wq", 0, 0); + smc_hs_wq = alloc_workqueue("smc_hs_wq", WQ_PERCPU, 0); if (!smc_hs_wq) goto out_alloc_tcp_ls_wq; - smc_close_wq = alloc_workqueue("smc_close_wq", 0, 0); + smc_close_wq = alloc_workqueue("smc_close_wq", WQ_PERCPU, 0); if (!smc_close_wq) goto out_alloc_hs_wq; @@ -3560,28 +3590,28 @@ static int __init smc_init(void) goto out_sock; } - rc = smc_loopback_init(); - if (rc) { - pr_err("%s: smc_loopback_init fails with %d\n", __func__, rc); - goto out_ib; - } - rc = tcp_register_ulp(&smc_ulp_ops); if (rc) { pr_err("%s: tcp_ulp_register fails with %d\n", __func__, rc); - goto out_lo; + goto out_ib; } rc = smc_inet_init(); if (rc) { pr_err("%s: smc_inet_init fails with %d\n", __func__, rc); goto out_ulp; } + rc = bpf_smc_hs_ctrl_init(); + if (rc) { + pr_err("%s: bpf_smc_hs_ctrl_init fails with %d\n", __func__, + rc); + goto out_inet; + } static_branch_enable(&tcp_have_smc); return 0; +out_inet: + smc_inet_exit(); out_ulp: tcp_unregister_ulp(&smc_ulp_ops); -out_lo: - smc_loopback_exit(); out_ib: smc_ib_unregister_client(); out_sock: @@ -3620,7 +3650,6 @@ static void __exit smc_exit(void) tcp_unregister_ulp(&smc_ulp_ops); sock_unregister(PF_SMC); smc_core_exit(); - smc_loopback_exit(); smc_ib_unregister_client(); smc_ism_exit(); destroy_workqueue(smc_close_wq); |
