diff options
Diffstat (limited to 'net/smc')
-rw-r--r-- | net/smc/af_smc.c | 167 | ||||
-rw-r--r-- | net/smc/smc.h | 7 | ||||
-rw-r--r-- | net/smc/smc_clc.c | 151 | ||||
-rw-r--r-- | net/smc/smc_clc.h | 53 | ||||
-rw-r--r-- | net/smc/smc_core.c | 38 | ||||
-rw-r--r-- | net/smc/smc_core.h | 26 | ||||
-rw-r--r-- | net/smc/smc_ib.h | 1 | ||||
-rw-r--r-- | net/smc/smc_llc.c | 25 | ||||
-rw-r--r-- | net/smc/smc_sysctl.c | 10 |
9 files changed, 375 insertions, 103 deletions
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index a7f887d91d89..bacdd971615e 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -378,8 +378,8 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock, sk->sk_state = SMC_INIT; sk->sk_destruct = smc_destruct; sk->sk_protocol = protocol; - WRITE_ONCE(sk->sk_sndbuf, READ_ONCE(net->smc.sysctl_wmem)); - WRITE_ONCE(sk->sk_rcvbuf, READ_ONCE(net->smc.sysctl_rmem)); + WRITE_ONCE(sk->sk_sndbuf, 2 * READ_ONCE(net->smc.sysctl_wmem)); + WRITE_ONCE(sk->sk_rcvbuf, 2 * READ_ONCE(net->smc.sysctl_rmem)); smc = smc_sk(sk); INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work); INIT_WORK(&smc->connect_work, smc_connect_work); @@ -436,24 +436,9 @@ out: return rc; } -static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk, - unsigned long mask) -{ - /* options we don't get control via setsockopt for */ - nsk->sk_type = osk->sk_type; - nsk->sk_sndbuf = osk->sk_sndbuf; - nsk->sk_rcvbuf = osk->sk_rcvbuf; - nsk->sk_sndtimeo = osk->sk_sndtimeo; - nsk->sk_rcvtimeo = osk->sk_rcvtimeo; - nsk->sk_mark = osk->sk_mark; - nsk->sk_priority = osk->sk_priority; - nsk->sk_rcvlowat = osk->sk_rcvlowat; - nsk->sk_bound_dev_if = osk->sk_bound_dev_if; - nsk->sk_err = osk->sk_err; - - nsk->sk_flags &= ~mask; - nsk->sk_flags |= osk->sk_flags & mask; -} +/* copy only relevant settings and flags of SOL_SOCKET level from smc to + * clc socket (since smc is not called for these options from net/core) + */ #define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \ (1UL << SOCK_KEEPOPEN) | \ @@ -470,9 +455,55 @@ static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk, (1UL << SOCK_NOFCS) | \ (1UL << SOCK_FILTER_LOCKED) | \ (1UL << SOCK_TSTAMP_NEW)) -/* copy only relevant settings and flags of SOL_SOCKET level from smc to - * clc socket (since smc is not called for these options from net/core) - */ + +/* if set, use value set by setsockopt() - else use IPv4 or SMC sysctl value */ +static void 
smc_adjust_sock_bufsizes(struct sock *nsk, struct sock *osk, + unsigned long mask) +{ + struct net *nnet = sock_net(nsk); + + nsk->sk_userlocks = osk->sk_userlocks; + if (osk->sk_userlocks & SOCK_SNDBUF_LOCK) { + nsk->sk_sndbuf = osk->sk_sndbuf; + } else { + if (mask == SK_FLAGS_SMC_TO_CLC) + WRITE_ONCE(nsk->sk_sndbuf, + READ_ONCE(nnet->ipv4.sysctl_tcp_wmem[1])); + else + WRITE_ONCE(nsk->sk_sndbuf, + 2 * READ_ONCE(nnet->smc.sysctl_wmem)); + } + if (osk->sk_userlocks & SOCK_RCVBUF_LOCK) { + nsk->sk_rcvbuf = osk->sk_rcvbuf; + } else { + if (mask == SK_FLAGS_SMC_TO_CLC) + WRITE_ONCE(nsk->sk_rcvbuf, + READ_ONCE(nnet->ipv4.sysctl_tcp_rmem[1])); + else + WRITE_ONCE(nsk->sk_rcvbuf, + 2 * READ_ONCE(nnet->smc.sysctl_rmem)); + } +} + +static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk, + unsigned long mask) +{ + /* options we don't get control via setsockopt for */ + nsk->sk_type = osk->sk_type; + nsk->sk_sndtimeo = osk->sk_sndtimeo; + nsk->sk_rcvtimeo = osk->sk_rcvtimeo; + nsk->sk_mark = READ_ONCE(osk->sk_mark); + nsk->sk_priority = osk->sk_priority; + nsk->sk_rcvlowat = osk->sk_rcvlowat; + nsk->sk_bound_dev_if = osk->sk_bound_dev_if; + nsk->sk_err = osk->sk_err; + + nsk->sk_flags &= ~mask; + nsk->sk_flags |= osk->sk_flags & mask; + + smc_adjust_sock_bufsizes(nsk, osk, mask); +} + static void smc_copy_sock_settings_to_clc(struct smc_sock *smc) { smc_copy_sock_settings(smc->clcsock->sk, &smc->sk, SK_FLAGS_SMC_TO_CLC); @@ -610,20 +641,22 @@ static int smcr_clnt_conf_first_link(struct smc_sock *smc) smc_llc_link_active(link); smcr_lgr_set_type(link->lgr, SMC_LGR_SINGLE); - /* optional 2nd link, receive ADD LINK request from server */ - qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME, - SMC_LLC_ADD_LINK); - if (!qentry) { - struct smc_clc_msg_decline dclc; - - rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc), - SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT); - if (rc == -EAGAIN) - rc = 0; /* no DECLINE received, go with one link */ - return rc; + if 
(link->lgr->max_links > 1) { + /* optional 2nd link, receive ADD LINK request from server */ + qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME, + SMC_LLC_ADD_LINK); + if (!qentry) { + struct smc_clc_msg_decline dclc; + + rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc), + SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT); + if (rc == -EAGAIN) + rc = 0; /* no DECLINE received, go with one link */ + return rc; + } + smc_llc_flow_qentry_clr(&link->lgr->llc_flow_lcl); + smc_llc_cli_add_link(link, qentry); } - smc_llc_flow_qentry_clr(&link->lgr->llc_flow_lcl); - smc_llc_cli_add_link(link, qentry); return 0; } @@ -1113,7 +1146,7 @@ static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc, #define SMC_CLC_MAX_ACCEPT_LEN \ (sizeof(struct smc_clc_msg_accept_confirm_v2) + \ - sizeof(struct smc_clc_first_contact_ext) + \ + sizeof(struct smc_clc_first_contact_ext_v2x) + \ sizeof(struct smc_clc_msg_trail)) /* CLC handshake during connect */ @@ -1167,8 +1200,8 @@ static int smc_connect_rdma_v2_prepare(struct smc_sock *smc, struct smc_clc_msg_accept_confirm_v2 *clc_v2 = (struct smc_clc_msg_accept_confirm_v2 *)aclc; struct smc_clc_first_contact_ext *fce = - (struct smc_clc_first_contact_ext *) - (((u8 *)clc_v2) + sizeof(*clc_v2)); + smc_get_clc_first_contact_ext(clc_v2, false); + int rc; if (!ini->first_contact_peer || aclc->hdr.version == SMC_V1) return 0; @@ -1187,6 +1220,12 @@ static int smc_connect_rdma_v2_prepare(struct smc_sock *smc, return SMC_CLC_DECL_NOINDIRECT; } } + + ini->release_nr = fce->release; + rc = smc_clc_clnt_v2x_features_validate(fce, ini); + if (rc) + return rc; + return 0; } @@ -1205,6 +1244,8 @@ static int smc_connect_rdma(struct smc_sock *smc, memcpy(ini->peer_systemid, aclc->r0.lcl.id_for_peer, SMC_SYSTEMID_LEN); memcpy(ini->peer_gid, aclc->r0.lcl.gid, SMC_GID_SIZE); memcpy(ini->peer_mac, aclc->r0.lcl.mac, ETH_ALEN); + ini->max_conns = SMC_CONN_PER_LGR_MAX; + ini->max_links = SMC_LINKS_ADD_LNK_MAX; reason_code = smc_connect_rdma_v2_prepare(smc, aclc, 
ini); if (reason_code) @@ -1355,6 +1396,16 @@ static int smc_connect_ism(struct smc_sock *smc, struct smc_clc_msg_accept_confirm_v2 *aclc_v2 = (struct smc_clc_msg_accept_confirm_v2 *)aclc; + if (ini->first_contact_peer) { + struct smc_clc_first_contact_ext *fce = + smc_get_clc_first_contact_ext(aclc_v2, true); + + ini->release_nr = fce->release; + rc = smc_clc_clnt_v2x_features_validate(fce, ini); + if (rc) + return rc; + } + rc = smc_v2_determine_accepted_chid(aclc_v2, ini); if (rc) return rc; @@ -1389,7 +1440,7 @@ static int smc_connect_ism(struct smc_sock *smc, } rc = smc_clc_send_confirm(smc, ini->first_contact_local, - aclc->hdr.version, eid, NULL); + aclc->hdr.version, eid, ini); if (rc) goto connect_abort; mutex_unlock(&smc_server_lgr_pending); @@ -1789,7 +1840,7 @@ void smc_close_non_accepted(struct sock *sk) lock_sock(sk); if (!sk->sk_lingertime) /* wait for peer closing */ - sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT; + WRITE_ONCE(sk->sk_lingertime, SMC_MAX_STREAM_WAIT_TIMEOUT); __smc_release(smc); release_sock(sk); sock_put(sk); /* sock_hold above */ @@ -1839,10 +1890,12 @@ static int smcr_serv_conf_first_link(struct smc_sock *smc) smc_llc_link_active(link); smcr_lgr_set_type(link->lgr, SMC_LGR_SINGLE); - down_write(&link->lgr->llc_conf_mutex); - /* initial contact - try to establish second link */ - smc_llc_srv_add_link(link, NULL); - up_write(&link->lgr->llc_conf_mutex); + if (link->lgr->max_links > 1) { + down_write(&link->lgr->llc_conf_mutex); + /* initial contact - try to establish second link */ + smc_llc_srv_add_link(link, NULL); + up_write(&link->lgr->llc_conf_mutex); + } return 0; } @@ -1965,6 +2018,10 @@ static int smc_listen_v2_check(struct smc_sock *new_smc, } } + ini->release_nr = pclc_v2_ext->hdr.flag.release; + if (pclc_v2_ext->hdr.flag.release > SMC_RELEASE) + ini->release_nr = SMC_RELEASE; + out: if (!ini->smcd_version && !ini->smcr_version) return rc; @@ -2399,6 +2456,10 @@ static void smc_listen_work(struct work_struct *work) if 
(rc) goto out_decl; + rc = smc_clc_srv_v2x_features_validate(pclc, ini); + if (rc) + goto out_decl; + mutex_lock(&smc_server_lgr_pending); smc_close_init(new_smc); smc_rx_init(new_smc); @@ -2412,7 +2473,7 @@ static void smc_listen_work(struct work_struct *work) /* send SMC Accept CLC message */ accept_version = ini->is_smcd ? ini->smcd_version : ini->smcr_version; rc = smc_clc_send_accept(new_smc, ini->first_contact_local, - accept_version, ini->negotiated_eid); + accept_version, ini->negotiated_eid, ini); if (rc) goto out_unlock; @@ -2431,6 +2492,18 @@ static void smc_listen_work(struct work_struct *work) goto out_decl; } + rc = smc_clc_v2x_features_confirm_check(cclc, ini); + if (rc) { + if (!ini->is_smcd) + goto out_unlock; + goto out_decl; + } + + /* fce smc release version is needed in smc_listen_rdma_finish, + * so save fce info here. + */ + smc_conn_save_peer_info_fce(new_smc, cclc); + /* finish worker */ if (!ini->is_smcd) { rc = smc_listen_rdma_finish(new_smc, cclc, @@ -2479,8 +2552,6 @@ static void smc_tcp_listen_work(struct work_struct *work) sock_hold(lsk); /* sock_put in smc_listen_work */ INIT_WORK(&new_smc->smc_listen_work, smc_listen_work); smc_copy_sock_settings_to_smc(new_smc); - new_smc->sk.sk_sndbuf = lsmc->sk.sk_sndbuf; - new_smc->sk.sk_rcvbuf = lsmc->sk.sk_rcvbuf; sock_hold(&new_smc->sk); /* sock_put in passive closing */ if (!queue_work(smc_hs_wq, &new_smc->smc_listen_work)) sock_put(&new_smc->sk); diff --git a/net/smc/smc.h b/net/smc/smc.h index 2eeea4cdc718..24745fde4ac2 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -21,7 +21,10 @@ #define SMC_V1 1 /* SMC version V1 */ #define SMC_V2 2 /* SMC version V2 */ -#define SMC_RELEASE 0 + +#define SMC_RELEASE_0 0 +#define SMC_RELEASE_1 1 +#define SMC_RELEASE SMC_RELEASE_1 /* the latest release version */ #define SMCPROTO_SMC 0 /* SMC protocol, IPv4 */ #define SMCPROTO_SMC6 1 /* SMC protocol, IPv6 */ @@ -161,7 +164,7 @@ struct smc_connection { struct smc_buf_desc *sndbuf_desc; /* send buffer 
descriptor */ struct smc_buf_desc *rmb_desc; /* RMBE descriptor */ - int rmbe_size_short;/* compressed notation */ + int rmbe_size_comp; /* compressed notation */ int rmbe_update_limit; /* lower limit for consumer * cursor update diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index b9b8b07aa702..8deb46c28f1d 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -391,9 +391,7 @@ smc_clc_msg_acc_conf_valid(struct smc_clc_msg_accept_confirm_v2 *clc_v2) return false; } else { if (hdr->typev1 == SMC_TYPE_D && - ntohs(hdr->length) != SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 && - (ntohs(hdr->length) != SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 + - sizeof(struct smc_clc_first_contact_ext))) + ntohs(hdr->length) < SMCD_CLC_ACCEPT_CONFIRM_LEN_V2) return false; if (hdr->typev1 == SMC_TYPE_R && ntohs(hdr->length) < SMCR_CLC_ACCEPT_CONFIRM_LEN_V2) @@ -420,13 +418,29 @@ smc_clc_msg_decl_valid(struct smc_clc_msg_decline *dclc) return true; } -static void smc_clc_fill_fce(struct smc_clc_first_contact_ext *fce, int *len) +static int smc_clc_fill_fce(struct smc_clc_first_contact_ext_v2x *fce, + struct smc_init_info *ini) { + int ret = sizeof(*fce); + memset(fce, 0, sizeof(*fce)); - fce->os_type = SMC_CLC_OS_LINUX; - fce->release = SMC_RELEASE; - memcpy(fce->hostname, smc_hostname, sizeof(smc_hostname)); - (*len) += sizeof(*fce); + fce->fce_v2_base.os_type = SMC_CLC_OS_LINUX; + fce->fce_v2_base.release = ini->release_nr; + memcpy(fce->fce_v2_base.hostname, smc_hostname, sizeof(smc_hostname)); + if (ini->is_smcd && ini->release_nr < SMC_RELEASE_1) { + ret = sizeof(struct smc_clc_first_contact_ext); + goto out; + } + + if (ini->release_nr >= SMC_RELEASE_1) { + if (!ini->is_smcd) { + fce->max_conns = ini->max_conns; + fce->max_links = ini->max_links; + } + } + +out: + return ret; } /* check if received message has a correct header length and contains valid @@ -927,8 +941,11 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) sizeof(struct smc_clc_smcd_gid_chid); } } - if 
(smcr_indicated(ini->smc_type_v2)) + if (smcr_indicated(ini->smc_type_v2)) { memcpy(v2_ext->roce, ini->smcrv2.ib_gid_v2, SMC_GID_SIZE); + v2_ext->max_conns = SMC_CONN_PER_LGR_PREFER; + v2_ext->max_links = SMC_LINKS_PER_LGR_MAX_PREFER; + } pclc_base->hdr.length = htons(plen); memcpy(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); @@ -986,13 +1003,13 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc, u8 *eid, struct smc_init_info *ini) { struct smc_connection *conn = &smc->conn; + struct smc_clc_first_contact_ext_v2x fce; struct smc_clc_msg_accept_confirm *clc; - struct smc_clc_first_contact_ext fce; struct smc_clc_fce_gid_ext gle; struct smc_clc_msg_trail trl; + int i, len, fce_len; struct kvec vec[5]; struct msghdr msg; - int i, len; /* send SMC Confirm CLC msg */ clc = (struct smc_clc_msg_accept_confirm *)clc_v2; @@ -1007,7 +1024,7 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc, clc->d0.gid = conn->lgr->smcd->ops->get_local_gid(conn->lgr->smcd); clc->d0.token = conn->rmb_desc->token; - clc->d0.dmbe_size = conn->rmbe_size_short; + clc->d0.dmbe_size = conn->rmbe_size_comp; clc->d0.dmbe_idx = 0; memcpy(&clc->d0.linkid, conn->lgr->id, SMC_LGR_ID_SIZE); if (version == SMC_V1) { @@ -1018,8 +1035,10 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc, if (eid && eid[0]) memcpy(clc_v2->d1.eid, eid, SMC_MAX_EID_LEN); len = SMCD_CLC_ACCEPT_CONFIRM_LEN_V2; - if (first_contact) - smc_clc_fill_fce(&fce, &len); + if (first_contact) { + fce_len = smc_clc_fill_fce(&fce, ini); + len += fce_len; + } clc_v2->hdr.length = htons(len); } memcpy(trl.eyecatcher, SMCD_EYECATCHER, @@ -1050,7 +1069,7 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc, clc->r0.qp_mtu = min(link->path_mtu, link->peer_mtu); break; } - clc->r0.rmbe_size = conn->rmbe_size_short; + clc->r0.rmbe_size = conn->rmbe_size_comp; clc->r0.rmb_dma_addr = conn->rmb_desc->is_vm ? 
cpu_to_be64((uintptr_t)conn->rmb_desc->cpu_addr) : cpu_to_be64((u64)sg_dma_address @@ -1063,15 +1082,14 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc, memcpy(clc_v2->r1.eid, eid, SMC_MAX_EID_LEN); len = SMCR_CLC_ACCEPT_CONFIRM_LEN_V2; if (first_contact) { - smc_clc_fill_fce(&fce, &len); - fce.v2_direct = !link->lgr->uses_gateway; - memset(&gle, 0, sizeof(gle)); - if (ini && clc->hdr.type == SMC_CLC_CONFIRM) { + fce_len = smc_clc_fill_fce(&fce, ini); + len += fce_len; + fce.fce_v2_base.v2_direct = !link->lgr->uses_gateway; + if (clc->hdr.type == SMC_CLC_CONFIRM) { + memset(&gle, 0, sizeof(gle)); gle.gid_cnt = ini->smcrv2.gidlist.len; len += sizeof(gle); len += gle.gid_cnt * sizeof(gle.gid[0]); - } else { - len += sizeof(gle.reserved); } } clc_v2->hdr.length = htons(len); @@ -1094,7 +1112,7 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc, sizeof(trl); if (version > SMC_V1 && first_contact) { vec[i].iov_base = &fce; - vec[i++].iov_len = sizeof(fce); + vec[i++].iov_len = fce_len; if (!conn->lgr->is_smcd) { if (clc->hdr.type == SMC_CLC_CONFIRM) { vec[i].iov_base = &gle; @@ -1102,9 +1120,6 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc, vec[i].iov_base = &ini->smcrv2.gidlist.list; vec[i++].iov_len = gle.gid_cnt * sizeof(gle.gid[0]); - } else { - vec[i].iov_base = &gle.reserved; - vec[i++].iov_len = sizeof(gle.reserved); } } } @@ -1141,7 +1156,7 @@ int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact, /* send CLC ACCEPT message across internal TCP socket */ int smc_clc_send_accept(struct smc_sock *new_smc, bool srv_first_contact, - u8 version, u8 *negotiated_eid) + u8 version, u8 *negotiated_eid, struct smc_init_info *ini) { struct smc_clc_msg_accept_confirm_v2 aclc_v2; int len; @@ -1149,13 +1164,95 @@ int smc_clc_send_accept(struct smc_sock *new_smc, bool srv_first_contact, memset(&aclc_v2, 0, sizeof(aclc_v2)); aclc_v2.hdr.type = SMC_CLC_ACCEPT; len = smc_clc_send_confirm_accept(new_smc, &aclc_v2, 
srv_first_contact, - version, negotiated_eid, NULL); + version, negotiated_eid, ini); if (len < ntohs(aclc_v2.hdr.length)) len = len >= 0 ? -EPROTO : -new_smc->clcsock->sk->sk_err; return len > 0 ? 0 : len; } +int smc_clc_srv_v2x_features_validate(struct smc_clc_msg_proposal *pclc, + struct smc_init_info *ini) +{ + struct smc_clc_v2_extension *pclc_v2_ext; + + ini->max_conns = SMC_CONN_PER_LGR_MAX; + ini->max_links = SMC_LINKS_ADD_LNK_MAX; + + if ((!(ini->smcd_version & SMC_V2) && !(ini->smcr_version & SMC_V2)) || + ini->release_nr < SMC_RELEASE_1) + return 0; + + pclc_v2_ext = smc_get_clc_v2_ext(pclc); + if (!pclc_v2_ext) + return SMC_CLC_DECL_NOV2EXT; + + if (ini->smcr_version & SMC_V2) { + ini->max_conns = min_t(u8, pclc_v2_ext->max_conns, SMC_CONN_PER_LGR_PREFER); + if (ini->max_conns < SMC_CONN_PER_LGR_MIN) + return SMC_CLC_DECL_MAXCONNERR; + + ini->max_links = min_t(u8, pclc_v2_ext->max_links, SMC_LINKS_PER_LGR_MAX_PREFER); + if (ini->max_links < SMC_LINKS_ADD_LNK_MIN) + return SMC_CLC_DECL_MAXLINKERR; + } + + return 0; +} + +int smc_clc_clnt_v2x_features_validate(struct smc_clc_first_contact_ext *fce, + struct smc_init_info *ini) +{ + struct smc_clc_first_contact_ext_v2x *fce_v2x = + (struct smc_clc_first_contact_ext_v2x *)fce; + + if (ini->release_nr < SMC_RELEASE_1) + return 0; + + if (!ini->is_smcd) { + if (fce_v2x->max_conns < SMC_CONN_PER_LGR_MIN) + return SMC_CLC_DECL_MAXCONNERR; + ini->max_conns = fce_v2x->max_conns; + + if (fce_v2x->max_links > SMC_LINKS_ADD_LNK_MAX || + fce_v2x->max_links < SMC_LINKS_ADD_LNK_MIN) + return SMC_CLC_DECL_MAXLINKERR; + ini->max_links = fce_v2x->max_links; + } + + return 0; +} + +int smc_clc_v2x_features_confirm_check(struct smc_clc_msg_accept_confirm *cclc, + struct smc_init_info *ini) +{ + struct smc_clc_msg_accept_confirm_v2 *clc_v2 = + (struct smc_clc_msg_accept_confirm_v2 *)cclc; + struct smc_clc_first_contact_ext *fce = + smc_get_clc_first_contact_ext(clc_v2, ini->is_smcd); + struct smc_clc_first_contact_ext_v2x 
*fce_v2x = + (struct smc_clc_first_contact_ext_v2x *)fce; + + if (cclc->hdr.version == SMC_V1 || + !(cclc->hdr.typev2 & SMC_FIRST_CONTACT_MASK)) + return 0; + + if (ini->release_nr != fce->release) + return SMC_CLC_DECL_RELEASEERR; + + if (fce->release < SMC_RELEASE_1) + return 0; + + if (!ini->is_smcd) { + if (fce_v2x->max_conns != ini->max_conns) + return SMC_CLC_DECL_MAXCONNERR; + if (fce_v2x->max_links != ini->max_links) + return SMC_CLC_DECL_MAXLINKERR; + } + + return 0; +} + void smc_clc_get_hostname(u8 **host) { *host = &smc_hostname[0]; diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h index 5fee545c9a10..c5c8e7db775a 100644 --- a/net/smc/smc_clc.h +++ b/net/smc/smc_clc.h @@ -45,6 +45,9 @@ #define SMC_CLC_DECL_NOSEID 0x03030006 /* peer sent no SEID */ #define SMC_CLC_DECL_NOSMCD2DEV 0x03030007 /* no SMC-Dv2 device found */ #define SMC_CLC_DECL_NOUEID 0x03030008 /* peer sent no UEID */ +#define SMC_CLC_DECL_RELEASEERR 0x03030009 /* release version negotiate failed */ +#define SMC_CLC_DECL_MAXCONNERR 0x0303000a /* max connections negotiate failed */ +#define SMC_CLC_DECL_MAXLINKERR 0x0303000b /* max links negotiate failed */ #define SMC_CLC_DECL_MODEUNSUPP 0x03040000 /* smc modes do not match (R or D)*/ #define SMC_CLC_DECL_RMBE_EC 0x03050000 /* peer has eyecatcher in RMBE */ #define SMC_CLC_DECL_OPTUNSUPP 0x03060000 /* fastopen sockopt not supported */ @@ -133,7 +136,9 @@ struct smc_clc_smcd_gid_chid { struct smc_clc_v2_extension { struct smc_clnt_opts_area_hdr hdr; u8 roce[16]; /* RoCEv2 GID */ - u8 reserved[16]; + u8 max_conns; + u8 max_links; + u8 reserved[14]; u8 user_eids[][SMC_MAX_EID_LEN]; }; @@ -147,7 +152,9 @@ struct smc_clc_msg_proposal_prefix { /* prefix part of clc proposal message*/ struct smc_clc_msg_smcd { /* SMC-D GID information */ struct smc_clc_smcd_gid_chid ism; /* ISM native GID+CHID of requestor */ __be16 v2_ext_offset; /* SMC Version 2 Extension Offset */ - u8 reserved[28]; + u8 vendor_oui[3]; /* vendor organizationally unique 
identifier */ + u8 vendor_exp_options[5]; + u8 reserved[20]; }; struct smc_clc_smcd_v2_extension { @@ -231,8 +238,19 @@ struct smc_clc_first_contact_ext { u8 hostname[SMC_MAX_HOSTNAME_LEN]; }; +struct smc_clc_first_contact_ext_v2x { + struct smc_clc_first_contact_ext fce_v2_base; + u8 max_conns; /* for SMC-R only */ + u8 max_links; /* for SMC-R only */ + u8 reserved3[2]; + __be32 vendor_exp_options; + u8 reserved4[8]; +} __packed; /* format defined in + * IBM Shared Memory Communications Version 2 (Third Edition) + * (https://www.ibm.com/support/pages/node/7009315) + */ + struct smc_clc_fce_gid_ext { - u8 reserved[16]; u8 gid_cnt; u8 reserved2[3]; u8 gid[][SMC_GID_SIZE]; @@ -370,6 +388,27 @@ smc_get_clc_smcd_v2_ext(struct smc_clc_v2_extension *prop_v2ext) ntohs(prop_v2ext->hdr.smcd_v2_ext_offset)); } +static inline struct smc_clc_first_contact_ext * +smc_get_clc_first_contact_ext(struct smc_clc_msg_accept_confirm_v2 *clc_v2, + bool is_smcd) +{ + int clc_v2_len; + + if (clc_v2->hdr.version == SMC_V1 || + !(clc_v2->hdr.typev2 & SMC_FIRST_CONTACT_MASK)) + return NULL; + + if (is_smcd) + clc_v2_len = + offsetofend(struct smc_clc_msg_accept_confirm_v2, d1); + else + clc_v2_len = + offsetofend(struct smc_clc_msg_accept_confirm_v2, r1); + + return (struct smc_clc_first_contact_ext *)(((u8 *)clc_v2) + + clc_v2_len); +} + struct smcd_dev; struct smc_init_info; @@ -382,7 +421,13 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini); int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact, u8 version, u8 *eid, struct smc_init_info *ini); int smc_clc_send_accept(struct smc_sock *smc, bool srv_first_contact, - u8 version, u8 *negotiated_eid); + u8 version, u8 *negotiated_eid, struct smc_init_info *ini); +int smc_clc_srv_v2x_features_validate(struct smc_clc_msg_proposal *pclc, + struct smc_init_info *ini); +int smc_clc_clnt_v2x_features_validate(struct smc_clc_first_contact_ext *fce, + struct smc_init_info *ini); +int 
smc_clc_v2x_features_confirm_check(struct smc_clc_msg_accept_confirm *cclc, + struct smc_init_info *ini); void smc_clc_init(void) __init; void smc_clc_exit(void); void smc_clc_get_hostname(u8 **host); diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 3f465faf2b68..bd01dd31e4bd 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -319,6 +319,10 @@ static int smc_nl_fill_smcr_lgr_v2(struct smc_link_group *lgr, goto errattr; if (nla_put_u8(skb, SMC_NLA_LGR_R_V2_DIRECT, !lgr->uses_gateway)) goto errv2attr; + if (nla_put_u8(skb, SMC_NLA_LGR_R_V2_MAX_CONNS, lgr->max_conns)) + goto errv2attr; + if (nla_put_u8(skb, SMC_NLA_LGR_R_V2_MAX_LINKS, lgr->max_links)) + goto errv2attr; nla_nest_end(skb, v2_attrs); return 0; @@ -895,9 +899,13 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) lgr->uses_gateway = ini->smcrv2.uses_gateway; memcpy(lgr->nexthop_mac, ini->smcrv2.nexthop_mac, ETH_ALEN); + lgr->max_conns = ini->max_conns; + lgr->max_links = ini->max_links; } else { ibdev = ini->ib_dev; ibport = ini->ib_port; + lgr->max_conns = SMC_CONN_PER_LGR_MAX; + lgr->max_links = SMC_LINKS_ADD_LNK_MAX; } memcpy(lgr->pnet_id, ibdev->pnetid[ibport - 1], SMC_MAX_PNETID_LEN); @@ -1664,6 +1672,9 @@ void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport) !rdma_dev_access_netns(smcibdev->ibdev, lgr->net)) continue; + if (lgr->type == SMC_LGR_SINGLE && lgr->max_links <= 1) + continue; + /* trigger local add link processing */ link = smc_llc_usable_link(lgr); if (link) @@ -1888,7 +1899,7 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini) (ini->smcd_version == SMC_V2 || lgr->vlan_id == ini->vlan_id) && (role == SMC_CLNT || ini->is_smcd || - (lgr->conns_num < SMC_RMBS_PER_LGR_MAX && + (lgr->conns_num < lgr->max_conns && !bitmap_full(lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX)))) { /* link group found */ ini->first_contact_local = 0; @@ -2309,31 +2320,30 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool 
is_rmb) struct smc_connection *conn = &smc->conn; struct smc_link_group *lgr = conn->lgr; struct list_head *buf_list; - int bufsize, bufsize_short; + int bufsize, bufsize_comp; struct rw_semaphore *lock; /* lock buffer list */ bool is_dgraded = false; - int sk_buf_size; if (is_rmb) /* use socket recv buffer size (w/o overhead) as start value */ - sk_buf_size = smc->sk.sk_rcvbuf; + bufsize = smc->sk.sk_rcvbuf / 2; else /* use socket send buffer size (w/o overhead) as start value */ - sk_buf_size = smc->sk.sk_sndbuf; + bufsize = smc->sk.sk_sndbuf / 2; - for (bufsize_short = smc_compress_bufsize(sk_buf_size, is_smcd, is_rmb); - bufsize_short >= 0; bufsize_short--) { + for (bufsize_comp = smc_compress_bufsize(bufsize, is_smcd, is_rmb); + bufsize_comp >= 0; bufsize_comp--) { if (is_rmb) { lock = &lgr->rmbs_lock; - buf_list = &lgr->rmbs[bufsize_short]; + buf_list = &lgr->rmbs[bufsize_comp]; } else { lock = &lgr->sndbufs_lock; - buf_list = &lgr->sndbufs[bufsize_short]; + buf_list = &lgr->sndbufs[bufsize_comp]; } - bufsize = smc_uncompress_bufsize(bufsize_short); + bufsize = smc_uncompress_bufsize(bufsize_comp); /* check for reusable slot in the link group */ - buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list); + buf_desc = smc_buf_get_slot(bufsize_comp, lock, buf_list); if (buf_desc) { buf_desc->is_dma_need_sync = 0; SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, bufsize); @@ -2377,8 +2387,8 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb) if (is_rmb) { conn->rmb_desc = buf_desc; - conn->rmbe_size_short = bufsize_short; - smc->sk.sk_rcvbuf = bufsize; + conn->rmbe_size_comp = bufsize_comp; + smc->sk.sk_rcvbuf = bufsize * 2; atomic_set(&conn->bytes_to_rcv, 0); conn->rmbe_update_limit = smc_rmb_wnd_update_limit(buf_desc->len); @@ -2386,7 +2396,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb) smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */ } else { conn->sndbuf_desc = buf_desc; - smc->sk.sk_sndbuf = 
bufsize; + smc->sk.sk_sndbuf = bufsize * 2; atomic_set(&conn->sndbuf_space, bufsize); } return 0; diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 1645fba0d2d3..120027d40469 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -22,6 +22,15 @@ #include "smc_ib.h" #define SMC_RMBS_PER_LGR_MAX 255 /* max. # of RMBs per link group */ +#define SMC_CONN_PER_LGR_MIN 16 /* min. # of connections per link group */ +#define SMC_CONN_PER_LGR_MAX 255 /* max. # of connections per link group, + * also is the default value for SMC-R v1 and v2.0 + */ +#define SMC_CONN_PER_LGR_PREFER 255 /* Preferred connections per link group used for + * SMC-R v2.1 and later negotiation, vendors or + * distributions may modify it to a value between + * 16-255 as needed. + */ struct smc_lgr_list { /* list of link group definition */ struct list_head list; @@ -164,6 +173,15 @@ struct smc_link { */ #define SMC_LINKS_PER_LGR_MAX 3 #define SMC_SINGLE_LINK 0 +#define SMC_LINKS_ADD_LNK_MIN 1 /* min. # of links per link group */ +#define SMC_LINKS_ADD_LNK_MAX 2 /* max. # of links per link group, also is the + * default value for smc-r v1.0 and v2.0 + */ +#define SMC_LINKS_PER_LGR_MAX_PREFER 2 /* Preferred max links per link group used for + * SMC-R v2.1 and later negotiation, vendors or + * distributions may modify it to a value between + * 1-2 as needed. 
+ */ /* tx/rx buffer list element for sndbufs list and rmbs list of a lgr */ struct smc_buf_desc { @@ -331,6 +349,10 @@ struct smc_link_group { __be32 saddr; /* net namespace */ struct net *net; + u8 max_conns; + /* max conn can be assigned to lgr */ + u8 max_links; + /* max links can be added in lgr */ }; struct { /* SMC-D */ u64 peer_gid; @@ -374,6 +396,9 @@ struct smc_init_info { u8 is_smcd; u8 smc_type_v1; u8 smc_type_v2; + u8 release_nr; + u8 max_conns; + u8 max_links; u8 first_contact_peer; u8 first_contact_local; unsigned short vlan_id; @@ -539,7 +564,6 @@ int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini); void smc_conn_free(struct smc_connection *conn); int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini); -void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr); int smc_core_init(void); void smc_core_exit(void); diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h index 034295676e88..4df5f8c8a0a1 100644 --- a/net/smc/smc_ib.h +++ b/net/smc/smc_ib.h @@ -96,7 +96,6 @@ void smc_ib_destroy_queue_pair(struct smc_link *lnk); int smc_ib_create_queue_pair(struct smc_link *lnk); int smc_ib_ready_link(struct smc_link *lnk); int smc_ib_modify_qp_rts(struct smc_link *lnk); -int smc_ib_modify_qp_reset(struct smc_link *lnk); int smc_ib_modify_qp_error(struct smc_link *lnk); long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev); int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags, diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index 90f0b60b196a..018ce8133b02 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -52,14 +52,13 @@ struct smc_llc_msg_confirm_link { /* type 0x01 */ u8 link_num; u8 link_uid[SMC_LGR_ID_SIZE]; u8 max_links; - u8 reserved[9]; + u8 max_conns; + u8 reserved[8]; }; #define SMC_LLC_FLAG_ADD_LNK_REJ 0x40 #define SMC_LLC_REJ_RSN_NO_ALT_PATH 1 -#define SMC_LLC_ADD_LNK_MAX_LINKS 2 - struct smc_llc_msg_add_link { /* type 0x02 */ struct smc_llc_hdr hd; u8 sender_mac[ETH_ALEN]; @@ -471,7 
+470,12 @@ int smc_llc_send_confirm_link(struct smc_link *link, hton24(confllc->sender_qp_num, link->roce_qp->qp_num); confllc->link_num = link->link_id; memcpy(confllc->link_uid, link->link_uid, SMC_LGR_ID_SIZE); - confllc->max_links = SMC_LLC_ADD_LNK_MAX_LINKS; + confllc->max_links = SMC_LINKS_ADD_LNK_MAX; + if (link->lgr->smc_version == SMC_V2 && + link->lgr->peer_smc_release >= SMC_RELEASE_1) { + confllc->max_conns = link->lgr->max_conns; + confllc->max_links = link->lgr->max_links; + } /* send llc message */ rc = smc_wr_tx_send(link, pend); put_out: @@ -1041,6 +1045,11 @@ int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry) goto out_reject; } + if (lgr->type == SMC_LGR_SINGLE && lgr->max_links <= 1) { + rc = 0; + goto out_reject; + } + ini->vlan_id = lgr->vlan_id; if (lgr->smc_version == SMC_V2) { ini->check_smcrv2 = true; @@ -1165,6 +1174,9 @@ static void smc_llc_cli_add_link_invite(struct smc_link *link, lgr->type == SMC_LGR_ASYMMETRIC_PEER) goto out; + if (lgr->type == SMC_LGR_SINGLE && lgr->max_links <= 1) + goto out; + ini = kzalloc(sizeof(*ini), GFP_KERNEL); if (!ini) goto out; @@ -1410,6 +1422,11 @@ int smc_llc_srv_add_link(struct smc_link *link, goto out; } + if (lgr->type == SMC_LGR_SINGLE && lgr->max_links <= 1) { + rc = 0; + goto out; + } + /* ignore client add link recommendation, start new flow */ ini->vlan_id = lgr->vlan_id; if (lgr->smc_version == SMC_V2) { diff --git a/net/smc/smc_sysctl.c b/net/smc/smc_sysctl.c index 3ab2d8eefc55..5cbc18c6e62b 100644 --- a/net/smc/smc_sysctl.c +++ b/net/smc/smc_sysctl.c @@ -21,6 +21,10 @@ static int min_sndbuf = SMC_BUF_MIN_SIZE; static int min_rcvbuf = SMC_BUF_MIN_SIZE; +static int max_sndbuf = INT_MAX / 2; +static int max_rcvbuf = INT_MAX / 2; +static const int net_smc_wmem_init = (64 * 1024); +static const int net_smc_rmem_init = (64 * 1024); static struct ctl_table smc_table[] = { { @@ -53,6 +57,7 @@ static struct ctl_table smc_table[] = { .mode = 0644, .proc_handler = 
proc_dointvec_minmax, .extra1 = &min_sndbuf, + .extra2 = &max_sndbuf, }, { .procname = "rmem", @@ -61,6 +66,7 @@ static struct ctl_table smc_table[] = { .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = &min_rcvbuf, + .extra2 = &max_rcvbuf, }, { } }; @@ -89,8 +95,8 @@ int __net_init smc_sysctl_net_init(struct net *net) net->smc.sysctl_autocorking_size = SMC_AUTOCORKING_DEFAULT_SIZE; net->smc.sysctl_smcr_buf_type = SMCR_PHYS_CONT_BUFS; net->smc.sysctl_smcr_testlink_time = SMC_LLC_TESTLINK_DEFAULT_TIME; - WRITE_ONCE(net->smc.sysctl_wmem, READ_ONCE(net->ipv4.sysctl_tcp_wmem[1])); - WRITE_ONCE(net->smc.sysctl_rmem, READ_ONCE(net->ipv4.sysctl_tcp_rmem[1])); + WRITE_ONCE(net->smc.sysctl_wmem, net_smc_wmem_init); + WRITE_ONCE(net->smc.sysctl_rmem, net_smc_rmem_init); return 0; |