summaryrefslogtreecommitdiff
path: root/net/smc
diff options
context:
space:
mode:
Diffstat (limited to 'net/smc')
-rw-r--r--net/smc/af_smc.c167
-rw-r--r--net/smc/smc.h7
-rw-r--r--net/smc/smc_clc.c151
-rw-r--r--net/smc/smc_clc.h53
-rw-r--r--net/smc/smc_core.c38
-rw-r--r--net/smc/smc_core.h26
-rw-r--r--net/smc/smc_ib.h1
-rw-r--r--net/smc/smc_llc.c25
-rw-r--r--net/smc/smc_sysctl.c10
9 files changed, 375 insertions, 103 deletions
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index a7f887d91d89..bacdd971615e 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -378,8 +378,8 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
sk->sk_state = SMC_INIT;
sk->sk_destruct = smc_destruct;
sk->sk_protocol = protocol;
- WRITE_ONCE(sk->sk_sndbuf, READ_ONCE(net->smc.sysctl_wmem));
- WRITE_ONCE(sk->sk_rcvbuf, READ_ONCE(net->smc.sysctl_rmem));
+ WRITE_ONCE(sk->sk_sndbuf, 2 * READ_ONCE(net->smc.sysctl_wmem));
+ WRITE_ONCE(sk->sk_rcvbuf, 2 * READ_ONCE(net->smc.sysctl_rmem));
smc = smc_sk(sk);
INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
INIT_WORK(&smc->connect_work, smc_connect_work);
@@ -436,24 +436,9 @@ out:
return rc;
}
-static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
- unsigned long mask)
-{
- /* options we don't get control via setsockopt for */
- nsk->sk_type = osk->sk_type;
- nsk->sk_sndbuf = osk->sk_sndbuf;
- nsk->sk_rcvbuf = osk->sk_rcvbuf;
- nsk->sk_sndtimeo = osk->sk_sndtimeo;
- nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
- nsk->sk_mark = osk->sk_mark;
- nsk->sk_priority = osk->sk_priority;
- nsk->sk_rcvlowat = osk->sk_rcvlowat;
- nsk->sk_bound_dev_if = osk->sk_bound_dev_if;
- nsk->sk_err = osk->sk_err;
-
- nsk->sk_flags &= ~mask;
- nsk->sk_flags |= osk->sk_flags & mask;
-}
+/* copy only relevant settings and flags of SOL_SOCKET level from smc to
+ * clc socket (since smc is not called for these options from net/core)
+ */
#define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \
(1UL << SOCK_KEEPOPEN) | \
@@ -470,9 +455,55 @@ static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
(1UL << SOCK_NOFCS) | \
(1UL << SOCK_FILTER_LOCKED) | \
(1UL << SOCK_TSTAMP_NEW))
-/* copy only relevant settings and flags of SOL_SOCKET level from smc to
- * clc socket (since smc is not called for these options from net/core)
- */
+
+/* if set, use value set by setsockopt() - else use IPv4 or SMC sysctl value */
+static void smc_adjust_sock_bufsizes(struct sock *nsk, struct sock *osk,
+ unsigned long mask)
+{
+ struct net *nnet = sock_net(nsk);
+
+ nsk->sk_userlocks = osk->sk_userlocks;
+ if (osk->sk_userlocks & SOCK_SNDBUF_LOCK) {
+ nsk->sk_sndbuf = osk->sk_sndbuf;
+ } else {
+ if (mask == SK_FLAGS_SMC_TO_CLC)
+ WRITE_ONCE(nsk->sk_sndbuf,
+ READ_ONCE(nnet->ipv4.sysctl_tcp_wmem[1]));
+ else
+ WRITE_ONCE(nsk->sk_sndbuf,
+ 2 * READ_ONCE(nnet->smc.sysctl_wmem));
+ }
+ if (osk->sk_userlocks & SOCK_RCVBUF_LOCK) {
+ nsk->sk_rcvbuf = osk->sk_rcvbuf;
+ } else {
+ if (mask == SK_FLAGS_SMC_TO_CLC)
+ WRITE_ONCE(nsk->sk_rcvbuf,
+ READ_ONCE(nnet->ipv4.sysctl_tcp_rmem[1]));
+ else
+ WRITE_ONCE(nsk->sk_rcvbuf,
+ 2 * READ_ONCE(nnet->smc.sysctl_rmem));
+ }
+}
+
+static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
+ unsigned long mask)
+{
+ /* options we don't get control via setsockopt for */
+ nsk->sk_type = osk->sk_type;
+ nsk->sk_sndtimeo = osk->sk_sndtimeo;
+ nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
+ nsk->sk_mark = READ_ONCE(osk->sk_mark);
+ nsk->sk_priority = osk->sk_priority;
+ nsk->sk_rcvlowat = osk->sk_rcvlowat;
+ nsk->sk_bound_dev_if = osk->sk_bound_dev_if;
+ nsk->sk_err = osk->sk_err;
+
+ nsk->sk_flags &= ~mask;
+ nsk->sk_flags |= osk->sk_flags & mask;
+
+ smc_adjust_sock_bufsizes(nsk, osk, mask);
+}
+
static void smc_copy_sock_settings_to_clc(struct smc_sock *smc)
{
smc_copy_sock_settings(smc->clcsock->sk, &smc->sk, SK_FLAGS_SMC_TO_CLC);
@@ -610,20 +641,22 @@ static int smcr_clnt_conf_first_link(struct smc_sock *smc)
smc_llc_link_active(link);
smcr_lgr_set_type(link->lgr, SMC_LGR_SINGLE);
- /* optional 2nd link, receive ADD LINK request from server */
- qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME,
- SMC_LLC_ADD_LINK);
- if (!qentry) {
- struct smc_clc_msg_decline dclc;
-
- rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
- SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
- if (rc == -EAGAIN)
- rc = 0; /* no DECLINE received, go with one link */
- return rc;
+ if (link->lgr->max_links > 1) {
+ /* optional 2nd link, receive ADD LINK request from server */
+ qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME,
+ SMC_LLC_ADD_LINK);
+ if (!qentry) {
+ struct smc_clc_msg_decline dclc;
+
+ rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
+ SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
+ if (rc == -EAGAIN)
+ rc = 0; /* no DECLINE received, go with one link */
+ return rc;
+ }
+ smc_llc_flow_qentry_clr(&link->lgr->llc_flow_lcl);
+ smc_llc_cli_add_link(link, qentry);
}
- smc_llc_flow_qentry_clr(&link->lgr->llc_flow_lcl);
- smc_llc_cli_add_link(link, qentry);
return 0;
}
@@ -1113,7 +1146,7 @@ static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc,
#define SMC_CLC_MAX_ACCEPT_LEN \
(sizeof(struct smc_clc_msg_accept_confirm_v2) + \
- sizeof(struct smc_clc_first_contact_ext) + \
+ sizeof(struct smc_clc_first_contact_ext_v2x) + \
sizeof(struct smc_clc_msg_trail))
/* CLC handshake during connect */
@@ -1167,8 +1200,8 @@ static int smc_connect_rdma_v2_prepare(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
(struct smc_clc_msg_accept_confirm_v2 *)aclc;
struct smc_clc_first_contact_ext *fce =
- (struct smc_clc_first_contact_ext *)
- (((u8 *)clc_v2) + sizeof(*clc_v2));
+ smc_get_clc_first_contact_ext(clc_v2, false);
+ int rc;
if (!ini->first_contact_peer || aclc->hdr.version == SMC_V1)
return 0;
@@ -1187,6 +1220,12 @@ static int smc_connect_rdma_v2_prepare(struct smc_sock *smc,
return SMC_CLC_DECL_NOINDIRECT;
}
}
+
+ ini->release_nr = fce->release;
+ rc = smc_clc_clnt_v2x_features_validate(fce, ini);
+ if (rc)
+ return rc;
+
return 0;
}
@@ -1205,6 +1244,8 @@ static int smc_connect_rdma(struct smc_sock *smc,
memcpy(ini->peer_systemid, aclc->r0.lcl.id_for_peer, SMC_SYSTEMID_LEN);
memcpy(ini->peer_gid, aclc->r0.lcl.gid, SMC_GID_SIZE);
memcpy(ini->peer_mac, aclc->r0.lcl.mac, ETH_ALEN);
+ ini->max_conns = SMC_CONN_PER_LGR_MAX;
+ ini->max_links = SMC_LINKS_ADD_LNK_MAX;
reason_code = smc_connect_rdma_v2_prepare(smc, aclc, ini);
if (reason_code)
@@ -1355,6 +1396,16 @@ static int smc_connect_ism(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm_v2 *aclc_v2 =
(struct smc_clc_msg_accept_confirm_v2 *)aclc;
+ if (ini->first_contact_peer) {
+ struct smc_clc_first_contact_ext *fce =
+ smc_get_clc_first_contact_ext(aclc_v2, true);
+
+ ini->release_nr = fce->release;
+ rc = smc_clc_clnt_v2x_features_validate(fce, ini);
+ if (rc)
+ return rc;
+ }
+
rc = smc_v2_determine_accepted_chid(aclc_v2, ini);
if (rc)
return rc;
@@ -1389,7 +1440,7 @@ static int smc_connect_ism(struct smc_sock *smc,
}
rc = smc_clc_send_confirm(smc, ini->first_contact_local,
- aclc->hdr.version, eid, NULL);
+ aclc->hdr.version, eid, ini);
if (rc)
goto connect_abort;
mutex_unlock(&smc_server_lgr_pending);
@@ -1789,7 +1840,7 @@ void smc_close_non_accepted(struct sock *sk)
lock_sock(sk);
if (!sk->sk_lingertime)
/* wait for peer closing */
- sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT;
+ WRITE_ONCE(sk->sk_lingertime, SMC_MAX_STREAM_WAIT_TIMEOUT);
__smc_release(smc);
release_sock(sk);
sock_put(sk); /* sock_hold above */
@@ -1839,10 +1890,12 @@ static int smcr_serv_conf_first_link(struct smc_sock *smc)
smc_llc_link_active(link);
smcr_lgr_set_type(link->lgr, SMC_LGR_SINGLE);
- down_write(&link->lgr->llc_conf_mutex);
- /* initial contact - try to establish second link */
- smc_llc_srv_add_link(link, NULL);
- up_write(&link->lgr->llc_conf_mutex);
+ if (link->lgr->max_links > 1) {
+ down_write(&link->lgr->llc_conf_mutex);
+ /* initial contact - try to establish second link */
+ smc_llc_srv_add_link(link, NULL);
+ up_write(&link->lgr->llc_conf_mutex);
+ }
return 0;
}
@@ -1965,6 +2018,10 @@ static int smc_listen_v2_check(struct smc_sock *new_smc,
}
}
+ ini->release_nr = pclc_v2_ext->hdr.flag.release;
+ if (pclc_v2_ext->hdr.flag.release > SMC_RELEASE)
+ ini->release_nr = SMC_RELEASE;
+
out:
if (!ini->smcd_version && !ini->smcr_version)
return rc;
@@ -2399,6 +2456,10 @@ static void smc_listen_work(struct work_struct *work)
if (rc)
goto out_decl;
+ rc = smc_clc_srv_v2x_features_validate(pclc, ini);
+ if (rc)
+ goto out_decl;
+
mutex_lock(&smc_server_lgr_pending);
smc_close_init(new_smc);
smc_rx_init(new_smc);
@@ -2412,7 +2473,7 @@ static void smc_listen_work(struct work_struct *work)
/* send SMC Accept CLC message */
accept_version = ini->is_smcd ? ini->smcd_version : ini->smcr_version;
rc = smc_clc_send_accept(new_smc, ini->first_contact_local,
- accept_version, ini->negotiated_eid);
+ accept_version, ini->negotiated_eid, ini);
if (rc)
goto out_unlock;
@@ -2431,6 +2492,18 @@ static void smc_listen_work(struct work_struct *work)
goto out_decl;
}
+ rc = smc_clc_v2x_features_confirm_check(cclc, ini);
+ if (rc) {
+ if (!ini->is_smcd)
+ goto out_unlock;
+ goto out_decl;
+ }
+
+ /* fce smc release version is needed in smc_listen_rdma_finish,
+ * so save fce info here.
+ */
+ smc_conn_save_peer_info_fce(new_smc, cclc);
+
/* finish worker */
if (!ini->is_smcd) {
rc = smc_listen_rdma_finish(new_smc, cclc,
@@ -2479,8 +2552,6 @@ static void smc_tcp_listen_work(struct work_struct *work)
sock_hold(lsk); /* sock_put in smc_listen_work */
INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);
smc_copy_sock_settings_to_smc(new_smc);
- new_smc->sk.sk_sndbuf = lsmc->sk.sk_sndbuf;
- new_smc->sk.sk_rcvbuf = lsmc->sk.sk_rcvbuf;
sock_hold(&new_smc->sk); /* sock_put in passive closing */
if (!queue_work(smc_hs_wq, &new_smc->smc_listen_work))
sock_put(&new_smc->sk);
diff --git a/net/smc/smc.h b/net/smc/smc.h
index 2eeea4cdc718..24745fde4ac2 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -21,7 +21,10 @@
#define SMC_V1 1 /* SMC version V1 */
#define SMC_V2 2 /* SMC version V2 */
-#define SMC_RELEASE 0
+
+#define SMC_RELEASE_0 0
+#define SMC_RELEASE_1 1
+#define SMC_RELEASE SMC_RELEASE_1 /* the latest release version */
#define SMCPROTO_SMC 0 /* SMC protocol, IPv4 */
#define SMCPROTO_SMC6 1 /* SMC protocol, IPv6 */
@@ -161,7 +164,7 @@ struct smc_connection {
struct smc_buf_desc *sndbuf_desc; /* send buffer descriptor */
struct smc_buf_desc *rmb_desc; /* RMBE descriptor */
- int rmbe_size_short;/* compressed notation */
+ int rmbe_size_comp; /* compressed notation */
int rmbe_update_limit;
/* lower limit for consumer
* cursor update
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index b9b8b07aa702..8deb46c28f1d 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -391,9 +391,7 @@ smc_clc_msg_acc_conf_valid(struct smc_clc_msg_accept_confirm_v2 *clc_v2)
return false;
} else {
if (hdr->typev1 == SMC_TYPE_D &&
- ntohs(hdr->length) != SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 &&
- (ntohs(hdr->length) != SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 +
- sizeof(struct smc_clc_first_contact_ext)))
+ ntohs(hdr->length) < SMCD_CLC_ACCEPT_CONFIRM_LEN_V2)
return false;
if (hdr->typev1 == SMC_TYPE_R &&
ntohs(hdr->length) < SMCR_CLC_ACCEPT_CONFIRM_LEN_V2)
@@ -420,13 +418,29 @@ smc_clc_msg_decl_valid(struct smc_clc_msg_decline *dclc)
return true;
}
-static void smc_clc_fill_fce(struct smc_clc_first_contact_ext *fce, int *len)
+static int smc_clc_fill_fce(struct smc_clc_first_contact_ext_v2x *fce,
+ struct smc_init_info *ini)
{
+ int ret = sizeof(*fce);
+
memset(fce, 0, sizeof(*fce));
- fce->os_type = SMC_CLC_OS_LINUX;
- fce->release = SMC_RELEASE;
- memcpy(fce->hostname, smc_hostname, sizeof(smc_hostname));
- (*len) += sizeof(*fce);
+ fce->fce_v2_base.os_type = SMC_CLC_OS_LINUX;
+ fce->fce_v2_base.release = ini->release_nr;
+ memcpy(fce->fce_v2_base.hostname, smc_hostname, sizeof(smc_hostname));
+ if (ini->is_smcd && ini->release_nr < SMC_RELEASE_1) {
+ ret = sizeof(struct smc_clc_first_contact_ext);
+ goto out;
+ }
+
+ if (ini->release_nr >= SMC_RELEASE_1) {
+ if (!ini->is_smcd) {
+ fce->max_conns = ini->max_conns;
+ fce->max_links = ini->max_links;
+ }
+ }
+
+out:
+ return ret;
}
/* check if received message has a correct header length and contains valid
@@ -927,8 +941,11 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini)
sizeof(struct smc_clc_smcd_gid_chid);
}
}
- if (smcr_indicated(ini->smc_type_v2))
+ if (smcr_indicated(ini->smc_type_v2)) {
memcpy(v2_ext->roce, ini->smcrv2.ib_gid_v2, SMC_GID_SIZE);
+ v2_ext->max_conns = SMC_CONN_PER_LGR_PREFER;
+ v2_ext->max_links = SMC_LINKS_PER_LGR_MAX_PREFER;
+ }
pclc_base->hdr.length = htons(plen);
memcpy(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
@@ -986,13 +1003,13 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
u8 *eid, struct smc_init_info *ini)
{
struct smc_connection *conn = &smc->conn;
+ struct smc_clc_first_contact_ext_v2x fce;
struct smc_clc_msg_accept_confirm *clc;
- struct smc_clc_first_contact_ext fce;
struct smc_clc_fce_gid_ext gle;
struct smc_clc_msg_trail trl;
+ int i, len, fce_len;
struct kvec vec[5];
struct msghdr msg;
- int i, len;
/* send SMC Confirm CLC msg */
clc = (struct smc_clc_msg_accept_confirm *)clc_v2;
@@ -1007,7 +1024,7 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
clc->d0.gid =
conn->lgr->smcd->ops->get_local_gid(conn->lgr->smcd);
clc->d0.token = conn->rmb_desc->token;
- clc->d0.dmbe_size = conn->rmbe_size_short;
+ clc->d0.dmbe_size = conn->rmbe_size_comp;
clc->d0.dmbe_idx = 0;
memcpy(&clc->d0.linkid, conn->lgr->id, SMC_LGR_ID_SIZE);
if (version == SMC_V1) {
@@ -1018,8 +1035,10 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
if (eid && eid[0])
memcpy(clc_v2->d1.eid, eid, SMC_MAX_EID_LEN);
len = SMCD_CLC_ACCEPT_CONFIRM_LEN_V2;
- if (first_contact)
- smc_clc_fill_fce(&fce, &len);
+ if (first_contact) {
+ fce_len = smc_clc_fill_fce(&fce, ini);
+ len += fce_len;
+ }
clc_v2->hdr.length = htons(len);
}
memcpy(trl.eyecatcher, SMCD_EYECATCHER,
@@ -1050,7 +1069,7 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
clc->r0.qp_mtu = min(link->path_mtu, link->peer_mtu);
break;
}
- clc->r0.rmbe_size = conn->rmbe_size_short;
+ clc->r0.rmbe_size = conn->rmbe_size_comp;
clc->r0.rmb_dma_addr = conn->rmb_desc->is_vm ?
cpu_to_be64((uintptr_t)conn->rmb_desc->cpu_addr) :
cpu_to_be64((u64)sg_dma_address
@@ -1063,15 +1082,14 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
memcpy(clc_v2->r1.eid, eid, SMC_MAX_EID_LEN);
len = SMCR_CLC_ACCEPT_CONFIRM_LEN_V2;
if (first_contact) {
- smc_clc_fill_fce(&fce, &len);
- fce.v2_direct = !link->lgr->uses_gateway;
- memset(&gle, 0, sizeof(gle));
- if (ini && clc->hdr.type == SMC_CLC_CONFIRM) {
+ fce_len = smc_clc_fill_fce(&fce, ini);
+ len += fce_len;
+ fce.fce_v2_base.v2_direct = !link->lgr->uses_gateway;
+ if (clc->hdr.type == SMC_CLC_CONFIRM) {
+ memset(&gle, 0, sizeof(gle));
gle.gid_cnt = ini->smcrv2.gidlist.len;
len += sizeof(gle);
len += gle.gid_cnt * sizeof(gle.gid[0]);
- } else {
- len += sizeof(gle.reserved);
}
}
clc_v2->hdr.length = htons(len);
@@ -1094,7 +1112,7 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
sizeof(trl);
if (version > SMC_V1 && first_contact) {
vec[i].iov_base = &fce;
- vec[i++].iov_len = sizeof(fce);
+ vec[i++].iov_len = fce_len;
if (!conn->lgr->is_smcd) {
if (clc->hdr.type == SMC_CLC_CONFIRM) {
vec[i].iov_base = &gle;
@@ -1102,9 +1120,6 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
vec[i].iov_base = &ini->smcrv2.gidlist.list;
vec[i++].iov_len = gle.gid_cnt *
sizeof(gle.gid[0]);
- } else {
- vec[i].iov_base = &gle.reserved;
- vec[i++].iov_len = sizeof(gle.reserved);
}
}
}
@@ -1141,7 +1156,7 @@ int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact,
/* send CLC ACCEPT message across internal TCP socket */
int smc_clc_send_accept(struct smc_sock *new_smc, bool srv_first_contact,
- u8 version, u8 *negotiated_eid)
+ u8 version, u8 *negotiated_eid, struct smc_init_info *ini)
{
struct smc_clc_msg_accept_confirm_v2 aclc_v2;
int len;
@@ -1149,13 +1164,95 @@ int smc_clc_send_accept(struct smc_sock *new_smc, bool srv_first_contact,
memset(&aclc_v2, 0, sizeof(aclc_v2));
aclc_v2.hdr.type = SMC_CLC_ACCEPT;
len = smc_clc_send_confirm_accept(new_smc, &aclc_v2, srv_first_contact,
- version, negotiated_eid, NULL);
+ version, negotiated_eid, ini);
if (len < ntohs(aclc_v2.hdr.length))
len = len >= 0 ? -EPROTO : -new_smc->clcsock->sk->sk_err;
return len > 0 ? 0 : len;
}
+int smc_clc_srv_v2x_features_validate(struct smc_clc_msg_proposal *pclc,
+ struct smc_init_info *ini)
+{
+ struct smc_clc_v2_extension *pclc_v2_ext;
+
+ ini->max_conns = SMC_CONN_PER_LGR_MAX;
+ ini->max_links = SMC_LINKS_ADD_LNK_MAX;
+
+ if ((!(ini->smcd_version & SMC_V2) && !(ini->smcr_version & SMC_V2)) ||
+ ini->release_nr < SMC_RELEASE_1)
+ return 0;
+
+ pclc_v2_ext = smc_get_clc_v2_ext(pclc);
+ if (!pclc_v2_ext)
+ return SMC_CLC_DECL_NOV2EXT;
+
+ if (ini->smcr_version & SMC_V2) {
+ ini->max_conns = min_t(u8, pclc_v2_ext->max_conns, SMC_CONN_PER_LGR_PREFER);
+ if (ini->max_conns < SMC_CONN_PER_LGR_MIN)
+ return SMC_CLC_DECL_MAXCONNERR;
+
+ ini->max_links = min_t(u8, pclc_v2_ext->max_links, SMC_LINKS_PER_LGR_MAX_PREFER);
+ if (ini->max_links < SMC_LINKS_ADD_LNK_MIN)
+ return SMC_CLC_DECL_MAXLINKERR;
+ }
+
+ return 0;
+}
+
+int smc_clc_clnt_v2x_features_validate(struct smc_clc_first_contact_ext *fce,
+ struct smc_init_info *ini)
+{
+ struct smc_clc_first_contact_ext_v2x *fce_v2x =
+ (struct smc_clc_first_contact_ext_v2x *)fce;
+
+ if (ini->release_nr < SMC_RELEASE_1)
+ return 0;
+
+ if (!ini->is_smcd) {
+ if (fce_v2x->max_conns < SMC_CONN_PER_LGR_MIN)
+ return SMC_CLC_DECL_MAXCONNERR;
+ ini->max_conns = fce_v2x->max_conns;
+
+ if (fce_v2x->max_links > SMC_LINKS_ADD_LNK_MAX ||
+ fce_v2x->max_links < SMC_LINKS_ADD_LNK_MIN)
+ return SMC_CLC_DECL_MAXLINKERR;
+ ini->max_links = fce_v2x->max_links;
+ }
+
+ return 0;
+}
+
+int smc_clc_v2x_features_confirm_check(struct smc_clc_msg_accept_confirm *cclc,
+ struct smc_init_info *ini)
+{
+ struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
+ (struct smc_clc_msg_accept_confirm_v2 *)cclc;
+ struct smc_clc_first_contact_ext *fce =
+ smc_get_clc_first_contact_ext(clc_v2, ini->is_smcd);
+ struct smc_clc_first_contact_ext_v2x *fce_v2x =
+ (struct smc_clc_first_contact_ext_v2x *)fce;
+
+ if (cclc->hdr.version == SMC_V1 ||
+ !(cclc->hdr.typev2 & SMC_FIRST_CONTACT_MASK))
+ return 0;
+
+ if (ini->release_nr != fce->release)
+ return SMC_CLC_DECL_RELEASEERR;
+
+ if (fce->release < SMC_RELEASE_1)
+ return 0;
+
+ if (!ini->is_smcd) {
+ if (fce_v2x->max_conns != ini->max_conns)
+ return SMC_CLC_DECL_MAXCONNERR;
+ if (fce_v2x->max_links != ini->max_links)
+ return SMC_CLC_DECL_MAXLINKERR;
+ }
+
+ return 0;
+}
+
void smc_clc_get_hostname(u8 **host)
{
*host = &smc_hostname[0];
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index 5fee545c9a10..c5c8e7db775a 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -45,6 +45,9 @@
#define SMC_CLC_DECL_NOSEID 0x03030006 /* peer sent no SEID */
#define SMC_CLC_DECL_NOSMCD2DEV 0x03030007 /* no SMC-Dv2 device found */
#define SMC_CLC_DECL_NOUEID 0x03030008 /* peer sent no UEID */
+#define SMC_CLC_DECL_RELEASEERR 0x03030009 /* release version negotiate failed */
+#define SMC_CLC_DECL_MAXCONNERR 0x0303000a /* max connections negotiate failed */
+#define SMC_CLC_DECL_MAXLINKERR 0x0303000b /* max links negotiate failed */
#define SMC_CLC_DECL_MODEUNSUPP 0x03040000 /* smc modes do not match (R or D)*/
#define SMC_CLC_DECL_RMBE_EC 0x03050000 /* peer has eyecatcher in RMBE */
#define SMC_CLC_DECL_OPTUNSUPP 0x03060000 /* fastopen sockopt not supported */
@@ -133,7 +136,9 @@ struct smc_clc_smcd_gid_chid {
struct smc_clc_v2_extension {
struct smc_clnt_opts_area_hdr hdr;
u8 roce[16]; /* RoCEv2 GID */
- u8 reserved[16];
+ u8 max_conns;
+ u8 max_links;
+ u8 reserved[14];
u8 user_eids[][SMC_MAX_EID_LEN];
};
@@ -147,7 +152,9 @@ struct smc_clc_msg_proposal_prefix { /* prefix part of clc proposal message*/
struct smc_clc_msg_smcd { /* SMC-D GID information */
struct smc_clc_smcd_gid_chid ism; /* ISM native GID+CHID of requestor */
__be16 v2_ext_offset; /* SMC Version 2 Extension Offset */
- u8 reserved[28];
+ u8 vendor_oui[3]; /* vendor organizationally unique identifier */
+ u8 vendor_exp_options[5];
+ u8 reserved[20];
};
struct smc_clc_smcd_v2_extension {
@@ -231,8 +238,19 @@ struct smc_clc_first_contact_ext {
u8 hostname[SMC_MAX_HOSTNAME_LEN];
};
+struct smc_clc_first_contact_ext_v2x {
+ struct smc_clc_first_contact_ext fce_v2_base;
+ u8 max_conns; /* for SMC-R only */
+ u8 max_links; /* for SMC-R only */
+ u8 reserved3[2];
+ __be32 vendor_exp_options;
+ u8 reserved4[8];
+} __packed; /* format defined in
+ * IBM Shared Memory Communications Version 2 (Third Edition)
+ * (https://www.ibm.com/support/pages/node/7009315)
+ */
+
struct smc_clc_fce_gid_ext {
- u8 reserved[16];
u8 gid_cnt;
u8 reserved2[3];
u8 gid[][SMC_GID_SIZE];
@@ -370,6 +388,27 @@ smc_get_clc_smcd_v2_ext(struct smc_clc_v2_extension *prop_v2ext)
ntohs(prop_v2ext->hdr.smcd_v2_ext_offset));
}
+static inline struct smc_clc_first_contact_ext *
+smc_get_clc_first_contact_ext(struct smc_clc_msg_accept_confirm_v2 *clc_v2,
+ bool is_smcd)
+{
+ int clc_v2_len;
+
+ if (clc_v2->hdr.version == SMC_V1 ||
+ !(clc_v2->hdr.typev2 & SMC_FIRST_CONTACT_MASK))
+ return NULL;
+
+ if (is_smcd)
+ clc_v2_len =
+ offsetofend(struct smc_clc_msg_accept_confirm_v2, d1);
+ else
+ clc_v2_len =
+ offsetofend(struct smc_clc_msg_accept_confirm_v2, r1);
+
+ return (struct smc_clc_first_contact_ext *)(((u8 *)clc_v2) +
+ clc_v2_len);
+}
+
struct smcd_dev;
struct smc_init_info;
@@ -382,7 +421,13 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini);
int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact,
u8 version, u8 *eid, struct smc_init_info *ini);
int smc_clc_send_accept(struct smc_sock *smc, bool srv_first_contact,
- u8 version, u8 *negotiated_eid);
+ u8 version, u8 *negotiated_eid, struct smc_init_info *ini);
+int smc_clc_srv_v2x_features_validate(struct smc_clc_msg_proposal *pclc,
+ struct smc_init_info *ini);
+int smc_clc_clnt_v2x_features_validate(struct smc_clc_first_contact_ext *fce,
+ struct smc_init_info *ini);
+int smc_clc_v2x_features_confirm_check(struct smc_clc_msg_accept_confirm *cclc,
+ struct smc_init_info *ini);
void smc_clc_init(void) __init;
void smc_clc_exit(void);
void smc_clc_get_hostname(u8 **host);
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 3f465faf2b68..bd01dd31e4bd 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -319,6 +319,10 @@ static int smc_nl_fill_smcr_lgr_v2(struct smc_link_group *lgr,
goto errattr;
if (nla_put_u8(skb, SMC_NLA_LGR_R_V2_DIRECT, !lgr->uses_gateway))
goto errv2attr;
+ if (nla_put_u8(skb, SMC_NLA_LGR_R_V2_MAX_CONNS, lgr->max_conns))
+ goto errv2attr;
+ if (nla_put_u8(skb, SMC_NLA_LGR_R_V2_MAX_LINKS, lgr->max_links))
+ goto errv2attr;
nla_nest_end(skb, v2_attrs);
return 0;
@@ -895,9 +899,13 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
lgr->uses_gateway = ini->smcrv2.uses_gateway;
memcpy(lgr->nexthop_mac, ini->smcrv2.nexthop_mac,
ETH_ALEN);
+ lgr->max_conns = ini->max_conns;
+ lgr->max_links = ini->max_links;
} else {
ibdev = ini->ib_dev;
ibport = ini->ib_port;
+ lgr->max_conns = SMC_CONN_PER_LGR_MAX;
+ lgr->max_links = SMC_LINKS_ADD_LNK_MAX;
}
memcpy(lgr->pnet_id, ibdev->pnetid[ibport - 1],
SMC_MAX_PNETID_LEN);
@@ -1664,6 +1672,9 @@ void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport)
!rdma_dev_access_netns(smcibdev->ibdev, lgr->net))
continue;
+ if (lgr->type == SMC_LGR_SINGLE && lgr->max_links <= 1)
+ continue;
+
/* trigger local add link processing */
link = smc_llc_usable_link(lgr);
if (link)
@@ -1888,7 +1899,7 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
(ini->smcd_version == SMC_V2 ||
lgr->vlan_id == ini->vlan_id) &&
(role == SMC_CLNT || ini->is_smcd ||
- (lgr->conns_num < SMC_RMBS_PER_LGR_MAX &&
+ (lgr->conns_num < lgr->max_conns &&
!bitmap_full(lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX)))) {
/* link group found */
ini->first_contact_local = 0;
@@ -2309,31 +2320,30 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
struct smc_connection *conn = &smc->conn;
struct smc_link_group *lgr = conn->lgr;
struct list_head *buf_list;
- int bufsize, bufsize_short;
+ int bufsize, bufsize_comp;
struct rw_semaphore *lock; /* lock buffer list */
bool is_dgraded = false;
- int sk_buf_size;
if (is_rmb)
/* use socket recv buffer size (w/o overhead) as start value */
- sk_buf_size = smc->sk.sk_rcvbuf;
+ bufsize = smc->sk.sk_rcvbuf / 2;
else
/* use socket send buffer size (w/o overhead) as start value */
- sk_buf_size = smc->sk.sk_sndbuf;
+ bufsize = smc->sk.sk_sndbuf / 2;
- for (bufsize_short = smc_compress_bufsize(sk_buf_size, is_smcd, is_rmb);
- bufsize_short >= 0; bufsize_short--) {
+ for (bufsize_comp = smc_compress_bufsize(bufsize, is_smcd, is_rmb);
+ bufsize_comp >= 0; bufsize_comp--) {
if (is_rmb) {
lock = &lgr->rmbs_lock;
- buf_list = &lgr->rmbs[bufsize_short];
+ buf_list = &lgr->rmbs[bufsize_comp];
} else {
lock = &lgr->sndbufs_lock;
- buf_list = &lgr->sndbufs[bufsize_short];
+ buf_list = &lgr->sndbufs[bufsize_comp];
}
- bufsize = smc_uncompress_bufsize(bufsize_short);
+ bufsize = smc_uncompress_bufsize(bufsize_comp);
/* check for reusable slot in the link group */
- buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list);
+ buf_desc = smc_buf_get_slot(bufsize_comp, lock, buf_list);
if (buf_desc) {
buf_desc->is_dma_need_sync = 0;
SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, bufsize);
@@ -2377,8 +2387,8 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
if (is_rmb) {
conn->rmb_desc = buf_desc;
- conn->rmbe_size_short = bufsize_short;
- smc->sk.sk_rcvbuf = bufsize;
+ conn->rmbe_size_comp = bufsize_comp;
+ smc->sk.sk_rcvbuf = bufsize * 2;
atomic_set(&conn->bytes_to_rcv, 0);
conn->rmbe_update_limit =
smc_rmb_wnd_update_limit(buf_desc->len);
@@ -2386,7 +2396,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */
} else {
conn->sndbuf_desc = buf_desc;
- smc->sk.sk_sndbuf = bufsize;
+ smc->sk.sk_sndbuf = bufsize * 2;
atomic_set(&conn->sndbuf_space, bufsize);
}
return 0;
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 1645fba0d2d3..120027d40469 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -22,6 +22,15 @@
#include "smc_ib.h"
#define SMC_RMBS_PER_LGR_MAX 255 /* max. # of RMBs per link group */
+#define SMC_CONN_PER_LGR_MIN 16 /* min. # of connections per link group */
+#define SMC_CONN_PER_LGR_MAX 255 /* max. # of connections per link group,
+ * also is the default value for SMC-R v1 and v2.0
+ */
+#define SMC_CONN_PER_LGR_PREFER 255 /* Preferred connections per link group used for
+ * SMC-R v2.1 and later negotiation, vendors or
+ * distrubutions may modify it to a value between
+ * 16-255 as needed.
+ */
struct smc_lgr_list { /* list of link group definition */
struct list_head list;
@@ -164,6 +173,15 @@ struct smc_link {
*/
#define SMC_LINKS_PER_LGR_MAX 3
#define SMC_SINGLE_LINK 0
+#define SMC_LINKS_ADD_LNK_MIN 1 /* min. # of links per link group */
+#define SMC_LINKS_ADD_LNK_MAX 2 /* max. # of links per link group, also is the
+ * default value for smc-r v1.0 and v2.0
+ */
+#define SMC_LINKS_PER_LGR_MAX_PREFER 2 /* Preferred max links per link group used for
+ * SMC-R v2.1 and later negotiation, vendors or
+ * distrubutions may modify it to a value between
+ * 1-2 as needed.
+ */
/* tx/rx buffer list element for sndbufs list and rmbs list of a lgr */
struct smc_buf_desc {
@@ -331,6 +349,10 @@ struct smc_link_group {
__be32 saddr;
/* net namespace */
struct net *net;
+ u8 max_conns;
+ /* max conn can be assigned to lgr */
+ u8 max_links;
+ /* max links can be added in lgr */
};
struct { /* SMC-D */
u64 peer_gid;
@@ -374,6 +396,9 @@ struct smc_init_info {
u8 is_smcd;
u8 smc_type_v1;
u8 smc_type_v2;
+ u8 release_nr;
+ u8 max_conns;
+ u8 max_links;
u8 first_contact_peer;
u8 first_contact_local;
unsigned short vlan_id;
@@ -539,7 +564,6 @@ int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini);
void smc_conn_free(struct smc_connection *conn);
int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini);
-void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr);
int smc_core_init(void);
void smc_core_exit(void);
diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h
index 034295676e88..4df5f8c8a0a1 100644
--- a/net/smc/smc_ib.h
+++ b/net/smc/smc_ib.h
@@ -96,7 +96,6 @@ void smc_ib_destroy_queue_pair(struct smc_link *lnk);
int smc_ib_create_queue_pair(struct smc_link *lnk);
int smc_ib_ready_link(struct smc_link *lnk);
int smc_ib_modify_qp_rts(struct smc_link *lnk);
-int smc_ib_modify_qp_reset(struct smc_link *lnk);
int smc_ib_modify_qp_error(struct smc_link *lnk);
long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev);
int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags,
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 90f0b60b196a..018ce8133b02 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -52,14 +52,13 @@ struct smc_llc_msg_confirm_link { /* type 0x01 */
u8 link_num;
u8 link_uid[SMC_LGR_ID_SIZE];
u8 max_links;
- u8 reserved[9];
+ u8 max_conns;
+ u8 reserved[8];
};
#define SMC_LLC_FLAG_ADD_LNK_REJ 0x40
#define SMC_LLC_REJ_RSN_NO_ALT_PATH 1
-#define SMC_LLC_ADD_LNK_MAX_LINKS 2
-
struct smc_llc_msg_add_link { /* type 0x02 */
struct smc_llc_hdr hd;
u8 sender_mac[ETH_ALEN];
@@ -471,7 +470,12 @@ int smc_llc_send_confirm_link(struct smc_link *link,
hton24(confllc->sender_qp_num, link->roce_qp->qp_num);
confllc->link_num = link->link_id;
memcpy(confllc->link_uid, link->link_uid, SMC_LGR_ID_SIZE);
- confllc->max_links = SMC_LLC_ADD_LNK_MAX_LINKS;
+ confllc->max_links = SMC_LINKS_ADD_LNK_MAX;
+ if (link->lgr->smc_version == SMC_V2 &&
+ link->lgr->peer_smc_release >= SMC_RELEASE_1) {
+ confllc->max_conns = link->lgr->max_conns;
+ confllc->max_links = link->lgr->max_links;
+ }
/* send llc message */
rc = smc_wr_tx_send(link, pend);
put_out:
@@ -1041,6 +1045,11 @@ int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry)
goto out_reject;
}
+ if (lgr->type == SMC_LGR_SINGLE && lgr->max_links <= 1) {
+ rc = 0;
+ goto out_reject;
+ }
+
ini->vlan_id = lgr->vlan_id;
if (lgr->smc_version == SMC_V2) {
ini->check_smcrv2 = true;
@@ -1165,6 +1174,9 @@ static void smc_llc_cli_add_link_invite(struct smc_link *link,
lgr->type == SMC_LGR_ASYMMETRIC_PEER)
goto out;
+ if (lgr->type == SMC_LGR_SINGLE && lgr->max_links <= 1)
+ goto out;
+
ini = kzalloc(sizeof(*ini), GFP_KERNEL);
if (!ini)
goto out;
@@ -1410,6 +1422,11 @@ int smc_llc_srv_add_link(struct smc_link *link,
goto out;
}
+ if (lgr->type == SMC_LGR_SINGLE && lgr->max_links <= 1) {
+ rc = 0;
+ goto out;
+ }
+
/* ignore client add link recommendation, start new flow */
ini->vlan_id = lgr->vlan_id;
if (lgr->smc_version == SMC_V2) {
diff --git a/net/smc/smc_sysctl.c b/net/smc/smc_sysctl.c
index 3ab2d8eefc55..5cbc18c6e62b 100644
--- a/net/smc/smc_sysctl.c
+++ b/net/smc/smc_sysctl.c
@@ -21,6 +21,10 @@
static int min_sndbuf = SMC_BUF_MIN_SIZE;
static int min_rcvbuf = SMC_BUF_MIN_SIZE;
+static int max_sndbuf = INT_MAX / 2;
+static int max_rcvbuf = INT_MAX / 2;
+static const int net_smc_wmem_init = (64 * 1024);
+static const int net_smc_rmem_init = (64 * 1024);
static struct ctl_table smc_table[] = {
{
@@ -53,6 +57,7 @@ static struct ctl_table smc_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &min_sndbuf,
+ .extra2 = &max_sndbuf,
},
{
.procname = "rmem",
@@ -61,6 +66,7 @@ static struct ctl_table smc_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &min_rcvbuf,
+ .extra2 = &max_rcvbuf,
},
{ }
};
@@ -89,8 +95,8 @@ int __net_init smc_sysctl_net_init(struct net *net)
net->smc.sysctl_autocorking_size = SMC_AUTOCORKING_DEFAULT_SIZE;
net->smc.sysctl_smcr_buf_type = SMCR_PHYS_CONT_BUFS;
net->smc.sysctl_smcr_testlink_time = SMC_LLC_TESTLINK_DEFAULT_TIME;
- WRITE_ONCE(net->smc.sysctl_wmem, READ_ONCE(net->ipv4.sysctl_tcp_wmem[1]));
- WRITE_ONCE(net->smc.sysctl_rmem, READ_ONCE(net->ipv4.sysctl_tcp_rmem[1]));
+ WRITE_ONCE(net->smc.sysctl_wmem, net_smc_wmem_init);
+ WRITE_ONCE(net->smc.sysctl_rmem, net_smc_rmem_init);
return 0;