summaryrefslogtreecommitdiff
path: root/net/smc/smc_core.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-06-03 16:27:18 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-06-03 16:27:18 -0700
commitcb8e59cc87201af93dfbb6c3dccc8fcad72a09c2 (patch)
treea334db9022f89654b777bbce8c4c6632e65b9031 /net/smc/smc_core.c
parent2e63f6ce7ed2c4ff83ba30ad9ccad422289a6c63 (diff)
parent065fcfd49763ec71ae345bb5c5a74f961031e70e (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from David Miller: 1) Allow setting bluetooth L2CAP modes via socket option, from Luiz Augusto von Dentz. 2) Add GSO partial support to igc, from Sasha Neftin. 3) Several cleanups and improvements to r8169 from Heiner Kallweit. 4) Add IF_OPER_TESTING link state and use it when ethtool triggers a device self-test. From Andrew Lunn. 5) Start moving away from custom driver versions, use the globally defined kernel version instead, from Leon Romanovsky. 6) Support GRO vis gro_cells in DSA layer, from Alexander Lobakin. 7) Allow hard IRQ deferral during NAPI, from Eric Dumazet. 8) Add sriov and vf support to hinic, from Luo bin. 9) Support Media Redundancy Protocol (MRP) in the bridging code, from Horatiu Vultur. 10) Support netmap in the nft_nat code, from Pablo Neira Ayuso. 11) Allow UDPv6 encapsulation of ESP in the ipsec code, from Sabrina Dubroca. Also add ipv6 support for espintcp. 12) Lots of ReST conversions of the networking documentation, from Mauro Carvalho Chehab. 13) Support configuration of ethtool rxnfc flows in bcmgenet driver, from Doug Berger. 14) Allow to dump cgroup id and filter by it in inet_diag code, from Dmitry Yakunin. 15) Add infrastructure to export netlink attribute policies to userspace, from Johannes Berg. 16) Several optimizations to sch_fq scheduler, from Eric Dumazet. 17) Fallback to the default qdisc if qdisc init fails because otherwise a packet scheduler init failure will make a device inoperative. From Jesper Dangaard Brouer. 18) Several RISCV bpf jit optimizations, from Luke Nelson. 19) Correct the return type of the ->ndo_start_xmit() method in several drivers, it's netdev_tx_t but many drivers were using 'int'. From Yunjian Wang. 20) Add an ethtool interface for PHY master/slave config, from Oleksij Rempel. 21) Add BPF iterators, from Yonghang Song. 22) Add cable test infrastructure, including ethool interfaces, from Andrew Lunn. Marvell PHY driver is the first to support this facility. 23) Remove zero-length arrays all over, from Gustavo A. R. Silva. 24) Calculate and maintain an explicit frame size in XDP, from Jesper Dangaard Brouer. 25) Add CAP_BPF, from Alexei Starovoitov. 26) Support terse dumps in the packet scheduler, from Vlad Buslov. 27) Support XDP_TX bulking in dpaa2 driver, from Ioana Ciornei. 28) Add devm_register_netdev(), from Bartosz Golaszewski. 29) Minimize qdisc resets, from Cong Wang. 30) Get rid of kernel_getsockopt and kernel_setsockopt in order to eliminate set_fs/get_fs calls. From Christoph Hellwig. * git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (2517 commits) selftests: net: ip_defrag: ignore EPERM net_failover: fixed rollback in net_failover_open() Revert "tipc: Fix potential tipc_aead refcnt leak in tipc_crypto_rcv" Revert "tipc: Fix potential tipc_node refcnt leak in tipc_rcv" vmxnet3: allow rx flow hash ops only when rss is enabled hinic: add set_channels ethtool_ops support selftests/bpf: Add a default $(CXX) value tools/bpf: Don't use $(COMPILE.c) bpf, selftests: Use bpf_probe_read_kernel s390/bpf: Use bcr 0,%0 as tail call nop filler s390/bpf: Maintain 8-byte stack alignment selftests/bpf: Fix verifier test selftests/bpf: Fix sample_cnt shared between two threads bpf, selftests: Adapt cls_redirect to call csum_level helper bpf: Add csum_level helper for fixing up csum levels bpf: Fix up bpf_skb_adjust_room helper's skb csum setting sfc: add missing annotation for efx_ef10_try_update_nic_stats_vf() crypto/chtls: IPv6 support for inline TLS Crypto/chcr: Fixes a coccinile check error Crypto/chcr: Fixes compilations warnings ...
Diffstat (limited to 'net/smc/smc_core.c')
-rw-r--r--net/smc/smc_core.c1175
1 files changed, 935 insertions, 240 deletions
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 824c5211b027..7964a21e5e6f 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -44,10 +44,20 @@ static struct smc_lgr_list smc_lgr_list = { /* established link groups */
static atomic_t lgr_cnt = ATOMIC_INIT(0); /* number of existing link groups */
static DECLARE_WAIT_QUEUE_HEAD(lgrs_deleted);
+struct smc_ib_up_work {
+ struct work_struct work;
+ struct smc_link_group *lgr;
+ struct smc_ib_device *smcibdev;
+ u8 ibport;
+};
+
static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
struct smc_buf_desc *buf_desc);
static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft);
+static void smc_link_up_work(struct work_struct *work);
+static void smc_link_down_work(struct work_struct *work);
+
/* return head of link group list and its lock for a given link group */
static inline struct list_head *smc_lgr_list_head(struct smc_link_group *lgr,
spinlock_t **lgr_lock)
@@ -111,16 +121,60 @@ static void smc_lgr_add_alert_token(struct smc_connection *conn)
rb_insert_color(&conn->alert_node, &conn->lgr->conns_all);
}
+/* assign an SMC-R link to the connection */
+static int smcr_lgr_conn_assign_link(struct smc_connection *conn, bool first)
+{
+ enum smc_link_state expected = first ? SMC_LNK_ACTIVATING :
+ SMC_LNK_ACTIVE;
+ int i, j;
+
+ /* do link balancing */
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ struct smc_link *lnk = &conn->lgr->lnk[i];
+
+ if (lnk->state != expected || lnk->link_is_asym)
+ continue;
+ if (conn->lgr->role == SMC_CLNT) {
+ conn->lnk = lnk; /* temporary, SMC server assigns link*/
+ break;
+ }
+ if (conn->lgr->conns_num % 2) {
+ for (j = i + 1; j < SMC_LINKS_PER_LGR_MAX; j++) {
+ struct smc_link *lnk2;
+
+ lnk2 = &conn->lgr->lnk[j];
+ if (lnk2->state == expected &&
+ !lnk2->link_is_asym) {
+ conn->lnk = lnk2;
+ break;
+ }
+ }
+ }
+ if (!conn->lnk)
+ conn->lnk = lnk;
+ break;
+ }
+ if (!conn->lnk)
+ return SMC_CLC_DECL_NOACTLINK;
+ return 0;
+}
+
/* Register connection in link group by assigning an alert token
* registered in a search tree.
* Requires @conns_lock
* Note that '0' is a reserved value and not assigned.
*/
-static void smc_lgr_register_conn(struct smc_connection *conn)
+static int smc_lgr_register_conn(struct smc_connection *conn, bool first)
{
struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
static atomic_t nexttoken = ATOMIC_INIT(0);
+ int rc;
+ if (!conn->lgr->is_smcd) {
+ rc = smcr_lgr_conn_assign_link(conn, first);
+ if (rc)
+ return rc;
+ }
/* find a new alert_token_local value not yet used by some connection
* in this link group
*/
@@ -132,6 +186,7 @@ static void smc_lgr_register_conn(struct smc_connection *conn)
}
smc_lgr_add_alert_token(conn);
conn->lgr->conns_num++;
+ return 0;
}
/* Unregister connection and reset the alert token of the given connection<
@@ -166,27 +221,33 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn)
void smc_lgr_cleanup_early(struct smc_connection *conn)
{
struct smc_link_group *lgr = conn->lgr;
+ struct list_head *lgr_list;
+ spinlock_t *lgr_lock;
if (!lgr)
return;
smc_conn_free(conn);
- smc_lgr_forget(lgr);
+ lgr_list = smc_lgr_list_head(lgr, &lgr_lock);
+ spin_lock_bh(lgr_lock);
+ /* do not use this link group for new connections */
+ if (!list_empty(lgr_list))
+ list_del_init(lgr_list);
+ spin_unlock_bh(lgr_lock);
smc_lgr_schedule_free_work_fast(lgr);
}
-/* Send delete link, either as client to request the initiation
- * of the DELETE LINK sequence from server; or as server to
- * initiate the delete processing. See smc_llc_rx_delete_link().
- */
-static int smc_link_send_delete(struct smc_link *lnk, bool orderly)
+static void smcr_lgr_link_deactivate_all(struct smc_link_group *lgr)
{
- if (lnk->state == SMC_LNK_ACTIVE &&
- !smc_llc_send_delete_link(lnk, SMC_LLC_REQ, orderly)) {
- smc_llc_link_deleting(lnk);
- return 0;
+ int i;
+
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ struct smc_link *lnk = &lgr->lnk[i];
+
+ if (smc_link_usable(lnk))
+ lnk->state = SMC_LNK_INACTIVE;
}
- return -ENOTCONN;
+ wake_up_interruptible_all(&lgr->llc_waiter);
}
static void smc_lgr_free(struct smc_link_group *lgr);
@@ -197,7 +258,6 @@ static void smc_lgr_free_work(struct work_struct *work)
struct smc_link_group,
free_work);
spinlock_t *lgr_lock;
- struct smc_link *lnk;
bool conns;
smc_lgr_list_head(lgr, &lgr_lock);
@@ -214,26 +274,17 @@ static void smc_lgr_free_work(struct work_struct *work)
return;
}
list_del_init(&lgr->list); /* remove from smc_lgr_list */
-
- lnk = &lgr->lnk[SMC_SINGLE_LINK];
- if (!lgr->is_smcd && !lgr->terminating) {
- /* try to send del link msg, on error free lgr immediately */
- if (lnk->state == SMC_LNK_ACTIVE &&
- !smc_link_send_delete(lnk, true)) {
- /* reschedule in case we never receive a response */
- smc_lgr_schedule_free_work(lgr);
- spin_unlock_bh(lgr_lock);
- return;
- }
- }
lgr->freeing = 1; /* this instance does the freeing, no new schedule */
spin_unlock_bh(lgr_lock);
cancel_delayed_work(&lgr->free_work);
- if (!lgr->is_smcd && lnk->state != SMC_LNK_INACTIVE)
- smc_llc_link_inactive(lnk);
+ if (!lgr->is_smcd && !lgr->terminating)
+ smc_llc_send_link_delete_all(lgr, true,
+ SMC_LLC_DEL_PROG_INIT_TERM);
if (lgr->is_smcd && !lgr->terminating)
smc_ism_signal_shutdown(lgr);
+ if (!lgr->is_smcd)
+ smcr_lgr_link_deactivate_all(lgr);
smc_lgr_free(lgr);
}
@@ -245,6 +296,89 @@ static void smc_lgr_terminate_work(struct work_struct *work)
__smc_lgr_terminate(lgr, true);
}
+/* return next unique link id for the lgr */
+static u8 smcr_next_link_id(struct smc_link_group *lgr)
+{
+ u8 link_id;
+ int i;
+
+ while (1) {
+ link_id = ++lgr->next_link_id;
+ if (!link_id) /* skip zero as link_id */
+ link_id = ++lgr->next_link_id;
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ if (smc_link_usable(&lgr->lnk[i]) &&
+ lgr->lnk[i].link_id == link_id)
+ continue;
+ }
+ break;
+ }
+ return link_id;
+}
+
+int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
+ u8 link_idx, struct smc_init_info *ini)
+{
+ u8 rndvec[3];
+ int rc;
+
+ get_device(&ini->ib_dev->ibdev->dev);
+ atomic_inc(&ini->ib_dev->lnk_cnt);
+ lnk->state = SMC_LNK_ACTIVATING;
+ lnk->link_id = smcr_next_link_id(lgr);
+ lnk->lgr = lgr;
+ lnk->link_idx = link_idx;
+ lnk->smcibdev = ini->ib_dev;
+ lnk->ibport = ini->ib_port;
+ lnk->path_mtu = ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
+ smc_llc_link_set_uid(lnk);
+ INIT_WORK(&lnk->link_down_wrk, smc_link_down_work);
+ if (!ini->ib_dev->initialized) {
+ rc = (int)smc_ib_setup_per_ibdev(ini->ib_dev);
+ if (rc)
+ goto out;
+ }
+ get_random_bytes(rndvec, sizeof(rndvec));
+ lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
+ (rndvec[2] << 16);
+ rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
+ ini->vlan_id, lnk->gid, &lnk->sgid_index);
+ if (rc)
+ goto out;
+ rc = smc_llc_link_init(lnk);
+ if (rc)
+ goto out;
+ rc = smc_wr_alloc_link_mem(lnk);
+ if (rc)
+ goto clear_llc_lnk;
+ rc = smc_ib_create_protection_domain(lnk);
+ if (rc)
+ goto free_link_mem;
+ rc = smc_ib_create_queue_pair(lnk);
+ if (rc)
+ goto dealloc_pd;
+ rc = smc_wr_create_link(lnk);
+ if (rc)
+ goto destroy_qp;
+ return 0;
+
+destroy_qp:
+ smc_ib_destroy_queue_pair(lnk);
+dealloc_pd:
+ smc_ib_dealloc_protection_domain(lnk);
+free_link_mem:
+ smc_wr_free_link_mem(lnk);
+clear_llc_lnk:
+ smc_llc_link_clear(lnk, false);
+out:
+ put_device(&ini->ib_dev->ibdev->dev);
+ memset(lnk, 0, sizeof(struct smc_link));
+ lnk->state = SMC_LNK_UNUSED;
+ if (!atomic_dec_return(&ini->ib_dev->lnk_cnt))
+ wake_up(&ini->ib_dev->lnks_deleted);
+ return rc;
+}
+
/* create a new SMC link group */
static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
{
@@ -252,7 +386,7 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
struct list_head *lgr_list;
struct smc_link *lnk;
spinlock_t *lgr_lock;
- u8 rndvec[3];
+ u8 link_idx;
int rc = 0;
int i;
@@ -274,13 +408,14 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
lgr->freefast = 0;
lgr->freeing = 0;
lgr->vlan_id = ini->vlan_id;
- rwlock_init(&lgr->sndbufs_lock);
- rwlock_init(&lgr->rmbs_lock);
+ mutex_init(&lgr->sndbufs_lock);
+ mutex_init(&lgr->rmbs_lock);
rwlock_init(&lgr->conns_lock);
for (i = 0; i < SMC_RMBE_SIZES; i++) {
INIT_LIST_HEAD(&lgr->sndbufs[i]);
INIT_LIST_HEAD(&lgr->rmbs[i]);
}
+ lgr->next_link_id = 0;
smc_lgr_list.num += SMC_LGR_NUM_INCR;
memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE);
INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
@@ -297,48 +432,21 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
atomic_inc(&ini->ism_dev->lgr_cnt);
} else {
/* SMC-R specific settings */
- get_device(&ini->ib_dev->ibdev->dev);
lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer,
SMC_SYSTEMID_LEN);
+ memcpy(lgr->pnet_id, ini->ib_dev->pnetid[ini->ib_port - 1],
+ SMC_MAX_PNETID_LEN);
+ smc_llc_lgr_init(lgr, smc);
- lnk = &lgr->lnk[SMC_SINGLE_LINK];
- /* initialize link */
- lnk->state = SMC_LNK_ACTIVATING;
- lnk->link_id = SMC_SINGLE_LINK;
- lnk->smcibdev = ini->ib_dev;
- lnk->ibport = ini->ib_port;
- lgr_list = &smc_lgr_list.list;
- lgr_lock = &smc_lgr_list.lock;
- lnk->path_mtu =
- ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
- if (!ini->ib_dev->initialized)
- smc_ib_setup_per_ibdev(ini->ib_dev);
- get_random_bytes(rndvec, sizeof(rndvec));
- lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
- (rndvec[2] << 16);
- rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
- ini->vlan_id, lnk->gid,
- &lnk->sgid_index);
+ link_idx = SMC_SINGLE_LINK;
+ lnk = &lgr->lnk[link_idx];
+ rc = smcr_link_init(lgr, lnk, link_idx, ini);
if (rc)
goto free_lgr;
- rc = smc_llc_link_init(lnk);
- if (rc)
- goto free_lgr;
- rc = smc_wr_alloc_link_mem(lnk);
- if (rc)
- goto clear_llc_lnk;
- rc = smc_ib_create_protection_domain(lnk);
- if (rc)
- goto free_link_mem;
- rc = smc_ib_create_queue_pair(lnk);
- if (rc)
- goto dealloc_pd;
- rc = smc_wr_create_link(lnk);
- if (rc)
- goto destroy_qp;
+ lgr_list = &smc_lgr_list.list;
+ lgr_lock = &smc_lgr_list.lock;
atomic_inc(&lgr_cnt);
- atomic_inc(&ini->ib_dev->lnk_cnt);
}
smc->conn.lgr = lgr;
spin_lock_bh(lgr_lock);
@@ -346,14 +454,6 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
spin_unlock_bh(lgr_lock);
return 0;
-destroy_qp:
- smc_ib_destroy_queue_pair(lnk);
-dealloc_pd:
- smc_ib_dealloc_protection_domain(lnk);
-free_link_mem:
- smc_wr_free_link_mem(lnk);
-clear_llc_lnk:
- smc_llc_link_clear(lnk);
free_lgr:
kfree(lgr);
ism_put_vlan:
@@ -369,29 +469,186 @@ out:
return rc;
}
+static int smc_write_space(struct smc_connection *conn)
+{
+ int buffer_len = conn->peer_rmbe_size;
+ union smc_host_cursor prod;
+ union smc_host_cursor cons;
+ int space;
+
+ smc_curs_copy(&prod, &conn->local_tx_ctrl.prod, conn);
+ smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);
+ /* determine rx_buf space */
+ space = buffer_len - smc_curs_diff(buffer_len, &cons, &prod);
+ return space;
+}
+
+static int smc_switch_cursor(struct smc_sock *smc, struct smc_cdc_tx_pend *pend,
+ struct smc_wr_buf *wr_buf)
+{
+ struct smc_connection *conn = &smc->conn;
+ union smc_host_cursor cons, fin;
+ int rc = 0;
+ int diff;
+
+ smc_curs_copy(&conn->tx_curs_sent, &conn->tx_curs_fin, conn);
+ smc_curs_copy(&fin, &conn->local_tx_ctrl_fin, conn);
+ /* set prod cursor to old state, enforce tx_rdma_writes() */
+ smc_curs_copy(&conn->local_tx_ctrl.prod, &fin, conn);
+ smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);
+
+ if (smc_curs_comp(conn->peer_rmbe_size, &cons, &fin) < 0) {
+ /* cons cursor advanced more than fin, and prod was set
+ * fin above, so now prod is smaller than cons. Fix that.
+ */
+ diff = smc_curs_diff(conn->peer_rmbe_size, &fin, &cons);
+ smc_curs_add(conn->sndbuf_desc->len,
+ &conn->tx_curs_sent, diff);
+ smc_curs_add(conn->sndbuf_desc->len,
+ &conn->tx_curs_fin, diff);
+
+ smp_mb__before_atomic();
+ atomic_add(diff, &conn->sndbuf_space);
+ smp_mb__after_atomic();
+
+ smc_curs_add(conn->peer_rmbe_size,
+ &conn->local_tx_ctrl.prod, diff);
+ smc_curs_add(conn->peer_rmbe_size,
+ &conn->local_tx_ctrl_fin, diff);
+ }
+ /* recalculate, value is used by tx_rdma_writes() */
+ atomic_set(&smc->conn.peer_rmbe_space, smc_write_space(conn));
+
+ if (smc->sk.sk_state != SMC_INIT &&
+ smc->sk.sk_state != SMC_CLOSED) {
+ rc = smcr_cdc_msg_send_validation(conn, pend, wr_buf);
+ if (!rc) {
+ schedule_delayed_work(&conn->tx_work, 0);
+ smc->sk.sk_data_ready(&smc->sk);
+ }
+ } else {
+ smc_wr_tx_put_slot(conn->lnk,
+ (struct smc_wr_tx_pend_priv *)pend);
+ }
+ return rc;
+}
+
+struct smc_link *smc_switch_conns(struct smc_link_group *lgr,
+ struct smc_link *from_lnk, bool is_dev_err)
+{
+ struct smc_link *to_lnk = NULL;
+ struct smc_cdc_tx_pend *pend;
+ struct smc_connection *conn;
+ struct smc_wr_buf *wr_buf;
+ struct smc_sock *smc;
+ struct rb_node *node;
+ int i, rc = 0;
+
+ /* link is inactive, wake up tx waiters */
+ smc_wr_wakeup_tx_wait(from_lnk);
+
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ if (lgr->lnk[i].state != SMC_LNK_ACTIVE ||
+ i == from_lnk->link_idx)
+ continue;
+ if (is_dev_err && from_lnk->smcibdev == lgr->lnk[i].smcibdev &&
+ from_lnk->ibport == lgr->lnk[i].ibport) {
+ continue;
+ }
+ to_lnk = &lgr->lnk[i];
+ break;
+ }
+ if (!to_lnk) {
+ smc_lgr_terminate_sched(lgr);
+ return NULL;
+ }
+again:
+ read_lock_bh(&lgr->conns_lock);
+ for (node = rb_first(&lgr->conns_all); node; node = rb_next(node)) {
+ conn = rb_entry(node, struct smc_connection, alert_node);
+ if (conn->lnk != from_lnk)
+ continue;
+ smc = container_of(conn, struct smc_sock, conn);
+ /* conn->lnk not yet set in SMC_INIT state */
+ if (smc->sk.sk_state == SMC_INIT)
+ continue;
+ if (smc->sk.sk_state == SMC_CLOSED ||
+ smc->sk.sk_state == SMC_PEERCLOSEWAIT1 ||
+ smc->sk.sk_state == SMC_PEERCLOSEWAIT2 ||
+ smc->sk.sk_state == SMC_APPFINCLOSEWAIT ||
+ smc->sk.sk_state == SMC_APPCLOSEWAIT1 ||
+ smc->sk.sk_state == SMC_APPCLOSEWAIT2 ||
+ smc->sk.sk_state == SMC_PEERFINCLOSEWAIT ||
+ smc->sk.sk_state == SMC_PEERABORTWAIT ||
+ smc->sk.sk_state == SMC_PROCESSABORT) {
+ spin_lock_bh(&conn->send_lock);
+ conn->lnk = to_lnk;
+ spin_unlock_bh(&conn->send_lock);
+ continue;
+ }
+ sock_hold(&smc->sk);
+ read_unlock_bh(&lgr->conns_lock);
+ /* pre-fetch buffer outside of send_lock, might sleep */
+ rc = smc_cdc_get_free_slot(conn, to_lnk, &wr_buf, NULL, &pend);
+ if (rc) {
+ smcr_link_down_cond_sched(to_lnk);
+ return NULL;
+ }
+ /* avoid race with smcr_tx_sndbuf_nonempty() */
+ spin_lock_bh(&conn->send_lock);
+ conn->lnk = to_lnk;
+ rc = smc_switch_cursor(smc, pend, wr_buf);
+ spin_unlock_bh(&conn->send_lock);
+ sock_put(&smc->sk);
+ if (rc) {
+ smcr_link_down_cond_sched(to_lnk);
+ return NULL;
+ }
+ goto again;
+ }
+ read_unlock_bh(&lgr->conns_lock);
+ return to_lnk;
+}
+
+static void smcr_buf_unuse(struct smc_buf_desc *rmb_desc,
+ struct smc_link_group *lgr)
+{
+ int rc;
+
+ if (rmb_desc->is_conf_rkey && !list_empty(&lgr->list)) {
+ /* unregister rmb with peer */
+ rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
+ if (!rc) {
+ /* protect against smc_llc_cli_rkey_exchange() */
+ mutex_lock(&lgr->llc_conf_mutex);
+ smc_llc_do_delete_rkey(lgr, rmb_desc);
+ rmb_desc->is_conf_rkey = false;
+ mutex_unlock(&lgr->llc_conf_mutex);
+ smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
+ }
+ }
+
+ if (rmb_desc->is_reg_err) {
+ /* buf registration failed, reuse not possible */
+ mutex_lock(&lgr->rmbs_lock);
+ list_del(&rmb_desc->list);
+ mutex_unlock(&lgr->rmbs_lock);
+
+ smc_buf_free(lgr, true, rmb_desc);
+ } else {
+ rmb_desc->used = 0;
+ }
+}
+
static void smc_buf_unuse(struct smc_connection *conn,
struct smc_link_group *lgr)
{
if (conn->sndbuf_desc)
conn->sndbuf_desc->used = 0;
- if (conn->rmb_desc) {
- if (!conn->rmb_desc->regerr) {
- if (!lgr->is_smcd && !list_empty(&lgr->list)) {
- /* unregister rmb with peer */
- smc_llc_do_delete_rkey(
- &lgr->lnk[SMC_SINGLE_LINK],
- conn->rmb_desc);
- }
- conn->rmb_desc->used = 0;
- } else {
- /* buf registration failed, reuse not possible */
- write_lock_bh(&lgr->rmbs_lock);
- list_del(&conn->rmb_desc->list);
- write_unlock_bh(&lgr->rmbs_lock);
-
- smc_buf_free(lgr, true, conn->rmb_desc);
- }
- }
+ if (conn->rmb_desc && lgr->is_smcd)
+ conn->rmb_desc->used = 0;
+ else if (conn->rmb_desc)
+ smcr_buf_unuse(conn->rmb_desc, lgr);
}
/* remove a finished connection from its link group */
@@ -407,6 +664,8 @@ void smc_conn_free(struct smc_connection *conn)
tasklet_kill(&conn->rx_tsklet);
} else {
smc_cdc_tx_dismiss_slots(conn);
+ if (current_work() != &conn->abort_work)
+ cancel_work_sync(&conn->abort_work);
}
if (!list_empty(&lgr->list)) {
smc_lgr_unregister_conn(conn);
@@ -417,35 +676,91 @@ void smc_conn_free(struct smc_connection *conn)
smc_lgr_schedule_free_work(lgr);
}
-static void smc_link_clear(struct smc_link *lnk)
+/* unregister a link from a buf_desc */
+static void smcr_buf_unmap_link(struct smc_buf_desc *buf_desc, bool is_rmb,
+ struct smc_link *lnk)
+{
+ if (is_rmb)
+ buf_desc->is_reg_mr[lnk->link_idx] = false;
+ if (!buf_desc->is_map_ib[lnk->link_idx])
+ return;
+ if (is_rmb) {
+ if (buf_desc->mr_rx[lnk->link_idx]) {
+ smc_ib_put_memory_region(
+ buf_desc->mr_rx[lnk->link_idx]);
+ buf_desc->mr_rx[lnk->link_idx] = NULL;
+ }
+ smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_FROM_DEVICE);
+ } else {
+ smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_TO_DEVICE);
+ }
+ sg_free_table(&buf_desc->sgt[lnk->link_idx]);
+ buf_desc->is_map_ib[lnk->link_idx] = false;
+}
+
+/* unmap all buffers of lgr for a deleted link */
+static void smcr_buf_unmap_lgr(struct smc_link *lnk)
+{
+ struct smc_link_group *lgr = lnk->lgr;
+ struct smc_buf_desc *buf_desc, *bf;
+ int i;
+
+ for (i = 0; i < SMC_RMBE_SIZES; i++) {
+ mutex_lock(&lgr->rmbs_lock);
+ list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list)
+ smcr_buf_unmap_link(buf_desc, true, lnk);
+ mutex_unlock(&lgr->rmbs_lock);
+ mutex_lock(&lgr->sndbufs_lock);
+ list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i],
+ list)
+ smcr_buf_unmap_link(buf_desc, false, lnk);
+ mutex_unlock(&lgr->sndbufs_lock);
+ }
+}
+
+static void smcr_rtoken_clear_link(struct smc_link *lnk)
+{
+ struct smc_link_group *lgr = lnk->lgr;
+ int i;
+
+ for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
+ lgr->rtokens[i][lnk->link_idx].rkey = 0;
+ lgr->rtokens[i][lnk->link_idx].dma_addr = 0;
+ }
+}
+
+/* must be called under lgr->llc_conf_mutex lock */
+void smcr_link_clear(struct smc_link *lnk, bool log)
{
+ struct smc_ib_device *smcibdev;
+
+ if (!lnk->lgr || lnk->state == SMC_LNK_UNUSED)
+ return;
lnk->peer_qpn = 0;
- smc_llc_link_clear(lnk);
+ smc_llc_link_clear(lnk, log);
+ smcr_buf_unmap_lgr(lnk);
+ smcr_rtoken_clear_link(lnk);
smc_ib_modify_qp_reset(lnk);
smc_wr_free_link(lnk);
smc_ib_destroy_queue_pair(lnk);
smc_ib_dealloc_protection_domain(lnk);
smc_wr_free_link_mem(lnk);
- if (!atomic_dec_return(&lnk->smcibdev->lnk_cnt))
- wake_up(&lnk->smcibdev->lnks_deleted);
+ put_device(&lnk->smcibdev->ibdev->dev);
+ smcibdev = lnk->smcibdev;
+ memset(lnk, 0, sizeof(struct smc_link));
+ lnk->state = SMC_LNK_UNUSED;
+ if (!atomic_dec_return(&smcibdev->lnk_cnt))
+ wake_up(&smcibdev->lnks_deleted);
}
static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
struct smc_buf_desc *buf_desc)
{
- struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
+ int i;
+
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
+ smcr_buf_unmap_link(buf_desc, is_rmb, &lgr->lnk[i]);
- if (is_rmb) {
- if (buf_desc->mr_rx[SMC_SINGLE_LINK])
- smc_ib_put_memory_region(
- buf_desc->mr_rx[SMC_SINGLE_LINK]);
- smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc,
- DMA_FROM_DEVICE);
- } else {
- smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc,
- DMA_TO_DEVICE);
- }
- sg_free_table(&buf_desc->sgt[SMC_SINGLE_LINK]);
if (buf_desc->pages)
__free_pages(buf_desc->pages, buf_desc->order);
kfree(buf_desc);
@@ -503,6 +818,18 @@ static void smc_lgr_free_bufs(struct smc_link_group *lgr)
/* remove a link group */
static void smc_lgr_free(struct smc_link_group *lgr)
{
+ int i;
+
+ if (!lgr->is_smcd) {
+ mutex_lock(&lgr->llc_conf_mutex);
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ if (lgr->lnk[i].state != SMC_LNK_UNUSED)
+ smcr_link_clear(&lgr->lnk[i], false);
+ }
+ mutex_unlock(&lgr->llc_conf_mutex);
+ smc_llc_lgr_clear(lgr);
+ }
+
smc_lgr_free_bufs(lgr);
if (lgr->is_smcd) {
if (!lgr->terminating) {
@@ -512,27 +839,12 @@ static void smc_lgr_free(struct smc_link_group *lgr)
if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
wake_up(&lgr->smcd->lgrs_deleted);
} else {
- smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]);
- put_device(&lgr->lnk[SMC_SINGLE_LINK].smcibdev->ibdev->dev);
if (!atomic_dec_return(&lgr_cnt))
wake_up(&lgrs_deleted);
}
kfree(lgr);
}
-void smc_lgr_forget(struct smc_link_group *lgr)
-{
- struct list_head *lgr_list;
- spinlock_t *lgr_lock;
-
- lgr_list = smc_lgr_list_head(lgr, &lgr_lock);
- spin_lock_bh(lgr_lock);
- /* do not use this link group for new connections */
- if (!list_empty(lgr_list))
- list_del_init(lgr_list);
- spin_unlock_bh(lgr_lock);
-}
-
static void smcd_unregister_all_dmbs(struct smc_link_group *lgr)
{
int i;
@@ -587,10 +899,12 @@ static void smc_lgr_cleanup(struct smc_link_group *lgr)
smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
put_device(&lgr->smcd->dev);
} else {
- struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
+ u32 rsn = lgr->llc_termination_rsn;
- if (lnk->state != SMC_LNK_INACTIVE)
- smc_llc_link_inactive(lnk);
+ if (!rsn)
+ rsn = SMC_LLC_DEL_PROG_INIT_TERM;
+ smc_llc_send_link_delete_all(lgr, false, rsn);
+ smcr_lgr_link_deactivate_all(lgr);
}
}
@@ -606,11 +920,9 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft)
if (lgr->terminating)
return; /* lgr already terminating */
- if (!soft)
- cancel_delayed_work_sync(&lgr->free_work);
+ /* cancel free_work sync, will terminate when lgr->freeing is set */
+ cancel_delayed_work_sync(&lgr->free_work);
lgr->terminating = 1;
- if (!lgr->is_smcd)
- smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]);
/* kill remaining link group connections */
read_lock_bh(&lgr->conns_lock);
@@ -629,10 +941,7 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft)
}
read_unlock_bh(&lgr->conns_lock);
smc_lgr_cleanup(lgr);
- if (soft)
- smc_lgr_schedule_free_work_fast(lgr);
- else
- smc_lgr_free(lgr);
+ smc_lgr_free(lgr);
}
/* unlink link group and schedule termination */
@@ -647,33 +956,11 @@ void smc_lgr_terminate_sched(struct smc_link_group *lgr)
return; /* lgr already terminating */
}
list_del_init(&lgr->list);
+ lgr->freeing = 1;
spin_unlock_bh(lgr_lock);
schedule_work(&lgr->terminate_work);
}
-/* Called when IB port is terminated */
-void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport)
-{
- struct smc_link_group *lgr, *l;
- LIST_HEAD(lgr_free_list);
-
- spin_lock_bh(&smc_lgr_list.lock);
- list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
- if (!lgr->is_smcd &&
- lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev &&
- lgr->lnk[SMC_SINGLE_LINK].ibport == ibport) {
- list_move(&lgr->list, &lgr_free_list);
- lgr->freeing = 1;
- }
- }
- spin_unlock_bh(&smc_lgr_list.lock);
-
- list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
- list_del_init(&lgr->list);
- __smc_lgr_terminate(lgr, false);
- }
-}
-
/* Called when peer lgr shutdown (regularly or abnormally) is received */
void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
{
@@ -688,6 +975,7 @@ void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
if (peer_gid) /* peer triggered termination */
lgr->peer_shutdown = 1;
list_move(&lgr->list, &lgr_free_list);
+ lgr->freeing = 1;
}
}
spin_unlock_bh(&dev->lgr_lock);
@@ -728,6 +1016,7 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
{
struct smc_link_group *lgr, *lg;
LIST_HEAD(lgr_free_list);
+ int i;
spin_lock_bh(&smc_lgr_list.lock);
if (!smcibdev) {
@@ -736,9 +1025,9 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
lgr->freeing = 1;
} else {
list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) {
- if (lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev) {
- list_move(&lgr->list, &lgr_free_list);
- lgr->freeing = 1;
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ if (lgr->lnk[i].smcibdev == smcibdev)
+ smcr_link_down_cond_sched(&lgr->lnk[i]);
}
}
}
@@ -746,6 +1035,7 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
list_del_init(&lgr->list);
+ smc_llc_set_termination_rsn(lgr, SMC_LLC_DEL_OP_INIT_TERM);
__smc_lgr_terminate(lgr, false);
}
@@ -759,6 +1049,225 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
}
}
+/* set new lgr type and clear all asymmetric link tagging */
+void smcr_lgr_set_type(struct smc_link_group *lgr, enum smc_lgr_type new_type)
+{
+ char *lgr_type = "";
+ int i;
+
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
+ if (smc_link_usable(&lgr->lnk[i]))
+ lgr->lnk[i].link_is_asym = false;
+ if (lgr->type == new_type)
+ return;
+ lgr->type = new_type;
+
+ switch (lgr->type) {
+ case SMC_LGR_NONE:
+ lgr_type = "NONE";
+ break;
+ case SMC_LGR_SINGLE:
+ lgr_type = "SINGLE";
+ break;
+ case SMC_LGR_SYMMETRIC:
+ lgr_type = "SYMMETRIC";
+ break;
+ case SMC_LGR_ASYMMETRIC_PEER:
+ lgr_type = "ASYMMETRIC_PEER";
+ break;
+ case SMC_LGR_ASYMMETRIC_LOCAL:
+ lgr_type = "ASYMMETRIC_LOCAL";
+ break;
+ }
+ pr_warn_ratelimited("smc: SMC-R lg %*phN state changed: "
+ "%s, pnetid %.16s\n", SMC_LGR_ID_SIZE, &lgr->id,
+ lgr_type, lgr->pnet_id);
+}
+
+/* set new lgr type and tag a link as asymmetric */
+void smcr_lgr_set_type_asym(struct smc_link_group *lgr,
+ enum smc_lgr_type new_type, int asym_lnk_idx)
+{
+ smcr_lgr_set_type(lgr, new_type);
+ lgr->lnk[asym_lnk_idx].link_is_asym = true;
+}
+
+/* abort connection, abort_work scheduled from tasklet context */
+static void smc_conn_abort_work(struct work_struct *work)
+{
+ struct smc_connection *conn = container_of(work,
+ struct smc_connection,
+ abort_work);
+ struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
+
+ smc_conn_kill(conn, true);
+ sock_put(&smc->sk); /* sock_hold done by schedulers of abort_work */
+}
+
+/* link is up - establish alternate link if applicable */
+static void smcr_link_up(struct smc_link_group *lgr,
+ struct smc_ib_device *smcibdev, u8 ibport)
+{
+ struct smc_link *link = NULL;
+
+ if (list_empty(&lgr->list) ||
+ lgr->type == SMC_LGR_SYMMETRIC ||
+ lgr->type == SMC_LGR_ASYMMETRIC_PEER)
+ return;
+
+ if (lgr->role == SMC_SERV) {
+ /* trigger local add link processing */
+ link = smc_llc_usable_link(lgr);
+ if (!link)
+ return;
+ smc_llc_srv_add_link_local(link);
+ } else {
+ /* invite server to start add link processing */
+ u8 gid[SMC_GID_SIZE];
+
+ if (smc_ib_determine_gid(smcibdev, ibport, lgr->vlan_id, gid,
+ NULL))
+ return;
+ if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
+ /* some other llc task is ongoing */
+ wait_event_interruptible_timeout(lgr->llc_waiter,
+ (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE),
+ SMC_LLC_WAIT_TIME);
+ }
+ if (list_empty(&lgr->list) ||
+ !smc_ib_port_active(smcibdev, ibport))
+ return; /* lgr or device no longer active */
+ link = smc_llc_usable_link(lgr);
+ if (!link)
+ return;
+ smc_llc_send_add_link(link, smcibdev->mac[ibport - 1], gid,
+ NULL, SMC_LLC_REQ);
+ }
+}
+
+void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport)
+{
+ struct smc_ib_up_work *ib_work;
+ struct smc_link_group *lgr, *n;
+
+ list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
+ if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
+ SMC_MAX_PNETID_LEN) ||
+ lgr->type == SMC_LGR_SYMMETRIC ||
+ lgr->type == SMC_LGR_ASYMMETRIC_PEER)
+ continue;
+ ib_work = kmalloc(sizeof(*ib_work), GFP_KERNEL);
+ if (!ib_work)
+ continue;
+ INIT_WORK(&ib_work->work, smc_link_up_work);
+ ib_work->lgr = lgr;
+ ib_work->smcibdev = smcibdev;
+ ib_work->ibport = ibport;
+ schedule_work(&ib_work->work);
+ }
+}
+
+/* link is down - switch connections to alternate link,
+ * must be called under lgr->llc_conf_mutex lock
+ */
+static void smcr_link_down(struct smc_link *lnk)
+{
+ struct smc_link_group *lgr = lnk->lgr;
+ struct smc_link *to_lnk;
+ int del_link_id;
+
+ if (!lgr || lnk->state == SMC_LNK_UNUSED || list_empty(&lgr->list))
+ return;
+
+ smc_ib_modify_qp_reset(lnk);
+ to_lnk = smc_switch_conns(lgr, lnk, true);
+ if (!to_lnk) { /* no backup link available */
+ smcr_link_clear(lnk, true);
+ return;
+ }
+ smcr_lgr_set_type(lgr, SMC_LGR_SINGLE);
+ del_link_id = lnk->link_id;
+
+ if (lgr->role == SMC_SERV) {
+ /* trigger local delete link processing */
+ smc_llc_srv_delete_link_local(to_lnk, del_link_id);
+ } else {
+ if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
+ /* another llc task is ongoing */
+ mutex_unlock(&lgr->llc_conf_mutex);
+ wait_event_interruptible_timeout(lgr->llc_waiter,
+ (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE),
+ SMC_LLC_WAIT_TIME);
+ mutex_lock(&lgr->llc_conf_mutex);
+ }
+ smc_llc_send_delete_link(to_lnk, del_link_id, SMC_LLC_REQ, true,
+ SMC_LLC_DEL_LOST_PATH);
+ }
+}
+
+/* must be called under lgr->llc_conf_mutex lock */
+void smcr_link_down_cond(struct smc_link *lnk)
+{
+ if (smc_link_downing(&lnk->state))
+ smcr_link_down(lnk);
+}
+
+/* will get the lgr->llc_conf_mutex lock */
+void smcr_link_down_cond_sched(struct smc_link *lnk)
+{
+ if (smc_link_downing(&lnk->state))
+ schedule_work(&lnk->link_down_wrk);
+}
+
+void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport)
+{
+ struct smc_link_group *lgr, *n;
+ int i;
+
+ list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
+ if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
+ SMC_MAX_PNETID_LEN))
+ continue; /* lgr is not affected */
+ if (list_empty(&lgr->list))
+ continue;
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ struct smc_link *lnk = &lgr->lnk[i];
+
+ if (smc_link_usable(lnk) &&
+ lnk->smcibdev == smcibdev && lnk->ibport == ibport)
+ smcr_link_down_cond_sched(lnk);
+ }
+ }
+}
+
+static void smc_link_up_work(struct work_struct *work)
+{
+ struct smc_ib_up_work *ib_work = container_of(work,
+ struct smc_ib_up_work,
+ work);
+ struct smc_link_group *lgr = ib_work->lgr;
+
+ if (list_empty(&lgr->list))
+ goto out;
+ smcr_link_up(lgr, ib_work->smcibdev, ib_work->ibport);
+out:
+ kfree(ib_work);
+}
+
+static void smc_link_down_work(struct work_struct *work)
+{
+ struct smc_link *link = container_of(work, struct smc_link,
+ link_down_wrk);
+ struct smc_link_group *lgr = link->lgr;
+
+ if (list_empty(&lgr->list))
+ return;
+ wake_up_interruptible_all(&lgr->llc_waiter);
+ mutex_lock(&lgr->llc_conf_mutex);
+ smcr_link_down(link);
+ mutex_unlock(&lgr->llc_conf_mutex);
+}
+
/* Determine vlan of internal TCP socket.
* @vlan_id: address to store the determined vlan id into
*/
@@ -810,15 +1319,21 @@ static bool smcr_lgr_match(struct smc_link_group *lgr,
struct smc_clc_msg_local *lcl,
enum smc_lgr_role role, u32 clcqpn)
{
- return !memcmp(lgr->peer_systemid, lcl->id_for_peer,
- SMC_SYSTEMID_LEN) &&
- !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_gid, &lcl->gid,
- SMC_GID_SIZE) &&
- !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_mac, lcl->mac,
- sizeof(lcl->mac)) &&
- lgr->role == role &&
- (lgr->role == SMC_SERV ||
- lgr->lnk[SMC_SINGLE_LINK].peer_qpn == clcqpn);
+ int i;
+
+ if (memcmp(lgr->peer_systemid, lcl->id_for_peer, SMC_SYSTEMID_LEN) ||
+ lgr->role != role)
+ return false;
+
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ if (lgr->lnk[i].state != SMC_LNK_ACTIVE)
+ continue;
+ if ((lgr->role == SMC_SERV || lgr->lnk[i].peer_qpn == clcqpn) &&
+ !memcmp(lgr->lnk[i].peer_gid, &lcl->gid, SMC_GID_SIZE) &&
+ !memcmp(lgr->lnk[i].peer_mac, lcl->mac, sizeof(lcl->mac)))
+ return true;
+ }
+ return false;
}
static bool smcd_lgr_match(struct smc_link_group *lgr,
@@ -859,15 +1374,17 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
/* link group found */
ini->cln_first_contact = SMC_REUSE_CONTACT;
conn->lgr = lgr;
- smc_lgr_register_conn(conn); /* add smc conn to lgr */
- if (delayed_work_pending(&lgr->free_work))
- cancel_delayed_work(&lgr->free_work);
+ rc = smc_lgr_register_conn(conn, false);
write_unlock_bh(&lgr->conns_lock);
+ if (!rc && delayed_work_pending(&lgr->free_work))
+ cancel_delayed_work(&lgr->free_work);
break;
}
write_unlock_bh(&lgr->conns_lock);
}
spin_unlock_bh(lgr_lock);
+ if (rc)
+ return rc;
if (role == SMC_CLNT && !ini->srv_first_contact &&
ini->cln_first_contact == SMC_FIRST_CONTACT) {
@@ -885,12 +1402,15 @@ create:
goto out;
lgr = conn->lgr;
write_lock_bh(&lgr->conns_lock);
- smc_lgr_register_conn(conn); /* add smc conn to lgr */
+ rc = smc_lgr_register_conn(conn, true);
write_unlock_bh(&lgr->conns_lock);
+ if (rc)
+ goto out;
}
conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
conn->urg_state = SMC_URG_READ;
+ INIT_WORK(&smc->conn.abort_work, smc_conn_abort_work);
if (ini->is_smcd) {
conn->rx_off = sizeof(struct smcd_cdc_msg);
smcd_cdc_rx_init(conn); /* init tasklet for this conn */
@@ -934,19 +1454,19 @@ int smc_uncompress_bufsize(u8 compressed)
* buffer size; if not available, return NULL
*/
static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize,
- rwlock_t *lock,
+ struct mutex *lock,
struct list_head *buf_list)
{
struct smc_buf_desc *buf_slot;
- read_lock_bh(lock);
+ mutex_lock(lock);
list_for_each_entry(buf_slot, buf_list, list) {
if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
- read_unlock_bh(lock);
+ mutex_unlock(lock);
return buf_slot;
}
}
- read_unlock_bh(lock);
+ mutex_unlock(lock);
return NULL;
}
@@ -959,12 +1479,135 @@ static inline int smc_rmb_wnd_update_limit(int rmbe_size)
return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
}
+/* map an rmb buf to a link */
+static int smcr_buf_map_link(struct smc_buf_desc *buf_desc, bool is_rmb,
+ struct smc_link *lnk)
+{
+ int rc;
+
+ if (buf_desc->is_map_ib[lnk->link_idx])
+ return 0;
+
+ rc = sg_alloc_table(&buf_desc->sgt[lnk->link_idx], 1, GFP_KERNEL);
+ if (rc)
+ return rc;
+ sg_set_buf(buf_desc->sgt[lnk->link_idx].sgl,
+ buf_desc->cpu_addr, buf_desc->len);
+
+ /* map sg table to DMA address */
+ rc = smc_ib_buf_map_sg(lnk, buf_desc,
+ is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
+ /* SMC protocol depends on mapping to one DMA address only */
+ if (rc != 1) {
+ rc = -EAGAIN;
+ goto free_table;
+ }
+
+ /* create a new memory region for the RMB */
+ if (is_rmb) {
+ rc = smc_ib_get_memory_region(lnk->roce_pd,
+ IB_ACCESS_REMOTE_WRITE |
+ IB_ACCESS_LOCAL_WRITE,
+ buf_desc, lnk->link_idx);
+ if (rc)
+ goto buf_unmap;
+ smc_ib_sync_sg_for_device(lnk, buf_desc, DMA_FROM_DEVICE);
+ }
+ buf_desc->is_map_ib[lnk->link_idx] = true;
+ return 0;
+
+buf_unmap:
+ smc_ib_buf_unmap_sg(lnk, buf_desc,
+ is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
+free_table:
+ sg_free_table(&buf_desc->sgt[lnk->link_idx]);
+ return rc;
+}
+
+/* register a new rmb on IB device,
+ * must be called under lgr->llc_conf_mutex lock
+ */
+int smcr_link_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc)
+{
+ if (list_empty(&link->lgr->list))
+ return -ENOLINK;
+ if (!rmb_desc->is_reg_mr[link->link_idx]) {
+ /* register memory region for new rmb */
+ if (smc_wr_reg_send(link, rmb_desc->mr_rx[link->link_idx])) {
+ rmb_desc->is_reg_err = true;
+ return -EFAULT;
+ }
+ rmb_desc->is_reg_mr[link->link_idx] = true;
+ }
+ return 0;
+}
+
+static int _smcr_buf_map_lgr(struct smc_link *lnk, struct mutex *lock,
+ struct list_head *lst, bool is_rmb)
+{
+ struct smc_buf_desc *buf_desc, *bf;
+ int rc = 0;
+
+ mutex_lock(lock);
+ list_for_each_entry_safe(buf_desc, bf, lst, list) {
+ if (!buf_desc->used)
+ continue;
+ rc = smcr_buf_map_link(buf_desc, is_rmb, lnk);
+ if (rc)
+ goto out;
+ }
+out:
+ mutex_unlock(lock);
+ return rc;
+}
+
+/* map all used buffers of lgr for a new link */
+int smcr_buf_map_lgr(struct smc_link *lnk)
+{
+ struct smc_link_group *lgr = lnk->lgr;
+ int i, rc = 0;
+
+ for (i = 0; i < SMC_RMBE_SIZES; i++) {
+ rc = _smcr_buf_map_lgr(lnk, &lgr->rmbs_lock,
+ &lgr->rmbs[i], true);
+ if (rc)
+ return rc;
+ rc = _smcr_buf_map_lgr(lnk, &lgr->sndbufs_lock,
+ &lgr->sndbufs[i], false);
+ if (rc)
+ return rc;
+ }
+ return 0;
+}
+
+/* register all used buffers of lgr for a new link,
+ * must be called under lgr->llc_conf_mutex lock
+ */
+int smcr_buf_reg_lgr(struct smc_link *lnk)
+{
+ struct smc_link_group *lgr = lnk->lgr;
+ struct smc_buf_desc *buf_desc, *bf;
+ int i, rc = 0;
+
+ mutex_lock(&lgr->rmbs_lock);
+ for (i = 0; i < SMC_RMBE_SIZES; i++) {
+ list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list) {
+ if (!buf_desc->used)
+ continue;
+ rc = smcr_link_reg_rmb(lnk, buf_desc);
+ if (rc)
+ goto out;
+ }
+ }
+out:
+ mutex_unlock(&lgr->rmbs_lock);
+ return rc;
+}
+
static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
bool is_rmb, int bufsize)
{
struct smc_buf_desc *buf_desc;
- struct smc_link *lnk;
- int rc;
/* try to alloc a new buffer */
buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
@@ -981,41 +1624,33 @@ static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
return ERR_PTR(-EAGAIN);
}
buf_desc->cpu_addr = (void *)page_address(buf_desc->pages);
+ buf_desc->len = bufsize;
+ return buf_desc;
+}
- /* build the sg table from the pages */
- lnk = &lgr->lnk[SMC_SINGLE_LINK];
- rc = sg_alloc_table(&buf_desc->sgt[SMC_SINGLE_LINK], 1,
- GFP_KERNEL);
- if (rc) {
- smc_buf_free(lgr, is_rmb, buf_desc);
- return ERR_PTR(rc);
- }
- sg_set_buf(buf_desc->sgt[SMC_SINGLE_LINK].sgl,
- buf_desc->cpu_addr, bufsize);
+/* map buf_desc on all usable links,
+ * unused buffers stay mapped as long as the link is up
+ */
+static int smcr_buf_map_usable_links(struct smc_link_group *lgr,
+ struct smc_buf_desc *buf_desc, bool is_rmb)
+{
+ int i, rc = 0;
- /* map sg table to DMA address */
- rc = smc_ib_buf_map_sg(lnk->smcibdev, buf_desc,
- is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
- /* SMC protocol depends on mapping to one DMA address only */
- if (rc != 1) {
- smc_buf_free(lgr, is_rmb, buf_desc);
- return ERR_PTR(-EAGAIN);
- }
+ /* protect against parallel link reconfiguration */
+ mutex_lock(&lgr->llc_conf_mutex);
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ struct smc_link *lnk = &lgr->lnk[i];
- /* create a new memory region for the RMB */
- if (is_rmb) {
- rc = smc_ib_get_memory_region(lnk->roce_pd,
- IB_ACCESS_REMOTE_WRITE |
- IB_ACCESS_LOCAL_WRITE,
- buf_desc);
- if (rc) {
- smc_buf_free(lgr, is_rmb, buf_desc);
- return ERR_PTR(rc);
+ if (!smc_link_usable(lnk))
+ continue;
+ if (smcr_buf_map_link(buf_desc, is_rmb, lnk)) {
+ rc = -ENOMEM;
+ goto out;
}
}
-
- buf_desc->len = bufsize;
- return buf_desc;
+out:
+ mutex_unlock(&lgr->llc_conf_mutex);
+ return rc;
}
#define SMCD_DMBE_SIZES 7 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */
@@ -1062,8 +1697,8 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
struct smc_link_group *lgr = conn->lgr;
struct list_head *buf_list;
int bufsize, bufsize_short;
+ struct mutex *lock; /* lock buffer list */
int sk_buf_size;
- rwlock_t *lock;
if (is_rmb)
/* use socket recv buffer size (w/o overhead) as start value */
@@ -1104,15 +1739,22 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
continue;
buf_desc->used = 1;
- write_lock_bh(lock);
+ mutex_lock(lock);
list_add(&buf_desc->list, buf_list);
- write_unlock_bh(lock);
+ mutex_unlock(lock);
break; /* found */
}
if (IS_ERR(buf_desc))
return -ENOMEM;
+ if (!is_smcd) {
+ if (smcr_buf_map_usable_links(lgr, buf_desc, is_rmb)) {
+ smcr_buf_unuse(buf_desc, lgr);
+ return -ENOMEM;
+ }
+ }
+
if (is_rmb) {
conn->rmb_desc = buf_desc;
conn->rmbe_size_short = bufsize_short;
@@ -1132,42 +1774,44 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn)
{
- struct smc_link_group *lgr = conn->lgr;
-
- if (!conn->lgr || conn->lgr->is_smcd)
+ if (!conn->lgr || conn->lgr->is_smcd || !smc_link_usable(conn->lnk))
return;
- smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
- conn->sndbuf_desc, DMA_TO_DEVICE);
+ smc_ib_sync_sg_for_cpu(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
}
void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
{
- struct smc_link_group *lgr = conn->lgr;
-
- if (!conn->lgr || conn->lgr->is_smcd)
+ if (!conn->lgr || conn->lgr->is_smcd || !smc_link_usable(conn->lnk))
return;
- smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
- conn->sndbuf_desc, DMA_TO_DEVICE);
+ smc_ib_sync_sg_for_device(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
}
void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
{
- struct smc_link_group *lgr = conn->lgr;
+ int i;
if (!conn->lgr || conn->lgr->is_smcd)
return;
- smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
- conn->rmb_desc, DMA_FROM_DEVICE);
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ if (!smc_link_usable(&conn->lgr->lnk[i]))
+ continue;
+ smc_ib_sync_sg_for_cpu(&conn->lgr->lnk[i], conn->rmb_desc,
+ DMA_FROM_DEVICE);
+ }
}
void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
{
- struct smc_link_group *lgr = conn->lgr;
+ int i;
if (!conn->lgr || conn->lgr->is_smcd)
return;
- smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
- conn->rmb_desc, DMA_FROM_DEVICE);
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ if (!smc_link_usable(&conn->lgr->lnk[i]))
+ continue;
+ smc_ib_sync_sg_for_device(&conn->lgr->lnk[i], conn->rmb_desc,
+ DMA_FROM_DEVICE);
+ }
}
/* create the send and receive buffer for an SMC socket;
@@ -1202,16 +1846,64 @@ static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
return -ENOSPC;
}
+static int smc_rtoken_find_by_link(struct smc_link_group *lgr, int lnk_idx,
+ u32 rkey)
+{
+ int i;
+
+ for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
+ if (test_bit(i, lgr->rtokens_used_mask) &&
+ lgr->rtokens[i][lnk_idx].rkey == rkey)
+ return i;
+ }
+ return -ENOENT;
+}
+
+/* set rtoken for a new link to an existing rmb */
+void smc_rtoken_set(struct smc_link_group *lgr, int link_idx, int link_idx_new,
+ __be32 nw_rkey_known, __be64 nw_vaddr, __be32 nw_rkey)
+{
+ int rtok_idx;
+
+ rtok_idx = smc_rtoken_find_by_link(lgr, link_idx, ntohl(nw_rkey_known));
+ if (rtok_idx == -ENOENT)
+ return;
+ lgr->rtokens[rtok_idx][link_idx_new].rkey = ntohl(nw_rkey);
+ lgr->rtokens[rtok_idx][link_idx_new].dma_addr = be64_to_cpu(nw_vaddr);
+}
+
+/* set rtoken for a new link whose link_id is given */
+void smc_rtoken_set2(struct smc_link_group *lgr, int rtok_idx, int link_id,
+ __be64 nw_vaddr, __be32 nw_rkey)
+{
+ u64 dma_addr = be64_to_cpu(nw_vaddr);
+ u32 rkey = ntohl(nw_rkey);
+ bool found = false;
+ int link_idx;
+
+ for (link_idx = 0; link_idx < SMC_LINKS_PER_LGR_MAX; link_idx++) {
+ if (lgr->lnk[link_idx].link_id == link_id) {
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ return;
+ lgr->rtokens[rtok_idx][link_idx].rkey = rkey;
+ lgr->rtokens[rtok_idx][link_idx].dma_addr = dma_addr;
+}
+
/* add a new rtoken from peer */
-int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey)
+int smc_rtoken_add(struct smc_link *lnk, __be64 nw_vaddr, __be32 nw_rkey)
{
+ struct smc_link_group *lgr = smc_get_lgr(lnk);
u64 dma_addr = be64_to_cpu(nw_vaddr);
u32 rkey = ntohl(nw_rkey);
int i;
for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
- if ((lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey) &&
- (lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr == dma_addr) &&
+ if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
+ lgr->rtokens[i][lnk->link_idx].dma_addr == dma_addr &&
test_bit(i, lgr->rtokens_used_mask)) {
/* already in list */
return i;
@@ -1220,23 +1912,25 @@ int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey)
i = smc_rmb_reserve_rtoken_idx(lgr);
if (i < 0)
return i;
- lgr->rtokens[i][SMC_SINGLE_LINK].rkey = rkey;
- lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = dma_addr;
+ lgr->rtokens[i][lnk->link_idx].rkey = rkey;
+ lgr->rtokens[i][lnk->link_idx].dma_addr = dma_addr;
return i;
}
-/* delete an rtoken */
-int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey)
+/* delete an rtoken from all links */
+int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey)
{
+ struct smc_link_group *lgr = smc_get_lgr(lnk);
u32 rkey = ntohl(nw_rkey);
- int i;
+ int i, j;
for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
- if (lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey &&
+ if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
test_bit(i, lgr->rtokens_used_mask)) {
- lgr->rtokens[i][SMC_SINGLE_LINK].rkey = 0;
- lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = 0;
-
+ for (j = 0; j < SMC_LINKS_PER_LGR_MAX; j++) {
+ lgr->rtokens[i][j].rkey = 0;
+ lgr->rtokens[i][j].dma_addr = 0;
+ }
clear_bit(i, lgr->rtokens_used_mask);
return 0;
}
@@ -1246,9 +1940,10 @@ int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey)
/* save rkey and dma_addr received from peer during clc handshake */
int smc_rmb_rtoken_handling(struct smc_connection *conn,
+ struct smc_link *lnk,
struct smc_clc_msg_accept_confirm *clc)
{
- conn->rtoken_idx = smc_rtoken_add(conn->lgr, clc->rmb_dma_addr,
+ conn->rtoken_idx = smc_rtoken_add(lnk, clc->rmb_dma_addr,
clc->rmb_rkey);
if (conn->rtoken_idx < 0)
return conn->rtoken_idx;