summaryrefslogtreecommitdiff
path: root/net/smc/smc_wr.c
diff options
context:
space:
mode:
authorKai Shen <KaiShen@linux.alibaba.com>2023-03-17 03:21:32 +0000
committerDavid S. Miller <davem@davemloft.net>2023-03-17 08:59:01 +0000
commit79a22238b4f22c45cadd3b4040d644f4de320d1b (patch)
treed2615b104eee4ca8404dfb4afd1eb488f64a0f81 /net/smc/smc_wr.c
parentd27d367d3b7861d9d6c852450d935507ba4044ee (diff)
net/smc: Use percpu ref for wr tx reference
The refcount wr_tx_refcnt may cause cache thrashing problems among cores and we can use percpu ref to mitigate this issue here. We gain some performance improvement with percpu ref here on our customized smc-r verion. Applying cache alignment may also mitigate this problem but it seem more reasonable to use percpu ref here. We can also replace wr_reg_refcnt with one percpu reference like wr_tx_refcnt. redis-benchmark on smc-r with atomic wr_tx_refcnt: SET: 525707.06 requests per second, p50=0.087 msec GET: 554877.38 requests per second, p50=0.087 msec redis-benchmark on the percpu_ref version: SET: 540482.06 requests per second, p50=0.087 msec GET: 570711.12 requests per second, p50=0.079 msec Cases are like "redis-benchmark -h x.x.x.x -q -t set,get -P 1 -n 5000000 -c 50 -d 10 --threads 4". Signed-off-by: Kai Shen <KaiShen@linux.alibaba.com> Reviewed-by: Tony Lu <tonylu@linux.alibaba.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/smc/smc_wr.c')
-rw-r--r--net/smc/smc_wr.c35
1 files changed, 28 insertions, 7 deletions
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index b0678a417e09..0021065a600a 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -377,12 +377,11 @@ int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr)
if (rc)
return rc;
- atomic_inc(&link->wr_reg_refcnt);
+ percpu_ref_get(&link->wr_reg_refs);
rc = wait_event_interruptible_timeout(link->wr_reg_wait,
(link->wr_reg_state != POSTED),
SMC_WR_REG_MR_WAIT_TIME);
- if (atomic_dec_and_test(&link->wr_reg_refcnt))
- wake_up_all(&link->wr_reg_wait);
+ percpu_ref_put(&link->wr_reg_refs);
if (!rc) {
/* timeout - terminate link */
smcr_link_down_cond_sched(link);
@@ -647,8 +646,10 @@ void smc_wr_free_link(struct smc_link *lnk)
smc_wr_wakeup_tx_wait(lnk);
smc_wr_tx_wait_no_pending_sends(lnk);
- wait_event(lnk->wr_reg_wait, (!atomic_read(&lnk->wr_reg_refcnt)));
- wait_event(lnk->wr_tx_wait, (!atomic_read(&lnk->wr_tx_refcnt)));
+ percpu_ref_kill(&lnk->wr_reg_refs);
+ wait_for_completion(&lnk->reg_ref_comp);
+ percpu_ref_kill(&lnk->wr_tx_refs);
+ wait_for_completion(&lnk->tx_ref_comp);
if (lnk->wr_rx_dma_addr) {
ib_dma_unmap_single(ibdev, lnk->wr_rx_dma_addr,
@@ -847,6 +848,20 @@ void smc_wr_add_dev(struct smc_ib_device *smcibdev)
tasklet_setup(&smcibdev->send_tasklet, smc_wr_tx_tasklet_fn);
}
+static void smcr_wr_tx_refs_free(struct percpu_ref *ref)
+{
+ struct smc_link *lnk = container_of(ref, struct smc_link, wr_tx_refs);
+
+ complete(&lnk->tx_ref_comp);
+}
+
+static void smcr_wr_reg_refs_free(struct percpu_ref *ref)
+{
+ struct smc_link *lnk = container_of(ref, struct smc_link, wr_reg_refs);
+
+ complete(&lnk->reg_ref_comp);
+}
+
int smc_wr_create_link(struct smc_link *lnk)
{
struct ib_device *ibdev = lnk->smcibdev->ibdev;
@@ -890,9 +905,15 @@ int smc_wr_create_link(struct smc_link *lnk)
smc_wr_init_sge(lnk);
bitmap_zero(lnk->wr_tx_mask, SMC_WR_BUF_CNT);
init_waitqueue_head(&lnk->wr_tx_wait);
- atomic_set(&lnk->wr_tx_refcnt, 0);
+ rc = percpu_ref_init(&lnk->wr_tx_refs, smcr_wr_tx_refs_free, 0, GFP_KERNEL);
+ if (rc)
+ goto dma_unmap;
+ init_completion(&lnk->tx_ref_comp);
init_waitqueue_head(&lnk->wr_reg_wait);
- atomic_set(&lnk->wr_reg_refcnt, 0);
+ rc = percpu_ref_init(&lnk->wr_reg_refs, smcr_wr_reg_refs_free, 0, GFP_KERNEL);
+ if (rc)
+ goto dma_unmap;
+ init_completion(&lnk->reg_ref_comp);
init_waitqueue_head(&lnk->wr_rx_empty_wait);
return rc;