summaryrefslogtreecommitdiff
path: root/drivers/infiniband/hw/hfi1/sdma.c
diff options
context:
space:
mode:
authorKaike Wan <kaike.wan@intel.com>2017-07-24 07:45:37 -0700
committerDoug Ledford <dledford@redhat.com>2017-07-31 15:17:54 -0400
commitbcad29137a9731bfa5e16d64bf8e8a71a268ac88 (patch)
treef7c1706567ae5e887a5046607a5e8d11f837ab24 /drivers/infiniband/hw/hfi1/sdma.c
parentcb51c5d2cda855302910ab352f3d391c1a00aba0 (diff)
IB/hfi1: Serve the most starved iowait entry first
When an egress resource(SDMA descriptors, pio credits) is not available, a sending thread will be put on the resource's wait queue. When the resource becomes available again, up to a fixed number of sending threads can be awakened sequentially and removed from the wait queue, depending on the number of waiting threads and the number of free resources. Since each awakened sending thread will send as many packets as possible, it is highly likely that the first sending thread will consume all the egress resources. Subsequently, it will be put back to the end of the wait queue. Depending on the timing when the later sending threads wake up, they may not be able to send any packet and be again put back to the end of the wait queue sequentially, right behind the first sending thread. This starvation cycle continues until some sending threads exceed their retry limit and consequently fail. This patch fixes the issue by two simple approaches: (1) Any starved sending thread will be put to the head of the wait queue while a served sending thread will be put to the tail; (2) The most starved sending thread will be served first. Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com> Signed-off-by: Kaike Wan <kaike.wan@intel.com> Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com> Signed-off-by: Doug Ledford <dledford@redhat.com>
Diffstat (limited to 'drivers/infiniband/hw/hfi1/sdma.c')
-rw-r--r--drivers/infiniband/hw/hfi1/sdma.c34
1 files changed, 24 insertions, 10 deletions
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index d82ff57214c5..71b4258545c4 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -246,7 +246,7 @@ static void __sdma_process_event(
enum sdma_events event);
static void dump_sdma_state(struct sdma_engine *sde);
static void sdma_make_progress(struct sdma_engine *sde, u64 status);
-static void sdma_desc_avail(struct sdma_engine *sde, unsigned avail);
+static void sdma_desc_avail(struct sdma_engine *sde, uint avail);
static void sdma_flush_descq(struct sdma_engine *sde);
/**
@@ -1762,13 +1762,14 @@ retry:
*
* This is called with head_lock held.
*/
-static void sdma_desc_avail(struct sdma_engine *sde, unsigned avail)
+static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
{
struct iowait *wait, *nw;
struct iowait *waits[SDMA_WAIT_BATCH_SIZE];
- unsigned i, n = 0, seq;
+ uint i, n = 0, seq, max_idx = 0;
struct sdma_txreq *stx;
struct hfi1_ibdev *dev = &sde->dd->verbs_dev;
+ u8 max_starved_cnt = 0;
#ifdef CONFIG_SDMA_VERBOSITY
dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n", sde->this_idx,
@@ -1803,6 +1804,9 @@ static void sdma_desc_avail(struct sdma_engine *sde, unsigned avail)
if (num_desc > avail)
break;
avail -= num_desc;
+ /* Find the most starved wait memeber */
+ iowait_starve_find_max(wait, &max_starved_cnt,
+ n, &max_idx);
list_del_init(&wait->list);
waits[n++] = wait;
}
@@ -1811,8 +1815,13 @@ static void sdma_desc_avail(struct sdma_engine *sde, unsigned avail)
}
} while (read_seqretry(&dev->iowait_lock, seq));
+ /* Schedule the most starved one first */
+ if (n)
+ waits[max_idx]->wakeup(waits[max_idx], SDMA_AVAIL_REASON);
+
for (i = 0; i < n; i++)
- waits[i]->wakeup(waits[i], SDMA_AVAIL_REASON);
+ if (i != max_idx)
+ waits[i]->wakeup(waits[i], SDMA_AVAIL_REASON);
}
/* head_lock must be held */
@@ -2349,7 +2358,8 @@ static inline u16 submit_tx(struct sdma_engine *sde, struct sdma_txreq *tx)
static int sdma_check_progress(
struct sdma_engine *sde,
struct iowait *wait,
- struct sdma_txreq *tx)
+ struct sdma_txreq *tx,
+ bool pkts_sent)
{
int ret;
@@ -2362,7 +2372,7 @@ static int sdma_check_progress(
seq = raw_seqcount_begin(
(const seqcount_t *)&sde->head_lock.seqcount);
- ret = wait->sleep(sde, wait, tx, seq);
+ ret = wait->sleep(sde, wait, tx, seq, pkts_sent);
if (ret == -EAGAIN)
sde->desc_avail = sdma_descq_freecnt(sde);
} else {
@@ -2376,6 +2386,7 @@ static int sdma_check_progress(
* @sde: sdma engine to use
* @wait: wait structure to use when full (may be NULL)
* @tx: sdma_txreq to submit
+ * @pkts_sent: has any packet been sent yet?
*
* The call submits the tx into the ring. If a iowait structure is non-NULL
* the packet will be queued to the list in wait.
@@ -2387,7 +2398,8 @@ static int sdma_check_progress(
*/
int sdma_send_txreq(struct sdma_engine *sde,
struct iowait *wait,
- struct sdma_txreq *tx)
+ struct sdma_txreq *tx,
+ bool pkts_sent)
{
int ret = 0;
u16 tail;
@@ -2429,7 +2441,7 @@ unlock_noconn:
ret = -ECOMM;
goto unlock;
nodesc:
- ret = sdma_check_progress(sde, wait, tx);
+ ret = sdma_check_progress(sde, wait, tx, pkts_sent);
if (ret == -EAGAIN) {
ret = 0;
goto retry;
@@ -2498,8 +2510,10 @@ retry:
}
update_tail:
total_count = submit_count + flush_count;
- if (wait)
+ if (wait) {
iowait_sdma_add(wait, total_count);
+ iowait_starve_clear(submit_count > 0, wait);
+ }
if (tail != INVALID_TAIL)
sdma_update_tail(sde, tail);
spin_unlock_irqrestore(&sde->tail_lock, flags);
@@ -2527,7 +2541,7 @@ unlock_noconn:
ret = -ECOMM;
goto update_tail;
nodesc:
- ret = sdma_check_progress(sde, wait, tx);
+ ret = sdma_check_progress(sde, wait, tx, submit_count > 0);
if (ret == -EAGAIN) {
ret = 0;
goto retry;