diff options
author | farah kassabri <fkassabri@habana.ai> | 2021-11-03 13:15:55 +0200 |
---|---|---|
committer | Oded Gabbay <ogabbay@kernel.org> | 2021-12-26 08:59:05 +0200 |
commit | 792512459fb2a62a5ea08264a0cdfb7e46a391a9 (patch) | |
tree | f8489dcb5af19a5a98ad780ca9d844bbb2885d70 /drivers/misc/habanalabs/gaudi | |
parent | 1679c7ee580fdaa2a5df398a526b2eddc857f2a1 (diff) |
habanalabs/gaudi: Fix collective wait bug
In Signaling-From-Graph case, the driver didn't set the hw_sob pointer
at the right place, which is needed for the cs completion
check prior to start sending all the master/slaves jobs to device.
Signed-off-by: farah kassabri <fkassabri@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
Diffstat (limited to 'drivers/misc/habanalabs/gaudi')
-rw-r--r-- | drivers/misc/habanalabs/gaudi/gaudi.c | 15 |
1 files changed, 11 insertions, 4 deletions
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index b4814369062e..a9e279bfebae 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -1276,6 +1276,7 @@ static int gaudi_collective_wait_init_cs(struct hl_cs *cs) container_of(cs->signal_fence, struct hl_cs_compl, base_fence); struct hl_cs_compl *cs_cmpl = container_of(cs->fence, struct hl_cs_compl, base_fence); + struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl; struct gaudi_collective_properties *cprop; u32 stream, queue_id, sob_group_offset; struct gaudi_device *gaudi; @@ -1288,10 +1289,16 @@ static int gaudi_collective_wait_init_cs(struct hl_cs *cs) gaudi = hdev->asic_specific; cprop = &gaudi->collective_props; - /* In encaps signals case the SOB info will be retrieved from - * the handle in gaudi_collective_slave_init_job. - */ - if (!cs->encaps_signals) { + if (cs->encaps_signals) { + cs_cmpl->hw_sob = handle->hw_sob; + /* at this checkpoint we only need the hw_sob pointer + * for the completion check before start going over the jobs + * of the master/slaves, the sob_value will be taken later on + * in gaudi_collective_slave_init_job depends on each + * job wait offset value. + */ + cs_cmpl->sob_val = 0; + } else { /* copy the SOB id and value of the signal CS */ cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob; cs_cmpl->sob_val = signal_cs_cmpl->sob_val; |