diff options
Diffstat (limited to 'drivers/infiniband/hw/hns/hns_roce_hem.c')
-rw-r--r-- | drivers/infiniband/hw/hns/hns_roce_hem.c | 239 |
1 files changed, 106 insertions, 133 deletions
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index c4ac06a33869..ca0798224e56 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -249,85 +249,48 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev, } static struct hns_roce_hem *hns_roce_alloc_hem(struct hns_roce_dev *hr_dev, - int npages, unsigned long hem_alloc_size, gfp_t gfp_mask) { - struct hns_roce_hem_chunk *chunk = NULL; struct hns_roce_hem *hem; - struct scatterlist *mem; int order; void *buf; WARN_ON(gfp_mask & __GFP_HIGHMEM); + order = get_order(hem_alloc_size); + if (PAGE_SIZE << order != hem_alloc_size) { + dev_err(hr_dev->dev, "invalid hem_alloc_size: %lu!\n", + hem_alloc_size); + return NULL; + } + hem = kmalloc(sizeof(*hem), gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN)); if (!hem) return NULL; - INIT_LIST_HEAD(&hem->chunk_list); - - order = get_order(hem_alloc_size); - - while (npages > 0) { - if (!chunk) { - chunk = kmalloc(sizeof(*chunk), - gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN)); - if (!chunk) - goto fail; - - sg_init_table(chunk->mem, HNS_ROCE_HEM_CHUNK_LEN); - chunk->npages = 0; - chunk->nsg = 0; - memset(chunk->buf, 0, sizeof(chunk->buf)); - list_add_tail(&chunk->list, &hem->chunk_list); - } + buf = dma_alloc_coherent(hr_dev->dev, hem_alloc_size, + &hem->dma, gfp_mask); + if (!buf) + goto fail; - while (1 << order > npages) - --order; - - /* - * Alloc memory one time. If failed, don't alloc small block - * memory, directly return fail. - */ - mem = &chunk->mem[chunk->npages]; - buf = dma_alloc_coherent(hr_dev->dev, PAGE_SIZE << order, - &sg_dma_address(mem), gfp_mask); - if (!buf) - goto fail; - - chunk->buf[chunk->npages] = buf; - sg_dma_len(mem) = PAGE_SIZE << order; - - ++chunk->npages; - ++chunk->nsg; - npages -= 1 << order; - } + hem->buf = buf; + hem->size = hem_alloc_size; return hem; fail: - hns_roce_free_hem(hr_dev, hem); + kfree(hem); return NULL; } void hns_roce_free_hem(struct hns_roce_dev *hr_dev, struct hns_roce_hem *hem) { - struct hns_roce_hem_chunk *chunk, *tmp; - int i; - if (!hem) return; - list_for_each_entry_safe(chunk, tmp, &hem->chunk_list, list) { - for (i = 0; i < chunk->npages; ++i) - dma_free_coherent(hr_dev->dev, - sg_dma_len(&chunk->mem[i]), - chunk->buf[i], - sg_dma_address(&chunk->mem[i])); - kfree(chunk); - } + dma_free_coherent(hr_dev->dev, hem->size, hem->buf, hem->dma); kfree(hem); } @@ -337,7 +300,7 @@ static int calc_hem_config(struct hns_roce_dev *hr_dev, struct hns_roce_hem_mhop *mhop, struct hns_roce_hem_index *index) { - struct ib_device *ibdev = &hr_dev->ib_dev; + struct device *dev = hr_dev->dev; unsigned long mhop_obj = obj; u32 l0_idx, l1_idx, l2_idx; u32 chunk_ba_num; @@ -368,14 +331,14 @@ static int calc_hem_config(struct hns_roce_dev *hr_dev, index->buf = l0_idx; break; default: - ibdev_err(ibdev, "table %u not support mhop.hop_num = %u!\n", - table->type, mhop->hop_num); + dev_err(dev, "table %u not support mhop.hop_num = %u!\n", + table->type, mhop->hop_num); return -EINVAL; } if (unlikely(index->buf >= table->num_hem)) { - ibdev_err(ibdev, "table %u exceed hem limt idx %llu, max %lu!\n", - table->type, index->buf, table->num_hem); + dev_err(dev, "table %u exceed hem limt idx %llu, max %lu!\n", + table->type, index->buf, table->num_hem); return -EINVAL; } @@ -415,7 +378,6 @@ static int alloc_mhop_hem(struct hns_roce_dev *hr_dev, { u32 bt_size = mhop->bt_chunk_size; struct device *dev = hr_dev->dev; - struct hns_roce_hem_iter iter; gfp_t flag; u64 bt_ba; u32 size; @@ -456,16 +418,15 @@ static int alloc_mhop_hem(struct hns_roce_dev *hr_dev, */ size = table->type < HEM_TYPE_MTT ? mhop->buf_chunk_size : bt_size; flag = GFP_KERNEL | __GFP_NOWARN; - table->hem[index->buf] = hns_roce_alloc_hem(hr_dev, size >> PAGE_SHIFT, - size, flag); + table->hem[index->buf] = hns_roce_alloc_hem(hr_dev, size, flag); if (!table->hem[index->buf]) { ret = -ENOMEM; goto err_alloc_hem; } index->inited |= HEM_INDEX_BUF; - hns_roce_hem_first(table->hem[index->buf], &iter); - bt_ba = hns_roce_hem_addr(&iter); + bt_ba = table->hem[index->buf]->dma; + if (table->type < HEM_TYPE_MTT) { if (mhop->hop_num == 2) *(table->bt_l1[index->l1] + mhop->l2_idx) = bt_ba; @@ -487,14 +448,14 @@ static int set_mhop_hem(struct hns_roce_dev *hr_dev, struct hns_roce_hem_mhop *mhop, struct hns_roce_hem_index *index) { - struct ib_device *ibdev = &hr_dev->ib_dev; + struct device *dev = hr_dev->dev; u32 step_idx; int ret = 0; if (index->inited & HEM_INDEX_L0) { ret = hr_dev->hw->set_hem(hr_dev, table, obj, 0); if (ret) { - ibdev_err(ibdev, "set HEM step 0 failed!\n"); + dev_err(dev, "set HEM step 0 failed!\n"); goto out; } } @@ -502,7 +463,7 @@ static int set_mhop_hem(struct hns_roce_dev *hr_dev, if (index->inited & HEM_INDEX_L1) { ret = hr_dev->hw->set_hem(hr_dev, table, obj, 1); if (ret) { - ibdev_err(ibdev, "set HEM step 1 failed!\n"); + dev_err(dev, "set HEM step 1 failed!\n"); goto out; } } @@ -514,7 +475,7 @@ static int set_mhop_hem(struct hns_roce_dev *hr_dev, step_idx = mhop->hop_num; ret = hr_dev->hw->set_hem(hr_dev, table, obj, step_idx); if (ret) - ibdev_err(ibdev, "set HEM step last failed!\n"); + dev_err(dev, "set HEM step last failed!\n"); } out: return ret; @@ -524,14 +485,14 @@ static int hns_roce_table_mhop_get(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table, unsigned long obj) { - struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_hem_index index = {}; struct hns_roce_hem_mhop mhop = {}; + struct device *dev = hr_dev->dev; int ret; ret = calc_hem_config(hr_dev, table, obj, &mhop, &index); if (ret) { - ibdev_err(ibdev, "calc hem config failed!\n"); + dev_err(dev, "calc hem config failed!\n"); return ret; } @@ -543,7 +504,7 @@ static int hns_roce_table_mhop_get(struct hns_roce_dev *hr_dev, ret = alloc_mhop_hem(hr_dev, table, &mhop, &index); if (ret) { - ibdev_err(ibdev, "alloc mhop hem failed!\n"); + dev_err(dev, "alloc mhop hem failed!\n"); goto out; } @@ -551,7 +512,7 @@ static int hns_roce_table_mhop_get(struct hns_roce_dev *hr_dev, if (table->type < HEM_TYPE_MTT) { ret = set_mhop_hem(hr_dev, table, obj, &mhop, &index); if (ret) { - ibdev_err(ibdev, "set HEM address to HW failed!\n"); + dev_err(dev, "set HEM address to HW failed!\n"); goto err_alloc; } } @@ -586,7 +547,6 @@ int hns_roce_table_get(struct hns_roce_dev *hr_dev, } table->hem[i] = hns_roce_alloc_hem(hr_dev, - table->table_chunk_size >> PAGE_SHIFT, table->table_chunk_size, GFP_KERNEL | __GFP_NOWARN); if (!table->hem[i]) { @@ -615,7 +575,7 @@ static void clear_mhop_hem(struct hns_roce_dev *hr_dev, struct hns_roce_hem_mhop *mhop, struct hns_roce_hem_index *index) { - struct ib_device *ibdev = &hr_dev->ib_dev; + struct device *dev = hr_dev->dev; u32 hop_num = mhop->hop_num; u32 chunk_ba_num; u32 step_idx; @@ -645,21 +605,21 @@ static void clear_mhop_hem(struct hns_roce_dev *hr_dev, ret = hr_dev->hw->clear_hem(hr_dev, table, obj, step_idx); if (ret) - ibdev_warn(ibdev, "failed to clear hop%u HEM, ret = %d.\n", - hop_num, ret); + dev_warn(dev, "failed to clear hop%u HEM, ret = %d.\n", + hop_num, ret); if (index->inited & HEM_INDEX_L1) { ret = hr_dev->hw->clear_hem(hr_dev, table, obj, 1); if (ret) - ibdev_warn(ibdev, "failed to clear HEM step 1, ret = %d.\n", - ret); + dev_warn(dev, "failed to clear HEM step 1, ret = %d.\n", + ret); } if (index->inited & HEM_INDEX_L0) { ret = hr_dev->hw->clear_hem(hr_dev, table, obj, 0); if (ret) - ibdev_warn(ibdev, "failed to clear HEM step 0, ret = %d.\n", - ret); + dev_warn(dev, "failed to clear HEM step 0, ret = %d.\n", + ret); } } } @@ -669,14 +629,14 @@ static void hns_roce_table_mhop_put(struct hns_roce_dev *hr_dev, unsigned long obj, int check_refcount) { - struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_hem_index index = {}; struct hns_roce_hem_mhop mhop = {}; + struct device *dev = hr_dev->dev; int ret; ret = calc_hem_config(hr_dev, table, obj, &mhop, &index); if (ret) { - ibdev_err(ibdev, "calc hem config failed!\n"); + dev_err(dev, "calc hem config failed!\n"); return; } @@ -712,8 +672,8 @@ void hns_roce_table_put(struct hns_roce_dev *hr_dev, ret = hr_dev->hw->clear_hem(hr_dev, table, obj, HEM_HOP_STEP_DIRECT); if (ret) - dev_warn(dev, "failed to clear HEM base address, ret = %d.\n", - ret); + dev_warn_ratelimited(dev, "failed to clear HEM base address, ret = %d.\n", + ret); hns_roce_free_hem(hr_dev, table->hem[i]); table->hem[i] = NULL; @@ -725,7 +685,6 @@ void *hns_roce_table_find(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table, unsigned long obj, dma_addr_t *dma_handle) { - struct hns_roce_hem_chunk *chunk; struct hns_roce_hem_mhop mhop; struct hns_roce_hem *hem; unsigned long mhop_obj = obj; @@ -734,7 +693,6 @@ void *hns_roce_table_find(struct hns_roce_dev *hr_dev, int offset, dma_offset; void *addr = NULL; u32 hem_idx = 0; - int length; int i, j; mutex_lock(&table->mutex); @@ -767,23 +725,8 @@ void *hns_roce_table_find(struct hns_roce_dev *hr_dev, if (!hem) goto out; - list_for_each_entry(chunk, &hem->chunk_list, list) { - for (i = 0; i < chunk->npages; ++i) { - length = sg_dma_len(&chunk->mem[i]); - if (dma_handle && dma_offset >= 0) { - if (length > (u32)dma_offset) - *dma_handle = sg_dma_address( - &chunk->mem[i]) + dma_offset; - dma_offset -= length; - } - - if (length > (u32)offset) { - addr = chunk->buf[i] + offset; - goto out; - } - offset -= length; - } - } + *dma_handle = hem->dma + dma_offset; + addr = hem->buf + offset; out: mutex_unlock(&table->mutex); @@ -934,6 +877,7 @@ void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev, if (hns_roce_check_whether_mhop(hr_dev, table->type)) { hns_roce_cleanup_mhop_hem_table(hr_dev, table); + mutex_destroy(&table->mutex); return; } @@ -948,6 +892,7 @@ void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev, hns_roce_free_hem(hr_dev, table->hem[i]); } + mutex_destroy(&table->mutex); kfree(table->hem); } @@ -986,6 +931,7 @@ struct hns_roce_hem_item { size_t count; /* max ba numbers */ int start; /* start buf offset in this hem */ int end; /* end buf offset in this hem */ + bool exist_bt; }; /* All HEM items are linked in a tree structure */ @@ -1014,6 +960,7 @@ hem_list_alloc_item(struct hns_roce_dev *hr_dev, int start, int end, int count, } } + hem->exist_bt = exist_bt; hem->count = count; hem->start = start; hem->end = end; @@ -1024,34 +971,32 @@ hem_list_alloc_item(struct hns_roce_dev *hr_dev, int start, int end, int count, } static void hem_list_free_item(struct hns_roce_dev *hr_dev, - struct hns_roce_hem_item *hem, bool exist_bt) + struct hns_roce_hem_item *hem) { - if (exist_bt) + if (hem->exist_bt) dma_free_coherent(hr_dev->dev, hem->count * BA_BYTE_LEN, hem->addr, hem->dma_addr); kfree(hem); } static void hem_list_free_all(struct hns_roce_dev *hr_dev, - struct list_head *head, bool exist_bt) + struct list_head *head) { struct hns_roce_hem_item *hem, *temp_hem; list_for_each_entry_safe(hem, temp_hem, head, list) { list_del(&hem->list); - hem_list_free_item(hr_dev, hem, exist_bt); + hem_list_free_item(hr_dev, hem); } } -static void hem_list_link_bt(struct hns_roce_dev *hr_dev, void *base_addr, - u64 table_addr) +static void hem_list_link_bt(void *base_addr, u64 table_addr) { *(u64 *)(base_addr) = table_addr; } /* assign L0 table address to hem from root bt */ -static void hem_list_assign_bt(struct hns_roce_dev *hr_dev, - struct hns_roce_hem_item *hem, void *cpu_addr, +static void hem_list_assign_bt(struct hns_roce_hem_item *hem, void *cpu_addr, u64 phy_addr) { hem->addr = cpu_addr; @@ -1098,9 +1043,9 @@ static bool hem_list_is_bottom_bt(int hopnum, int bt_level) * @bt_level: base address table level * @unit: ba entries per bt page */ -static u32 hem_list_calc_ba_range(int hopnum, int bt_level, int unit) +static u64 hem_list_calc_ba_range(int hopnum, int bt_level, int unit) { - u32 step; + u64 step; int max; int i; @@ -1136,11 +1081,15 @@ int hns_roce_hem_list_calc_root_ba(const struct hns_roce_buf_region *regions, { struct hns_roce_buf_region *r; int total = 0; - int step; + u64 step; int i; for (i = 0; i < region_cnt; i++) { r = (struct hns_roce_buf_region *)®ions[i]; + /* when r->hopnum = 0, the region should not occupy root_ba. */ + if (!r->hopnum) + continue; + if (r->hopnum > 1) { step = hem_list_calc_ba_range(r->hopnum, 1, unit); if (step > 0) @@ -1167,7 +1116,7 @@ static int hem_list_alloc_mid_bt(struct hns_roce_dev *hr_dev, int ret = 0; int max_ofs; int level; - u32 step; + u64 step; int end; if (hopnum <= 1) @@ -1191,10 +1140,12 @@ static int hem_list_alloc_mid_bt(struct hns_roce_dev *hr_dev, /* config L1 bt to last bt and link them to corresponding parent */ for (level = 1; level < hopnum; level++) { - cur = hem_list_search_item(&mid_bt[level], offset); - if (cur) { - hem_ptrs[level] = cur; - continue; + if (!hem_list_is_bottom_bt(hopnum, level)) { + cur = hem_list_search_item(&mid_bt[level], offset); + if (cur) { + hem_ptrs[level] = cur; + continue; + } } step = hem_list_calc_ba_range(hopnum, level, unit); @@ -1204,7 +1155,7 @@ static int hem_list_alloc_mid_bt(struct hns_roce_dev *hr_dev, } start_aligned = (distance / step) * step + r->offset; - end = min_t(int, start_aligned + step - 1, max_ofs); + end = min_t(u64, start_aligned + step - 1, max_ofs); cur = hem_list_alloc_item(hr_dev, start_aligned, end, unit, true); if (!cur) { @@ -1220,8 +1171,7 @@ static int hem_list_alloc_mid_bt(struct hns_roce_dev *hr_dev, if (level > 1) { pre = hem_ptrs[level - 1]; step = (cur->start - pre->start) / step * BA_BYTE_LEN; - hem_list_link_bt(hr_dev, pre->addr + step, - cur->dma_addr); + hem_list_link_bt(pre->addr + step, cur->dma_addr); } } @@ -1233,7 +1183,7 @@ static int hem_list_alloc_mid_bt(struct hns_roce_dev *hr_dev, err_exit: for (level = 1; level < hopnum; level++) - hem_list_free_all(hr_dev, &temp_list[level], true); + hem_list_free_all(hr_dev, &temp_list[level]); return ret; } @@ -1274,16 +1224,26 @@ static int alloc_fake_root_bt(struct hns_roce_dev *hr_dev, void *cpu_base, { struct hns_roce_hem_item *hem; + /* This is on the has_mtt branch, if r->hopnum + * is 0, there is no root_ba to reuse for the + * region's fake hem, so a dma_alloc request is + * necessary here. + */ hem = hem_list_alloc_item(hr_dev, r->offset, r->offset + r->count - 1, - r->count, false); + r->count, !r->hopnum); if (!hem) return -ENOMEM; - hem_list_assign_bt(hr_dev, hem, cpu_base, phy_base); + /* The root_ba can be reused only when r->hopnum > 0. */ + if (r->hopnum) + hem_list_assign_bt(hem, cpu_base, phy_base); list_add(&hem->list, branch_head); list_add(&hem->sibling, leaf_head); - return r->count; + /* If r->hopnum == 0, 0 is returned, + * so that the root_bt entry is not occupied. + */ + return r->hopnum ? r->count : 0; } static int setup_middle_bt(struct hns_roce_dev *hr_dev, void *cpu_base, @@ -1293,7 +1253,7 @@ static int setup_middle_bt(struct hns_roce_dev *hr_dev, void *cpu_base, struct hns_roce_hem_item *hem, *temp_hem; int total = 0; int offset; - int step; + u64 step; step = hem_list_calc_ba_range(r->hopnum, 1, unit); if (step < 1) @@ -1302,7 +1262,7 @@ static int setup_middle_bt(struct hns_roce_dev *hr_dev, void *cpu_base, /* if exist mid bt, link L1 to L0 */ list_for_each_entry_safe(hem, temp_hem, branch_head, list) { offset = (hem->start - r->offset) / step * BA_BYTE_LEN; - hem_list_link_bt(hr_dev, cpu_base + offset, hem->dma_addr); + hem_list_link_bt(cpu_base + offset, hem->dma_addr); total++; } @@ -1327,7 +1287,7 @@ setup_root_hem(struct hns_roce_dev *hr_dev, struct hns_roce_hem_list *hem_list, return -ENOMEM; total = 0; - for (i = 0; i < region_cnt && total < max_ba_num; i++) { + for (i = 0; i < region_cnt && total <= max_ba_num; i++) { r = ®ions[i]; if (!r->count) continue; @@ -1393,14 +1353,19 @@ static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev, region_cnt); if (ret) { for (i = 0; i < region_cnt; i++) - hem_list_free_all(hr_dev, &head.branch[i], false); + hem_list_free_all(hr_dev, &head.branch[i]); - hem_list_free_all(hr_dev, &head.root, true); + hem_list_free_all(hr_dev, &head.root); } return ret; } +/* This is the bottom bt pages number of a 100G MR on 4K OS, assuming + * the bt page size is not expanded by cal_best_bt_pg_sz() + */ +#define RESCHED_LOOP_CNT_THRESHOLD_ON_4K 12800 + /* construct the base address table and link them by address hop config */ int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev, struct hns_roce_hem_list *hem_list, @@ -1409,6 +1374,7 @@ int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev, { const struct hns_roce_buf_region *r; int ofs, end; + int loop; int unit; int ret; int i; @@ -1426,7 +1392,10 @@ int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev, continue; end = r->offset + r->count; - for (ofs = r->offset; ofs < end; ofs += unit) { + for (ofs = r->offset, loop = 1; ofs < end; ofs += unit, loop++) { + if (!(loop % RESCHED_LOOP_CNT_THRESHOLD_ON_4K)) + cond_resched(); + ret = hem_list_alloc_mid_bt(hr_dev, r, unit, ofs, hem_list->mid_bt[i], &hem_list->btm_bt); @@ -1458,10 +1427,9 @@ void hns_roce_hem_list_release(struct hns_roce_dev *hr_dev, for (i = 0; i < HNS_ROCE_MAX_BT_REGION; i++) for (j = 0; j < HNS_ROCE_MAX_BT_LEVEL; j++) - hem_list_free_all(hr_dev, &hem_list->mid_bt[i][j], - j != 0); + hem_list_free_all(hr_dev, &hem_list->mid_bt[i][j]); - hem_list_free_all(hr_dev, &hem_list->root_bt, true); + hem_list_free_all(hr_dev, &hem_list->root_bt); INIT_LIST_HEAD(&hem_list->btm_bt); hem_list->root_ba = 0; } @@ -1484,9 +1452,14 @@ void *hns_roce_hem_list_find_mtt(struct hns_roce_dev *hr_dev, struct list_head *head = &hem_list->btm_bt; struct hns_roce_hem_item *hem, *temp_hem; void *cpu_base = NULL; + int loop = 1; int nr = 0; list_for_each_entry_safe(hem, temp_hem, head, sibling) { + if (!(loop % RESCHED_LOOP_CNT_THRESHOLD_ON_4K)) + cond_resched(); + loop++; + if (hem_list_page_is_in_range(hem, offset)) { nr = offset - hem->start; cpu_base = hem->addr + nr * BA_BYTE_LEN; |