summaryrefslogtreecommitdiff
path: root/drivers/vfio/pci/mlx5/cmd.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/vfio/pci/mlx5/cmd.c')
-rw-r--r--drivers/vfio/pci/mlx5/cmd.c505
1 files changed, 312 insertions, 193 deletions
diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c
index efd1d252cdc9..5b919a0b2524 100644
--- a/drivers/vfio/pci/mlx5/cmd.c
+++ b/drivers/vfio/pci/mlx5/cmd.c
@@ -108,8 +108,9 @@ int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev,
ret = wait_for_completion_interruptible(&mvdev->saving_migf->save_comp);
if (ret)
return ret;
- if (mvdev->saving_migf->state ==
- MLX5_MIGF_STATE_PRE_COPY_ERROR) {
+ /* Upon cleanup, ignore previous pre_copy error state */
+ if (mvdev->saving_migf->state == MLX5_MIGF_STATE_PRE_COPY_ERROR &&
+ !(query_flags & MLX5VF_QUERY_CLEANUP)) {
/*
* In case we had a PRE_COPY error, only query full
* image for final image
@@ -121,6 +122,11 @@ int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev,
}
query_flags &= ~MLX5VF_QUERY_INC;
}
+ /* Block incremental query which is state-dependent */
+ if (mvdev->saving_migf->state == MLX5_MIGF_STATE_ERROR) {
+ complete(&mvdev->saving_migf->save_comp);
+ return -ENODEV;
+ }
}
MLX5_SET(query_vhca_migration_state_in, in, opcode,
@@ -149,6 +155,12 @@ int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev,
return 0;
}
+static void set_tracker_change_event(struct mlx5vf_pci_core_device *mvdev)
+{
+ mvdev->tracker.object_changed = true;
+ complete(&mvdev->tracker_comp);
+}
+
static void set_tracker_error(struct mlx5vf_pci_core_device *mvdev)
{
/* Mark the tracker under an error and wake it up if it's running */
@@ -189,7 +201,7 @@ void mlx5vf_cmd_close_migratable(struct mlx5vf_pci_core_device *mvdev)
/* Must be done outside the lock to let it progress */
set_tracker_error(mvdev);
mutex_lock(&mvdev->state_mutex);
- mlx5vf_disable_fds(mvdev);
+ mlx5vf_disable_fds(mvdev, NULL);
_mlx5vf_free_page_tracker_resources(mvdev);
mlx5vf_state_mutex_unlock(mvdev);
}
@@ -221,6 +233,10 @@ void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev,
if (!MLX5_CAP_GEN(mvdev->mdev, migration))
goto end;
+ if (!(MLX5_CAP_GEN_2(mvdev->mdev, migration_multi_load) &&
+ MLX5_CAP_GEN_2(mvdev->mdev, migration_tracking_state)))
+ goto end;
+
mvdev->vf_id = pci_iov_vf_id(pdev);
if (mvdev->vf_id < 0)
goto end;
@@ -250,17 +266,14 @@ void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev,
mvdev->migrate_cap = 1;
mvdev->core_device.vdev.migration_flags =
VFIO_MIGRATION_STOP_COPY |
- VFIO_MIGRATION_P2P;
+ VFIO_MIGRATION_P2P |
+ VFIO_MIGRATION_PRE_COPY;
+
mvdev->core_device.vdev.mig_ops = mig_ops;
init_completion(&mvdev->tracker_comp);
if (MLX5_CAP_GEN(mvdev->mdev, adv_virtualization))
mvdev->core_device.vdev.log_ops = log_ops;
- if (MLX5_CAP_GEN_2(mvdev->mdev, migration_multi_load) &&
- MLX5_CAP_GEN_2(mvdev->mdev, migration_tracking_state))
- mvdev->core_device.vdev.migration_flags |=
- VFIO_MIGRATION_PRE_COPY;
-
if (MLX5_CAP_GEN_2(mvdev->mdev, migration_in_chunks))
mvdev->chunk_mode = 1;
@@ -300,40 +313,21 @@ err_exec:
return ret;
}
-static int _create_mkey(struct mlx5_core_dev *mdev, u32 pdn,
- struct mlx5_vhca_data_buffer *buf,
- struct mlx5_vhca_recv_buf *recv_buf,
- u32 *mkey)
+static u32 *alloc_mkey_in(u32 npages, u32 pdn)
{
- size_t npages = buf ? DIV_ROUND_UP(buf->allocated_length, PAGE_SIZE) :
- recv_buf->npages;
- int err = 0, inlen;
- __be64 *mtt;
+ int inlen;
void *mkc;
u32 *in;
inlen = MLX5_ST_SZ_BYTES(create_mkey_in) +
- sizeof(*mtt) * round_up(npages, 2);
+ sizeof(__be64) * round_up(npages, 2);
- in = kvzalloc(inlen, GFP_KERNEL);
+ in = kvzalloc(inlen, GFP_KERNEL_ACCOUNT);
if (!in)
- return -ENOMEM;
+ return NULL;
MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
DIV_ROUND_UP(npages, 2));
- mtt = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
-
- if (buf) {
- struct sg_dma_page_iter dma_iter;
-
- for_each_sgtable_dma_page(&buf->table.sgt, &dma_iter, 0)
- *mtt++ = cpu_to_be64(sg_page_iter_dma_address(&dma_iter));
- } else {
- int i;
-
- for (i = 0; i < npages; i++)
- *mtt++ = cpu_to_be64(recv_buf->dma_addrs[i]);
- }
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
@@ -347,8 +341,81 @@ static int _create_mkey(struct mlx5_core_dev *mdev, u32 pdn,
MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
MLX5_SET(mkc, mkc, translations_octword_size, DIV_ROUND_UP(npages, 2));
MLX5_SET64(mkc, mkc, len, npages * PAGE_SIZE);
- err = mlx5_core_create_mkey(mdev, mkey, in, inlen);
- kvfree(in);
+
+ return in;
+}
+
+static int create_mkey(struct mlx5_core_dev *mdev, u32 npages, u32 *mkey_in,
+ u32 *mkey)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_mkey_in) +
+ sizeof(__be64) * round_up(npages, 2);
+
+ return mlx5_core_create_mkey(mdev, mkey, mkey_in, inlen);
+}
+
+static void unregister_dma_pages(struct mlx5_core_dev *mdev, u32 npages,
+ u32 *mkey_in, struct dma_iova_state *state,
+ enum dma_data_direction dir)
+{
+ dma_addr_t addr;
+ __be64 *mtt;
+ int i;
+
+ if (dma_use_iova(state)) {
+ dma_iova_destroy(mdev->device, state, npages * PAGE_SIZE, dir,
+ 0);
+ } else {
+ mtt = (__be64 *)MLX5_ADDR_OF(create_mkey_in, mkey_in,
+ klm_pas_mtt);
+ for (i = npages - 1; i >= 0; i--) {
+ addr = be64_to_cpu(mtt[i]);
+ dma_unmap_page(mdev->device, addr, PAGE_SIZE, dir);
+ }
+ }
+}
+
+static int register_dma_pages(struct mlx5_core_dev *mdev, u32 npages,
+ struct page **page_list, u32 *mkey_in,
+ struct dma_iova_state *state,
+ enum dma_data_direction dir)
+{
+ dma_addr_t addr;
+ size_t mapped = 0;
+ __be64 *mtt;
+ int i, err;
+
+ mtt = (__be64 *)MLX5_ADDR_OF(create_mkey_in, mkey_in, klm_pas_mtt);
+
+ if (dma_iova_try_alloc(mdev->device, state, 0, npages * PAGE_SIZE)) {
+ addr = state->addr;
+ for (i = 0; i < npages; i++) {
+ err = dma_iova_link(mdev->device, state,
+ page_to_phys(page_list[i]), mapped,
+ PAGE_SIZE, dir, 0);
+ if (err)
+ goto error;
+ *mtt++ = cpu_to_be64(addr);
+ addr += PAGE_SIZE;
+ mapped += PAGE_SIZE;
+ }
+ err = dma_iova_sync(mdev->device, state, 0, mapped);
+ if (err)
+ goto error;
+ } else {
+ for (i = 0; i < npages; i++) {
+ addr = dma_map_page(mdev->device, page_list[i], 0,
+ PAGE_SIZE, dir);
+ err = dma_mapping_error(mdev->device, addr);
+ if (err)
+ goto error;
+ *mtt++ = cpu_to_be64(addr);
+ }
+ }
+ return 0;
+
+error:
+ unregister_dma_pages(mdev, i, mkey_in, state, dir);
return err;
}
@@ -362,49 +429,97 @@ static int mlx5vf_dma_data_buffer(struct mlx5_vhca_data_buffer *buf)
if (mvdev->mdev_detach)
return -ENOTCONN;
- if (buf->dmaed || !buf->allocated_length)
+ if (buf->mkey_in || !buf->npages)
return -EINVAL;
- ret = dma_map_sgtable(mdev->device, &buf->table.sgt, buf->dma_dir, 0);
- if (ret)
- return ret;
+ buf->mkey_in = alloc_mkey_in(buf->npages, buf->migf->pdn);
+ if (!buf->mkey_in)
+ return -ENOMEM;
- ret = _create_mkey(mdev, buf->migf->pdn, buf, NULL, &buf->mkey);
+ ret = register_dma_pages(mdev, buf->npages, buf->page_list,
+ buf->mkey_in, &buf->state, buf->dma_dir);
if (ret)
- goto err;
+ goto err_register_dma;
- buf->dmaed = true;
+ ret = create_mkey(mdev, buf->npages, buf->mkey_in, &buf->mkey);
+ if (ret)
+ goto err_create_mkey;
return 0;
-err:
- dma_unmap_sgtable(mdev->device, &buf->table.sgt, buf->dma_dir, 0);
+
+err_create_mkey:
+ unregister_dma_pages(mdev, buf->npages, buf->mkey_in, &buf->state,
+ buf->dma_dir);
+err_register_dma:
+ kvfree(buf->mkey_in);
+ buf->mkey_in = NULL;
return ret;
}
+static void free_page_list(u32 npages, struct page **page_list)
+{
+ int i;
+
+ /* Undo alloc_pages_bulk() */
+ for (i = npages - 1; i >= 0; i--)
+ __free_page(page_list[i]);
+
+ kvfree(page_list);
+}
+
void mlx5vf_free_data_buffer(struct mlx5_vhca_data_buffer *buf)
{
- struct mlx5_vf_migration_file *migf = buf->migf;
- struct sg_page_iter sg_iter;
+ struct mlx5vf_pci_core_device *mvdev = buf->migf->mvdev;
+ struct mlx5_core_dev *mdev = mvdev->mdev;
- lockdep_assert_held(&migf->mvdev->state_mutex);
- WARN_ON(migf->mvdev->mdev_detach);
+ lockdep_assert_held(&mvdev->state_mutex);
+ WARN_ON(mvdev->mdev_detach);
- if (buf->dmaed) {
- mlx5_core_destroy_mkey(migf->mvdev->mdev, buf->mkey);
- dma_unmap_sgtable(migf->mvdev->mdev->device, &buf->table.sgt,
- buf->dma_dir, 0);
+ if (buf->mkey_in) {
+ mlx5_core_destroy_mkey(mdev, buf->mkey);
+ unregister_dma_pages(mdev, buf->npages, buf->mkey_in,
+ &buf->state, buf->dma_dir);
+ kvfree(buf->mkey_in);
}
- /* Undo alloc_pages_bulk_array() */
- for_each_sgtable_page(&buf->table.sgt, &sg_iter, 0)
- __free_page(sg_page_iter_page(&sg_iter));
- sg_free_append_table(&buf->table);
+ free_page_list(buf->npages, buf->page_list);
kfree(buf);
}
+static int mlx5vf_add_pages(struct page ***page_list, unsigned int npages)
+{
+ unsigned int filled, done = 0;
+ int i;
+
+ *page_list =
+ kvcalloc(npages, sizeof(struct page *), GFP_KERNEL_ACCOUNT);
+ if (!*page_list)
+ return -ENOMEM;
+
+ for (;;) {
+ filled = alloc_pages_bulk(GFP_KERNEL_ACCOUNT, npages - done,
+ *page_list + done);
+ if (!filled)
+ goto err;
+
+ done += filled;
+ if (done == npages)
+ break;
+ }
+
+ return 0;
+
+err:
+ for (i = 0; i < done; i++)
+ __free_page(*page_list[i]);
+
+ kvfree(*page_list);
+ *page_list = NULL;
+ return -ENOMEM;
+}
+
struct mlx5_vhca_data_buffer *
-mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf,
- size_t length,
+mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf, u32 npages,
enum dma_data_direction dma_dir)
{
struct mlx5_vhca_data_buffer *buf;
@@ -416,12 +531,13 @@ mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf,
buf->dma_dir = dma_dir;
buf->migf = migf;
- if (length) {
- ret = mlx5vf_add_migration_pages(buf,
- DIV_ROUND_UP_ULL(length, PAGE_SIZE));
+ if (npages) {
+ ret = mlx5vf_add_pages(&buf->page_list, npages);
if (ret)
goto end;
+ buf->npages = npages;
+
if (dma_dir != DMA_NONE) {
ret = mlx5vf_dma_data_buffer(buf);
if (ret)
@@ -444,8 +560,8 @@ void mlx5vf_put_data_buffer(struct mlx5_vhca_data_buffer *buf)
}
struct mlx5_vhca_data_buffer *
-mlx5vf_get_data_buffer(struct mlx5_vf_migration_file *migf,
- size_t length, enum dma_data_direction dma_dir)
+mlx5vf_get_data_buffer(struct mlx5_vf_migration_file *migf, u32 npages,
+ enum dma_data_direction dma_dir)
{
struct mlx5_vhca_data_buffer *buf, *temp_buf;
struct list_head free_list;
@@ -460,7 +576,7 @@ mlx5vf_get_data_buffer(struct mlx5_vf_migration_file *migf,
list_for_each_entry_safe(buf, temp_buf, &migf->avail_list, buf_elm) {
if (buf->dma_dir == dma_dir) {
list_del_init(&buf->buf_elm);
- if (buf->allocated_length >= length) {
+ if (buf->npages >= npages) {
spin_unlock_irq(&migf->list_lock);
goto found;
}
@@ -474,7 +590,7 @@ mlx5vf_get_data_buffer(struct mlx5_vf_migration_file *migf,
}
}
spin_unlock_irq(&migf->list_lock);
- buf = mlx5vf_alloc_data_buffer(migf, length, dma_dir);
+ buf = mlx5vf_alloc_data_buffer(migf, npages, dma_dir);
found:
while ((temp_buf = list_first_entry_or_null(&free_list,
@@ -608,8 +724,13 @@ static void mlx5vf_save_callback(int status, struct mlx5_async_work *context)
err:
/* The error flow can't run from an interrupt context */
- if (status == -EREMOTEIO)
+ if (status == -EREMOTEIO) {
status = MLX5_GET(save_vhca_state_out, async_data->out, status);
+ /* Failed in FW, print cmd out failure details */
+ mlx5_cmd_out_err(migf->mvdev->mdev, MLX5_CMD_OP_SAVE_VHCA_STATE, 0,
+ async_data->out);
+ }
+
async_data->status = status;
queue_work(migf->mvdev->cb_wq, &async_data->work);
}
@@ -623,6 +744,7 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
u32 in[MLX5_ST_SZ_DW(save_vhca_state_in)] = {};
struct mlx5_vhca_data_buffer *header_buf = NULL;
struct mlx5vf_async_data *async_data;
+ bool pre_copy_cleanup = false;
int err;
lockdep_assert_held(&mvdev->state_mutex);
@@ -633,6 +755,10 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
if (err)
return err;
+ if ((migf->state == MLX5_MIGF_STATE_PRE_COPY ||
+ migf->state == MLX5_MIGF_STATE_PRE_COPY_ERROR) && !track && !inc)
+ pre_copy_cleanup = true;
+
if (migf->state == MLX5_MIGF_STATE_PRE_COPY_ERROR)
/*
* In case we had a PRE_COPY error, SAVE is triggered only for
@@ -645,35 +771,36 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
MLX5_SET(save_vhca_state_in, in, op_mod, 0);
MLX5_SET(save_vhca_state_in, in, vhca_id, mvdev->vhca_id);
MLX5_SET(save_vhca_state_in, in, mkey, buf->mkey);
- MLX5_SET(save_vhca_state_in, in, size, buf->allocated_length);
+ MLX5_SET(save_vhca_state_in, in, size, buf->npages * PAGE_SIZE);
MLX5_SET(save_vhca_state_in, in, incremental, inc);
MLX5_SET(save_vhca_state_in, in, set_track, track);
async_data = &migf->async_data;
async_data->buf = buf;
- async_data->stop_copy_chunk = !track;
+ async_data->stop_copy_chunk = (!track && !pre_copy_cleanup);
async_data->out = kvzalloc(out_size, GFP_KERNEL);
if (!async_data->out) {
err = -ENOMEM;
goto err_out;
}
- if (MLX5VF_PRE_COPY_SUPP(mvdev)) {
- if (async_data->stop_copy_chunk) {
- u8 header_idx = buf->stop_copy_chunk_num ?
- buf->stop_copy_chunk_num - 1 : 0;
+ if (async_data->stop_copy_chunk) {
+ u8 header_idx = buf->stop_copy_chunk_num ?
+ buf->stop_copy_chunk_num - 1 : 0;
- header_buf = migf->buf_header[header_idx];
- migf->buf_header[header_idx] = NULL;
- }
+ header_buf = migf->buf_header[header_idx];
+ migf->buf_header[header_idx] = NULL;
+ }
- if (!header_buf) {
- header_buf = mlx5vf_get_data_buffer(migf,
- sizeof(struct mlx5_vf_migration_header), DMA_NONE);
- if (IS_ERR(header_buf)) {
- err = PTR_ERR(header_buf);
- goto err_free;
- }
+ if (!header_buf) {
+ header_buf = mlx5vf_get_data_buffer(
+ migf,
+ DIV_ROUND_UP(sizeof(struct mlx5_vf_migration_header),
+ PAGE_SIZE),
+ DMA_NONE);
+ if (IS_ERR(header_buf)) {
+ err = PTR_ERR(header_buf);
+ goto err_free;
}
}
@@ -714,7 +841,7 @@ int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev,
if (mvdev->mdev_detach)
return -ENOTCONN;
- if (!buf->dmaed) {
+ if (!buf->mkey_in) {
err = mlx5vf_dma_data_buffer(buf);
if (err)
return err;
@@ -900,6 +1027,29 @@ static int mlx5vf_cmd_modify_tracker(struct mlx5_core_dev *mdev,
return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
}
+static int mlx5vf_cmd_query_tracker(struct mlx5_core_dev *mdev,
+ struct mlx5_vhca_page_tracker *tracker)
+{
+ u32 out[MLX5_ST_SZ_DW(query_page_track_obj_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
+ void *obj_context;
+ void *cmd_hdr;
+ int err;
+
+ cmd_hdr = MLX5_ADDR_OF(modify_page_track_obj_in, in, general_obj_in_cmd_hdr);
+ MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_PAGE_TRACK);
+ MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, tracker->id);
+
+ err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ if (err)
+ return err;
+
+ obj_context = MLX5_ADDR_OF(query_page_track_obj_out, out, obj_context);
+ tracker->status = MLX5_GET(page_track, obj_context, state);
+ return 0;
+}
+
static int alloc_cq_frag_buf(struct mlx5_core_dev *mdev,
struct mlx5_vhca_cq_buf *buf, int nent,
int cqe_size)
@@ -957,9 +1107,11 @@ static int mlx5vf_event_notifier(struct notifier_block *nb, unsigned long type,
mlx5_nb_cof(nb, struct mlx5_vhca_page_tracker, nb);
struct mlx5vf_pci_core_device *mvdev = container_of(
tracker, struct mlx5vf_pci_core_device, tracker);
+ struct mlx5_eqe_obj_change *object;
struct mlx5_eqe *eqe = data;
u8 event_type = (u8)type;
u8 queue_type;
+ u32 obj_id;
int qp_num;
switch (event_type) {
@@ -975,6 +1127,12 @@ static int mlx5vf_event_notifier(struct notifier_block *nb, unsigned long type,
break;
set_tracker_error(mvdev);
break;
+ case MLX5_EVENT_TYPE_OBJECT_CHANGE:
+ object = &eqe->data.obj_change;
+ obj_id = be32_to_cpu(object->obj_id);
+ if (obj_id == tracker->id)
+ set_tracker_change_event(mvdev);
+ break;
default:
break;
}
@@ -1238,103 +1396,16 @@ static void mlx5vf_destroy_qp(struct mlx5_core_dev *mdev,
kfree(qp);
}
-static void free_recv_pages(struct mlx5_vhca_recv_buf *recv_buf)
-{
- int i;
-
- /* Undo alloc_pages_bulk_array() */
- for (i = 0; i < recv_buf->npages; i++)
- __free_page(recv_buf->page_list[i]);
-
- kvfree(recv_buf->page_list);
-}
-
-static int alloc_recv_pages(struct mlx5_vhca_recv_buf *recv_buf,
- unsigned int npages)
-{
- unsigned int filled = 0, done = 0;
- int i;
-
- recv_buf->page_list = kvcalloc(npages, sizeof(*recv_buf->page_list),
- GFP_KERNEL_ACCOUNT);
- if (!recv_buf->page_list)
- return -ENOMEM;
-
- for (;;) {
- filled = alloc_pages_bulk_array(GFP_KERNEL_ACCOUNT,
- npages - done,
- recv_buf->page_list + done);
- if (!filled)
- goto err;
-
- done += filled;
- if (done == npages)
- break;
- }
-
- recv_buf->npages = npages;
- return 0;
-
-err:
- for (i = 0; i < npages; i++) {
- if (recv_buf->page_list[i])
- __free_page(recv_buf->page_list[i]);
- }
-
- kvfree(recv_buf->page_list);
- return -ENOMEM;
-}
-
-static int register_dma_recv_pages(struct mlx5_core_dev *mdev,
- struct mlx5_vhca_recv_buf *recv_buf)
-{
- int i, j;
-
- recv_buf->dma_addrs = kvcalloc(recv_buf->npages,
- sizeof(*recv_buf->dma_addrs),
- GFP_KERNEL_ACCOUNT);
- if (!recv_buf->dma_addrs)
- return -ENOMEM;
-
- for (i = 0; i < recv_buf->npages; i++) {
- recv_buf->dma_addrs[i] = dma_map_page(mdev->device,
- recv_buf->page_list[i],
- 0, PAGE_SIZE,
- DMA_FROM_DEVICE);
- if (dma_mapping_error(mdev->device, recv_buf->dma_addrs[i]))
- goto error;
- }
- return 0;
-
-error:
- for (j = 0; j < i; j++)
- dma_unmap_single(mdev->device, recv_buf->dma_addrs[j],
- PAGE_SIZE, DMA_FROM_DEVICE);
-
- kvfree(recv_buf->dma_addrs);
- return -ENOMEM;
-}
-
-static void unregister_dma_recv_pages(struct mlx5_core_dev *mdev,
- struct mlx5_vhca_recv_buf *recv_buf)
-{
- int i;
-
- for (i = 0; i < recv_buf->npages; i++)
- dma_unmap_single(mdev->device, recv_buf->dma_addrs[i],
- PAGE_SIZE, DMA_FROM_DEVICE);
-
- kvfree(recv_buf->dma_addrs);
-}
-
static void mlx5vf_free_qp_recv_resources(struct mlx5_core_dev *mdev,
struct mlx5_vhca_qp *qp)
{
struct mlx5_vhca_recv_buf *recv_buf = &qp->recv_buf;
mlx5_core_destroy_mkey(mdev, recv_buf->mkey);
- unregister_dma_recv_pages(mdev, recv_buf);
- free_recv_pages(&qp->recv_buf);
+ unregister_dma_pages(mdev, recv_buf->npages, recv_buf->mkey_in,
+ &recv_buf->state, DMA_FROM_DEVICE);
+ kvfree(recv_buf->mkey_in);
+ free_page_list(recv_buf->npages, recv_buf->page_list);
}
static int mlx5vf_alloc_qp_recv_resources(struct mlx5_core_dev *mdev,
@@ -1345,24 +1416,38 @@ static int mlx5vf_alloc_qp_recv_resources(struct mlx5_core_dev *mdev,
struct mlx5_vhca_recv_buf *recv_buf = &qp->recv_buf;
int err;
- err = alloc_recv_pages(recv_buf, npages);
- if (err < 0)
+ err = mlx5vf_add_pages(&recv_buf->page_list, npages);
+ if (err)
return err;
- err = register_dma_recv_pages(mdev, recv_buf);
- if (err)
+ recv_buf->npages = npages;
+
+ recv_buf->mkey_in = alloc_mkey_in(npages, pdn);
+ if (!recv_buf->mkey_in) {
+ err = -ENOMEM;
goto end;
+ }
+
+ err = register_dma_pages(mdev, npages, recv_buf->page_list,
+ recv_buf->mkey_in, &recv_buf->state,
+ DMA_FROM_DEVICE);
+ if (err)
+ goto err_register_dma;
- err = _create_mkey(mdev, pdn, NULL, recv_buf, &recv_buf->mkey);
+ err = create_mkey(mdev, npages, recv_buf->mkey_in, &recv_buf->mkey);
if (err)
goto err_create_mkey;
return 0;
err_create_mkey:
- unregister_dma_recv_pages(mdev, recv_buf);
+ unregister_dma_pages(mdev, npages, recv_buf->mkey_in, &recv_buf->state,
+ DMA_FROM_DEVICE);
+err_register_dma:
+ kvfree(recv_buf->mkey_in);
+ recv_buf->mkey_in = NULL;
end:
- free_recv_pages(recv_buf);
+ free_page_list(npages, recv_buf->page_list);
return err;
}
@@ -1417,7 +1502,8 @@ int mlx5vf_start_page_tracker(struct vfio_device *vdev,
struct mlx5_vhca_qp *host_qp;
struct mlx5_vhca_qp *fw_qp;
struct mlx5_core_dev *mdev;
- u32 max_msg_size = PAGE_SIZE;
+ u32 log_max_msg_size;
+ u32 max_msg_size;
u64 rq_size = SZ_2M;
u32 max_recv_wr;
int err;
@@ -1434,6 +1520,12 @@ int mlx5vf_start_page_tracker(struct vfio_device *vdev,
}
mdev = mvdev->mdev;
+ log_max_msg_size = MLX5_CAP_ADV_VIRTUALIZATION(mdev, pg_track_log_max_msg_size);
+ max_msg_size = (1ULL << log_max_msg_size);
+ /* The RQ must hold at least 4 WQEs/messages for successful QP creation */
+ if (rq_size < 4 * max_msg_size)
+ rq_size = 4 * max_msg_size;
+
memset(tracker, 0, sizeof(*tracker));
tracker->uar = mlx5_get_uars_page(mdev);
if (IS_ERR(tracker->uar)) {
@@ -1523,25 +1615,41 @@ set_report_output(u32 size, int index, struct mlx5_vhca_qp *qp,
{
u32 entry_size = MLX5_ST_SZ_BYTES(page_track_report_entry);
u32 nent = size / entry_size;
+ u32 nent_in_page;
+ u32 nent_to_set;
struct page *page;
+ u32 page_offset;
+ u32 page_index;
+ u32 buf_offset;
+ void *kaddr;
u64 addr;
u64 *buf;
int i;
- if (WARN_ON(index >= qp->recv_buf.npages ||
+ buf_offset = index * qp->max_msg_size;
+ if (WARN_ON(buf_offset + size >= qp->recv_buf.npages * PAGE_SIZE ||
(nent > qp->max_msg_size / entry_size)))
return;
- page = qp->recv_buf.page_list[index];
- buf = kmap_local_page(page);
- for (i = 0; i < nent; i++) {
- addr = MLX5_GET(page_track_report_entry, buf + i,
- dirty_address_low);
- addr |= (u64)MLX5_GET(page_track_report_entry, buf + i,
- dirty_address_high) << 32;
- iova_bitmap_set(dirty, addr, qp->tracked_page_size);
- }
- kunmap_local(buf);
+ do {
+ page_index = buf_offset / PAGE_SIZE;
+ page_offset = buf_offset % PAGE_SIZE;
+ nent_in_page = (PAGE_SIZE - page_offset) / entry_size;
+ page = qp->recv_buf.page_list[page_index];
+ kaddr = kmap_local_page(page);
+ buf = kaddr + page_offset;
+ nent_to_set = min(nent, nent_in_page);
+ for (i = 0; i < nent_to_set; i++) {
+ addr = MLX5_GET(page_track_report_entry, buf + i,
+ dirty_address_low);
+ addr |= (u64)MLX5_GET(page_track_report_entry, buf + i,
+ dirty_address_high) << 32;
+ iova_bitmap_set(dirty, addr, qp->tracked_page_size);
+ }
+ kunmap_local(kaddr);
+ buf_offset += (nent_to_set * entry_size);
+ nent -= nent_to_set;
+ } while (nent);
}
static void
@@ -1634,6 +1742,11 @@ int mlx5vf_tracker_read_and_clear(struct vfio_device *vdev, unsigned long iova,
goto end;
}
+ if (tracker->is_err) {
+ err = -EIO;
+ goto end;
+ }
+
mdev = mvdev->mdev;
err = mlx5vf_cmd_modify_tracker(mdev, tracker->id, iova, length,
MLX5_PAGE_TRACK_STATE_REPORTING);
@@ -1652,6 +1765,12 @@ int mlx5vf_tracker_read_and_clear(struct vfio_device *vdev, unsigned long iova,
dirty, &tracker->status);
if (poll_err == CQ_EMPTY) {
wait_for_completion(&mvdev->tracker_comp);
+ if (tracker->object_changed) {
+ tracker->object_changed = false;
+ err = mlx5vf_cmd_query_tracker(mdev, tracker);
+ if (err)
+ goto end;
+ }
continue;
}
}