summaryrefslogtreecommitdiff
path: root/drivers/vfio/pci/mlx5
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/vfio/pci/mlx5')
-rw-r--r--drivers/vfio/pci/mlx5/cmd.c416
-rw-r--r--drivers/vfio/pci/mlx5/cmd.h35
-rw-r--r--drivers/vfio/pci/mlx5/main.c126
3 files changed, 289 insertions, 288 deletions
diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c
index 41a4b0cf4297..5b919a0b2524 100644
--- a/drivers/vfio/pci/mlx5/cmd.c
+++ b/drivers/vfio/pci/mlx5/cmd.c
@@ -313,40 +313,21 @@ err_exec:
return ret;
}
-static int _create_mkey(struct mlx5_core_dev *mdev, u32 pdn,
- struct mlx5_vhca_data_buffer *buf,
- struct mlx5_vhca_recv_buf *recv_buf,
- u32 *mkey)
+static u32 *alloc_mkey_in(u32 npages, u32 pdn)
{
- size_t npages = buf ? DIV_ROUND_UP(buf->allocated_length, PAGE_SIZE) :
- recv_buf->npages;
- int err = 0, inlen;
- __be64 *mtt;
+ int inlen;
void *mkc;
u32 *in;
inlen = MLX5_ST_SZ_BYTES(create_mkey_in) +
- sizeof(*mtt) * round_up(npages, 2);
+ sizeof(__be64) * round_up(npages, 2);
- in = kvzalloc(inlen, GFP_KERNEL);
+ in = kvzalloc(inlen, GFP_KERNEL_ACCOUNT);
if (!in)
- return -ENOMEM;
+ return NULL;
MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
DIV_ROUND_UP(npages, 2));
- mtt = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
-
- if (buf) {
- struct sg_dma_page_iter dma_iter;
-
- for_each_sgtable_dma_page(&buf->table.sgt, &dma_iter, 0)
- *mtt++ = cpu_to_be64(sg_page_iter_dma_address(&dma_iter));
- } else {
- int i;
-
- for (i = 0; i < npages; i++)
- *mtt++ = cpu_to_be64(recv_buf->dma_addrs[i]);
- }
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
@@ -360,8 +341,81 @@ static int _create_mkey(struct mlx5_core_dev *mdev, u32 pdn,
MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
MLX5_SET(mkc, mkc, translations_octword_size, DIV_ROUND_UP(npages, 2));
MLX5_SET64(mkc, mkc, len, npages * PAGE_SIZE);
- err = mlx5_core_create_mkey(mdev, mkey, in, inlen);
- kvfree(in);
+
+ return in;
+}
+
+static int create_mkey(struct mlx5_core_dev *mdev, u32 npages, u32 *mkey_in,
+ u32 *mkey)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_mkey_in) +
+ sizeof(__be64) * round_up(npages, 2);
+
+ return mlx5_core_create_mkey(mdev, mkey, mkey_in, inlen);
+}
+
+static void unregister_dma_pages(struct mlx5_core_dev *mdev, u32 npages,
+ u32 *mkey_in, struct dma_iova_state *state,
+ enum dma_data_direction dir)
+{
+ dma_addr_t addr;
+ __be64 *mtt;
+ int i;
+
+ if (dma_use_iova(state)) {
+ dma_iova_destroy(mdev->device, state, npages * PAGE_SIZE, dir,
+ 0);
+ } else {
+ mtt = (__be64 *)MLX5_ADDR_OF(create_mkey_in, mkey_in,
+ klm_pas_mtt);
+ for (i = npages - 1; i >= 0; i--) {
+ addr = be64_to_cpu(mtt[i]);
+ dma_unmap_page(mdev->device, addr, PAGE_SIZE, dir);
+ }
+ }
+}
+
+static int register_dma_pages(struct mlx5_core_dev *mdev, u32 npages,
+ struct page **page_list, u32 *mkey_in,
+ struct dma_iova_state *state,
+ enum dma_data_direction dir)
+{
+ dma_addr_t addr;
+ size_t mapped = 0;
+ __be64 *mtt;
+ int i, err;
+
+ mtt = (__be64 *)MLX5_ADDR_OF(create_mkey_in, mkey_in, klm_pas_mtt);
+
+ if (dma_iova_try_alloc(mdev->device, state, 0, npages * PAGE_SIZE)) {
+ addr = state->addr;
+ for (i = 0; i < npages; i++) {
+ err = dma_iova_link(mdev->device, state,
+ page_to_phys(page_list[i]), mapped,
+ PAGE_SIZE, dir, 0);
+ if (err)
+ goto error;
+ *mtt++ = cpu_to_be64(addr);
+ addr += PAGE_SIZE;
+ mapped += PAGE_SIZE;
+ }
+ err = dma_iova_sync(mdev->device, state, 0, mapped);
+ if (err)
+ goto error;
+ } else {
+ for (i = 0; i < npages; i++) {
+ addr = dma_map_page(mdev->device, page_list[i], 0,
+ PAGE_SIZE, dir);
+ err = dma_mapping_error(mdev->device, addr);
+ if (err)
+ goto error;
+ *mtt++ = cpu_to_be64(addr);
+ }
+ }
+ return 0;
+
+error:
+ unregister_dma_pages(mdev, i, mkey_in, state, dir);
return err;
}
@@ -375,93 +429,97 @@ static int mlx5vf_dma_data_buffer(struct mlx5_vhca_data_buffer *buf)
if (mvdev->mdev_detach)
return -ENOTCONN;
- if (buf->dmaed || !buf->allocated_length)
+ if (buf->mkey_in || !buf->npages)
return -EINVAL;
- ret = dma_map_sgtable(mdev->device, &buf->table.sgt, buf->dma_dir, 0);
- if (ret)
- return ret;
+ buf->mkey_in = alloc_mkey_in(buf->npages, buf->migf->pdn);
+ if (!buf->mkey_in)
+ return -ENOMEM;
- ret = _create_mkey(mdev, buf->migf->pdn, buf, NULL, &buf->mkey);
+ ret = register_dma_pages(mdev, buf->npages, buf->page_list,
+ buf->mkey_in, &buf->state, buf->dma_dir);
if (ret)
- goto err;
+ goto err_register_dma;
- buf->dmaed = true;
+ ret = create_mkey(mdev, buf->npages, buf->mkey_in, &buf->mkey);
+ if (ret)
+ goto err_create_mkey;
return 0;
-err:
- dma_unmap_sgtable(mdev->device, &buf->table.sgt, buf->dma_dir, 0);
+
+err_create_mkey:
+ unregister_dma_pages(mdev, buf->npages, buf->mkey_in, &buf->state,
+ buf->dma_dir);
+err_register_dma:
+ kvfree(buf->mkey_in);
+ buf->mkey_in = NULL;
return ret;
}
+static void free_page_list(u32 npages, struct page **page_list)
+{
+ int i;
+
+ /* Undo alloc_pages_bulk() */
+ for (i = npages - 1; i >= 0; i--)
+ __free_page(page_list[i]);
+
+ kvfree(page_list);
+}
+
void mlx5vf_free_data_buffer(struct mlx5_vhca_data_buffer *buf)
{
- struct mlx5_vf_migration_file *migf = buf->migf;
- struct sg_page_iter sg_iter;
+ struct mlx5vf_pci_core_device *mvdev = buf->migf->mvdev;
+ struct mlx5_core_dev *mdev = mvdev->mdev;
- lockdep_assert_held(&migf->mvdev->state_mutex);
- WARN_ON(migf->mvdev->mdev_detach);
+ lockdep_assert_held(&mvdev->state_mutex);
+ WARN_ON(mvdev->mdev_detach);
- if (buf->dmaed) {
- mlx5_core_destroy_mkey(migf->mvdev->mdev, buf->mkey);
- dma_unmap_sgtable(migf->mvdev->mdev->device, &buf->table.sgt,
- buf->dma_dir, 0);
+ if (buf->mkey_in) {
+ mlx5_core_destroy_mkey(mdev, buf->mkey);
+ unregister_dma_pages(mdev, buf->npages, buf->mkey_in,
+ &buf->state, buf->dma_dir);
+ kvfree(buf->mkey_in);
}
- /* Undo alloc_pages_bulk_array() */
- for_each_sgtable_page(&buf->table.sgt, &sg_iter, 0)
- __free_page(sg_page_iter_page(&sg_iter));
- sg_free_append_table(&buf->table);
+ free_page_list(buf->npages, buf->page_list);
kfree(buf);
}
-static int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf,
- unsigned int npages)
+static int mlx5vf_add_pages(struct page ***page_list, unsigned int npages)
{
- unsigned int to_alloc = npages;
- struct page **page_list;
- unsigned long filled;
- unsigned int to_fill;
- int ret;
+ unsigned int filled, done = 0;
+ int i;
- to_fill = min_t(unsigned int, npages, PAGE_SIZE / sizeof(*page_list));
- page_list = kvzalloc(to_fill * sizeof(*page_list), GFP_KERNEL_ACCOUNT);
- if (!page_list)
+ *page_list =
+ kvcalloc(npages, sizeof(struct page *), GFP_KERNEL_ACCOUNT);
+ if (!*page_list)
return -ENOMEM;
- do {
- filled = alloc_pages_bulk_array(GFP_KERNEL_ACCOUNT, to_fill,
- page_list);
- if (!filled) {
- ret = -ENOMEM;
+ for (;;) {
+ filled = alloc_pages_bulk(GFP_KERNEL_ACCOUNT, npages - done,
+ *page_list + done);
+ if (!filled)
goto err;
- }
- to_alloc -= filled;
- ret = sg_alloc_append_table_from_pages(
- &buf->table, page_list, filled, 0,
- filled << PAGE_SHIFT, UINT_MAX, SG_MAX_SINGLE_ALLOC,
- GFP_KERNEL_ACCOUNT);
- if (ret)
- goto err;
- buf->allocated_length += filled * PAGE_SIZE;
- /* clean input for another bulk allocation */
- memset(page_list, 0, filled * sizeof(*page_list));
- to_fill = min_t(unsigned int, to_alloc,
- PAGE_SIZE / sizeof(*page_list));
- } while (to_alloc > 0);
+ done += filled;
+ if (done == npages)
+ break;
+ }
- kvfree(page_list);
return 0;
err:
- kvfree(page_list);
- return ret;
+ for (i = 0; i < done; i++)
+ __free_page(*page_list[i]);
+
+ kvfree(*page_list);
+ *page_list = NULL;
+ return -ENOMEM;
}
struct mlx5_vhca_data_buffer *
-mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf,
- size_t length,
+mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf, u32 npages,
enum dma_data_direction dma_dir)
{
struct mlx5_vhca_data_buffer *buf;
@@ -473,12 +531,13 @@ mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf,
buf->dma_dir = dma_dir;
buf->migf = migf;
- if (length) {
- ret = mlx5vf_add_migration_pages(buf,
- DIV_ROUND_UP_ULL(length, PAGE_SIZE));
+ if (npages) {
+ ret = mlx5vf_add_pages(&buf->page_list, npages);
if (ret)
goto end;
+ buf->npages = npages;
+
if (dma_dir != DMA_NONE) {
ret = mlx5vf_dma_data_buffer(buf);
if (ret)
@@ -501,8 +560,8 @@ void mlx5vf_put_data_buffer(struct mlx5_vhca_data_buffer *buf)
}
struct mlx5_vhca_data_buffer *
-mlx5vf_get_data_buffer(struct mlx5_vf_migration_file *migf,
- size_t length, enum dma_data_direction dma_dir)
+mlx5vf_get_data_buffer(struct mlx5_vf_migration_file *migf, u32 npages,
+ enum dma_data_direction dma_dir)
{
struct mlx5_vhca_data_buffer *buf, *temp_buf;
struct list_head free_list;
@@ -517,7 +576,7 @@ mlx5vf_get_data_buffer(struct mlx5_vf_migration_file *migf,
list_for_each_entry_safe(buf, temp_buf, &migf->avail_list, buf_elm) {
if (buf->dma_dir == dma_dir) {
list_del_init(&buf->buf_elm);
- if (buf->allocated_length >= length) {
+ if (buf->npages >= npages) {
spin_unlock_irq(&migf->list_lock);
goto found;
}
@@ -531,7 +590,7 @@ mlx5vf_get_data_buffer(struct mlx5_vf_migration_file *migf,
}
}
spin_unlock_irq(&migf->list_lock);
- buf = mlx5vf_alloc_data_buffer(migf, length, dma_dir);
+ buf = mlx5vf_alloc_data_buffer(migf, npages, dma_dir);
found:
while ((temp_buf = list_first_entry_or_null(&free_list,
@@ -712,7 +771,7 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
MLX5_SET(save_vhca_state_in, in, op_mod, 0);
MLX5_SET(save_vhca_state_in, in, vhca_id, mvdev->vhca_id);
MLX5_SET(save_vhca_state_in, in, mkey, buf->mkey);
- MLX5_SET(save_vhca_state_in, in, size, buf->allocated_length);
+ MLX5_SET(save_vhca_state_in, in, size, buf->npages * PAGE_SIZE);
MLX5_SET(save_vhca_state_in, in, incremental, inc);
MLX5_SET(save_vhca_state_in, in, set_track, track);
@@ -734,8 +793,11 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
}
if (!header_buf) {
- header_buf = mlx5vf_get_data_buffer(migf,
- sizeof(struct mlx5_vf_migration_header), DMA_NONE);
+ header_buf = mlx5vf_get_data_buffer(
+ migf,
+ DIV_ROUND_UP(sizeof(struct mlx5_vf_migration_header),
+ PAGE_SIZE),
+ DMA_NONE);
if (IS_ERR(header_buf)) {
err = PTR_ERR(header_buf);
goto err_free;
@@ -779,7 +841,7 @@ int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev,
if (mvdev->mdev_detach)
return -ENOTCONN;
- if (!buf->dmaed) {
+ if (!buf->mkey_in) {
err = mlx5vf_dma_data_buffer(buf);
if (err)
return err;
@@ -1334,103 +1396,16 @@ static void mlx5vf_destroy_qp(struct mlx5_core_dev *mdev,
kfree(qp);
}
-static void free_recv_pages(struct mlx5_vhca_recv_buf *recv_buf)
-{
- int i;
-
- /* Undo alloc_pages_bulk_array() */
- for (i = 0; i < recv_buf->npages; i++)
- __free_page(recv_buf->page_list[i]);
-
- kvfree(recv_buf->page_list);
-}
-
-static int alloc_recv_pages(struct mlx5_vhca_recv_buf *recv_buf,
- unsigned int npages)
-{
- unsigned int filled = 0, done = 0;
- int i;
-
- recv_buf->page_list = kvcalloc(npages, sizeof(*recv_buf->page_list),
- GFP_KERNEL_ACCOUNT);
- if (!recv_buf->page_list)
- return -ENOMEM;
-
- for (;;) {
- filled = alloc_pages_bulk_array(GFP_KERNEL_ACCOUNT,
- npages - done,
- recv_buf->page_list + done);
- if (!filled)
- goto err;
-
- done += filled;
- if (done == npages)
- break;
- }
-
- recv_buf->npages = npages;
- return 0;
-
-err:
- for (i = 0; i < npages; i++) {
- if (recv_buf->page_list[i])
- __free_page(recv_buf->page_list[i]);
- }
-
- kvfree(recv_buf->page_list);
- return -ENOMEM;
-}
-
-static int register_dma_recv_pages(struct mlx5_core_dev *mdev,
- struct mlx5_vhca_recv_buf *recv_buf)
-{
- int i, j;
-
- recv_buf->dma_addrs = kvcalloc(recv_buf->npages,
- sizeof(*recv_buf->dma_addrs),
- GFP_KERNEL_ACCOUNT);
- if (!recv_buf->dma_addrs)
- return -ENOMEM;
-
- for (i = 0; i < recv_buf->npages; i++) {
- recv_buf->dma_addrs[i] = dma_map_page(mdev->device,
- recv_buf->page_list[i],
- 0, PAGE_SIZE,
- DMA_FROM_DEVICE);
- if (dma_mapping_error(mdev->device, recv_buf->dma_addrs[i]))
- goto error;
- }
- return 0;
-
-error:
- for (j = 0; j < i; j++)
- dma_unmap_single(mdev->device, recv_buf->dma_addrs[j],
- PAGE_SIZE, DMA_FROM_DEVICE);
-
- kvfree(recv_buf->dma_addrs);
- return -ENOMEM;
-}
-
-static void unregister_dma_recv_pages(struct mlx5_core_dev *mdev,
- struct mlx5_vhca_recv_buf *recv_buf)
-{
- int i;
-
- for (i = 0; i < recv_buf->npages; i++)
- dma_unmap_single(mdev->device, recv_buf->dma_addrs[i],
- PAGE_SIZE, DMA_FROM_DEVICE);
-
- kvfree(recv_buf->dma_addrs);
-}
-
static void mlx5vf_free_qp_recv_resources(struct mlx5_core_dev *mdev,
struct mlx5_vhca_qp *qp)
{
struct mlx5_vhca_recv_buf *recv_buf = &qp->recv_buf;
mlx5_core_destroy_mkey(mdev, recv_buf->mkey);
- unregister_dma_recv_pages(mdev, recv_buf);
- free_recv_pages(&qp->recv_buf);
+ unregister_dma_pages(mdev, recv_buf->npages, recv_buf->mkey_in,
+ &recv_buf->state, DMA_FROM_DEVICE);
+ kvfree(recv_buf->mkey_in);
+ free_page_list(recv_buf->npages, recv_buf->page_list);
}
static int mlx5vf_alloc_qp_recv_resources(struct mlx5_core_dev *mdev,
@@ -1441,24 +1416,38 @@ static int mlx5vf_alloc_qp_recv_resources(struct mlx5_core_dev *mdev,
struct mlx5_vhca_recv_buf *recv_buf = &qp->recv_buf;
int err;
- err = alloc_recv_pages(recv_buf, npages);
- if (err < 0)
+ err = mlx5vf_add_pages(&recv_buf->page_list, npages);
+ if (err)
return err;
- err = register_dma_recv_pages(mdev, recv_buf);
- if (err)
+ recv_buf->npages = npages;
+
+ recv_buf->mkey_in = alloc_mkey_in(npages, pdn);
+ if (!recv_buf->mkey_in) {
+ err = -ENOMEM;
goto end;
+ }
+
+ err = register_dma_pages(mdev, npages, recv_buf->page_list,
+ recv_buf->mkey_in, &recv_buf->state,
+ DMA_FROM_DEVICE);
+ if (err)
+ goto err_register_dma;
- err = _create_mkey(mdev, pdn, NULL, recv_buf, &recv_buf->mkey);
+ err = create_mkey(mdev, npages, recv_buf->mkey_in, &recv_buf->mkey);
if (err)
goto err_create_mkey;
return 0;
err_create_mkey:
- unregister_dma_recv_pages(mdev, recv_buf);
+ unregister_dma_pages(mdev, npages, recv_buf->mkey_in, &recv_buf->state,
+ DMA_FROM_DEVICE);
+err_register_dma:
+ kvfree(recv_buf->mkey_in);
+ recv_buf->mkey_in = NULL;
end:
- free_recv_pages(recv_buf);
+ free_page_list(npages, recv_buf->page_list);
return err;
}
@@ -1513,7 +1502,8 @@ int mlx5vf_start_page_tracker(struct vfio_device *vdev,
struct mlx5_vhca_qp *host_qp;
struct mlx5_vhca_qp *fw_qp;
struct mlx5_core_dev *mdev;
- u32 max_msg_size = PAGE_SIZE;
+ u32 log_max_msg_size;
+ u32 max_msg_size;
u64 rq_size = SZ_2M;
u32 max_recv_wr;
int err;
@@ -1530,6 +1520,12 @@ int mlx5vf_start_page_tracker(struct vfio_device *vdev,
}
mdev = mvdev->mdev;
+ log_max_msg_size = MLX5_CAP_ADV_VIRTUALIZATION(mdev, pg_track_log_max_msg_size);
+ max_msg_size = (1ULL << log_max_msg_size);
+ /* The RQ must hold at least 4 WQEs/messages for successful QP creation */
+ if (rq_size < 4 * max_msg_size)
+ rq_size = 4 * max_msg_size;
+
memset(tracker, 0, sizeof(*tracker));
tracker->uar = mlx5_get_uars_page(mdev);
if (IS_ERR(tracker->uar)) {
@@ -1619,25 +1615,41 @@ set_report_output(u32 size, int index, struct mlx5_vhca_qp *qp,
{
u32 entry_size = MLX5_ST_SZ_BYTES(page_track_report_entry);
u32 nent = size / entry_size;
+ u32 nent_in_page;
+ u32 nent_to_set;
struct page *page;
+ u32 page_offset;
+ u32 page_index;
+ u32 buf_offset;
+ void *kaddr;
u64 addr;
u64 *buf;
int i;
- if (WARN_ON(index >= qp->recv_buf.npages ||
+ buf_offset = index * qp->max_msg_size;
+ if (WARN_ON(buf_offset + size >= qp->recv_buf.npages * PAGE_SIZE ||
(nent > qp->max_msg_size / entry_size)))
return;
- page = qp->recv_buf.page_list[index];
- buf = kmap_local_page(page);
- for (i = 0; i < nent; i++) {
- addr = MLX5_GET(page_track_report_entry, buf + i,
- dirty_address_low);
- addr |= (u64)MLX5_GET(page_track_report_entry, buf + i,
- dirty_address_high) << 32;
- iova_bitmap_set(dirty, addr, qp->tracked_page_size);
- }
- kunmap_local(buf);
+ do {
+ page_index = buf_offset / PAGE_SIZE;
+ page_offset = buf_offset % PAGE_SIZE;
+ nent_in_page = (PAGE_SIZE - page_offset) / entry_size;
+ page = qp->recv_buf.page_list[page_index];
+ kaddr = kmap_local_page(page);
+ buf = kaddr + page_offset;
+ nent_to_set = min(nent, nent_in_page);
+ for (i = 0; i < nent_to_set; i++) {
+ addr = MLX5_GET(page_track_report_entry, buf + i,
+ dirty_address_low);
+ addr |= (u64)MLX5_GET(page_track_report_entry, buf + i,
+ dirty_address_high) << 32;
+ iova_bitmap_set(dirty, addr, qp->tracked_page_size);
+ }
+ kunmap_local(kaddr);
+ buf_offset += (nent_to_set * entry_size);
+ nent -= nent_to_set;
+ } while (nent);
}
static void
diff --git a/drivers/vfio/pci/mlx5/cmd.h b/drivers/vfio/pci/mlx5/cmd.h
index df421dc6de04..d7821b5ca772 100644
--- a/drivers/vfio/pci/mlx5/cmd.h
+++ b/drivers/vfio/pci/mlx5/cmd.h
@@ -53,20 +53,17 @@ struct mlx5_vf_migration_header {
};
struct mlx5_vhca_data_buffer {
- struct sg_append_table table;
+ struct page **page_list;
+ struct dma_iova_state state;
loff_t start_pos;
u64 length;
- u64 allocated_length;
+ u32 npages;
u32 mkey;
+ u32 *mkey_in;
enum dma_data_direction dma_dir;
- u8 dmaed:1;
u8 stop_copy_chunk_num;
struct list_head buf_elm;
struct mlx5_vf_migration_file *migf;
- /* Optimize mlx5vf_get_migration_page() for sequential access */
- struct scatterlist *last_offset_sg;
- unsigned int sg_last_entry;
- unsigned long last_offset;
};
struct mlx5vf_async_data {
@@ -133,8 +130,9 @@ struct mlx5_vhca_cq {
struct mlx5_vhca_recv_buf {
u32 npages;
struct page **page_list;
- dma_addr_t *dma_addrs;
+ struct dma_iova_state state;
u32 next_rq_offset;
+ u32 *mkey_in;
u32 mkey;
};
@@ -217,15 +215,24 @@ int mlx5vf_cmd_alloc_pd(struct mlx5_vf_migration_file *migf);
void mlx5vf_cmd_dealloc_pd(struct mlx5_vf_migration_file *migf);
void mlx5fv_cmd_clean_migf_resources(struct mlx5_vf_migration_file *migf);
struct mlx5_vhca_data_buffer *
-mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf,
- size_t length, enum dma_data_direction dma_dir);
+mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf, u32 npages,
+ enum dma_data_direction dma_dir);
void mlx5vf_free_data_buffer(struct mlx5_vhca_data_buffer *buf);
struct mlx5_vhca_data_buffer *
-mlx5vf_get_data_buffer(struct mlx5_vf_migration_file *migf,
- size_t length, enum dma_data_direction dma_dir);
+mlx5vf_get_data_buffer(struct mlx5_vf_migration_file *migf, u32 npages,
+ enum dma_data_direction dma_dir);
void mlx5vf_put_data_buffer(struct mlx5_vhca_data_buffer *buf);
-struct page *mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf,
- unsigned long offset);
+static inline struct page *
+mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf,
+ unsigned long offset)
+{
+ int page_entry = offset / PAGE_SIZE;
+
+ if (page_entry >= buf->npages)
+ return NULL;
+
+ return buf->page_list[page_entry];
+}
void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev);
void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev,
enum mlx5_vf_migf_state *last_save_state);
diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c
index 61d9b0f9146d..93f894fe60d2 100644
--- a/drivers/vfio/pci/mlx5/main.c
+++ b/drivers/vfio/pci/mlx5/main.c
@@ -34,37 +34,6 @@ static struct mlx5vf_pci_core_device *mlx5vf_drvdata(struct pci_dev *pdev)
core_device);
}
-struct page *
-mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf,
- unsigned long offset)
-{
- unsigned long cur_offset = 0;
- struct scatterlist *sg;
- unsigned int i;
-
- /* All accesses are sequential */
- if (offset < buf->last_offset || !buf->last_offset_sg) {
- buf->last_offset = 0;
- buf->last_offset_sg = buf->table.sgt.sgl;
- buf->sg_last_entry = 0;
- }
-
- cur_offset = buf->last_offset;
-
- for_each_sg(buf->last_offset_sg, sg,
- buf->table.sgt.orig_nents - buf->sg_last_entry, i) {
- if (offset < sg->length + cur_offset) {
- buf->last_offset_sg = sg;
- buf->sg_last_entry += i;
- buf->last_offset = cur_offset;
- return nth_page(sg_page(sg),
- (offset - cur_offset) / PAGE_SIZE);
- }
- cur_offset += sg->length;
- }
- return NULL;
-}
-
static void mlx5vf_disable_fd(struct mlx5_vf_migration_file *migf)
{
mutex_lock(&migf->lock);
@@ -308,6 +277,7 @@ static struct mlx5_vhca_data_buffer *
mlx5vf_mig_file_get_stop_copy_buf(struct mlx5_vf_migration_file *migf,
u8 index, size_t required_length)
{
+ u32 npages = DIV_ROUND_UP(required_length, PAGE_SIZE);
struct mlx5_vhca_data_buffer *buf = migf->buf[index];
u8 chunk_num;
@@ -315,12 +285,11 @@ mlx5vf_mig_file_get_stop_copy_buf(struct mlx5_vf_migration_file *migf,
chunk_num = buf->stop_copy_chunk_num;
buf->migf->buf[index] = NULL;
/* Checking whether the pre-allocated buffer can fit */
- if (buf->allocated_length >= required_length)
+ if (buf->npages >= npages)
return buf;
mlx5vf_put_data_buffer(buf);
- buf = mlx5vf_get_data_buffer(buf->migf, required_length,
- DMA_FROM_DEVICE);
+ buf = mlx5vf_get_data_buffer(buf->migf, npages, DMA_FROM_DEVICE);
if (IS_ERR(buf))
return buf;
@@ -373,7 +342,8 @@ static int mlx5vf_add_stop_copy_header(struct mlx5_vf_migration_file *migf,
u8 *to_buff;
int ret;
- header_buf = mlx5vf_get_data_buffer(migf, size, DMA_NONE);
+ header_buf = mlx5vf_get_data_buffer(migf, DIV_ROUND_UP(size, PAGE_SIZE),
+ DMA_NONE);
if (IS_ERR(header_buf))
return PTR_ERR(header_buf);
@@ -388,7 +358,7 @@ static int mlx5vf_add_stop_copy_header(struct mlx5_vf_migration_file *migf,
to_buff = kmap_local_page(page);
memcpy(to_buff, &header, sizeof(header));
header_buf->length = sizeof(header);
- data.stop_copy_size = cpu_to_le64(migf->buf[0]->allocated_length);
+ data.stop_copy_size = cpu_to_le64(migf->buf[0]->npages * PAGE_SIZE);
memcpy(to_buff + sizeof(header), &data, sizeof(data));
header_buf->length += sizeof(data);
kunmap_local(to_buff);
@@ -437,15 +407,20 @@ static int mlx5vf_prep_stop_copy(struct mlx5vf_pci_core_device *mvdev,
num_chunks = mvdev->chunk_mode ? MAX_NUM_CHUNKS : 1;
for (i = 0; i < num_chunks; i++) {
- buf = mlx5vf_get_data_buffer(migf, inc_state_size, DMA_FROM_DEVICE);
+ buf = mlx5vf_get_data_buffer(
+ migf, DIV_ROUND_UP(inc_state_size, PAGE_SIZE),
+ DMA_FROM_DEVICE);
if (IS_ERR(buf)) {
ret = PTR_ERR(buf);
goto err;
}
migf->buf[i] = buf;
- buf = mlx5vf_get_data_buffer(migf,
- sizeof(struct mlx5_vf_migration_header), DMA_NONE);
+ buf = mlx5vf_get_data_buffer(
+ migf,
+ DIV_ROUND_UP(sizeof(struct mlx5_vf_migration_header),
+ PAGE_SIZE),
+ DMA_NONE);
if (IS_ERR(buf)) {
ret = PTR_ERR(buf);
goto err;
@@ -553,7 +528,8 @@ static long mlx5vf_precopy_ioctl(struct file *filp, unsigned int cmd,
* We finished transferring the current state and the device has a
* dirty state, save a new state to be ready for.
*/
- buf = mlx5vf_get_data_buffer(migf, inc_length, DMA_FROM_DEVICE);
+ buf = mlx5vf_get_data_buffer(migf, DIV_ROUND_UP(inc_length, PAGE_SIZE),
+ DMA_FROM_DEVICE);
if (IS_ERR(buf)) {
ret = PTR_ERR(buf);
mlx5vf_mark_err(migf);
@@ -587,7 +563,6 @@ static const struct file_operations mlx5vf_save_fops = {
.unlocked_ioctl = mlx5vf_precopy_ioctl,
.compat_ioctl = compat_ptr_ioctl,
.release = mlx5vf_release_file,
- .llseek = no_llseek,
};
static int mlx5vf_pci_save_device_inc_data(struct mlx5vf_pci_core_device *mvdev)
@@ -641,14 +616,11 @@ mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev, bool track)
O_RDONLY);
if (IS_ERR(migf->filp)) {
ret = PTR_ERR(migf->filp);
- goto end;
+ kfree(migf);
+ return ERR_PTR(ret);
}
migf->mvdev = mvdev;
- ret = mlx5vf_cmd_alloc_pd(migf);
- if (ret)
- goto out_free;
-
stream_open(migf->filp->f_inode, migf->filp);
mutex_init(&migf->lock);
init_waitqueue_head(&migf->poll_wait);
@@ -664,6 +636,11 @@ mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev, bool track)
INIT_LIST_HEAD(&migf->buf_list);
INIT_LIST_HEAD(&migf->avail_list);
spin_lock_init(&migf->list_lock);
+
+ ret = mlx5vf_cmd_alloc_pd(migf);
+ if (ret)
+ goto out;
+
ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, &length, &full_size, 0);
if (ret)
goto out_pd;
@@ -674,8 +651,8 @@ mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev, bool track)
if (track) {
/* leave the allocated buffer ready for the stop-copy phase */
- buf = mlx5vf_alloc_data_buffer(migf,
- migf->buf[0]->allocated_length, DMA_FROM_DEVICE);
+ buf = mlx5vf_alloc_data_buffer(migf, migf->buf[0]->npages,
+ DMA_FROM_DEVICE);
if (IS_ERR(buf)) {
ret = PTR_ERR(buf);
goto out_pd;
@@ -693,10 +670,8 @@ out_save:
mlx5vf_free_data_buffer(buf);
out_pd:
mlx5fv_cmd_clean_migf_resources(migf);
-out_free:
+out:
fput(migf->filp);
-end:
- kfree(migf);
return ERR_PTR(ret);
}
@@ -918,11 +893,14 @@ static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf,
goto out_unlock;
break;
case MLX5_VF_LOAD_STATE_PREP_HEADER_DATA:
- if (vhca_buf_header->allocated_length < migf->record_size) {
+ {
+ u32 npages = DIV_ROUND_UP(migf->record_size, PAGE_SIZE);
+
+ if (vhca_buf_header->npages < npages) {
mlx5vf_free_data_buffer(vhca_buf_header);
- migf->buf_header[0] = mlx5vf_alloc_data_buffer(migf,
- migf->record_size, DMA_NONE);
+ migf->buf_header[0] = mlx5vf_alloc_data_buffer(
+ migf, npages, DMA_NONE);
if (IS_ERR(migf->buf_header[0])) {
ret = PTR_ERR(migf->buf_header[0]);
migf->buf_header[0] = NULL;
@@ -935,6 +913,7 @@ static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf,
vhca_buf_header->start_pos = migf->max_pos;
migf->load_state = MLX5_VF_LOAD_STATE_READ_HEADER_DATA;
break;
+ }
case MLX5_VF_LOAD_STATE_READ_HEADER_DATA:
ret = mlx5vf_resume_read_header_data(migf, vhca_buf_header,
&buf, &len, pos, &done);
@@ -945,12 +924,13 @@ static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf,
{
u64 size = max(migf->record_size,
migf->stop_copy_prep_size);
+ u32 npages = DIV_ROUND_UP(size, PAGE_SIZE);
- if (vhca_buf->allocated_length < size) {
+ if (vhca_buf->npages < npages) {
mlx5vf_free_data_buffer(vhca_buf);
- migf->buf[0] = mlx5vf_alloc_data_buffer(migf,
- size, DMA_TO_DEVICE);
+ migf->buf[0] = mlx5vf_alloc_data_buffer(
+ migf, npages, DMA_TO_DEVICE);
if (IS_ERR(migf->buf[0])) {
ret = PTR_ERR(migf->buf[0]);
migf->buf[0] = NULL;
@@ -1000,7 +980,6 @@ static const struct file_operations mlx5vf_resume_fops = {
.owner = THIS_MODULE,
.write = mlx5vf_resume_write,
.release = mlx5vf_release_file,
- .llseek = no_llseek,
};
static struct mlx5_vf_migration_file *
@@ -1018,13 +997,19 @@ mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev)
O_WRONLY);
if (IS_ERR(migf->filp)) {
ret = PTR_ERR(migf->filp);
- goto end;
+ kfree(migf);
+ return ERR_PTR(ret);
}
+ stream_open(migf->filp->f_inode, migf->filp);
+ mutex_init(&migf->lock);
+ INIT_LIST_HEAD(&migf->buf_list);
+ INIT_LIST_HEAD(&migf->avail_list);
+ spin_lock_init(&migf->list_lock);
migf->mvdev = mvdev;
ret = mlx5vf_cmd_alloc_pd(migf);
if (ret)
- goto out_free;
+ goto out;
buf = mlx5vf_alloc_data_buffer(migf, 0, DMA_TO_DEVICE);
if (IS_ERR(buf)) {
@@ -1033,8 +1018,11 @@ mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev)
}
migf->buf[0] = buf;
- buf = mlx5vf_alloc_data_buffer(migf,
- sizeof(struct mlx5_vf_migration_header), DMA_NONE);
+ buf = mlx5vf_alloc_data_buffer(
+ migf,
+ DIV_ROUND_UP(sizeof(struct mlx5_vf_migration_header),
+ PAGE_SIZE),
+ DMA_NONE);
if (IS_ERR(buf)) {
ret = PTR_ERR(buf);
goto out_buf;
@@ -1043,20 +1031,13 @@ mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev)
migf->buf_header[0] = buf;
migf->load_state = MLX5_VF_LOAD_STATE_READ_HEADER;
- stream_open(migf->filp->f_inode, migf->filp);
- mutex_init(&migf->lock);
- INIT_LIST_HEAD(&migf->buf_list);
- INIT_LIST_HEAD(&migf->avail_list);
- spin_lock_init(&migf->list_lock);
return migf;
out_buf:
mlx5vf_free_data_buffer(migf->buf[0]);
out_pd:
mlx5vf_cmd_dealloc_pd(migf);
-out_free:
+out:
fput(migf->filp);
-end:
- kfree(migf);
return ERR_PTR(ret);
}
@@ -1151,7 +1132,8 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev,
MLX5VF_QUERY_INC | MLX5VF_QUERY_CLEANUP);
if (ret)
return ERR_PTR(ret);
- buf = mlx5vf_get_data_buffer(migf, size, DMA_FROM_DEVICE);
+ buf = mlx5vf_get_data_buffer(migf,
+ DIV_ROUND_UP(size, PAGE_SIZE), DMA_FROM_DEVICE);
if (IS_ERR(buf))
return ERR_CAST(buf);
/* pre_copy cleanup */
@@ -1449,7 +1431,7 @@ static struct pci_driver mlx5vf_pci_driver = {
module_pci_driver(mlx5vf_pci_driver);
-MODULE_IMPORT_NS(IOMMUFD);
+MODULE_IMPORT_NS("IOMMUFD");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Max Gurtovoy <mgurtovoy@nvidia.com>");
MODULE_AUTHOR("Yishai Hadas <yishaih@nvidia.com>");