diff options
Diffstat (limited to 'drivers/infiniband/hw/efa/efa_main.c')
| -rw-r--r-- | drivers/infiniband/hw/efa/efa_main.c | 277 |
1 files changed, 218 insertions, 59 deletions
diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c index 203e6ddcacbc..6c415b9adb5f 100644 --- a/drivers/infiniband/hw/efa/efa_main.c +++ b/drivers/infiniband/hw/efa/efa_main.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause /* - * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2025 Amazon.com, Inc. or its affiliates. All rights reserved. */ #include <linux/module.h> @@ -9,15 +9,20 @@ #include <linux/version.h> #include <rdma/ib_user_verbs.h> +#include <rdma/uverbs_ioctl.h> #include "efa.h" #define PCI_DEV_ID_EFA0_VF 0xefa0 #define PCI_DEV_ID_EFA1_VF 0xefa1 +#define PCI_DEV_ID_EFA2_VF 0xefa2 +#define PCI_DEV_ID_EFA3_VF 0xefa3 static const struct pci_device_id efa_pci_tbl[] = { { PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA0_VF) }, { PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA1_VF) }, + { PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA2_VF) }, + { PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA3_VF) }, { } }; @@ -34,6 +39,8 @@ MODULE_DEVICE_TABLE(pci, efa_pci_tbl); (BIT(EFA_ADMIN_FATAL_ERROR) | BIT(EFA_ADMIN_WARNING) | \ BIT(EFA_ADMIN_NOTIFICATION) | BIT(EFA_ADMIN_KEEP_ALIVE)) +extern const struct uapi_definition efa_uapi_defs[]; + /* This handler will called for unknown event group or unimplemented handlers */ static void unimplemented_aenq_handler(void *data, struct efa_admin_aenq_entry *aenq_e) @@ -67,6 +74,47 @@ static void efa_release_bars(struct efa_dev *dev, int bars_mask) pci_release_selected_regions(pdev, release_bars); } +static void efa_process_comp_eqe(struct efa_dev *dev, struct efa_admin_eqe *eqe) +{ + u16 cqn = eqe->u.comp_event.cqn; + struct efa_cq *cq; + + /* Safe to load as we're in irq and removal calls synchronize_irq() */ + cq = xa_load(&dev->cqs_xa, cqn); + if (unlikely(!cq)) { + ibdev_err_ratelimited(&dev->ibdev, + "Completion event on non-existent CQ[%u]", + cqn); + return; + } + + cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); +} + +static void efa_process_eqe(struct efa_com_eq *eeq, struct efa_admin_eqe *eqe) +{ + struct efa_dev *dev = container_of(eeq->edev, struct efa_dev, edev); + + if (likely(EFA_GET(&eqe->common, EFA_ADMIN_EQE_EVENT_TYPE) == + EFA_ADMIN_EQE_EVENT_TYPE_COMPLETION)) + efa_process_comp_eqe(dev, eqe); + else + ibdev_err_ratelimited(&dev->ibdev, + "Unknown event type received %lu", + EFA_GET(&eqe->common, + EFA_ADMIN_EQE_EVENT_TYPE)); +} + +static irqreturn_t efa_intr_msix_comp(int irq, void *data) +{ + struct efa_eq *eq = data; + struct efa_com_dev *edev = eq->eeq.edev; + + efa_com_eq_comp_intr_handler(edev, &eq->eeq); + + return IRQ_HANDLED; +} + static irqreturn_t efa_intr_msix_mgmnt(int irq, void *data) { struct efa_dev *dev = data; @@ -77,27 +125,42 @@ static irqreturn_t efa_intr_msix_mgmnt(int irq, void *data) return IRQ_HANDLED; } -static int efa_request_mgmnt_irq(struct efa_dev *dev) +static int efa_request_irq(struct efa_dev *dev, struct efa_irq *irq) { - struct efa_irq *irq; int err; - irq = &dev->admin_irq; - err = request_irq(irq->vector, irq->handler, 0, irq->name, - irq->data); + err = request_irq(irq->irqn, irq->handler, 0, irq->name, irq->data); if (err) { - dev_err(&dev->pdev->dev, "Failed to request admin irq (%d)\n", - err); + dev_err(&dev->pdev->dev, "Failed to request irq %s (%d)\n", + irq->name, err); return err; } - dev_dbg(&dev->pdev->dev, "Set affinity hint of mgmnt irq to %*pbl (irq vector: %d)\n", - nr_cpumask_bits, &irq->affinity_hint_mask, irq->vector); - irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask); + irq_set_affinity_hint(irq->irqn, &irq->affinity_hint_mask); return 0; } +static void efa_setup_comp_irq(struct efa_dev *dev, struct efa_eq *eq, u32 vector) +{ + u32 cpu; + + cpu = vector - EFA_COMP_EQS_VEC_BASE; + snprintf(eq->irq.name, EFA_IRQNAME_SIZE, "efa-comp%d@pci:%s", cpu, + pci_name(dev->pdev)); + eq->irq.handler = efa_intr_msix_comp; + eq->irq.data = eq; + eq->irq.vector = vector; + eq->irq.irqn = pci_irq_vector(dev->pdev, vector); + cpumask_set_cpu(cpu, &eq->irq.affinity_hint_mask); +} + +static void efa_free_irq(struct efa_dev *dev, struct efa_irq *irq) +{ + irq_set_affinity_hint(irq->irqn, NULL); + free_irq(irq->irqn, irq->data); +} + static void efa_setup_mgmnt_irq(struct efa_dev *dev) { u32 cpu; @@ -106,47 +169,45 @@ static void efa_setup_mgmnt_irq(struct efa_dev *dev) "efa-mgmnt@pci:%s", pci_name(dev->pdev)); dev->admin_irq.handler = efa_intr_msix_mgmnt; dev->admin_irq.data = dev; - dev->admin_irq.vector = - pci_irq_vector(dev->pdev, dev->admin_msix_vector_idx); + dev->admin_irq.vector = dev->admin_msix_vector_idx; + dev->admin_irq.irqn = pci_irq_vector(dev->pdev, + dev->admin_msix_vector_idx); cpu = cpumask_first(cpu_online_mask); - dev->admin_irq.cpu = cpu; cpumask_set_cpu(cpu, &dev->admin_irq.affinity_hint_mask); - dev_info(&dev->pdev->dev, "Setup irq:0x%p vector:%d name:%s\n", - &dev->admin_irq, - dev->admin_irq.vector, + dev_info(&dev->pdev->dev, "Setup irq:%d name:%s\n", + dev->admin_irq.irqn, dev->admin_irq.name); } -static void efa_free_mgmnt_irq(struct efa_dev *dev) -{ - struct efa_irq *irq; - - irq = &dev->admin_irq; - irq_set_affinity_hint(irq->vector, NULL); - free_irq(irq->vector, irq->data); -} - static int efa_set_mgmnt_irq(struct efa_dev *dev) { efa_setup_mgmnt_irq(dev); - return efa_request_mgmnt_irq(dev); + return efa_request_irq(dev, &dev->admin_irq); } static int efa_request_doorbell_bar(struct efa_dev *dev) { u8 db_bar_idx = dev->dev_attr.db_bar; struct pci_dev *pdev = dev->pdev; - int bars; + int pci_mem_bars; + int db_bar; int err; - if (!(BIT(db_bar_idx) & EFA_BASE_BAR_MASK)) { - bars = pci_select_bars(pdev, IORESOURCE_MEM) & BIT(db_bar_idx); + db_bar = BIT(db_bar_idx); + if (!(db_bar & EFA_BASE_BAR_MASK)) { + pci_mem_bars = pci_select_bars(pdev, IORESOURCE_MEM); + if (db_bar & ~pci_mem_bars) { + dev_err(&pdev->dev, + "Doorbells BAR unavailable. Requested %#x, available %#x\n", + db_bar, pci_mem_bars); + return -ENODEV; + } - err = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME); + err = pci_request_selected_regions(pdev, db_bar, DRV_MODULE_NAME); if (err) { - dev_err(&dev->pdev->dev, + dev_err(&pdev->dev, "pci_request_selected_regions for bar %d failed %d\n", db_bar_idx, err); return err; @@ -237,6 +298,70 @@ static void efa_set_host_info(struct efa_dev *dev) dma_free_coherent(&dev->pdev->dev, bufsz, hinf, hinf_dma); } +static void efa_destroy_eq(struct efa_dev *dev, struct efa_eq *eq) +{ + efa_com_eq_destroy(&dev->edev, &eq->eeq); + efa_free_irq(dev, &eq->irq); +} + +static int efa_create_eq(struct efa_dev *dev, struct efa_eq *eq, u32 msix_vec) +{ + int err; + + efa_setup_comp_irq(dev, eq, msix_vec); + err = efa_request_irq(dev, &eq->irq); + if (err) + return err; + + err = efa_com_eq_init(&dev->edev, &eq->eeq, efa_process_eqe, + dev->dev_attr.max_eq_depth, msix_vec); + if (err) + goto err_free_comp_irq; + + return 0; + +err_free_comp_irq: + efa_free_irq(dev, &eq->irq); + return err; +} + +static int efa_create_eqs(struct efa_dev *dev) +{ + u32 neqs = dev->dev_attr.max_eq; + int err, i; + + neqs = min_t(u32, neqs, dev->num_irq_vectors - EFA_COMP_EQS_VEC_BASE); + dev->neqs = neqs; + dev->eqs = kcalloc(neqs, sizeof(*dev->eqs), GFP_KERNEL); + if (!dev->eqs) + return -ENOMEM; + + for (i = 0; i < neqs; i++) { + err = efa_create_eq(dev, &dev->eqs[i], i + EFA_COMP_EQS_VEC_BASE); + if (err) + goto err_destroy_eqs; + } + + return 0; + +err_destroy_eqs: + for (i--; i >= 0; i--) + efa_destroy_eq(dev, &dev->eqs[i]); + kfree(dev->eqs); + + return err; +} + +static void efa_destroy_eqs(struct efa_dev *dev) +{ + int i; + + for (i = 0; i < dev->neqs; i++) + efa_destroy_eq(dev, &dev->eqs[i]); + + kfree(dev->eqs); +} + static const struct ib_device_ops efa_dev_ops = { .owner = THIS_MODULE, .driver_id = RDMA_DRIVER_EFA, @@ -247,6 +372,7 @@ static const struct ib_device_ops efa_dev_ops = { .alloc_pd = efa_alloc_pd, .alloc_ucontext = efa_alloc_ucontext, .create_cq = efa_create_cq, + .create_cq_umem = efa_create_cq_umem, .create_qp = efa_create_qp, .create_user_ah = efa_create_ah, .dealloc_pd = efa_dealloc_pd, @@ -267,10 +393,12 @@ static const struct ib_device_ops efa_dev_ops = { .query_port = efa_query_port, .query_qp = efa_query_qp, .reg_user_mr = efa_reg_mr, + .reg_user_mr_dmabuf = efa_reg_user_mr_dmabuf, INIT_RDMA_OBJ_SIZE(ib_ah, efa_ah, ibah), INIT_RDMA_OBJ_SIZE(ib_cq, efa_cq, ibcq), INIT_RDMA_OBJ_SIZE(ib_pd, efa_pd, ibpd), + INIT_RDMA_OBJ_SIZE(ib_qp, efa_qp, ibqp), INIT_RDMA_OBJ_SIZE(ib_ucontext, efa_ucontext, ibucontext), }; @@ -302,23 +430,32 @@ static int efa_ib_device_add(struct efa_dev *dev) if (err) goto err_release_doorbell_bar; + err = efa_create_eqs(dev); + if (err) + goto err_release_doorbell_bar; + efa_set_host_info(dev); dev->ibdev.node_type = RDMA_NODE_UNSPECIFIED; + dev->ibdev.node_guid = dev->dev_attr.guid; dev->ibdev.phys_port_cnt = 1; - dev->ibdev.num_comp_vectors = 1; + dev->ibdev.num_comp_vectors = dev->neqs ?: 1; dev->ibdev.dev.parent = &pdev->dev; ib_set_device_ops(&dev->ibdev, &efa_dev_ops); + dev->ibdev.driver_def = efa_uapi_defs; + err = ib_register_device(&dev->ibdev, "efa_%d", &pdev->dev); if (err) - goto err_release_doorbell_bar; + goto err_destroy_eqs; ibdev_info(&dev->ibdev, "IB device registered\n"); return 0; +err_destroy_eqs: + efa_destroy_eqs(dev); err_release_doorbell_bar: efa_release_doorbell_bar(dev); return err; @@ -326,9 +463,9 @@ err_release_doorbell_bar: static void efa_ib_device_remove(struct efa_dev *dev) { - efa_com_dev_reset(&dev->edev, EFA_REGS_RESET_NORMAL); ibdev_info(&dev->ibdev, "Unregister ib device\n"); ib_unregister_device(&dev->ibdev); + efa_destroy_eqs(dev); efa_release_doorbell_bar(dev); } @@ -339,29 +476,30 @@ static void efa_disable_msix(struct efa_dev *dev) static int efa_enable_msix(struct efa_dev *dev) { - int msix_vecs, irq_num; - - /* Reserve the max msix vectors we might need */ - msix_vecs = EFA_NUM_MSIX_VEC; + int max_vecs, num_vecs; + + /* + * Reserve the max msix vectors we might need, one vector is reserved + * for admin. + */ + max_vecs = min_t(int, pci_msix_vec_count(dev->pdev), + num_online_cpus() + 1); dev_dbg(&dev->pdev->dev, "Trying to enable MSI-X, vectors %d\n", - msix_vecs); + max_vecs); dev->admin_msix_vector_idx = EFA_MGMNT_MSIX_VEC_IDX; - irq_num = pci_alloc_irq_vectors(dev->pdev, msix_vecs, - msix_vecs, PCI_IRQ_MSIX); + num_vecs = pci_alloc_irq_vectors(dev->pdev, 1, + max_vecs, PCI_IRQ_MSIX); - if (irq_num < 0) { - dev_err(&dev->pdev->dev, "Failed to enable MSI-X. irq_num %d\n", - irq_num); + if (num_vecs < 0) { + dev_err(&dev->pdev->dev, "Failed to enable MSI-X. error %d\n", + num_vecs); return -ENOSPC; } - if (irq_num != msix_vecs) { - dev_err(&dev->pdev->dev, - "Allocated %d MSI-X (out of %d requested)\n", - irq_num, msix_vecs); - return -ENOSPC; - } + dev_dbg(&dev->pdev->dev, "Allocated %d MSI-X vectors\n", num_vecs); + + dev->num_irq_vectors = num_vecs; return 0; } @@ -399,7 +537,7 @@ static struct efa_dev *efa_probe_device(struct pci_dev *pdev) { struct efa_com_dev *edev; struct efa_dev *dev; - int bars; + int pci_mem_bars; int err; err = pci_enable_device_mem(pdev); @@ -422,9 +560,16 @@ static struct efa_dev *efa_probe_device(struct pci_dev *pdev) edev->efa_dev = dev; edev->dmadev = &pdev->dev; dev->pdev = pdev; + xa_init(&dev->cqs_xa); - bars = pci_select_bars(pdev, IORESOURCE_MEM) & EFA_BASE_BAR_MASK; - err = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME); + pci_mem_bars = pci_select_bars(pdev, IORESOURCE_MEM); + if (EFA_BASE_BAR_MASK & ~pci_mem_bars) { + dev_err(&pdev->dev, "BARs unavailable. Requested %#x, available %#x\n", + (int)EFA_BASE_BAR_MASK, pci_mem_bars); + err = -ENODEV; + goto err_ibdev_destroy; + } + err = pci_request_selected_regions(pdev, EFA_BASE_BAR_MASK, DRV_MODULE_NAME); if (err) { dev_err(&pdev->dev, "pci_request_selected_regions failed %d\n", err); @@ -477,7 +622,7 @@ static struct efa_dev *efa_probe_device(struct pci_dev *pdev) return dev; err_free_mgmnt_irq: - efa_free_mgmnt_irq(dev); + efa_free_irq(dev, &dev->admin_irq); err_disable_msix: efa_disable_msix(dev); err_reg_read_destroy: @@ -493,18 +638,21 @@ err_disable_device: return ERR_PTR(err); } -static void efa_remove_device(struct pci_dev *pdev) +static void efa_remove_device(struct pci_dev *pdev, + enum efa_regs_reset_reason_types reset_reason) { struct efa_dev *dev = pci_get_drvdata(pdev); struct efa_com_dev *edev; edev = &dev->edev; + efa_com_dev_reset(edev, reset_reason); efa_com_admin_destroy(edev); - efa_free_mgmnt_irq(dev); + efa_free_irq(dev, &dev->admin_irq); efa_disable_msix(dev); efa_com_mmio_reg_read_destroy(edev); devm_iounmap(&pdev->dev, edev->reg_bar); efa_release_bars(dev, EFA_BASE_BAR_MASK); + xa_destroy(&dev->cqs_xa); ib_dealloc_device(&dev->ibdev); pci_disable_device(pdev); } @@ -525,7 +673,7 @@ static int efa_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return 0; err_remove_device: - efa_remove_device(pdev); + efa_remove_device(pdev, EFA_REGS_RESET_INIT_ERR); return err; } @@ -534,7 +682,17 @@ static void efa_remove(struct pci_dev *pdev) struct efa_dev *dev = pci_get_drvdata(pdev); efa_ib_device_remove(dev); - efa_remove_device(pdev); + efa_remove_device(pdev, EFA_REGS_RESET_NORMAL); +} + +static void efa_shutdown(struct pci_dev *pdev) +{ + struct efa_dev *dev = pci_get_drvdata(pdev); + + efa_destroy_eqs(dev); + efa_com_dev_reset(&dev->edev, EFA_REGS_RESET_SHUTDOWN); + efa_free_irq(dev, &dev->admin_irq); + efa_disable_msix(dev); } static struct pci_driver efa_pci_driver = { @@ -542,6 +700,7 @@ static struct pci_driver efa_pci_driver = { .id_table = efa_pci_tbl, .probe = efa_probe, .remove = efa_remove, + .shutdown = efa_shutdown, }; module_pci_driver(efa_pci_driver); |
