From 140eb5227767c6754742020a16d2691222b9c19b Mon Sep 17 00:00:00 2001 From: Doug Meyer Date: Tue, 17 Oct 2017 13:22:53 -0700 Subject: NTB: ntb_hw_switchtec: Fix peer BAR bug in switchtec_ntb_init_shared_mw This resolves a bug which may incorrectly configure the peer host's LUT for shared memory window access. The code was using the local host's first BAR number, rather than the peer host's first BAR number, to determine what peer NT control register to program. The bug will cause the Switchtec NTB link to work only if both peers have the same first NTB BAR configured. In all other configurations, the link will not come up, failing silently. When both hosts have the same first BAR, the configuration works only because the first BAR numbers happen to be the same. When the hosts do not have the same first BAR, then the LUT translation will not be configured in the correct peer LUT and will not give the peer the shared memory window access required for the link to operate. Signed-off-by: Doug Meyer Reviewed-by: Logan Gunthorpe Fixes: 678784a44ae8 ("NTB: switchtec_ntb: Initialize hardware for memory windows") Signed-off-by: Jon Mason --- drivers/ntb/hw/mscc/ntb_hw_switchtec.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c index afe8ed6f3b23..ca0334a6b759 100644 --- a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c +++ b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c @@ -964,7 +964,8 @@ static void switchtec_ntb_init_shared(struct switchtec_ntb *sndev) static int switchtec_ntb_init_shared_mw(struct switchtec_ntb *sndev) { struct ntb_ctrl_regs __iomem *ctl = sndev->mmio_peer_ctrl; - int bar = sndev->direct_mw_to_bar[0]; + int self_bar = sndev->direct_mw_to_bar[0]; + int peer_bar = sndev->peer_direct_mw_to_bar[0]; u32 ctl_val; int rc; @@ -985,12 +986,12 @@ static int switchtec_ntb_init_shared_mw(struct switchtec_ntb *sndev) if (rc) goto unalloc_and_exit; - 
ctl_val = ioread32(&ctl->bar_entry[bar].ctl); + ctl_val = ioread32(&ctl->bar_entry[peer_bar].ctl); ctl_val &= 0xFF; ctl_val |= NTB_CTRL_BAR_LUT_WIN_EN; ctl_val |= ilog2(LUT_SIZE) << 8; ctl_val |= (sndev->nr_lut_mw - 1) << 14; - iowrite32(ctl_val, &ctl->bar_entry[bar].ctl); + iowrite32(ctl_val, &ctl->bar_entry[peer_bar].ctl); iowrite64((NTB_CTRL_LUT_EN | (sndev->self_partition << 1) | sndev->self_shared_dma), @@ -1009,7 +1010,7 @@ static int switchtec_ntb_init_shared_mw(struct switchtec_ntb *sndev) goto unalloc_and_exit; } - sndev->peer_shared = pci_iomap(sndev->stdev->pdev, bar, LUT_SIZE); + sndev->peer_shared = pci_iomap(sndev->stdev->pdev, self_bar, LUT_SIZE); if (!sndev->peer_shared) { rc = -ENOMEM; goto unalloc_and_exit; -- cgit From 0ed08f829b10531c35887fd781d80ef3bfbb1cd9 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 17 Nov 2017 15:20:38 +0100 Subject: ntb: remove unneeded DRIVER_LICENSE #defines There is no need to #define the license of the driver, just put it in the MODULE_LICENSE() line directly as a text string. This allows tools that check that the module license matches the source code license to work properly, as there is no need to unwind the unneeded dereference, especially when the string is defined just a few lines above the usage of it. 
Reported-and-reviewed-by: Philippe Ombredanne Signed-off-by: Greg Kroah-Hartman Cc: Dave Jiang Cc: Allen Hubbe Cc: Gary R Hook Cc: Serge Semin Signed-off-by: Jon Mason --- drivers/ntb/ntb.c | 3 +-- drivers/ntb/test/ntb_perf.c | 3 +-- drivers/ntb/test/ntb_pingpong.c | 3 +-- drivers/ntb/test/ntb_tool.c | 3 +-- 4 files changed, 4 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/ntb/ntb.c b/drivers/ntb/ntb.c index 03b80d89b980..bdcd59b13c1f 100644 --- a/drivers/ntb/ntb.c +++ b/drivers/ntb/ntb.c @@ -63,12 +63,11 @@ #define DRIVER_NAME "ntb" #define DRIVER_DESCRIPTION "PCIe NTB Driver Framework" -#define DRIVER_LICENSE "Dual BSD/GPL" #define DRIVER_VERSION "1.0" #define DRIVER_RELDATE "24 March 2015" #define DRIVER_AUTHOR "Allen Hubbe " -MODULE_LICENSE(DRIVER_LICENSE); +MODULE_LICENSE("Dual BSD/GPL"); MODULE_VERSION(DRIVER_VERSION); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESCRIPTION); diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c index 427112cf101a..6f6c602d04af 100644 --- a/drivers/ntb/test/ntb_perf.c +++ b/drivers/ntb/test/ntb_perf.c @@ -63,7 +63,6 @@ #define DRIVER_NAME "ntb_perf" #define DRIVER_DESCRIPTION "PCIe NTB Performance Measurement Tool" -#define DRIVER_LICENSE "Dual BSD/GPL" #define DRIVER_VERSION "1.0" #define DRIVER_AUTHOR "Dave Jiang " @@ -78,7 +77,7 @@ #define MAX_SEG_ORDER 20 /* no larger than 1M for kmalloc buffer */ #define PIDX NTB_DEF_PEER_IDX -MODULE_LICENSE(DRIVER_LICENSE); +MODULE_LICENSE("Dual BSD/GPL"); MODULE_VERSION(DRIVER_VERSION); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESCRIPTION); diff --git a/drivers/ntb/test/ntb_pingpong.c b/drivers/ntb/test/ntb_pingpong.c index 3f5a92bae6f8..e700873e03fb 100644 --- a/drivers/ntb/test/ntb_pingpong.c +++ b/drivers/ntb/test/ntb_pingpong.c @@ -68,12 +68,11 @@ #define DRIVER_NAME "ntb_pingpong" #define DRIVER_DESCRIPTION "PCIe NTB Simple Pingpong Client" -#define DRIVER_LICENSE "Dual BSD/GPL" #define DRIVER_VERSION "1.0" 
#define DRIVER_RELDATE "24 March 2015" #define DRIVER_AUTHOR "Allen Hubbe " -MODULE_LICENSE(DRIVER_LICENSE); +MODULE_LICENSE("Dual BSD/GPL"); MODULE_VERSION(DRIVER_VERSION); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESCRIPTION); diff --git a/drivers/ntb/test/ntb_tool.c b/drivers/ntb/test/ntb_tool.c index 91526a986caa..e490bbc8726c 100644 --- a/drivers/ntb/test/ntb_tool.c +++ b/drivers/ntb/test/ntb_tool.c @@ -109,12 +109,11 @@ #define DRIVER_NAME "ntb_tool" #define DRIVER_DESCRIPTION "PCIe NTB Debugging Tool" -#define DRIVER_LICENSE "Dual BSD/GPL" #define DRIVER_VERSION "1.0" #define DRIVER_RELDATE "22 April 2015" #define DRIVER_AUTHOR "Allen Hubbe " -MODULE_LICENSE(DRIVER_LICENSE); +MODULE_LICENSE("Dual BSD/GPL"); MODULE_VERSION(DRIVER_VERSION); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESCRIPTION); -- cgit From 3f7756728ef4b0155e4f42d6b8a862dd7c38a9c2 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 20 Nov 2017 10:24:08 -0700 Subject: ntb: remove Intel Atom NTB driver support Removing dead code since this is not being used. 
Signed-off-by: Dave Jiang Signed-off-by: Jon Mason --- drivers/ntb/hw/intel/ntb_hw_intel.c | 309 +----------------------------------- drivers/ntb/hw/intel/ntb_hw_intel.h | 58 ------- 2 files changed, 4 insertions(+), 363 deletions(-) (limited to 'drivers') diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.c b/drivers/ntb/hw/intel/ntb_hw_intel.c index 4de074a86073..341a3d5baa3f 100644 --- a/drivers/ntb/hw/intel/ntb_hw_intel.c +++ b/drivers/ntb/hw/intel/ntb_hw_intel.c @@ -74,12 +74,6 @@ MODULE_AUTHOR("Intel Corporation"); #define bar0_off(base, bar) ((base) + ((bar) << 2)) #define bar2_off(base, bar) bar0_off(base, (bar) - 2) -static const struct intel_ntb_reg atom_reg; -static const struct intel_ntb_alt_reg atom_pri_reg; -static const struct intel_ntb_alt_reg atom_sec_reg; -static const struct intel_ntb_alt_reg atom_b2b_reg; -static const struct intel_ntb_xlat_reg atom_pri_xlat; -static const struct intel_ntb_xlat_reg atom_sec_xlat; static const struct intel_ntb_reg xeon_reg; static const struct intel_ntb_alt_reg xeon_pri_reg; static const struct intel_ntb_alt_reg xeon_sec_reg; @@ -184,15 +178,6 @@ static inline void _iowrite64(u64 val, void __iomem *mmio) #endif #endif -static inline int pdev_is_atom(struct pci_dev *pdev) -{ - switch (pdev->device) { - case PCI_DEVICE_ID_INTEL_NTB_B2B_BWD: - return 1; - } - return 0; -} - static inline int pdev_is_xeon(struct pci_dev *pdev) { switch (pdev->device) { @@ -1006,8 +991,7 @@ static ssize_t ndev_debugfs_read(struct file *filp, char __user *ubuf, { struct intel_ntb_dev *ndev = filp->private_data; - if (pdev_is_xeon(ndev->ntb.pdev) || - pdev_is_atom(ndev->ntb.pdev)) + if (pdev_is_xeon(ndev->ntb.pdev)) return ndev_ntb_debugfs_read(filp, ubuf, count, offp); else if (pdev_is_skx_xeon(ndev->ntb.pdev)) return ndev_ntb3_debugfs_read(filp, ubuf, count, offp); @@ -1439,242 +1423,6 @@ static int intel_ntb_peer_spad_write(struct ntb_dev *ntb, int pidx, ndev->peer_reg->spad); } -/* ATOM */ - -static u64 atom_db_ioread(void __iomem 
*mmio) -{ - return ioread64(mmio); -} - -static void atom_db_iowrite(u64 bits, void __iomem *mmio) -{ - iowrite64(bits, mmio); -} - -static int atom_poll_link(struct intel_ntb_dev *ndev) -{ - u32 ntb_ctl; - - ntb_ctl = ioread32(ndev->self_mmio + ATOM_NTBCNTL_OFFSET); - - if (ntb_ctl == ndev->ntb_ctl) - return 0; - - ndev->ntb_ctl = ntb_ctl; - - ndev->lnk_sta = ioread32(ndev->self_mmio + ATOM_LINK_STATUS_OFFSET); - - return 1; -} - -static int atom_link_is_up(struct intel_ntb_dev *ndev) -{ - return ATOM_NTB_CTL_ACTIVE(ndev->ntb_ctl); -} - -static int atom_link_is_err(struct intel_ntb_dev *ndev) -{ - if (ioread32(ndev->self_mmio + ATOM_LTSSMSTATEJMP_OFFSET) - & ATOM_LTSSMSTATEJMP_FORCEDETECT) - return 1; - - if (ioread32(ndev->self_mmio + ATOM_IBSTERRRCRVSTS0_OFFSET) - & ATOM_IBIST_ERR_OFLOW) - return 1; - - return 0; -} - -static inline enum ntb_topo atom_ppd_topo(struct intel_ntb_dev *ndev, u32 ppd) -{ - struct device *dev = &ndev->ntb.pdev->dev; - - switch (ppd & ATOM_PPD_TOPO_MASK) { - case ATOM_PPD_TOPO_B2B_USD: - dev_dbg(dev, "PPD %d B2B USD\n", ppd); - return NTB_TOPO_B2B_USD; - - case ATOM_PPD_TOPO_B2B_DSD: - dev_dbg(dev, "PPD %d B2B DSD\n", ppd); - return NTB_TOPO_B2B_DSD; - - case ATOM_PPD_TOPO_PRI_USD: - case ATOM_PPD_TOPO_PRI_DSD: /* accept bogus PRI_DSD */ - case ATOM_PPD_TOPO_SEC_USD: - case ATOM_PPD_TOPO_SEC_DSD: /* accept bogus SEC_DSD */ - dev_dbg(dev, "PPD %d non B2B disabled\n", ppd); - return NTB_TOPO_NONE; - } - - dev_dbg(dev, "PPD %d invalid\n", ppd); - return NTB_TOPO_NONE; -} - -static void atom_link_hb(struct work_struct *work) -{ - struct intel_ntb_dev *ndev = hb_ndev(work); - struct device *dev = &ndev->ntb.pdev->dev; - unsigned long poll_ts; - void __iomem *mmio; - u32 status32; - - poll_ts = ndev->last_ts + ATOM_LINK_HB_TIMEOUT; - - /* Delay polling the link status if an interrupt was received, - * unless the cached link status says the link is down. 
- */ - if (time_after(poll_ts, jiffies) && atom_link_is_up(ndev)) { - schedule_delayed_work(&ndev->hb_timer, poll_ts - jiffies); - return; - } - - if (atom_poll_link(ndev)) - ntb_link_event(&ndev->ntb); - - if (atom_link_is_up(ndev) || !atom_link_is_err(ndev)) { - schedule_delayed_work(&ndev->hb_timer, ATOM_LINK_HB_TIMEOUT); - return; - } - - /* Link is down with error: recover the link! */ - - mmio = ndev->self_mmio; - - /* Driver resets the NTB ModPhy lanes - magic! */ - iowrite8(0xe0, mmio + ATOM_MODPHY_PCSREG6); - iowrite8(0x40, mmio + ATOM_MODPHY_PCSREG4); - iowrite8(0x60, mmio + ATOM_MODPHY_PCSREG4); - iowrite8(0x60, mmio + ATOM_MODPHY_PCSREG6); - - /* Driver waits 100ms to allow the NTB ModPhy to settle */ - msleep(100); - - /* Clear AER Errors, write to clear */ - status32 = ioread32(mmio + ATOM_ERRCORSTS_OFFSET); - dev_dbg(dev, "ERRCORSTS = %x\n", status32); - status32 &= PCI_ERR_COR_REP_ROLL; - iowrite32(status32, mmio + ATOM_ERRCORSTS_OFFSET); - - /* Clear unexpected electrical idle event in LTSSM, write to clear */ - status32 = ioread32(mmio + ATOM_LTSSMERRSTS0_OFFSET); - dev_dbg(dev, "LTSSMERRSTS0 = %x\n", status32); - status32 |= ATOM_LTSSMERRSTS0_UNEXPECTEDEI; - iowrite32(status32, mmio + ATOM_LTSSMERRSTS0_OFFSET); - - /* Clear DeSkew Buffer error, write to clear */ - status32 = ioread32(mmio + ATOM_DESKEWSTS_OFFSET); - dev_dbg(dev, "DESKEWSTS = %x\n", status32); - status32 |= ATOM_DESKEWSTS_DBERR; - iowrite32(status32, mmio + ATOM_DESKEWSTS_OFFSET); - - status32 = ioread32(mmio + ATOM_IBSTERRRCRVSTS0_OFFSET); - dev_dbg(dev, "IBSTERRRCRVSTS0 = %x\n", status32); - status32 &= ATOM_IBIST_ERR_OFLOW; - iowrite32(status32, mmio + ATOM_IBSTERRRCRVSTS0_OFFSET); - - /* Releases the NTB state machine to allow the link to retrain */ - status32 = ioread32(mmio + ATOM_LTSSMSTATEJMP_OFFSET); - dev_dbg(dev, "LTSSMSTATEJMP = %x\n", status32); - status32 &= ~ATOM_LTSSMSTATEJMP_FORCEDETECT; - iowrite32(status32, mmio + ATOM_LTSSMSTATEJMP_OFFSET); - - /* There is a 
potential race between the 2 NTB devices recovering at the - * same time. If the times are the same, the link will not recover and - * the driver will be stuck in this loop forever. Add a random interval - * to the recovery time to prevent this race. - */ - schedule_delayed_work(&ndev->hb_timer, ATOM_LINK_RECOVERY_TIME - + prandom_u32() % ATOM_LINK_RECOVERY_TIME); -} - -static int atom_init_isr(struct intel_ntb_dev *ndev) -{ - int rc; - - rc = ndev_init_isr(ndev, 1, ATOM_DB_MSIX_VECTOR_COUNT, - ATOM_DB_MSIX_VECTOR_SHIFT, ATOM_DB_TOTAL_SHIFT); - if (rc) - return rc; - - /* ATOM doesn't have link status interrupt, poll on that platform */ - ndev->last_ts = jiffies; - INIT_DELAYED_WORK(&ndev->hb_timer, atom_link_hb); - schedule_delayed_work(&ndev->hb_timer, ATOM_LINK_HB_TIMEOUT); - - return 0; -} - -static void atom_deinit_isr(struct intel_ntb_dev *ndev) -{ - cancel_delayed_work_sync(&ndev->hb_timer); - ndev_deinit_isr(ndev); -} - -static int atom_init_ntb(struct intel_ntb_dev *ndev) -{ - ndev->mw_count = ATOM_MW_COUNT; - ndev->spad_count = ATOM_SPAD_COUNT; - ndev->db_count = ATOM_DB_COUNT; - - switch (ndev->ntb.topo) { - case NTB_TOPO_B2B_USD: - case NTB_TOPO_B2B_DSD: - ndev->self_reg = &atom_pri_reg; - ndev->peer_reg = &atom_b2b_reg; - ndev->xlat_reg = &atom_sec_xlat; - - /* Enable Bus Master and Memory Space on the secondary side */ - iowrite16(PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER, - ndev->self_mmio + ATOM_SPCICMD_OFFSET); - - break; - - default: - return -EINVAL; - } - - ndev->db_valid_mask = BIT_ULL(ndev->db_count) - 1; - - return 0; -} - -static int atom_init_dev(struct intel_ntb_dev *ndev) -{ - u32 ppd; - int rc; - - rc = pci_read_config_dword(ndev->ntb.pdev, ATOM_PPD_OFFSET, &ppd); - if (rc) - return -EIO; - - ndev->ntb.topo = atom_ppd_topo(ndev, ppd); - if (ndev->ntb.topo == NTB_TOPO_NONE) - return -EINVAL; - - rc = atom_init_ntb(ndev); - if (rc) - return rc; - - rc = atom_init_isr(ndev); - if (rc) - return rc; - - if (ndev->ntb.topo != NTB_TOPO_SEC) { - 
/* Initiate PCI-E link training */ - rc = pci_write_config_dword(ndev->ntb.pdev, ATOM_PPD_OFFSET, - ppd | ATOM_PPD_INIT_LINK); - if (rc) - return rc; - } - - return 0; -} - -static void atom_deinit_dev(struct intel_ntb_dev *ndev) -{ - atom_deinit_isr(ndev); -} - /* Skylake Xeon NTB */ static int skx_poll_link(struct intel_ntb_dev *ndev) @@ -2658,24 +2406,7 @@ static int intel_ntb_pci_probe(struct pci_dev *pdev, node = dev_to_node(&pdev->dev); - if (pdev_is_atom(pdev)) { - ndev = kzalloc_node(sizeof(*ndev), GFP_KERNEL, node); - if (!ndev) { - rc = -ENOMEM; - goto err_ndev; - } - - ndev_init_struct(ndev, pdev); - - rc = intel_ntb_init_pci(ndev, pdev); - if (rc) - goto err_init_pci; - - rc = atom_init_dev(ndev); - if (rc) - goto err_init_dev; - - } else if (pdev_is_xeon(pdev)) { + if (pdev_is_xeon(pdev)) { ndev = kzalloc_node(sizeof(*ndev), GFP_KERNEL, node); if (!ndev) { rc = -ENOMEM; @@ -2731,9 +2462,7 @@ static int intel_ntb_pci_probe(struct pci_dev *pdev, err_register: ndev_deinit_debugfs(ndev); - if (pdev_is_atom(pdev)) - atom_deinit_dev(ndev); - else if (pdev_is_xeon(pdev) || pdev_is_skx_xeon(pdev)) + if (pdev_is_xeon(pdev) || pdev_is_skx_xeon(pdev)) xeon_deinit_dev(ndev); err_init_dev: intel_ntb_deinit_pci(ndev); @@ -2749,41 +2478,12 @@ static void intel_ntb_pci_remove(struct pci_dev *pdev) ntb_unregister_device(&ndev->ntb); ndev_deinit_debugfs(ndev); - if (pdev_is_atom(pdev)) - atom_deinit_dev(ndev); - else if (pdev_is_xeon(pdev) || pdev_is_skx_xeon(pdev)) + if (pdev_is_xeon(pdev) || pdev_is_skx_xeon(pdev)) xeon_deinit_dev(ndev); intel_ntb_deinit_pci(ndev); kfree(ndev); } -static const struct intel_ntb_reg atom_reg = { - .poll_link = atom_poll_link, - .link_is_up = atom_link_is_up, - .db_ioread = atom_db_ioread, - .db_iowrite = atom_db_iowrite, - .db_size = sizeof(u64), - .ntb_ctl = ATOM_NTBCNTL_OFFSET, - .mw_bar = {2, 4}, -}; - -static const struct intel_ntb_alt_reg atom_pri_reg = { - .db_bell = ATOM_PDOORBELL_OFFSET, - .db_mask = ATOM_PDBMSK_OFFSET, - .spad 
= ATOM_SPAD_OFFSET, -}; - -static const struct intel_ntb_alt_reg atom_b2b_reg = { - .db_bell = ATOM_B2B_DOORBELL_OFFSET, - .spad = ATOM_B2B_SPAD_OFFSET, -}; - -static const struct intel_ntb_xlat_reg atom_sec_xlat = { - /* FIXME : .bar0_base = ATOM_SBAR0BASE_OFFSET, */ - /* FIXME : .bar2_limit = ATOM_SBAR2LMT_OFFSET, */ - .bar2_xlat = ATOM_SBAR2XLAT_OFFSET, -}; - static const struct intel_ntb_reg xeon_reg = { .poll_link = xeon_poll_link, .link_is_up = xeon_link_is_up, @@ -2940,7 +2640,6 @@ static const struct file_operations intel_ntb_debugfs_info = { }; static const struct pci_device_id intel_ntb_pci_tbl[] = { - {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_BWD)}, {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_JSF)}, {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_SNB)}, {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_IVT)}, diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.h b/drivers/ntb/hw/intel/ntb_hw_intel.h index 2d6c38afb128..4415aa7ea775 100644 --- a/drivers/ntb/hw/intel/ntb_hw_intel.h +++ b/drivers/ntb/hw/intel/ntb_hw_intel.h @@ -66,7 +66,6 @@ #define PCI_DEVICE_ID_INTEL_NTB_B2B_HSX 0x2F0D #define PCI_DEVICE_ID_INTEL_NTB_PS_HSX 0x2F0E #define PCI_DEVICE_ID_INTEL_NTB_SS_HSX 0x2F0F -#define PCI_DEVICE_ID_INTEL_NTB_B2B_BWD 0x0C4E #define PCI_DEVICE_ID_INTEL_NTB_B2B_BDX 0x6F0D #define PCI_DEVICE_ID_INTEL_NTB_PS_BDX 0x6F0E #define PCI_DEVICE_ID_INTEL_NTB_SS_BDX 0x6F0F @@ -196,63 +195,6 @@ #define SKX_DB_TOTAL_SHIFT 33 #define SKX_SPAD_COUNT 16 -/* Intel Atom hardware */ - -#define ATOM_SBAR2XLAT_OFFSET 0x0008 -#define ATOM_PDOORBELL_OFFSET 0x0020 -#define ATOM_PDBMSK_OFFSET 0x0028 -#define ATOM_NTBCNTL_OFFSET 0x0060 -#define ATOM_SPAD_OFFSET 0x0080 -#define ATOM_PPD_OFFSET 0x00d4 -#define ATOM_PBAR2XLAT_OFFSET 0x8008 -#define ATOM_B2B_DOORBELL_OFFSET 0x8020 -#define ATOM_B2B_SPAD_OFFSET 0x8080 -#define ATOM_SPCICMD_OFFSET 0xb004 -#define ATOM_LINK_STATUS_OFFSET 0xb052 -#define ATOM_ERRCORSTS_OFFSET 0xb110 -#define ATOM_IP_BASE 0xc000 -#define 
ATOM_DESKEWSTS_OFFSET (ATOM_IP_BASE + 0x3024) -#define ATOM_LTSSMERRSTS0_OFFSET (ATOM_IP_BASE + 0x3180) -#define ATOM_LTSSMSTATEJMP_OFFSET (ATOM_IP_BASE + 0x3040) -#define ATOM_IBSTERRRCRVSTS0_OFFSET (ATOM_IP_BASE + 0x3324) -#define ATOM_MODPHY_PCSREG4 0x1c004 -#define ATOM_MODPHY_PCSREG6 0x1c006 - -#define ATOM_PPD_INIT_LINK 0x0008 -#define ATOM_PPD_CONN_MASK 0x0300 -#define ATOM_PPD_CONN_TRANSPARENT 0x0000 -#define ATOM_PPD_CONN_B2B 0x0100 -#define ATOM_PPD_CONN_RP 0x0200 -#define ATOM_PPD_DEV_MASK 0x1000 -#define ATOM_PPD_DEV_USD 0x0000 -#define ATOM_PPD_DEV_DSD 0x1000 -#define ATOM_PPD_TOPO_MASK (ATOM_PPD_CONN_MASK | ATOM_PPD_DEV_MASK) -#define ATOM_PPD_TOPO_PRI_USD (ATOM_PPD_CONN_TRANSPARENT | ATOM_PPD_DEV_USD) -#define ATOM_PPD_TOPO_PRI_DSD (ATOM_PPD_CONN_TRANSPARENT | ATOM_PPD_DEV_DSD) -#define ATOM_PPD_TOPO_SEC_USD (ATOM_PPD_CONN_RP | ATOM_PPD_DEV_USD) -#define ATOM_PPD_TOPO_SEC_DSD (ATOM_PPD_CONN_RP | ATOM_PPD_DEV_DSD) -#define ATOM_PPD_TOPO_B2B_USD (ATOM_PPD_CONN_B2B | ATOM_PPD_DEV_USD) -#define ATOM_PPD_TOPO_B2B_DSD (ATOM_PPD_CONN_B2B | ATOM_PPD_DEV_DSD) - -#define ATOM_MW_COUNT 2 -#define ATOM_DB_COUNT 34 -#define ATOM_DB_VALID_MASK (BIT_ULL(ATOM_DB_COUNT) - 1) -#define ATOM_DB_MSIX_VECTOR_COUNT 34 -#define ATOM_DB_MSIX_VECTOR_SHIFT 1 -#define ATOM_DB_TOTAL_SHIFT 34 -#define ATOM_SPAD_COUNT 16 - -#define ATOM_NTB_CTL_DOWN_BIT BIT(16) -#define ATOM_NTB_CTL_ACTIVE(x) !(x & ATOM_NTB_CTL_DOWN_BIT) - -#define ATOM_DESKEWSTS_DBERR BIT(15) -#define ATOM_LTSSMERRSTS0_UNEXPECTEDEI BIT(20) -#define ATOM_LTSSMSTATEJMP_FORCEDETECT BIT(2) -#define ATOM_IBIST_ERR_OFLOW 0x7FFF7FFF - -#define ATOM_LINK_HB_TIMEOUT msecs_to_jiffies(1000) -#define ATOM_LINK_RECOVERY_TIME msecs_to_jiffies(500) - /* Ntb control and link status */ #define NTB_CTL_CFG_LOCK BIT(0) -- cgit From c5ec8b451a02674882c75f51073a4f0323b37550 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 21 Nov 2017 22:59:45 +0000 Subject: NTB: switchtec_ntb: fix spelling mistake: "peforming" -> "performing" 
Trivial fix to spelling mistake in dev_err error message Signed-off-by: Colin Ian King Reviewed-By: Logan Gunthorpe Signed-off-by: Jon Mason --- drivers/ntb/hw/mscc/ntb_hw_switchtec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c index ca0334a6b759..c23b4e3f280f 100644 --- a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c +++ b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c @@ -172,7 +172,7 @@ static int switchtec_ntb_part_op(struct switchtec_ntb *sndev, if (ps == status) { dev_err(&sndev->stdev->dev, - "Timed out while peforming %s (%d). (%08x)", + "Timed out while performing %s (%d). (%08x)", op_text[op], op, ioread32(&ctl->partition_status)); -- cgit From 2dd0f6a64a36bf7176c6055c919da3f53b7924f3 Mon Sep 17 00:00:00 2001 From: Jon Mason Date: Mon, 27 Nov 2017 11:03:57 -0500 Subject: NTB: switchtec_ntb: Add new line on appropriate printks Trivial addition of "\n" to the dev_* prints where necessary Signed-off-by: Joe Perches Signed-off-by: Jon Mason --- drivers/ntb/hw/mscc/ntb_hw_switchtec.c | 42 +++++++++++++++++----------------- 1 file changed, 21 insertions(+), 21 deletions(-) (limited to 'drivers') diff --git a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c index c23b4e3f280f..709f37fbe232 100644 --- a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c +++ b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c @@ -172,7 +172,7 @@ static int switchtec_ntb_part_op(struct switchtec_ntb *sndev, if (ps == status) { dev_err(&sndev->stdev->dev, - "Timed out while performing %s (%d). (%08x)", + "Timed out while performing %s (%d). 
(%08x)\n", op_text[op], op, ioread32(&ctl->partition_status)); @@ -306,7 +306,7 @@ static int switchtec_ntb_mw_set_trans(struct ntb_dev *ntb, int pidx, int widx, if (pidx != NTB_DEF_PEER_IDX) return -EINVAL; - dev_dbg(&sndev->stdev->dev, "MW %d: part %d addr %pad size %pap", + dev_dbg(&sndev->stdev->dev, "MW %d: part %d addr %pad size %pap\n", widx, pidx, &addr, &size); if (widx >= switchtec_ntb_mw_count(ntb, pidx)) @@ -337,7 +337,7 @@ static int switchtec_ntb_mw_set_trans(struct ntb_dev *ntb, int pidx, int widx, if (rc == -EIO) { dev_err(&sndev->stdev->dev, - "Hardware reported an error configuring mw %d: %08x", + "Hardware reported an error configuring mw %d: %08x\n", widx, ioread32(&ctl->bar_error)); if (widx < nr_direct_mw) @@ -491,7 +491,7 @@ static void switchtec_ntb_check_link(struct switchtec_ntb *sndev) if (link_sta != old) { switchtec_ntb_send_msg(sndev, LINK_MESSAGE, MSG_CHECK_LINK); ntb_link_event(&sndev->ntb); - dev_info(&sndev->stdev->dev, "ntb link %s", + dev_info(&sndev->stdev->dev, "ntb link %s\n", link_sta ? 
"up" : "down"); } } @@ -523,7 +523,7 @@ static int switchtec_ntb_link_enable(struct ntb_dev *ntb, { struct switchtec_ntb *sndev = ntb_sndev(ntb); - dev_dbg(&sndev->stdev->dev, "enabling link"); + dev_dbg(&sndev->stdev->dev, "enabling link\n"); sndev->self_shared->link_sta = 1; switchtec_ntb_send_msg(sndev, LINK_MESSAGE, MSG_LINK_UP); @@ -537,7 +537,7 @@ static int switchtec_ntb_link_disable(struct ntb_dev *ntb) { struct switchtec_ntb *sndev = ntb_sndev(ntb); - dev_dbg(&sndev->stdev->dev, "disabling link"); + dev_dbg(&sndev->stdev->dev, "disabling link\n"); sndev->self_shared->link_sta = 0; switchtec_ntb_send_msg(sndev, LINK_MESSAGE, MSG_LINK_UP); @@ -792,7 +792,7 @@ static void switchtec_ntb_init_sndev(struct switchtec_ntb *sndev) part_map &= ~(1 << sndev->self_partition); sndev->peer_partition = ffs(part_map) - 1; - dev_dbg(&sndev->stdev->dev, "Partition ID %d of %d (%llx)", + dev_dbg(&sndev->stdev->dev, "Partition ID %d of %d (%llx)\n", sndev->self_partition, sndev->stdev->partition_count, part_map); @@ -829,7 +829,7 @@ static void switchtec_ntb_init_mw(struct switchtec_ntb *sndev) sndev->nr_lut_mw = ioread16(&sndev->mmio_self_ctrl->lut_table_entries); sndev->nr_lut_mw = rounddown_pow_of_two(sndev->nr_lut_mw); - dev_dbg(&sndev->stdev->dev, "MWs: %d direct, %d lut", + dev_dbg(&sndev->stdev->dev, "MWs: %d direct, %d lut\n", sndev->nr_direct_mw, sndev->nr_lut_mw); sndev->peer_nr_direct_mw = map_bars(sndev->peer_direct_mw_to_bar, @@ -839,7 +839,7 @@ static void switchtec_ntb_init_mw(struct switchtec_ntb *sndev) ioread16(&sndev->mmio_peer_ctrl->lut_table_entries); sndev->peer_nr_lut_mw = rounddown_pow_of_two(sndev->peer_nr_lut_mw); - dev_dbg(&sndev->stdev->dev, "Peer MWs: %d direct, %d lut", + dev_dbg(&sndev->stdev->dev, "Peer MWs: %d direct, %d lut\n", sndev->peer_nr_direct_mw, sndev->peer_nr_lut_mw); } @@ -897,7 +897,7 @@ static int switchtec_ntb_init_req_id_table(struct switchtec_ntb *sndev) if (ioread32(&sndev->mmio_self_ctrl->req_id_table_size) < 2) { 
dev_err(&sndev->stdev->dev, - "Not enough requester IDs available."); + "Not enough requester IDs available\n"); return -EFAULT; } @@ -928,7 +928,7 @@ static int switchtec_ntb_init_req_id_table(struct switchtec_ntb *sndev) if (rc == -EIO) { error = ioread32(&sndev->mmio_self_ctrl->req_id_error); dev_err(&sndev->stdev->dev, - "Error setting up the requester ID table: %08x", + "Error setting up the requester ID table: %08x\n", error); } @@ -975,7 +975,7 @@ static int switchtec_ntb_init_shared_mw(struct switchtec_ntb *sndev) GFP_KERNEL); if (!sndev->self_shared) { dev_err(&sndev->stdev->dev, - "unable to allocate memory for shared mw"); + "unable to allocate memory for shared mw\n"); return -ENOMEM; } @@ -1005,7 +1005,7 @@ static int switchtec_ntb_init_shared_mw(struct switchtec_ntb *sndev) bar_error = ioread32(&ctl->bar_error); lut_error = ioread32(&ctl->lut_error); dev_err(&sndev->stdev->dev, - "Error setting up shared MW: %08x / %08x", + "Error setting up shared MW: %08x / %08x\n", bar_error, lut_error); goto unalloc_and_exit; } @@ -1016,7 +1016,7 @@ static int switchtec_ntb_init_shared_mw(struct switchtec_ntb *sndev) goto unalloc_and_exit; } - dev_dbg(&sndev->stdev->dev, "Shared MW Ready"); + dev_dbg(&sndev->stdev->dev, "Shared MW Ready\n"); return 0; unalloc_and_exit: @@ -1057,8 +1057,8 @@ static irqreturn_t switchtec_ntb_message_isr(int irq, void *dev) u64 msg = ioread64(&sndev->mmio_self_dbmsg->imsg[i]); if (msg & NTB_DBMSG_IMSG_STATUS) { - dev_dbg(&sndev->stdev->dev, "message: %d %08x\n", i, - (u32)msg); + dev_dbg(&sndev->stdev->dev, "message: %d %08x\n", + i, (u32)msg); iowrite8(1, &sndev->mmio_self_dbmsg->imsg[i].status); if (i == LINK_MESSAGE) @@ -1086,7 +1086,7 @@ static int switchtec_ntb_init_db_msg_irq(struct switchtec_ntb *sndev) message_irq == event_irq) message_irq++; - dev_dbg(&sndev->stdev->dev, "irqs - event: %d, db: %d, msgs: %d", + dev_dbg(&sndev->stdev->dev, "irqs - event: %d, db: %d, msgs: %d\n", event_irq, doorbell_irq, message_irq); for (i = 
0; i < idb_vecs - 4; i++) @@ -1136,7 +1136,7 @@ static int switchtec_ntb_add(struct device *dev, return -ENODEV; if (stdev->partition_count != 2) - dev_warn(dev, "ntb driver only supports 2 partitions"); + dev_warn(dev, "ntb driver only supports 2 partitions\n"); sndev = kzalloc_node(sizeof(*sndev), GFP_KERNEL, dev_to_node(dev)); if (!sndev) @@ -1166,7 +1166,7 @@ static int switchtec_ntb_add(struct device *dev, stdev->sndev = sndev; stdev->link_notifier = switchtec_ntb_link_notification; - dev_info(dev, "NTB device registered"); + dev_info(dev, "NTB device registered\n"); return 0; @@ -1176,7 +1176,7 @@ deinit_shared_and_exit: switchtec_ntb_deinit_shared_mw(sndev); free_and_exit: kfree(sndev); - dev_err(dev, "failed to register ntb device: %d", rc); + dev_err(dev, "failed to register ntb device: %d\n", rc); return rc; } @@ -1195,7 +1195,7 @@ void switchtec_ntb_remove(struct device *dev, switchtec_ntb_deinit_db_msg_irq(sndev); switchtec_ntb_deinit_shared_mw(sndev); kfree(sndev); - dev_info(dev, "ntb device unregistered"); + dev_info(dev, "ntb device unregistered\n"); } static struct class_interface switchtec_interface = { -- cgit From 3df54c870f52b4c47b53eead8d22a109f741b91c Mon Sep 17 00:00:00 2001 From: Kelvin Cao Date: Wed, 29 Nov 2017 10:55:24 -0700 Subject: ntb_hw_switchtec: Allow using Switchtec NTB in multi-partition setups Allow using Switchtec NTB in setups that have more than two partitions. Note: this does not enable having multi-host communication, it only allows for a single NTB link between two hosts in a network that might have more than two. Use following logic to determine the NT peer partition: 1) If there are 2 partitions, and the target vector is set in the Switchtec configuration, use the partition specified in target vector. 2) If there are 2 partitions and target vector is unset use the only other partition as specified in the NT EP map. 
3) If there are more than 2 partitions and target vector is set, use the other partition specified in target vector. 4) If there are more than 2 partitions and target vector is unset, this is invalid, so report an error. Signed-off-by: Kelvin Cao [logang@deltatee.com: commit message fleshed out] Signed-off-by: Logan Gunthorpe Reviewed-by: Logan Gunthorpe Signed-off-by: Jon Mason --- drivers/ntb/hw/mscc/ntb_hw_switchtec.c | 57 ++++++++++++++++++++++++++++------ 1 file changed, 48 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c index 709f37fbe232..088ae220ecb4 100644 --- a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c +++ b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c @@ -777,9 +777,12 @@ static const struct ntb_dev_ops switchtec_ntb_ops = { .peer_spad_addr = switchtec_ntb_peer_spad_addr, }; -static void switchtec_ntb_init_sndev(struct switchtec_ntb *sndev) +static int switchtec_ntb_init_sndev(struct switchtec_ntb *sndev) { + u64 tpart_vec; + int self; u64 part_map; + int bit; sndev->ntb.pdev = sndev->stdev->pdev; sndev->ntb.topo = NTB_TOPO_SWITCH; @@ -788,13 +791,47 @@ static void switchtec_ntb_init_sndev(struct switchtec_ntb *sndev) sndev->self_partition = sndev->stdev->partition; sndev->mmio_ntb = sndev->stdev->mmio_ntb; + + self = sndev->self_partition; + tpart_vec = ioread32(&sndev->mmio_ntb->ntp_info[self].target_part_high); + tpart_vec <<= 32; + tpart_vec |= ioread32(&sndev->mmio_ntb->ntp_info[self].target_part_low); + part_map = ioread64(&sndev->mmio_ntb->ep_map); part_map &= ~(1 << sndev->self_partition); - sndev->peer_partition = ffs(part_map) - 1; - dev_dbg(&sndev->stdev->dev, "Partition ID %d of %d (%llx)\n", - sndev->self_partition, sndev->stdev->partition_count, - part_map); + if (!ffs(tpart_vec)) { + if (sndev->stdev->partition_count != 2) { + dev_err(&sndev->stdev->dev, + "ntb target partition not defined\n"); + return -ENODEV; + } + + bit = ffs(part_map); + if (!bit) 
{ + dev_err(&sndev->stdev->dev, + "peer partition is not NT partition\n"); + return -ENODEV; + } + + sndev->peer_partition = bit - 1; + } else { + if (ffs(tpart_vec) != fls(tpart_vec)) { + dev_err(&sndev->stdev->dev, + "ntb driver only supports 1 pair of 1-1 ntb mapping\n"); + return -ENODEV; + } + + sndev->peer_partition = ffs(tpart_vec) - 1; + if (!(part_map && (1 << sndev->peer_partition))) { + dev_err(&sndev->stdev->dev, + "ntb target partition is not NT partition\n"); + return -ENODEV; + } + } + + dev_dbg(&sndev->stdev->dev, "Partition ID %d of %d\n", + sndev->self_partition, sndev->stdev->partition_count); sndev->mmio_ctrl = (void * __iomem)sndev->mmio_ntb + SWITCHTEC_NTB_REG_CTRL_OFFSET; @@ -804,6 +841,8 @@ static void switchtec_ntb_init_sndev(struct switchtec_ntb *sndev) sndev->mmio_self_ctrl = &sndev->mmio_ctrl[sndev->self_partition]; sndev->mmio_peer_ctrl = &sndev->mmio_ctrl[sndev->peer_partition]; sndev->mmio_self_dbmsg = &sndev->mmio_dbmsg[sndev->self_partition]; + + return 0; } static int map_bars(int *map, struct ntb_ctrl_regs __iomem *ctrl) @@ -1135,15 +1174,15 @@ static int switchtec_ntb_add(struct device *dev, if (stdev->pdev->class != MICROSEMI_NTB_CLASSCODE) return -ENODEV; - if (stdev->partition_count != 2) - dev_warn(dev, "ntb driver only supports 2 partitions\n"); - sndev = kzalloc_node(sizeof(*sndev), GFP_KERNEL, dev_to_node(dev)); if (!sndev) return -ENOMEM; sndev->stdev = stdev; - switchtec_ntb_init_sndev(sndev); + rc = switchtec_ntb_init_sndev(sndev); + if (rc) + goto free_and_exit; + switchtec_ntb_init_mw(sndev); switchtec_ntb_init_db(sndev); switchtec_ntb_init_msgs(sndev); -- cgit From c3585cd8708edb1c16fa84f8f3dee31741a66a9e Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Wed, 29 Nov 2017 10:55:25 -0700 Subject: ntb_hw_switchtec: Keep track of the number of LUT windows used by the driver This is a prep patch in order to support the crosslink feature which will require the driver to use another reserved LUT window. 
To simplify this, we add some code to track the number of reserved LUT windows in use instead of assuming this is always 1. Signed-off-by: Logan Gunthorpe Signed-off-by: Jon Mason --- drivers/ntb/hw/mscc/ntb_hw_switchtec.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c index 088ae220ecb4..51fec6497164 100644 --- a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c +++ b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c @@ -109,6 +109,7 @@ struct switchtec_ntb { int nr_direct_mw; int nr_lut_mw; + int nr_rsvd_luts; int direct_mw_to_bar[MAX_DIRECT_MW]; int peer_nr_direct_mw; @@ -197,7 +198,7 @@ static int switchtec_ntb_mw_count(struct ntb_dev *ntb, int pidx) { struct switchtec_ntb *sndev = ntb_sndev(ntb); int nr_direct_mw = sndev->peer_nr_direct_mw; - int nr_lut_mw = sndev->peer_nr_lut_mw - 1; + int nr_lut_mw = sndev->peer_nr_lut_mw - sndev->nr_rsvd_luts; if (pidx != NTB_DEF_PEER_IDX) return -EINVAL; @@ -210,12 +211,12 @@ static int switchtec_ntb_mw_count(struct ntb_dev *ntb, int pidx) static int lut_index(struct switchtec_ntb *sndev, int mw_idx) { - return mw_idx - sndev->nr_direct_mw + 1; + return mw_idx - sndev->nr_direct_mw + sndev->nr_rsvd_luts; } static int peer_lut_index(struct switchtec_ntb *sndev, int mw_idx) { - return mw_idx - sndev->peer_nr_direct_mw + 1; + return mw_idx - sndev->peer_nr_direct_mw + sndev->nr_rsvd_luts; } static int switchtec_ntb_mw_get_align(struct ntb_dev *ntb, int pidx, @@ -355,8 +356,9 @@ static int switchtec_ntb_mw_set_trans(struct ntb_dev *ntb, int pidx, int widx, static int switchtec_ntb_peer_mw_count(struct ntb_dev *ntb) { struct switchtec_ntb *sndev = ntb_sndev(ntb); + int nr_lut_mw = sndev->nr_lut_mw - sndev->nr_rsvd_luts; - return sndev->nr_direct_mw + (use_lut_mws ? sndev->nr_lut_mw - 1 : 0); + return sndev->nr_direct_mw + (use_lut_mws ? 
nr_lut_mw : 0); } static int switchtec_ntb_direct_get_addr(struct switchtec_ntb *sndev, @@ -1008,6 +1010,7 @@ static int switchtec_ntb_init_shared_mw(struct switchtec_ntb *sndev) u32 ctl_val; int rc; + sndev->nr_rsvd_luts++; sndev->self_shared = dma_zalloc_coherent(&sndev->stdev->pdev->dev, LUT_SIZE, &sndev->self_shared_dma, @@ -1074,6 +1077,7 @@ static void switchtec_ntb_deinit_shared_mw(struct switchtec_ntb *sndev) dma_free_coherent(&sndev->stdev->pdev->dev, LUT_SIZE, sndev->self_shared, sndev->self_shared_dma); + sndev->nr_rsvd_luts--; } static irqreturn_t switchtec_ntb_doorbell_isr(int irq, void *dev) -- cgit From 12cb203b1b3e2a43d6e3f5f5c6e2071636334fc2 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Wed, 29 Nov 2017 10:55:26 -0700 Subject: ntb_hw_switchtec: Create helper function to setup reserved LUT MWs This is a prep patch in order to support the crosslink feature which will require the driver to use another reserved LUT window. To simplify this we move the code which sets up the reserved LUT window into a helper function which will be used by the crosslink initialization. 
Signed-off-by: Logan Gunthorpe Signed-off-by: Jon Mason --- drivers/ntb/hw/mscc/ntb_hw_switchtec.c | 72 ++++++++++++++++++++-------------- 1 file changed, 43 insertions(+), 29 deletions(-) (limited to 'drivers') diff --git a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c index 51fec6497164..b18e938312e1 100644 --- a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c +++ b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c @@ -847,6 +847,46 @@ static int switchtec_ntb_init_sndev(struct switchtec_ntb *sndev) return 0; } +static int config_rsvd_lut_win(struct switchtec_ntb *sndev, + struct ntb_ctrl_regs __iomem *ctl, + int lut_idx, int partition, + dma_addr_t addr) +{ + int peer_bar = sndev->peer_direct_mw_to_bar[0]; + u32 ctl_val; + int rc; + + rc = switchtec_ntb_part_op(sndev, ctl, NTB_CTRL_PART_OP_LOCK, + NTB_CTRL_PART_STATUS_LOCKED); + if (rc) + return rc; + + ctl_val = ioread32(&ctl->bar_entry[peer_bar].ctl); + ctl_val &= 0xFF; + ctl_val |= NTB_CTRL_BAR_LUT_WIN_EN; + ctl_val |= ilog2(LUT_SIZE) << 8; + ctl_val |= (sndev->nr_lut_mw - 1) << 14; + iowrite32(ctl_val, &ctl->bar_entry[peer_bar].ctl); + + iowrite64((NTB_CTRL_LUT_EN | (partition << 1) | addr), + &ctl->lut_entry[lut_idx]); + + rc = switchtec_ntb_part_op(sndev, ctl, NTB_CTRL_PART_OP_CFG, + NTB_CTRL_PART_STATUS_NORMAL); + if (rc) { + u32 bar_error, lut_error; + + bar_error = ioread32(&ctl->bar_error); + lut_error = ioread32(&ctl->lut_error); + dev_err(&sndev->stdev->dev, + "Error setting up reserved lut window: %08x / %08x\n", + bar_error, lut_error); + return rc; + } + + return 0; +} + static int map_bars(int *map, struct ntb_ctrl_regs __iomem *ctrl) { int i; @@ -1004,10 +1044,7 @@ static void switchtec_ntb_init_shared(struct switchtec_ntb *sndev) static int switchtec_ntb_init_shared_mw(struct switchtec_ntb *sndev) { - struct ntb_ctrl_regs __iomem *ctl = sndev->mmio_peer_ctrl; int self_bar = sndev->direct_mw_to_bar[0]; - int peer_bar = sndev->peer_direct_mw_to_bar[0]; - u32 ctl_val; int rc; 
sndev->nr_rsvd_luts++; @@ -1023,35 +1060,12 @@ static int switchtec_ntb_init_shared_mw(struct switchtec_ntb *sndev) switchtec_ntb_init_shared(sndev); - rc = switchtec_ntb_part_op(sndev, ctl, NTB_CTRL_PART_OP_LOCK, - NTB_CTRL_PART_STATUS_LOCKED); + rc = config_rsvd_lut_win(sndev, sndev->mmio_peer_ctrl, 0, + sndev->self_partition, + sndev->self_shared_dma); if (rc) goto unalloc_and_exit; - ctl_val = ioread32(&ctl->bar_entry[peer_bar].ctl); - ctl_val &= 0xFF; - ctl_val |= NTB_CTRL_BAR_LUT_WIN_EN; - ctl_val |= ilog2(LUT_SIZE) << 8; - ctl_val |= (sndev->nr_lut_mw - 1) << 14; - iowrite32(ctl_val, &ctl->bar_entry[peer_bar].ctl); - - iowrite64((NTB_CTRL_LUT_EN | (sndev->self_partition << 1) | - sndev->self_shared_dma), - &ctl->lut_entry[0]); - - rc = switchtec_ntb_part_op(sndev, ctl, NTB_CTRL_PART_OP_CFG, - NTB_CTRL_PART_STATUS_NORMAL); - if (rc) { - u32 bar_error, lut_error; - - bar_error = ioread32(&ctl->bar_error); - lut_error = ioread32(&ctl->lut_error); - dev_err(&sndev->stdev->dev, - "Error setting up shared MW: %08x / %08x\n", - bar_error, lut_error); - goto unalloc_and_exit; - } - sndev->peer_shared = pci_iomap(sndev->stdev->pdev, self_bar, LUT_SIZE); if (!sndev->peer_shared) { rc = -ENOMEM; -- cgit From bbe35ca5aa2b9e7413c3b14c4887e05829bcd822 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Wed, 29 Nov 2017 10:55:27 -0700 Subject: ntb_hw_switchtec: Make switchtec_ntb_init_req_id_table() more general This is a prep patch in order to support the crosslink feature which will require the driver to set up the requester ID table in another partition as well as its own. To aid this, create a helper function which sets up the requester IDs from an array.
Signed-off-by: Logan Gunthorpe Signed-off-by: Jon Mason --- drivers/ntb/hw/mscc/ntb_hw_switchtec.c | 92 +++++++++++++++++++++------------- 1 file changed, 56 insertions(+), 36 deletions(-) (limited to 'drivers') diff --git a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c index b18e938312e1..4adc32fe035a 100644 --- a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c +++ b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c @@ -887,6 +887,55 @@ static int config_rsvd_lut_win(struct switchtec_ntb *sndev, return 0; } +static int config_req_id_table(struct switchtec_ntb *sndev, + struct ntb_ctrl_regs __iomem *mmio_ctrl, + int *req_ids, int count) +{ + int i, rc = 0; + u32 error; + u32 proxy_id; + + if (ioread32(&mmio_ctrl->req_id_table_size) < count) { + dev_err(&sndev->stdev->dev, + "Not enough requester IDs available.\n"); + return -EFAULT; + } + + rc = switchtec_ntb_part_op(sndev, mmio_ctrl, + NTB_CTRL_PART_OP_LOCK, + NTB_CTRL_PART_STATUS_LOCKED); + if (rc) + return rc; + + iowrite32(NTB_PART_CTRL_ID_PROT_DIS, + &mmio_ctrl->partition_ctrl); + + for (i = 0; i < count; i++) { + iowrite32(req_ids[i] << 16 | NTB_CTRL_REQ_ID_EN, + &mmio_ctrl->req_id_table[i]); + + proxy_id = ioread32(&mmio_ctrl->req_id_table[i]); + dev_dbg(&sndev->stdev->dev, + "Requester ID %02X:%02X.%X -> BB:%02X.%X\n", + req_ids[i] >> 8, (req_ids[i] >> 3) & 0x1F, + req_ids[i] & 0x7, (proxy_id >> 4) & 0x1F, + (proxy_id >> 1) & 0x7); + } + + rc = switchtec_ntb_part_op(sndev, mmio_ctrl, + NTB_CTRL_PART_OP_CFG, + NTB_CTRL_PART_STATUS_NORMAL); + + if (rc == -EIO) { + error = ioread32(&mmio_ctrl->req_id_error); + dev_err(&sndev->stdev->dev, + "Error setting up the requester ID table: %08x\n", + error); + } + + return 0; +} + static int map_bars(int *map, struct ntb_ctrl_regs __iomem *ctrl) { int i; @@ -968,52 +1017,23 @@ static void switchtec_ntb_init_msgs(struct switchtec_ntb *sndev) &sndev->mmio_self_dbmsg->imsg[i]); } -static int switchtec_ntb_init_req_id_table(struct switchtec_ntb *sndev) 
+static int +switchtec_ntb_init_req_id_table(struct switchtec_ntb *sndev) { - int rc = 0; - u16 req_id; - u32 error; - - req_id = ioread16(&sndev->mmio_ntb->requester_id); - - if (ioread32(&sndev->mmio_self_ctrl->req_id_table_size) < 2) { - dev_err(&sndev->stdev->dev, - "Not enough requester IDs available\n"); - return -EFAULT; - } - - rc = switchtec_ntb_part_op(sndev, sndev->mmio_self_ctrl, - NTB_CTRL_PART_OP_LOCK, - NTB_CTRL_PART_STATUS_LOCKED); - if (rc) - return rc; - - iowrite32(NTB_PART_CTRL_ID_PROT_DIS, - &sndev->mmio_self_ctrl->partition_ctrl); + int req_ids[2]; /* * Root Complex Requester ID (which is 0:00.0) */ - iowrite32(0 << 16 | NTB_CTRL_REQ_ID_EN, - &sndev->mmio_self_ctrl->req_id_table[0]); + req_ids[0] = 0; /* * Host Bridge Requester ID (as read from the mmap address) */ - iowrite32(req_id << 16 | NTB_CTRL_REQ_ID_EN, - &sndev->mmio_self_ctrl->req_id_table[1]); - - rc = switchtec_ntb_part_op(sndev, sndev->mmio_self_ctrl, - NTB_CTRL_PART_OP_CFG, - NTB_CTRL_PART_STATUS_NORMAL); - if (rc == -EIO) { - error = ioread32(&sndev->mmio_self_ctrl->req_id_error); - dev_err(&sndev->stdev->dev, - "Error setting up the requester ID table: %08x\n", - error); - } + req_ids[1] = ioread16(&sndev->mmio_ntb->requester_id); - return rc; + return config_req_id_table(sndev, sndev->mmio_self_ctrl, req_ids, + ARRAY_SIZE(req_ids)); } static void switchtec_ntb_init_shared(struct switchtec_ntb *sndev) -- cgit From 01752501820277d217a7b52548d9c948f98d2c56 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Wed, 29 Nov 2017 10:55:29 -0700 Subject: ntb_hw_switchtec: Add initialization code for crosslink Crosslink is a feature of the Switchtec switches that is similar to the B2B mode of other NTB devices. It allows a system to be designed that is perfectly symmetric with two identical switches that link two hosts together. 
In order for the system to be symmetric, there is an empty host-less partition between the two switches which the host must enumerate and assign BAR addresses to. The firmware in the switch manages this specially so that the BAR addresses on both sides of the empty partition will be identical despite being in the same partition with the same address space. The driver determines whether crosslink is enabled by a flag set in the NTB partition info registers which are set by the switch's configuration file. When crosslink is enabled, a reserved LUT window is set up to point to the peer switch's NTB registers and the local MWs are set to forward to the host-less partition's BARs. (Yes, this hurts my brain too.) Once this is set up, largely the same NTB infrastructure is used to communicate between the two hosts. Signed-off-by: Logan Gunthorpe Signed-off-by: Jon Mason --- drivers/ntb/hw/mscc/ntb_hw_switchtec.c | 202 ++++++++++++++++++++++++++++++++- 1 file changed, 197 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c index 4adc32fe035a..17db0f50bb22 100644 --- a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c +++ b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c @@ -95,6 +95,8 @@ struct switchtec_ntb { struct ntb_ctrl_regs __iomem *mmio_peer_ctrl; struct ntb_dbmsg_regs __iomem *mmio_self_dbmsg; + void __iomem *mmio_xlink_win; + struct shared_mw *self_shared; struct shared_mw __iomem *peer_shared; dma_addr_t self_shared_dma; @@ -465,6 +467,13 @@ static void switchtec_ntb_set_link_speed(struct switchtec_ntb *sndev) sndev->link_width = min(self_width, peer_width); } +static int crosslink_is_enabled(struct switchtec_ntb *sndev) +{ + struct ntb_info_regs __iomem *inf = sndev->mmio_ntb; + + return ioread8(&inf->ntp_info[sndev->peer_partition].xlink_enabled); +} + enum { LINK_MESSAGE = 0, MSG_LINK_UP = 1, @@ -849,8 +858,7 @@ static int switchtec_ntb_init_sndev(struct switchtec_ntb *sndev) static int
config_rsvd_lut_win(struct switchtec_ntb *sndev, struct ntb_ctrl_regs __iomem *ctl, - int lut_idx, int partition, - dma_addr_t addr) + int lut_idx, int partition, u64 addr) { int peer_bar = sndev->peer_direct_mw_to_bar[0]; u32 ctl_val; @@ -936,6 +944,182 @@ static int config_req_id_table(struct switchtec_ntb *sndev, return 0; } +static int crosslink_setup_mws(struct switchtec_ntb *sndev, int ntb_lut_idx, + u64 *mw_addrs, int mw_count) +{ + int rc, i; + struct ntb_ctrl_regs __iomem *ctl = sndev->mmio_self_ctrl; + u64 addr; + size_t size, offset; + int bar; + int xlate_pos; + u32 ctl_val; + + rc = switchtec_ntb_part_op(sndev, ctl, NTB_CTRL_PART_OP_LOCK, + NTB_CTRL_PART_STATUS_LOCKED); + if (rc) + return rc; + + for (i = 0; i < sndev->nr_lut_mw; i++) { + if (i == ntb_lut_idx) + continue; + + addr = mw_addrs[0] + LUT_SIZE * i; + + iowrite64((NTB_CTRL_LUT_EN | (sndev->peer_partition << 1) | + addr), + &ctl->lut_entry[i]); + } + + sndev->nr_direct_mw = min_t(int, sndev->nr_direct_mw, mw_count); + + for (i = 0; i < sndev->nr_direct_mw; i++) { + bar = sndev->direct_mw_to_bar[i]; + offset = (i == 0) ? 
LUT_SIZE * sndev->nr_lut_mw : 0; + addr = mw_addrs[i] + offset; + size = pci_resource_len(sndev->ntb.pdev, bar) - offset; + xlate_pos = ilog2(size); + + if (offset && size > offset) + size = offset; + + ctl_val = ioread32(&ctl->bar_entry[bar].ctl); + ctl_val |= NTB_CTRL_BAR_DIR_WIN_EN; + + iowrite32(ctl_val, &ctl->bar_entry[bar].ctl); + iowrite32(xlate_pos | size, &ctl->bar_entry[bar].win_size); + iowrite64(sndev->peer_partition | addr, + &ctl->bar_entry[bar].xlate_addr); + } + + rc = switchtec_ntb_part_op(sndev, ctl, NTB_CTRL_PART_OP_CFG, + NTB_CTRL_PART_STATUS_NORMAL); + if (rc) { + u32 bar_error, lut_error; + + bar_error = ioread32(&ctl->bar_error); + lut_error = ioread32(&ctl->lut_error); + dev_err(&sndev->stdev->dev, + "Error setting up cross link windows: %08x / %08x\n", + bar_error, lut_error); + return rc; + } + + return 0; +} + +static int crosslink_setup_req_ids(struct switchtec_ntb *sndev, + struct ntb_ctrl_regs __iomem *mmio_ctrl) +{ + int req_ids[16]; + int i; + u32 proxy_id; + + for (i = 0; i < ARRAY_SIZE(req_ids); i++) { + proxy_id = ioread32(&sndev->mmio_self_ctrl->req_id_table[i]); + + if (!(proxy_id & NTB_CTRL_REQ_ID_EN)) + break; + + req_ids[i] = ((proxy_id >> 1) & 0xFF); + } + + return config_req_id_table(sndev, mmio_ctrl, req_ids, i); +} + +/* + * In crosslink configuration there is a virtual partition in the + * middle of the two switches. The BARs in this partition have to be + * enumerated and assigned addresses. 
+ */ +static int crosslink_enum_partition(struct switchtec_ntb *sndev, + u64 *bar_addrs) +{ + struct part_cfg_regs __iomem *part_cfg = + &sndev->stdev->mmio_part_cfg_all[sndev->peer_partition]; + u32 pff = ioread32(&part_cfg->vep_pff_inst_id); + struct pff_csr_regs __iomem *mmio_pff = + &sndev->stdev->mmio_pff_csr[pff]; + const u64 bar_space = 0x1000000000LL; + u64 bar_addr; + int bar_cnt = 0; + int i; + + iowrite16(0x6, &mmio_pff->pcicmd); + + for (i = 0; i < ARRAY_SIZE(mmio_pff->pci_bar64); i++) { + iowrite64(bar_space * i, &mmio_pff->pci_bar64[i]); + bar_addr = ioread64(&mmio_pff->pci_bar64[i]); + bar_addr &= ~0xf; + + dev_dbg(&sndev->stdev->dev, + "Crosslink BAR%d addr: %llx\n", + i, bar_addr); + + if (bar_addr != bar_space * i) + continue; + + bar_addrs[bar_cnt++] = bar_addr; + } + + return bar_cnt; +} + +static int switchtec_ntb_init_crosslink(struct switchtec_ntb *sndev) +{ + int rc; + int bar = sndev->direct_mw_to_bar[0]; + const int ntb_lut_idx = 1; + u64 bar_addrs[6]; + u64 addr; + int bar_cnt; + + if (!crosslink_is_enabled(sndev)) + return 0; + + dev_info(&sndev->stdev->dev, "Using crosslink configuration\n"); + sndev->ntb.topo = NTB_TOPO_CROSSLINK; + + bar_cnt = crosslink_enum_partition(sndev, bar_addrs); + if (bar_cnt < sndev->nr_direct_mw + 1) { + dev_err(&sndev->stdev->dev, + "Error enumerating crosslink partition\n"); + return -EINVAL; + } + + addr = bar_addrs[0]; + rc = config_rsvd_lut_win(sndev, sndev->mmio_self_ctrl, ntb_lut_idx, + sndev->peer_partition, addr); + if (rc) + return rc; + + rc = crosslink_setup_mws(sndev, ntb_lut_idx, &bar_addrs[1], + bar_cnt - 1); + if (rc) + return rc; + + rc = crosslink_setup_req_ids(sndev, sndev->mmio_peer_ctrl); + if (rc) + return rc; + + sndev->mmio_xlink_win = pci_iomap_range(sndev->stdev->pdev, bar, + LUT_SIZE, LUT_SIZE); + if (!sndev->mmio_xlink_win) { + rc = -ENOMEM; + return rc; + } + + sndev->nr_rsvd_luts++; + + return 0; +} + +static void switchtec_ntb_deinit_crosslink(struct switchtec_ntb *sndev) +{ + 
if (sndev->mmio_xlink_win) + pci_iounmap(sndev->stdev->pdev, sndev->mmio_xlink_win); +} + static int map_bars(int *map, struct ntb_ctrl_regs __iomem *ctrl) { int i; @@ -1222,17 +1406,22 @@ static int switchtec_ntb_add(struct device *dev, goto free_and_exit; switchtec_ntb_init_mw(sndev); - switchtec_ntb_init_db(sndev); - switchtec_ntb_init_msgs(sndev); rc = switchtec_ntb_init_req_id_table(sndev); if (rc) goto free_and_exit; - rc = switchtec_ntb_init_shared_mw(sndev); + rc = switchtec_ntb_init_crosslink(sndev); if (rc) goto free_and_exit; + switchtec_ntb_init_db(sndev); + switchtec_ntb_init_msgs(sndev); + + rc = switchtec_ntb_init_shared_mw(sndev); + if (rc) + goto deinit_crosslink; + rc = switchtec_ntb_init_db_msg_irq(sndev); if (rc) goto deinit_shared_and_exit; @@ -1251,6 +1440,8 @@ deinit_and_exit: switchtec_ntb_deinit_db_msg_irq(sndev); deinit_shared_and_exit: switchtec_ntb_deinit_shared_mw(sndev); +deinit_crosslink: + switchtec_ntb_deinit_crosslink(sndev); free_and_exit: kfree(sndev); dev_err(dev, "failed to register ntb device: %d\n", rc); @@ -1271,6 +1462,7 @@ void switchtec_ntb_remove(struct device *dev, ntb_unregister_device(&sndev->ntb); switchtec_ntb_deinit_db_msg_irq(sndev); switchtec_ntb_deinit_shared_mw(sndev); + switchtec_ntb_deinit_crosslink(sndev); kfree(sndev); dev_info(dev, "ntb device unregistered\n"); } -- cgit From 270d32e63c70c808a91449da24324e0009827c5f Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Wed, 29 Nov 2017 10:55:30 -0700 Subject: ntb_hw_switchtec: Crosslink doorbells and messages In a crosslink configuration doorbells and messages largely work the same but the NTB registers must be accessed through the reserved LUT window. Also, as a bonus, seeing there are now two independent sets of NTB links, both partitions can actually use all 60 doorbell registers instead of them having to be split into two for each partition. 
Signed-off-by: Logan Gunthorpe Signed-off-by: Jon Mason --- drivers/ntb/hw/mscc/ntb_hw_switchtec.c | 65 ++++++++++++++++++++++++++++------ 1 file changed, 55 insertions(+), 10 deletions(-) (limited to 'drivers') diff --git a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c index 17db0f50bb22..145b31209f20 100644 --- a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c +++ b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c @@ -94,6 +94,7 @@ struct switchtec_ntb { struct ntb_ctrl_regs __iomem *mmio_self_ctrl; struct ntb_ctrl_regs __iomem *mmio_peer_ctrl; struct ntb_dbmsg_regs __iomem *mmio_self_dbmsg; + struct ntb_dbmsg_regs __iomem *mmio_peer_dbmsg; void __iomem *mmio_xlink_win; @@ -188,10 +189,10 @@ static int switchtec_ntb_part_op(struct switchtec_ntb *sndev, static int switchtec_ntb_send_msg(struct switchtec_ntb *sndev, int idx, u32 val) { - if (idx < 0 || idx >= ARRAY_SIZE(sndev->mmio_self_dbmsg->omsg)) + if (idx < 0 || idx >= ARRAY_SIZE(sndev->mmio_peer_dbmsg->omsg)) return -EINVAL; - iowrite32(val, &sndev->mmio_self_dbmsg->omsg[idx].msg); + iowrite32(val, &sndev->mmio_peer_dbmsg->omsg[idx].msg); return 0; } @@ -474,6 +475,25 @@ static int crosslink_is_enabled(struct switchtec_ntb *sndev) return ioread8(&inf->ntp_info[sndev->peer_partition].xlink_enabled); } +static void crosslink_init_dbmsgs(struct switchtec_ntb *sndev) +{ + int i; + u32 msg_map = 0; + + if (!crosslink_is_enabled(sndev)) + return; + + for (i = 0; i < ARRAY_SIZE(sndev->mmio_peer_dbmsg->imsg); i++) { + int m = i | sndev->self_partition << 2; + + msg_map |= m << i * 8; + } + + iowrite32(msg_map, &sndev->mmio_peer_dbmsg->msg_map); + iowrite64(sndev->db_valid_mask << sndev->db_peer_shift, + &sndev->mmio_peer_dbmsg->odb_mask); +} + enum { LINK_MESSAGE = 0, MSG_LINK_UP = 1, @@ -504,6 +524,9 @@ static void switchtec_ntb_check_link(struct switchtec_ntb *sndev) ntb_link_event(&sndev->ntb); dev_info(&sndev->stdev->dev, "ntb link %s\n", link_sta ? 
"up" : "down"); + + if (link_sta) + crosslink_init_dbmsgs(sndev); } } @@ -649,7 +672,7 @@ static int switchtec_ntb_peer_db_addr(struct ntb_dev *ntb, struct switchtec_ntb *sndev = ntb_sndev(ntb); unsigned long offset; - offset = (unsigned long)sndev->mmio_self_dbmsg->odb - + offset = (unsigned long)sndev->mmio_peer_dbmsg->odb - (unsigned long)sndev->stdev->mmio; offset += sndev->db_shift / 8; @@ -667,7 +690,7 @@ static int switchtec_ntb_peer_db_set(struct ntb_dev *ntb, u64 db_bits) struct switchtec_ntb *sndev = ntb_sndev(ntb); iowrite64(db_bits << sndev->db_peer_shift, - &sndev->mmio_self_dbmsg->odb); + &sndev->mmio_peer_dbmsg->odb); return 0; } @@ -852,6 +875,7 @@ static int switchtec_ntb_init_sndev(struct switchtec_ntb *sndev) sndev->mmio_self_ctrl = &sndev->mmio_ctrl[sndev->self_partition]; sndev->mmio_peer_ctrl = &sndev->mmio_ctrl[sndev->peer_partition]; sndev->mmio_self_dbmsg = &sndev->mmio_dbmsg[sndev->self_partition]; + sndev->mmio_peer_dbmsg = sndev->mmio_self_dbmsg; return 0; } @@ -1072,6 +1096,7 @@ static int switchtec_ntb_init_crosslink(struct switchtec_ntb *sndev) const int ntb_lut_idx = 1; u64 bar_addrs[6]; u64 addr; + int offset; int bar_cnt; if (!crosslink_is_enabled(sndev)) @@ -1087,7 +1112,13 @@ static int switchtec_ntb_init_crosslink(struct switchtec_ntb *sndev) return -EINVAL; } - addr = bar_addrs[0]; + addr = (bar_addrs[0] + SWITCHTEC_GAS_NTB_OFFSET + + SWITCHTEC_NTB_REG_DBMSG_OFFSET + + sizeof(struct ntb_dbmsg_regs) * sndev->peer_partition); + + offset = addr & (LUT_SIZE - 1); + addr -= offset; + rc = config_rsvd_lut_win(sndev, sndev->mmio_self_ctrl, ntb_lut_idx, sndev->peer_partition, addr); if (rc) @@ -1109,8 +1140,11 @@ static int switchtec_ntb_init_crosslink(struct switchtec_ntb *sndev) return rc; } + sndev->mmio_peer_dbmsg = sndev->mmio_xlink_win + offset; sndev->nr_rsvd_luts++; + crosslink_init_dbmsgs(sndev); + return 0; } @@ -1163,24 +1197,35 @@ static void switchtec_ntb_init_mw(struct switchtec_ntb *sndev) * shared among all partitions. 
So we must split them in half * (32 for each partition). However, the message interrupts are * also shared with the top 4 doorbells so we just limit this to - * 28 doorbells per partition + * 28 doorbells per partition. + * + * In crosslink mode, each side has it's own dbmsg register so + * they can each use all 60 of the available doorbells. */ static void switchtec_ntb_init_db(struct switchtec_ntb *sndev) { - sndev->db_valid_mask = 0x0FFFFFFF; + sndev->db_mask = 0x0FFFFFFFFFFFFFFFULL; - if (sndev->self_partition < sndev->peer_partition) { + if (sndev->mmio_peer_dbmsg != sndev->mmio_self_dbmsg) { + sndev->db_shift = 0; + sndev->db_peer_shift = 0; + sndev->db_valid_mask = sndev->db_mask; + } else if (sndev->self_partition < sndev->peer_partition) { sndev->db_shift = 0; sndev->db_peer_shift = 32; + sndev->db_valid_mask = 0x0FFFFFFF; } else { sndev->db_shift = 32; sndev->db_peer_shift = 0; + sndev->db_valid_mask = 0x0FFFFFFF; } - sndev->db_mask = 0x0FFFFFFFFFFFFFFFULL; iowrite64(~sndev->db_mask, &sndev->mmio_self_dbmsg->idb_mask); iowrite64(sndev->db_valid_mask << sndev->db_peer_shift, - &sndev->mmio_self_dbmsg->odb_mask); + &sndev->mmio_peer_dbmsg->odb_mask); + + dev_dbg(&sndev->stdev->dev, "dbs: shift %d/%d, mask %016llx\n", + sndev->db_shift, sndev->db_peer_shift, sndev->db_valid_mask); } static void switchtec_ntb_init_msgs(struct switchtec_ntb *sndev) -- cgit From d04be142b8b61ffb3c9cc5c6d1abda8fc59a16c9 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Mon, 4 Dec 2017 10:57:21 -0700 Subject: ntb_hw_switchtec: Force down the link before initializing If one host crashes and soft reboots, the other host may not see a link down event. Then when the crashed host comes back up, the surviving host may not know the link was reset and the NTB clients may not work without being reset. To solve this, we send a LINK_FORCE_DOWN message to each peer every time we come up, before we register the NTB device. 
If a surviving host still thinks the link is up it will take it down immediately. In this way, once the crashed host comes up fully, it will send a regular link up event as per usual and the link will be properly restarted. While we are in the area, this also fixes the MSG_LINK_UP message that was in the link down function that was reported by Doug Meyers. Signed-off-by: Logan Gunthorpe Reported-by: ThanhTuThai Signed-off-by: Jon Mason --- drivers/ntb/hw/mscc/ntb_hw_switchtec.c | 57 +++++++++++++++++++++++++++++----- 1 file changed, 50 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c index 145b31209f20..bcd5b6fb3800 100644 --- a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c +++ b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c @@ -122,6 +122,7 @@ struct switchtec_ntb { bool link_is_up; enum ntb_speed link_speed; enum ntb_width link_width; + struct work_struct link_reinit_work; }; static struct switchtec_ntb *ntb_sndev(struct ntb_dev *ntb) @@ -494,18 +495,43 @@ static void crosslink_init_dbmsgs(struct switchtec_ntb *sndev) &sndev->mmio_peer_dbmsg->odb_mask); } -enum { +enum switchtec_msg { LINK_MESSAGE = 0, MSG_LINK_UP = 1, MSG_LINK_DOWN = 2, MSG_CHECK_LINK = 3, + MSG_LINK_FORCE_DOWN = 4, }; -static void switchtec_ntb_check_link(struct switchtec_ntb *sndev) +static int switchtec_ntb_reinit_peer(struct switchtec_ntb *sndev); + +static void link_reinit_work(struct work_struct *work) +{ + struct switchtec_ntb *sndev; + + sndev = container_of(work, struct switchtec_ntb, link_reinit_work); + + switchtec_ntb_reinit_peer(sndev); +} + +static void switchtec_ntb_check_link(struct switchtec_ntb *sndev, + enum switchtec_msg msg) { int link_sta; int old = sndev->link_is_up; + if (msg == MSG_LINK_FORCE_DOWN) { + schedule_work(&sndev->link_reinit_work); + + if (sndev->link_is_up) { + sndev->link_is_up = 0; + ntb_link_event(&sndev->ntb); + dev_info(&sndev->stdev->dev, "ntb link forced down\n"); + } + 
+ return; + } + link_sta = sndev->self_shared->link_sta; if (link_sta) { u64 peer = ioread64(&sndev->peer_shared->magic); @@ -534,7 +560,7 @@ static void switchtec_ntb_link_notification(struct switchtec_dev *stdev) { struct switchtec_ntb *sndev = stdev->sndev; - switchtec_ntb_check_link(sndev); + switchtec_ntb_check_link(sndev, MSG_CHECK_LINK); } static u64 switchtec_ntb_link_is_up(struct ntb_dev *ntb, @@ -562,7 +588,7 @@ static int switchtec_ntb_link_enable(struct ntb_dev *ntb, sndev->self_shared->link_sta = 1; switchtec_ntb_send_msg(sndev, LINK_MESSAGE, MSG_LINK_UP); - switchtec_ntb_check_link(sndev); + switchtec_ntb_check_link(sndev, MSG_CHECK_LINK); return 0; } @@ -574,9 +600,9 @@ static int switchtec_ntb_link_disable(struct ntb_dev *ntb) dev_dbg(&sndev->stdev->dev, "disabling link\n"); sndev->self_shared->link_sta = 0; - switchtec_ntb_send_msg(sndev, LINK_MESSAGE, MSG_LINK_UP); + switchtec_ntb_send_msg(sndev, LINK_MESSAGE, MSG_LINK_DOWN); - switchtec_ntb_check_link(sndev); + switchtec_ntb_check_link(sndev, MSG_CHECK_LINK); return 0; } @@ -822,6 +848,8 @@ static int switchtec_ntb_init_sndev(struct switchtec_ntb *sndev) sndev->ntb.topo = NTB_TOPO_SWITCH; sndev->ntb.ops = &switchtec_ntb_ops; + INIT_WORK(&sndev->link_reinit_work, link_reinit_work); + sndev->self_partition = sndev->stdev->partition; sndev->mmio_ntb = sndev->stdev->mmio_ntb; @@ -1368,7 +1396,7 @@ static irqreturn_t switchtec_ntb_message_isr(int irq, void *dev) iowrite8(1, &sndev->mmio_self_dbmsg->imsg[i].status); if (i == LINK_MESSAGE) - switchtec_ntb_check_link(sndev); + switchtec_ntb_check_link(sndev, msg); } } @@ -1429,6 +1457,14 @@ static void switchtec_ntb_deinit_db_msg_irq(struct switchtec_ntb *sndev) free_irq(sndev->message_irq, sndev); } +static int switchtec_ntb_reinit_peer(struct switchtec_ntb *sndev) +{ + dev_info(&sndev->stdev->dev, "peer reinitialized\n"); + switchtec_ntb_deinit_shared_mw(sndev); + switchtec_ntb_init_mw(sndev); + return switchtec_ntb_init_shared_mw(sndev); +} + static 
int switchtec_ntb_add(struct device *dev, struct class_interface *class_intf) { @@ -1471,6 +1507,13 @@ static int switchtec_ntb_add(struct device *dev, if (rc) goto deinit_shared_and_exit; + /* + * If this host crashed, the other host may think the link is + * still up. Tell them to force it down (it will go back up + * once we register the ntb device). + */ + switchtec_ntb_send_msg(sndev, LINK_MESSAGE, MSG_LINK_FORCE_DOWN); + rc = ntb_register_device(&sndev->ntb); if (rc) goto deinit_and_exit; -- cgit From cbd27448faff4843ac4b66cc71445a10623ff48d Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Mon, 18 Dec 2017 11:25:05 -0700 Subject: ntb_transport: Fix bug with max_mw_size parameter When using the max_mw_size parameter of ntb_transport to limit the size of the Memory windows, communication cannot be established and the queues freeze. This is because the mw_size that's reported to the peer is correctly limited but the size used locally is not. So the MW is initialized with a buffer smaller than the window but the TX side is using the full window. This means the TX side will be writing to a region of the window that points nowhere. This is easily fixed by applying the same limit to tx_size in ntb_transport_init_queue(). 
Fixes: e26a5843f7f5 ("NTB: Split ntb_hw_intel and ntb_transport drivers") Signed-off-by: Logan Gunthorpe Acked-by: Allen Hubbe Cc: Dave Jiang Signed-off-by: Jon Mason --- drivers/ntb/ntb_transport.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers') diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index 045e3dd4750e..9878c48826e3 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -1003,6 +1003,9 @@ static int ntb_transport_init_queue(struct ntb_transport_ctx *nt, mw_base = nt->mw_vec[mw_num].phys_addr; mw_size = nt->mw_vec[mw_num].phys_size; + if (max_mw_size && mw_size > max_mw_size) + mw_size = max_mw_size; + tx_size = (unsigned int)mw_size / num_qps_mw; qp_offset = tx_size * (qp_num / mw_count); -- cgit From 1e2fd202f8593985cdadca32e0c322f98e7fe7cb Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Mon, 18 Dec 2017 11:25:06 -0700 Subject: ntb_hw_switchtec: Check for alignment of the buffer in mw_set_trans() With Switchtec hardware, the buffer used for a memory window must be aligned to its size (the hardware only replaces the lower bits). In certain circumstances dma_alloc_coherent() will not provide a buffer that adheres to this requirement like when using the CMA and CONFIG_CMA_ALIGNMENT is set lower than the buffer size. When we get an unaligned buffer mw_set_trans() should return an error. We also log an error so we know the cause of the problem. 
Signed-off-by: Logan Gunthorpe Signed-off-by: Jon Mason --- drivers/ntb/hw/mscc/ntb_hw_switchtec.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'drivers') diff --git a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c index bcd5b6fb3800..6c6f991999b5 100644 --- a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c +++ b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c @@ -320,6 +320,19 @@ static int switchtec_ntb_mw_set_trans(struct ntb_dev *ntb, int pidx, int widx, if (xlate_pos < 12) return -EINVAL; + if (!IS_ALIGNED(addr, BIT_ULL(xlate_pos))) { + /* + * In certain circumstances we can get a buffer that is + * not aligned to its size. (Most of the time + * dma_alloc_coherent ensures this). This can happen when + * using large buffers allocated by the CMA + * (see CMA_CONFIG_ALIGNMENT) + */ + dev_err(&sndev->stdev->dev, + "ERROR: Memory window address is not aligned to it's size!\n"); + return -EINVAL; + } + rc = switchtec_ntb_part_op(sndev, ctl, NTB_CTRL_PART_OP_LOCK, NTB_CTRL_PART_STATUS_LOCKED); if (rc) -- cgit From c6fad21a8d03167a47fc376a64df785d8f6e7385 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 16 Jan 2018 14:50:51 +0100 Subject: ntb_hw_switchtec: fix logic error Newer gcc (version 7 and 8 presumably) warn about a statement mixing the << operator with logical and: drivers/ntb/hw/mscc/ntb_hw_switchtec.c: In function 'switchtec_ntb_init_sndev': drivers/ntb/hw/mscc/ntb_hw_switchtec.c:888:24: error: '<<' in boolean context, did you mean '<' ? [-Werror=int-in-bool-context] My interpretation here is that the author must have intended a bitmask rather than a comparison, so I'm changing the '&&' to '&', which makes a lot more sense in the context. 
Fixes: 1b249475275d ("ntb_hw_switchtec: Allow using Switchtec NTB in multi-partition setups") Reviewed-by: Logan Gunthorpe Signed-off-by: Arnd Bergmann Signed-off-by: Jon Mason --- drivers/ntb/hw/mscc/ntb_hw_switchtec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c index 6c6f991999b5..a1d547b6aa12 100644 --- a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c +++ b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c @@ -898,7 +898,7 @@ static int switchtec_ntb_init_sndev(struct switchtec_ntb *sndev) } sndev->peer_partition = ffs(tpart_vec) - 1; - if (!(part_map && (1 << sndev->peer_partition))) { + if (!(part_map & (1 << sndev->peer_partition))) { dev_err(&sndev->stdev->dev, "ntb target partition is not NT partition\n"); return -ENODEV; -- cgit From b87ab21935d76922362ff98a5a78f16e2e956ead Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Wed, 6 Dec 2017 17:31:52 +0300 Subject: NTB: Rename NTB messaging API methods There is a common methods signature form used over all the NTB API like functions naming scheme, arguments names and order, etc. Recently added NTB messaging API IO callbacks were named a bit different so should be renamed to be in compliance with the rest of the API. Signed-off-by: Serge Semin Signed-off-by: Jon Mason --- drivers/ntb/hw/idt/ntb_hw_idt.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) (limited to 'drivers') diff --git a/drivers/ntb/hw/idt/ntb_hw_idt.c b/drivers/ntb/hw/idt/ntb_hw_idt.c index 0cd79f367f7c..24040317bb9c 100644 --- a/drivers/ntb/hw/idt/ntb_hw_idt.c +++ b/drivers/ntb/hw/idt/ntb_hw_idt.c @@ -1744,20 +1744,19 @@ static int idt_ntb_msg_clear_mask(struct ntb_dev *ntb, u64 mask_bits) * idt_ntb_msg_read() - read message register with specified index * (NTB API callback) * @ntb: NTB device context. 
- * @midx: Message register index * @pidx: OUT - Port index of peer device a message retrieved from - * @msg: OUT - Data + * @midx: Message register index * * Read data from the specified message register and source register. * - * Return: zero on success, negative error if invalid argument passed. + * Return: inbound message register value. */ -static int idt_ntb_msg_read(struct ntb_dev *ntb, int midx, int *pidx, u32 *msg) +static u32 idt_ntb_msg_read(struct ntb_dev *ntb, int *pidx, int midx) { struct idt_ntb_dev *ndev = to_ndev_ntb(ntb); if (midx < 0 || IDT_MSG_CNT <= midx) - return -EINVAL; + return ~(u32)0; /* Retrieve source port index of the message */ if (pidx != NULL) { @@ -1772,18 +1771,15 @@ static int idt_ntb_msg_read(struct ntb_dev *ntb, int midx, int *pidx, u32 *msg) } /* Retrieve data of the corresponding message register */ - if (msg != NULL) - *msg = idt_nt_read(ndev, ntdata_tbl.msgs[midx].in); - - return 0; + return idt_nt_read(ndev, ntdata_tbl.msgs[midx].in); } /* - * idt_ntb_msg_write() - write data to the specified message register - * (NTB API callback) + * idt_ntb_peer_msg_write() - write data to the specified message register + * (NTB API callback) * @ntb: NTB device context. - * @midx: Message register index * @pidx: Port index of peer device a message being sent to + * @midx: Message register index * @msg: Data to send * * Just try to send data to a peer. Message status register should be @@ -1791,7 +1787,8 @@ static int idt_ntb_msg_read(struct ntb_dev *ntb, int midx, int *pidx, u32 *msg) * * Return: zero on success, negative error if invalid argument passed. 
*/ -static int idt_ntb_msg_write(struct ntb_dev *ntb, int midx, int pidx, u32 msg) +static int idt_ntb_peer_msg_write(struct ntb_dev *ntb, int pidx, int midx, + u32 msg) { struct idt_ntb_dev *ndev = to_ndev_ntb(ntb); unsigned long irqflags; @@ -2058,7 +2055,7 @@ static const struct ntb_dev_ops idt_ntb_ops = { .msg_set_mask = idt_ntb_msg_set_mask, .msg_clear_mask = idt_ntb_msg_clear_mask, .msg_read = idt_ntb_msg_read, - .msg_write = idt_ntb_msg_write + .peer_msg_write = idt_ntb_peer_msg_write }; /* @@ -2269,7 +2266,7 @@ static ssize_t idt_dbgfs_info_read(struct file *filp, char __user *ubuf, "Message data:\n"); for (idx = 0; idx < IDT_MSG_CNT; idx++) { int src; - (void)idt_ntb_msg_read(&ndev->ntb, idx, &src, &data); + data = idt_ntb_msg_read(&ndev->ntb, &src, idx); off += scnprintf(strbuf + off, size - off, "\t%hhu. 0x%08x from peer %hhu (Port %hhu)\n", idx, data, src, ndev->peers[src].port); -- cgit From 417cf39cfea9c680aa7c278c8d8a0ca879cacf0a Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Wed, 6 Dec 2017 17:31:53 +0300 Subject: NTB: Set dma mask and dma coherent mask to NTB devices The dma_mask and dma_coherent_mask fields of the NTB struct device weren't initialized in hardware drivers. In fact it should be done instead of PCIe interface usage, since NTB clients are supposed to use NTB API and be left unaware of real hardware implementation. In addition to that the ntb_register_device() method shouldn't clear the passed ntb_dev structure, since its dma_mask is initialized by hardware drivers. 
Signed-off-by: Serge Semin Signed-off-by: Jon Mason --- drivers/ntb/hw/amd/ntb_hw_amd.c | 4 ++++ drivers/ntb/hw/idt/ntb_hw_idt.c | 8 +++++++- drivers/ntb/hw/intel/ntb_hw_intel.c | 4 ++++ drivers/ntb/ntb.c | 1 - 4 files changed, 15 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.c b/drivers/ntb/hw/amd/ntb_hw_amd.c index f0788aae05c9..3cfa46876239 100644 --- a/drivers/ntb/hw/amd/ntb_hw_amd.c +++ b/drivers/ntb/hw/amd/ntb_hw_amd.c @@ -1020,6 +1020,10 @@ static int amd_ntb_init_pci(struct amd_ntb_dev *ndev, goto err_dma_mask; dev_warn(&pdev->dev, "Cannot DMA consistent highmem\n"); } + rc = dma_coerce_mask_and_coherent(&ndev->ntb.dev, + dma_get_mask(&pdev->dev)); + if (rc) + goto err_dma_mask; ndev->self_mmio = pci_iomap(pdev, 0, 0); if (!ndev->self_mmio) { diff --git a/drivers/ntb/hw/idt/ntb_hw_idt.c b/drivers/ntb/hw/idt/ntb_hw_idt.c index 24040317bb9c..93d4c9d2a9ad 100644 --- a/drivers/ntb/hw/idt/ntb_hw_idt.c +++ b/drivers/ntb/hw/idt/ntb_hw_idt.c @@ -2426,7 +2426,7 @@ static int idt_init_pci(struct idt_ntb_dev *ndev) struct pci_dev *pdev = ndev->ntb.pdev; int ret; - /* Initialize the bit mask of DMA */ + /* Initialize the bit mask of PCI/NTB DMA */ ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); if (ret != 0) { ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); @@ -2447,6 +2447,12 @@ static int idt_init_pci(struct idt_ntb_dev *ndev) dev_warn(&pdev->dev, "Cannot set consistent DMA highmem bit mask\n"); } + ret = dma_coerce_mask_and_coherent(&ndev->ntb.dev, + dma_get_mask(&pdev->dev)); + if (ret != 0) { + dev_err(&pdev->dev, "Failed to set NTB device DMA bit mask\n"); + return ret; + } /* * Enable the device advanced error reporting. 
It's not critical to diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.c b/drivers/ntb/hw/intel/ntb_hw_intel.c index 341a3d5baa3f..156b45cd4a19 100644 --- a/drivers/ntb/hw/intel/ntb_hw_intel.c +++ b/drivers/ntb/hw/intel/ntb_hw_intel.c @@ -2334,6 +2334,10 @@ static int intel_ntb_init_pci(struct intel_ntb_dev *ndev, struct pci_dev *pdev) goto err_dma_mask; dev_warn(&pdev->dev, "Cannot DMA consistent highmem\n"); } + rc = dma_coerce_mask_and_coherent(&ndev->ntb.dev, + dma_get_mask(&pdev->dev)); + if (rc) + goto err_dma_mask; ndev->self_mmio = pci_iomap(pdev, 0, 0); if (!ndev->self_mmio) { diff --git a/drivers/ntb/ntb.c b/drivers/ntb/ntb.c index bdcd59b13c1f..2581ab724c34 100644 --- a/drivers/ntb/ntb.c +++ b/drivers/ntb/ntb.c @@ -111,7 +111,6 @@ int ntb_register_device(struct ntb_dev *ntb) init_completion(&ntb->released); - memset(&ntb->dev, 0, sizeof(ntb->dev)); ntb->dev.bus = &ntb_bus; ntb->dev.parent = &ntb->pdev->dev; ntb->dev.release = ntb_dev_release; -- cgit From c7aeb0afdcc2d1ec5945e164d3fb97c5ae3edd1a Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Wed, 6 Dec 2017 17:31:55 +0300 Subject: NTB: ntb_pp: Add full multi-port NTB API support Current Ping Pong driver can't truly work with multi-port devices. Additionally it requires the Scratchpad registers being available on NTB device. This patch rewrites the driver so that it performs the cyclic Ping-Pong algorithm around all the available NTB peers and works with NTB hardware that doesn't support Scratchpads but provides an alternative such as NTB Message registers. Additional cleanups are also added here. 
Signed-off-by: Serge Semin Signed-off-by: Jon Mason --- drivers/ntb/test/ntb_pingpong.c | 447 +++++++++++++++++++++++++--------------- 1 file changed, 282 insertions(+), 165 deletions(-) (limited to 'drivers') diff --git a/drivers/ntb/test/ntb_pingpong.c b/drivers/ntb/test/ntb_pingpong.c index e700873e03fb..65865e460ab8 100644 --- a/drivers/ntb/test/ntb_pingpong.c +++ b/drivers/ntb/test/ntb_pingpong.c @@ -1,10 +1,11 @@ /* - * This file is provided under a dual BSD/GPLv2 license. When using or + * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * * Copyright (C) 2015 EMC Corporation. All Rights Reserved. + * Copyright (C) 2017 T-Platforms. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -18,6 +19,7 @@ * BSD LICENSE * * Copyright (C) 2015 EMC Corporation. All Rights Reserved. + * Copyright (C) 2017 T-Platforms. All Rights Reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -46,36 +48,45 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * PCIe NTB Pingpong Linux driver - * - * Contact Information: - * Allen Hubbe */ -/* Note: load this module with option 'dyndbg=+p' */ +/* + * How to use this tool, by example. + * + * Assuming $DBG_DIR is something like: + * '/sys/kernel/debug/ntb_perf/0000:00:03.0' + * Suppose aside from local device there is at least one remote device + * connected to NTB with index 0. 
+ *----------------------------------------------------------------------------- + * Eg: install driver with specified delay between doorbell event and response + * + * root@self# insmod ntb_pingpong.ko delay_ms=1000 + *----------------------------------------------------------------------------- + * Eg: get number of ping-pong cycles performed + * + * root@self# cat $DBG_DIR/count + */ #include #include #include +#include +#include -#include #include #include -#include +#include #include #include -#define DRIVER_NAME "ntb_pingpong" -#define DRIVER_DESCRIPTION "PCIe NTB Simple Pingpong Client" - -#define DRIVER_VERSION "1.0" -#define DRIVER_RELDATE "24 March 2015" -#define DRIVER_AUTHOR "Allen Hubbe " +#define DRIVER_NAME "ntb_pingpong" +#define DRIVER_VERSION "2.0" MODULE_LICENSE("Dual BSD/GPL"); MODULE_VERSION(DRIVER_VERSION); -MODULE_AUTHOR(DRIVER_AUTHOR); -MODULE_DESCRIPTION(DRIVER_DESCRIPTION); +MODULE_AUTHOR("Allen Hubbe "); +MODULE_DESCRIPTION("PCIe NTB Simple Pingpong Client"); static unsigned int unsafe; module_param(unsafe, uint, 0644); @@ -85,237 +96,343 @@ static unsigned int delay_ms = 1000; module_param(delay_ms, uint, 0644); MODULE_PARM_DESC(delay_ms, "Milliseconds to delay the response to peer"); -static unsigned long db_init = 0x7; -module_param(db_init, ulong, 0644); -MODULE_PARM_DESC(db_init, "Initial doorbell bits to ring on the peer"); - -/* Only two-ports NTB devices are supported */ -#define PIDX NTB_DEF_PEER_IDX - struct pp_ctx { - struct ntb_dev *ntb; - u64 db_bits; - /* synchronize access to db_bits by ping and pong */ - spinlock_t db_lock; - struct timer_list db_timer; - unsigned long db_delay; - struct dentry *debugfs_node_dir; - struct dentry *debugfs_count; - atomic_t count; + struct ntb_dev *ntb; + struct hrtimer timer; + u64 in_db; + u64 out_db; + int out_pidx; + u64 nmask; + u64 pmask; + atomic_t count; + spinlock_t lock; + struct dentry *dbgfs_dir; }; +#define to_pp_timer(__timer) \ + container_of(__timer, struct pp_ctx, timer) 
-static struct dentry *pp_debugfs_dir; +static struct dentry *pp_dbgfs_topdir; -static void pp_ping(struct timer_list *t) +static int pp_find_next_peer(struct pp_ctx *pp) { - struct pp_ctx *pp = from_timer(pp, t, db_timer); - unsigned long irqflags; - u64 db_bits, db_mask; - u32 spad_rd, spad_wr; + u64 link, out_db; + int pidx; + + link = ntb_link_is_up(pp->ntb, NULL, NULL); + + /* Find next available peer */ + if (link & pp->nmask) { + pidx = __ffs64(link & pp->nmask); + out_db = BIT_ULL(pidx + 1); + } else if (link & pp->pmask) { + pidx = __ffs64(link & pp->pmask); + out_db = BIT_ULL(pidx); + } else { + return -ENODEV; + } - spin_lock_irqsave(&pp->db_lock, irqflags); - { - db_mask = ntb_db_valid_mask(pp->ntb); - db_bits = ntb_db_read(pp->ntb); + spin_lock(&pp->lock); + pp->out_pidx = pidx; + pp->out_db = out_db; + spin_unlock(&pp->lock); - if (db_bits) { - dev_dbg(&pp->ntb->dev, - "Masked pongs %#llx\n", - db_bits); - ntb_db_clear(pp->ntb, db_bits); - } + return 0; +} - db_bits = ((pp->db_bits | db_bits) << 1) & db_mask; +static void pp_setup(struct pp_ctx *pp) +{ + int ret; - if (!db_bits) - db_bits = db_init; + ntb_db_set_mask(pp->ntb, pp->in_db); - spad_rd = ntb_spad_read(pp->ntb, 0); - spad_wr = spad_rd + 1; + hrtimer_cancel(&pp->timer); - dev_dbg(&pp->ntb->dev, - "Ping bits %#llx read %#x write %#x\n", - db_bits, spad_rd, spad_wr); + ret = pp_find_next_peer(pp); + if (ret == -ENODEV) { + dev_dbg(&pp->ntb->dev, "Got no peers, so cancel\n"); + return; + } - ntb_peer_spad_write(pp->ntb, PIDX, 0, spad_wr); - ntb_peer_db_set(pp->ntb, db_bits); - ntb_db_clear_mask(pp->ntb, db_mask); + dev_dbg(&pp->ntb->dev, "Ping-pong started with port %d, db %#llx\n", + ntb_peer_port_number(pp->ntb, pp->out_pidx), pp->out_db); - pp->db_bits = 0; - } - spin_unlock_irqrestore(&pp->db_lock, irqflags); + hrtimer_start(&pp->timer, ms_to_ktime(delay_ms), HRTIMER_MODE_REL); } -static void pp_link_event(void *ctx) +static void pp_clear(struct pp_ctx *pp) { - struct pp_ctx *pp = ctx; + 
hrtimer_cancel(&pp->timer); - if (ntb_link_is_up(pp->ntb, NULL, NULL) == 1) { - dev_dbg(&pp->ntb->dev, "link is up\n"); - pp_ping(&pp->db_timer); - } else { - dev_dbg(&pp->ntb->dev, "link is down\n"); - del_timer(&pp->db_timer); - } + ntb_db_set_mask(pp->ntb, pp->in_db); + + dev_dbg(&pp->ntb->dev, "Ping-pong cancelled\n"); } -static void pp_db_event(void *ctx, int vec) +static void pp_ping(struct pp_ctx *pp) { - struct pp_ctx *pp = ctx; - u64 db_bits, db_mask; - unsigned long irqflags; + u32 count; - spin_lock_irqsave(&pp->db_lock, irqflags); - { - db_mask = ntb_db_vector_mask(pp->ntb, vec); - db_bits = db_mask & ntb_db_read(pp->ntb); - ntb_db_set_mask(pp->ntb, db_mask); - ntb_db_clear(pp->ntb, db_bits); + count = atomic_read(&pp->count); - pp->db_bits |= db_bits; + spin_lock(&pp->lock); + ntb_peer_spad_write(pp->ntb, pp->out_pidx, 0, count); + ntb_peer_msg_write(pp->ntb, pp->out_pidx, 0, count); - mod_timer(&pp->db_timer, jiffies + pp->db_delay); + dev_dbg(&pp->ntb->dev, "Ping port %d spad %#x, msg %#x\n", + ntb_peer_port_number(pp->ntb, pp->out_pidx), count, count); - dev_dbg(&pp->ntb->dev, - "Pong vec %d bits %#llx\n", - vec, db_bits); - atomic_inc(&pp->count); - } - spin_unlock_irqrestore(&pp->db_lock, irqflags); + ntb_peer_db_set(pp->ntb, pp->out_db); + ntb_db_clear_mask(pp->ntb, pp->in_db); + spin_unlock(&pp->lock); } -static int pp_debugfs_setup(struct pp_ctx *pp) +static void pp_pong(struct pp_ctx *pp) { - struct pci_dev *pdev = pp->ntb->pdev; + u32 msg_data = -1, spad_data = -1; + int pidx = 0; - if (!pp_debugfs_dir) - return -ENODEV; + /* Read pong data */ + spad_data = ntb_spad_read(pp->ntb, 0); + msg_data = ntb_msg_read(pp->ntb, &pidx, 0); + ntb_msg_clear_sts(pp->ntb, -1); - pp->debugfs_node_dir = debugfs_create_dir(pci_name(pdev), - pp_debugfs_dir); - if (!pp->debugfs_node_dir) - return -ENODEV; + /* + * Scratchpad and message data may differ, since message register can't + * be rewritten unless status is cleared. 
Additionally either of them + * might be unsupported + */ + dev_dbg(&pp->ntb->dev, "Pong spad %#x, msg %#x (port %d)\n", + spad_data, msg_data, ntb_peer_port_number(pp->ntb, pidx)); - pp->debugfs_count = debugfs_create_atomic_t("count", S_IRUSR | S_IWUSR, - pp->debugfs_node_dir, - &pp->count); - if (!pp->debugfs_count) - return -ENODEV; + atomic_inc(&pp->count); - return 0; + ntb_db_set_mask(pp->ntb, pp->in_db); + ntb_db_clear(pp->ntb, pp->in_db); + + hrtimer_start(&pp->timer, ms_to_ktime(delay_ms), HRTIMER_MODE_REL); +} + +static enum hrtimer_restart pp_timer_func(struct hrtimer *t) +{ + struct pp_ctx *pp = to_pp_timer(t); + + pp_ping(pp); + + return HRTIMER_NORESTART; +} + +static void pp_link_event(void *ctx) +{ + struct pp_ctx *pp = ctx; + + pp_setup(pp); +} + +static void pp_db_event(void *ctx, int vec) +{ + struct pp_ctx *pp = ctx; + + pp_pong(pp); } static const struct ntb_ctx_ops pp_ops = { .link_event = pp_link_event, - .db_event = pp_db_event, + .db_event = pp_db_event }; -static int pp_probe(struct ntb_client *client, - struct ntb_dev *ntb) +static int pp_check_ntb(struct ntb_dev *ntb) { - struct pp_ctx *pp; - int rc; + u64 pmask; if (ntb_db_is_unsafe(ntb)) { - dev_dbg(&ntb->dev, "doorbell is unsafe\n"); - if (!unsafe) { - rc = -EINVAL; - goto err_pp; - } - } - - if (ntb_spad_count(ntb) < 1) { - dev_dbg(&ntb->dev, "no enough scratchpads\n"); - rc = -EINVAL; - goto err_pp; + dev_dbg(&ntb->dev, "Doorbell is unsafe\n"); + if (!unsafe) + return -EINVAL; } if (ntb_spad_is_unsafe(ntb)) { - dev_dbg(&ntb->dev, "scratchpad is unsafe\n"); - if (!unsafe) { - rc = -EINVAL; - goto err_pp; - } + dev_dbg(&ntb->dev, "Scratchpad is unsafe\n"); + if (!unsafe) + return -EINVAL; } - if (ntb_peer_port_count(ntb) != NTB_DEF_PEER_CNT) - dev_warn(&ntb->dev, "multi-port NTB is unsupported\n"); + pmask = GENMASK_ULL(ntb_peer_port_count(ntb), 0); + if ((ntb_db_valid_mask(ntb) & pmask) != pmask) { + dev_err(&ntb->dev, "Unsupported DB configuration\n"); + return -EINVAL; + } - pp = 
kmalloc(sizeof(*pp), GFP_KERNEL); - if (!pp) { - rc = -ENOMEM; - goto err_pp; + if (ntb_spad_count(ntb) < 1 && ntb_msg_count(ntb) < 1) { + dev_err(&ntb->dev, "Scratchpads and messages unsupported\n"); + return -EINVAL; + } else if (ntb_spad_count(ntb) < 1) { + dev_dbg(&ntb->dev, "Scratchpads unsupported\n"); + } else if (ntb_msg_count(ntb) < 1) { + dev_dbg(&ntb->dev, "Messages unsupported\n"); } + return 0; +} + +static struct pp_ctx *pp_create_data(struct ntb_dev *ntb) +{ + struct pp_ctx *pp; + + pp = devm_kzalloc(&ntb->dev, sizeof(*pp), GFP_KERNEL); + if (!pp) + return ERR_PTR(-ENOMEM); + pp->ntb = ntb; - pp->db_bits = 0; atomic_set(&pp->count, 0); - spin_lock_init(&pp->db_lock); - timer_setup(&pp->db_timer, pp_ping, 0); - pp->db_delay = msecs_to_jiffies(delay_ms); + spin_lock_init(&pp->lock); + hrtimer_init(&pp->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + pp->timer.function = pp_timer_func; + + return pp; +} + +static void pp_init_flds(struct pp_ctx *pp) +{ + int pidx, lport, pcnt; + + /* Find global port index */ + lport = ntb_port_number(pp->ntb); + pcnt = ntb_peer_port_count(pp->ntb); + for (pidx = 0; pidx < pcnt; pidx++) { + if (lport < ntb_peer_port_number(pp->ntb, pidx)) + break; + } - rc = ntb_set_ctx(ntb, pp, &pp_ops); - if (rc) - goto err_ctx; + pp->in_db = BIT_ULL(pidx); + pp->pmask = GENMASK_ULL(pidx, 0) >> 1; + pp->nmask = GENMASK_ULL(pcnt - 1, pidx); - rc = pp_debugfs_setup(pp); - if (rc) - goto err_ctx; + dev_dbg(&pp->ntb->dev, "Inbound db %#llx, prev %#llx, next %#llx\n", + pp->in_db, pp->pmask, pp->nmask); +} + +static int pp_mask_events(struct pp_ctx *pp) +{ + u64 db_mask, msg_mask; + int ret; + + db_mask = ntb_db_valid_mask(pp->ntb); + ret = ntb_db_set_mask(pp->ntb, db_mask); + if (ret) + return ret; - ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO); - ntb_link_event(ntb); + /* Skip message events masking if unsupported */ + if (ntb_msg_count(pp->ntb) < 1) + return 0; + + msg_mask = ntb_msg_outbits(pp->ntb) | ntb_msg_inbits(pp->ntb); + 
return ntb_msg_set_mask(pp->ntb, msg_mask); +} + +static int pp_setup_ctx(struct pp_ctx *pp) +{ + int ret; + + ret = ntb_set_ctx(pp->ntb, pp, &pp_ops); + if (ret) + return ret; + + ntb_link_enable(pp->ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO); + /* Might be not necessary */ + ntb_link_event(pp->ntb); return 0; +} + +static void pp_clear_ctx(struct pp_ctx *pp) +{ + ntb_link_disable(pp->ntb); -err_ctx: - kfree(pp); -err_pp: - return rc; + ntb_clear_ctx(pp->ntb); } -static void pp_remove(struct ntb_client *client, - struct ntb_dev *ntb) +static void pp_setup_dbgfs(struct pp_ctx *pp) +{ + struct pci_dev *pdev = pp->ntb->pdev; + void *ret; + + pp->dbgfs_dir = debugfs_create_dir(pci_name(pdev), pp_dbgfs_topdir); + + ret = debugfs_create_atomic_t("count", 0600, pp->dbgfs_dir, &pp->count); + if (!ret) + dev_warn(&pp->ntb->dev, "DebugFS unsupported\n"); +} + +static void pp_clear_dbgfs(struct pp_ctx *pp) +{ + debugfs_remove_recursive(pp->dbgfs_dir); +} + +static int pp_probe(struct ntb_client *client, struct ntb_dev *ntb) +{ + struct pp_ctx *pp; + int ret; + + ret = pp_check_ntb(ntb); + if (ret) + return ret; + + pp = pp_create_data(ntb); + if (IS_ERR(pp)) + return PTR_ERR(pp); + + pp_init_flds(pp); + + ret = pp_mask_events(pp); + if (ret) + return ret; + + ret = pp_setup_ctx(pp); + if (ret) + return ret; + + pp_setup_dbgfs(pp); + + return 0; +} + +static void pp_remove(struct ntb_client *client, struct ntb_dev *ntb) { struct pp_ctx *pp = ntb->ctx; - debugfs_remove_recursive(pp->debugfs_node_dir); + pp_clear_dbgfs(pp); - ntb_clear_ctx(ntb); - del_timer_sync(&pp->db_timer); - ntb_link_disable(ntb); + pp_clear_ctx(pp); - kfree(pp); + pp_clear(pp); } static struct ntb_client pp_client = { .ops = { .probe = pp_probe, - .remove = pp_remove, - }, + .remove = pp_remove + } }; static int __init pp_init(void) { - int rc; + int ret; if (debugfs_initialized()) - pp_debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL); + pp_dbgfs_topdir = debugfs_create_dir(KBUILD_MODNAME, NULL); - rc = 
ntb_register_client(&pp_client); - if (rc) - goto err_client; + ret = ntb_register_client(&pp_client); + if (ret) + debugfs_remove_recursive(pp_dbgfs_topdir); - return 0; - -err_client: - debugfs_remove_recursive(pp_debugfs_dir); - return rc; + return ret; } module_init(pp_init); static void __exit pp_exit(void) { ntb_unregister_client(&pp_client); - debugfs_remove_recursive(pp_debugfs_dir); + debugfs_remove_recursive(pp_dbgfs_topdir); } module_exit(pp_exit); + -- cgit From 7f46c8b3a5523a28cb81c2c12bc3dcc76ed52d59 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Wed, 6 Dec 2017 17:31:56 +0300 Subject: NTB: ntb_tool: Add full multi-port NTB API support Former NTB Debugging tool driver supported only the limited functionality of the recently updated NTB API, which is now available to work with the truly NTB multi-port devices and devices, which got NTB Message registers instead of Scratchpads. This patch fully rewrites the driver so one would fully expose all the new NTB API interfaces. Particularly it concerns the Message registers, peer ports API, NTB link settings. Additional cleanups are also added here. Signed-off-by: Serge Semin Signed-off-by: Jon Mason --- drivers/ntb/test/ntb_tool.c | 1820 +++++++++++++++++++++++++++++-------------- 1 file changed, 1240 insertions(+), 580 deletions(-) (limited to 'drivers') diff --git a/drivers/ntb/test/ntb_tool.c b/drivers/ntb/test/ntb_tool.c index e490bbc8726c..920fc9b161b0 100644 --- a/drivers/ntb/test/ntb_tool.c +++ b/drivers/ntb/test/ntb_tool.c @@ -5,6 +5,7 @@ * GPL LICENSE SUMMARY * * Copyright (C) 2015 EMC Corporation. All Rights Reserved. + * Copyright (C) 2017 T-Platforms All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -18,6 +19,7 @@ * BSD LICENSE * * Copyright (C) 2015 EMC Corporation. All Rights Reserved. + * Copyright (C) 2017 T-Platforms All Rights Reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -46,9 +48,6 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * PCIe NTB Debugging Tool Linux driver - * - * Contact Information: - * Allen Hubbe */ /* @@ -56,42 +55,125 @@ * * Assuming $DBG_DIR is something like: * '/sys/kernel/debug/ntb_tool/0000:00:03.0' + * Suppose aside from local device there is at least one remote device + * connected to NTB with index 0. + *----------------------------------------------------------------------------- + * Eg: check local/peer device information. + * + * # Get local device port number + * root@self# cat $DBG_DIR/port + * + * # Check local device functionality + * root@self# ls $DBG_DIR + * db msg1 msg_sts peer4/ port + * db_event msg2 peer0/ peer5/ spad0 + * db_mask msg3 peer1/ peer_db spad1 + * link msg_event peer2/ peer_db_mask spad2 + * msg0 msg_mask peer3/ peer_spad spad3 + * # As one can see it supports: + * # 1) four inbound message registers + * # 2) four inbound scratchpads + * # 3) up to six peer devices + * + * # Check peer device port number + * root@self# cat $DBG_DIR/peer0/port * - * Eg: check if clearing the doorbell mask generates an interrupt. 
+ * # Check peer device(s) functionality to be used + * root@self# ls $DBG_DIR/peer0 + * link mw_trans0 mw_trans6 port + * link_event mw_trans1 mw_trans7 spad0 + * msg0 mw_trans2 peer_mw_trans0 spad1 + * msg1 mw_trans3 peer_mw_trans1 spad2 + * msg2 mw_trans4 peer_mw_trans2 spad3 + * msg3 mw_trans5 peer_mw_trans3 + * # As one can see we got: + * # 1) four outbound message registers + * # 2) four outbound scratchpads + * # 3) eight inbound memory windows + * # 4) four outbound memory windows + *----------------------------------------------------------------------------- + * Eg: NTB link tests * - * # Check the link status - * root@self# cat $DBG_DIR/link + * # Set local link up/down + * root@self# echo Y > $DBG_DIR/link + * root@self# echo N > $DBG_DIR/link * - * # Block until the link is up - * root@self# echo Y > $DBG_DIR/link_event + * # Check if link with peer device is up/down: + * root@self# cat $DBG_DIR/peer0/link * - * # Set the doorbell mask - * root@self# echo 's 1' > $DBG_DIR/mask + * # Block until the link is up/down + * root@self# echo Y > $DBG_DIR/peer0/link_event + * root@self# echo N > $DBG_DIR/peer0/link_event + *----------------------------------------------------------------------------- + * Eg: Doorbell registers tests (some functionality might be absent) * - * # Ring the doorbell from the peer + * # Set/clear/get local doorbell + * root@self# echo 's 1' > $DBG_DIR/db + * root@self# echo 'c 1' > $DBG_DIR/db + * root@self# cat $DBG_DIR/db + * + * # Set/clear/get local doorbell mask + * root@self# echo 's 1' > $DBG_DIR/db_mask + * root@self# echo 'c 1' > $DBG_DIR/db_mask + * root@self# cat $DBG_DIR/db_mask + * + * # Ring/clear/get peer doorbell * root@peer# echo 's 1' > $DBG_DIR/peer_db + * root@peer# echo 'c 1' > $DBG_DIR/peer_db + * root@peer# cat $DBG_DIR/peer_db + * + * # Set/clear/get peer doorbell mask + * root@self# echo 's 1' > $DBG_DIR/peer_db_mask + * root@self# echo 'c 1' > $DBG_DIR/peer_db_mask + * root@self# cat $DBG_DIR/peer_db_mask + 
* + * # Block until local doorbell is set with specified value + * root@self# echo 1 > $DBG_DIR/db_event + *----------------------------------------------------------------------------- + * Eg: Message registers tests (functionality might be absent) * - * # Clear the doorbell mask - * root@self# echo 'c 1' > $DBG_DIR/mask + * # Set/clear/get in/out message registers status + * root@self# echo 's 1' > $DBG_DIR/msg_sts + * root@self# echo 'c 1' > $DBG_DIR/msg_sts + * root@self# cat $DBG_DIR/msg_sts * - * Observe debugging output in dmesg or your console. You should see a - * doorbell event triggered by clearing the mask. If not, this may indicate an - * issue with the hardware that needs to be worked around in the driver. + * # Set/clear in/out message registers mask + * root@self# echo 's 1' > $DBG_DIR/msg_mask + * root@self# echo 'c 1' > $DBG_DIR/msg_mask * - * Eg: read and write scratchpad registers + * # Get inbound message register #0 value and source of port index + * root@self# cat $DBG_DIR/msg0 * - * root@peer# echo '0 0x01010101 1 0x7f7f7f7f' > $DBG_DIR/peer_spad + * # Send some data to peer over outbound message register #0 + * root@self# echo 0x01020304 > $DBG_DIR/peer0/msg0 + *----------------------------------------------------------------------------- + * Eg: Scratchpad registers tests (functionality might be absent) * - * root@self# cat $DBG_DIR/spad + * # Write/read to/from local scratchpad register #0 + * root@peer# echo 0x01020304 > $DBG_DIR/spad0 + * root@peer# cat $DBG_DIR/spad0 * - * Observe that spad 0 and 1 have the values set by the peer. 
+ * # Write/read to/from peer scratchpad register #0 + * root@peer# echo 0x01020304 > $DBG_DIR/peer0/spad0 + * root@peer# cat $DBG_DIR/peer0/spad0 + *----------------------------------------------------------------------------- + * Eg: Memory windows tests * - * # Check the memory window translation info - * cat $DBG_DIR/peer_trans0 + * # Create inbound memory window buffer of specified size/get its base address + * root@peer# echo 16384 > $DBG_DIR/peer0/mw_trans0 + * root@peer# cat $DBG_DIR/peer0/mw_trans0 * - * # Setup a 16k memory window buffer - * echo 16384 > $DBG_DIR/peer_trans0 + * # Write/read data to/from inbound memory window + * root@peer# echo Hello > $DBG_DIR/peer0/mw0 + * root@peer# head -c 7 $DBG_DIR/peer0/mw0 * + * # Map outbound memory window/check its settings (on peer device) + * root@peer# echo 0xADD0BA5E:16384 > $DBG_DIR/peer0/peer_mw_trans0 + * root@peer# cat $DBG_DIR/peer0/peer_mw_trans0 + * + * # Write/read data to/from outbound memory window (on peer device) + * root@peer# echo olleH > $DBG_DIR/peer0/peer_mw0 + * root@peer# head -c 7 $DBG_DIR/peer0/peer_mw0 */ #include @@ -106,48 +188,87 @@ #include -#define DRIVER_NAME "ntb_tool" -#define DRIVER_DESCRIPTION "PCIe NTB Debugging Tool" - -#define DRIVER_VERSION "1.0" -#define DRIVER_RELDATE "22 April 2015" -#define DRIVER_AUTHOR "Allen Hubbe " +#define DRIVER_NAME "ntb_tool" +#define DRIVER_VERSION "2.0" MODULE_LICENSE("Dual BSD/GPL"); MODULE_VERSION(DRIVER_VERSION); -MODULE_AUTHOR(DRIVER_AUTHOR); -MODULE_DESCRIPTION(DRIVER_DESCRIPTION); - -/* It is rare to have hadrware with greater than six MWs */ -#define MAX_MWS 6 -/* Only two-ports devices are supported */ -#define PIDX NTB_DEF_PEER_IDX - -static struct dentry *tool_dbgfs; +MODULE_AUTHOR("Allen Hubbe "); +MODULE_DESCRIPTION("PCIe NTB Debugging Tool"); +/* + * Inbound and outbound memory windows descriptor. Union members selection + * depends on the MW type the structure describes.
mm_base/dma_base are the + * virtual and DMA address of an inbound MW. io_base/tr_base are the MMIO + * mapped virtual and xlat addresses of an outbound MW respectively. + */ struct tool_mw { - int idx; + int widx; + int pidx; struct tool_ctx *tc; - resource_size_t win_size; + union { + u8 *mm_base; + u8 __iomem *io_base; + }; + union { + dma_addr_t dma_base; + u64 tr_base; + }; resource_size_t size; - u8 __iomem *local; - u8 *peer; - dma_addr_t peer_dma; - struct dentry *peer_dbg_file; + struct dentry *dbgfs_file; +}; + +/* + * Wrapper structure is used to distinguish the outbound MW peers reference + * within the corresponding DebugFS directory IO operation. + */ +struct tool_mw_wrap { + int pidx; + struct tool_mw *mw; +}; + +struct tool_msg { + int midx; + int pidx; + struct tool_ctx *tc; +}; + +struct tool_spad { + int sidx; + int pidx; + struct tool_ctx *tc; +}; + +struct tool_peer { + int pidx; + struct tool_ctx *tc; + int inmw_cnt; + struct tool_mw *inmws; + int outmw_cnt; + struct tool_mw_wrap *outmws; + int outmsg_cnt; + struct tool_msg *outmsgs; + int outspad_cnt; + struct tool_spad *outspads; + struct dentry *dbgfs_dir; }; struct tool_ctx { struct ntb_dev *ntb; - struct dentry *dbgfs; wait_queue_head_t link_wq; - int mw_count; - struct tool_mw mws[MAX_MWS]; + wait_queue_head_t db_wq; + wait_queue_head_t msg_wq; + int outmw_cnt; + struct tool_mw *outmws; + int peer_cnt; + struct tool_peer *peers; + int inmsg_cnt; + struct tool_msg *inmsgs; + int inspad_cnt; + struct tool_spad *inspads; + struct dentry *dbgfs_dir; }; -#define SPAD_FNAME_SIZE 0x10 -#define INT_PTR(x) ((void *)(unsigned long)x) -#define PTR_INT(x) ((int)(unsigned long)x) - #define TOOL_FOPS_RDWR(__name, __read, __write) \ const struct file_operations __name = { \ .owner = THIS_MODULE, \ @@ -156,6 +277,15 @@ struct tool_ctx { .write = __write, \ } +#define TOOL_BUF_LEN 32 + +static struct dentry *tool_dbgfs_topdir; + 
+/*============================================================================== + * NTB events handlers + *============================================================================== + */ + static void tool_link_event(void *ctx) { struct tool_ctx *tc = ctx; @@ -181,580 +311,576 @@ static void tool_db_event(void *ctx, int vec) dev_dbg(&tc->ntb->dev, "doorbell vec %d mask %#llx bits %#llx\n", vec, db_mask, db_bits); + + wake_up(&tc->db_wq); +} + +static void tool_msg_event(void *ctx) +{ + struct tool_ctx *tc = ctx; + u64 msg_sts; + + msg_sts = ntb_msg_read_sts(tc->ntb); + + dev_dbg(&tc->ntb->dev, "message bits %#llx\n", msg_sts); + + wake_up(&tc->msg_wq); } static const struct ntb_ctx_ops tool_ops = { .link_event = tool_link_event, .db_event = tool_db_event, + .msg_event = tool_msg_event }; -static ssize_t tool_dbfn_read(struct tool_ctx *tc, char __user *ubuf, - size_t size, loff_t *offp, - u64 (*db_read_fn)(struct ntb_dev *)) +/*============================================================================== + * Common read/write methods + *============================================================================== + */ + +static ssize_t tool_fn_read(struct tool_ctx *tc, char __user *ubuf, + size_t size, loff_t *offp, + u64 (*fn_read)(struct ntb_dev *)) { size_t buf_size; - char *buf; - ssize_t pos, rc; + char buf[TOOL_BUF_LEN]; + ssize_t pos; - if (!db_read_fn) + if (!fn_read) return -EINVAL; - buf_size = min_t(size_t, size, 0x20); - - buf = kmalloc(buf_size, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - pos = scnprintf(buf, buf_size, "%#llx\n", - db_read_fn(tc->ntb)); + buf_size = min(size, sizeof(buf)); - rc = simple_read_from_buffer(ubuf, size, offp, buf, pos); + pos = scnprintf(buf, buf_size, "%#llx\n", fn_read(tc->ntb)); - kfree(buf); - - return rc; + return simple_read_from_buffer(ubuf, size, offp, buf, pos); } -static ssize_t tool_dbfn_write(struct tool_ctx *tc, - const char __user *ubuf, - size_t size, loff_t *offp, - int (*db_set_fn)(struct ntb_dev 
*, u64), - int (*db_clear_fn)(struct ntb_dev *, u64)) +static ssize_t tool_fn_write(struct tool_ctx *tc, + const char __user *ubuf, + size_t size, loff_t *offp, + int (*fn_set)(struct ntb_dev *, u64), + int (*fn_clear)(struct ntb_dev *, u64)) { - u64 db_bits; char *buf, cmd; - ssize_t rc; + ssize_t ret; + u64 bits; int n; buf = kmalloc(size + 1, GFP_KERNEL); if (!buf) return -ENOMEM; - rc = simple_write_to_buffer(buf, size, offp, ubuf, size); - if (rc < 0) { + ret = simple_write_to_buffer(buf, size, offp, ubuf, size); + if (ret < 0) { kfree(buf); - return rc; + return ret; } buf[size] = 0; - n = sscanf(buf, "%c %lli", &cmd, &db_bits); + n = sscanf(buf, "%c %lli", &cmd, &bits); kfree(buf); if (n != 2) { - rc = -EINVAL; + ret = -EINVAL; } else if (cmd == 's') { - if (!db_set_fn) - rc = -EINVAL; + if (!fn_set) + ret = -EINVAL; else - rc = db_set_fn(tc->ntb, db_bits); + ret = fn_set(tc->ntb, bits); } else if (cmd == 'c') { - if (!db_clear_fn) - rc = -EINVAL; + if (!fn_clear) + ret = -EINVAL; else - rc = db_clear_fn(tc->ntb, db_bits); + ret = fn_clear(tc->ntb, bits); } else { - rc = -EINVAL; + ret = -EINVAL; } - return rc ? : size; + return ret ? : size; } -static ssize_t tool_spadfn_read(struct tool_ctx *tc, char __user *ubuf, - size_t size, loff_t *offp, - u32 (*spad_read_fn)(struct ntb_dev *, int)) -{ - size_t buf_size; - char *buf; - ssize_t pos, rc; - int i, spad_count; - - if (!spad_read_fn) - return -EINVAL; - - spad_count = ntb_spad_count(tc->ntb); +/*============================================================================== + * Port read/write methods + *============================================================================== + */ - /* - * We multiply the number of spads by 15 to get the buffer size - * this is from 3 for the %d, 10 for the largest hex value - * (0x00000000) and 2 for the tab and line feed. 
- */ - buf_size = min_t(size_t, size, spad_count * 15); +static ssize_t tool_port_read(struct file *filep, char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_ctx *tc = filep->private_data; + char buf[TOOL_BUF_LEN]; + int pos; - buf = kmalloc(buf_size, GFP_KERNEL); - if (!buf) - return -ENOMEM; + pos = scnprintf(buf, sizeof(buf), "%d\n", ntb_port_number(tc->ntb)); - pos = 0; + return simple_read_from_buffer(ubuf, size, offp, buf, pos); +} - for (i = 0; i < spad_count; ++i) { - pos += scnprintf(buf + pos, buf_size - pos, "%d\t%#x\n", - i, spad_read_fn(tc->ntb, i)); - } +static TOOL_FOPS_RDWR(tool_port_fops, + tool_port_read, + NULL); - rc = simple_read_from_buffer(ubuf, size, offp, buf, pos); +static ssize_t tool_peer_port_read(struct file *filep, char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_peer *peer = filep->private_data; + struct tool_ctx *tc = peer->tc; + char buf[TOOL_BUF_LEN]; + int pos; - kfree(buf); + pos = scnprintf(buf, sizeof(buf), "%d\n", + ntb_peer_port_number(tc->ntb, peer->pidx)); - return rc; + return simple_read_from_buffer(ubuf, size, offp, buf, pos); } -static ssize_t tool_spadfn_write(struct tool_ctx *tc, - const char __user *ubuf, - size_t size, loff_t *offp, - int (*spad_write_fn)(struct ntb_dev *, - int, u32)) +static TOOL_FOPS_RDWR(tool_peer_port_fops, + tool_peer_port_read, + NULL); + +static int tool_init_peers(struct tool_ctx *tc) { - int spad_idx; - u32 spad_val; - char *buf, *buf_ptr; - int pos, n; - ssize_t rc; - - if (!spad_write_fn) { - dev_dbg(&tc->ntb->dev, "no spad write fn\n"); - return -EINVAL; - } + int pidx; - buf = kmalloc(size + 1, GFP_KERNEL); - if (!buf) + tc->peer_cnt = ntb_peer_port_count(tc->ntb); + tc->peers = devm_kcalloc(&tc->ntb->dev, tc->peer_cnt, + sizeof(*tc->peers), GFP_KERNEL); + if (tc->peers == NULL) return -ENOMEM; - rc = simple_write_to_buffer(buf, size, offp, ubuf, size); - if (rc < 0) { - kfree(buf); - return rc; - } - - buf[size] = 0; - buf_ptr = buf; - n = 
sscanf(buf_ptr, "%d %i%n", &spad_idx, &spad_val, &pos); - while (n == 2) { - buf_ptr += pos; - rc = spad_write_fn(tc->ntb, spad_idx, spad_val); - if (rc) - break; - - n = sscanf(buf_ptr, "%d %i%n", &spad_idx, &spad_val, &pos); + for (pidx = 0; pidx < tc->peer_cnt; pidx++) { + tc->peers[pidx].pidx = pidx; + tc->peers[pidx].tc = tc; } - if (n < 0) - rc = n; - - kfree(buf); - - return rc ? : size; + return 0; } -static ssize_t tool_db_read(struct file *filep, char __user *ubuf, - size_t size, loff_t *offp) -{ - struct tool_ctx *tc = filep->private_data; - - return tool_dbfn_read(tc, ubuf, size, offp, - tc->ntb->ops->db_read); -} +/*============================================================================== + * Link state read/write methods + *============================================================================== + */ -static ssize_t tool_db_write(struct file *filep, const char __user *ubuf, - size_t size, loff_t *offp) +static ssize_t tool_link_write(struct file *filep, const char __user *ubuf, + size_t size, loff_t *offp) { struct tool_ctx *tc = filep->private_data; + bool val; + int ret; - return tool_dbfn_write(tc, ubuf, size, offp, - tc->ntb->ops->db_set, - tc->ntb->ops->db_clear); -} + ret = kstrtobool_from_user(ubuf, size, &val); + if (ret) + return ret; -static TOOL_FOPS_RDWR(tool_db_fops, - tool_db_read, - tool_db_write); + if (val) + ret = ntb_link_enable(tc->ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO); + else + ret = ntb_link_disable(tc->ntb); -static ssize_t tool_mask_read(struct file *filep, char __user *ubuf, - size_t size, loff_t *offp) -{ - struct tool_ctx *tc = filep->private_data; + if (ret) + return ret; - return tool_dbfn_read(tc, ubuf, size, offp, - tc->ntb->ops->db_read_mask); + return size; } -static ssize_t tool_mask_write(struct file *filep, const char __user *ubuf, - size_t size, loff_t *offp) +static TOOL_FOPS_RDWR(tool_link_fops, + NULL, + tool_link_write); + +static ssize_t tool_peer_link_read(struct file *filep, char __user *ubuf, + 
size_t size, loff_t *offp) { - struct tool_ctx *tc = filep->private_data; + struct tool_peer *peer = filep->private_data; + struct tool_ctx *tc = peer->tc; + char buf[3]; - return tool_dbfn_write(tc, ubuf, size, offp, - tc->ntb->ops->db_set_mask, - tc->ntb->ops->db_clear_mask); + if (ntb_link_is_up(tc->ntb, NULL, NULL) & BIT(peer->pidx)) + buf[0] = 'Y'; + else + buf[0] = 'N'; + buf[1] = '\n'; + buf[2] = '\0'; + + return simple_read_from_buffer(ubuf, size, offp, buf, 3); } -static TOOL_FOPS_RDWR(tool_mask_fops, - tool_mask_read, - tool_mask_write); +static TOOL_FOPS_RDWR(tool_peer_link_fops, + tool_peer_link_read, + NULL); -static ssize_t tool_peer_db_read(struct file *filep, char __user *ubuf, - size_t size, loff_t *offp) +static ssize_t tool_peer_link_event_write(struct file *filep, + const char __user *ubuf, + size_t size, loff_t *offp) { - struct tool_ctx *tc = filep->private_data; + struct tool_peer *peer = filep->private_data; + struct tool_ctx *tc = peer->tc; + u64 link_msk; + bool val; + int ret; - return tool_dbfn_read(tc, ubuf, size, offp, - tc->ntb->ops->peer_db_read); -} + ret = kstrtobool_from_user(ubuf, size, &val); + if (ret) + return ret; -static ssize_t tool_peer_db_write(struct file *filep, const char __user *ubuf, - size_t size, loff_t *offp) -{ - struct tool_ctx *tc = filep->private_data; + link_msk = BIT_ULL_MASK(peer->pidx); + + if (wait_event_interruptible(tc->link_wq, + !!(ntb_link_is_up(tc->ntb, NULL, NULL) & link_msk) == val)) + return -ERESTART; - return tool_dbfn_write(tc, ubuf, size, offp, - tc->ntb->ops->peer_db_set, - tc->ntb->ops->peer_db_clear); + return size; } -static TOOL_FOPS_RDWR(tool_peer_db_fops, - tool_peer_db_read, - tool_peer_db_write); +static TOOL_FOPS_RDWR(tool_peer_link_event_fops, + NULL, + tool_peer_link_event_write); -static ssize_t tool_peer_mask_read(struct file *filep, char __user *ubuf, - size_t size, loff_t *offp) +/*============================================================================== + * Memory 
windows read/write/setting methods + *============================================================================== + */ + +static ssize_t tool_mw_read(struct file *filep, char __user *ubuf, + size_t size, loff_t *offp) { - struct tool_ctx *tc = filep->private_data; + struct tool_mw *inmw = filep->private_data; + + if (inmw->mm_base == NULL) + return -ENXIO; - return tool_dbfn_read(tc, ubuf, size, offp, - tc->ntb->ops->peer_db_read_mask); + return simple_read_from_buffer(ubuf, size, offp, + inmw->mm_base, inmw->size); } -static ssize_t tool_peer_mask_write(struct file *filep, const char __user *ubuf, - size_t size, loff_t *offp) +static ssize_t tool_mw_write(struct file *filep, const char __user *ubuf, + size_t size, loff_t *offp) { - struct tool_ctx *tc = filep->private_data; + struct tool_mw *inmw = filep->private_data; + + if (inmw->mm_base == NULL) + return -ENXIO; - return tool_dbfn_write(tc, ubuf, size, offp, - tc->ntb->ops->peer_db_set_mask, - tc->ntb->ops->peer_db_clear_mask); + return simple_write_to_buffer(inmw->mm_base, inmw->size, offp, + ubuf, size); } -static TOOL_FOPS_RDWR(tool_peer_mask_fops, - tool_peer_mask_read, - tool_peer_mask_write); +static TOOL_FOPS_RDWR(tool_mw_fops, + tool_mw_read, + tool_mw_write); -static ssize_t tool_spad_read(struct file *filep, char __user *ubuf, - size_t size, loff_t *offp) +static int tool_setup_mw(struct tool_ctx *tc, int pidx, int widx, + size_t req_size) { - struct tool_ctx *tc = filep->private_data; + resource_size_t size, addr_align, size_align; + struct tool_mw *inmw = &tc->peers[pidx].inmws[widx]; + char buf[TOOL_BUF_LEN]; + int ret; - return tool_spadfn_read(tc, ubuf, size, offp, - tc->ntb->ops->spad_read); -} + if (inmw->mm_base != NULL) + return 0; -static ssize_t tool_spad_write(struct file *filep, const char __user *ubuf, - size_t size, loff_t *offp) -{ - struct tool_ctx *tc = filep->private_data; + ret = ntb_mw_get_align(tc->ntb, pidx, widx, &addr_align, + &size_align, &size); + if (ret) + return ret; 
+ + inmw->size = min_t(resource_size_t, req_size, size); + inmw->size = round_up(inmw->size, addr_align); + inmw->size = round_up(inmw->size, size_align); + inmw->mm_base = dma_alloc_coherent(&tc->ntb->dev, inmw->size, + &inmw->dma_base, GFP_KERNEL); + if (!inmw->mm_base) + return -ENOMEM; - return tool_spadfn_write(tc, ubuf, size, offp, - tc->ntb->ops->spad_write); -} + if (!IS_ALIGNED(inmw->dma_base, addr_align)) { + ret = -ENOMEM; + goto err_free_dma; + } -static TOOL_FOPS_RDWR(tool_spad_fops, - tool_spad_read, - tool_spad_write); + ret = ntb_mw_set_trans(tc->ntb, pidx, widx, inmw->dma_base, inmw->size); + if (ret) + goto err_free_dma; -static u32 ntb_tool_peer_spad_read(struct ntb_dev *ntb, int sidx) -{ - return ntb_peer_spad_read(ntb, PIDX, sidx); -} + snprintf(buf, sizeof(buf), "mw%d", widx); + inmw->dbgfs_file = debugfs_create_file(buf, 0600, + tc->peers[pidx].dbgfs_dir, inmw, + &tool_mw_fops); -static ssize_t tool_peer_spad_read(struct file *filep, char __user *ubuf, - size_t size, loff_t *offp) -{ - struct tool_ctx *tc = filep->private_data; + return 0; - return tool_spadfn_read(tc, ubuf, size, offp, ntb_tool_peer_spad_read); -} +err_free_dma: + dma_free_coherent(&tc->ntb->dev, inmw->size, inmw->mm_base, + inmw->dma_base); + inmw->mm_base = NULL; + inmw->dma_base = 0; + inmw->size = 0; -static int ntb_tool_peer_spad_write(struct ntb_dev *ntb, int sidx, u32 val) -{ - return ntb_peer_spad_write(ntb, PIDX, sidx, val); + return ret; } -static ssize_t tool_peer_spad_write(struct file *filep, const char __user *ubuf, - size_t size, loff_t *offp) +static void tool_free_mw(struct tool_ctx *tc, int pidx, int widx) { - struct tool_ctx *tc = filep->private_data; - - return tool_spadfn_write(tc, ubuf, size, offp, - ntb_tool_peer_spad_write); -} - -static TOOL_FOPS_RDWR(tool_peer_spad_fops, - tool_peer_spad_read, - tool_peer_spad_write); + struct tool_mw *inmw = &tc->peers[pidx].inmws[widx]; -static ssize_t tool_link_read(struct file *filep, char __user *ubuf, - size_t 
size, loff_t *offp) -{ - struct tool_ctx *tc = filep->private_data; - char buf[3]; + debugfs_remove(inmw->dbgfs_file); - buf[0] = ntb_link_is_up(tc->ntb, NULL, NULL) ? 'Y' : 'N'; - buf[1] = '\n'; - buf[2] = '\0'; + if (inmw->mm_base != NULL) { + ntb_mw_clear_trans(tc->ntb, pidx, widx); + dma_free_coherent(&tc->ntb->dev, inmw->size, + inmw->mm_base, inmw->dma_base); + } - return simple_read_from_buffer(ubuf, size, offp, buf, 2); + inmw->mm_base = NULL; + inmw->dma_base = 0; + inmw->size = 0; + inmw->dbgfs_file = NULL; } -static ssize_t tool_link_write(struct file *filep, const char __user *ubuf, - size_t size, loff_t *offp) +static ssize_t tool_mw_trans_read(struct file *filep, char __user *ubuf, + size_t size, loff_t *offp) { - struct tool_ctx *tc = filep->private_data; - char buf[32]; + struct tool_mw *inmw = filep->private_data; + resource_size_t addr_align; + resource_size_t size_align; + resource_size_t size_max; + ssize_t ret, off = 0; size_t buf_size; - bool val; - int rc; + char *buf; - buf_size = min(size, (sizeof(buf) - 1)); - if (copy_from_user(buf, ubuf, buf_size)) - return -EFAULT; + buf_size = min_t(size_t, size, 512); - buf[buf_size] = '\0'; + buf = kmalloc(buf_size, GFP_KERNEL); + if (!buf) + return -ENOMEM; - rc = strtobool(buf, &val); - if (rc) - return rc; + ret = ntb_mw_get_align(inmw->tc->ntb, inmw->pidx, inmw->widx, + &addr_align, &size_align, &size_max); + if (ret) + return ret; - if (val) - rc = ntb_link_enable(tc->ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO); - else - rc = ntb_link_disable(tc->ntb); + off += scnprintf(buf + off, buf_size - off, + "Inbound MW \t%d\n", + inmw->widx); - if (rc) - return rc; + off += scnprintf(buf + off, buf_size - off, + "Port \t%d (%d)\n", + ntb_peer_port_number(inmw->tc->ntb, inmw->pidx), + inmw->pidx); - return size; -} + off += scnprintf(buf + off, buf_size - off, + "Window Address \t0x%pK\n", inmw->mm_base); -static TOOL_FOPS_RDWR(tool_link_fops, - tool_link_read, - tool_link_write); + off += scnprintf(buf + off, 
buf_size - off, + "DMA Address \t%pad\n", + &inmw->dma_base); -static ssize_t tool_link_event_write(struct file *filep, - const char __user *ubuf, - size_t size, loff_t *offp) -{ - struct tool_ctx *tc = filep->private_data; - char buf[32]; - size_t buf_size; - bool val; - int rc; + off += scnprintf(buf + off, buf_size - off, + "Window Size \t%pa[p]\n", + &inmw->size); - buf_size = min(size, (sizeof(buf) - 1)); - if (copy_from_user(buf, ubuf, buf_size)) - return -EFAULT; + off += scnprintf(buf + off, buf_size - off, + "Alignment \t%pa[p]\n", + &addr_align); - buf[buf_size] = '\0'; + off += scnprintf(buf + off, buf_size - off, + "Size Alignment \t%pa[p]\n", + &size_align); + + off += scnprintf(buf + off, buf_size - off, + "Size Max \t%pa[p]\n", + &size_max); - rc = strtobool(buf, &val); - if (rc) - return rc; + ret = simple_read_from_buffer(ubuf, size, offp, buf, off); + kfree(buf); - if (wait_event_interruptible(tc->link_wq, - ntb_link_is_up(tc->ntb, NULL, NULL) == val)) - return -ERESTART; + return ret; +} + +static ssize_t tool_mw_trans_write(struct file *filep, const char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_mw *inmw = filep->private_data; + unsigned int val; + int ret; + + ret = kstrtouint_from_user(ubuf, size, 0, &val); + if (ret) + return ret; + + tool_free_mw(inmw->tc, inmw->pidx, inmw->widx); + if (val) { + ret = tool_setup_mw(inmw->tc, inmw->pidx, inmw->widx, val); + if (ret) + return ret; + } return size; } -static TOOL_FOPS_RDWR(tool_link_event_fops, - NULL, - tool_link_event_write); +static TOOL_FOPS_RDWR(tool_mw_trans_fops, + tool_mw_trans_read, + tool_mw_trans_write); -static ssize_t tool_mw_read(struct file *filep, char __user *ubuf, - size_t size, loff_t *offp) +static ssize_t tool_peer_mw_read(struct file *filep, char __user *ubuf, + size_t size, loff_t *offp) { - struct tool_mw *mw = filep->private_data; - ssize_t rc; + struct tool_mw *outmw = filep->private_data; loff_t pos = *offp; + ssize_t ret; void *buf; - if (mw->local 
== NULL) + if (outmw->io_base == NULL) return -EIO; - if (pos < 0) - return -EINVAL; - if (pos >= mw->win_size || !size) + + if (pos >= outmw->size || !size) return 0; - if (size > mw->win_size - pos) - size = mw->win_size - pos; + + if (size > outmw->size - pos) + size = outmw->size - pos; buf = kmalloc(size, GFP_KERNEL); if (!buf) return -ENOMEM; - memcpy_fromio(buf, mw->local + pos, size); - rc = copy_to_user(ubuf, buf, size); - if (rc == size) { - rc = -EFAULT; + memcpy_fromio(buf, outmw->io_base + pos, size); + ret = copy_to_user(ubuf, buf, size); + if (ret == size) { + ret = -EFAULT; goto err_free; } - size -= rc; + size -= ret; *offp = pos + size; - rc = size; + ret = size; err_free: kfree(buf); - return rc; + return ret; } -static ssize_t tool_mw_write(struct file *filep, const char __user *ubuf, - size_t size, loff_t *offp) +static ssize_t tool_peer_mw_write(struct file *filep, const char __user *ubuf, + size_t size, loff_t *offp) { - struct tool_mw *mw = filep->private_data; - ssize_t rc; + struct tool_mw *outmw = filep->private_data; + ssize_t ret; loff_t pos = *offp; void *buf; - if (pos < 0) - return -EINVAL; - if (pos >= mw->win_size || !size) + if (outmw->io_base == NULL) + return -EIO; + + if (pos >= outmw->size || !size) return 0; - if (size > mw->win_size - pos) - size = mw->win_size - pos; + if (size > outmw->size - pos) + size = outmw->size - pos; buf = kmalloc(size, GFP_KERNEL); if (!buf) return -ENOMEM; - rc = copy_from_user(buf, ubuf, size); - if (rc == size) { - rc = -EFAULT; + ret = copy_from_user(buf, ubuf, size); + if (ret == size) { + ret = -EFAULT; goto err_free; } - size -= rc; + size -= ret; *offp = pos + size; - rc = size; + ret = size; - memcpy_toio(mw->local + pos, buf, size); + memcpy_toio(outmw->io_base + pos, buf, size); err_free: kfree(buf); - return rc; -} - -static TOOL_FOPS_RDWR(tool_mw_fops, - tool_mw_read, - tool_mw_write); - -static ssize_t tool_peer_mw_read(struct file *filep, char __user *ubuf, - size_t size, loff_t 
*offp) -{ - struct tool_mw *mw = filep->private_data; - - if (!mw->peer) - return -ENXIO; - - return simple_read_from_buffer(ubuf, size, offp, mw->peer, mw->size); -} - -static ssize_t tool_peer_mw_write(struct file *filep, const char __user *ubuf, - size_t size, loff_t *offp) -{ - struct tool_mw *mw = filep->private_data; - - if (!mw->peer) - return -ENXIO; - - return simple_write_to_buffer(mw->peer, mw->size, offp, ubuf, size); + return ret; } static TOOL_FOPS_RDWR(tool_peer_mw_fops, tool_peer_mw_read, tool_peer_mw_write); -static int tool_setup_mw(struct tool_ctx *tc, int idx, size_t req_size) +static int tool_setup_peer_mw(struct tool_ctx *tc, int pidx, int widx, + u64 req_addr, size_t req_size) { - int rc; - struct tool_mw *mw = &tc->mws[idx]; - resource_size_t size, align_addr, align_size; - char buf[16]; + struct tool_mw *outmw = &tc->outmws[widx]; + resource_size_t map_size; + phys_addr_t map_base; + char buf[TOOL_BUF_LEN]; + int ret; - if (mw->peer) + if (outmw->io_base != NULL) return 0; - rc = ntb_mw_get_align(tc->ntb, PIDX, idx, &align_addr, - &align_size, &size); - if (rc) - return rc; + ret = ntb_peer_mw_get_addr(tc->ntb, widx, &map_base, &map_size); + if (ret) + return ret; - mw->size = min_t(resource_size_t, req_size, size); - mw->size = round_up(mw->size, align_addr); - mw->size = round_up(mw->size, align_size); - mw->peer = dma_alloc_coherent(&tc->ntb->pdev->dev, mw->size, - &mw->peer_dma, GFP_KERNEL); + ret = ntb_peer_mw_set_trans(tc->ntb, pidx, widx, req_addr, req_size); + if (ret) + return ret; - if (!mw->peer || !IS_ALIGNED(mw->peer_dma, align_addr)) - return -ENOMEM; + outmw->io_base = ioremap_wc(map_base, map_size); + if (outmw->io_base == NULL) { + ret = -EFAULT; + goto err_clear_trans; + } - rc = ntb_mw_set_trans(tc->ntb, PIDX, idx, mw->peer_dma, mw->size); - if (rc) - goto err_free_dma; + outmw->tr_base = req_addr; + outmw->size = req_size; + outmw->pidx = pidx; - snprintf(buf, sizeof(buf), "peer_mw%d", idx); - mw->peer_dbg_file = 
debugfs_create_file(buf, S_IRUSR | S_IWUSR, - mw->tc->dbgfs, mw, - &tool_peer_mw_fops); + snprintf(buf, sizeof(buf), "peer_mw%d", widx); + outmw->dbgfs_file = debugfs_create_file(buf, 0600, + tc->peers[pidx].dbgfs_dir, outmw, + &tool_peer_mw_fops); return 0; -err_free_dma: - dma_free_coherent(&tc->ntb->pdev->dev, mw->size, - mw->peer, - mw->peer_dma); - mw->peer = NULL; - mw->peer_dma = 0; - mw->size = 0; - - return rc; +err_clear_trans: + ntb_peer_mw_clear_trans(tc->ntb, pidx, widx); + + return ret; } -static void tool_free_mw(struct tool_ctx *tc, int idx) +static void tool_free_peer_mw(struct tool_ctx *tc, int widx) { - struct tool_mw *mw = &tc->mws[idx]; + struct tool_mw *outmw = &tc->outmws[widx]; - if (mw->peer) { - ntb_mw_clear_trans(tc->ntb, PIDX, idx); - dma_free_coherent(&tc->ntb->pdev->dev, mw->size, - mw->peer, - mw->peer_dma); - } + debugfs_remove(outmw->dbgfs_file); - mw->peer = NULL; - mw->peer_dma = 0; - - debugfs_remove(mw->peer_dbg_file); + if (outmw->io_base != NULL) { + iounmap(tc->outmws[widx].io_base); + ntb_peer_mw_clear_trans(tc->ntb, outmw->pidx, widx); + } - mw->peer_dbg_file = NULL; + outmw->io_base = NULL; + outmw->tr_base = 0; + outmw->size = 0; + outmw->pidx = -1; + outmw->dbgfs_file = NULL; } -static ssize_t tool_peer_mw_trans_read(struct file *filep, - char __user *ubuf, - size_t size, loff_t *offp) +static ssize_t tool_peer_mw_trans_read(struct file *filep, char __user *ubuf, + size_t size, loff_t *offp) { - struct tool_mw *mw = filep->private_data; - - char *buf; + struct tool_mw_wrap *outmw_wrap = filep->private_data; + struct tool_mw *outmw = outmw_wrap->mw; + resource_size_t map_size; + phys_addr_t map_base; + ssize_t off = 0; size_t buf_size; - ssize_t ret, off = 0; + char *buf; + int ret; - phys_addr_t base; - resource_size_t mw_size; - resource_size_t align_addr = 0; - resource_size_t align_size = 0; - resource_size_t max_size = 0; + ret = ntb_peer_mw_get_addr(outmw->tc->ntb, outmw->widx, + &map_base, &map_size); + if (ret) + 
return ret; buf_size = min_t(size_t, size, 512); @@ -762,43 +888,37 @@ static ssize_t tool_peer_mw_trans_read(struct file *filep, if (!buf) return -ENOMEM; - ntb_mw_get_align(mw->tc->ntb, PIDX, mw->idx, - &align_addr, &align_size, &max_size); - ntb_peer_mw_get_addr(mw->tc->ntb, mw->idx, &base, &mw_size); - off += scnprintf(buf + off, buf_size - off, - "Peer MW %d Information:\n", mw->idx); + "Outbound MW: \t%d\n", outmw->widx); - off += scnprintf(buf + off, buf_size - off, - "Physical Address \t%pa[p]\n", - &base); - - off += scnprintf(buf + off, buf_size - off, - "Window Size \t%lld\n", - (unsigned long long)mw_size); + if (outmw->io_base != NULL) { + off += scnprintf(buf + off, buf_size - off, + "Port attached \t%d (%d)\n", + ntb_peer_port_number(outmw->tc->ntb, outmw->pidx), + outmw->pidx); + } else { + off += scnprintf(buf + off, buf_size - off, + "Port attached \t-1 (-1)\n"); + } off += scnprintf(buf + off, buf_size - off, - "Alignment \t%lld\n", - (unsigned long long)align_addr); + "Virtual address \t0x%pK\n", outmw->io_base); off += scnprintf(buf + off, buf_size - off, - "Size Alignment \t%lld\n", - (unsigned long long)align_size); + "Phys Address \t%pa[p]\n", &map_base); off += scnprintf(buf + off, buf_size - off, - "Size Max \t%lld\n", - (unsigned long long)max_size); + "Mapping Size \t%pa[p]\n", &map_size); off += scnprintf(buf + off, buf_size - off, - "Ready \t%c\n", - (mw->peer) ? 'Y' : 'N'); + "Translation Address \t0x%016llx\n", outmw->tr_base); off += scnprintf(buf + off, buf_size - off, - "Allocated Size \t%zd\n", - (mw->peer) ? 
(size_t)mw->size : 0); + "Window Size \t%pa[p]\n", &outmw->size); ret = simple_read_from_buffer(ubuf, size, offp, buf, off); kfree(buf); + return ret; } @@ -806,12 +926,12 @@ static ssize_t tool_peer_mw_trans_write(struct file *filep, const char __user *ubuf, size_t size, loff_t *offp) { - struct tool_mw *mw = filep->private_data; - - char buf[32]; - size_t buf_size; - unsigned long long val; - int rc; + struct tool_mw_wrap *outmw_wrap = filep->private_data; + struct tool_mw *outmw = outmw_wrap->mw; + size_t buf_size, wsize; + char buf[TOOL_BUF_LEN]; + int ret, n; + u64 addr; buf_size = min(size, (sizeof(buf) - 1)); if (copy_from_user(buf, ubuf, buf_size)) @@ -819,16 +939,17 @@ static ssize_t tool_peer_mw_trans_write(struct file *filep, buf[buf_size] = '\0'; - rc = kstrtoull(buf, 0, &val); - if (rc) - return rc; - - tool_free_mw(mw->tc, mw->idx); - if (val) - rc = tool_setup_mw(mw->tc, mw->idx, val); + n = sscanf(buf, "%lli:%zi", &addr, &wsize); + if (n != 2) + return -EINVAL; - if (rc) - return rc; + tool_free_peer_mw(outmw->tc, outmw->widx); + if (wsize) { + ret = tool_setup_peer_mw(outmw->tc, outmw_wrap->pidx, + outmw->widx, addr, wsize); + if (ret) + return ret; + } return size; } @@ -837,195 +958,734 @@ static TOOL_FOPS_RDWR(tool_peer_mw_trans_fops, tool_peer_mw_trans_read, tool_peer_mw_trans_write); -static int tool_init_mw(struct tool_ctx *tc, int idx) +static int tool_init_mws(struct tool_ctx *tc) { - struct tool_mw *mw = &tc->mws[idx]; - phys_addr_t base; - int rc; - - rc = ntb_peer_mw_get_addr(tc->ntb, idx, &base, &mw->win_size); - if (rc) - return rc; - - mw->tc = tc; - mw->idx = idx; - mw->local = ioremap_wc(base, mw->win_size); - if (!mw->local) - return -EFAULT; + int widx, pidx; + + /* Initialize outbound memory windows */ + tc->outmw_cnt = ntb_peer_mw_count(tc->ntb); + tc->outmws = devm_kcalloc(&tc->ntb->dev, tc->outmw_cnt, + sizeof(*tc->outmws), GFP_KERNEL); + if (tc->outmws == NULL) + return -ENOMEM; + + for (widx = 0; widx < tc->outmw_cnt; 
widx++) { + tc->outmws[widx].widx = widx; + tc->outmws[widx].pidx = -1; + tc->outmws[widx].tc = tc; + } + + /* Initialize inbound memory windows and outbound MWs wrapper */ + for (pidx = 0; pidx < tc->peer_cnt; pidx++) { + tc->peers[pidx].inmw_cnt = ntb_mw_count(tc->ntb, pidx); + tc->peers[pidx].inmws = + devm_kcalloc(&tc->ntb->dev, tc->peers[pidx].inmw_cnt, + sizeof(*tc->peers[pidx].inmws), GFP_KERNEL); + if (tc->peers[pidx].inmws == NULL) + return -ENOMEM; + + for (widx = 0; widx < tc->peers[pidx].inmw_cnt; widx++) { + tc->peers[pidx].inmws[widx].widx = widx; + tc->peers[pidx].inmws[widx].pidx = pidx; + tc->peers[pidx].inmws[widx].tc = tc; + } + + tc->peers[pidx].outmw_cnt = ntb_peer_mw_count(tc->ntb); + tc->peers[pidx].outmws = + devm_kcalloc(&tc->ntb->dev, tc->peers[pidx].outmw_cnt, + sizeof(*tc->peers[pidx].outmws), GFP_KERNEL); + + for (widx = 0; widx < tc->peers[pidx].outmw_cnt; widx++) { + tc->peers[pidx].outmws[widx].pidx = pidx; + tc->peers[pidx].outmws[widx].mw = &tc->outmws[widx]; + } + } return 0; } -static void tool_free_mws(struct tool_ctx *tc) +static void tool_clear_mws(struct tool_ctx *tc) { - int i; + int widx, pidx; - for (i = 0; i < tc->mw_count; i++) { - tool_free_mw(tc, i); + /* Free outbound memory windows */ + for (widx = 0; widx < tc->outmw_cnt; widx++) + tool_free_peer_mw(tc, widx); - if (tc->mws[i].local) - iounmap(tc->mws[i].local); + /* Free inbound memory windows */ + for (pidx = 0; pidx < tc->peer_cnt; pidx++) + for (widx = 0; widx < tc->peers[pidx].inmw_cnt; widx++) + tool_free_mw(tc, pidx, widx); +} - tc->mws[i].local = NULL; - } +/*============================================================================== + * Doorbell read/write methods + *============================================================================== + */ + +static ssize_t tool_db_read(struct file *filep, char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_ctx *tc = filep->private_data; + + return tool_fn_read(tc, ubuf, size, offp,
tc->ntb->ops->db_read); } -static void tool_setup_dbgfs(struct tool_ctx *tc) +static ssize_t tool_db_write(struct file *filep, const char __user *ubuf, + size_t size, loff_t *offp) { - int i; + struct tool_ctx *tc = filep->private_data; - /* This modules is useless without dbgfs... */ - if (!tool_dbgfs) { - tc->dbgfs = NULL; - return; + return tool_fn_write(tc, ubuf, size, offp, tc->ntb->ops->db_set, + tc->ntb->ops->db_clear); +} + +static TOOL_FOPS_RDWR(tool_db_fops, + tool_db_read, + tool_db_write); + +static ssize_t tool_db_valid_mask_read(struct file *filep, char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_ctx *tc = filep->private_data; + + return tool_fn_read(tc, ubuf, size, offp, tc->ntb->ops->db_valid_mask); +} + +static TOOL_FOPS_RDWR(tool_db_valid_mask_fops, + tool_db_valid_mask_read, + NULL); + +static ssize_t tool_db_mask_read(struct file *filep, char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_ctx *tc = filep->private_data; + + return tool_fn_read(tc, ubuf, size, offp, tc->ntb->ops->db_read_mask); +} + +static ssize_t tool_db_mask_write(struct file *filep, const char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_ctx *tc = filep->private_data; + + return tool_fn_write(tc, ubuf, size, offp, tc->ntb->ops->db_set_mask, + tc->ntb->ops->db_clear_mask); +} + +static TOOL_FOPS_RDWR(tool_db_mask_fops, + tool_db_mask_read, + tool_db_mask_write); + +static ssize_t tool_peer_db_read(struct file *filep, char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_ctx *tc = filep->private_data; + + return tool_fn_read(tc, ubuf, size, offp, tc->ntb->ops->peer_db_read); +} + +static ssize_t tool_peer_db_write(struct file *filep, const char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_ctx *tc = filep->private_data; + + return tool_fn_write(tc, ubuf, size, offp, tc->ntb->ops->peer_db_set, + tc->ntb->ops->peer_db_clear); +} + +static TOOL_FOPS_RDWR(tool_peer_db_fops, + tool_peer_db_read, + 
tool_peer_db_write); + +static ssize_t tool_peer_db_mask_read(struct file *filep, char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_ctx *tc = filep->private_data; + + return tool_fn_read(tc, ubuf, size, offp, + tc->ntb->ops->peer_db_read_mask); +} + +static ssize_t tool_peer_db_mask_write(struct file *filep, + const char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_ctx *tc = filep->private_data; + + return tool_fn_write(tc, ubuf, size, offp, + tc->ntb->ops->peer_db_set_mask, + tc->ntb->ops->peer_db_clear_mask); +} + +static TOOL_FOPS_RDWR(tool_peer_db_mask_fops, + tool_peer_db_mask_read, + tool_peer_db_mask_write); + +static ssize_t tool_db_event_write(struct file *filep, + const char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_ctx *tc = filep->private_data; + u64 val; + int ret; + + ret = kstrtou64_from_user(ubuf, size, 0, &val); + if (ret) + return ret; + + if (wait_event_interruptible(tc->db_wq, ntb_db_read(tc->ntb) == val)) + return -ERESTART; + + return size; +} + +static TOOL_FOPS_RDWR(tool_db_event_fops, + NULL, + tool_db_event_write); + +/*============================================================================== + * Scratchpads read/write methods + *============================================================================== + */ + +static ssize_t tool_spad_read(struct file *filep, char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_spad *spad = filep->private_data; + char buf[TOOL_BUF_LEN]; + ssize_t pos; + + if (!spad->tc->ntb->ops->spad_read) + return -EINVAL; + + pos = scnprintf(buf, sizeof(buf), "%#x\n", + ntb_spad_read(spad->tc->ntb, spad->sidx)); + + return simple_read_from_buffer(ubuf, size, offp, buf, pos); +} + +static ssize_t tool_spad_write(struct file *filep, const char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_spad *spad = filep->private_data; + u32 val; + int ret; + + if (!spad->tc->ntb->ops->spad_write) { + dev_dbg(&spad->tc->ntb->dev, "no spad write fn\n"); 
+ return -EINVAL; } - tc->dbgfs = debugfs_create_dir(dev_name(&tc->ntb->dev), - tool_dbgfs); - if (!tc->dbgfs) - return; + ret = kstrtou32_from_user(ubuf, size, 0, &val); + if (ret) + return ret; - debugfs_create_file("db", S_IRUSR | S_IWUSR, tc->dbgfs, - tc, &tool_db_fops); + ret = ntb_spad_write(spad->tc->ntb, spad->sidx, val); - debugfs_create_file("mask", S_IRUSR | S_IWUSR, tc->dbgfs, - tc, &tool_mask_fops); + return ret ?: size; +} - debugfs_create_file("peer_db", S_IRUSR | S_IWUSR, tc->dbgfs, - tc, &tool_peer_db_fops); +static TOOL_FOPS_RDWR(tool_spad_fops, + tool_spad_read, + tool_spad_write); + +static ssize_t tool_peer_spad_read(struct file *filep, char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_spad *spad = filep->private_data; + char buf[TOOL_BUF_LEN]; + ssize_t pos; - debugfs_create_file("peer_mask", S_IRUSR | S_IWUSR, tc->dbgfs, - tc, &tool_peer_mask_fops); + if (!spad->tc->ntb->ops->peer_spad_read) + return -EINVAL; - debugfs_create_file("spad", S_IRUSR | S_IWUSR, tc->dbgfs, - tc, &tool_spad_fops); + pos = scnprintf(buf, sizeof(buf), "%#x\n", + ntb_peer_spad_read(spad->tc->ntb, spad->pidx, spad->sidx)); - debugfs_create_file("peer_spad", S_IRUSR | S_IWUSR, tc->dbgfs, - tc, &tool_peer_spad_fops); + return simple_read_from_buffer(ubuf, size, offp, buf, pos); +} - debugfs_create_file("link", S_IRUSR | S_IWUSR, tc->dbgfs, - tc, &tool_link_fops); +static ssize_t tool_peer_spad_write(struct file *filep, const char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_spad *spad = filep->private_data; + u32 val; + int ret; + + if (!spad->tc->ntb->ops->peer_spad_write) { + dev_dbg(&spad->tc->ntb->dev, "no spad write fn\n"); + return -EINVAL; + } + + ret = kstrtou32_from_user(ubuf, size, 0, &val); + if (ret) + return ret; - debugfs_create_file("link_event", S_IWUSR, tc->dbgfs, - tc, &tool_link_event_fops); + ret = ntb_peer_spad_write(spad->tc->ntb, spad->pidx, spad->sidx, val); - for (i = 0; i < tc->mw_count; i++) { - char buf[30]; + 
return ret ?: size; +} + +static TOOL_FOPS_RDWR(tool_peer_spad_fops, + tool_peer_spad_read, + tool_peer_spad_write); - snprintf(buf, sizeof(buf), "mw%d", i); - debugfs_create_file(buf, S_IRUSR | S_IWUSR, tc->dbgfs, - &tc->mws[i], &tool_mw_fops); +static int tool_init_spads(struct tool_ctx *tc) +{ + int sidx, pidx; - snprintf(buf, sizeof(buf), "peer_trans%d", i); - debugfs_create_file(buf, S_IRUSR | S_IWUSR, tc->dbgfs, - &tc->mws[i], &tool_peer_mw_trans_fops); + /* Initialize inbound scratchpad structures */ + tc->inspad_cnt = ntb_spad_count(tc->ntb); + tc->inspads = devm_kcalloc(&tc->ntb->dev, tc->inspad_cnt, + sizeof(*tc->inspads), GFP_KERNEL); + if (tc->inspads == NULL) + return -ENOMEM; + + for (sidx = 0; sidx < tc->inspad_cnt; sidx++) { + tc->inspads[sidx].sidx = sidx; + tc->inspads[sidx].pidx = -1; + tc->inspads[sidx].tc = tc; } + + /* Initialize outbound scratchpad structures */ + for (pidx = 0; pidx < tc->peer_cnt; pidx++) { + tc->peers[pidx].outspad_cnt = ntb_spad_count(tc->ntb); + tc->peers[pidx].outspads = + devm_kcalloc(&tc->ntb->dev, tc->peers[pidx].outspad_cnt, + sizeof(*tc->peers[pidx].outspads), GFP_KERNEL); + if (tc->peers[pidx].outspads == NULL) + return -ENOMEM; + + for (sidx = 0; sidx < tc->peers[pidx].outspad_cnt; sidx++) { + tc->peers[pidx].outspads[sidx].sidx = sidx; + tc->peers[pidx].outspads[sidx].pidx = pidx; + tc->peers[pidx].outspads[sidx].tc = tc; + } + } + + return 0; } -static int tool_probe(struct ntb_client *self, struct ntb_dev *ntb) +/*============================================================================== + * Messages read/write methods + *============================================================================== + */ + +static ssize_t tool_inmsg_read(struct file *filep, char __user *ubuf, + size_t size, loff_t *offp) { - struct tool_ctx *tc; - int rc; - int i; + struct tool_msg *msg = filep->private_data; + char buf[TOOL_BUF_LEN]; + ssize_t pos; + u32 data; + int pidx; + + data = ntb_msg_read(msg->tc->ntb, &pidx, 
msg->midx); + + pos = scnprintf(buf, sizeof(buf), "0x%08x<-%d\n", data, pidx); + + return simple_read_from_buffer(ubuf, size, offp, buf, pos); +} + +static TOOL_FOPS_RDWR(tool_inmsg_fops, + tool_inmsg_read, + NULL); + +static ssize_t tool_outmsg_write(struct file *filep, + const char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_msg *msg = filep->private_data; + u32 val; + int ret; + + ret = kstrtou32_from_user(ubuf, size, 0, &val); + if (ret) + return ret; + + ret = ntb_peer_msg_write(msg->tc->ntb, msg->pidx, msg->midx, val); + + return ret ? : size; +} + +static TOOL_FOPS_RDWR(tool_outmsg_fops, + NULL, + tool_outmsg_write); + +static ssize_t tool_msg_sts_read(struct file *filep, char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_ctx *tc = filep->private_data; + + return tool_fn_read(tc, ubuf, size, offp, tc->ntb->ops->msg_read_sts); +} + +static ssize_t tool_msg_sts_write(struct file *filep, const char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_ctx *tc = filep->private_data; + + return tool_fn_write(tc, ubuf, size, offp, NULL, + tc->ntb->ops->msg_clear_sts); +} + +static TOOL_FOPS_RDWR(tool_msg_sts_fops, + tool_msg_sts_read, + tool_msg_sts_write); + +static ssize_t tool_msg_inbits_read(struct file *filep, char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_ctx *tc = filep->private_data; + + return tool_fn_read(tc, ubuf, size, offp, tc->ntb->ops->msg_inbits); +} + +static TOOL_FOPS_RDWR(tool_msg_inbits_fops, + tool_msg_inbits_read, + NULL); + +static ssize_t tool_msg_outbits_read(struct file *filep, char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_ctx *tc = filep->private_data; + + return tool_fn_read(tc, ubuf, size, offp, tc->ntb->ops->msg_outbits); +} + +static TOOL_FOPS_RDWR(tool_msg_outbits_fops, + tool_msg_outbits_read, + NULL); + +static ssize_t tool_msg_mask_write(struct file *filep, const char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_ctx *tc = filep->private_data; 
+ + return tool_fn_write(tc, ubuf, size, offp, + tc->ntb->ops->msg_set_mask, + tc->ntb->ops->msg_clear_mask); +} + +static TOOL_FOPS_RDWR(tool_msg_mask_fops, + NULL, + tool_msg_mask_write); + +static ssize_t tool_msg_event_write(struct file *filep, + const char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_ctx *tc = filep->private_data; + u64 val; + int ret; + + ret = kstrtou64_from_user(ubuf, size, 0, &val); + if (ret) + return ret; + + if (wait_event_interruptible(tc->msg_wq, + ntb_msg_read_sts(tc->ntb) == val)) + return -ERESTART; - if (!ntb->ops->mw_set_trans) { - dev_dbg(&ntb->dev, "need inbound MW based NTB API\n"); - rc = -EINVAL; - goto err_tc; + return size; +} + +static TOOL_FOPS_RDWR(tool_msg_event_fops, + NULL, + tool_msg_event_write); + +static int tool_init_msgs(struct tool_ctx *tc) +{ + int midx, pidx; + + /* Initialize inbound message structures */ + tc->inmsg_cnt = ntb_msg_count(tc->ntb); + tc->inmsgs = devm_kcalloc(&tc->ntb->dev, tc->inmsg_cnt, + sizeof(*tc->inmsgs), GFP_KERNEL); + if (tc->inmsgs == NULL) + return -ENOMEM; + + for (midx = 0; midx < tc->inmsg_cnt; midx++) { + tc->inmsgs[midx].midx = midx; + tc->inmsgs[midx].pidx = -1; + tc->inmsgs[midx].tc = tc; } - if (ntb_spad_count(ntb) < 1) { - dev_dbg(&ntb->dev, "no enough scratchpads\n"); - rc = -EINVAL; - goto err_tc; + /* Initialize outbound message structures */ + for (pidx = 0; pidx < tc->peer_cnt; pidx++) { + tc->peers[pidx].outmsg_cnt = ntb_msg_count(tc->ntb); + tc->peers[pidx].outmsgs = + devm_kcalloc(&tc->ntb->dev, tc->peers[pidx].outmsg_cnt, + sizeof(*tc->peers[pidx].outmsgs), GFP_KERNEL); + if (tc->peers[pidx].outmsgs == NULL) + return -ENOMEM; + + for (midx = 0; midx < tc->peers[pidx].outmsg_cnt; midx++) { + tc->peers[pidx].outmsgs[midx].midx = midx; + tc->peers[pidx].outmsgs[midx].pidx = pidx; + tc->peers[pidx].outmsgs[midx].tc = tc; + } } + return 0; +} + +/*============================================================================== + * Initialization methods + 
*============================================================================== + */ + +static struct tool_ctx *tool_create_data(struct ntb_dev *ntb) +{ + struct tool_ctx *tc; + + tc = devm_kzalloc(&ntb->dev, sizeof(*tc), GFP_KERNEL); + if (tc == NULL) + return ERR_PTR(-ENOMEM); + + tc->ntb = ntb; + init_waitqueue_head(&tc->link_wq); + init_waitqueue_head(&tc->db_wq); + init_waitqueue_head(&tc->msg_wq); + if (ntb_db_is_unsafe(ntb)) dev_dbg(&ntb->dev, "doorbell is unsafe\n"); if (ntb_spad_is_unsafe(ntb)) dev_dbg(&ntb->dev, "scratchpad is unsafe\n"); - if (ntb_peer_port_count(ntb) != NTB_DEF_PEER_CNT) - dev_warn(&ntb->dev, "multi-port NTB is unsupported\n"); + return tc; +} + +static void tool_clear_data(struct tool_ctx *tc) +{ + wake_up(&tc->link_wq); + wake_up(&tc->db_wq); + wake_up(&tc->msg_wq); +} + +static int tool_init_ntb(struct tool_ctx *tc) +{ + return ntb_set_ctx(tc->ntb, tc, &tool_ops); +} + +static void tool_clear_ntb(struct tool_ctx *tc) +{ + ntb_clear_ctx(tc->ntb); + ntb_link_disable(tc->ntb); +} - tc = kzalloc(sizeof(*tc), GFP_KERNEL); - if (!tc) { - rc = -ENOMEM; - goto err_tc; +static void tool_setup_dbgfs(struct tool_ctx *tc) +{ + int pidx, widx, sidx, midx; + char buf[TOOL_BUF_LEN]; + + /* This module is useless without dbgfs... 
*/ + if (!tool_dbgfs_topdir) { + tc->dbgfs_dir = NULL; + return; } - tc->ntb = ntb; - init_waitqueue_head(&tc->link_wq); + tc->dbgfs_dir = debugfs_create_dir(dev_name(&tc->ntb->dev), + tool_dbgfs_topdir); + if (!tc->dbgfs_dir) + return; + + debugfs_create_file("port", 0600, tc->dbgfs_dir, + tc, &tool_port_fops); + + debugfs_create_file("link", 0600, tc->dbgfs_dir, + tc, &tool_link_fops); + + debugfs_create_file("db", 0600, tc->dbgfs_dir, + tc, &tool_db_fops); + + debugfs_create_file("db_valid_mask", 0600, tc->dbgfs_dir, + tc, &tool_db_valid_mask_fops); + + debugfs_create_file("db_mask", 0600, tc->dbgfs_dir, + tc, &tool_db_mask_fops); + + debugfs_create_file("db_event", 0600, tc->dbgfs_dir, + tc, &tool_db_event_fops); - tc->mw_count = min(ntb_peer_mw_count(tc->ntb), MAX_MWS); - for (i = 0; i < tc->mw_count; i++) { - rc = tool_init_mw(tc, i); - if (rc) - goto err_ctx; + debugfs_create_file("peer_db", 0600, tc->dbgfs_dir, + tc, &tool_peer_db_fops); + + debugfs_create_file("peer_db_mask", 0600, tc->dbgfs_dir, + tc, &tool_peer_db_mask_fops); + + if (tc->inspad_cnt != 0) { + for (sidx = 0; sidx < tc->inspad_cnt; sidx++) { + snprintf(buf, sizeof(buf), "spad%d", sidx); + + debugfs_create_file(buf, 0600, tc->dbgfs_dir, + &tc->inspads[sidx], &tool_spad_fops); + } } - tool_setup_dbgfs(tc); + if (tc->inmsg_cnt != 0) { + for (midx = 0; midx < tc->inmsg_cnt; midx++) { + snprintf(buf, sizeof(buf), "msg%d", midx); + debugfs_create_file(buf, 0600, tc->dbgfs_dir, + &tc->inmsgs[midx], &tool_inmsg_fops); + } + + debugfs_create_file("msg_sts", 0600, tc->dbgfs_dir, + tc, &tool_msg_sts_fops); + + debugfs_create_file("msg_inbits", 0600, tc->dbgfs_dir, + tc, &tool_msg_inbits_fops); + + debugfs_create_file("msg_outbits", 0600, tc->dbgfs_dir, + tc, &tool_msg_outbits_fops); + + debugfs_create_file("msg_mask", 0600, tc->dbgfs_dir, + tc, &tool_msg_mask_fops); + + debugfs_create_file("msg_event", 0600, tc->dbgfs_dir, + tc, &tool_msg_event_fops); + } + + for (pidx = 0; pidx < tc->peer_cnt; 
pidx++) { + snprintf(buf, sizeof(buf), "peer%d", pidx); + tc->peers[pidx].dbgfs_dir = + debugfs_create_dir(buf, tc->dbgfs_dir); + + debugfs_create_file("port", 0600, + tc->peers[pidx].dbgfs_dir, + &tc->peers[pidx], &tool_peer_port_fops); + + debugfs_create_file("link", 0200, + tc->peers[pidx].dbgfs_dir, + &tc->peers[pidx], &tool_peer_link_fops); + + debugfs_create_file("link_event", 0200, + tc->peers[pidx].dbgfs_dir, + &tc->peers[pidx], &tool_peer_link_event_fops); + + for (widx = 0; widx < tc->peers[pidx].inmw_cnt; widx++) { + snprintf(buf, sizeof(buf), "mw_trans%d", widx); + debugfs_create_file(buf, 0600, + tc->peers[pidx].dbgfs_dir, + &tc->peers[pidx].inmws[widx], + &tool_mw_trans_fops); + } + + for (widx = 0; widx < tc->peers[pidx].outmw_cnt; widx++) { + snprintf(buf, sizeof(buf), "peer_mw_trans%d", widx); + debugfs_create_file(buf, 0600, + tc->peers[pidx].dbgfs_dir, + &tc->peers[pidx].outmws[widx], + &tool_peer_mw_trans_fops); + } + + for (sidx = 0; sidx < tc->peers[pidx].outspad_cnt; sidx++) { + snprintf(buf, sizeof(buf), "spad%d", sidx); + + debugfs_create_file(buf, 0600, + tc->peers[pidx].dbgfs_dir, + &tc->peers[pidx].outspads[sidx], + &tool_peer_spad_fops); + } + + for (midx = 0; midx < tc->peers[pidx].outmsg_cnt; midx++) { + snprintf(buf, sizeof(buf), "msg%d", midx); + debugfs_create_file(buf, 0600, + tc->peers[pidx].dbgfs_dir, + &tc->peers[pidx].outmsgs[midx], + &tool_outmsg_fops); + } + } +} + +static void tool_clear_dbgfs(struct tool_ctx *tc) +{ + debugfs_remove_recursive(tc->dbgfs_dir); +} + +static int tool_probe(struct ntb_client *self, struct ntb_dev *ntb) +{ + struct tool_ctx *tc; + int ret; - rc = ntb_set_ctx(ntb, tc, &tool_ops); - if (rc) - goto err_ctx; + tc = tool_create_data(ntb); + if (IS_ERR(tc)) + return PTR_ERR(tc); - ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO); - ntb_link_event(ntb); + ret = tool_init_peers(tc); + if (ret != 0) + goto err_clear_data; + + ret = tool_init_mws(tc); + if (ret != 0) + goto err_clear_data; + + ret = 
tool_init_spads(tc); + if (ret != 0) + goto err_clear_mws; + + ret = tool_init_msgs(tc); + if (ret != 0) + goto err_clear_mws; + + ret = tool_init_ntb(tc); + if (ret != 0) + goto err_clear_mws; + + tool_setup_dbgfs(tc); return 0; -err_ctx: - tool_free_mws(tc); - debugfs_remove_recursive(tc->dbgfs); - kfree(tc); -err_tc: - return rc; +err_clear_mws: + tool_clear_mws(tc); + +err_clear_data: + tool_clear_data(tc); + + return ret; } static void tool_remove(struct ntb_client *self, struct ntb_dev *ntb) { struct tool_ctx *tc = ntb->ctx; - tool_free_mws(tc); + tool_clear_dbgfs(tc); + + tool_clear_ntb(tc); - ntb_clear_ctx(ntb); - ntb_link_disable(ntb); + tool_clear_mws(tc); - debugfs_remove_recursive(tc->dbgfs); - kfree(tc); + tool_clear_data(tc); } static struct ntb_client tool_client = { .ops = { .probe = tool_probe, .remove = tool_remove, - }, + } }; static int __init tool_init(void) { - int rc; + int ret; if (debugfs_initialized()) - tool_dbgfs = debugfs_create_dir(KBUILD_MODNAME, NULL); - - rc = ntb_register_client(&tool_client); - if (rc) - goto err_client; + tool_dbgfs_topdir = debugfs_create_dir(KBUILD_MODNAME, NULL); - return 0; + ret = ntb_register_client(&tool_client); + if (ret) + debugfs_remove_recursive(tool_dbgfs_topdir); -err_client: - debugfs_remove_recursive(tool_dbgfs); - return rc; + return ret; } module_init(tool_init); static void __exit tool_exit(void) { ntb_unregister_client(&tool_client); - debugfs_remove_recursive(tool_dbgfs); + debugfs_remove_recursive(tool_dbgfs_topdir); } module_exit(tool_exit); + -- cgit From 5648e56d03fa0c153fccd089efd903f1f6af559f Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Wed, 6 Dec 2017 17:31:57 +0300 Subject: NTB: ntb_perf: Add full multi-port NTB API support Former NTB Performance driver could only work with NTB devices, which got Scratchpads available and had just two ports. Since there are devices, which don't have Scratchpads and got more than two peer ports, the performance measuring tool needs to be rewritten. 
This patch adds the ability to test any available NTB peer. Additionally, it allows NTB memory windows to be set up using any available data exchange interface: Scratchpad or Message registers. Some cleanups are also included here. Signed-off-by: Serge Semin Signed-off-by: Jon Mason --- drivers/ntb/test/ntb_perf.c | 1820 +++++++++++++++++++++++++++++-------------- 1 file changed, 1219 insertions(+), 601 deletions(-) (limited to 'drivers') diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c index 6f6c602d04af..8de72f3fba4d 100644 --- a/drivers/ntb/test/ntb_perf.c +++ b/drivers/ntb/test/ntb_perf.c @@ -5,6 +5,7 @@ * GPL LICENSE SUMMARY * * Copyright(c) 2015 Intel Corporation. All rights reserved. + * Copyright(c) 2017 T-Platforms. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -13,6 +14,7 @@ * BSD LICENSE * * Copyright(c) 2015 Intel Corporation. All rights reserved. + * Copyright(c) 2017 T-Platforms. All Rights Reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -40,859 +42,1475 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * PCIe NTB Perf Linux driver + * PCIe NTB Perf Linux driver + */ + +/* + * How to use this tool, by example. + * + * Assuming $DBG_DIR is something like: + * '/sys/kernel/debug/ntb_perf/0000:00:03.0' + * Suppose aside from local device there is at least one remote device + * connected to NTB with index 0. 
+ *----------------------------------------------------------------------------- + * Eg: install driver with specified chunk/total orders and dma-enabled flag + * + * root@self# insmod ntb_perf.ko chunk_order=19 total_order=28 use_dma + *----------------------------------------------------------------------------- + * Eg: check NTB ports (index) and MW mapping information + * + * root@self# cat $DBG_DIR/info + *----------------------------------------------------------------------------- + * Eg: start performance test with peer (index 0) and get the test metrics + * + * root@self# echo 0 > $DBG_DIR/run + * root@self# cat $DBG_DIR/run */ #include #include #include -#include -#include -#include +#include +#include #include +#include #include +#include #include -#include -#include -#include #include #include +#include +#include +#include #include -#include #define DRIVER_NAME "ntb_perf" -#define DRIVER_DESCRIPTION "PCIe NTB Performance Measurement Tool" - -#define DRIVER_VERSION "1.0" -#define DRIVER_AUTHOR "Dave Jiang " - -#define PERF_LINK_DOWN_TIMEOUT 10 -#define PERF_VERSION 0xffff0001 -#define MAX_THREADS 32 -#define MAX_TEST_SIZE SZ_1M -#define MAX_SRCS 32 -#define DMA_OUT_RESOURCE_TO msecs_to_jiffies(50) -#define DMA_RETRIES 20 -#define SZ_4G (1ULL << 32) -#define MAX_SEG_ORDER 20 /* no larger than 1M for kmalloc buffer */ -#define PIDX NTB_DEF_PEER_IDX +#define DRIVER_VERSION "2.0" MODULE_LICENSE("Dual BSD/GPL"); MODULE_VERSION(DRIVER_VERSION); -MODULE_AUTHOR(DRIVER_AUTHOR); -MODULE_DESCRIPTION(DRIVER_DESCRIPTION); +MODULE_AUTHOR("Dave Jiang "); +MODULE_DESCRIPTION("PCIe NTB Performance Measurement Tool"); + +#define MAX_THREADS_CNT 32 +#define DEF_THREADS_CNT 1 +#define MAX_CHUNK_SIZE SZ_1M +#define MAX_CHUNK_ORDER 20 /* no larger than 1M */ + +#define DMA_TRIES 100 +#define DMA_MDELAY 10 -static struct dentry *perf_debugfs_dir; +#define MSG_TRIES 500 +#define MSG_UDELAY_LOW 1000 +#define MSG_UDELAY_HIGH 2000 + +#define PERF_BUF_LEN 1024 static unsigned long 
max_mw_size; module_param(max_mw_size, ulong, 0644); -MODULE_PARM_DESC(max_mw_size, "Limit size of large memory windows"); +MODULE_PARM_DESC(max_mw_size, "Upper limit of memory window size"); -static unsigned int seg_order = 19; /* 512K */ -module_param(seg_order, uint, 0644); -MODULE_PARM_DESC(seg_order, "size order [2^n] of buffer segment for testing"); +static unsigned char chunk_order = 19; /* 512K */ +module_param(chunk_order, byte, 0644); +MODULE_PARM_DESC(chunk_order, "Data chunk order [2^n] to transfer"); -static unsigned int run_order = 32; /* 4G */ -module_param(run_order, uint, 0644); -MODULE_PARM_DESC(run_order, "size order [2^n] of total data to transfer"); +static unsigned char total_order = 30; /* 1G */ +module_param(total_order, byte, 0644); +MODULE_PARM_DESC(total_order, "Total data order [2^n] to transfer"); static bool use_dma; /* default to 0 */ module_param(use_dma, bool, 0644); -MODULE_PARM_DESC(use_dma, "Using DMA engine to measure performance"); - -static bool on_node = true; /* default to 1 */ -module_param(on_node, bool, 0644); -MODULE_PARM_DESC(on_node, "Run threads only on NTB device node (default: true)"); - -struct perf_mw { - phys_addr_t phys_addr; - resource_size_t phys_size; - void __iomem *vbase; - size_t xlat_size; - size_t buf_size; - void *virt_addr; - dma_addr_t dma_addr; +MODULE_PARM_DESC(use_dma, "Use DMA engine to measure performance"); + +/*============================================================================== + * Perf driver data definition + *============================================================================== + */ + +enum perf_cmd { + PERF_CMD_INVAL = -1,/* invalid spad command */ + PERF_CMD_SSIZE = 0, /* send out buffer size */ + PERF_CMD_RSIZE = 1, /* recv in buffer size */ + PERF_CMD_SXLAT = 2, /* send in buffer xlat */ + PERF_CMD_RXLAT = 3, /* recv out buffer xlat */ + PERF_CMD_CLEAR = 4, /* clear allocated memory */ + PERF_STS_DONE = 5, /* init is done */ + PERF_STS_LNKUP = 6, /* link up state flag 
*/ }; struct perf_ctx; -struct pthr_ctx { - struct task_struct *thread; - struct perf_ctx *perf; - atomic_t dma_sync; - struct dma_chan *dma_chan; - int dma_prep_err; - int src_idx; - void *srcs[MAX_SRCS]; - wait_queue_head_t *wq; - int status; - u64 copied; - u64 diff_us; +struct perf_peer { + struct perf_ctx *perf; + int pidx; + int gidx; + + /* Outbound MW params */ + u64 outbuf_xlat; + resource_size_t outbuf_size; + void __iomem *outbuf; + + /* Inbound MW params */ + dma_addr_t inbuf_xlat; + resource_size_t inbuf_size; + void *inbuf; + + /* NTB connection setup service */ + struct work_struct service; + unsigned long sts; }; +#define to_peer_service(__work) \ + container_of(__work, struct perf_peer, service) -struct perf_ctx { - struct ntb_dev *ntb; - spinlock_t db_lock; - struct perf_mw mw; - bool link_is_up; - struct delayed_work link_work; - wait_queue_head_t link_wq; - u8 perf_threads; - /* mutex ensures only one set of threads run at once */ - struct mutex run_mutex; - struct pthr_ctx pthr_ctx[MAX_THREADS]; - atomic_t tsync; - atomic_t tdone; +struct perf_thread { + struct perf_ctx *perf; + int tidx; + + /* DMA-based test sync parameters */ + atomic_t dma_sync; + wait_queue_head_t dma_wait; + struct dma_chan *dma_chan; + + /* Data source and measured statistics */ + void *src; + u64 copied; + ktime_t duration; + int status; + struct work_struct work; }; +#define to_thread_work(__work) \ + container_of(__work, struct perf_thread, work) -enum { - VERSION = 0, - MW_SZ_HIGH, - MW_SZ_LOW, - MAX_SPAD +struct perf_ctx { + struct ntb_dev *ntb; + + /* Global device index and peers descriptors */ + int gidx; + int pcnt; + struct perf_peer *peers; + + /* Performance measuring work-threads interface */ + unsigned long busy_flag; + wait_queue_head_t twait; + atomic_t tsync; + u8 tcnt; + struct perf_peer *test_peer; + struct perf_thread threads[MAX_THREADS_CNT]; + + /* Scratchpad/Message IO operations */ + int (*cmd_send)(struct perf_peer *peer, enum perf_cmd cmd, u64 
data); + int (*cmd_recv)(struct perf_ctx *perf, int *pidx, enum perf_cmd *cmd, + u64 *data); + + struct dentry *dbgfs_dir; }; +/* + * Scratchpads-base commands interface + */ +#define PERF_SPAD_CNT(_pcnt) \ + (3*((_pcnt) + 1)) +#define PERF_SPAD_CMD(_gidx) \ + (3*(_gidx)) +#define PERF_SPAD_LDATA(_gidx) \ + (3*(_gidx) + 1) +#define PERF_SPAD_HDATA(_gidx) \ + (3*(_gidx) + 2) +#define PERF_SPAD_NOTIFY(_gidx) \ + (BIT_ULL(_gidx)) + +/* + * Messages-base commands interface + */ +#define PERF_MSG_CNT 3 +#define PERF_MSG_CMD 0 +#define PERF_MSG_LDATA 1 +#define PERF_MSG_HDATA 2 + +/*============================================================================== + * Static data declarations + *============================================================================== + */ + +static struct dentry *perf_dbgfs_topdir; + +static struct workqueue_struct *perf_wq __read_mostly; + +/*============================================================================== + * NTB cross-link commands execution service + *============================================================================== + */ + +static void perf_terminate_test(struct perf_ctx *perf); + +static inline bool perf_link_is_up(struct perf_peer *peer) +{ + u64 link; + + link = ntb_link_is_up(peer->perf->ntb, NULL, NULL); + return !!(link & BIT_ULL_MASK(peer->pidx)); +} + +static int perf_spad_cmd_send(struct perf_peer *peer, enum perf_cmd cmd, + u64 data) +{ + struct perf_ctx *perf = peer->perf; + int try; + u32 sts; + + dev_dbg(&perf->ntb->dev, "CMD send: %d 0x%llx\n", cmd, data); + + /* + * Perform predefined number of attempts before give up. + * We are sending the data to the port specific scratchpad, so + * to prevent a multi-port access race-condition. Additionally + * there is no need in local locking since only thread-safe + * service work is using this method. 
+ */ + for (try = 0; try < MSG_TRIES; try++) { + if (!perf_link_is_up(peer)) + return -ENOLINK; + + sts = ntb_peer_spad_read(perf->ntb, peer->pidx, + PERF_SPAD_CMD(perf->gidx)); + if (le32_to_cpu(sts) != PERF_CMD_INVAL) { + usleep_range(MSG_UDELAY_LOW, MSG_UDELAY_HIGH); + continue; + } + + ntb_peer_spad_write(perf->ntb, peer->pidx, + PERF_SPAD_LDATA(perf->gidx), + cpu_to_le32(lower_32_bits(data))); + ntb_peer_spad_write(perf->ntb, peer->pidx, + PERF_SPAD_HDATA(perf->gidx), + cpu_to_le32(upper_32_bits(data))); + mmiowb(); + ntb_peer_spad_write(perf->ntb, peer->pidx, + PERF_SPAD_CMD(perf->gidx), + cpu_to_le32(cmd)); + mmiowb(); + ntb_peer_db_set(perf->ntb, PERF_SPAD_NOTIFY(peer->gidx)); + + dev_dbg(&perf->ntb->dev, "DB ring peer %#llx\n", + PERF_SPAD_NOTIFY(peer->gidx)); + + break; + } + + return try < MSG_TRIES ? 0 : -EAGAIN; +} + +static int perf_spad_cmd_recv(struct perf_ctx *perf, int *pidx, + enum perf_cmd *cmd, u64 *data) +{ + struct perf_peer *peer; + u32 val; + + ntb_db_clear(perf->ntb, PERF_SPAD_NOTIFY(perf->gidx)); + + /* + * We start scanning all over, since cleared DB may have been set + * by any peer. Yes, it makes peer with smaller index being + * serviced with greater priority, but it's convenient for spad + * and message code unification and simplicity. 
+ */ + for (*pidx = 0; *pidx < perf->pcnt; (*pidx)++) { + peer = &perf->peers[*pidx]; + + if (!perf_link_is_up(peer)) + continue; + + val = ntb_spad_read(perf->ntb, PERF_SPAD_CMD(peer->gidx)); + val = le32_to_cpu(val); + if (val == PERF_CMD_INVAL) + continue; + + *cmd = val; + + val = ntb_spad_read(perf->ntb, PERF_SPAD_LDATA(peer->gidx)); + *data = le32_to_cpu(val); + + val = ntb_spad_read(perf->ntb, PERF_SPAD_HDATA(peer->gidx)); + *data |= (u64)le32_to_cpu(val) << 32; + + /* Next command can be retrieved from now */ + ntb_spad_write(perf->ntb, PERF_SPAD_CMD(peer->gidx), + cpu_to_le32(PERF_CMD_INVAL)); + + dev_dbg(&perf->ntb->dev, "CMD recv: %d 0x%llx\n", *cmd, *data); + + return 0; + } + + return -ENODATA; +} + +static int perf_msg_cmd_send(struct perf_peer *peer, enum perf_cmd cmd, + u64 data) +{ + struct perf_ctx *perf = peer->perf; + int try, ret; + u64 outbits; + + dev_dbg(&perf->ntb->dev, "CMD send: %d 0x%llx\n", cmd, data); + + /* + * Perform predefined number of attempts before give up. Message + * registers are free of race-condition problem when accessed + * from different ports, so we don't need splitting registers + * by global device index. We also won't have local locking, + * since the method is used from service work only. + */ + outbits = ntb_msg_outbits(perf->ntb); + for (try = 0; try < MSG_TRIES; try++) { + if (!perf_link_is_up(peer)) + return -ENOLINK; + + ret = ntb_msg_clear_sts(perf->ntb, outbits); + if (ret) + return ret; + + ntb_peer_msg_write(perf->ntb, peer->pidx, PERF_MSG_LDATA, + cpu_to_le32(lower_32_bits(data))); + + if (ntb_msg_read_sts(perf->ntb) & outbits) { + usleep_range(MSG_UDELAY_LOW, MSG_UDELAY_HIGH); + continue; + } + + ntb_peer_msg_write(perf->ntb, peer->pidx, PERF_MSG_HDATA, + cpu_to_le32(upper_32_bits(data))); + mmiowb(); + + /* This call shall trigger peer message event */ + ntb_peer_msg_write(perf->ntb, peer->pidx, PERF_MSG_CMD, + cpu_to_le32(cmd)); + + break; + } + + return try < MSG_TRIES ? 
0 : -EAGAIN; +} + +static int perf_msg_cmd_recv(struct perf_ctx *perf, int *pidx, + enum perf_cmd *cmd, u64 *data) +{ + u64 inbits; + u32 val; + + inbits = ntb_msg_inbits(perf->ntb); + + if (hweight64(ntb_msg_read_sts(perf->ntb) & inbits) < 3) + return -ENODATA; + + val = ntb_msg_read(perf->ntb, pidx, PERF_MSG_CMD); + *cmd = le32_to_cpu(val); + + val = ntb_msg_read(perf->ntb, pidx, PERF_MSG_LDATA); + *data = le32_to_cpu(val); + + val = ntb_msg_read(perf->ntb, pidx, PERF_MSG_HDATA); + *data |= (u64)le32_to_cpu(val) << 32; + + /* Next command can be retrieved from now */ + ntb_msg_clear_sts(perf->ntb, inbits); + + dev_dbg(&perf->ntb->dev, "CMD recv: %d 0x%llx\n", *cmd, *data); + + return 0; +} + +static int perf_cmd_send(struct perf_peer *peer, enum perf_cmd cmd, u64 data) +{ + struct perf_ctx *perf = peer->perf; + + if (cmd == PERF_CMD_SSIZE || cmd == PERF_CMD_SXLAT) + return perf->cmd_send(peer, cmd, data); + + dev_err(&perf->ntb->dev, "Send invalid command\n"); + return -EINVAL; +} + +static int perf_cmd_exec(struct perf_peer *peer, enum perf_cmd cmd) +{ + switch (cmd) { + case PERF_CMD_SSIZE: + case PERF_CMD_RSIZE: + case PERF_CMD_SXLAT: + case PERF_CMD_RXLAT: + case PERF_CMD_CLEAR: + break; + default: + dev_err(&peer->perf->ntb->dev, "Exec invalid command\n"); + return -EINVAL; + } + + /* No need of memory barrier, since bit ops have internal lock */ + set_bit(cmd, &peer->sts); + + dev_dbg(&peer->perf->ntb->dev, "CMD exec: %d\n", cmd); + + (void)queue_work(system_highpri_wq, &peer->service); + + return 0; +} + +static int perf_cmd_recv(struct perf_ctx *perf) +{ + struct perf_peer *peer; + int ret, pidx, cmd; + u64 data; + + while (!(ret = perf->cmd_recv(perf, &pidx, &cmd, &data))) { + peer = &perf->peers[pidx]; + + switch (cmd) { + case PERF_CMD_SSIZE: + peer->inbuf_size = data; + return perf_cmd_exec(peer, PERF_CMD_RSIZE); + case PERF_CMD_SXLAT: + peer->outbuf_xlat = data; + return perf_cmd_exec(peer, PERF_CMD_RXLAT); + default: + dev_err(&perf->ntb->dev, "Recv 
invalid command\n"); + return -EINVAL; + } + } + + /* Return 0 if no data left to process, otherwise an error */ + return ret == -ENODATA ? 0 : ret; +} + static void perf_link_event(void *ctx) { struct perf_ctx *perf = ctx; + struct perf_peer *peer; + bool lnk_up; + int pidx; - if (ntb_link_is_up(perf->ntb, NULL, NULL) == 1) { - schedule_delayed_work(&perf->link_work, 2*HZ); - } else { - dev_dbg(&perf->ntb->pdev->dev, "link down\n"); + for (pidx = 0; pidx < perf->pcnt; pidx++) { + peer = &perf->peers[pidx]; - if (!perf->link_is_up) - cancel_delayed_work_sync(&perf->link_work); + lnk_up = perf_link_is_up(peer); - perf->link_is_up = false; + if (lnk_up && + !test_and_set_bit(PERF_STS_LNKUP, &peer->sts)) { + perf_cmd_exec(peer, PERF_CMD_SSIZE); + } else if (!lnk_up && + test_and_clear_bit(PERF_STS_LNKUP, &peer->sts)) { + perf_cmd_exec(peer, PERF_CMD_CLEAR); + } } } static void perf_db_event(void *ctx, int vec) { struct perf_ctx *perf = ctx; - u64 db_bits, db_mask; - db_mask = ntb_db_vector_mask(perf->ntb, vec); - db_bits = ntb_db_read(perf->ntb); + dev_dbg(&perf->ntb->dev, "DB vec %d mask %#llx bits %#llx\n", vec, + ntb_db_vector_mask(perf->ntb, vec), ntb_db_read(perf->ntb)); + + /* Just receive all available commands */ + (void)perf_cmd_recv(perf); +} + +static void perf_msg_event(void *ctx) +{ + struct perf_ctx *perf = ctx; + + dev_dbg(&perf->ntb->dev, "Msg status bits %#llx\n", + ntb_msg_read_sts(perf->ntb)); - dev_dbg(&perf->ntb->dev, "doorbell vec %d mask %#llx bits %#llx\n", - vec, db_mask, db_bits); + /* Messages are only sent one-by-one */ + (void)perf_cmd_recv(perf); } static const struct ntb_ctx_ops perf_ops = { .link_event = perf_link_event, .db_event = perf_db_event, + .msg_event = perf_msg_event }; -static void perf_copy_callback(void *data) +static void perf_free_outbuf(struct perf_peer *peer) +{ + (void)ntb_peer_mw_clear_trans(peer->perf->ntb, peer->pidx, peer->gidx); +} + +static int perf_setup_outbuf(struct perf_peer *peer) { - struct pthr_ctx *pctx = 
data; + struct perf_ctx *perf = peer->perf; + int ret; + + /* Outbuf size can be unaligned due to custom max_mw_size */ + ret = ntb_peer_mw_set_trans(perf->ntb, peer->pidx, peer->gidx, + peer->outbuf_xlat, peer->outbuf_size); + if (ret) { + dev_err(&perf->ntb->dev, "Failed to set outbuf translation\n"); + return ret; + } + + /* Initialization is finally done */ + set_bit(PERF_STS_DONE, &peer->sts); - atomic_dec(&pctx->dma_sync); + return 0; } -static ssize_t perf_copy(struct pthr_ctx *pctx, char __iomem *dst, - char *src, size_t size) +static void perf_free_inbuf(struct perf_peer *peer) { - struct perf_ctx *perf = pctx->perf; - struct dma_async_tx_descriptor *txd; - struct dma_chan *chan = pctx->dma_chan; - struct dma_device *device; - struct dmaengine_unmap_data *unmap; - dma_cookie_t cookie; - size_t src_off, dst_off; - struct perf_mw *mw = &perf->mw; - void __iomem *vbase; - void __iomem *dst_vaddr; - dma_addr_t dst_phys; - int retries = 0; + if (!peer->inbuf) + return; - if (!use_dma) { - memcpy_toio(dst, src, size); - return size; + (void)ntb_mw_clear_trans(peer->perf->ntb, peer->pidx, peer->gidx); + dma_free_coherent(&peer->perf->ntb->dev, peer->inbuf_size, + peer->inbuf, peer->inbuf_xlat); + peer->inbuf = NULL; +} + +static int perf_setup_inbuf(struct perf_peer *peer) +{ + resource_size_t xlat_align, size_align, size_max; + struct perf_ctx *perf = peer->perf; + int ret; + + /* Get inbound MW parameters */ + ret = ntb_mw_get_align(perf->ntb, peer->pidx, perf->gidx, + &xlat_align, &size_align, &size_max); + if (ret) { + dev_err(&perf->ntb->dev, "Couldn't get inbuf restrictions\n"); + return ret; } - if (!chan) { - dev_err(&perf->ntb->dev, "DMA engine does not exist\n"); + if (peer->inbuf_size > size_max) { + dev_err(&perf->ntb->dev, "Too big inbuf size %pa > %pa\n", + &peer->inbuf_size, &size_max); return -EINVAL; } - device = chan->device; - src_off = (uintptr_t)src & ~PAGE_MASK; - dst_off = (uintptr_t __force)dst & ~PAGE_MASK; + peer->inbuf_size = 
round_up(peer->inbuf_size, size_align); - if (!is_dma_copy_aligned(device, src_off, dst_off, size)) - return -ENODEV; - - vbase = mw->vbase; - dst_vaddr = dst; - dst_phys = mw->phys_addr + (dst_vaddr - vbase); + perf_free_inbuf(peer); - unmap = dmaengine_get_unmap_data(device->dev, 1, GFP_NOWAIT); - if (!unmap) + peer->inbuf = dma_alloc_coherent(&perf->ntb->dev, peer->inbuf_size, + &peer->inbuf_xlat, GFP_KERNEL); + if (!peer->inbuf) { + dev_err(&perf->ntb->dev, "Failed to alloc inbuf of %pa\n", + &peer->inbuf_size); return -ENOMEM; + } + if (!IS_ALIGNED(peer->inbuf_xlat, xlat_align)) { + dev_err(&perf->ntb->dev, "Unaligned inbuf allocated\n"); + goto err_free_inbuf; + } - unmap->len = size; - unmap->addr[0] = dma_map_page(device->dev, virt_to_page(src), - src_off, size, DMA_TO_DEVICE); - if (dma_mapping_error(device->dev, unmap->addr[0])) - goto err_get_unmap; + ret = ntb_mw_set_trans(perf->ntb, peer->pidx, peer->gidx, + peer->inbuf_xlat, peer->inbuf_size); + if (ret) { + dev_err(&perf->ntb->dev, "Failed to set inbuf translation\n"); + goto err_free_inbuf; + } - unmap->to_cnt = 1; + /* + * We submit inbuf xlat transmission cmd for execution here to follow + * the code architecture, even though this method is called from service + * work itself so the command will be executed right after it returns. 
+ */ + (void)perf_cmd_exec(peer, PERF_CMD_SXLAT); - do { - txd = device->device_prep_dma_memcpy(chan, dst_phys, - unmap->addr[0], - size, DMA_PREP_INTERRUPT); - if (!txd) { - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(DMA_OUT_RESOURCE_TO); - } - } while (!txd && (++retries < DMA_RETRIES)); + return 0; - if (!txd) { - pctx->dma_prep_err++; - goto err_get_unmap; - } +err_free_inbuf: + perf_free_inbuf(peer); - txd->callback = perf_copy_callback; - txd->callback_param = pctx; - dma_set_unmap(txd, unmap); + return ret; +} - cookie = dmaengine_submit(txd); - if (dma_submit_error(cookie)) - goto err_set_unmap; +static void perf_service_work(struct work_struct *work) +{ + struct perf_peer *peer = to_peer_service(work); - dmaengine_unmap_put(unmap); + if (test_and_clear_bit(PERF_CMD_SSIZE, &peer->sts)) + perf_cmd_send(peer, PERF_CMD_SSIZE, peer->outbuf_size); - atomic_inc(&pctx->dma_sync); - dma_async_issue_pending(chan); + if (test_and_clear_bit(PERF_CMD_RSIZE, &peer->sts)) + perf_setup_inbuf(peer); - return size; + if (test_and_clear_bit(PERF_CMD_SXLAT, &peer->sts)) + perf_cmd_send(peer, PERF_CMD_SXLAT, peer->inbuf_xlat); -err_set_unmap: - dmaengine_unmap_put(unmap); -err_get_unmap: - dmaengine_unmap_put(unmap); - return 0; -} + if (test_and_clear_bit(PERF_CMD_RXLAT, &peer->sts)) + perf_setup_outbuf(peer); -static int perf_move_data(struct pthr_ctx *pctx, char __iomem *dst, char *src, - u64 buf_size, u64 win_size, u64 total) -{ - int chunks, total_chunks, i; - int copied_chunks = 0; - u64 copied = 0, result; - char __iomem *tmp = dst; - u64 perf, diff_us; - ktime_t kstart, kstop, kdiff; - unsigned long last_sleep = jiffies; - - chunks = div64_u64(win_size, buf_size); - total_chunks = div64_u64(total, buf_size); - kstart = ktime_get(); - - for (i = 0; i < total_chunks; i++) { - result = perf_copy(pctx, tmp, src, buf_size); - copied += result; - copied_chunks++; - if (copied_chunks == chunks) { - tmp = dst; - copied_chunks = 0; - } else - tmp += buf_size; - - 
/* Probably should schedule every 5s to prevent soft hang. */ - if (unlikely((jiffies - last_sleep) > 5 * HZ)) { - last_sleep = jiffies; - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(1); + if (test_and_clear_bit(PERF_CMD_CLEAR, &peer->sts)) { + clear_bit(PERF_STS_DONE, &peer->sts); + if (test_bit(0, &peer->perf->busy_flag) && + peer == peer->perf->test_peer) { + dev_warn(&peer->perf->ntb->dev, + "Freeing while test on-fly\n"); + perf_terminate_test(peer->perf); } + perf_free_outbuf(peer); + perf_free_inbuf(peer); + } +} - if (unlikely(kthread_should_stop())) - break; +static int perf_init_service(struct perf_ctx *perf) +{ + u64 mask; + + if (ntb_peer_mw_count(perf->ntb) < perf->pcnt + 1) { + dev_err(&perf->ntb->dev, "Not enough memory windows\n"); + return -EINVAL; } - if (use_dma) { - pr_debug("%s: All DMA descriptors submitted\n", current->comm); - while (atomic_read(&pctx->dma_sync) != 0) { - if (kthread_should_stop()) - break; - msleep(20); - } + if (ntb_msg_count(perf->ntb) >= PERF_MSG_CNT) { + perf->cmd_send = perf_msg_cmd_send; + perf->cmd_recv = perf_msg_cmd_recv; + + dev_dbg(&perf->ntb->dev, "Message service initialized\n"); + + return 0; } - kstop = ktime_get(); - kdiff = ktime_sub(kstop, kstart); - diff_us = ktime_to_us(kdiff); + dev_dbg(&perf->ntb->dev, "Message service unsupported\n"); - pr_debug("%s: copied %llu bytes\n", current->comm, copied); + mask = GENMASK_ULL(perf->pcnt, 0); + if (ntb_spad_count(perf->ntb) >= PERF_SPAD_CNT(perf->pcnt) && + (ntb_db_valid_mask(perf->ntb) & mask) == mask) { + perf->cmd_send = perf_spad_cmd_send; + perf->cmd_recv = perf_spad_cmd_recv; - pr_debug("%s: lasted %llu usecs\n", current->comm, diff_us); + dev_dbg(&perf->ntb->dev, "Scratchpad service initialized\n"); - perf = div64_u64(copied, diff_us); + return 0; + } - pr_debug("%s: MBytes/s: %llu\n", current->comm, perf); + dev_dbg(&perf->ntb->dev, "Scratchpad service unsupported\n"); - pctx->copied = copied; - pctx->diff_us = diff_us; + 
dev_err(&perf->ntb->dev, "Command services unsupported\n"); - return 0; + return -EINVAL; } -static bool perf_dma_filter_fn(struct dma_chan *chan, void *node) +static int perf_enable_service(struct perf_ctx *perf) { - /* Is the channel required to be on the same node as the device? */ - if (!on_node) - return true; + u64 mask, incmd_bit; + int ret, sidx, scnt; - return dev_to_node(&chan->dev->device) == (int)(unsigned long)node; -} + mask = ntb_db_valid_mask(perf->ntb); + (void)ntb_db_set_mask(perf->ntb, mask); -static int ntb_perf_thread(void *data) -{ - struct pthr_ctx *pctx = data; - struct perf_ctx *perf = pctx->perf; - struct pci_dev *pdev = perf->ntb->pdev; - struct perf_mw *mw = &perf->mw; - char __iomem *dst; - u64 win_size, buf_size, total; - void *src; - int rc, node, i; - struct dma_chan *dma_chan = NULL; + ret = ntb_set_ctx(perf->ntb, perf, &perf_ops); + if (ret) + return ret; - pr_debug("kthread %s starting...\n", current->comm); + if (perf->cmd_send == perf_msg_cmd_send) { + u64 inbits, outbits; - node = on_node ? 
dev_to_node(&pdev->dev) : NUMA_NO_NODE; + inbits = ntb_msg_inbits(perf->ntb); + outbits = ntb_msg_outbits(perf->ntb); + (void)ntb_msg_set_mask(perf->ntb, inbits | outbits); - if (use_dma && !pctx->dma_chan) { - dma_cap_mask_t dma_mask; + incmd_bit = BIT_ULL(__ffs64(inbits)); + ret = ntb_msg_clear_mask(perf->ntb, incmd_bit); - dma_cap_zero(dma_mask); - dma_cap_set(DMA_MEMCPY, dma_mask); - dma_chan = dma_request_channel(dma_mask, perf_dma_filter_fn, - (void *)(unsigned long)node); - if (!dma_chan) { - pr_warn("%s: cannot acquire DMA channel, quitting\n", - current->comm); - return -ENODEV; - } - pctx->dma_chan = dma_chan; + dev_dbg(&perf->ntb->dev, "MSG sts unmasked %#llx\n", incmd_bit); + } else { + scnt = ntb_spad_count(perf->ntb); + for (sidx = 0; sidx < scnt; sidx++) + ntb_spad_write(perf->ntb, sidx, PERF_CMD_INVAL); + incmd_bit = PERF_SPAD_NOTIFY(perf->gidx); + ret = ntb_db_clear_mask(perf->ntb, incmd_bit); + + dev_dbg(&perf->ntb->dev, "DB bits unmasked %#llx\n", incmd_bit); + } + if (ret) { + ntb_clear_ctx(perf->ntb); + return ret; } - for (i = 0; i < MAX_SRCS; i++) { - pctx->srcs[i] = kmalloc_node(MAX_TEST_SIZE, GFP_KERNEL, node); - if (!pctx->srcs[i]) { - rc = -ENOMEM; - goto err; - } + ntb_link_enable(perf->ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO); + /* Might be not necessary */ + ntb_link_event(perf->ntb); + + return 0; +} + +static void perf_disable_service(struct perf_ctx *perf) +{ + int pidx; + + ntb_link_disable(perf->ntb); + + if (perf->cmd_send == perf_msg_cmd_send) { + u64 inbits; + + inbits = ntb_msg_inbits(perf->ntb); + (void)ntb_msg_set_mask(perf->ntb, inbits); + } else { + (void)ntb_db_set_mask(perf->ntb, PERF_SPAD_NOTIFY(perf->gidx)); } - win_size = mw->phys_size; - buf_size = 1ULL << seg_order; - total = 1ULL << run_order; + ntb_clear_ctx(perf->ntb); - if (buf_size > MAX_TEST_SIZE) - buf_size = MAX_TEST_SIZE; + for (pidx = 0; pidx < perf->pcnt; pidx++) + perf_cmd_exec(&perf->peers[pidx], PERF_CMD_CLEAR); - dst = (char __iomem *)mw->vbase; + for 
(pidx = 0; pidx < perf->pcnt; pidx++) + flush_work(&perf->peers[pidx].service); +} - atomic_inc(&perf->tsync); - while (atomic_read(&perf->tsync) != perf->perf_threads) - schedule(); +/*============================================================================== + * Performance measuring work-thread + *============================================================================== + */ - src = pctx->srcs[pctx->src_idx]; - pctx->src_idx = (pctx->src_idx + 1) & (MAX_SRCS - 1); +static void perf_dma_copy_callback(void *data) +{ + struct perf_thread *pthr = data; - rc = perf_move_data(pctx, dst, src, buf_size, win_size, total); + atomic_dec(&pthr->dma_sync); + wake_up(&pthr->dma_wait); +} - atomic_dec(&perf->tsync); +static int perf_copy_chunk(struct perf_thread *pthr, + void __iomem *dst, void *src, size_t len) +{ + struct dma_async_tx_descriptor *tx; + struct dmaengine_unmap_data *unmap; + struct device *dma_dev; + int try = 0, ret = 0; - if (rc < 0) { - pr_err("%s: failed\n", current->comm); - rc = -ENXIO; - goto err; + if (!use_dma) { + memcpy_toio(dst, src, len); + goto ret_check_tsync; } - for (i = 0; i < MAX_SRCS; i++) { - kfree(pctx->srcs[i]); - pctx->srcs[i] = NULL; + dma_dev = pthr->dma_chan->device->dev; + + if (!is_dma_copy_aligned(pthr->dma_chan->device, offset_in_page(src), + offset_in_page(dst), len)) + return -EIO; + + unmap = dmaengine_get_unmap_data(dma_dev, 2, GFP_NOWAIT); + if (!unmap) + return -ENOMEM; + + unmap->len = len; + unmap->addr[0] = dma_map_page(dma_dev, virt_to_page(src), + offset_in_page(src), len, DMA_TO_DEVICE); + if (dma_mapping_error(dma_dev, unmap->addr[0])) { + ret = -EIO; + goto err_free_resource; } + unmap->to_cnt = 1; - atomic_inc(&perf->tdone); - wake_up(pctx->wq); - rc = 0; - goto done; + unmap->addr[1] = dma_map_page(dma_dev, virt_to_page(dst), + offset_in_page(dst), len, DMA_FROM_DEVICE); + if (dma_mapping_error(dma_dev, unmap->addr[1])) { + ret = -EIO; + goto err_free_resource; + } + unmap->from_cnt = 1; -err: - for (i = 
0; i < MAX_SRCS; i++) { - kfree(pctx->srcs[i]); - pctx->srcs[i] = NULL; + do { + tx = dmaengine_prep_dma_memcpy(pthr->dma_chan, unmap->addr[1], + unmap->addr[0], len, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + if (!tx) + msleep(DMA_MDELAY); + } while (!tx && (try++ < DMA_TRIES)); + + if (!tx) { + ret = -EIO; + goto err_free_resource; } - if (dma_chan) { - dma_release_channel(dma_chan); - pctx->dma_chan = NULL; + tx->callback = perf_dma_copy_callback; + tx->callback_param = pthr; + dma_set_unmap(tx, unmap); + + if (dma_submit_error(dmaengine_submit(tx))) { + dmaengine_unmap_put(unmap); + goto err_free_resource; } -done: - /* Wait until we are told to stop */ - for (;;) { - set_current_state(TASK_INTERRUPTIBLE); - if (kthread_should_stop()) - break; - schedule(); + dmaengine_unmap_put(unmap); + + atomic_inc(&pthr->dma_sync); + dma_async_issue_pending(pthr->dma_chan); + +ret_check_tsync: + return likely(atomic_read(&pthr->perf->tsync) > 0) ? 0 : -EINTR; + +err_free_resource: + dmaengine_unmap_put(unmap); + + return ret; +} + +static bool perf_dma_filter(struct dma_chan *chan, void *data) +{ + struct perf_ctx *perf = data; + int node; + + node = dev_to_node(&perf->ntb->dev); + + return node == NUMA_NO_NODE || node == dev_to_node(chan->device->dev); +} + +static int perf_init_test(struct perf_thread *pthr) +{ + struct perf_ctx *perf = pthr->perf; + dma_cap_mask_t dma_mask; + + pthr->src = kmalloc_node(perf->test_peer->outbuf_size, GFP_KERNEL, + dev_to_node(&perf->ntb->dev)); + if (!pthr->src) + return -ENOMEM; + + get_random_bytes(pthr->src, perf->test_peer->outbuf_size); + + if (!use_dma) + return 0; + + dma_cap_zero(dma_mask); + dma_cap_set(DMA_MEMCPY, dma_mask); + pthr->dma_chan = dma_request_channel(dma_mask, perf_dma_filter, perf); + if (!pthr->dma_chan) { + dev_err(&perf->ntb->dev, "%d: Failed to get DMA channel\n", + pthr->tidx); + atomic_dec(&perf->tsync); + wake_up(&perf->twait); + kfree(pthr->src); + return -ENODEV; } - __set_current_state(TASK_RUNNING); - return 
rc; + atomic_set(&pthr->dma_sync, 0); + + return 0; } -static void perf_free_mw(struct perf_ctx *perf) +static int perf_run_test(struct perf_thread *pthr) { - struct perf_mw *mw = &perf->mw; - struct pci_dev *pdev = perf->ntb->pdev; + struct perf_peer *peer = pthr->perf->test_peer; + struct perf_ctx *perf = pthr->perf; + void __iomem *flt_dst, *bnd_dst; + u64 total_size, chunk_size; + void *flt_src; + int ret = 0; + + total_size = 1ULL << total_order; + chunk_size = 1ULL << chunk_order; + chunk_size = min_t(u64, peer->outbuf_size, chunk_size); + + flt_src = pthr->src; + bnd_dst = peer->outbuf + peer->outbuf_size; + flt_dst = peer->outbuf; + + pthr->duration = ktime_get(); + + /* Copied field is cleared on test launch stage */ + while (pthr->copied < total_size) { + ret = perf_copy_chunk(pthr, flt_dst, flt_src, chunk_size); + if (ret) { + dev_err(&perf->ntb->dev, "%d: Got error %d on test\n", + pthr->tidx, ret); + return ret; + } - if (!mw->virt_addr) - return; + pthr->copied += chunk_size; + + flt_dst += chunk_size; + flt_src += chunk_size; + if (flt_dst >= bnd_dst || flt_dst < peer->outbuf) { + flt_dst = peer->outbuf; + flt_src = pthr->src; + } - ntb_mw_clear_trans(perf->ntb, PIDX, 0); - dma_free_coherent(&pdev->dev, mw->buf_size, - mw->virt_addr, mw->dma_addr); - mw->xlat_size = 0; - mw->buf_size = 0; - mw->virt_addr = NULL; + /* Give up CPU to give a chance for other threads to use it */ + schedule(); + } + + return 0; } -static int perf_set_mw(struct perf_ctx *perf, resource_size_t size) +static int perf_sync_test(struct perf_thread *pthr) { - struct perf_mw *mw = &perf->mw; - size_t xlat_size, buf_size; - resource_size_t xlat_align; - resource_size_t xlat_align_size; - int rc; + struct perf_ctx *perf = pthr->perf; - if (!size) - return -EINVAL; + if (!use_dma) + goto no_dma_ret; - rc = ntb_mw_get_align(perf->ntb, PIDX, 0, &xlat_align, - &xlat_align_size, NULL); - if (rc) - return rc; + wait_event(pthr->dma_wait, + (atomic_read(&pthr->dma_sync) == 0 || + 
atomic_read(&perf->tsync) < 0)); - xlat_size = round_up(size, xlat_align_size); - buf_size = round_up(size, xlat_align); + if (atomic_read(&perf->tsync) < 0) + return -EINTR; - if (mw->xlat_size == xlat_size) - return 0; +no_dma_ret: + pthr->duration = ktime_sub(ktime_get(), pthr->duration); - if (mw->buf_size) - perf_free_mw(perf); + dev_dbg(&perf->ntb->dev, "%d: copied %llu bytes\n", + pthr->tidx, pthr->copied); - mw->xlat_size = xlat_size; - mw->buf_size = buf_size; + dev_dbg(&perf->ntb->dev, "%d: lasted %llu usecs\n", + pthr->tidx, ktime_to_us(pthr->duration)); + + dev_dbg(&perf->ntb->dev, "%d: %llu MBytes/s\n", pthr->tidx, + div64_u64(pthr->copied, ktime_to_us(pthr->duration))); + + return 0; +} + +static void perf_clear_test(struct perf_thread *pthr) +{ + struct perf_ctx *perf = pthr->perf; + + if (!use_dma) + goto no_dma_notify; + + /* + * If test finished without errors, termination isn't needed. + * We call it anyway just to be sure of the transfers completion. + */ + (void)dmaengine_terminate_sync(pthr->dma_chan); + + dma_release_channel(pthr->dma_chan); + +no_dma_notify: + atomic_dec(&perf->tsync); + wake_up(&perf->twait); + kfree(pthr->src); +} - mw->virt_addr = dma_alloc_coherent(&perf->ntb->pdev->dev, buf_size, - &mw->dma_addr, GFP_KERNEL); - if (!mw->virt_addr) { - mw->xlat_size = 0; - mw->buf_size = 0; +static void perf_thread_work(struct work_struct *work) +{ + struct perf_thread *pthr = to_thread_work(work); + int ret; + + /* + * Perform stages in compliance with use_dma flag value. + * Test status is changed only if error happened, otherwise + * status -ENODATA is kept while test is on-fly. Results + * synchronization is performed only if test finished + * without an error or interruption. 
+ */ + ret = perf_init_test(pthr); + if (ret) { + pthr->status = ret; + return; } - rc = ntb_mw_set_trans(perf->ntb, PIDX, 0, mw->dma_addr, mw->xlat_size); - if (rc) { - dev_err(&perf->ntb->dev, "Unable to set mw0 translation\n"); - perf_free_mw(perf); - return -EIO; + ret = perf_run_test(pthr); + if (ret) { + pthr->status = ret; + goto err_clear_test; } - return 0; + pthr->status = perf_sync_test(pthr); + +err_clear_test: + perf_clear_test(pthr); } -static void perf_link_work(struct work_struct *work) +static int perf_set_tcnt(struct perf_ctx *perf, u8 tcnt) { - struct perf_ctx *perf = - container_of(work, struct perf_ctx, link_work.work); - struct ntb_dev *ndev = perf->ntb; - struct pci_dev *pdev = ndev->pdev; - u32 val; - u64 size; - int rc; + if (tcnt == 0 || tcnt > MAX_THREADS_CNT) + return -EINVAL; - dev_dbg(&perf->ntb->pdev->dev, "%s called\n", __func__); + if (test_and_set_bit_lock(0, &perf->busy_flag)) + return -EBUSY; + + perf->tcnt = tcnt; + + clear_bit_unlock(0, &perf->busy_flag); - size = perf->mw.phys_size; + return 0; +} - if (max_mw_size && size > max_mw_size) - size = max_mw_size; +static void perf_terminate_test(struct perf_ctx *perf) +{ + int tidx; - ntb_peer_spad_write(ndev, PIDX, MW_SZ_HIGH, upper_32_bits(size)); - ntb_peer_spad_write(ndev, PIDX, MW_SZ_LOW, lower_32_bits(size)); - ntb_peer_spad_write(ndev, PIDX, VERSION, PERF_VERSION); + atomic_set(&perf->tsync, -1); + wake_up(&perf->twait); - /* now read what peer wrote */ - val = ntb_spad_read(ndev, VERSION); - if (val != PERF_VERSION) { - dev_dbg(&pdev->dev, "Remote version = %#x\n", val); - goto out; + for (tidx = 0; tidx < MAX_THREADS_CNT; tidx++) { + wake_up(&perf->threads[tidx].dma_wait); + cancel_work_sync(&perf->threads[tidx].work); } +} + +static int perf_submit_test(struct perf_peer *peer) +{ + struct perf_ctx *perf = peer->perf; + struct perf_thread *pthr; + int tidx, ret; - val = ntb_spad_read(ndev, MW_SZ_HIGH); - size = (u64)val << 32; + if (!test_bit(PERF_STS_DONE, &peer->sts)) + 
return -ENOLINK; - val = ntb_spad_read(ndev, MW_SZ_LOW); - size |= val; + if (test_and_set_bit_lock(0, &perf->busy_flag)) + return -EBUSY; - dev_dbg(&pdev->dev, "Remote MW size = %#llx\n", size); + perf->test_peer = peer; + atomic_set(&perf->tsync, perf->tcnt); - rc = perf_set_mw(perf, size); - if (rc) - goto out1; + for (tidx = 0; tidx < MAX_THREADS_CNT; tidx++) { + pthr = &perf->threads[tidx]; - perf->link_is_up = true; - wake_up(&perf->link_wq); + pthr->status = -ENODATA; + pthr->copied = 0; + pthr->duration = ktime_set(0, 0); + if (tidx < perf->tcnt) + (void)queue_work(perf_wq, &pthr->work); + } - return; + ret = wait_event_interruptible(perf->twait, + atomic_read(&perf->tsync) <= 0); + if (ret == -ERESTARTSYS) { + perf_terminate_test(perf); + ret = -EINTR; + } -out1: - perf_free_mw(perf); + clear_bit_unlock(0, &perf->busy_flag); -out: - if (ntb_link_is_up(ndev, NULL, NULL) == 1) - schedule_delayed_work(&perf->link_work, - msecs_to_jiffies(PERF_LINK_DOWN_TIMEOUT)); + return ret; } -static int perf_setup_mw(struct ntb_dev *ntb, struct perf_ctx *perf) +static int perf_read_stats(struct perf_ctx *perf, char *buf, + size_t size, ssize_t *pos) { - struct perf_mw *mw; - int rc; + struct perf_thread *pthr; + int tidx; + + if (test_and_set_bit_lock(0, &perf->busy_flag)) + return -EBUSY; - mw = &perf->mw; + (*pos) += scnprintf(buf + *pos, size - *pos, + " Peer %d test statistics:\n", perf->test_peer->pidx); - rc = ntb_peer_mw_get_addr(ntb, 0, &mw->phys_addr, &mw->phys_size); - if (rc) - return rc; + for (tidx = 0; tidx < MAX_THREADS_CNT; tidx++) { + pthr = &perf->threads[tidx]; - perf->mw.vbase = ioremap_wc(mw->phys_addr, mw->phys_size); - if (!mw->vbase) - return -ENOMEM; + if (pthr->status == -ENODATA) + continue; + + if (pthr->status) { + (*pos) += scnprintf(buf + *pos, size - *pos, + "%d: error status %d\n", tidx, pthr->status); + continue; + } + + (*pos) += scnprintf(buf + *pos, size - *pos, + "%d: copied %llu bytes in %llu usecs, %llu MBytes/s\n", + tidx, 
pthr->copied, ktime_to_us(pthr->duration), + div64_u64(pthr->copied, ktime_to_us(pthr->duration))); + } + + clear_bit_unlock(0, &perf->busy_flag); return 0; } -static ssize_t debugfs_run_read(struct file *filp, char __user *ubuf, - size_t count, loff_t *offp) +static void perf_init_threads(struct perf_ctx *perf) { - struct perf_ctx *perf = filp->private_data; + struct perf_thread *pthr; + int tidx; + + perf->tcnt = DEF_THREADS_CNT; + perf->test_peer = &perf->peers[0]; + init_waitqueue_head(&perf->twait); + + for (tidx = 0; tidx < MAX_THREADS_CNT; tidx++) { + pthr = &perf->threads[tidx]; + + pthr->perf = perf; + pthr->tidx = tidx; + pthr->status = -ENODATA; + init_waitqueue_head(&pthr->dma_wait); + INIT_WORK(&pthr->work, perf_thread_work); + } +} + +static void perf_clear_threads(struct perf_ctx *perf) +{ + perf_terminate_test(perf); +} + +/*============================================================================== + * DebugFS nodes + *============================================================================== + */ + +static ssize_t perf_dbgfs_read_info(struct file *filep, char __user *ubuf, + size_t size, loff_t *offp) +{ + struct perf_ctx *perf = filep->private_data; + struct perf_peer *peer; + size_t buf_size; + ssize_t pos = 0; + int ret, pidx; char *buf; - ssize_t ret, out_off = 0; - struct pthr_ctx *pctx; - int i; - u64 rate; - if (!perf) - return 0; + buf_size = min_t(size_t, size, 0x1000U); - buf = kmalloc(1024, GFP_KERNEL); + buf = kmalloc(buf_size, GFP_KERNEL); if (!buf) return -ENOMEM; - if (mutex_is_locked(&perf->run_mutex)) { - out_off = scnprintf(buf, 64, "running\n"); - goto read_from_buf; + pos += scnprintf(buf + pos, buf_size - pos, + " Performance measuring tool info:\n\n"); + + pos += scnprintf(buf + pos, buf_size - pos, + "Local port %d, Global index %d\n", ntb_port_number(perf->ntb), + perf->gidx); + pos += scnprintf(buf + pos, buf_size - pos, "Test status: "); + if (test_bit(0, &perf->busy_flag)) { + pos += scnprintf(buf + pos, buf_size 
- pos, + "on-fly with port %d (%d)\n", + ntb_peer_port_number(perf->ntb, perf->test_peer->pidx), + perf->test_peer->pidx); + } else { + pos += scnprintf(buf + pos, buf_size - pos, "idle\n"); } - for (i = 0; i < MAX_THREADS; i++) { - pctx = &perf->pthr_ctx[i]; + for (pidx = 0; pidx < perf->pcnt; pidx++) { + peer = &perf->peers[pidx]; + + pos += scnprintf(buf + pos, buf_size - pos, + "Port %d (%d), Global index %d:\n", + ntb_peer_port_number(perf->ntb, peer->pidx), peer->pidx, + peer->gidx); + + pos += scnprintf(buf + pos, buf_size - pos, + "\tLink status: %s\n", + test_bit(PERF_STS_LNKUP, &peer->sts) ? "up" : "down"); + + pos += scnprintf(buf + pos, buf_size - pos, + "\tOut buffer addr 0x%pK\n", peer->outbuf); - if (pctx->status == -ENODATA) - break; + pos += scnprintf(buf + pos, buf_size - pos, + "\tOut buffer size %pa\n", &peer->outbuf_size); - if (pctx->status) { - out_off += scnprintf(buf + out_off, 1024 - out_off, - "%d: error %d\n", i, - pctx->status); + pos += scnprintf(buf + pos, buf_size - pos, + "\tOut buffer xlat 0x%016llx[p]\n", peer->outbuf_xlat); + + if (!peer->inbuf) { + pos += scnprintf(buf + pos, buf_size - pos, + "\tIn buffer addr: unallocated\n"); continue; } - rate = div64_u64(pctx->copied, pctx->diff_us); - out_off += scnprintf(buf + out_off, 1024 - out_off, - "%d: copied %llu bytes in %llu usecs, %llu MBytes/s\n", - i, pctx->copied, pctx->diff_us, rate); + pos += scnprintf(buf + pos, buf_size - pos, + "\tIn buffer addr 0x%pK\n", peer->inbuf); + + pos += scnprintf(buf + pos, buf_size - pos, + "\tIn buffer size %pa\n", &peer->inbuf_size); + + pos += scnprintf(buf + pos, buf_size - pos, + "\tIn buffer xlat %pad[p]\n", &peer->inbuf_xlat); } -read_from_buf: - ret = simple_read_from_buffer(ubuf, count, offp, buf, out_off); + ret = simple_read_from_buffer(ubuf, size, offp, buf, pos); kfree(buf); return ret; } -static void threads_cleanup(struct perf_ctx *perf) +static const struct file_operations perf_dbgfs_info = { + .open = simple_open, + .read = 
perf_dbgfs_read_info +}; + +static ssize_t perf_dbgfs_read_run(struct file *filep, char __user *ubuf, + size_t size, loff_t *offp) { - struct pthr_ctx *pctx; - int i; + struct perf_ctx *perf = filep->private_data; + ssize_t ret, pos = 0; + char *buf; - for (i = 0; i < MAX_THREADS; i++) { - pctx = &perf->pthr_ctx[i]; - if (pctx->thread) { - pctx->status = kthread_stop(pctx->thread); - pctx->thread = NULL; - } - } -} + buf = kmalloc(PERF_BUF_LEN, GFP_KERNEL); + if (!buf) + return -ENOMEM; -static void perf_clear_thread_status(struct perf_ctx *perf) -{ - int i; + ret = perf_read_stats(perf, buf, PERF_BUF_LEN, &pos); + if (ret) + goto err_free; + + ret = simple_read_from_buffer(ubuf, size, offp, buf, pos); +err_free: + kfree(buf); - for (i = 0; i < MAX_THREADS; i++) - perf->pthr_ctx[i].status = -ENODATA; + return ret; } -static ssize_t debugfs_run_write(struct file *filp, const char __user *ubuf, - size_t count, loff_t *offp) +static ssize_t perf_dbgfs_write_run(struct file *filep, const char __user *ubuf, + size_t size, loff_t *offp) { - struct perf_ctx *perf = filp->private_data; - int node, i; - DECLARE_WAIT_QUEUE_HEAD(wq); + struct perf_ctx *perf = filep->private_data; + struct perf_peer *peer; + int pidx, ret; - if (wait_event_interruptible(perf->link_wq, perf->link_is_up)) - return -ENOLINK; + ret = kstrtoint_from_user(ubuf, size, 0, &pidx); + if (ret) + return ret; - if (perf->perf_threads == 0) + if (pidx < 0 || pidx >= perf->pcnt) return -EINVAL; - if (!mutex_trylock(&perf->run_mutex)) - return -EBUSY; + peer = &perf->peers[pidx]; - perf_clear_thread_status(perf); + ret = perf_submit_test(peer); + if (ret) + return ret; - if (perf->perf_threads > MAX_THREADS) { - perf->perf_threads = MAX_THREADS; - pr_info("Reset total threads to: %u\n", MAX_THREADS); - } + return size; +} - /* no greater than 1M */ - if (seg_order > MAX_SEG_ORDER) { - seg_order = MAX_SEG_ORDER; - pr_info("Fix seg_order to %u\n", seg_order); - } +static const struct file_operations 
perf_dbgfs_run = { + .open = simple_open, + .read = perf_dbgfs_read_run, + .write = perf_dbgfs_write_run +}; - if (run_order < seg_order) { - run_order = seg_order; - pr_info("Fix run_order to %u\n", run_order); - } +static ssize_t perf_dbgfs_read_tcnt(struct file *filep, char __user *ubuf, + size_t size, loff_t *offp) +{ + struct perf_ctx *perf = filep->private_data; + char buf[8]; + ssize_t pos; - node = on_node ? dev_to_node(&perf->ntb->pdev->dev) - : NUMA_NO_NODE; - atomic_set(&perf->tdone, 0); + pos = scnprintf(buf, sizeof(buf), "%hhu\n", perf->tcnt); - /* launch kernel thread */ - for (i = 0; i < perf->perf_threads; i++) { - struct pthr_ctx *pctx; + return simple_read_from_buffer(ubuf, size, offp, buf, pos); +} - pctx = &perf->pthr_ctx[i]; - atomic_set(&pctx->dma_sync, 0); - pctx->perf = perf; - pctx->wq = &wq; - pctx->thread = - kthread_create_on_node(ntb_perf_thread, - (void *)pctx, - node, "ntb_perf %d", i); - if (IS_ERR(pctx->thread)) { - pctx->thread = NULL; - goto err; - } else { - wake_up_process(pctx->thread); - } - } +static ssize_t perf_dbgfs_write_tcnt(struct file *filep, + const char __user *ubuf, + size_t size, loff_t *offp) +{ + struct perf_ctx *perf = filep->private_data; + int ret; + u8 val; - wait_event_interruptible(wq, - atomic_read(&perf->tdone) == perf->perf_threads); + ret = kstrtou8_from_user(ubuf, size, 0, &val); + if (ret) + return ret; - threads_cleanup(perf); - mutex_unlock(&perf->run_mutex); - return count; + ret = perf_set_tcnt(perf, val); + if (ret) + return ret; -err: - threads_cleanup(perf); - mutex_unlock(&perf->run_mutex); - return -ENXIO; + return size; } -static const struct file_operations ntb_perf_debugfs_run = { - .owner = THIS_MODULE, +static const struct file_operations perf_dbgfs_tcnt = { .open = simple_open, - .read = debugfs_run_read, - .write = debugfs_run_write, + .read = perf_dbgfs_read_tcnt, + .write = perf_dbgfs_write_tcnt }; -static int perf_debugfs_setup(struct perf_ctx *perf) +static void 
perf_setup_dbgfs(struct perf_ctx *perf) { struct pci_dev *pdev = perf->ntb->pdev; - struct dentry *debugfs_node_dir; - struct dentry *debugfs_run; - struct dentry *debugfs_threads; - struct dentry *debugfs_seg_order; - struct dentry *debugfs_run_order; - struct dentry *debugfs_use_dma; - struct dentry *debugfs_on_node; - - if (!debugfs_initialized()) - return -ENODEV; - /* Assumpion: only one NTB device in the system */ - if (!perf_debugfs_dir) { - perf_debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL); - if (!perf_debugfs_dir) - return -ENODEV; - } - - debugfs_node_dir = debugfs_create_dir(pci_name(pdev), - perf_debugfs_dir); - if (!debugfs_node_dir) - goto err; - - debugfs_run = debugfs_create_file("run", S_IRUSR | S_IWUSR, - debugfs_node_dir, perf, - &ntb_perf_debugfs_run); - if (!debugfs_run) - goto err; - - debugfs_threads = debugfs_create_u8("threads", S_IRUSR | S_IWUSR, - debugfs_node_dir, - &perf->perf_threads); - if (!debugfs_threads) - goto err; - - debugfs_seg_order = debugfs_create_u32("seg_order", 0600, - debugfs_node_dir, - &seg_order); - if (!debugfs_seg_order) - goto err; - - debugfs_run_order = debugfs_create_u32("run_order", 0600, - debugfs_node_dir, - &run_order); - if (!debugfs_run_order) - goto err; - - debugfs_use_dma = debugfs_create_bool("use_dma", 0600, - debugfs_node_dir, - &use_dma); - if (!debugfs_use_dma) - goto err; - - debugfs_on_node = debugfs_create_bool("on_node", 0600, - debugfs_node_dir, - &on_node); - if (!debugfs_on_node) - goto err; + perf->dbgfs_dir = debugfs_create_dir(pci_name(pdev), perf_dbgfs_topdir); + if (!perf->dbgfs_dir) { + dev_warn(&perf->ntb->dev, "DebugFS unsupported\n"); + return; + } + + debugfs_create_file("info", 0600, perf->dbgfs_dir, perf, + &perf_dbgfs_info); - return 0; + debugfs_create_file("run", 0600, perf->dbgfs_dir, perf, + &perf_dbgfs_run); -err: - debugfs_remove_recursive(perf_debugfs_dir); - perf_debugfs_dir = NULL; - return -ENODEV; + debugfs_create_file("threads_count", 0600, 
perf->dbgfs_dir, perf, + &perf_dbgfs_tcnt); + + /* They are made read-only for test exec safety and integrity */ + debugfs_create_u8("chunk_order", 0500, perf->dbgfs_dir, &chunk_order); + + debugfs_create_u8("total_order", 0500, perf->dbgfs_dir, &total_order); + + debugfs_create_bool("use_dma", 0500, perf->dbgfs_dir, &use_dma); } -static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb) +static void perf_clear_dbgfs(struct perf_ctx *perf) +{ + debugfs_remove_recursive(perf->dbgfs_dir); +} + +/*============================================================================== + * Basic driver initialization + *============================================================================== + */ + +static struct perf_ctx *perf_create_data(struct ntb_dev *ntb) { - struct pci_dev *pdev = ntb->pdev; struct perf_ctx *perf; - int node; - int rc = 0; - if (ntb_spad_count(ntb) < MAX_SPAD) { - dev_err(&ntb->dev, "Not enough scratch pad registers for %s", - DRIVER_NAME); - return -EIO; - } + perf = devm_kzalloc(&ntb->dev, sizeof(*perf), GFP_KERNEL); + if (!perf) + return ERR_PTR(-ENOMEM); - if (!ntb->ops->mw_set_trans) { - dev_err(&ntb->dev, "Need inbound MW based NTB API\n"); - return -EINVAL; + perf->pcnt = ntb_peer_port_count(ntb); + perf->peers = devm_kcalloc(&ntb->dev, perf->pcnt, sizeof(*perf->peers), + GFP_KERNEL); + if (!perf->peers) + return ERR_PTR(-ENOMEM); + + perf->ntb = ntb; + + return perf; +} + +static int perf_setup_peer_mw(struct perf_peer *peer) +{ + struct perf_ctx *perf = peer->perf; + phys_addr_t phys_addr; + int ret; + + /* Get outbound MW parameters and map it */ + ret = ntb_peer_mw_get_addr(perf->ntb, peer->gidx, &phys_addr, + &peer->outbuf_size); + if (ret) + return ret; + + peer->outbuf = devm_ioremap_wc(&perf->ntb->dev, phys_addr, + peer->outbuf_size); + if (!peer->outbuf) + return -ENOMEM; + + if (max_mw_size && peer->outbuf_size > max_mw_size) { + peer->outbuf_size = max_mw_size; + dev_warn(&peer->perf->ntb->dev, + "Peer %d outbuf reduced 
to %#llx\n", peer->pidx, + peer->outbuf_size); } - if (ntb_peer_port_count(ntb) != NTB_DEF_PEER_CNT) - dev_warn(&ntb->dev, "Multi-port NTB devices unsupported\n"); + return 0; +} - node = on_node ? dev_to_node(&pdev->dev) : NUMA_NO_NODE; - perf = kzalloc_node(sizeof(*perf), GFP_KERNEL, node); - if (!perf) { - rc = -ENOMEM; - goto err_perf; +static int perf_init_peers(struct perf_ctx *perf) +{ + struct perf_peer *peer; + int pidx, lport, ret; + + lport = ntb_port_number(perf->ntb); + perf->gidx = -1; + for (pidx = 0; pidx < perf->pcnt; pidx++) { + peer = &perf->peers[pidx]; + + peer->perf = perf; + peer->pidx = pidx; + if (lport < ntb_peer_port_number(perf->ntb, pidx)) { + if (perf->gidx == -1) + perf->gidx = pidx; + peer->gidx = pidx + 1; + } else { + peer->gidx = pidx; + } + INIT_WORK(&peer->service, perf_service_work); } + if (perf->gidx == -1) + perf->gidx = pidx; - perf->ntb = ntb; - perf->perf_threads = 1; - atomic_set(&perf->tsync, 0); - mutex_init(&perf->run_mutex); - spin_lock_init(&perf->db_lock); - perf_setup_mw(ntb, perf); - init_waitqueue_head(&perf->link_wq); - INIT_DELAYED_WORK(&perf->link_work, perf_link_work); + for (pidx = 0; pidx < perf->pcnt; pidx++) { + ret = perf_setup_peer_mw(&perf->peers[pidx]); + if (ret) + return ret; + } + + dev_dbg(&perf->ntb->dev, "Global port index %d\n", perf->gidx); + + return 0; +} - rc = ntb_set_ctx(ntb, perf, &perf_ops); - if (rc) - goto err_ctx; +static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb) +{ + struct perf_ctx *perf; + int ret; - perf->link_is_up = false; - ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO); - ntb_link_event(ntb); + perf = perf_create_data(ntb); + if (IS_ERR(perf)) + return PTR_ERR(perf); - rc = perf_debugfs_setup(perf); - if (rc) - goto err_ctx; + ret = perf_init_peers(perf); + if (ret) + return ret; - perf_clear_thread_status(perf); + perf_init_threads(perf); - return 0; + ret = perf_init_service(perf); + if (ret) + return ret; -err_ctx: - 
cancel_delayed_work_sync(&perf->link_work); - kfree(perf); -err_perf: - return rc; + ret = perf_enable_service(perf); + if (ret) + return ret; + + perf_setup_dbgfs(perf); + + return 0; } static void perf_remove(struct ntb_client *client, struct ntb_dev *ntb) { struct perf_ctx *perf = ntb->ctx; - int i; - dev_dbg(&perf->ntb->dev, "%s called\n", __func__); + perf_clear_dbgfs(perf); - mutex_lock(&perf->run_mutex); + perf_disable_service(perf); - cancel_delayed_work_sync(&perf->link_work); + perf_clear_threads(perf); +} - ntb_clear_ctx(ntb); - ntb_link_disable(ntb); +static struct ntb_client perf_client = { + .ops = { + .probe = perf_probe, + .remove = perf_remove + } +}; - debugfs_remove_recursive(perf_debugfs_dir); - perf_debugfs_dir = NULL; +static int __init perf_init(void) +{ + int ret; - if (use_dma) { - for (i = 0; i < MAX_THREADS; i++) { - struct pthr_ctx *pctx = &perf->pthr_ctx[i]; + if (chunk_order > MAX_CHUNK_ORDER) { + chunk_order = MAX_CHUNK_ORDER; + pr_info("Chunk order reduced to %hhu\n", chunk_order); + } - if (pctx->dma_chan) - dma_release_channel(pctx->dma_chan); - } + if (total_order < chunk_order) { + total_order = chunk_order; + pr_info("Total data order reduced to %hhu\n", total_order); } - kfree(perf); + perf_wq = alloc_workqueue("perf_wq", WQ_UNBOUND | WQ_SYSFS, 0); + if (!perf_wq) + return -ENOMEM; + + if (debugfs_initialized()) + perf_dbgfs_topdir = debugfs_create_dir(KBUILD_MODNAME, NULL); + + ret = ntb_register_client(&perf_client); + if (ret) { + debugfs_remove_recursive(perf_dbgfs_topdir); + destroy_workqueue(perf_wq); + } + + return ret; } +module_init(perf_init); + +static void __exit perf_exit(void) +{ + ntb_unregister_client(&perf_client); + debugfs_remove_recursive(perf_dbgfs_topdir); + destroy_workqueue(perf_wq); +} +module_exit(perf_exit); -static struct ntb_client perf_client = { - .ops = { - .probe = perf_probe, - .remove = perf_remove, - }, -}; -module_ntb_client(perf_client); -- cgit From 6952c6de8a350ef6052f3ae33499b947819df913 
Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Wed, 6 Dec 2017 17:32:06 +0300 Subject: NTB: ntb_hw_idt: Set NTB_TOPO_SWITCH topology Since the Switchtec patch there has been a new topology added to the NTB API. It's called NTB_TOPO_SWITCH and is dedicated to PCIe switch chips. Even though the topo field isn't used within the IDT driver much, let's set it for the sake of unification. Signed-off-by: Serge Semin Signed-off-by: Jon Mason --- drivers/ntb/hw/idt/ntb_hw_idt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/ntb/hw/idt/ntb_hw_idt.c b/drivers/ntb/hw/idt/ntb_hw_idt.c index 93d4c9d2a9ad..8d98872d0983 100644 --- a/drivers/ntb/hw/idt/ntb_hw_idt.c +++ b/drivers/ntb/hw/idt/ntb_hw_idt.c @@ -2070,7 +2070,7 @@ static int idt_register_device(struct idt_ntb_dev *ndev) /* Initialize the rest of NTB device structure and register it */ ndev->ntb.ops = &idt_ntb_ops; - ndev->ntb.topo = NTB_TOPO_PRI; + ndev->ntb.topo = NTB_TOPO_SWITCH; ret = ntb_register_device(&ndev->ntb); if (ret != 0) { -- cgit From 1536dc063e82090c5da65ddd94b1da1b38bfab43 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 19 Jan 2018 15:55:28 +0100 Subject: NTB: ntb_perf: fix printing of resource_size_t On 32-bit architectures, resource_size_t is usually 'unsigned int' or 'unsigned long' but not 'unsigned long long', so we get a warning about printing the wrong data: drivers/ntb/test/ntb_perf.c: In function 'perf_setup_peer_mw': drivers/ntb/test/ntb_perf.c:1390:35: error: format '%llx' expects argument of type 'long long unsigned int', but argument 4 has type 'resource_size_t {aka unsigned int}' [-Werror=format=] This changes the format string to the special %pa that is already used elsewhere in the same file. 
Fixes: b83003b3fdc1 ("NTB: ntb_perf: Add full multi-port NTB API support") Signed-off-by: Arnd Bergmann Signed-off-by: Jon Mason --- drivers/ntb/test/ntb_perf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c index 8de72f3fba4d..1829a17dd461 100644 --- a/drivers/ntb/test/ntb_perf.c +++ b/drivers/ntb/test/ntb_perf.c @@ -1387,8 +1387,8 @@ static int perf_setup_peer_mw(struct perf_peer *peer) if (max_mw_size && peer->outbuf_size > max_mw_size) { peer->outbuf_size = max_mw_size; dev_warn(&peer->perf->ntb->dev, - "Peer %d outbuf reduced to %#llx\n", peer->pidx, - peer->outbuf_size); + "Peer %d outbuf reduced to %pa\n", peer->pidx, + &peer->outbuf_size); } return 0; -- cgit From 2e2bc5a9a300261853303c006b7c29faafc64221 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 22 Jan 2018 09:38:57 +0000 Subject: NTB: ntb_tool: fix memory leak on 'buf' on error exit path Currently there is a memory leak on buf when the call to ntb_mw_get_align fails. Add an exit err label and jump to this so that kfree on buf frees the memory. 
Detected by CoverityScan, CID#1464286 ("Resource leak") Fixes: d637628ce00c ("NTB: ntb_tool: Add full multi-port NTB API support") Signed-off-by: Colin Ian King Acked-by: Serge Semin Signed-off-by: Jon Mason --- drivers/ntb/test/ntb_tool.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/ntb/test/ntb_tool.c b/drivers/ntb/test/ntb_tool.c index 920fc9b161b0..d592c0ffbd19 100644 --- a/drivers/ntb/test/ntb_tool.c +++ b/drivers/ntb/test/ntb_tool.c @@ -659,7 +659,7 @@ static ssize_t tool_mw_trans_read(struct file *filep, char __user *ubuf, ret = ntb_mw_get_align(inmw->tc->ntb, inmw->pidx, inmw->widx, &addr_align, &size_align, &size_max); if (ret) - return ret; + goto err; off += scnprintf(buf + off, buf_size - off, "Inbound MW \t%d\n", @@ -694,6 +694,8 @@ static ssize_t tool_mw_trans_read(struct file *filep, char __user *ubuf, &size_max); ret = simple_read_from_buffer(ubuf, size, offp, buf, off); + +err: kfree(buf); return ret; -- cgit From ae07abdb84b267627f6e10fb813e62de5c3c8117 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 23 Jan 2018 02:09:21 +0000 Subject: ntb_hw_switchtec: Make function switchtec_ntb_remove() static Fixes the following sparse warnings: drivers/ntb/hw/mscc/ntb_hw_switchtec.c:1552:6: warning: symbol 'switchtec_ntb_remove' was not declared. Should it be static? 
Signed-off-by: Wei Yongjun Reviewed-by: Logan Gunthorpe Signed-off-by: Jon Mason --- drivers/ntb/hw/mscc/ntb_hw_switchtec.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c index a1d547b6aa12..f624ae27eabe 100644 --- a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c +++ b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c @@ -1549,8 +1549,8 @@ free_and_exit: return rc; } -void switchtec_ntb_remove(struct device *dev, - struct class_interface *class_intf) +static void switchtec_ntb_remove(struct device *dev, + struct class_interface *class_intf) { struct switchtec_dev *stdev = to_stdev(dev); struct switchtec_ntb *sndev = stdev->sndev; -- cgit From cd20dc3ca75f17fd391e825b1606050bd62703fc Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 23 Jan 2018 11:33:56 +0300 Subject: ntb_perf: Fix an error code in perf_copy_chunk() We accidentally return success if dmaengine_submit() fails. The fix is to preserve the error code from dma_submit_error(). 
Signed-off-by: Dan Carpenter Acked-by: Serge Semin Signed-off-by: Jon Mason --- drivers/ntb/test/ntb_perf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c index 1829a17dd461..b376609ffadb 100644 --- a/drivers/ntb/test/ntb_perf.c +++ b/drivers/ntb/test/ntb_perf.c @@ -828,7 +828,8 @@ static int perf_copy_chunk(struct perf_thread *pthr, tx->callback_param = pthr; dma_set_unmap(tx, unmap); - if (dma_submit_error(dmaengine_submit(tx))) { + ret = dma_submit_error(dmaengine_submit(tx)); + if (ret) { dmaengine_unmap_put(unmap); goto err_free_resource; } -- cgit From 3b28c987fb9547ca9aac73241d0e281cf646387c Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Wed, 24 Jan 2018 10:48:45 +0300 Subject: NTB: ntb_perf: fix cast to restricted __le32 Sparse is whining about the u32 and __le32 mixed usage in the driver drivers/ntb/test/ntb_perf.c:288:21: warning: cast to restricted __le32 drivers/ntb/test/ntb_perf.c:295:37: warning: incorrect type in argument 4 (different base types) drivers/ntb/test/ntb_perf.c:295:37: expected unsigned int [unsigned] [usertype] val drivers/ntb/test/ntb_perf.c:295:37: got restricted __le32 [usertype] ... NTB hardware drivers shall accept CPU-endian data and translate it to the portable format by internal means, so the explicit conversions are not necessary before Scratchpad/Messages API usage anymore. 
Fixes: b83003b3fdc1 ("NTB: ntb_perf: Add full multi-port NTB API support") Signed-off-by: Serge Semin Acked-by: Arnd Bergmann Signed-off-by: Jon Mason --- drivers/ntb/test/ntb_perf.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) (limited to 'drivers') diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c index b376609ffadb..2a9d6b0d1f19 100644 --- a/drivers/ntb/test/ntb_perf.c +++ b/drivers/ntb/test/ntb_perf.c @@ -273,21 +273,21 @@ static int perf_spad_cmd_send(struct perf_peer *peer, enum perf_cmd cmd, sts = ntb_peer_spad_read(perf->ntb, peer->pidx, PERF_SPAD_CMD(perf->gidx)); - if (le32_to_cpu(sts) != PERF_CMD_INVAL) { + if (sts != PERF_CMD_INVAL) { usleep_range(MSG_UDELAY_LOW, MSG_UDELAY_HIGH); continue; } ntb_peer_spad_write(perf->ntb, peer->pidx, PERF_SPAD_LDATA(perf->gidx), - cpu_to_le32(lower_32_bits(data))); + lower_32_bits(data)); ntb_peer_spad_write(perf->ntb, peer->pidx, PERF_SPAD_HDATA(perf->gidx), - cpu_to_le32(upper_32_bits(data))); + upper_32_bits(data)); mmiowb(); ntb_peer_spad_write(perf->ntb, peer->pidx, PERF_SPAD_CMD(perf->gidx), - cpu_to_le32(cmd)); + cmd); mmiowb(); ntb_peer_db_set(perf->ntb, PERF_SPAD_NOTIFY(peer->gidx)); @@ -321,21 +321,20 @@ static int perf_spad_cmd_recv(struct perf_ctx *perf, int *pidx, continue; val = ntb_spad_read(perf->ntb, PERF_SPAD_CMD(peer->gidx)); - val = le32_to_cpu(val); if (val == PERF_CMD_INVAL) continue; *cmd = val; val = ntb_spad_read(perf->ntb, PERF_SPAD_LDATA(peer->gidx)); - *data = le32_to_cpu(val); + *data = val; val = ntb_spad_read(perf->ntb, PERF_SPAD_HDATA(peer->gidx)); - *data |= (u64)le32_to_cpu(val) << 32; + *data |= (u64)val << 32; /* Next command can be retrieved from now */ ntb_spad_write(perf->ntb, PERF_SPAD_CMD(peer->gidx), - cpu_to_le32(PERF_CMD_INVAL)); + PERF_CMD_INVAL); dev_dbg(&perf->ntb->dev, "CMD recv: %d 0x%llx\n", *cmd, *data); @@ -371,7 +370,7 @@ static int perf_msg_cmd_send(struct perf_peer *peer, enum perf_cmd cmd, return ret; 
ntb_peer_msg_write(perf->ntb, peer->pidx, PERF_MSG_LDATA, - cpu_to_le32(lower_32_bits(data))); + lower_32_bits(data)); if (ntb_msg_read_sts(perf->ntb) & outbits) { usleep_range(MSG_UDELAY_LOW, MSG_UDELAY_HIGH); @@ -379,12 +378,11 @@ static int perf_msg_cmd_send(struct perf_peer *peer, enum perf_cmd cmd, } ntb_peer_msg_write(perf->ntb, peer->pidx, PERF_MSG_HDATA, - cpu_to_le32(upper_32_bits(data))); + upper_32_bits(data)); mmiowb(); /* This call shall trigger peer message event */ - ntb_peer_msg_write(perf->ntb, peer->pidx, PERF_MSG_CMD, - cpu_to_le32(cmd)); + ntb_peer_msg_write(perf->ntb, peer->pidx, PERF_MSG_CMD, cmd); break; } @@ -404,13 +402,13 @@ static int perf_msg_cmd_recv(struct perf_ctx *perf, int *pidx, return -ENODATA; val = ntb_msg_read(perf->ntb, pidx, PERF_MSG_CMD); - *cmd = le32_to_cpu(val); + *cmd = val; val = ntb_msg_read(perf->ntb, pidx, PERF_MSG_LDATA); - *data = le32_to_cpu(val); + *data = val; val = ntb_msg_read(perf->ntb, pidx, PERF_MSG_HDATA); - *data |= (u64)le32_to_cpu(val) << 32; + *data |= (u64)val << 32; /* Next command can be retrieved from now */ ntb_msg_clear_sts(perf->ntb, inbits); -- cgit