summaryrefslogtreecommitdiff
path: root/drivers/infiniband
diff options
context:
space:
mode:
author Doug Ledford <dledford@redhat.com> 2016-12-14 14:44:08 -0500
committer Doug Ledford <dledford@redhat.com> 2016-12-14 14:44:08 -0500
commit 253f8b22e0ad643edafd75e831e5c765732877f5 (patch)
tree c0e682e339f287a70606927863b9cc622c9952f1 /drivers/infiniband
parent 884fa4f3048c4c43facfa6ba3b710169f7ee162c (diff)
parent 22dccc5454a39427de7b87a080d026b6bf66a7b9 (diff)
Merge branch 'hfi1' into merge-test
Diffstat (limited to 'drivers/infiniband')
-rw-r--r-- drivers/infiniband/hw/hfi1/affinity.c 3
-rw-r--r-- drivers/infiniband/hw/hfi1/affinity.h 9
-rw-r--r-- drivers/infiniband/hw/hfi1/chip.c 9
-rw-r--r-- drivers/infiniband/hw/hfi1/chip_registers.h 3
-rw-r--r-- drivers/infiniband/hw/hfi1/debugfs.c 110
-rw-r--r-- drivers/infiniband/hw/hfi1/driver.c 3
-rw-r--r-- drivers/infiniband/hw/hfi1/eprom.c 211
-rw-r--r-- drivers/infiniband/hw/hfi1/firmware.c 156
-rw-r--r-- drivers/infiniband/hw/hfi1/hfi.h 144
-rw-r--r-- drivers/infiniband/hw/hfi1/iowait.h 8
-rw-r--r-- drivers/infiniband/hw/hfi1/mad.c 19
-rw-r--r-- drivers/infiniband/hw/hfi1/mmu_rb.c 2
-rw-r--r-- drivers/infiniband/hw/hfi1/pio.c 39
-rw-r--r-- drivers/infiniband/hw/hfi1/pio.h 38
-rw-r--r-- drivers/infiniband/hw/hfi1/pio_copy.c 22
-rw-r--r-- drivers/infiniband/hw/hfi1/platform.c 193
-rw-r--r-- drivers/infiniband/hw/hfi1/platform.h 127
-rw-r--r-- drivers/infiniband/hw/hfi1/qp.c 11
-rw-r--r-- drivers/infiniband/hw/hfi1/rc.c 60
-rw-r--r-- drivers/infiniband/hw/hfi1/ruc.c 44
-rw-r--r-- drivers/infiniband/hw/hfi1/sdma.c 18
-rw-r--r-- drivers/infiniband/hw/hfi1/sdma.h 12
-rw-r--r-- drivers/infiniband/hw/hfi1/uc.c 4
-rw-r--r-- drivers/infiniband/hw/hfi1/ud.c 4
-rw-r--r-- drivers/infiniband/hw/hfi1/user_sdma.c 60
-rw-r--r-- drivers/infiniband/hw/hfi1/verbs.c 209
-rw-r--r-- drivers/infiniband/hw/hfi1/verbs.h 16
-rw-r--r-- drivers/infiniband/hw/hfi1/verbs_txreq.c 13
-rw-r--r-- drivers/infiniband/hw/hfi1/verbs_txreq.h 1
-rw-r--r-- drivers/infiniband/hw/qib/qib_driver.c 3
-rw-r--r-- drivers/infiniband/hw/qib/qib_rc.c 44
-rw-r--r-- drivers/infiniband/hw/qib/qib_ruc.c 24
-rw-r--r-- drivers/infiniband/hw/qib/qib_verbs.c 33
-rw-r--r-- drivers/infiniband/sw/rdmavt/cq.c 64
-rw-r--r-- drivers/infiniband/sw/rdmavt/mcast.c 5
-rw-r--r-- drivers/infiniband/sw/rdmavt/mr.c 22
-rw-r--r-- drivers/infiniband/sw/rdmavt/qp.c 20
-rw-r--r-- drivers/infiniband/sw/rdmavt/trace.h 141
-rw-r--r-- drivers/infiniband/sw/rdmavt/trace_mr.h 112
-rw-r--r-- drivers/infiniband/sw/rdmavt/trace_qp.h 96
-rw-r--r-- drivers/infiniband/sw/rdmavt/trace_rvt.h 81
-rw-r--r-- drivers/infiniband/sw/rdmavt/trace_tx.h 132
42 files changed, 1677 insertions, 648 deletions
diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c
index a26a9a0bfc41..4962b6ef1f34 100644
--- a/drivers/infiniband/hw/hfi1/affinity.c
+++ b/drivers/infiniband/hw/hfi1/affinity.c
@@ -125,6 +125,7 @@ int node_affinity_init(void)
cpumask_weight(topology_sibling_cpumask(
cpumask_first(&node_affinity.proc.mask)
));
+ node_affinity.num_possible_nodes = num_possible_nodes();
node_affinity.num_online_nodes = num_online_nodes();
node_affinity.num_online_cpus = num_online_cpus();
@@ -135,7 +136,7 @@ int node_affinity_init(void)
*/
init_real_cpu_mask();
- hfi1_per_node_cntr = kcalloc(num_possible_nodes(),
+ hfi1_per_node_cntr = kcalloc(node_affinity.num_possible_nodes,
sizeof(*hfi1_per_node_cntr), GFP_KERNEL);
if (!hfi1_per_node_cntr)
return -ENOMEM;
diff --git a/drivers/infiniband/hw/hfi1/affinity.h b/drivers/infiniband/hw/hfi1/affinity.h
index b89ea3c0ee1a..c9453b3d47b4 100644
--- a/drivers/infiniband/hw/hfi1/affinity.h
+++ b/drivers/infiniband/hw/hfi1/affinity.h
@@ -70,14 +70,6 @@ struct cpu_mask_set {
uint gen;
};
-struct hfi1_affinity {
- struct cpu_mask_set def_intr;
- struct cpu_mask_set rcv_intr;
- struct cpumask real_cpu_mask;
- /* spin lock to protect affinity struct */
- spinlock_t lock;
-};
-
struct hfi1_msix_entry;
/* Initialize non-HT cpu cores mask */
@@ -119,6 +111,7 @@ struct hfi1_affinity_node_list {
struct cpumask real_cpu_mask;
struct cpu_mask_set proc;
int num_core_siblings;
+ int num_possible_nodes;
int num_online_nodes;
int num_online_cpus;
struct mutex lock; /* protects affinity nodes */
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 9bf5f23544d4..37d8af50cc13 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -8488,7 +8488,10 @@ static int do_8051_command(
*/
if (type == HCMD_WRITE_LCB_CSR) {
in_data |= ((*out_data) & 0xffffffffffull) << 8;
- reg = ((((*out_data) >> 40) & 0xff) <<
+ /* must preserve COMPLETED - it is tied to hardware */
+ reg = read_csr(dd, DC_DC8051_CFG_EXT_DEV_0);
+ reg &= DC_DC8051_CFG_EXT_DEV_0_COMPLETED_SMASK;
+ reg |= ((((*out_data) >> 40) & 0xff) <<
DC_DC8051_CFG_EXT_DEV_0_RETURN_CODE_SHIFT)
| ((((*out_data) >> 48) & 0xffff) <<
DC_DC8051_CFG_EXT_DEV_0_RSP_DATA_SHIFT);
@@ -9567,11 +9570,11 @@ int bringup_serdes(struct hfi1_pportdata *ppd)
if (HFI1_CAP_IS_KSET(EXTENDED_PSN))
add_rcvctrl(dd, RCV_CTRL_RCV_EXTENDED_PSN_ENABLE_SMASK);
- guid = ppd->guid;
+ guid = ppd->guids[HFI1_PORT_GUID_INDEX];
if (!guid) {
if (dd->base_guid)
guid = dd->base_guid + ppd->port - 1;
- ppd->guid = guid;
+ ppd->guids[HFI1_PORT_GUID_INDEX] = guid;
}
/* Set linkinit_reason on power up per OPA spec */
diff --git a/drivers/infiniband/hw/hfi1/chip_registers.h b/drivers/infiniband/hw/hfi1/chip_registers.h
index 5b9993899789..5bfa839d1c48 100644
--- a/drivers/infiniband/hw/hfi1/chip_registers.h
+++ b/drivers/infiniband/hw/hfi1/chip_registers.h
@@ -415,6 +415,9 @@
#define ASIC_CFG_SBUS_REQUEST_DATA_IN_SHIFT 32
#define ASIC_CFG_SBUS_REQUEST_RECEIVER_ADDR_SHIFT 0
#define ASIC_CFG_SCRATCH (ASIC + 0x000000000020)
+#define ASIC_CFG_SCRATCH_1 (ASIC_CFG_SCRATCH + 0x08)
+#define ASIC_CFG_SCRATCH_2 (ASIC_CFG_SCRATCH + 0x10)
+#define ASIC_CFG_SCRATCH_3 (ASIC_CFG_SCRATCH + 0x18)
#define ASIC_CFG_THERM_POLL_EN (ASIC + 0x000000000050)
#define ASIC_EEP_ADDR_CMD (ASIC + 0x000000000308)
#define ASIC_EEP_ADDR_CMD_EP_ADDR_MASK 0xFFFFFFull
diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c
index 632ba21759ab..8725f4c086cf 100644
--- a/drivers/infiniband/hw/hfi1/debugfs.c
+++ b/drivers/infiniband/hw/hfi1/debugfs.c
@@ -541,6 +541,114 @@ static ssize_t asic_flags_write(struct file *file, const char __user *buf,
return ret;
}
+/* read the dc8051 memory */
+static ssize_t dc8051_memory_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct hfi1_pportdata *ppd = private2ppd(file);
+ ssize_t rval;
+ void *tmp;
+ loff_t start, end;
+
+ /* the checks below expect the position to be positive */
+ if (*ppos < 0)
+ return -EINVAL;
+
+ tmp = kzalloc(DC8051_DATA_MEM_SIZE, GFP_KERNEL);
+ if (!tmp)
+ return -ENOMEM;
+
+ /*
+ * Fill in the requested portion of the temporary buffer from the
+ * 8051 memory. The 8051 memory read is done in terms of 8 bytes.
+ * Adjust start and end to fit. Skip reading anything if out of
+ * range.
+ */
+ start = *ppos & ~0x7; /* round down */
+ if (start < DC8051_DATA_MEM_SIZE) {
+ end = (*ppos + count + 7) & ~0x7; /* round up */
+ if (end > DC8051_DATA_MEM_SIZE)
+ end = DC8051_DATA_MEM_SIZE;
+ rval = read_8051_data(ppd->dd, start, end - start,
+ (u64 *)(tmp + start));
+ if (rval)
+ goto done;
+ }
+
+ rval = simple_read_from_buffer(buf, count, ppos, tmp,
+ DC8051_DATA_MEM_SIZE);
+done:
+ kfree(tmp);
+ return rval;
+}
+
+static ssize_t debugfs_lcb_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct hfi1_pportdata *ppd = private2ppd(file);
+ struct hfi1_devdata *dd = ppd->dd;
+ unsigned long total, csr_off;
+ u64 data;
+
+ if (*ppos < 0)
+ return -EINVAL;
+ /* only read 8 byte quantities */
+ if ((count % 8) != 0)
+ return -EINVAL;
+ /* offset must be 8-byte aligned */
+ if ((*ppos % 8) != 0)
+ return -EINVAL;
+ /* do nothing if out of range or zero count */
+ if (*ppos >= (LCB_END - LCB_START) || !count)
+ return 0;
+ /* reduce count if needed */
+ if (*ppos + count > LCB_END - LCB_START)
+ count = (LCB_END - LCB_START) - *ppos;
+
+ csr_off = LCB_START + *ppos;
+ for (total = 0; total < count; total += 8, csr_off += 8) {
+ if (read_lcb_csr(dd, csr_off, (u64 *)&data))
+ break; /* failed */
+ if (put_user(data, (unsigned long __user *)(buf + total)))
+ break;
+ }
+ *ppos += total;
+ return total;
+}
+
+static ssize_t debugfs_lcb_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct hfi1_pportdata *ppd = private2ppd(file);
+ struct hfi1_devdata *dd = ppd->dd;
+ unsigned long total, csr_off, data;
+
+ if (*ppos < 0)
+ return -EINVAL;
+ /* only write 8 byte quantities */
+ if ((count % 8) != 0)
+ return -EINVAL;
+ /* offset must be 8-byte aligned */
+ if ((*ppos % 8) != 0)
+ return -EINVAL;
+ /* do nothing if out of range or zero count */
+ if (*ppos >= (LCB_END - LCB_START) || !count)
+ return 0;
+ /* reduce count if needed */
+ if (*ppos + count > LCB_END - LCB_START)
+ count = (LCB_END - LCB_START) - *ppos;
+
+ csr_off = LCB_START + *ppos;
+ for (total = 0; total < count; total += 8, csr_off += 8) {
+ if (get_user(data, (unsigned long __user *)(buf + total)))
+ break;
+ if (write_lcb_csr(dd, csr_off, data))
+ break; /* failed */
+ }
+ *ppos += total;
+ return total;
+}
+
/*
* read the per-port QSFP data for ppd
*/
@@ -931,6 +1039,8 @@ static const struct counter_info port_cntr_ops[] = {
DEBUGFS_XOPS("qsfp2", qsfp2_debugfs_read, qsfp2_debugfs_write,
qsfp2_debugfs_open, qsfp2_debugfs_release),
DEBUGFS_OPS("asic_flags", asic_flags_read, asic_flags_write),
+ DEBUGFS_OPS("dc8051_memory", dc8051_memory_read, NULL),
+ DEBUGFS_OPS("lcb", debugfs_lcb_read, debugfs_lcb_write),
};
static void *_sdma_cpu_list_seq_start(struct seq_file *s, loff_t *pos)
diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index 6563e4d38b80..d4261163bd25 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -793,8 +793,7 @@ static inline void process_rcv_qp_work(struct hfi1_packet *packet)
hfi1_schedule_send(qp);
spin_unlock_irqrestore(&qp->s_lock, flags);
}
- if (atomic_dec_and_test(&qp->refcount))
- wake_up(&qp->wait);
+ rvt_put_qp(qp);
}
}
diff --git a/drivers/infiniband/hw/hfi1/eprom.c b/drivers/infiniband/hw/hfi1/eprom.c
index e70c223801b4..26da124c88e2 100644
--- a/drivers/infiniband/hw/hfi1/eprom.c
+++ b/drivers/infiniband/hw/hfi1/eprom.c
@@ -207,6 +207,40 @@ done_asic:
/* magic character sequence that trails an image */
#define IMAGE_TRAIL_MAGIC "egamiAPO"
+/* EPROM file types */
+#define HFI1_EFT_PLATFORM_CONFIG 2
+
+/* segment size - 128 KiB */
+#define SEG_SIZE (128 * 1024)
+
+struct hfi1_eprom_footer {
+ u32 oprom_size; /* size of the oprom, in bytes */
+ u16 num_table_entries;
+ u16 version; /* version of this footer */
+ u32 magic; /* must be last */
+};
+
+struct hfi1_eprom_table_entry {
+ u32 type; /* file type */
+ u32 offset; /* file offset from start of EPROM */
+ u32 size; /* file size, in bytes */
+};
+
+/*
+ * Calculate the max number of table entries that will fit within a directory
+ * buffer of size 'dir_size'.
+ */
+#define MAX_TABLE_ENTRIES(dir_size) \
+ (((dir_size) - sizeof(struct hfi1_eprom_footer)) / \
+ sizeof(struct hfi1_eprom_table_entry))
+
+#define DIRECTORY_SIZE(n) (sizeof(struct hfi1_eprom_footer) + \
+ (sizeof(struct hfi1_eprom_table_entry) * (n)))
+
+#define MAGIC4(a, b, c, d) ((d) << 24 | (c) << 16 | (b) << 8 | (a))
+#define FOOTER_MAGIC MAGIC4('e', 'p', 'r', 'm')
+#define FOOTER_VERSION 1
+
/*
* Read all of partition 1. The actual file is at the front. Adjust
* the returned size if a trailing image magic is found.
@@ -242,6 +276,167 @@ static int read_partition_platform_config(struct hfi1_devdata *dd, void **data,
}
/*
+ * The segment magic has been checked. There is a footer and table of
+ * contents present.
+ *
+ * directory is a u32 aligned buffer of size EP_PAGE_SIZE.
+ */
+static int read_segment_platform_config(struct hfi1_devdata *dd,
+ void *directory, void **data, u32 *size)
+{
+ struct hfi1_eprom_footer *footer;
+ struct hfi1_eprom_table_entry *table;
+ struct hfi1_eprom_table_entry *entry;
+ void *buffer = NULL;
+ void *table_buffer = NULL;
+ int ret, i;
+ u32 directory_size;
+ u32 seg_base, seg_offset;
+ u32 bytes_available, ncopied, to_copy;
+
+ /* the footer is at the end of the directory */
+ footer = (struct hfi1_eprom_footer *)
+ (directory + EP_PAGE_SIZE - sizeof(*footer));
+
+ /* make sure the structure version is supported */
+ if (footer->version != FOOTER_VERSION)
+ return -EINVAL;
+
+ /* oprom size cannot be larger than a segment */
+ if (footer->oprom_size >= SEG_SIZE)
+ return -EINVAL;
+
+ /* the file table must fit in a segment with the oprom */
+ if (footer->num_table_entries >
+ MAX_TABLE_ENTRIES(SEG_SIZE - footer->oprom_size))
+ return -EINVAL;
+
+ /* find the file table start, which precedes the footer */
+ directory_size = DIRECTORY_SIZE(footer->num_table_entries);
+ if (directory_size <= EP_PAGE_SIZE) {
+ /* the file table fits into the directory buffer handed in */
+ table = (struct hfi1_eprom_table_entry *)
+ (directory + EP_PAGE_SIZE - directory_size);
+ } else {
+ /* need to allocate and read more */
+ table_buffer = kmalloc(directory_size, GFP_KERNEL);
+ if (!table_buffer)
+ return -ENOMEM;
+ ret = read_length(dd, SEG_SIZE - directory_size,
+ directory_size, table_buffer);
+ if (ret)
+ goto done;
+ table = table_buffer;
+ }
+
+ /* look for the platform configuration file in the table */
+ for (entry = NULL, i = 0; i < footer->num_table_entries; i++) {
+ if (table[i].type == HFI1_EFT_PLATFORM_CONFIG) {
+ entry = &table[i];
+ break;
+ }
+ }
+ if (!entry) {
+ ret = -ENOENT;
+ goto done;
+ }
+
+ /*
+ * Sanity check on the configuration file size - it should never
+ * be larger than 4 KiB.
+ */
+ if (entry->size > (4 * 1024)) {
+ dd_dev_err(dd, "Bad configuration file size 0x%x\n",
+ entry->size);
+ ret = -EINVAL;
+ goto done;
+ }
+
+ /* check for bogus offset and size that wrap when added together */
+ if (entry->offset + entry->size < entry->offset) {
+ dd_dev_err(dd,
+ "Bad configuration file start + size 0x%x+0x%x\n",
+ entry->offset, entry->size);
+ ret = -EINVAL;
+ goto done;
+ }
+
+ /* allocate the buffer to return */
+ buffer = kmalloc(entry->size, GFP_KERNEL);
+ if (!buffer) {
+ ret = -ENOMEM;
+ goto done;
+ }
+
+ /*
+ * Extract the file by looping over segments until it is fully read.
+ */
+ seg_offset = entry->offset % SEG_SIZE;
+ seg_base = entry->offset - seg_offset;
+ ncopied = 0;
+ while (ncopied < entry->size) {
+ /* calculate data bytes available in this segment */
+
+ /* start with the bytes from the current offset to the end */
+ bytes_available = SEG_SIZE - seg_offset;
+ /* subtract off footer and table from segment 0 */
+ if (seg_base == 0) {
+ /*
+ * Sanity check: should not have a starting point
+ * at or within the directory.
+ */
+ if (bytes_available <= directory_size) {
+ dd_dev_err(dd,
+ "Bad configuration file - offset 0x%x within footer+table\n",
+ entry->offset);
+ ret = -EINVAL;
+ goto done;
+ }
+ bytes_available -= directory_size;
+ }
+
+ /* calculate bytes wanted */
+ to_copy = entry->size - ncopied;
+
+ /* max out at the available bytes in this segment */
+ if (to_copy > bytes_available)
+ to_copy = bytes_available;
+
+ /*
+ * Read from the EPROM.
+ *
+ * The sanity check for entry->offset is done in read_length().
+ * The EPROM offset is validated against what the hardware
+ * addressing supports. In addition, if the offset is larger
+ * than the actual EPROM, it silently wraps. It will work
+ * fine, though the reader may not get what they expected
+ * from the EPROM.
+ */
+ ret = read_length(dd, seg_base + seg_offset, to_copy,
+ buffer + ncopied);
+ if (ret)
+ goto done;
+
+ ncopied += to_copy;
+
+ /* set up for next segment */
+ seg_offset = footer->oprom_size;
+ seg_base += SEG_SIZE;
+ }
+
+ /* success */
+ ret = 0;
+ *data = buffer;
+ *size = entry->size;
+
+done:
+ kfree(table_buffer);
+ if (ret)
+ kfree(buffer);
+ return ret;
+}
+
+/*
* Read the platform configuration file from the EPROM.
*
* On success, an allocated buffer containing the data and its size are
@@ -253,6 +448,7 @@ static int read_partition_platform_config(struct hfi1_devdata *dd, void **data,
* -EBUSY - not able to acquire access to the EPROM
* -ENOENT - no recognizable file written
* -ENOMEM - buffer could not be allocated
+ * -EINVAL - invalid EPROM contents found
*/
int eprom_read_platform_config(struct hfi1_devdata *dd, void **data, u32 *size)
{
@@ -266,21 +462,20 @@ int eprom_read_platform_config(struct hfi1_devdata *dd, void **data, u32 *size)
if (ret)
return -EBUSY;
- /* read the last page of P0 for the EPROM format magic */
- ret = read_length(dd, P1_START - EP_PAGE_SIZE, EP_PAGE_SIZE, directory);
+ /* read the last page of the segment for the EPROM format magic */
+ ret = read_length(dd, SEG_SIZE - EP_PAGE_SIZE, EP_PAGE_SIZE, directory);
if (ret)
goto done;
- /* last dword of P0 contains a magic indicator */
- if (directory[EP_PAGE_DWORDS - 1] == 0) {
+ /* last dword of the segment contains a magic value */
+ if (directory[EP_PAGE_DWORDS - 1] == FOOTER_MAGIC) {
+ /* segment format */
+ ret = read_segment_platform_config(dd, directory, data, size);
+ } else {
/* partition format */
ret = read_partition_platform_config(dd, data, size);
- goto done;
}
- /* nothing recognized */
- ret = -ENOENT;
-
done:
release_chip_resource(dd, CR_EPROM);
return ret;
diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c
index 13db8eb4f4ec..0dd50cdb039a 100644
--- a/drivers/infiniband/hw/hfi1/firmware.c
+++ b/drivers/infiniband/hw/hfi1/firmware.c
@@ -239,6 +239,16 @@ static const u8 all_fabric_serdes_broadcast = 0xe1;
const u8 pcie_serdes_broadcast[2] = { 0xe2, 0xe3 };
static const u8 all_pcie_serdes_broadcast = 0xe0;
+static const u32 platform_config_table_limits[PLATFORM_CONFIG_TABLE_MAX] = {
+ 0,
+ SYSTEM_TABLE_MAX,
+ PORT_TABLE_MAX,
+ RX_PRESET_TABLE_MAX,
+ TX_PRESET_TABLE_MAX,
+ QSFP_ATTEN_TABLE_MAX,
+ VARIABLE_SETTINGS_TABLE_MAX
+};
+
/* forwards */
static void dispose_one_firmware(struct firmware_details *fdet);
static int load_fabric_serdes_firmware(struct hfi1_devdata *dd,
@@ -263,11 +273,13 @@ static int __read_8051_data(struct hfi1_devdata *dd, u32 addr, u64 *result)
u64 reg;
int count;
- /* start the read at the given address */
- reg = ((addr & DC_DC8051_CFG_RAM_ACCESS_CTRL_ADDRESS_MASK)
- << DC_DC8051_CFG_RAM_ACCESS_CTRL_ADDRESS_SHIFT)
- | DC_DC8051_CFG_RAM_ACCESS_CTRL_READ_ENA_SMASK;
+ /* step 1: set the address, clear enable */
+ reg = (addr & DC_DC8051_CFG_RAM_ACCESS_CTRL_ADDRESS_MASK)
+ << DC_DC8051_CFG_RAM_ACCESS_CTRL_ADDRESS_SHIFT;
write_csr(dd, DC_DC8051_CFG_RAM_ACCESS_CTRL, reg);
+ /* step 2: enable */
+ write_csr(dd, DC_DC8051_CFG_RAM_ACCESS_CTRL,
+ reg | DC_DC8051_CFG_RAM_ACCESS_CTRL_READ_ENA_SMASK);
/* wait until ACCESS_COMPLETED is set */
count = 0;
@@ -707,6 +719,9 @@ static int obtain_firmware(struct hfi1_devdata *dd)
&dd->pcidev->dev);
if (err) {
platform_config = NULL;
+ dd_dev_err(dd,
+ "%s: No default platform config file found\n",
+ __func__);
goto done;
}
dd->platform_config.data = platform_config->data;
@@ -1761,8 +1776,17 @@ int parse_platform_config(struct hfi1_devdata *dd)
u32 record_idx = 0, table_type = 0, table_length_dwords = 0;
int ret = -EINVAL; /* assume failure */
+ /*
+ * For integrated devices that did not fall back to the default file,
+ * the SI tuning information for active channels is acquired from the
+ * scratch register bitmap, thus there is no platform config to parse.
+ * Skip parsing in these situations.
+ */
+ if (is_integrated(dd) && !platform_config_load)
+ return 0;
+
if (!dd->platform_config.data) {
- dd_dev_info(dd, "%s: Missing config file\n", __func__);
+ dd_dev_err(dd, "%s: Missing config file\n", __func__);
goto bail;
}
ptr = (u32 *)dd->platform_config.data;
@@ -1770,7 +1794,7 @@ int parse_platform_config(struct hfi1_devdata *dd)
magic_num = *ptr;
ptr++;
if (magic_num != PLATFORM_CONFIG_MAGIC_NUM) {
- dd_dev_info(dd, "%s: Bad config file\n", __func__);
+ dd_dev_err(dd, "%s: Bad config file\n", __func__);
goto bail;
}
@@ -1797,9 +1821,9 @@ int parse_platform_config(struct hfi1_devdata *dd)
header1 = *ptr;
header2 = *(ptr + 1);
if (header1 != ~header2) {
- dd_dev_info(dd, "%s: Failed validation at offset %ld\n",
- __func__, (ptr - (u32 *)
- dd->platform_config.data));
+ dd_dev_err(dd, "%s: Failed validation at offset %ld\n",
+ __func__, (ptr - (u32 *)
+ dd->platform_config.data));
goto bail;
}
@@ -1841,11 +1865,11 @@ int parse_platform_config(struct hfi1_devdata *dd)
table_length_dwords;
break;
default:
- dd_dev_info(dd,
- "%s: Unknown data table %d, offset %ld\n",
- __func__, table_type,
- (ptr - (u32 *)
- dd->platform_config.data));
+ dd_dev_err(dd,
+ "%s: Unknown data table %d, offset %ld\n",
+ __func__, table_type,
+ (ptr - (u32 *)
+ dd->platform_config.data));
goto bail; /* We don't trust this file now */
}
pcfgcache->config_tables[table_type].table = ptr;
@@ -1865,11 +1889,11 @@ int parse_platform_config(struct hfi1_devdata *dd)
case PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE:
break;
default:
- dd_dev_info(dd,
- "%s: Unknown meta table %d, offset %ld\n",
- __func__, table_type,
- (ptr -
- (u32 *)dd->platform_config.data));
+ dd_dev_err(dd,
+ "%s: Unknown meta table %d, offset %ld\n",
+ __func__, table_type,
+ (ptr -
+ (u32 *)dd->platform_config.data));
goto bail; /* We don't trust this file now */
}
pcfgcache->config_tables[table_type].table_metadata =
@@ -1884,10 +1908,9 @@ int parse_platform_config(struct hfi1_devdata *dd)
/* Jump the table */
ptr += table_length_dwords;
if (crc != *ptr) {
- dd_dev_info(dd, "%s: Failed CRC check at offset %ld\n",
- __func__, (ptr -
- (u32 *)
- dd->platform_config.data));
+ dd_dev_err(dd, "%s: Failed CRC check at offset %ld\n",
+ __func__, (ptr -
+ (u32 *)dd->platform_config.data));
goto bail;
}
/* Jump the CRC DWORD */
@@ -1901,6 +1924,84 @@ bail:
return ret;
}
+static void get_integrated_platform_config_field(
+ struct hfi1_devdata *dd,
+ enum platform_config_table_type_encoding table_type,
+ int field_index, u32 *data)
+{
+ struct hfi1_pportdata *ppd = dd->pport;
+ u8 *cache = ppd->qsfp_info.cache;
+ u32 tx_preset = 0;
+
+ switch (table_type) {
+ case PLATFORM_CONFIG_SYSTEM_TABLE:
+ if (field_index == SYSTEM_TABLE_QSFP_POWER_CLASS_MAX)
+ *data = ppd->max_power_class;
+ else if (field_index == SYSTEM_TABLE_QSFP_ATTENUATION_DEFAULT_25G)
+ *data = ppd->default_atten;
+ break;
+ case PLATFORM_CONFIG_PORT_TABLE:
+ if (field_index == PORT_TABLE_PORT_TYPE)
+ *data = ppd->port_type;
+ else if (field_index == PORT_TABLE_LOCAL_ATTEN_25G)
+ *data = ppd->local_atten;
+ else if (field_index == PORT_TABLE_REMOTE_ATTEN_25G)
+ *data = ppd->remote_atten;
+ break;
+ case PLATFORM_CONFIG_RX_PRESET_TABLE:
+ if (field_index == RX_PRESET_TABLE_QSFP_RX_CDR_APPLY)
+ *data = (ppd->rx_preset & QSFP_RX_CDR_APPLY_SMASK) >>
+ QSFP_RX_CDR_APPLY_SHIFT;
+ else if (field_index == RX_PRESET_TABLE_QSFP_RX_EMP_APPLY)
+ *data = (ppd->rx_preset & QSFP_RX_EMP_APPLY_SMASK) >>
+ QSFP_RX_EMP_APPLY_SHIFT;
+ else if (field_index == RX_PRESET_TABLE_QSFP_RX_AMP_APPLY)
+ *data = (ppd->rx_preset & QSFP_RX_AMP_APPLY_SMASK) >>
+ QSFP_RX_AMP_APPLY_SHIFT;
+ else if (field_index == RX_PRESET_TABLE_QSFP_RX_CDR)
+ *data = (ppd->rx_preset & QSFP_RX_CDR_SMASK) >>
+ QSFP_RX_CDR_SHIFT;
+ else if (field_index == RX_PRESET_TABLE_QSFP_RX_EMP)
+ *data = (ppd->rx_preset & QSFP_RX_EMP_SMASK) >>
+ QSFP_RX_EMP_SHIFT;
+ else if (field_index == RX_PRESET_TABLE_QSFP_RX_AMP)
+ *data = (ppd->rx_preset & QSFP_RX_AMP_SMASK) >>
+ QSFP_RX_AMP_SHIFT;
+ break;
+ case PLATFORM_CONFIG_TX_PRESET_TABLE:
+ if (cache[QSFP_EQ_INFO_OFFS] & 0x4)
+ tx_preset = ppd->tx_preset_eq;
+ else
+ tx_preset = ppd->tx_preset_noeq;
+ if (field_index == TX_PRESET_TABLE_PRECUR)
+ *data = (tx_preset & TX_PRECUR_SMASK) >>
+ TX_PRECUR_SHIFT;
+ else if (field_index == TX_PRESET_TABLE_ATTN)
+ *data = (tx_preset & TX_ATTN_SMASK) >>
+ TX_ATTN_SHIFT;
+ else if (field_index == TX_PRESET_TABLE_POSTCUR)
+ *data = (tx_preset & TX_POSTCUR_SMASK) >>
+ TX_POSTCUR_SHIFT;
+ else if (field_index == TX_PRESET_TABLE_QSFP_TX_CDR_APPLY)
+ *data = (tx_preset & QSFP_TX_CDR_APPLY_SMASK) >>
+ QSFP_TX_CDR_APPLY_SHIFT;
+ else if (field_index == TX_PRESET_TABLE_QSFP_TX_EQ_APPLY)
+ *data = (tx_preset & QSFP_TX_EQ_APPLY_SMASK) >>
+ QSFP_TX_EQ_APPLY_SHIFT;
+ else if (field_index == TX_PRESET_TABLE_QSFP_TX_CDR)
+ *data = (tx_preset & QSFP_TX_CDR_SMASK) >>
+ QSFP_TX_CDR_SHIFT;
+ else if (field_index == TX_PRESET_TABLE_QSFP_TX_EQ)
+ *data = (tx_preset & QSFP_TX_EQ_SMASK) >>
+ QSFP_TX_EQ_SHIFT;
+ break;
+ case PLATFORM_CONFIG_QSFP_ATTEN_TABLE:
+ case PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE:
+ default:
+ break;
+ }
+}
+
static int get_platform_fw_field_metadata(struct hfi1_devdata *dd, int table,
int field, u32 *field_len_bits,
u32 *field_start_bits)
@@ -1976,6 +2077,15 @@ int get_platform_config_field(struct hfi1_devdata *dd,
else
return -EINVAL;
+ if (is_integrated(dd) && !platform_config_load) {
+ /*
+ * Use saved configuration from ppd for integrated platforms
+ */
+ get_integrated_platform_config_field(dd, table_type,
+ field_index, data);
+ return 0;
+ }
+
ret = get_platform_fw_field_metadata(dd, table_type, field_index,
&field_len_bits,
&field_start_bits);
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 7eef11b316ff..4163596ce4c9 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -512,6 +512,9 @@ struct rvt_sge_state;
#define HFI1_MIN_VLS_SUPPORTED 1
#define HFI1_MAX_VLS_SUPPORTED 8
+#define HFI1_GUIDS_PER_PORT 5
+#define HFI1_PORT_GUID_INDEX 0
+
static inline void incr_cntr64(u64 *cntr)
{
if (*cntr < (u64)-1LL)
@@ -579,11 +582,20 @@ struct hfi1_pportdata {
struct kobject vl2mtu_kobj;
/* PHY support */
- u32 port_type;
struct qsfp_data qsfp_info;
+ /* Values for SI tuning of SerDes */
+ u32 port_type;
+ u32 tx_preset_eq;
+ u32 tx_preset_noeq;
+ u32 rx_preset;
+ u8 local_atten;
+ u8 remote_atten;
+ u8 default_atten;
+ u8 max_power_class;
+
+ /* GUIDs for this interface, in host order, guids[0] is a port guid */
+ u64 guids[HFI1_GUIDS_PER_PORT];
- /* GUID for this interface, in host order */
- u64 guid;
/* GUID for peer interface, in host order */
u64 neighbor_guid;
@@ -848,32 +860,29 @@ struct hfi1_devdata {
u8 __iomem *kregend;
/* physical address of chip for io_remap, etc. */
resource_size_t physaddr;
- /* receive context data */
- struct hfi1_ctxtdata **rcd;
+ /* Per VL data. Enough for all VLs but not all elements are set/used. */
+ struct per_vl_data vld[PER_VL_SEND_CONTEXTS];
/* send context data */
struct send_context_info *send_contexts;
/* map hardware send contexts to software index */
u8 *hw_to_sw;
/* spinlock for allocating and releasing send context resources */
spinlock_t sc_lock;
- /* Per VL data. Enough for all VLs but not all elements are set/used. */
- struct per_vl_data vld[PER_VL_SEND_CONTEXTS];
/* lock for pio_map */
spinlock_t pio_map_lock;
+ /* Send Context initialization lock. */
+ spinlock_t sc_init_lock;
+ /* lock for sdma_map */
+ spinlock_t sde_map_lock;
/* array of kernel send contexts */
struct send_context **kernel_send_context;
/* array of vl maps */
struct pio_vl_map __rcu *pio_map;
- /* seqlock for sc2vl */
- seqlock_t sc2vl_lock;
- u64 sc2vl[4];
- /* Send Context initialization lock. */
- spinlock_t sc_init_lock;
+ /* default flags to last descriptor */
+ u64 default_desc1;
/* fields common to all SDMA engines */
- /* default flags to last descriptor */
- u64 default_desc1;
volatile __le64 *sdma_heads_dma; /* DMA'ed by chip */
dma_addr_t sdma_heads_phys;
void *sdma_pad_dma; /* DMA'ed by chip */
@@ -884,8 +893,6 @@ struct hfi1_devdata {
u32 chip_sdma_engines;
/* num used */
u32 num_sdma;
- /* lock for sdma_map */
- spinlock_t sde_map_lock;
/* array of engines sized by num_sdma */
struct sdma_engine *per_sdma;
/* array of vl maps */
@@ -894,14 +901,11 @@ struct hfi1_devdata {
wait_queue_head_t sdma_unfreeze_wq;
atomic_t sdma_unfreeze_count;
+ u32 lcb_access_count; /* count of LCB users */
+
/* common data between shared ASIC HFIs in this OS */
struct hfi1_asic_data *asic_data;
- /* hfi1_pportdata, points to array of (physical) port-specific
- * data structs, indexed by pidx (0..n-1)
- */
- struct hfi1_pportdata *pport;
-
/* mem-mapped pointer to base of PIO buffers */
void __iomem *piobase;
/*
@@ -918,20 +922,13 @@ struct hfi1_devdata {
/* send context numbers and sizes for each type */
struct sc_config_sizes sc_sizes[SC_MAX];
- u32 lcb_access_count; /* count of LCB users */
-
char *boardname; /* human readable board info */
- /* device (not port) flags, basically device capabilities */
- u32 flags;
-
/* reset value */
u64 z_int_counter;
u64 z_rcv_limit;
u64 z_send_schedule;
- /* percpu int_counter */
- u64 __percpu *int_counter;
- u64 __percpu *rcv_limit;
+
u64 __percpu *send_schedule;
/* number of receive contexts in use by the driver */
u32 num_rcv_contexts;
@@ -946,6 +943,7 @@ struct hfi1_devdata {
/* base receive interrupt timeout, in CSR units */
u32 rcv_intr_timeout_csr;
+ u32 freezelen; /* max length of freezemsg */
u64 __iomem *egrtidbase;
spinlock_t sendctrl_lock; /* protect changes to SendCtrl */
spinlock_t rcvctrl_lock; /* protect changes to RcvCtrl */
@@ -967,7 +965,6 @@ struct hfi1_devdata {
* IB link status cheaply
*/
struct hfi1_status *status;
- u32 freezelen; /* max length of freezemsg */
/* revision register shadow */
u64 revision;
@@ -995,6 +992,8 @@ struct hfi1_devdata {
u16 rcvegrbufsize_shift;
/* both sides of the PCIe link are gen3 capable */
u8 link_gen3_capable;
+ /* default link down value (poll/sleep) */
+ u8 link_default;
/* localbus width (1, 2,4,8,16,32) from config space */
u32 lbus_width;
/* localbus speed in MHz */
@@ -1030,8 +1029,6 @@ struct hfi1_devdata {
u8 hfi1_id;
/* implementation code */
u8 icode;
- /* default link down value (poll/sleep) */
- u8 link_default;
/* vAU of this device */
u8 vau;
/* vCU of this device */
@@ -1042,27 +1039,17 @@ struct hfi1_devdata {
u16 vl15_init;
/* Misc small ints */
- /* Number of physical ports available */
- u8 num_pports;
- /* Lowest context number which can be used by user processes */
- u8 first_user_ctxt;
u8 n_krcv_queues;
u8 qos_shift;
- u8 qpn_mask;
- u16 rhf_offset; /* offset of RHF within receive header entry */
u16 irev; /* implementation revision */
u16 dc8051_ver; /* 8051 firmware version */
+ spinlock_t hfi1_diag_trans_lock; /* protect diag observer ops */
struct platform_config platform_config;
struct platform_config_cache pcfg_cache;
struct diag_client *diag_client;
- spinlock_t hfi1_diag_trans_lock; /* protect diag observer ops */
-
- u8 psxmitwait_supported;
- /* cycle length of PS* counters in HW (in picoseconds) */
- u16 psxmitwait_check_rate;
/* MSI-X information */
struct hfi1_msix_entry *msix_entries;
@@ -1077,6 +1064,9 @@ struct hfi1_devdata {
struct rcv_array_data rcv_entries;
+ /* cycle length of PS* counters in HW (in picoseconds) */
+ u16 psxmitwait_check_rate;
+
/*
* 64 bit synthetic counters
*/
@@ -1109,11 +1099,11 @@ struct hfi1_devdata {
struct err_info_rcvport err_info_rcvport;
struct err_info_constraint err_info_rcv_constraint;
struct err_info_constraint err_info_xmit_constraint;
- u8 err_info_uncorrectable;
- u8 err_info_fmconfig;
atomic_t drop_packet;
u8 do_drop;
+ u8 err_info_uncorrectable;
+ u8 err_info_fmconfig;
/*
* Software counters for the status bits defined by the
@@ -1136,47 +1126,70 @@ struct hfi1_devdata {
u64 sw_cce_err_status_aggregate;
/* Software counter that aggregates all bypass packet rcv errors */
u64 sw_rcv_bypass_packet_errors;
- /* receive interrupt functions */
- rhf_rcv_function_ptr *rhf_rcv_function_map;
+ /* receive interrupt function */
rhf_rcv_function_ptr normal_rhf_rcv_functions[8];
+ /* Save the enabled LCB error bits */
+ u64 lcb_err_en;
+
/*
* Handlers for outgoing data so that snoop/capture does not
* have to have its hooks in the send path
*/
- send_routine process_pio_send;
+ send_routine process_pio_send ____cacheline_aligned_in_smp;
send_routine process_dma_send;
void (*pio_inline_send)(struct hfi1_devdata *dd, struct pio_buf *pbuf,
u64 pbc, const void *from, size_t count);
+ /* hfi1_pportdata, points to array of (physical) port-specific
+ * data structs, indexed by pidx (0..n-1)
+ */
+ struct hfi1_pportdata *pport;
+ /* receive context data */
+ struct hfi1_ctxtdata **rcd;
+ u64 __percpu *int_counter;
+ /* device (not port) flags, basically device capabilities */
+ u16 flags;
+ /* Number of physical ports available */
+ u8 num_pports;
+ /* Lowest context number which can be used by user processes */
+ u8 first_user_ctxt;
+ /* adding a new field here would make it part of this cacheline */
+
+ /* seqlock for sc2vl */
+ seqlock_t sc2vl_lock ____cacheline_aligned_in_smp;
+ u64 sc2vl[4];
+ /* receive interrupt functions */
+ rhf_rcv_function_ptr *rhf_rcv_function_map;
+ u64 __percpu *rcv_limit;
+ u16 rhf_offset; /* offset of RHF within receive header entry */
+ /* adding a new field here would make it part of this cacheline */
/* OUI comes from the HW. Used everywhere as 3 separate bytes. */
u8 oui1;
u8 oui2;
u8 oui3;
+ u8 dc_shutdown;
+
/* Timer and counter used to detect RcvBufOvflCnt changes */
struct timer_list rcverr_timer;
- u32 rcv_ovfl_cnt;
wait_queue_head_t event_queue;
- /* Save the enabled LCB error bits */
- u64 lcb_err_en;
- u8 dc_shutdown;
-
/* receive context tail dummy address */
__le64 *rcvhdrtail_dummy_kvaddr;
dma_addr_t rcvhdrtail_dummy_dma;
- bool eprom_available; /* true if EPROM is available for this device */
- bool aspm_supported; /* Does HW support ASPM */
- bool aspm_enabled; /* ASPM state: enabled/disabled */
+ u32 rcv_ovfl_cnt;
/* Serialize ASPM enable/disable between multiple verbs contexts */
spinlock_t aspm_lock;
/* Number of verbs contexts which have disabled ASPM */
atomic_t aspm_disabled_cnt;
- struct hfi1_affinity *affinity;
+ bool eprom_available; /* true if EPROM is available for this device */
+ bool aspm_supported; /* Does HW support ASPM */
+ bool aspm_enabled; /* ASPM state: enabled/disabled */
struct rhashtable sdma_rht;
+
struct kobject kobj;
};
@@ -1633,6 +1646,17 @@ static inline u16 hfi1_get_pkey(struct hfi1_ibport *ibp, unsigned index)
}
/*
+ * Return the indexed GUID from the port GUIDs table.
+ */
+static inline __be64 get_sguid(struct hfi1_ibport *ibp, unsigned int index)
+{
+ struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
+
+ WARN_ON(index >= HFI1_GUIDS_PER_PORT);
+ return cpu_to_be64(ppd->guids[index]);
+}
+
+/*
* Called by readers of cc_state only, must call under rcu_read_lock().
*/
static inline struct cc_state *get_cc_state(struct hfi1_pportdata *ppd)
@@ -2003,6 +2027,12 @@ static inline u32 qsfp_resource(struct hfi1_devdata *dd)
return i2c_target(dd->hfi1_id);
}
+/* Is this device integrated or discrete? */
+static inline bool is_integrated(struct hfi1_devdata *dd)
+{
+ return dd->pcidev->device == PCI_DEVICE_ID_INTEL1;
+}
+
int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp);
#define DD_DEV_ENTRY(dd) __string(dev, dev_name(&(dd)->pcidev->dev))
diff --git a/drivers/infiniband/hw/hfi1/iowait.h b/drivers/infiniband/hw/hfi1/iowait.h
index 2ec6ef38d389..d9740ddea6f1 100644
--- a/drivers/infiniband/hw/hfi1/iowait.h
+++ b/drivers/infiniband/hw/hfi1/iowait.h
@@ -64,6 +64,7 @@ struct sdma_engine;
/**
* struct iowait - linkage for delayed progress/waiting
* @list: used to add/insert into QP/PQ wait lists
+ * @lock: used to record the list head lock
* @tx_head: overflow list of sdma_txreq's
* @sleep: no space callback
* @wakeup: space callback wakeup
@@ -91,6 +92,11 @@ struct sdma_engine;
* so sleeping is not allowed.
*
* The wait_dma member along with the iow
+ *
+ * The lock field is used by waiters to record
+ * the seqlock_t that guards the list head.
+ * Waiters explicitly know that, but the destroy
+ * code that unwaits QPs does not.
*/
struct iowait {
@@ -103,6 +109,7 @@ struct iowait {
unsigned seq);
void (*wakeup)(struct iowait *wait, int reason);
void (*sdma_drained)(struct iowait *wait);
+ seqlock_t *lock;
struct work_struct iowork;
wait_queue_head_t wait_dma;
wait_queue_head_t wait_pio;
@@ -141,6 +148,7 @@ static inline void iowait_init(
void (*sdma_drained)(struct iowait *wait))
{
wait->count = 0;
+ wait->lock = NULL;
INIT_LIST_HEAD(&wait->list);
INIT_LIST_HEAD(&wait->tx_head);
INIT_WORK(&wait->iowork, func);
diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c
index 9487c9bb8920..0ef62e67f283 100644
--- a/drivers/infiniband/hw/hfi1/mad.c
+++ b/drivers/infiniband/hw/hfi1/mad.c
@@ -336,20 +336,20 @@ static int __subn_get_opa_nodeinfo(struct opa_smp *smp, u32 am, u8 *data,
ni = (struct opa_node_info *)data;
/* GUID 0 is illegal */
- if (am || pidx >= dd->num_pports || dd->pport[pidx].guid == 0) {
+ if (am || pidx >= dd->num_pports || ibdev->node_guid == 0 ||
+ get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX) == 0) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
- ni->port_guid = cpu_to_be64(dd->pport[pidx].guid);
+ ni->port_guid = get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX);
ni->base_version = OPA_MGMT_BASE_VERSION;
ni->class_version = OPA_SMI_CLASS_VERSION;
ni->node_type = 1; /* channel adapter */
ni->num_ports = ibdev->phys_port_cnt;
/* This is already in network order */
ni->system_image_guid = ib_hfi1_sys_image_guid;
- /* Use first-port GUID as node */
- ni->node_guid = cpu_to_be64(dd->pport->guid);
+ ni->node_guid = ibdev->node_guid;
ni->partition_cap = cpu_to_be16(hfi1_get_npkeys(dd));
ni->device_id = cpu_to_be16(dd->pcidev->device);
ni->revision = cpu_to_be32(dd->minrev);
@@ -373,19 +373,20 @@ static int subn_get_nodeinfo(struct ib_smp *smp, struct ib_device *ibdev,
/* GUID 0 is illegal */
if (smp->attr_mod || pidx >= dd->num_pports ||
- dd->pport[pidx].guid == 0)
+ ibdev->node_guid == 0 ||
+ get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX) == 0) {
smp->status |= IB_SMP_INVALID_FIELD;
- else
- nip->port_guid = cpu_to_be64(dd->pport[pidx].guid);
+ return reply((struct ib_mad_hdr *)smp);
+ }
+ nip->port_guid = get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX);
nip->base_version = OPA_MGMT_BASE_VERSION;
nip->class_version = OPA_SMI_CLASS_VERSION;
nip->node_type = 1; /* channel adapter */
nip->num_ports = ibdev->phys_port_cnt;
/* This is already in network order */
nip->sys_guid = ib_hfi1_sys_image_guid;
- /* Use first-port GUID as node */
- nip->node_guid = cpu_to_be64(dd->pport->guid);
+ nip->node_guid = ibdev->node_guid;
nip->partition_cap = cpu_to_be16(hfi1_get_npkeys(dd));
nip->device_id = cpu_to_be16(dd->pcidev->device);
nip->revision = cpu_to_be32(dd->minrev);
diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c
index 7ad30898fc19..ccbf52c8ff6f 100644
--- a/drivers/infiniband/hw/hfi1/mmu_rb.c
+++ b/drivers/infiniband/hw/hfi1/mmu_rb.c
@@ -81,7 +81,7 @@ static void do_remove(struct mmu_rb_handler *handler,
struct list_head *del_list);
static void handle_remove(struct work_struct *work);
-static struct mmu_notifier_ops mn_opts = {
+static const struct mmu_notifier_ops mn_opts = {
.invalidate_page = mmu_notifier_page,
.invalidate_range_start = mmu_notifier_range_start,
};
diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index d15ffed48a39..64c9eeb52d86 100644
--- a/drivers/infiniband/hw/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -765,6 +765,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
sc->hw_context = hw_context;
cr_group_addresses(sc, &dma);
sc->credits = sci->credits;
+ sc->size = sc->credits * PIO_BLOCK_SIZE;
/* PIO Send Memory Address details */
#define PIO_ADDR_CONTEXT_MASK 0xfful
@@ -1249,6 +1250,7 @@ int sc_enable(struct send_context *sc)
sc->free = 0;
sc->alloc_free = 0;
sc->fill = 0;
+ sc->fill_wrap = 0;
sc->sr_head = 0;
sc->sr_tail = 0;
sc->flags = 0;
@@ -1392,7 +1394,7 @@ struct pio_buf *sc_buffer_alloc(struct send_context *sc, u32 dw_len,
unsigned long flags;
unsigned long avail;
unsigned long blocks = dwords_to_blocks(dw_len);
- unsigned long start_fill;
+ u32 fill_wrap;
int trycount = 0;
u32 head, next;
@@ -1417,9 +1419,7 @@ retry:
(sc->fill - sc->alloc_free);
if (blocks > avail) {
/* still no room, actively update */
- spin_unlock_irqrestore(&sc->alloc_lock, flags);
sc_release_update(sc);
- spin_lock_irqsave(&sc->alloc_lock, flags);
sc->alloc_free = ACCESS_ONCE(sc->free);
trycount++;
goto retry;
@@ -1435,8 +1435,11 @@ retry:
head = sc->sr_head;
/* "allocate" the buffer */
- start_fill = sc->fill;
sc->fill += blocks;
+ fill_wrap = sc->fill_wrap;
+ sc->fill_wrap += blocks;
+ if (sc->fill_wrap >= sc->credits)
+ sc->fill_wrap = sc->fill_wrap - sc->credits;
/*
* Fill the parts that the releaser looks at before moving the head.
@@ -1465,11 +1468,8 @@ retry:
spin_unlock_irqrestore(&sc->alloc_lock, flags);
/* finish filling in the buffer outside the lock */
- pbuf->start = sc->base_addr + ((start_fill % sc->credits)
- * PIO_BLOCK_SIZE);
- pbuf->size = sc->credits * PIO_BLOCK_SIZE;
- pbuf->end = sc->base_addr + pbuf->size;
- pbuf->block_count = blocks;
+ pbuf->start = sc->base_addr + fill_wrap * PIO_BLOCK_SIZE;
+ pbuf->end = sc->base_addr + sc->size;
pbuf->qw_written = 0;
pbuf->carry_bytes = 0;
pbuf->carry.val64 = 0;
@@ -1580,6 +1580,7 @@ static void sc_piobufavail(struct send_context *sc)
qp = iowait_to_qp(wait);
priv = qp->priv;
list_del_init(&priv->s_iowait.list);
+ priv->s_iowait.lock = NULL;
/* refcount held until actual wake up */
qps[n++] = qp;
}
@@ -2035,28 +2036,17 @@ freesc15:
int init_credit_return(struct hfi1_devdata *dd)
{
int ret;
- int num_numa;
int i;
- num_numa = num_online_nodes();
- /* enforce the expectation that the numas are compact */
- for (i = 0; i < num_numa; i++) {
- if (!node_online(i)) {
- dd_dev_err(dd, "NUMA nodes are not compact\n");
- ret = -EINVAL;
- goto done;
- }
- }
-
dd->cr_base = kcalloc(
- num_numa,
+ node_affinity.num_possible_nodes,
sizeof(struct credit_return_base),
GFP_KERNEL);
if (!dd->cr_base) {
ret = -ENOMEM;
goto done;
}
- for (i = 0; i < num_numa; i++) {
+ for_each_node_with_cpus(i) {
int bytes = TXE_NUM_CONTEXTS * sizeof(struct credit_return);
set_dev_node(&dd->pcidev->dev, i);
@@ -2083,14 +2073,11 @@ done:
void free_credit_return(struct hfi1_devdata *dd)
{
- int num_numa;
int i;
if (!dd->cr_base)
return;
-
- num_numa = num_online_nodes();
- for (i = 0; i < num_numa; i++) {
+ for (i = 0; i < node_affinity.num_possible_nodes; i++) {
if (dd->cr_base[i].va) {
dma_free_coherent(&dd->pcidev->dev,
TXE_NUM_CONTEXTS *
diff --git a/drivers/infiniband/hw/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h
index e709eaf743b5..867e5ffc3595 100644
--- a/drivers/infiniband/hw/hfi1/pio.h
+++ b/drivers/infiniband/hw/hfi1/pio.h
@@ -83,53 +83,55 @@ struct pio_buf {
void *arg; /* argument for cb */
void __iomem *start; /* buffer start address */
void __iomem *end; /* context end address */
- unsigned long size; /* context size, in bytes */
unsigned long sent_at; /* buffer is sent when <= free */
- u32 block_count; /* size of buffer, in blocks */
- u32 qw_written; /* QW written so far */
- u32 carry_bytes; /* number of valid bytes in carry */
union mix carry; /* pending unwritten bytes */
+ u16 qw_written; /* QW written so far */
+ u8 carry_bytes; /* number of valid bytes in carry */
};
/* cache line aligned pio buffer array */
union pio_shadow_ring {
struct pio_buf pbuf;
- u64 unused[16]; /* cache line spacer */
} ____cacheline_aligned;
/* per-NUMA send context */
struct send_context {
/* read-only after init */
struct hfi1_devdata *dd; /* device */
- void __iomem *base_addr; /* start of PIO memory */
union pio_shadow_ring *sr; /* shadow ring */
+ void __iomem *base_addr; /* start of PIO memory */
+ u32 __percpu *buffers_allocated;/* count of buffers allocated */
+ u32 size; /* context size, in bytes */
- volatile __le64 *hw_free; /* HW free counter */
- struct work_struct halt_work; /* halted context work queue entry */
- unsigned long flags; /* flags */
int node; /* context home node */
- int type; /* context type */
- u32 sw_index; /* software index number */
- u32 hw_context; /* hardware context number */
- u32 credits; /* number of blocks in context */
u32 sr_size; /* size of the shadow ring */
- u32 group; /* credit return group */
+ u16 flags; /* flags */
+ u8 type; /* context type */
+ u8 sw_index; /* software index number */
+ u8 hw_context; /* hardware context number */
+ u8 group; /* credit return group */
+
/* allocator fields */
spinlock_t alloc_lock ____cacheline_aligned_in_smp;
+ u32 sr_head; /* shadow ring head */
unsigned long fill; /* official alloc count */
unsigned long alloc_free; /* copy of free (less cache thrash) */
- u32 sr_head; /* shadow ring head */
+ u32 fill_wrap; /* tracks fill within ring */
+ u32 credits; /* number of blocks in context */
+ /* adding a new field here would make it part of this cacheline */
+
/* releaser fields */
spinlock_t release_lock ____cacheline_aligned_in_smp;
- unsigned long free; /* official free count */
u32 sr_tail; /* shadow ring tail */
+ unsigned long free; /* official free count */
+ volatile __le64 *hw_free; /* HW free counter */
/* list for PIO waiters */
struct list_head piowait ____cacheline_aligned_in_smp;
spinlock_t credit_ctrl_lock ____cacheline_aligned_in_smp;
- u64 credit_ctrl; /* cache for credit control */
u32 credit_intr_count; /* count of credit intr users */
- u32 __percpu *buffers_allocated;/* count of buffers allocated */
+ u64 credit_ctrl; /* cache for credit control */
wait_queue_head_t halt_wait; /* wait until kernel sees interrupt */
+ struct work_struct halt_work; /* halted context work queue entry */
};
/* send context flags */
diff --git a/drivers/infiniband/hw/hfi1/pio_copy.c b/drivers/infiniband/hw/hfi1/pio_copy.c
index aa7773643107..03024cec78dd 100644
--- a/drivers/infiniband/hw/hfi1/pio_copy.c
+++ b/drivers/infiniband/hw/hfi1/pio_copy.c
@@ -129,8 +129,8 @@ void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc,
dest += sizeof(u64);
}
- dest -= pbuf->size;
- dend -= pbuf->size;
+ dest -= pbuf->sc->size;
+ dend -= pbuf->sc->size;
}
/* write 8-byte non-SOP, non-wrap chunk data */
@@ -361,8 +361,8 @@ void seg_pio_copy_start(struct pio_buf *pbuf, u64 pbc,
dest += sizeof(u64);
}
- dest -= pbuf->size;
- dend -= pbuf->size;
+ dest -= pbuf->sc->size;
+ dend -= pbuf->sc->size;
}
/* write 8-byte non-SOP, non-wrap chunk data */
@@ -458,8 +458,8 @@ static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes)
dest += sizeof(u64);
}
- dest -= pbuf->size;
- dend -= pbuf->size;
+ dest -= pbuf->sc->size;
+ dend -= pbuf->sc->size;
}
/* write 8-byte non-SOP, non-wrap chunk data */
@@ -492,7 +492,7 @@ static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes)
*/
/* adjust if we have wrapped */
if (dest >= pbuf->end)
- dest -= pbuf->size;
+ dest -= pbuf->sc->size;
/* jump to the SOP range if within the first block */
else if (pbuf->qw_written < PIO_BLOCK_QWS)
dest += SOP_DISTANCE;
@@ -584,8 +584,8 @@ static void mid_copy_straight(struct pio_buf *pbuf,
dest += sizeof(u64);
}
- dest -= pbuf->size;
- dend -= pbuf->size;
+ dest -= pbuf->sc->size;
+ dend -= pbuf->sc->size;
}
/* write 8-byte non-SOP, non-wrap chunk data */
@@ -666,7 +666,7 @@ void seg_pio_copy_mid(struct pio_buf *pbuf, const void *from, size_t nbytes)
*/
/* adjust if we've wrapped */
if (dest >= pbuf->end)
- dest -= pbuf->size;
+ dest -= pbuf->sc->size;
/* jump to SOP range if within the first block */
else if (pbuf->qw_written < PIO_BLOCK_QWS)
dest += SOP_DISTANCE;
@@ -719,7 +719,7 @@ void seg_pio_copy_end(struct pio_buf *pbuf)
*/
/* adjust if we have wrapped */
if (dest >= pbuf->end)
- dest -= pbuf->size;
+ dest -= pbuf->sc->size;
/* jump to the SOP range if within the first block */
else if (pbuf->qw_written < PIO_BLOCK_QWS)
dest += SOP_DISTANCE;
diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c
index 202433178864..838fe84e285a 100644
--- a/drivers/infiniband/hw/hfi1/platform.c
+++ b/drivers/infiniband/hw/hfi1/platform.c
@@ -49,6 +49,90 @@
#include "efivar.h"
#include "eprom.h"
+static int validate_scratch_checksum(struct hfi1_devdata *dd)
+{
+ u64 checksum = 0, temp_scratch = 0;
+ int i, j, version;
+
+ temp_scratch = read_csr(dd, ASIC_CFG_SCRATCH);
+ version = (temp_scratch & BITMAP_VERSION_SMASK) >> BITMAP_VERSION_SHIFT;
+
+ /* Prevent power on default of all zeroes from passing checksum */
+ if (!version)
+ return 0;
+
+ /*
+ * ASIC scratch 0 only contains the checksum and bitmap version as
+ * fields of interest, both of which are handled separately from the
+ * loop below, so skip it
+ */
+ checksum += version;
+ for (i = 1; i < ASIC_NUM_SCRATCH; i++) {
+ temp_scratch = read_csr(dd, ASIC_CFG_SCRATCH + (8 * i));
+ for (j = sizeof(u64); j != 0; j -= 2) {
+ checksum += (temp_scratch & 0xFFFF);
+ temp_scratch >>= 16;
+ }
+ }
+
+ while (checksum >> 16)
+ checksum = (checksum & CHECKSUM_MASK) + (checksum >> 16);
+
+ temp_scratch = read_csr(dd, ASIC_CFG_SCRATCH);
+ temp_scratch &= CHECKSUM_SMASK;
+ temp_scratch >>= CHECKSUM_SHIFT;
+
+ if (checksum + temp_scratch == 0xFFFF)
+ return 1;
+ return 0;
+}
+
+static void save_platform_config_fields(struct hfi1_devdata *dd)
+{
+ struct hfi1_pportdata *ppd = dd->pport;
+ u64 temp_scratch = 0, temp_dest = 0;
+
+ temp_scratch = read_csr(dd, ASIC_CFG_SCRATCH_1);
+
+ temp_dest = temp_scratch &
+ (dd->hfi1_id ? PORT1_PORT_TYPE_SMASK :
+ PORT0_PORT_TYPE_SMASK);
+ ppd->port_type = temp_dest >>
+ (dd->hfi1_id ? PORT1_PORT_TYPE_SHIFT :
+ PORT0_PORT_TYPE_SHIFT);
+
+ temp_dest = temp_scratch &
+ (dd->hfi1_id ? PORT1_LOCAL_ATTEN_SMASK :
+ PORT0_LOCAL_ATTEN_SMASK);
+ ppd->local_atten = temp_dest >>
+ (dd->hfi1_id ? PORT1_LOCAL_ATTEN_SHIFT :
+ PORT0_LOCAL_ATTEN_SHIFT);
+
+ temp_dest = temp_scratch &
+ (dd->hfi1_id ? PORT1_REMOTE_ATTEN_SMASK :
+ PORT0_REMOTE_ATTEN_SMASK);
+ ppd->remote_atten = temp_dest >>
+ (dd->hfi1_id ? PORT1_REMOTE_ATTEN_SHIFT :
+ PORT0_REMOTE_ATTEN_SHIFT);
+
+ temp_dest = temp_scratch &
+ (dd->hfi1_id ? PORT1_DEFAULT_ATTEN_SMASK :
+ PORT0_DEFAULT_ATTEN_SMASK);
+ ppd->default_atten = temp_dest >>
+ (dd->hfi1_id ? PORT1_DEFAULT_ATTEN_SHIFT :
+ PORT0_DEFAULT_ATTEN_SHIFT);
+
+ temp_scratch = read_csr(dd, dd->hfi1_id ? ASIC_CFG_SCRATCH_3 :
+ ASIC_CFG_SCRATCH_2);
+
+ ppd->tx_preset_eq = (temp_scratch & TX_EQ_SMASK) >> TX_EQ_SHIFT;
+ ppd->tx_preset_noeq = (temp_scratch & TX_NO_EQ_SMASK) >> TX_NO_EQ_SHIFT;
+ ppd->rx_preset = (temp_scratch & RX_SMASK) >> RX_SHIFT;
+
+ ppd->max_power_class = (temp_scratch & QSFP_MAX_POWER_SMASK) >>
+ QSFP_MAX_POWER_SHIFT;
+}
+
void get_platform_config(struct hfi1_devdata *dd)
{
int ret = 0;
@@ -56,38 +140,49 @@ void get_platform_config(struct hfi1_devdata *dd)
u8 *temp_platform_config = NULL;
u32 esize;
- ret = eprom_read_platform_config(dd, (void **)&temp_platform_config,
- &esize);
- if (!ret) {
- /* success */
- size = esize;
- goto success;
+ if (is_integrated(dd)) {
+ if (validate_scratch_checksum(dd)) {
+ save_platform_config_fields(dd);
+ return;
+ }
+ dd_dev_err(dd, "%s: Config bitmap corrupted/uninitialized\n",
+ __func__);
+ dd_dev_err(dd,
+ "%s: Please update your BIOS to support active channels\n",
+ __func__);
+ } else {
+ ret = eprom_read_platform_config(dd,
+ (void **)&temp_platform_config,
+ &esize);
+ if (!ret) {
+ /* success */
+ dd->platform_config.data = temp_platform_config;
+ dd->platform_config.size = esize;
+ return;
+ }
+ /* fail, try EFI variable */
+
+ ret = read_hfi1_efi_var(dd, "configuration", &size,
+ (void **)&temp_platform_config);
+ if (!ret) {
+ dd->platform_config.data = temp_platform_config;
+ dd->platform_config.size = size;
+ return;
+ }
}
- /* fail, try EFI variable */
-
- ret = read_hfi1_efi_var(dd, "configuration", &size,
- (void **)&temp_platform_config);
- if (!ret)
- goto success;
-
- dd_dev_info(dd,
- "%s: Failed to get platform config from UEFI, falling back to request firmware\n",
- __func__);
+ dd_dev_err(dd,
+ "%s: Failed to get platform config, falling back to sub-optimal default file\n",
+ __func__);
/* fall back to request firmware */
platform_config_load = 1;
- return;
-
-success:
- dd->platform_config.data = temp_platform_config;
- dd->platform_config.size = size;
}
void free_platform_config(struct hfi1_devdata *dd)
{
if (!platform_config_load) {
/*
- * was loaded from EFI, release memory
- * allocated by read_efi_var
+ * was loaded from EFI or the EPROM, release memory
+ * allocated by read_efi_var/eprom_read_platform_config
*/
kfree(dd->platform_config.data);
}
@@ -100,12 +195,16 @@ void free_platform_config(struct hfi1_devdata *dd)
void get_port_type(struct hfi1_pportdata *ppd)
{
int ret;
+ u32 temp;
ret = get_platform_config_field(ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0,
- PORT_TABLE_PORT_TYPE, &ppd->port_type,
+ PORT_TABLE_PORT_TYPE, &temp,
4);
- if (ret)
+ if (ret) {
ppd->port_type = PORT_TYPE_UNKNOWN;
+ return;
+ }
+ ppd->port_type = temp;
}
int set_qsfp_tx(struct hfi1_pportdata *ppd, int on)
@@ -538,6 +637,38 @@ static void apply_tx_lanes(struct hfi1_pportdata *ppd, u8 field_id,
}
}
+/*
+ * Return a special SerDes setting for low power AOC cables. The power class
+ * threshold and setting being used were all found by empirical testing.
+ *
+ * Summary of the logic:
+ *
+ * if (QSFP and QSFP_TYPE == AOC and QSFP_POWER_CLASS < 4)
+ * return 0xe
+ * return 0; // leave at default
+ */
+static u8 aoc_low_power_setting(struct hfi1_pportdata *ppd)
+{
+ u8 *cache = ppd->qsfp_info.cache;
+ int power_class;
+
+ /* QSFP only */
+ if (ppd->port_type != PORT_TYPE_QSFP)
+ return 0; /* leave at default */
+
+ /* active optical cables only */
+ switch ((cache[QSFP_MOD_TECH_OFFS] & 0xF0) >> 4) {
+ case 0x0 ... 0x9: /* fallthrough */
+ case 0xC: /* fallthrough */
+ case 0xE:
+ /* active AOC */
+ power_class = get_qsfp_power_class(cache[QSFP_MOD_PWR_OFFS]);
+ if (power_class < QSFP_POWER_CLASS_4)
+ return 0xe;
+ }
+ return 0; /* leave at default */
+}
+
static void apply_tunings(
struct hfi1_pportdata *ppd, u32 tx_preset_index,
u8 tuning_method, u32 total_atten, u8 limiting_active)
@@ -606,7 +737,17 @@ static void apply_tunings(
tx_preset_index, TX_PRESET_TABLE_POSTCUR, &tx_preset, 4);
postcur = tx_preset;
- config_data = precur | (attn << 8) | (postcur << 16);
+ /*
+ * NOTES:
+ * o The aoc_low_power_setting is applied to all lanes even
+ * though only lane 0's value is examined by the firmware.
+ * o A lingering low power setting after a cable swap does
+ * not occur. On cable unplug the 8051 is reset and
+ * restarted on cable insert. This resets all settings to
+ * their default, erasing any previous low power setting.
+ */
+ config_data = precur | (attn << 8) | (postcur << 16) |
+ (aoc_low_power_setting(ppd) << 24);
apply_tx_lanes(ppd, TX_EQ_SETTINGS, config_data,
"Applying TX settings");
diff --git a/drivers/infiniband/hw/hfi1/platform.h b/drivers/infiniband/hw/hfi1/platform.h
index e2c21613c326..eed0aa9124fa 100644
--- a/drivers/infiniband/hw/hfi1/platform.h
+++ b/drivers/infiniband/hw/hfi1/platform.h
@@ -168,16 +168,6 @@ struct platform_config_cache {
struct platform_config_data config_tables[PLATFORM_CONFIG_TABLE_MAX];
};
-static const u32 platform_config_table_limits[PLATFORM_CONFIG_TABLE_MAX] = {
- 0,
- SYSTEM_TABLE_MAX,
- PORT_TABLE_MAX,
- RX_PRESET_TABLE_MAX,
- TX_PRESET_TABLE_MAX,
- QSFP_ATTEN_TABLE_MAX,
- VARIABLE_SETTINGS_TABLE_MAX
-};
-
/* This section defines default values and encodings for the
* fields defined for each table above
*/
@@ -295,6 +285,123 @@ enum link_tuning_encoding {
OPA_UNKNOWN_TUNING
};
+/*
+ * Shifts and masks for the link SI tuning values stuffed into the ASIC scratch
+ * registers for integrated platforms
+ */
+#define PORT0_PORT_TYPE_SHIFT 0
+#define PORT0_LOCAL_ATTEN_SHIFT 4
+#define PORT0_REMOTE_ATTEN_SHIFT 10
+#define PORT0_DEFAULT_ATTEN_SHIFT 32
+
+#define PORT1_PORT_TYPE_SHIFT 16
+#define PORT1_LOCAL_ATTEN_SHIFT 20
+#define PORT1_REMOTE_ATTEN_SHIFT 26
+#define PORT1_DEFAULT_ATTEN_SHIFT 40
+
+#define PORT0_PORT_TYPE_MASK 0xFUL
+#define PORT0_LOCAL_ATTEN_MASK 0x3FUL
+#define PORT0_REMOTE_ATTEN_MASK 0x3FUL
+#define PORT0_DEFAULT_ATTEN_MASK 0xFFUL
+
+#define PORT1_PORT_TYPE_MASK 0xFUL
+#define PORT1_LOCAL_ATTEN_MASK 0x3FUL
+#define PORT1_REMOTE_ATTEN_MASK 0x3FUL
+#define PORT1_DEFAULT_ATTEN_MASK 0xFFUL
+
+#define PORT0_PORT_TYPE_SMASK (PORT0_PORT_TYPE_MASK << \
+ PORT0_PORT_TYPE_SHIFT)
+#define PORT0_LOCAL_ATTEN_SMASK (PORT0_LOCAL_ATTEN_MASK << \
+ PORT0_LOCAL_ATTEN_SHIFT)
+#define PORT0_REMOTE_ATTEN_SMASK (PORT0_REMOTE_ATTEN_MASK << \
+ PORT0_REMOTE_ATTEN_SHIFT)
+#define PORT0_DEFAULT_ATTEN_SMASK (PORT0_DEFAULT_ATTEN_MASK << \
+ PORT0_DEFAULT_ATTEN_SHIFT)
+
+#define PORT1_PORT_TYPE_SMASK (PORT1_PORT_TYPE_MASK << \
+ PORT1_PORT_TYPE_SHIFT)
+#define PORT1_LOCAL_ATTEN_SMASK (PORT1_LOCAL_ATTEN_MASK << \
+ PORT1_LOCAL_ATTEN_SHIFT)
+#define PORT1_REMOTE_ATTEN_SMASK (PORT1_REMOTE_ATTEN_MASK << \
+ PORT1_REMOTE_ATTEN_SHIFT)
+#define PORT1_DEFAULT_ATTEN_SMASK (PORT1_DEFAULT_ATTEN_MASK << \
+ PORT1_DEFAULT_ATTEN_SHIFT)
+
+#define QSFP_MAX_POWER_SHIFT 0
+#define TX_NO_EQ_SHIFT 4
+#define TX_EQ_SHIFT 25
+#define RX_SHIFT 46
+
+#define QSFP_MAX_POWER_MASK 0xFUL
+#define TX_NO_EQ_MASK 0x1FFFFFUL
+#define TX_EQ_MASK 0x1FFFFFUL
+#define RX_MASK 0xFFFFUL
+
+#define QSFP_MAX_POWER_SMASK (QSFP_MAX_POWER_MASK << \
+ QSFP_MAX_POWER_SHIFT)
+#define TX_NO_EQ_SMASK (TX_NO_EQ_MASK << TX_NO_EQ_SHIFT)
+#define TX_EQ_SMASK (TX_EQ_MASK << TX_EQ_SHIFT)
+#define RX_SMASK (RX_MASK << RX_SHIFT)
+
+#define TX_PRECUR_SHIFT 0
+#define TX_ATTN_SHIFT 4
+#define QSFP_TX_CDR_APPLY_SHIFT 9
+#define QSFP_TX_EQ_APPLY_SHIFT 10
+#define QSFP_TX_CDR_SHIFT 11
+#define QSFP_TX_EQ_SHIFT 12
+#define TX_POSTCUR_SHIFT 16
+
+#define TX_PRECUR_MASK 0xFUL
+#define TX_ATTN_MASK 0x1FUL
+#define QSFP_TX_CDR_APPLY_MASK 0x1UL
+#define QSFP_TX_EQ_APPLY_MASK 0x1UL
+#define QSFP_TX_CDR_MASK 0x1UL
+#define QSFP_TX_EQ_MASK 0xFUL
+#define TX_POSTCUR_MASK 0x1FUL
+
+#define TX_PRECUR_SMASK (TX_PRECUR_MASK << TX_PRECUR_SHIFT)
+#define TX_ATTN_SMASK (TX_ATTN_MASK << TX_ATTN_SHIFT)
+#define QSFP_TX_CDR_APPLY_SMASK (QSFP_TX_CDR_APPLY_MASK << \
+ QSFP_TX_CDR_APPLY_SHIFT)
+#define QSFP_TX_EQ_APPLY_SMASK (QSFP_TX_EQ_APPLY_MASK << \
+ QSFP_TX_EQ_APPLY_SHIFT)
+#define QSFP_TX_CDR_SMASK (QSFP_TX_CDR_MASK << QSFP_TX_CDR_SHIFT)
+#define QSFP_TX_EQ_SMASK (QSFP_TX_EQ_MASK << QSFP_TX_EQ_SHIFT)
+#define TX_POSTCUR_SMASK (TX_POSTCUR_MASK << TX_POSTCUR_SHIFT)
+
+#define QSFP_RX_CDR_APPLY_SHIFT 0
+#define QSFP_RX_EMP_APPLY_SHIFT 1
+#define QSFP_RX_AMP_APPLY_SHIFT 2
+#define QSFP_RX_CDR_SHIFT 3
+#define QSFP_RX_EMP_SHIFT 4
+#define QSFP_RX_AMP_SHIFT 8
+
+#define QSFP_RX_CDR_APPLY_MASK 0x1UL
+#define QSFP_RX_EMP_APPLY_MASK 0x1UL
+#define QSFP_RX_AMP_APPLY_MASK 0x1UL
+#define QSFP_RX_CDR_MASK 0x1UL
+#define QSFP_RX_EMP_MASK 0xFUL
+#define QSFP_RX_AMP_MASK 0x3UL
+
+#define QSFP_RX_CDR_APPLY_SMASK (QSFP_RX_CDR_APPLY_MASK << \
+ QSFP_RX_CDR_APPLY_SHIFT)
+#define QSFP_RX_EMP_APPLY_SMASK (QSFP_RX_EMP_APPLY_MASK << \
+ QSFP_RX_EMP_APPLY_SHIFT)
+#define QSFP_RX_AMP_APPLY_SMASK (QSFP_RX_AMP_APPLY_MASK << \
+ QSFP_RX_AMP_APPLY_SHIFT)
+#define QSFP_RX_CDR_SMASK (QSFP_RX_CDR_MASK << QSFP_RX_CDR_SHIFT)
+#define QSFP_RX_EMP_SMASK (QSFP_RX_EMP_MASK << QSFP_RX_EMP_SHIFT)
+#define QSFP_RX_AMP_SMASK (QSFP_RX_AMP_MASK << QSFP_RX_AMP_SHIFT)
+
+#define BITMAP_VERSION 1
+#define BITMAP_VERSION_SHIFT 44
+#define BITMAP_VERSION_MASK 0xFUL
+#define BITMAP_VERSION_SMASK (BITMAP_VERSION_MASK << \
+ BITMAP_VERSION_SHIFT)
+#define CHECKSUM_SHIFT 48
+#define CHECKSUM_MASK 0xFFFFUL
+#define CHECKSUM_SMASK (CHECKSUM_MASK << CHECKSUM_SHIFT)
+
/* platform.c */
void get_platform_config(struct hfi1_devdata *dd);
void free_platform_config(struct hfi1_devdata *dd);
diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index 9fc75e7e8781..d752d6768a49 100644
--- a/drivers/infiniband/hw/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -196,15 +196,18 @@ static void flush_tx_list(struct rvt_qp *qp)
static void flush_iowait(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
- struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
unsigned long flags;
+ seqlock_t *lock = priv->s_iowait.lock;
- write_seqlock_irqsave(&dev->iowait_lock, flags);
+ if (!lock)
+ return;
+ write_seqlock_irqsave(lock, flags);
if (!list_empty(&priv->s_iowait.list)) {
list_del_init(&priv->s_iowait.list);
+ priv->s_iowait.lock = NULL;
rvt_put_qp(qp);
}
- write_sequnlock_irqrestore(&dev->iowait_lock, flags);
+ write_sequnlock_irqrestore(lock, flags);
}
static inline int opa_mtu_enum_to_int(int mtu)
@@ -543,6 +546,7 @@ static int iowait_sleep(
ibp->rvp.n_dmawait++;
qp->s_flags |= RVT_S_WAIT_DMA_DESC;
list_add_tail(&priv->s_iowait.list, &sde->dmawait);
+ priv->s_iowait.lock = &dev->iowait_lock;
trace_hfi1_qpsleep(qp, RVT_S_WAIT_DMA_DESC);
rvt_get_qp(qp);
}
@@ -964,6 +968,7 @@ void notify_error_qp(struct rvt_qp *qp)
if (!list_empty(&priv->s_iowait.list) && !(qp->s_flags & RVT_S_BUSY)) {
qp->s_flags &= ~RVT_S_ANY_WAIT_IO;
list_del_init(&priv->s_iowait.list);
+ priv->s_iowait.lock = NULL;
rvt_put_qp(qp);
}
write_sequnlock(&dev->iowait_lock);
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 8bc5013f39a1..9db260fe782a 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -276,7 +276,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
rvt_get_mr(ps->s_txreq->mr);
qp->s_ack_rdma_sge.sge = e->rdma_sge;
qp->s_ack_rdma_sge.num_sge = 1;
- qp->s_cur_sge = &qp->s_ack_rdma_sge;
+ ps->s_txreq->ss = &qp->s_ack_rdma_sge;
if (len > pmtu) {
len = pmtu;
qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST);
@@ -290,7 +290,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
bth2 = mask_psn(qp->s_ack_rdma_psn++);
} else {
/* COMPARE_SWAP or FETCH_ADD */
- qp->s_cur_sge = NULL;
+ ps->s_txreq->ss = NULL;
len = 0;
qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
ohdr->u.at.aeth = hfi1_compute_aeth(qp);
@@ -306,7 +306,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE);
/* FALLTHROUGH */
case OP(RDMA_READ_RESPONSE_MIDDLE):
- qp->s_cur_sge = &qp->s_ack_rdma_sge;
+ ps->s_txreq->ss = &qp->s_ack_rdma_sge;
ps->s_txreq->mr = qp->s_ack_rdma_sge.sge.mr;
if (ps->s_txreq->mr)
rvt_get_mr(ps->s_txreq->mr);
@@ -335,7 +335,7 @@ normal:
*/
qp->s_ack_state = OP(SEND_ONLY);
qp->s_flags &= ~RVT_S_ACK_PENDING;
- qp->s_cur_sge = NULL;
+ ps->s_txreq->ss = NULL;
if (qp->s_nak_state)
ohdr->u.aeth =
cpu_to_be32((qp->r_msn & HFI1_MSN_MASK) |
@@ -351,7 +351,7 @@ normal:
qp->s_rdma_ack_cnt++;
qp->s_hdrwords = hwords;
ps->s_txreq->sde = priv->s_sde;
- qp->s_cur_size = len;
+ ps->s_txreq->s_cur_size = len;
hfi1_make_ruc_header(qp, ohdr, bth0, bth2, middle, ps);
/* pbc */
ps->s_txreq->hdr_dwords = qp->s_hdrwords + 2;
@@ -801,8 +801,8 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
qp->s_len -= len;
qp->s_hdrwords = hwords;
ps->s_txreq->sde = priv->s_sde;
- qp->s_cur_sge = ss;
- qp->s_cur_size = len;
+ ps->s_txreq->ss = ss;
+ ps->s_txreq->s_cur_size = len;
hfi1_make_ruc_header(
qp,
ohdr,
@@ -1146,8 +1146,6 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr)
{
struct ib_other_headers *ohdr;
struct rvt_swqe *wqe;
- struct ib_wc wc;
- unsigned i;
u32 opcode;
u32 psn;
@@ -1195,22 +1193,8 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr)
qp->s_last = s_last;
/* see post_send() */
barrier();
- for (i = 0; i < wqe->wr.num_sge; i++) {
- struct rvt_sge *sge = &wqe->sg_list[i];
-
- rvt_put_mr(sge->mr);
- }
- /* Post a send completion queue entry if requested. */
- if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
- (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
- memset(&wc, 0, sizeof(wc));
- wc.wr_id = wqe->wr.wr_id;
- wc.status = IB_WC_SUCCESS;
- wc.opcode = ib_hfi1_wc_opcode[wqe->wr.opcode];
- wc.byte_len = wqe->length;
- wc.qp = &qp->ibqp;
- rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0);
- }
+ rvt_put_swqe(wqe);
+ rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS);
}
/*
* If we were waiting for sends to complete before re-sending,
@@ -1240,9 +1224,6 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
struct rvt_swqe *wqe,
struct hfi1_ibport *ibp)
{
- struct ib_wc wc;
- unsigned i;
-
lockdep_assert_held(&qp->s_lock);
/*
* Don't decrement refcount and don't generate a
@@ -1253,28 +1234,14 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
u32 s_last;
- for (i = 0; i < wqe->wr.num_sge; i++) {
- struct rvt_sge *sge = &wqe->sg_list[i];
-
- rvt_put_mr(sge->mr);
- }
+ rvt_put_swqe(wqe);
s_last = qp->s_last;
if (++s_last >= qp->s_size)
s_last = 0;
qp->s_last = s_last;
/* see post_send() */
barrier();
- /* Post a send completion queue entry if requested. */
- if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
- (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
- memset(&wc, 0, sizeof(wc));
- wc.wr_id = wqe->wr.wr_id;
- wc.status = IB_WC_SUCCESS;
- wc.opcode = ib_hfi1_wc_opcode[wqe->wr.opcode];
- wc.byte_len = wqe->length;
- wc.qp = &qp->ibqp;
- rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0);
- }
+ rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS);
} else {
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
@@ -2295,7 +2262,7 @@ send_last:
hfi1_copy_sge(&qp->r_sge, data, tlen, 1, copy_last);
rvt_put_ss(&qp->r_sge);
qp->r_msn++;
- if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
+ if (!__test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
break;
wc.wr_id = qp->r_wr_id;
wc.status = IB_WC_SUCCESS;
@@ -2410,8 +2377,7 @@ send_last:
* Update the next expected PSN. We add 1 later
* below, so only add the remainder here.
*/
- if (len > pmtu)
- qp->r_psn += (len - 1) / pmtu;
+ qp->r_psn += rvt_div_mtu(qp, len - 1);
} else {
e->rdma_sge.mr = NULL;
e->rdma_sge.vaddr = NULL;
diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c
index a1576aea4756..717ed4b159d3 100644
--- a/drivers/infiniband/hw/hfi1/ruc.c
+++ b/drivers/infiniband/hw/hfi1/ruc.c
@@ -239,16 +239,6 @@ bail:
return ret;
}
-static __be64 get_sguid(struct hfi1_ibport *ibp, unsigned index)
-{
- if (!index) {
- struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
-
- return cpu_to_be64(ppd->guid);
- }
- return ibp->guids[index - 1];
-}
-
static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id)
{
return (gid->global.interface_id == id &&
@@ -699,9 +689,9 @@ u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr,
/* The SGID is 32-bit aligned. */
hdr->sgid.global.subnet_prefix = ibp->rvp.gid_prefix;
hdr->sgid.global.interface_id =
- grh->sgid_index && grh->sgid_index < ARRAY_SIZE(ibp->guids) ?
- ibp->guids[grh->sgid_index - 1] :
- cpu_to_be64(ppd_from_ibp(ibp)->guid);
+ grh->sgid_index < HFI1_GUIDS_PER_PORT ?
+ get_sguid(ibp, grh->sgid_index) :
+ get_sguid(ibp, HFI1_PORT_GUID_INDEX);
hdr->dgid = grh->dgid;
/* GRH header size in 32-bit words. */
@@ -777,8 +767,8 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
u32 bth1;
/* Construct the header. */
- extra_bytes = -qp->s_cur_size & 3;
- nwords = (qp->s_cur_size + extra_bytes) >> 2;
+ extra_bytes = -ps->s_txreq->s_cur_size & 3;
+ nwords = (ps->s_txreq->s_cur_size + extra_bytes) >> 2;
lrh0 = HFI1_LRH_BTH;
if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
qp->s_hdrwords += hfi1_make_grh(ibp,
@@ -952,7 +942,6 @@ void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
enum ib_wc_status status)
{
u32 old_last, last;
- unsigned i;
if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
return;
@@ -964,32 +953,13 @@ void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
qp->s_last = last;
/* See post_send() */
barrier();
- for (i = 0; i < wqe->wr.num_sge; i++) {
- struct rvt_sge *sge = &wqe->sg_list[i];
-
- rvt_put_mr(sge->mr);
- }
+ rvt_put_swqe(wqe);
if (qp->ibqp.qp_type == IB_QPT_UD ||
qp->ibqp.qp_type == IB_QPT_SMI ||
qp->ibqp.qp_type == IB_QPT_GSI)
atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
- /* See ch. 11.2.4.1 and 10.7.3.1 */
- if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
- (wqe->wr.send_flags & IB_SEND_SIGNALED) ||
- status != IB_WC_SUCCESS) {
- struct ib_wc wc;
-
- memset(&wc, 0, sizeof(wc));
- wc.wr_id = wqe->wr.wr_id;
- wc.status = status;
- wc.opcode = ib_hfi1_wc_opcode[wqe->wr.opcode];
- wc.qp = &qp->ibqp;
- if (status == IB_WC_SUCCESS)
- wc.byte_len = wqe->length;
- rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc,
- status != IB_WC_SUCCESS);
- }
+ rvt_qp_swqe_complete(qp, wqe, status);
if (qp->s_acked == old_last)
qp->s_acked = last;
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index fd39bcaa062d..7102a076146d 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -375,7 +375,7 @@ static inline void complete_tx(struct sdma_engine *sde,
sde->head_sn, tx->sn);
sde->head_sn++;
#endif
- sdma_txclean(sde->dd, tx);
+ __sdma_txclean(sde->dd, tx);
if (complete)
(*complete)(tx, res);
if (wait && iowait_sdma_dec(wait))
@@ -1643,7 +1643,7 @@ static inline u8 ahg_mode(struct sdma_txreq *tx)
}
/**
- * sdma_txclean() - clean tx of mappings, descp *kmalloc's
+ * __sdma_txclean() - clean tx of mappings, descp *kmalloc's
* @dd: hfi1_devdata for unmapping
* @tx: tx request to clean
*
@@ -1653,7 +1653,7 @@ static inline u8 ahg_mode(struct sdma_txreq *tx)
* The code can be called multiple times without issue.
*
*/
-void sdma_txclean(
+void __sdma_txclean(
struct hfi1_devdata *dd,
struct sdma_txreq *tx)
{
@@ -3080,7 +3080,7 @@ static int _extend_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx)
tx->descp[i] = tx->descs[i];
return 0;
enomem:
- sdma_txclean(dd, tx);
+ __sdma_txclean(dd, tx);
return -ENOMEM;
}
@@ -3109,14 +3109,14 @@ int ext_coal_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx,
rval = _extend_sdma_tx_descs(dd, tx);
if (rval) {
- sdma_txclean(dd, tx);
+ __sdma_txclean(dd, tx);
return rval;
}
/* If coalesce buffer is allocated, copy data into it */
if (tx->coalesce_buf) {
if (type == SDMA_MAP_NONE) {
- sdma_txclean(dd, tx);
+ __sdma_txclean(dd, tx);
return -EINVAL;
}
@@ -3124,7 +3124,7 @@ int ext_coal_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx,
kvaddr = kmap(page);
kvaddr += offset;
} else if (WARN_ON(!kvaddr)) {
- sdma_txclean(dd, tx);
+ __sdma_txclean(dd, tx);
return -EINVAL;
}
@@ -3154,7 +3154,7 @@ int ext_coal_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx,
DMA_TO_DEVICE);
if (unlikely(dma_mapping_error(&dd->pcidev->dev, addr))) {
- sdma_txclean(dd, tx);
+ __sdma_txclean(dd, tx);
return -ENOSPC;
}
@@ -3196,7 +3196,7 @@ int _pad_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx)
if ((unlikely(tx->num_desc == tx->desc_limit))) {
rval = _extend_sdma_tx_descs(dd, tx);
if (rval) {
- sdma_txclean(dd, tx);
+ __sdma_txclean(dd, tx);
return rval;
}
}
diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h
index 56257ea3598f..21f1e2834f37 100644
--- a/drivers/infiniband/hw/hfi1/sdma.h
+++ b/drivers/infiniband/hw/hfi1/sdma.h
@@ -667,7 +667,13 @@ int ext_coal_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx,
int type, void *kvaddr, struct page *page,
unsigned long offset, u16 len);
int _pad_sdma_tx_descs(struct hfi1_devdata *, struct sdma_txreq *);
-void sdma_txclean(struct hfi1_devdata *, struct sdma_txreq *);
+void __sdma_txclean(struct hfi1_devdata *, struct sdma_txreq *);
+
+static inline void sdma_txclean(struct hfi1_devdata *dd, struct sdma_txreq *tx)
+{
+ if (tx->num_desc)
+ __sdma_txclean(dd, tx);
+}
/* helpers used by public routines */
static inline void _sdma_close_tx(struct hfi1_devdata *dd,
@@ -753,7 +759,7 @@ static inline int sdma_txadd_page(
DMA_TO_DEVICE);
if (unlikely(dma_mapping_error(&dd->pcidev->dev, addr))) {
- sdma_txclean(dd, tx);
+ __sdma_txclean(dd, tx);
return -ENOSPC;
}
@@ -834,7 +840,7 @@ static inline int sdma_txadd_kvaddr(
DMA_TO_DEVICE);
if (unlikely(dma_mapping_error(&dd->pcidev->dev, addr))) {
- sdma_txclean(dd, tx);
+ __sdma_txclean(dd, tx);
return -ENOSPC;
}
diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c
index 5e6d1bac4914..b141a78ae38b 100644
--- a/drivers/infiniband/hw/hfi1/uc.c
+++ b/drivers/infiniband/hw/hfi1/uc.c
@@ -258,8 +258,8 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
qp->s_len -= len;
qp->s_hdrwords = hwords;
ps->s_txreq->sde = priv->s_sde;
- qp->s_cur_sge = &qp->s_sge;
- qp->s_cur_size = len;
+ ps->s_txreq->ss = &qp->s_sge;
+ ps->s_txreq->s_cur_size = len;
hfi1_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24),
mask_psn(qp->s_psn++), middle, ps);
/* pbc */
diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c
index 97ae24b6314c..c071955c0272 100644
--- a/drivers/infiniband/hw/hfi1/ud.c
+++ b/drivers/infiniband/hw/hfi1/ud.c
@@ -354,8 +354,8 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
/* header size in 32-bit words LRH+BTH+DETH = (8+12+8)/4. */
qp->s_hdrwords = 7;
- qp->s_cur_size = wqe->length;
- qp->s_cur_sge = &qp->s_sge;
+ ps->s_txreq->s_cur_size = wqe->length;
+ ps->s_txreq->ss = &qp->s_sge;
qp->s_srate = ah_attr->static_rate;
qp->srate_mbps = ib_rate_to_mbps(qp->s_srate);
qp->s_wqe = wqe;
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index a761f804111e..663980ef01a8 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -115,6 +115,7 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12
#define KDETH_HCRC_LOWER_MASK 0xff
#define AHG_KDETH_INTR_SHIFT 12
+#define AHG_KDETH_SH_SHIFT 13
#define PBC2LRH(x) ((((x) & 0xfff) << 2) - 4)
#define LRH2PBC(x) ((((x) >> 2) + 1) & 0xfff)
@@ -144,8 +145,9 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12
#define KDETH_OM_LARGE 64
#define KDETH_OM_MAX_SIZE (1 << ((KDETH_OM_LARGE / KDETH_OM_SMALL) + 1))
-/* Last packet in the request */
-#define TXREQ_FLAGS_REQ_LAST_PKT BIT(0)
+/* Tx request flag bits */
+#define TXREQ_FLAGS_REQ_ACK BIT(0) /* Set the ACK bit in the header */
+#define TXREQ_FLAGS_REQ_DISABLE_SH BIT(1) /* Disable header suppression */
/* SDMA request flag bits */
#define SDMA_REQ_FOR_THREAD 1
@@ -943,8 +945,13 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
tx->busycount = 0;
INIT_LIST_HEAD(&tx->list);
+ /*
+ * For the last packet set the ACK request
+ * and disable header suppression.
+ */
if (req->seqnum == req->info.npkts - 1)
- tx->flags |= TXREQ_FLAGS_REQ_LAST_PKT;
+ tx->flags |= (TXREQ_FLAGS_REQ_ACK |
+ TXREQ_FLAGS_REQ_DISABLE_SH);
/*
* Calculate the payload size - this is min of the fragment
@@ -963,11 +970,22 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
}
datalen = compute_data_length(req, tx);
+
+ /*
+ * Disable header suppression for payloads <= 8 DWs.
+ * If there is an uncorrectable error in the receive
+ * data FIFO when the received payload size is less than
+ * or equal to 8 DWs, then RxDmaDataFifoRdUncErr is
+ * not reported. RHF.EccErr is set if the header
+ * is not suppressed.
+ */
if (!datalen) {
SDMA_DBG(req,
"Request has data but pkt len is 0");
ret = -EFAULT;
goto free_tx;
+ } else if (datalen <= 32) {
+ tx->flags |= TXREQ_FLAGS_REQ_DISABLE_SH;
}
}
@@ -990,6 +1008,10 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
LRH2PBC(lrhlen);
tx->hdr.pbc[0] = cpu_to_le16(pbclen);
}
+ ret = check_header_template(req, &tx->hdr,
+ lrhlen, datalen);
+ if (ret)
+ goto free_tx;
ret = sdma_txinit_ahg(&tx->txreq,
SDMA_TXREQ_F_AHG_COPY,
sizeof(tx->hdr) + datalen,
@@ -1351,7 +1373,7 @@ static int set_txreq_header(struct user_sdma_request *req,
req->seqnum));
/* Set ACK request on last packet */
- if (unlikely(tx->flags & TXREQ_FLAGS_REQ_LAST_PKT))
+ if (unlikely(tx->flags & TXREQ_FLAGS_REQ_ACK))
hdr->bth[2] |= cpu_to_be32(1UL << 31);
/* Set the new offset */
@@ -1384,8 +1406,8 @@ static int set_txreq_header(struct user_sdma_request *req,
/* Set KDETH.TID based on value for this TID */
KDETH_SET(hdr->kdeth.ver_tid_offset, TID,
EXP_TID_GET(tidval, IDX));
- /* Clear KDETH.SH only on the last packet */
- if (unlikely(tx->flags & TXREQ_FLAGS_REQ_LAST_PKT))
+ /* Clear KDETH.SH when DISABLE_SH flag is set */
+ if (unlikely(tx->flags & TXREQ_FLAGS_REQ_DISABLE_SH))
KDETH_SET(hdr->kdeth.ver_tid_offset, SH, 0);
/*
* Set the KDETH.OFFSET and KDETH.OM based on size of
@@ -1429,7 +1451,7 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
/* BTH.PSN and BTH.A */
val32 = (be32_to_cpu(hdr->bth[2]) + req->seqnum) &
(HFI1_CAP_IS_KSET(EXTENDED_PSN) ? 0x7fffffff : 0xffffff);
- if (unlikely(tx->flags & TXREQ_FLAGS_REQ_LAST_PKT))
+ if (unlikely(tx->flags & TXREQ_FLAGS_REQ_ACK))
val32 |= 1UL << 31;
AHG_HEADER_SET(req->ahg, diff, 6, 0, 16, cpu_to_be16(val32 >> 16));
AHG_HEADER_SET(req->ahg, diff, 6, 16, 16, cpu_to_be16(val32 & 0xffff));
@@ -1468,19 +1490,23 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
AHG_HEADER_SET(req->ahg, diff, 7, 0, 16,
((!!(req->omfactor - KDETH_OM_SMALL)) << 15 |
((req->tidoffset / req->omfactor) & 0x7fff)));
- /* KDETH.TIDCtrl, KDETH.TID */
+ /* KDETH.TIDCtrl, KDETH.TID, KDETH.Intr, KDETH.SH */
val = cpu_to_le16(((EXP_TID_GET(tidval, CTRL) & 0x3) << 10) |
- (EXP_TID_GET(tidval, IDX) & 0x3ff));
- /* Clear KDETH.SH on last packet */
- if (unlikely(tx->flags & TXREQ_FLAGS_REQ_LAST_PKT)) {
- val |= cpu_to_le16(KDETH_GET(hdr->kdeth.ver_tid_offset,
- INTR) <<
- AHG_KDETH_INTR_SHIFT);
- val &= cpu_to_le16(~(1U << 13));
- AHG_HEADER_SET(req->ahg, diff, 7, 16, 14, val);
+ (EXP_TID_GET(tidval, IDX) & 0x3ff));
+
+ if (unlikely(tx->flags & TXREQ_FLAGS_REQ_DISABLE_SH)) {
+ val |= cpu_to_le16((KDETH_GET(hdr->kdeth.ver_tid_offset,
+ INTR) <<
+ AHG_KDETH_INTR_SHIFT));
} else {
- AHG_HEADER_SET(req->ahg, diff, 7, 16, 12, val);
+ val |= KDETH_GET(hdr->kdeth.ver_tid_offset, SH) ?
+ cpu_to_le16(0x1 << AHG_KDETH_SH_SHIFT) :
+ cpu_to_le16((KDETH_GET(hdr->kdeth.ver_tid_offset,
+ INTR) <<
+ AHG_KDETH_INTR_SHIFT));
}
+
+ AHG_HEADER_SET(req->ahg, diff, 7, 16, 14, val);
}
trace_hfi1_sdma_user_header_ahg(pq->dd, pq->ctxt, pq->subctxt,
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index 4b7a16ceb362..95ed4d6da510 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -297,22 +297,6 @@ static inline int wss_exceeds_threshold(void)
}
/*
- * Translate ib_wr_opcode into ib_wc_opcode.
- */
-const enum ib_wc_opcode ib_hfi1_wc_opcode[] = {
- [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
- [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
- [IB_WR_SEND] = IB_WC_SEND,
- [IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
- [IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
- [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
- [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD,
- [IB_WR_SEND_WITH_INV] = IB_WC_SEND,
- [IB_WR_LOCAL_INV] = IB_WC_LOCAL_INV,
- [IB_WR_REG_MR] = IB_WC_REG_MR
-};
-
-/*
* Length of header by opcode, 0 --> not supported
*/
const u8 hdr_len_by_opcode[256] = {
@@ -694,6 +678,7 @@ static void mem_timer(unsigned long data)
qp = iowait_to_qp(wait);
priv = qp->priv;
list_del_init(&priv->s_iowait.list);
+ priv->s_iowait.lock = NULL;
/* refcount held until actual wake up */
if (!list_empty(list))
mod_timer(&dev->mem_timer, jiffies + 1);
@@ -769,6 +754,7 @@ static int wait_kmem(struct hfi1_ibdev *dev,
mod_timer(&dev->mem_timer, jiffies + 1);
qp->s_flags |= RVT_S_WAIT_KMEM;
list_add_tail(&priv->s_iowait.list, &dev->memwait);
+ priv->s_iowait.lock = &dev->iowait_lock;
trace_hfi1_qpsleep(qp, RVT_S_WAIT_KMEM);
rvt_get_qp(qp);
}
@@ -788,10 +774,10 @@ static int wait_kmem(struct hfi1_ibdev *dev,
*/
static noinline int build_verbs_ulp_payload(
struct sdma_engine *sde,
- struct rvt_sge_state *ss,
u32 length,
struct verbs_txreq *tx)
{
+ struct rvt_sge_state *ss = tx->ss;
struct rvt_sge *sg_list = ss->sg_list;
struct rvt_sge sge = ss->sge;
u8 num_sge = ss->num_sge;
@@ -835,7 +821,6 @@ bail_txadd:
/* New API */
static int build_verbs_tx_desc(
struct sdma_engine *sde,
- struct rvt_sge_state *ss,
u32 length,
struct verbs_txreq *tx,
struct hfi1_ahg_info *ahg_info,
@@ -879,9 +864,9 @@ static int build_verbs_tx_desc(
goto bail_txadd;
}
- /* add the ulp payload - if any. ss can be NULL for acks */
- if (ss)
- ret = build_verbs_ulp_payload(sde, ss, length, tx);
+ /* add the ulp payload - if any. tx->ss can be NULL for acks */
+ if (tx->ss)
+ ret = build_verbs_ulp_payload(sde, length, tx);
bail_txadd:
return ret;
}
@@ -892,8 +877,7 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
struct hfi1_qp_priv *priv = qp->priv;
struct hfi1_ahg_info *ahg_info = priv->s_ahg;
u32 hdrwords = qp->s_hdrwords;
- struct rvt_sge_state *ss = qp->s_cur_sge;
- u32 len = qp->s_cur_size;
+ u32 len = ps->s_txreq->s_cur_size;
u32 plen = hdrwords + ((len + 3) >> 2) + 2; /* includes pbc */
struct hfi1_ibdev *dev = ps->dev;
struct hfi1_pportdata *ppd = ps->ppd;
@@ -918,7 +902,7 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
plen);
}
tx->wqe = qp->s_wqe;
- ret = build_verbs_tx_desc(tx->sde, ss, len, tx, ahg_info, pbc);
+ ret = build_verbs_tx_desc(tx->sde, len, tx, ahg_info, pbc);
if (unlikely(ret))
goto bail_build;
}
@@ -980,6 +964,7 @@ static int pio_wait(struct rvt_qp *qp,
qp->s_flags |= flag;
was_empty = list_empty(&sc->piowait);
list_add_tail(&priv->s_iowait.list, &sc->piowait);
+ priv->s_iowait.lock = &dev->iowait_lock;
trace_hfi1_qpsleep(qp, RVT_S_WAIT_PIO);
rvt_get_qp(qp);
/* counting: only call wantpiobuf_intr if first user */
@@ -1008,8 +993,8 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
{
struct hfi1_qp_priv *priv = qp->priv;
u32 hdrwords = qp->s_hdrwords;
- struct rvt_sge_state *ss = qp->s_cur_sge;
- u32 len = qp->s_cur_size;
+ struct rvt_sge_state *ss = ps->s_txreq->ss;
+ u32 len = ps->s_txreq->s_cur_size;
u32 dwords = (len + 3) >> 2;
u32 plen = hdrwords + dwords + 2; /* includes pbc */
struct hfi1_pportdata *ppd = ps->ppd;
@@ -1237,7 +1222,7 @@ static inline send_routine get_send_routine(struct rvt_qp *qp,
u8 op = get_opcode(h);
if (piothreshold &&
- qp->s_cur_size <= min(piothreshold, qp->pmtu) &&
+ tx->s_cur_size <= min(piothreshold, qp->pmtu) &&
(BIT(op & OPMASK) & pio_opmask[op >> 5]) &&
iowait_sdma_pending(&priv->s_iowait) == 0 &&
!sdma_txreq_built(&tx->txreq))
@@ -1483,15 +1468,11 @@ static int hfi1_get_guid_be(struct rvt_dev_info *rdi, struct rvt_ibport *rvp,
int guid_index, __be64 *guid)
{
struct hfi1_ibport *ibp = container_of(rvp, struct hfi1_ibport, rvp);
- struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
- if (guid_index == 0)
- *guid = cpu_to_be64(ppd->guid);
- else if (guid_index < HFI1_GUIDS_PER_PORT)
- *guid = ibp->guids[guid_index - 1];
- else
+ if (guid_index >= HFI1_GUIDS_PER_PORT)
return -EINVAL;
+ *guid = get_sguid(ibp, guid_index);
return 0;
}
@@ -1610,6 +1591,154 @@ static void hfi1_get_dev_fw_str(struct ib_device *ibdev, char *str,
dc8051_ver_min(ver));
}
+static const char * const driver_cntr_names[] = {
+ /* must be element 0 */
+ "DRIVER_KernIntr",
+ "DRIVER_ErrorIntr",
+ "DRIVER_Tx_Errs",
+ "DRIVER_Rcv_Errs",
+ "DRIVER_HW_Errs",
+ "DRIVER_NoPIOBufs",
+ "DRIVER_CtxtsOpen",
+ "DRIVER_RcvLen_Errs",
+ "DRIVER_EgrBufFull",
+ "DRIVER_EgrHdrFull"
+};
+
+static const char **dev_cntr_names;
+static const char **port_cntr_names;
+static int num_driver_cntrs = ARRAY_SIZE(driver_cntr_names);
+static int num_dev_cntrs;
+static int num_port_cntrs;
+static int cntr_names_initialized;
+
+/*
+ * Convert a list of names separated by '\n' into an array of NUL-terminated
+ * strings. Optionally some entries can be reserved in the array to hold extra
+ * external strings.
+ */
+static int init_cntr_names(const char *names_in,
+ const int names_len,
+ int num_extra_names,
+ int *num_cntrs,
+ const char ***cntr_names)
+{
+ char *names_out, *p, **q;
+ int i, n;
+
+ n = 0;
+ for (i = 0; i < names_len; i++)
+ if (names_in[i] == '\n')
+ n++;
+
+ names_out = kmalloc((n + num_extra_names) * sizeof(char *) + names_len,
+ GFP_KERNEL);
+ if (!names_out) {
+ *num_cntrs = 0;
+ *cntr_names = NULL;
+ return -ENOMEM;
+ }
+
+ p = names_out + (n + num_extra_names) * sizeof(char *);
+ memcpy(p, names_in, names_len);
+
+ q = (char **)names_out;
+ for (i = 0; i < n; i++) {
+ q[i] = p;
+ p = strchr(p, '\n');
+ *p++ = '\0';
+ }
+
+ *num_cntrs = n;
+ *cntr_names = (const char **)names_out;
+ return 0;
+}
+
+static struct rdma_hw_stats *alloc_hw_stats(struct ib_device *ibdev,
+ u8 port_num)
+{
+ int i, err;
+
+ if (!cntr_names_initialized) {
+ struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
+
+ err = init_cntr_names(dd->cntrnames,
+ dd->cntrnameslen,
+ num_driver_cntrs,
+ &num_dev_cntrs,
+ &dev_cntr_names);
+ if (err)
+ return NULL;
+
+ for (i = 0; i < num_driver_cntrs; i++)
+ dev_cntr_names[num_dev_cntrs + i] =
+ driver_cntr_names[i];
+
+ err = init_cntr_names(dd->portcntrnames,
+ dd->portcntrnameslen,
+ 0,
+ &num_port_cntrs,
+ &port_cntr_names);
+ if (err) {
+ kfree(dev_cntr_names);
+ dev_cntr_names = NULL;
+ return NULL;
+ }
+ cntr_names_initialized = 1;
+ }
+
+ if (!port_num)
+ return rdma_alloc_hw_stats_struct(
+ dev_cntr_names,
+ num_dev_cntrs + num_driver_cntrs,
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
+ else
+ return rdma_alloc_hw_stats_struct(
+ port_cntr_names,
+ num_port_cntrs,
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
+
+static u64 hfi1_sps_ints(void)
+{
+ unsigned long flags;
+ struct hfi1_devdata *dd;
+ u64 sps_ints = 0;
+
+ spin_lock_irqsave(&hfi1_devs_lock, flags);
+ list_for_each_entry(dd, &hfi1_dev_list, list) {
+ sps_ints += get_all_cpu_total(dd->int_counter);
+ }
+ spin_unlock_irqrestore(&hfi1_devs_lock, flags);
+ return sps_ints;
+}
+
+static int get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
+ u8 port, int index)
+{
+ u64 *values;
+ int count;
+
+ if (!port) {
+ u64 *stats = (u64 *)&hfi1_stats;
+ int i;
+
+ hfi1_read_cntrs(dd_from_ibdev(ibdev), NULL, &values);
+ values[num_dev_cntrs] = hfi1_sps_ints();
+ for (i = 1; i < num_driver_cntrs; i++)
+ values[num_dev_cntrs + i] = stats[i];
+ count = num_dev_cntrs + num_driver_cntrs;
+ } else {
+ struct hfi1_ibport *ibp = to_iport(ibdev, port);
+
+ hfi1_read_portcntrs(ppd_from_ibp(ibp), NULL, &values);
+ count = num_port_cntrs;
+ }
+
+ memcpy(stats->value, values, count * sizeof(u64));
+ return count;
+}
+
/**
* hfi1_register_ib_device - register our device with the infiniband core
* @dd: the device data structure
@@ -1620,6 +1749,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
struct hfi1_ibdev *dev = &dd->verbs_dev;
struct ib_device *ibdev = &dev->rdi.ibdev;
struct hfi1_pportdata *ppd = dd->pport;
+ struct hfi1_ibport *ibp = &ppd->ibport_data;
unsigned i;
int ret;
size_t lcpysz = IB_DEVICE_NAME_MAX;
@@ -1632,6 +1762,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
setup_timer(&dev->mem_timer, mem_timer, (unsigned long)dev);
seqlock_init(&dev->iowait_lock);
+ seqlock_init(&dev->txwait_lock);
INIT_LIST_HEAD(&dev->txwait);
INIT_LIST_HEAD(&dev->memwait);
@@ -1639,20 +1770,24 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
if (ret)
goto err_verbs_txreq;
+ /* Use first-port GUID as node guid */
+ ibdev->node_guid = get_sguid(ibp, HFI1_PORT_GUID_INDEX);
+
/*
* The system image GUID is supposed to be the same for all
* HFIs in a single system but since there can be other
* device types in the system, we can't be sure this is unique.
*/
if (!ib_hfi1_sys_image_guid)
- ib_hfi1_sys_image_guid = cpu_to_be64(ppd->guid);
+ ib_hfi1_sys_image_guid = ibdev->node_guid;
lcpysz = strlcpy(ibdev->name, class_name(), lcpysz);
strlcpy(ibdev->name + lcpysz, "_%d", IB_DEVICE_NAME_MAX - lcpysz);
ibdev->owner = THIS_MODULE;
- ibdev->node_guid = cpu_to_be64(ppd->guid);
ibdev->phys_port_cnt = dd->num_pports;
ibdev->dma_device = &dd->pcidev->dev;
ibdev->modify_device = modify_device;
+ ibdev->alloc_hw_stats = alloc_hw_stats;
+ ibdev->get_hw_stats = get_hw_stats;
/* keep process mad in the driver */
ibdev->process_mad = hfi1_process_mad;
@@ -1767,6 +1902,10 @@ void hfi1_unregister_ib_device(struct hfi1_devdata *dd)
del_timer_sync(&dev->mem_timer);
verbs_txreq_exit(dev);
+
+ kfree(dev_cntr_names);
+ kfree(port_cntr_names);
+ cntr_names_initialized = 0;
}
void hfi1_cnp_rcv(struct hfi1_packet *packet)
diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h
index 1c3815d89eb7..e6b893010e6d 100644
--- a/drivers/infiniband/hw/hfi1/verbs.h
+++ b/drivers/infiniband/hw/hfi1/verbs.h
@@ -73,7 +73,6 @@ struct hfi1_packet;
#include "iowait.h"
#define HFI1_MAX_RDMA_ATOMIC 16
-#define HFI1_GUIDS_PER_PORT 5
/*
* Increment this value if any changes that break userspace ABI
@@ -169,8 +168,6 @@ struct hfi1_ibport {
struct rvt_qp __rcu *qp[2];
struct rvt_ibport rvp;
- __be64 guids[HFI1_GUIDS_PER_PORT - 1]; /* writable GUIDs */
-
/* the first 16 entries are sl_to_vl for !OPA */
u8 sl_to_sc[32];
u8 sc_to_sl[32];
@@ -180,18 +177,19 @@ struct hfi1_ibdev {
struct rvt_dev_info rdi; /* Must be first */
/* QP numbers are shared by all IB ports */
- /* protect wait lists */
- seqlock_t iowait_lock;
+ /* protect txwait list */
+ seqlock_t txwait_lock ____cacheline_aligned_in_smp;
struct list_head txwait; /* list for wait verbs_txreq */
struct list_head memwait; /* list for wait kernel memory */
- struct list_head txreq_free;
struct kmem_cache *verbs_txreq_cache;
- struct timer_list mem_timer;
+ u64 n_txwait;
+ u64 n_kmem_wait;
+ /* protect iowait lists */
+ seqlock_t iowait_lock ____cacheline_aligned_in_smp;
u64 n_piowait;
u64 n_piodrain;
- u64 n_txwait;
- u64 n_kmem_wait;
+ struct timer_list mem_timer;
#ifdef CONFIG_DEBUG_FS
/* per HFI debugfs */
diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.c b/drivers/infiniband/hw/hfi1/verbs_txreq.c
index 094ab829ec42..5d23172c470f 100644
--- a/drivers/infiniband/hw/hfi1/verbs_txreq.c
+++ b/drivers/infiniband/hw/hfi1/verbs_txreq.c
@@ -72,22 +72,22 @@ void hfi1_put_txreq(struct verbs_txreq *tx)
kmem_cache_free(dev->verbs_txreq_cache, tx);
do {
- seq = read_seqbegin(&dev->iowait_lock);
+ seq = read_seqbegin(&dev->txwait_lock);
if (!list_empty(&dev->txwait)) {
struct iowait *wait;
- write_seqlock_irqsave(&dev->iowait_lock, flags);
+ write_seqlock_irqsave(&dev->txwait_lock, flags);
wait = list_first_entry(&dev->txwait, struct iowait,
list);
qp = iowait_to_qp(wait);
priv = qp->priv;
list_del_init(&priv->s_iowait.list);
/* refcount held until actual wake up */
- write_sequnlock_irqrestore(&dev->iowait_lock, flags);
+ write_sequnlock_irqrestore(&dev->txwait_lock, flags);
hfi1_qp_wakeup(qp, RVT_S_WAIT_TX);
break;
}
- } while (read_seqretry(&dev->iowait_lock, seq));
+ } while (read_seqretry(&dev->txwait_lock, seq));
}
struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,
@@ -96,7 +96,7 @@ struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,
{
struct verbs_txreq *tx = ERR_PTR(-EBUSY);
- write_seqlock(&dev->iowait_lock);
+ write_seqlock(&dev->txwait_lock);
if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
struct hfi1_qp_priv *priv;
@@ -108,13 +108,14 @@ struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,
dev->n_txwait++;
qp->s_flags |= RVT_S_WAIT_TX;
list_add_tail(&priv->s_iowait.list, &dev->txwait);
+ priv->s_iowait.lock = &dev->txwait_lock;
trace_hfi1_qpsleep(qp, RVT_S_WAIT_TX);
rvt_get_qp(qp);
}
qp->s_flags &= ~RVT_S_BUSY;
}
out:
- write_sequnlock(&dev->iowait_lock);
+ write_sequnlock(&dev->txwait_lock);
return tx;
}
diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.h b/drivers/infiniband/hw/hfi1/verbs_txreq.h
index 5660897593ba..76216f2ef35a 100644
--- a/drivers/infiniband/hw/hfi1/verbs_txreq.h
+++ b/drivers/infiniband/hw/hfi1/verbs_txreq.h
@@ -65,6 +65,7 @@ struct verbs_txreq {
struct sdma_engine *sde;
struct send_context *psc;
u16 hdr_dwords;
+ u16 s_cur_size;
};
struct hfi1_ibdev;
diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c
index 728e0a030d2e..2b5982f743ef 100644
--- a/drivers/infiniband/hw/qib/qib_driver.c
+++ b/drivers/infiniband/hw/qib/qib_driver.c
@@ -420,8 +420,7 @@ static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd,
if (list_empty(&qp->rspwait)) {
qp->r_flags |=
RVT_R_RSP_NAK;
- atomic_inc(
- &qp->refcount);
+ rvt_get_qp(qp);
list_add_tail(
&qp->rspwait,
&rcd->qp_wait_list);
diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c
index 2097512e75aa..031433cb7206 100644
--- a/drivers/infiniband/hw/qib/qib_rc.c
+++ b/drivers/infiniband/hw/qib/qib_rc.c
@@ -941,8 +941,6 @@ void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr)
{
struct ib_other_headers *ohdr;
struct rvt_swqe *wqe;
- struct ib_wc wc;
- unsigned i;
u32 opcode;
u32 psn;
@@ -988,22 +986,8 @@ void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr)
qp->s_last = s_last;
/* see post_send() */
barrier();
- for (i = 0; i < wqe->wr.num_sge; i++) {
- struct rvt_sge *sge = &wqe->sg_list[i];
-
- rvt_put_mr(sge->mr);
- }
- /* Post a send completion queue entry if requested. */
- if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
- (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
- memset(&wc, 0, sizeof(wc));
- wc.wr_id = wqe->wr.wr_id;
- wc.status = IB_WC_SUCCESS;
- wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode];
- wc.byte_len = wqe->length;
- wc.qp = &qp->ibqp;
- rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0);
- }
+ rvt_put_swqe(wqe);
+ rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS);
}
/*
* If we were waiting for sends to complete before resending,
@@ -1032,9 +1016,6 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
struct rvt_swqe *wqe,
struct qib_ibport *ibp)
{
- struct ib_wc wc;
- unsigned i;
-
/*
* Don't decrement refcount and don't generate a
* completion if the SWQE is being resent until the send
@@ -1044,28 +1025,14 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
u32 s_last;
- for (i = 0; i < wqe->wr.num_sge; i++) {
- struct rvt_sge *sge = &wqe->sg_list[i];
-
- rvt_put_mr(sge->mr);
- }
+ rvt_put_swqe(wqe);
s_last = qp->s_last;
if (++s_last >= qp->s_size)
s_last = 0;
qp->s_last = s_last;
/* see post_send() */
barrier();
- /* Post a send completion queue entry if requested. */
- if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
- (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
- memset(&wc, 0, sizeof(wc));
- wc.wr_id = wqe->wr.wr_id;
- wc.status = IB_WC_SUCCESS;
- wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode];
- wc.byte_len = wqe->length;
- wc.qp = &qp->ibqp;
- rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0);
- }
+ rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS);
} else
this_cpu_inc(*ibp->rvp.rc_delayed_comp);
@@ -2112,8 +2079,7 @@ send_last:
* Update the next expected PSN. We add 1 later
* below, so only add the remainder here.
*/
- if (len > pmtu)
- qp->r_psn += (len - 1) / pmtu;
+ qp->r_psn += rvt_div_mtu(qp, len - 1);
} else {
e->rdma_sge.mr = NULL;
e->rdma_sge.vaddr = NULL;
diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c
index de1bde5950f5..e54a2feeeb10 100644
--- a/drivers/infiniband/hw/qib/qib_ruc.c
+++ b/drivers/infiniband/hw/qib/qib_ruc.c
@@ -793,7 +793,6 @@ void qib_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
enum ib_wc_status status)
{
u32 old_last, last;
- unsigned i;
if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
return;
@@ -805,32 +804,13 @@ void qib_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
qp->s_last = last;
/* See post_send() */
barrier();
- for (i = 0; i < wqe->wr.num_sge; i++) {
- struct rvt_sge *sge = &wqe->sg_list[i];
-
- rvt_put_mr(sge->mr);
- }
+ rvt_put_swqe(wqe);
if (qp->ibqp.qp_type == IB_QPT_UD ||
qp->ibqp.qp_type == IB_QPT_SMI ||
qp->ibqp.qp_type == IB_QPT_GSI)
atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
- /* See ch. 11.2.4.1 and 10.7.3.1 */
- if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
- (wqe->wr.send_flags & IB_SEND_SIGNALED) ||
- status != IB_WC_SUCCESS) {
- struct ib_wc wc;
-
- memset(&wc, 0, sizeof(wc));
- wc.wr_id = wqe->wr.wr_id;
- wc.status = status;
- wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode];
- wc.qp = &qp->ibqp;
- if (status == IB_WC_SUCCESS)
- wc.byte_len = wqe->length;
- rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc,
- status != IB_WC_SUCCESS);
- }
+ rvt_qp_swqe_complete(qp, wqe, status);
if (qp->s_acked == old_last)
qp->s_acked = last;
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index 954f15064514..4b54c0ddd08a 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -114,19 +114,6 @@ module_param_named(disable_sma, ib_qib_disable_sma, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(disable_sma, "Disable the SMA");
/*
- * Translate ib_wr_opcode into ib_wc_opcode.
- */
-const enum ib_wc_opcode ib_qib_wc_opcode[] = {
- [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
- [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
- [IB_WR_SEND] = IB_WC_SEND,
- [IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
- [IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
- [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
- [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD
-};
-
-/*
* System image GUID.
*/
__be64 ib_qib_sys_image_guid;
@@ -464,7 +451,7 @@ static void mem_timer(unsigned long data)
priv = list_entry(list->next, struct qib_qp_priv, iowait);
qp = priv->owner;
list_del_init(&priv->iowait);
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
if (!list_empty(list))
mod_timer(&dev->mem_timer, jiffies + 1);
}
@@ -477,8 +464,7 @@ static void mem_timer(unsigned long data)
qib_schedule_send(qp);
}
spin_unlock_irqrestore(&qp->s_lock, flags);
- if (atomic_dec_and_test(&qp->refcount))
- wake_up(&qp->wait);
+ rvt_put_qp(qp);
}
}
@@ -762,7 +748,7 @@ void qib_put_txreq(struct qib_verbs_txreq *tx)
iowait);
qp = priv->owner;
list_del_init(&priv->iowait);
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
spin_lock_irqsave(&qp->s_lock, flags);
@@ -772,8 +758,7 @@ void qib_put_txreq(struct qib_verbs_txreq *tx)
}
spin_unlock_irqrestore(&qp->s_lock, flags);
- if (atomic_dec_and_test(&qp->refcount))
- wake_up(&qp->wait);
+ rvt_put_qp(qp);
} else
spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
}
@@ -808,7 +793,7 @@ void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail)
break;
avail -= qpp->s_tx->txreq.sg_count;
list_del_init(&qpp->iowait);
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
qps[n++] = qp;
}
@@ -822,8 +807,7 @@ void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail)
qib_schedule_send(qp);
}
spin_unlock(&qp->s_lock);
- if (atomic_dec_and_test(&qp->refcount))
- wake_up(&qp->wait);
+ rvt_put_qp(qp);
}
}
@@ -1288,7 +1272,7 @@ void qib_ib_piobufavail(struct qib_devdata *dd)
priv = list_entry(list->next, struct qib_qp_priv, iowait);
qp = priv->owner;
list_del_init(&priv->iowait);
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
qps[n++] = qp;
}
dd->f_wantpiobuf_intr(dd, 0);
@@ -1306,8 +1290,7 @@ full:
spin_unlock_irqrestore(&qp->s_lock, flags);
/* Notify qib_destroy_qp() if it is waiting. */
- if (atomic_dec_and_test(&qp->refcount))
- wake_up(&qp->wait);
+ rvt_put_qp(qp);
}
}
diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c
index 6d9904a4a0ab..4d0b6992e847 100644
--- a/drivers/infiniband/sw/rdmavt/cq.c
+++ b/drivers/infiniband/sw/rdmavt/cq.c
@@ -119,18 +119,17 @@ void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited)
if (cq->notify == IB_CQ_NEXT_COMP ||
(cq->notify == IB_CQ_SOLICITED &&
(solicited || entry->status != IB_WC_SUCCESS))) {
- struct kthread_worker *worker;
/*
* This will cause send_complete() to be called in
* another thread.
*/
- smp_read_barrier_depends(); /* see rvt_cq_exit */
- worker = cq->rdi->worker;
- if (likely(worker)) {
+ spin_lock(&cq->rdi->n_cqs_lock);
+ if (likely(cq->rdi->worker)) {
cq->notify = RVT_CQ_NONE;
cq->triggered++;
- kthread_queue_work(worker, &cq->comptask);
+ kthread_queue_work(cq->rdi->worker, &cq->comptask);
}
+ spin_unlock(&cq->rdi->n_cqs_lock);
}
spin_unlock_irqrestore(&cq->lock, flags);
@@ -240,15 +239,15 @@ struct ib_cq *rvt_create_cq(struct ib_device *ibdev,
}
}
- spin_lock(&rdi->n_cqs_lock);
+ spin_lock_irq(&rdi->n_cqs_lock);
if (rdi->n_cqs_allocated == rdi->dparms.props.max_cq) {
- spin_unlock(&rdi->n_cqs_lock);
+ spin_unlock_irq(&rdi->n_cqs_lock);
ret = ERR_PTR(-ENOMEM);
goto bail_ip;
}
rdi->n_cqs_allocated++;
- spin_unlock(&rdi->n_cqs_lock);
+ spin_unlock_irq(&rdi->n_cqs_lock);
if (cq->ip) {
spin_lock_irq(&rdi->pending_lock);
@@ -296,9 +295,9 @@ int rvt_destroy_cq(struct ib_cq *ibcq)
struct rvt_dev_info *rdi = cq->rdi;
kthread_flush_work(&cq->comptask);
- spin_lock(&rdi->n_cqs_lock);
+ spin_lock_irq(&rdi->n_cqs_lock);
rdi->n_cqs_allocated--;
- spin_unlock(&rdi->n_cqs_lock);
+ spin_unlock_irq(&rdi->n_cqs_lock);
if (cq->ip)
kref_put(&cq->ip->ref, rvt_release_mmap_info);
else
@@ -504,33 +503,23 @@ int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
*/
int rvt_driver_cq_init(struct rvt_dev_info *rdi)
{
- int ret = 0;
int cpu;
- struct task_struct *task;
+ struct kthread_worker *worker;
if (rdi->worker)
return 0;
+
spin_lock_init(&rdi->n_cqs_lock);
- rdi->worker = kzalloc(sizeof(*rdi->worker), GFP_KERNEL);
- if (!rdi->worker)
- return -ENOMEM;
- kthread_init_worker(rdi->worker);
- task = kthread_create_on_node(
- kthread_worker_fn,
- rdi->worker,
- rdi->dparms.node,
- "%s", rdi->dparms.cq_name);
- if (IS_ERR(task)) {
- kfree(rdi->worker);
- rdi->worker = NULL;
- return PTR_ERR(task);
- }
- set_user_nice(task, MIN_NICE);
cpu = cpumask_first(cpumask_of_node(rdi->dparms.node));
- kthread_bind(task, cpu);
- wake_up_process(task);
- return ret;
+ worker = kthread_create_worker_on_cpu(cpu, 0,
+ "%s", rdi->dparms.cq_name);
+ if (IS_ERR(worker))
+ return PTR_ERR(worker);
+
+ set_user_nice(worker->task, MIN_NICE);
+ rdi->worker = worker;
+ return 0;
}
/**
@@ -541,13 +530,16 @@ void rvt_cq_exit(struct rvt_dev_info *rdi)
{
struct kthread_worker *worker;
- worker = rdi->worker;
- if (!worker)
+ /* block future queuing from send_complete() */
+ spin_lock_irq(&rdi->n_cqs_lock);
+ /* snapshot now: rdi->worker is NULL by the time we destroy it */
+ worker = rdi->worker;
+ if (!worker) {
+ spin_unlock_irq(&rdi->n_cqs_lock);
return;
- /* blocks future queuing from send_complete() */
+ }
rdi->worker = NULL;
- smp_wmb(); /* See rdi_cq_enter */
- kthread_flush_worker(worker);
- kthread_stop(worker->task);
- kfree(worker);
+ spin_unlock_irq(&rdi->n_cqs_lock);
+
+ kthread_destroy_worker(worker);
}
diff --git a/drivers/infiniband/sw/rdmavt/mcast.c b/drivers/infiniband/sw/rdmavt/mcast.c
index 983d319ac976..05c8c2afb0e3 100644
--- a/drivers/infiniband/sw/rdmavt/mcast.c
+++ b/drivers/infiniband/sw/rdmavt/mcast.c
@@ -81,7 +81,7 @@ static struct rvt_mcast_qp *rvt_mcast_qp_alloc(struct rvt_qp *qp)
goto bail;
mqp->qp = qp;
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
bail:
return mqp;
@@ -92,8 +92,7 @@ static void rvt_mcast_qp_free(struct rvt_mcast_qp *mqp)
struct rvt_qp *qp = mqp->qp;
/* Notify hfi1_destroy_qp() if it is waiting. */
- if (atomic_dec_and_test(&qp->refcount))
- wake_up(&qp->wait);
+ rvt_put_qp(qp);
kfree(mqp);
}
diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c
index 46b64970058e..52fd15276ee6 100644
--- a/drivers/infiniband/sw/rdmavt/mr.c
+++ b/drivers/infiniband/sw/rdmavt/mr.c
@@ -51,6 +51,7 @@
#include <rdma/rdma_vt.h>
#include "vt.h"
#include "mr.h"
+#include "trace.h"
/**
* rvt_driver_mr_init - Init MR resources per driver
@@ -84,6 +85,7 @@ int rvt_driver_mr_init(struct rvt_dev_info *rdi)
lkey_table_size = rdi->dparms.lkey_table_size;
}
rdi->lkey_table.max = 1 << lkey_table_size;
+ rdi->lkey_table.shift = 32 - lkey_table_size;
lk_tab_size = rdi->lkey_table.max * sizeof(*rdi->lkey_table.table);
rdi->lkey_table.table = (struct rvt_mregion __rcu **)
vmalloc_node(lk_tab_size, rdi->dparms.node);
@@ -402,6 +404,7 @@ struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}
mr->mr.map[m]->segs[n].vaddr = vaddr;
mr->mr.map[m]->segs[n].length = umem->page_size;
+ trace_rvt_mr_user_seg(&mr->mr, m, n, vaddr, umem->page_size);
n++;
if (n == RVT_SEGSZ) {
m++;
@@ -506,6 +509,7 @@ static int rvt_set_page(struct ib_mr *ibmr, u64 addr)
n = mapped_segs % RVT_SEGSZ;
mr->mr.map[m]->segs[n].vaddr = (void *)addr;
mr->mr.map[m]->segs[n].length = ps;
+ trace_rvt_mr_page_seg(&mr->mr, m, n, (void *)addr, ps);
mr->mr.length += ps;
return 0;
@@ -692,6 +696,7 @@ int rvt_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
for (i = 0; i < list_len; i++) {
fmr->mr.map[m]->segs[n].vaddr = (void *)page_list[i];
fmr->mr.map[m]->segs[n].length = ps;
+ trace_rvt_mr_fmr_seg(&fmr->mr, m, n, (void *)page_list[i], ps);
if (++n == RVT_SEGSZ) {
m++;
n = 0;
@@ -774,7 +779,6 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
struct rvt_mregion *mr;
unsigned n, m;
size_t off;
- struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device);
/*
* We use LKEY == zero for kernel virtual addresses
@@ -782,12 +786,14 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
*/
rcu_read_lock();
if (sge->lkey == 0) {
+ struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device);
+
if (pd->user)
goto bail;
mr = rcu_dereference(dev->dma_mr);
if (!mr)
goto bail;
- atomic_inc(&mr->refcount);
+ rvt_get_mr(mr);
rcu_read_unlock();
isge->mr = mr;
@@ -798,8 +804,7 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
isge->n = 0;
goto ok;
}
- mr = rcu_dereference(
- rkt->table[(sge->lkey >> (32 - dev->dparms.lkey_table_size))]);
+ mr = rcu_dereference(rkt->table[sge->lkey >> rkt->shift]);
if (unlikely(!mr || atomic_read(&mr->lkey_invalid) ||
mr->lkey != sge->lkey || mr->pd != &pd->ibpd))
goto bail;
@@ -809,7 +814,7 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
off + sge->length > mr->length ||
(mr->access_flags & acc) != acc))
goto bail;
- atomic_inc(&mr->refcount);
+ rvt_get_mr(mr);
rcu_read_unlock();
off += mr->offset;
@@ -887,7 +892,7 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
mr = rcu_dereference(rdi->dma_mr);
if (!mr)
goto bail;
- atomic_inc(&mr->refcount);
+ rvt_get_mr(mr);
rcu_read_unlock();
sge->mr = mr;
@@ -899,8 +904,7 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
goto ok;
}
- mr = rcu_dereference(
- rkt->table[(rkey >> (32 - dev->dparms.lkey_table_size))]);
+ mr = rcu_dereference(rkt->table[rkey >> rkt->shift]);
if (unlikely(!mr || atomic_read(&mr->lkey_invalid) ||
mr->lkey != rkey || qp->ibqp.pd != mr->pd))
goto bail;
@@ -909,7 +913,7 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
if (unlikely(vaddr < mr->iova || off + len > mr->length ||
(mr->access_flags & acc) == 0))
goto bail;
- atomic_inc(&mr->refcount);
+ rvt_get_mr(mr);
rcu_read_unlock();
off += mr->offset;
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index 6500c3b5a89c..2a13ac660f2b 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -76,6 +76,23 @@ const int ib_rvt_state_ops[IB_QPS_ERR + 1] = {
};
EXPORT_SYMBOL(ib_rvt_state_ops);
+/*
+ * Translate ib_wr_opcode (the table index) into the matching ib_wc_opcode.
+ */
+const enum ib_wc_opcode ib_rvt_wc_opcode[] = {
+ [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
+ [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
+ [IB_WR_SEND] = IB_WC_SEND,
+ [IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
+ [IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
+ [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
+ [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD,
+ [IB_WR_SEND_WITH_INV] = IB_WC_SEND,
+ [IB_WR_LOCAL_INV] = IB_WC_LOCAL_INV,
+ [IB_WR_REG_MR] = IB_WC_REG_MR
+};
+EXPORT_SYMBOL(ib_rvt_wc_opcode);
+
static void get_map_page(struct rvt_qpn_table *qpt,
struct rvt_qpn_map *map,
gfp_t gfp)
@@ -884,7 +901,8 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
return ret;
bail_ip:
- kref_put(&qp->ip->ref, rvt_release_mmap_info);
+ if (qp->ip)
+ kref_put(&qp->ip->ref, rvt_release_mmap_info);
bail_qpn:
free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num);
diff --git a/drivers/infiniband/sw/rdmavt/trace.h b/drivers/infiniband/sw/rdmavt/trace.h
index 6c0457db5499..e2d23acb6a7d 100644
--- a/drivers/infiniband/sw/rdmavt/trace.h
+++ b/drivers/infiniband/sw/rdmavt/trace.h
@@ -45,143 +45,10 @@
*
*/
-#undef TRACE_SYSTEM_VAR
-#define TRACE_SYSTEM_VAR rdmavt
-
-#if !defined(__RDMAVT_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
-#define __RDMAVT_TRACE_H
-
-#include <linux/tracepoint.h>
-#include <linux/trace_seq.h>
-
-#include <rdma/ib_verbs.h>
-#include <rdma/rdma_vt.h>
-
#define RDI_DEV_ENTRY(rdi) __string(dev, rdi->driver_f.get_card_name(rdi))
#define RDI_DEV_ASSIGN(rdi) __assign_str(dev, rdi->driver_f.get_card_name(rdi))
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM rdmavt
-
-TRACE_EVENT(rvt_dbg,
- TP_PROTO(struct rvt_dev_info *rdi,
- const char *msg),
- TP_ARGS(rdi, msg),
- TP_STRUCT__entry(
- RDI_DEV_ENTRY(rdi)
- __string(msg, msg)
- ),
- TP_fast_assign(
- RDI_DEV_ASSIGN(rdi);
- __assign_str(msg, msg);
- ),
- TP_printk("[%s]: %s", __get_str(dev), __get_str(msg))
-);
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM rvt_qphash
-DECLARE_EVENT_CLASS(rvt_qphash_template,
- TP_PROTO(struct rvt_qp *qp, u32 bucket),
- TP_ARGS(qp, bucket),
- TP_STRUCT__entry(
- RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device))
- __field(u32, qpn)
- __field(u32, bucket)
- ),
- TP_fast_assign(
- RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device))
- __entry->qpn = qp->ibqp.qp_num;
- __entry->bucket = bucket;
- ),
- TP_printk(
- "[%s] qpn 0x%x bucket %u",
- __get_str(dev),
- __entry->qpn,
- __entry->bucket
- )
-);
-
-DEFINE_EVENT(rvt_qphash_template, rvt_qpinsert,
- TP_PROTO(struct rvt_qp *qp, u32 bucket),
- TP_ARGS(qp, bucket));
-
-DEFINE_EVENT(rvt_qphash_template, rvt_qpremove,
- TP_PROTO(struct rvt_qp *qp, u32 bucket),
- TP_ARGS(qp, bucket));
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM rvt_tx
-
-#define wr_opcode_name(opcode) { IB_WR_##opcode, #opcode }
-#define show_wr_opcode(opcode) \
-__print_symbolic(opcode, \
- wr_opcode_name(RDMA_WRITE), \
- wr_opcode_name(RDMA_WRITE_WITH_IMM), \
- wr_opcode_name(SEND), \
- wr_opcode_name(SEND_WITH_IMM), \
- wr_opcode_name(RDMA_READ), \
- wr_opcode_name(ATOMIC_CMP_AND_SWP), \
- wr_opcode_name(ATOMIC_FETCH_AND_ADD), \
- wr_opcode_name(LSO), \
- wr_opcode_name(SEND_WITH_INV), \
- wr_opcode_name(RDMA_READ_WITH_INV), \
- wr_opcode_name(LOCAL_INV), \
- wr_opcode_name(MASKED_ATOMIC_CMP_AND_SWP), \
- wr_opcode_name(MASKED_ATOMIC_FETCH_AND_ADD))
-
-#define POS_PRN \
-"[%s] wr_id %llx qpn %x psn 0x%x lpsn 0x%x length %u opcode 0x%.2x,%s size %u avail %u head %u last %u"
-
-TRACE_EVENT(
- rvt_post_one_wr,
- TP_PROTO(struct rvt_qp *qp, struct rvt_swqe *wqe),
- TP_ARGS(qp, wqe),
- TP_STRUCT__entry(
- RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device))
- __field(u64, wr_id)
- __field(u32, qpn)
- __field(u32, psn)
- __field(u32, lpsn)
- __field(u32, length)
- __field(u32, opcode)
- __field(u32, size)
- __field(u32, avail)
- __field(u32, head)
- __field(u32, last)
- ),
- TP_fast_assign(
- RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device))
- __entry->wr_id = wqe->wr.wr_id;
- __entry->qpn = qp->ibqp.qp_num;
- __entry->psn = wqe->psn;
- __entry->lpsn = wqe->lpsn;
- __entry->length = wqe->length;
- __entry->opcode = wqe->wr.opcode;
- __entry->size = qp->s_size;
- __entry->avail = qp->s_avail;
- __entry->head = qp->s_head;
- __entry->last = qp->s_last;
- ),
- TP_printk(
- POS_PRN,
- __get_str(dev),
- __entry->wr_id,
- __entry->qpn,
- __entry->psn,
- __entry->lpsn,
- __entry->length,
- __entry->opcode, show_wr_opcode(__entry->opcode),
- __entry->size,
- __entry->avail,
- __entry->head,
- __entry->last
- )
-);
-
-#endif /* __RDMAVT_TRACE_H */
-
-#undef TRACE_INCLUDE_PATH
-#undef TRACE_INCLUDE_FILE
-#define TRACE_INCLUDE_PATH .
-#define TRACE_INCLUDE_FILE trace
-#include <trace/define_trace.h>
+#include "trace_rvt.h"
+#include "trace_qp.h"
+#include "trace_tx.h"
+#include "trace_mr.h"
diff --git a/drivers/infiniband/sw/rdmavt/trace_mr.h b/drivers/infiniband/sw/rdmavt/trace_mr.h
new file mode 100644
index 000000000000..3318a6c36373
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/trace_mr.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#if !defined(__RVT_TRACE_MR_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __RVT_TRACE_MR_H
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/rdma_vt.h>
+#include <rdma/rdmavt_mr.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM rvt_mr
+DECLARE_EVENT_CLASS(rvt_mr_template, /* one segment stored in an MR map */
+ TP_PROTO(struct rvt_mregion *mr, u16 m, u16 n, void *v, size_t len),
+ TP_ARGS(mr, m, n, v, len),
+ TP_STRUCT__entry(
+ RDI_DEV_ENTRY(ib_to_rvt(mr->pd->device))
+ __field(void *, vaddr)
+ __field(struct page *, page)
+ __field(size_t, len)
+ __field(u32, lkey)
+ __field(u16, m)
+ __field(u16, n)
+ ),
+ TP_fast_assign(
+ RDI_DEV_ASSIGN(ib_to_rvt(mr->pd->device));
+ __entry->vaddr = v;
+ __entry->page = virt_to_page(v);
+ __entry->lkey = mr->lkey; /* declared above; must be filled in */
+ __entry->m = m;
+ __entry->n = n;
+ __entry->len = len;
+ ),
+ TP_printk(
+ "[%s] vaddr %p page %p m %u n %u len %zu", /* %zu: len is size_t */
+ __get_str(dev),
+ __entry->vaddr,
+ __entry->page,
+ __entry->m,
+ __entry->n,
+ __entry->len
+ )
+);
+
+DEFINE_EVENT(
+ rvt_mr_template, rvt_mr_page_seg,
+ TP_PROTO(struct rvt_mregion *mr, u16 m, u16 n, void *v, size_t len),
+ TP_ARGS(mr, m, n, v, len));
+
+DEFINE_EVENT(
+ rvt_mr_template, rvt_mr_fmr_seg,
+ TP_PROTO(struct rvt_mregion *mr, u16 m, u16 n, void *v, size_t len),
+ TP_ARGS(mr, m, n, v, len));
+
+DEFINE_EVENT(
+ rvt_mr_template, rvt_mr_user_seg,
+ TP_PROTO(struct rvt_mregion *mr, u16 m, u16 n, void *v, size_t len),
+ TP_ARGS(mr, m, n, v, len));
+
+#endif /* __RVT_TRACE_MR_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_mr
+#include <trace/define_trace.h>
diff --git a/drivers/infiniband/sw/rdmavt/trace_qp.h b/drivers/infiniband/sw/rdmavt/trace_qp.h
new file mode 100644
index 000000000000..4c77a3119bda
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/trace_qp.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#if !defined(__RVT_TRACE_QP_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __RVT_TRACE_QP_H
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/rdma_vt.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM rvt_qp
+
+DECLARE_EVENT_CLASS(rvt_qphash_template,
+ TP_PROTO(struct rvt_qp *qp, u32 bucket),
+ TP_ARGS(qp, bucket),
+ TP_STRUCT__entry(
+ RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device))
+ __field(u32, qpn)
+ __field(u32, bucket)
+ ),
+ TP_fast_assign(
+ RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device))
+ __entry->qpn = qp->ibqp.qp_num;
+ __entry->bucket = bucket;
+ ),
+ TP_printk(
+ "[%s] qpn 0x%x bucket %u",
+ __get_str(dev),
+ __entry->qpn,
+ __entry->bucket
+ )
+);
+
+DEFINE_EVENT(rvt_qphash_template, rvt_qpinsert,
+ TP_PROTO(struct rvt_qp *qp, u32 bucket),
+ TP_ARGS(qp, bucket));
+
+DEFINE_EVENT(rvt_qphash_template, rvt_qpremove,
+ TP_PROTO(struct rvt_qp *qp, u32 bucket),
+ TP_ARGS(qp, bucket));
+
+
+#endif /* __RVT_TRACE_QP_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_qp
+#include <trace/define_trace.h>
+
diff --git a/drivers/infiniband/sw/rdmavt/trace_rvt.h b/drivers/infiniband/sw/rdmavt/trace_rvt.h
new file mode 100644
index 000000000000..746f33461d9a
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/trace_rvt.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#if !defined(__RVT_TRACE_RVT_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __RVT_TRACE_RVT_H
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/rdma_vt.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM rvt
+
+TRACE_EVENT(rvt_dbg,
+ TP_PROTO(struct rvt_dev_info *rdi,
+ const char *msg),
+ TP_ARGS(rdi, msg),
+ TP_STRUCT__entry(
+ RDI_DEV_ENTRY(rdi)
+ __string(msg, msg)
+ ),
+ TP_fast_assign(
+ RDI_DEV_ASSIGN(rdi);
+ __assign_str(msg, msg);
+ ),
+ TP_printk("[%s]: %s", __get_str(dev), __get_str(msg))
+);
+
+#endif /* __RVT_TRACE_RVT_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_rvt
+#include <trace/define_trace.h>
+
diff --git a/drivers/infiniband/sw/rdmavt/trace_tx.h b/drivers/infiniband/sw/rdmavt/trace_tx.h
new file mode 100644
index 000000000000..0e03173662d8
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/trace_tx.h
@@ -0,0 +1,132 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#if !defined(__RVT_TRACE_TX_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __RVT_TRACE_TX_H
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/rdma_vt.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM rvt_tx
+
+#define wr_opcode_name(opcode) { IB_WR_##opcode, #opcode }
+#define show_wr_opcode(opcode) \
+__print_symbolic(opcode, \
+ wr_opcode_name(RDMA_WRITE), \
+ wr_opcode_name(RDMA_WRITE_WITH_IMM), \
+ wr_opcode_name(SEND), \
+ wr_opcode_name(SEND_WITH_IMM), \
+ wr_opcode_name(RDMA_READ), \
+ wr_opcode_name(ATOMIC_CMP_AND_SWP), \
+ wr_opcode_name(ATOMIC_FETCH_AND_ADD), \
+ wr_opcode_name(LSO), \
+ wr_opcode_name(SEND_WITH_INV), \
+ wr_opcode_name(RDMA_READ_WITH_INV), \
+ wr_opcode_name(LOCAL_INV), \
+ wr_opcode_name(MASKED_ATOMIC_CMP_AND_SWP), \
+ wr_opcode_name(MASKED_ATOMIC_FETCH_AND_ADD))
+
+#define POS_PRN \
+"[%s] wr_id %llx qpn %x psn 0x%x lpsn 0x%x length %u opcode 0x%.2x,%s size %u avail %u head %u last %u"
+
+TRACE_EVENT(
+ rvt_post_one_wr,
+ TP_PROTO(struct rvt_qp *qp, struct rvt_swqe *wqe),
+ TP_ARGS(qp, wqe),
+ TP_STRUCT__entry(
+ RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device))
+ __field(u64, wr_id)
+ __field(u32, qpn)
+ __field(u32, psn)
+ __field(u32, lpsn)
+ __field(u32, length)
+ __field(u32, opcode)
+ __field(u32, size)
+ __field(u32, avail)
+ __field(u32, head)
+ __field(u32, last)
+ ),
+ TP_fast_assign(
+ RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device))
+ __entry->wr_id = wqe->wr.wr_id;
+ __entry->qpn = qp->ibqp.qp_num;
+ __entry->psn = wqe->psn;
+ __entry->lpsn = wqe->lpsn;
+ __entry->length = wqe->length;
+ __entry->opcode = wqe->wr.opcode;
+ __entry->size = qp->s_size;
+ __entry->avail = qp->s_avail;
+ __entry->head = qp->s_head;
+ __entry->last = qp->s_last;
+ ),
+ TP_printk(
+ POS_PRN,
+ __get_str(dev),
+ __entry->wr_id,
+ __entry->qpn,
+ __entry->psn,
+ __entry->lpsn,
+ __entry->length,
+ __entry->opcode, show_wr_opcode(__entry->opcode),
+ __entry->size,
+ __entry->avail,
+ __entry->head,
+ __entry->last
+ )
+);
+
+#endif /* __RVT_TRACE_TX_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_tx
+#include <trace/define_trace.h>
+