Diffstat (limited to 'drivers/net/ethernet/ibm/ibmvnic.c')
 drivers/net/ethernet/ibm/ibmvnic.c | 552 ++++++++++++++++++++++++------------
 1 file changed, 386 insertions(+), 166 deletions(-)
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index e19a6bb3f444..3808148c1fc7 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -97,6 +97,8 @@ static int pending_scrq(struct ibmvnic_adapter *,
static union sub_crq *ibmvnic_next_scrq(struct ibmvnic_adapter *,
struct ibmvnic_sub_crq_queue *);
static int ibmvnic_poll(struct napi_struct *napi, int data);
+static int reset_sub_crq_queues(struct ibmvnic_adapter *adapter);
+static inline void reinit_init_done(struct ibmvnic_adapter *adapter);
static void send_query_map(struct ibmvnic_adapter *adapter);
static int send_request_map(struct ibmvnic_adapter *, dma_addr_t, u32, u8);
static int send_request_unmap(struct ibmvnic_adapter *, u8);
@@ -114,6 +116,8 @@ static void ibmvnic_tx_scrq_clean_buffer(struct ibmvnic_adapter *adapter,
static void free_long_term_buff(struct ibmvnic_adapter *adapter,
struct ibmvnic_long_term_buff *ltb);
static void ibmvnic_disable_irqs(struct ibmvnic_adapter *adapter);
+static void flush_reset_queue(struct ibmvnic_adapter *adapter);
+static void print_subcrq_error(struct device *dev, int rc, const char *func);
struct ibmvnic_stat {
char name[ETH_GSTRING_LEN];
@@ -191,9 +195,8 @@ static void ibmvnic_clean_affinity(struct ibmvnic_adapter *adapter)
struct ibmvnic_sub_crq_queue **rxqs;
struct ibmvnic_sub_crq_queue **txqs;
int num_rxqs, num_txqs;
- int rc, i;
+ int i;
- rc = 0;
rxqs = adapter->rx_scrq;
txqs = adapter->tx_scrq;
num_txqs = adapter->num_active_tx_scrqs;
@@ -231,11 +234,17 @@ static int ibmvnic_set_queue_affinity(struct ibmvnic_sub_crq_queue *queue,
(*stragglers)--;
}
/* atomic write is safer than writing bit by bit directly */
- for (i = 0; i < stride; i++) {
- cpumask_set_cpu(*cpu, mask);
- *cpu = cpumask_next_wrap(*cpu, cpu_online_mask,
- nr_cpu_ids, false);
+ for_each_online_cpu_wrap(i, *cpu) {
+ if (!stride--) {
+ /* For the next queue we start from the first
+ * unused CPU in this queue
+ */
+ *cpu = i;
+ break;
+ }
+ cpumask_set_cpu(i, mask);
}
+
/* set queue affinity mask */
cpumask_copy(queue->affinity_mask, mask);
rc = irq_set_affinity_and_hint(queue->irq, queue->affinity_mask);
@@ -250,10 +259,11 @@ static void ibmvnic_set_affinity(struct ibmvnic_adapter *adapter)
struct ibmvnic_sub_crq_queue **rxqs = adapter->rx_scrq;
struct ibmvnic_sub_crq_queue **txqs = adapter->tx_scrq;
struct ibmvnic_sub_crq_queue *queue;
- int num_rxqs = adapter->num_active_rx_scrqs;
- int num_txqs = adapter->num_active_tx_scrqs;
+ int num_rxqs = adapter->num_active_rx_scrqs, i_rxqs = 0;
+ int num_txqs = adapter->num_active_tx_scrqs, i_txqs = 0;
int total_queues, stride, stragglers, i;
- unsigned int num_cpu, cpu;
+ unsigned int num_cpu, cpu = 0;
+ bool is_rx_queue;
int rc = 0;
netdev_dbg(adapter->netdev, "%s: Setting irq affinity hints", __func__);
@@ -270,33 +280,33 @@ static void ibmvnic_set_affinity(struct ibmvnic_adapter *adapter)
stride = max_t(int, num_cpu / total_queues, 1);
/* number of leftover cpu's */
stragglers = num_cpu >= total_queues ? num_cpu % total_queues : 0;
- /* next available cpu to assign irq to */
- cpu = cpumask_next(-1, cpu_online_mask);
- for (i = 0; i < num_txqs; i++) {
- queue = txqs[i];
+ for (i = 0; i < total_queues; i++) {
+ is_rx_queue = false;
+ /* balance core load by alternating rx and tx assignments
+ * ex: TX0 -> RX0 -> TX1 -> RX1 etc.
+ */
+ if ((i % 2 == 1 && i_rxqs < num_rxqs) || i_txqs == num_txqs) {
+ queue = rxqs[i_rxqs++];
+ is_rx_queue = true;
+ } else {
+ queue = txqs[i_txqs++];
+ }
+
rc = ibmvnic_set_queue_affinity(queue, &cpu, &stragglers,
stride);
if (rc)
goto out;
- if (!queue)
+ if (!queue || is_rx_queue)
continue;
rc = __netif_set_xps_queue(adapter->netdev,
cpumask_bits(queue->affinity_mask),
- i, XPS_CPUS);
+ i_txqs - 1, XPS_CPUS);
if (rc)
netdev_warn(adapter->netdev, "%s: Set XPS on queue %d failed, rc = %d.\n",
- __func__, i, rc);
- }
-
- for (i = 0; i < num_rxqs; i++) {
- queue = rxqs[i];
- rc = ibmvnic_set_queue_affinity(queue, &cpu, &stragglers,
- stride);
- if (rc)
- goto out;
+ __func__, i_txqs - 1, rc);
}
out:
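
As a rough illustration of the arithmetic above, assume 8 online CPUs numbered 0-7 and
2 active TX + 2 active RX sub-CRQs: total_queues = 4, stride = max(8 / 4, 1) = 2,
stragglers = 8 % 4 = 0, and the alternating TX0 -> RX0 -> TX1 -> RX1 order pins the
queues to CPUs {0,1}, {2,3}, {4,5} and {6,7}, with *cpu left at the first unused CPU
each time. The minimal userspace sketch below walks the same arithmetic; it assumes
CPUs 0..num_cpu-1 are all online and, as the straggler decrement in
ibmvnic_set_queue_affinity() suggests, that a queue takes one extra CPU while
stragglers remain.

    #include <stdio.h>

    /* Standalone sketch, not driver code: simulate the CPU striping used by
     * ibmvnic_set_affinity(), assuming CPUs 0..num_cpu-1 are all online.
     */
    int main(void)
    {
        int num_cpu = 8, num_txqs = 2, num_rxqs = 2;
        int total_queues = num_txqs + num_rxqs;
        int stride = num_cpu / total_queues ? num_cpu / total_queues : 1;
        int stragglers = num_cpu >= total_queues ? num_cpu % total_queues : 0;
        int cpu = 0, i_txqs = 0, i_rxqs = 0;

        for (int i = 0; i < total_queues; i++) {
            /* alternate TX/RX exactly like the loop in ibmvnic_set_affinity() */
            int is_rx = (i % 2 == 1 && i_rxqs < num_rxqs) || i_txqs == num_txqs;
            /* assumption: one leftover "straggler" CPU goes to this queue */
            int n = stride + (stragglers ? 1 : 0);

            if (stragglers)
                stragglers--;
            printf("%s%d -> CPUs %d..%d\n", is_rx ? "RX" : "TX",
                   is_rx ? i_rxqs++ : i_txqs++, cpu, cpu + n - 1);
            cpu += n; /* next queue starts at the first unused CPU */
        }
        return 0;
    }
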
@@ -746,6 +756,17 @@ static void deactivate_rx_pools(struct ibmvnic_adapter *adapter)
adapter->rx_pool[i].active = 0;
}
+static void ibmvnic_set_safe_max_ind_descs(struct ibmvnic_adapter *adapter)
+{
+ if (adapter->cur_max_ind_descs > IBMVNIC_SAFE_IND_DESC) {
+ netdev_info(adapter->netdev,
+ "set max ind descs from %u to safe limit %u\n",
+ adapter->cur_max_ind_descs,
+ IBMVNIC_SAFE_IND_DESC);
+ adapter->cur_max_ind_descs = IBMVNIC_SAFE_IND_DESC;
+ }
+}
+
static void replenish_rx_pool(struct ibmvnic_adapter *adapter,
struct ibmvnic_rx_pool *pool)
{
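
Together with the TX and RX flush hunks below, adapter->cur_max_ind_descs replaces the
compile-time IBMVNIC_MAX_IND_DESCS limit wherever a batch of indirect descriptors is
handed to the hypervisor. It starts at IBMVNIC_MAX_IND_DESCS (set in ibmvnic_probe()
and restored after an LPM in ibmvnic_reset_init()), and the helper above clamps it to
IBMVNIC_SAFE_IND_DESC the first time H_SEND_SUB_CRQ_INDIRECT fails with H_PARAMETER,
so a partition that migrates to a platform with a smaller indirect-descriptor limit
falls back to the safe value instead of repeatedly dropping batches.
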
@@ -833,7 +854,7 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter,
sub_crq->rx_add.len = cpu_to_be32(pool->buff_size << shift);
/* if send_subcrq_indirect queue is full, flush to VIOS */
- if (ind_bufp->index == IBMVNIC_MAX_IND_DESCS ||
+ if (ind_bufp->index == adapter->cur_max_ind_descs ||
i == count - 1) {
lpar_rc =
send_subcrq_indirect(adapter, handle,
@@ -852,6 +873,14 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter,
failure:
if (lpar_rc != H_PARAMETER && lpar_rc != H_CLOSED)
dev_err_ratelimited(dev, "rx: replenish packet buffer failed\n");
+
+ /* Detect platform limit H_PARAMETER */
+ if (lpar_rc == H_PARAMETER)
+ ibmvnic_set_safe_max_ind_descs(adapter);
+
+ /* For all error cases, temporarily drop only this batch
+ * Rely on TCP/IP retransmissions to retry and recover
+ */
for (i = ind_bufp->index - 1; i >= 0; --i) {
struct ibmvnic_rx_buff *rx_buff;
@@ -1502,8 +1531,8 @@ static const char *adapter_state_to_string(enum vnic_state state)
static int ibmvnic_login(struct net_device *netdev)
{
+ unsigned long flags, timeout = msecs_to_jiffies(20000);
struct ibmvnic_adapter *adapter = netdev_priv(netdev);
- unsigned long timeout = msecs_to_jiffies(20000);
int retry_count = 0;
int retries = 10;
bool retry;
@@ -1524,11 +1553,9 @@ static int ibmvnic_login(struct net_device *netdev)
if (!wait_for_completion_timeout(&adapter->init_done,
timeout)) {
- netdev_warn(netdev, "Login timed out, retrying...\n");
- retry = true;
- adapter->init_done_rc = 0;
- retry_count++;
- continue;
+ netdev_warn(netdev, "Login timed out\n");
+ adapter->login_pending = false;
+ goto partial_reset;
}
if (adapter->init_done_rc == ABORTED) {
@@ -1570,10 +1597,69 @@ static int ibmvnic_login(struct net_device *netdev)
"SCRQ irq initialization failed\n");
return rc;
}
+ /* Default/timeout error handling, reset and start fresh */
} else if (adapter->init_done_rc) {
netdev_warn(netdev, "Adapter login failed, init_done_rc = %d\n",
adapter->init_done_rc);
- return -EIO;
+
+partial_reset:
+ /* adapter login failed, so free any CRQs or sub-CRQs
+ * and register again before attempting to log in again.
+ * If we don't do this then the VIOS may think that
+ * we are already logged in and reject any subsequent
+ * attempts
+ */
+ netdev_warn(netdev,
+ "Freeing and re-registering CRQs before attempting to login again\n");
+ retry = true;
+ adapter->init_done_rc = 0;
+ release_sub_crqs(adapter, true);
+ /* Much of this is similar logic as ibmvnic_probe(),
+ * we are essentially re-initializing communication
+ * with the server. We really should not run any
+ * resets/failovers here because this is already a form
+ * of reset and we do not want parallel resets occurring
+ */
+ do {
+ reinit_init_done(adapter);
+ /* Clear any failovers we got in the previous
+ * pass since we are re-initializing the CRQ
+ */
+ adapter->failover_pending = false;
+ release_crq_queue(adapter);
+ /* If we don't sleep here then we risk an
+ * unnecessary failover event from the VIOS.
+ * This is a known VIOS issue caused by a vnic
+ * device freeing and registering a CRQ too
+ * quickly.
+ */
+ msleep(1500);
+ /* Avoid any resets, since we are currently
+ * resetting.
+ */
+ spin_lock_irqsave(&adapter->rwi_lock, flags);
+ flush_reset_queue(adapter);
+ spin_unlock_irqrestore(&adapter->rwi_lock,
+ flags);
+
+ rc = init_crq_queue(adapter);
+ if (rc) {
+ netdev_err(netdev, "login recovery: init CRQ failed %d\n",
+ rc);
+ return -EIO;
+ }
+
+ rc = ibmvnic_reset_init(adapter, false);
+ if (rc)
+ netdev_err(netdev, "login recovery: Reset init failed %d\n",
+ rc);
+ /* IBMVNIC_CRQ_INIT will return EAGAIN if it
+ * fails; since ibmvnic_reset_init will free
+ * irqs on failure, we won't be able to receive
+ * new CRQs, so we need to keep trying. probe()
+ * handles this similarly.
+ */
+ } while (rc == -EAGAIN && retry_count++ < retries);
}
} while (retry);
@@ -1585,12 +1671,22 @@ static int ibmvnic_login(struct net_device *netdev)
static void release_login_buffer(struct ibmvnic_adapter *adapter)
{
+ if (!adapter->login_buf)
+ return;
+
+ dma_unmap_single(&adapter->vdev->dev, adapter->login_buf_token,
+ adapter->login_buf_sz, DMA_TO_DEVICE);
kfree(adapter->login_buf);
adapter->login_buf = NULL;
}
static void release_login_rsp_buffer(struct ibmvnic_adapter *adapter)
{
+ if (!adapter->login_rsp_buf)
+ return;
+
+ dma_unmap_single(&adapter->vdev->dev, adapter->login_rsp_buf_token,
+ adapter->login_rsp_buf_sz, DMA_FROM_DEVICE);
kfree(adapter->login_rsp_buf);
adapter->login_rsp_buf = NULL;
}
@@ -1813,7 +1909,14 @@ static int __ibmvnic_open(struct net_device *netdev)
if (prev_state == VNIC_CLOSED)
enable_irq(adapter->tx_scrq[i]->irq);
enable_scrq_irq(adapter, adapter->tx_scrq[i]);
- netdev_tx_reset_queue(netdev_get_tx_queue(netdev, i));
+ /* netdev_tx_reset_queue will reset dql stats. During NON_FATAL
+ * resets, don't reset the stats because there could be batched
+ * skb's waiting to be sent. If we reset dql stats, we risk
+ * num_completed being greater than num_queued. This will cause
+ * a BUG_ON in dql_completed().
+ */
+ if (adapter->reset_reason != VNIC_RESET_NON_FATAL)
+ netdev_tx_reset_queue(netdev_get_tx_queue(netdev, i));
}
rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_UP);
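
A concrete example of the hazard the check above avoids: if ten skbs are accounted via
netdev_tx_sent_queue() before a NON_FATAL reset, zeroing the DQL counters here would
let the ten completions later reported through netdev_tx_completed_queue() exceed what
DQL believes was ever queued.
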
@@ -2061,63 +2164,49 @@ static int ibmvnic_close(struct net_device *netdev)
}
/**
- * build_hdr_data - creates L2/L3/L4 header data buffer
+ * get_hdr_lens - fills list of L2/L3/L4 hdr lens
* @hdr_field: bitfield determining needed headers
* @skb: socket buffer
- * @hdr_len: array of header lengths
- * @hdr_data: buffer to write the header to
+ * @hdr_len: array of header lengths to be filled
*
* Reads hdr_field to determine which headers are needed by firmware.
* Builds a buffer containing these headers. Saves individual header
* lengths and total buffer length to be used to build descriptors.
+ *
+ * Return: total len of all headers
*/
-static int build_hdr_data(u8 hdr_field, struct sk_buff *skb,
- int *hdr_len, u8 *hdr_data)
+static int get_hdr_lens(u8 hdr_field, struct sk_buff *skb,
+ int *hdr_len)
{
int len = 0;
- u8 *hdr;
- if (skb_vlan_tagged(skb) && !skb_vlan_tag_present(skb))
- hdr_len[0] = sizeof(struct vlan_ethhdr);
- else
- hdr_len[0] = sizeof(struct ethhdr);
+
+ if ((hdr_field >> 6) & 1) {
+ hdr_len[0] = skb_mac_header_len(skb);
+ len += hdr_len[0];
+ }
+
+ if ((hdr_field >> 5) & 1) {
+ hdr_len[1] = skb_network_header_len(skb);
+ len += hdr_len[1];
+ }
+
+ if (!((hdr_field >> 4) & 1))
+ return len;
if (skb->protocol == htons(ETH_P_IP)) {
- hdr_len[1] = ip_hdr(skb)->ihl * 4;
if (ip_hdr(skb)->protocol == IPPROTO_TCP)
hdr_len[2] = tcp_hdrlen(skb);
else if (ip_hdr(skb)->protocol == IPPROTO_UDP)
hdr_len[2] = sizeof(struct udphdr);
} else if (skb->protocol == htons(ETH_P_IPV6)) {
- hdr_len[1] = sizeof(struct ipv6hdr);
if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
hdr_len[2] = tcp_hdrlen(skb);
else if (ipv6_hdr(skb)->nexthdr == IPPROTO_UDP)
hdr_len[2] = sizeof(struct udphdr);
- } else if (skb->protocol == htons(ETH_P_ARP)) {
- hdr_len[1] = arp_hdr_len(skb->dev);
- hdr_len[2] = 0;
}
- memset(hdr_data, 0, 120);
- if ((hdr_field >> 6) & 1) {
- hdr = skb_mac_header(skb);
- memcpy(hdr_data, hdr, hdr_len[0]);
- len += hdr_len[0];
- }
-
- if ((hdr_field >> 5) & 1) {
- hdr = skb_network_header(skb);
- memcpy(hdr_data + len, hdr, hdr_len[1]);
- len += hdr_len[1];
- }
-
- if ((hdr_field >> 4) & 1) {
- hdr = skb_transport_header(skb);
- memcpy(hdr_data + len, hdr, hdr_len[2]);
- len += hdr_len[2];
- }
- return len;
+ return len + hdr_len[2];
}
/**
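
As a worked example of the bit tests above: bit 6 of hdr_field requests the L2 header,
bit 5 the L3 header and bit 4 the L4 header. For a plain TCP over IPv4 frame with all
three requested, get_hdr_lens() fills hdr_len = {14, 20, 20} (Ethernet, IPv4 and TCP
without options) and returns 54; build_hdr_descs_arr() below then lets
create_hdr_descs() read the headers straight from skb_mac_header(skb) instead of first
copying them into the old 140-byte on-stack buffer.
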
@@ -2130,12 +2219,14 @@ static int build_hdr_data(u8 hdr_field, struct sk_buff *skb,
*
* Creates header and, if needed, header extension descriptors and
* places them in a descriptor array, scrq_arr
+ *
+ * Return: Number of header descs
*/
static int create_hdr_descs(u8 hdr_field, u8 *hdr_data, int len, int *hdr_len,
union sub_crq *scrq_arr)
{
- union sub_crq hdr_desc;
+ union sub_crq *hdr_desc;
int tmp_len = len;
int num_descs = 0;
u8 *data, *cur;
@@ -2144,28 +2235,26 @@ static int create_hdr_descs(u8 hdr_field, u8 *hdr_data, int len, int *hdr_len,
while (tmp_len > 0) {
cur = hdr_data + len - tmp_len;
- memset(&hdr_desc, 0, sizeof(hdr_desc));
- if (cur != hdr_data) {
- data = hdr_desc.hdr_ext.data;
+ hdr_desc = &scrq_arr[num_descs];
+ if (num_descs) {
+ data = hdr_desc->hdr_ext.data;
tmp = tmp_len > 29 ? 29 : tmp_len;
- hdr_desc.hdr_ext.first = IBMVNIC_CRQ_CMD;
- hdr_desc.hdr_ext.type = IBMVNIC_HDR_EXT_DESC;
- hdr_desc.hdr_ext.len = tmp;
+ hdr_desc->hdr_ext.first = IBMVNIC_CRQ_CMD;
+ hdr_desc->hdr_ext.type = IBMVNIC_HDR_EXT_DESC;
+ hdr_desc->hdr_ext.len = tmp;
} else {
- data = hdr_desc.hdr.data;
+ data = hdr_desc->hdr.data;
tmp = tmp_len > 24 ? 24 : tmp_len;
- hdr_desc.hdr.first = IBMVNIC_CRQ_CMD;
- hdr_desc.hdr.type = IBMVNIC_HDR_DESC;
- hdr_desc.hdr.len = tmp;
- hdr_desc.hdr.l2_len = (u8)hdr_len[0];
- hdr_desc.hdr.l3_len = cpu_to_be16((u16)hdr_len[1]);
- hdr_desc.hdr.l4_len = (u8)hdr_len[2];
- hdr_desc.hdr.flag = hdr_field << 1;
+ hdr_desc->hdr.first = IBMVNIC_CRQ_CMD;
+ hdr_desc->hdr.type = IBMVNIC_HDR_DESC;
+ hdr_desc->hdr.len = tmp;
+ hdr_desc->hdr.l2_len = (u8)hdr_len[0];
+ hdr_desc->hdr.l3_len = cpu_to_be16((u16)hdr_len[1]);
+ hdr_desc->hdr.l4_len = (u8)hdr_len[2];
+ hdr_desc->hdr.flag = hdr_field << 1;
}
memcpy(data, cur, tmp);
tmp_len -= tmp;
- *scrq_arr = hdr_desc;
- scrq_arr++;
num_descs++;
}
@@ -2188,13 +2277,11 @@ static void build_hdr_descs_arr(struct sk_buff *skb,
int *num_entries, u8 hdr_field)
{
int hdr_len[3] = {0, 0, 0};
- u8 hdr_data[140] = {0};
int tot_len;
- tot_len = build_hdr_data(hdr_field, skb, hdr_len,
- hdr_data);
- *num_entries += create_hdr_descs(hdr_field, hdr_data, tot_len, hdr_len,
- indir_arr + 1);
+ tot_len = get_hdr_lens(hdr_field, skb, hdr_len);
+ *num_entries += create_hdr_descs(hdr_field, skb_mac_header(skb),
+ tot_len, hdr_len, indir_arr + 1);
}
static int ibmvnic_xmit_workarounds(struct sk_buff *skb,
@@ -2244,9 +2331,7 @@ static void ibmvnic_tx_scrq_clean_buffer(struct ibmvnic_adapter *adapter,
tx_pool->num_buffers - 1 :
tx_pool->consumer_index - 1;
tx_buff = &tx_pool->tx_buff[index];
- adapter->netdev->stats.tx_packets--;
- adapter->netdev->stats.tx_bytes -= tx_buff->skb->len;
- adapter->tx_stats_buffers[queue_num].packets--;
+ adapter->tx_stats_buffers[queue_num].batched_packets--;
adapter->tx_stats_buffers[queue_num].bytes -=
tx_buff->skb->len;
dev_kfree_skb_any(tx_buff->skb);
@@ -2271,8 +2356,29 @@ static void ibmvnic_tx_scrq_clean_buffer(struct ibmvnic_adapter *adapter,
}
}
+static int send_subcrq_direct(struct ibmvnic_adapter *adapter,
+ u64 remote_handle, u64 *entry)
+{
+ unsigned int ua = adapter->vdev->unit_address;
+ struct device *dev = &adapter->vdev->dev;
+ int rc;
+
+ /* Make sure the hypervisor sees the complete request */
+ dma_wmb();
+ rc = plpar_hcall_norets(H_SEND_SUB_CRQ, ua,
+ cpu_to_be64(remote_handle),
+ cpu_to_be64(entry[0]), cpu_to_be64(entry[1]),
+ cpu_to_be64(entry[2]), cpu_to_be64(entry[3]));
+
+ if (rc)
+ print_subcrq_error(dev, rc, __func__);
+
+ return rc;
+}
+
static int ibmvnic_tx_scrq_flush(struct ibmvnic_adapter *adapter,
- struct ibmvnic_sub_crq_queue *tx_scrq)
+ struct ibmvnic_sub_crq_queue *tx_scrq,
+ bool indirect)
{
struct ibmvnic_ind_xmit_queue *ind_bufp;
u64 dma_addr;
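
For reference, the two submission paths differ in how descriptors reach the
hypervisor: send_subcrq_indirect() passes a DMA address and a count for the
descriptors staged in ind_bufp->indir_arr, while the new send_subcrq_direct() passes a
single 32-byte descriptor in the H_SEND_SUB_CRQ arguments themselves. The direct path
is therefore only taken further down in ibmvnic_xmit() when the skb needs exactly one
descriptor: no header descriptors (*hdrs == 0), not GSO, nothing already batched in
ind_bufp, and !netdev_xmit_more().
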
@@ -2287,17 +2393,35 @@ static int ibmvnic_tx_scrq_flush(struct ibmvnic_adapter *adapter,
if (!entries)
return 0;
- rc = send_subcrq_indirect(adapter, handle, dma_addr, entries);
- if (rc)
- ibmvnic_tx_scrq_clean_buffer(adapter, tx_scrq);
+
+ if (indirect)
+ rc = send_subcrq_indirect(adapter, handle, dma_addr, entries);
else
+ rc = send_subcrq_direct(adapter, handle,
+ (u64 *)ind_bufp->indir_arr);
+
+ if (rc) {
+ dev_err_ratelimited(&adapter->vdev->dev,
+ "tx_flush failed, rc=%u (%llu entries dma=%pad handle=%llx)\n",
+ rc, entries, &dma_addr, handle);
+ /* Detect platform limit H_PARAMETER */
+ if (rc == H_PARAMETER)
+ ibmvnic_set_safe_max_ind_descs(adapter);
+
+ /* For all error cases, temporarily drop only this batch
+ * Rely on TCP/IP retransmissions to retry and recover
+ */
+ ibmvnic_tx_scrq_clean_buffer(adapter, tx_scrq);
+ } else {
ind_bufp->index = 0;
- return 0;
+ }
+ return rc;
}
static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
{
struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+ u32 cur_max_ind_descs = adapter->cur_max_ind_descs;
int queue_num = skb_get_queue_mapping(skb);
u8 *hdrs = (u8 *)&adapter->tx_rx_desc_req;
struct device *dev = &adapter->vdev->dev;
@@ -2311,13 +2435,16 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
unsigned int tx_map_failed = 0;
union sub_crq indir_arr[16];
unsigned int tx_dropped = 0;
- unsigned int tx_packets = 0;
+ unsigned int tx_dpackets = 0;
+ unsigned int tx_bpackets = 0;
unsigned int tx_bytes = 0;
dma_addr_t data_dma_addr;
struct netdev_queue *txq;
unsigned long lpar_rc;
+ unsigned int skblen;
union sub_crq tx_crq;
unsigned int offset;
+ bool use_scrq_send_direct = false;
int num_entries = 1;
unsigned char *dst;
int bufidx = 0;
@@ -2345,7 +2472,9 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
tx_dropped++;
tx_send_failed++;
ret = NETDEV_TX_OK;
- ibmvnic_tx_scrq_flush(adapter, tx_scrq);
+ lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq, true);
+ if (lpar_rc != H_SUCCESS)
+ goto tx_err;
goto out;
}
@@ -2360,8 +2489,10 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
dev_kfree_skb_any(skb);
tx_send_failed++;
tx_dropped++;
- ibmvnic_tx_scrq_flush(adapter, tx_scrq);
ret = NETDEV_TX_OK;
+ lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq, true);
+ if (lpar_rc != H_SUCCESS)
+ goto tx_err;
goto out;
}
@@ -2373,6 +2504,20 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
memset(dst, 0, tx_pool->buf_size);
data_dma_addr = ltb->addr + offset;
+ /* if we are going to send this via send_subcrq_direct then we need to
+ * update the checksum before copying the data into the ltb. Essentially
+ * these packets force-disable CSO so that we can guarantee that the
+ * FW does not need header info and we can send direct. Also, the vnic
+ * server must be able to xmit standard packets without header data.
+ */
+ if (*hdrs == 0 && !skb_is_gso(skb) &&
+ !ind_bufp->index && !netdev_xmit_more()) {
+ use_scrq_send_direct = true;
+ if (skb->ip_summed == CHECKSUM_PARTIAL &&
+ skb_checksum_help(skb))
+ use_scrq_send_direct = false;
+ }
+
if (skb_shinfo(skb)->nr_frags) {
int cur, i;
@@ -2392,16 +2537,26 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
skb_copy_from_linear_data(skb, dst, skb->len);
}
- /* post changes to long_term_buff *dst before VIOS accessing it */
- dma_wmb();
-
tx_pool->consumer_index =
(tx_pool->consumer_index + 1) % tx_pool->num_buffers;
tx_buff = &tx_pool->tx_buff[bufidx];
+
+ /* Sanity checks on our free map to make sure it points to an index
+ * that is not being occupied by another skb. If skb memory is
+ * not freed then we see congestion control kick in and halt tx.
+ */
+ if (unlikely(tx_buff->skb)) {
+ dev_warn_ratelimited(dev, "TX free map points to untracked skb (%s %d idx=%d)\n",
+ skb_is_gso(skb) ? "tso_pool" : "tx_pool",
+ queue_num, bufidx);
+ dev_kfree_skb_any(tx_buff->skb);
+ }
+
tx_buff->skb = skb;
tx_buff->index = bufidx;
tx_buff->pool_index = queue_num;
+ skblen = skb->len;
memset(&tx_crq, 0, sizeof(tx_crq));
tx_crq.v1.first = IBMVNIC_CRQ_CMD;
@@ -2445,6 +2600,19 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
tx_crq.v1.flags1 |= IBMVNIC_TX_LSO;
tx_crq.v1.mss = cpu_to_be16(skb_shinfo(skb)->gso_size);
hdrs += 2;
+ } else if (use_scrq_send_direct) {
+ /* See above comment, CSO disabled with direct xmit */
+ tx_crq.v1.flags1 &= ~(IBMVNIC_TX_CHKSUM_OFFLOAD);
+ ind_bufp->index = 1;
+ tx_buff->num_entries = 1;
+ netdev_tx_sent_queue(txq, skb->len);
+ ind_bufp->indir_arr[0] = tx_crq;
+ lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq, false);
+ if (lpar_rc != H_SUCCESS)
+ goto tx_err;
+
+ tx_dpackets++;
+ goto early_exit;
}
if ((*hdrs >> 7) & 1)
@@ -2453,8 +2621,8 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
tx_crq.v1.n_crq_elem = num_entries;
tx_buff->num_entries = num_entries;
/* flush buffer if current entry can not fit */
- if (num_entries + ind_bufp->index > IBMVNIC_MAX_IND_DESCS) {
- lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq);
+ if (num_entries + ind_bufp->index > cur_max_ind_descs) {
+ lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq, true);
if (lpar_rc != H_SUCCESS)
goto tx_flush_err;
}
@@ -2462,23 +2630,26 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
indir_arr[0] = tx_crq;
memcpy(&ind_bufp->indir_arr[ind_bufp->index], &indir_arr[0],
num_entries * sizeof(struct ibmvnic_generic_scrq));
+
ind_bufp->index += num_entries;
if (__netdev_tx_sent_queue(txq, skb->len,
netdev_xmit_more() &&
- ind_bufp->index < IBMVNIC_MAX_IND_DESCS)) {
- lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq);
+ ind_bufp->index < cur_max_ind_descs)) {
+ lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq, true);
if (lpar_rc != H_SUCCESS)
goto tx_err;
}
+ tx_bpackets++;
+
+early_exit:
if (atomic_add_return(num_entries, &tx_scrq->used)
>= adapter->req_tx_entries_per_subcrq) {
netdev_dbg(netdev, "Stopping queue %d\n", queue_num);
netif_stop_subqueue(netdev, queue_num);
}
- tx_packets++;
- tx_bytes += skb->len;
+ tx_bytes += skblen;
txq_trans_cond_update(txq);
ret = NETDEV_TX_OK;
goto out;
@@ -2505,12 +2676,10 @@ tx_err:
}
out:
rcu_read_unlock();
- netdev->stats.tx_dropped += tx_dropped;
- netdev->stats.tx_bytes += tx_bytes;
- netdev->stats.tx_packets += tx_packets;
adapter->tx_send_failed += tx_send_failed;
adapter->tx_map_failed += tx_map_failed;
- adapter->tx_stats_buffers[queue_num].packets += tx_packets;
+ adapter->tx_stats_buffers[queue_num].batched_packets += tx_bpackets;
+ adapter->tx_stats_buffers[queue_num].direct_packets += tx_dpackets;
adapter->tx_stats_buffers[queue_num].bytes += tx_bytes;
adapter->tx_stats_buffers[queue_num].dropped_packets += tx_dropped;
@@ -3309,6 +3478,25 @@ err:
return -ret;
}
+static void ibmvnic_get_stats64(struct net_device *netdev,
+ struct rtnl_link_stats64 *stats)
+{
+ struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+ int i;
+
+ for (i = 0; i < adapter->req_rx_queues; i++) {
+ stats->rx_packets += adapter->rx_stats_buffers[i].packets;
+ stats->rx_bytes += adapter->rx_stats_buffers[i].bytes;
+ }
+
+ for (i = 0; i < adapter->req_tx_queues; i++) {
+ stats->tx_packets += adapter->tx_stats_buffers[i].batched_packets;
+ stats->tx_packets += adapter->tx_stats_buffers[i].direct_packets;
+ stats->tx_bytes += adapter->tx_stats_buffers[i].bytes;
+ stats->tx_dropped += adapter->tx_stats_buffers[i].dropped_packets;
+ }
+}
+
static void ibmvnic_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct ibmvnic_adapter *adapter = netdev_priv(dev);
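
With the per-packet netdev->stats updates removed from the TX and RX hot paths, these
64-bit totals are now assembled on demand from the per-queue counters whenever the
core asks for them through .ndo_get_stats64 (hooked up in ibmvnic_netdev_ops below),
for example when "ip -s link show" is run.
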
@@ -3424,23 +3612,20 @@ restart_poll:
length = skb->len;
napi_gro_receive(napi, skb); /* send it up */
- netdev->stats.rx_packets++;
- netdev->stats.rx_bytes += length;
adapter->rx_stats_buffers[scrq_num].packets++;
adapter->rx_stats_buffers[scrq_num].bytes += length;
frames_processed++;
}
if (adapter->state != VNIC_CLOSING &&
- ((atomic_read(&adapter->rx_pool[scrq_num].available) <
- adapter->req_rx_add_entries_per_subcrq / 2) ||
- frames_processed < budget))
+ (atomic_read(&adapter->rx_pool[scrq_num].available) <
+ adapter->req_rx_add_entries_per_subcrq / 2))
replenish_rx_pool(adapter, &adapter->rx_pool[scrq_num]);
if (frames_processed < budget) {
if (napi_complete_done(napi, frames_processed)) {
enable_scrq_irq(adapter, rx_scrq);
if (pending_scrq(adapter, rx_scrq)) {
- if (napi_reschedule(napi)) {
+ if (napi_schedule(napi)) {
disable_scrq_irq(adapter, rx_scrq);
goto restart_poll;
}
@@ -3536,6 +3721,7 @@ static const struct net_device_ops ibmvnic_netdev_ops = {
.ndo_set_rx_mode = ibmvnic_set_multi,
.ndo_set_mac_address = ibmvnic_set_mac,
.ndo_validate_addr = eth_validate_addr,
+ .ndo_get_stats64 = ibmvnic_get_stats64,
.ndo_tx_timeout = ibmvnic_tx_timeout,
.ndo_change_mtu = ibmvnic_change_mtu,
.ndo_features_check = ibmvnic_features_check,
@@ -3672,29 +3858,20 @@ static void ibmvnic_get_strings(struct net_device *dev, u32 stringset, u8 *data)
if (stringset != ETH_SS_STATS)
return;
- for (i = 0; i < ARRAY_SIZE(ibmvnic_stats); i++, data += ETH_GSTRING_LEN)
- memcpy(data, ibmvnic_stats[i].name, ETH_GSTRING_LEN);
+ for (i = 0; i < ARRAY_SIZE(ibmvnic_stats); i++)
+ ethtool_puts(&data, ibmvnic_stats[i].name);
for (i = 0; i < adapter->req_tx_queues; i++) {
- snprintf(data, ETH_GSTRING_LEN, "tx%d_packets", i);
- data += ETH_GSTRING_LEN;
-
- snprintf(data, ETH_GSTRING_LEN, "tx%d_bytes", i);
- data += ETH_GSTRING_LEN;
-
- snprintf(data, ETH_GSTRING_LEN, "tx%d_dropped_packets", i);
- data += ETH_GSTRING_LEN;
+ ethtool_sprintf(&data, "tx%d_batched_packets", i);
+ ethtool_sprintf(&data, "tx%d_direct_packets", i);
+ ethtool_sprintf(&data, "tx%d_bytes", i);
+ ethtool_sprintf(&data, "tx%d_dropped_packets", i);
}
for (i = 0; i < adapter->req_rx_queues; i++) {
- snprintf(data, ETH_GSTRING_LEN, "rx%d_packets", i);
- data += ETH_GSTRING_LEN;
-
- snprintf(data, ETH_GSTRING_LEN, "rx%d_bytes", i);
- data += ETH_GSTRING_LEN;
-
- snprintf(data, ETH_GSTRING_LEN, "rx%d_interrupts", i);
- data += ETH_GSTRING_LEN;
+ ethtool_sprintf(&data, "rx%d_packets", i);
+ ethtool_sprintf(&data, "rx%d_bytes", i);
+ ethtool_sprintf(&data, "rx%d_interrupts", i);
}
}
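
The renamed and extended per-queue counters become visible through "ethtool -S", e.g.
tx0_batched_packets and tx0_direct_packets next to the existing tx0_bytes and
tx0_dropped_packets; the matching values are filled in by ibmvnic_get_ethtool_stats()
in the next hunk.
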
@@ -3741,7 +3918,9 @@ static void ibmvnic_get_ethtool_stats(struct net_device *dev,
(adapter, ibmvnic_stats[i].offset));
for (j = 0; j < adapter->req_tx_queues; j++) {
- data[i] = adapter->tx_stats_buffers[j].packets;
+ data[i] = adapter->tx_stats_buffers[j].batched_packets;
+ i++;
+ data[i] = adapter->tx_stats_buffers[j].direct_packets;
i++;
data[i] = adapter->tx_stats_buffers[j].bytes;
i++;
@@ -3858,7 +4037,7 @@ static void release_sub_crq_queue(struct ibmvnic_adapter *adapter,
}
dma_free_coherent(dev,
- IBMVNIC_IND_ARR_SZ,
+ IBMVNIC_IND_MAX_ARR_SZ,
scrq->ind_buf.indir_arr,
scrq->ind_buf.indir_dma);
@@ -3915,7 +4094,7 @@ static struct ibmvnic_sub_crq_queue *init_sub_crq_queue(struct ibmvnic_adapter
scrq->ind_buf.indir_arr =
dma_alloc_coherent(dev,
- IBMVNIC_IND_ARR_SZ,
+ IBMVNIC_IND_MAX_ARR_SZ,
&scrq->ind_buf.indir_dma,
GFP_KERNEL);
@@ -3978,6 +4157,12 @@ static void release_sub_crqs(struct ibmvnic_adapter *adapter, bool do_h_free)
adapter->num_active_tx_scrqs = 0;
}
+ /* Clean any remaining outstanding SKBs;
+ * we freed the irq, so we won't be hearing
+ * from them.
+ */
+ clean_tx_pools(adapter);
+
if (adapter->rx_scrq) {
for (i = 0; i < adapter->num_active_rx_scrqs; i++) {
if (!adapter->rx_scrq[i])
@@ -4068,20 +4253,17 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter,
struct ibmvnic_sub_crq_queue *scrq)
{
struct device *dev = &adapter->vdev->dev;
+ int num_packets = 0, total_bytes = 0;
struct ibmvnic_tx_pool *tx_pool;
struct ibmvnic_tx_buff *txbuff;
struct netdev_queue *txq;
union sub_crq *next;
- int index;
- int i;
+ int index, i;
restart_loop:
while (pending_scrq(adapter, scrq)) {
unsigned int pool = scrq->pool_index;
int num_entries = 0;
- int total_bytes = 0;
- int num_packets = 0;
-
next = ibmvnic_next_scrq(adapter, scrq);
for (i = 0; i < next->tx_comp.num_comps; i++) {
index = be32_to_cpu(next->tx_comp.correlators[i]);
@@ -4117,8 +4299,6 @@ restart_loop:
/* remove tx_comp scrq*/
next->tx_comp.first = 0;
- txq = netdev_get_tx_queue(adapter->netdev, scrq->pool_index);
- netdev_tx_completed_queue(txq, num_packets, total_bytes);
if (atomic_sub_return(num_entries, &scrq->used) <=
(adapter->req_tx_entries_per_subcrq / 2) &&
@@ -4143,6 +4323,9 @@ restart_loop:
goto restart_loop;
}
+ txq = netdev_get_tx_queue(adapter->netdev, scrq->pool_index);
+ netdev_tx_completed_queue(txq, num_packets, total_bytes);
+
return 0;
}
@@ -4694,6 +4877,18 @@ static void vnic_add_client_data(struct ibmvnic_adapter *adapter,
strscpy(vlcd->name, adapter->netdev->name, len);
}
+static void ibmvnic_print_hex_dump(struct net_device *dev, void *buf,
+ size_t len)
+{
+ unsigned char hex_str[16 * 3];
+
+ for (size_t i = 0; i < len; i += 16) {
+ hex_dump_to_buffer((unsigned char *)buf + i, len - i, 16, 8,
+ hex_str, sizeof(hex_str), false);
+ netdev_dbg(dev, "%s\n", hex_str);
+ }
+}
+
static int send_login(struct ibmvnic_adapter *adapter)
{
struct ibmvnic_login_rsp_buffer *login_rsp_buffer;
@@ -4804,10 +4999,8 @@ static int send_login(struct ibmvnic_adapter *adapter)
vnic_add_client_data(adapter, vlcd);
netdev_dbg(adapter->netdev, "Login Buffer:\n");
- for (i = 0; i < (adapter->login_buf_sz - 1) / 8 + 1; i++) {
- netdev_dbg(adapter->netdev, "%016lx\n",
- ((unsigned long *)(adapter->login_buf))[i]);
- }
+ ibmvnic_print_hex_dump(adapter->netdev, adapter->login_buf,
+ adapter->login_buf_sz);
memset(&crq, 0, sizeof(crq));
crq.login.first = IBMVNIC_CRQ_CMD;
@@ -4820,11 +5013,14 @@ static int send_login(struct ibmvnic_adapter *adapter)
if (rc) {
adapter->login_pending = false;
netdev_err(adapter->netdev, "Failed to send login, rc=%d\n", rc);
- goto buf_rsp_map_failed;
+ goto buf_send_failed;
}
return 0;
+buf_send_failed:
+ dma_unmap_single(dev, rsp_buffer_token, rsp_buffer_size,
+ DMA_FROM_DEVICE);
buf_rsp_map_failed:
kfree(login_rsp_buffer);
adapter->login_rsp_buf = NULL;
@@ -5165,7 +5361,8 @@ static void handle_vpd_rsp(union ibmvnic_crq *crq,
/* copy firmware version string from vpd into adapter */
if ((substr + 3 + fw_level_len) <
(adapter->vpd->buff + adapter->vpd->len)) {
- strncpy((char *)adapter->fw_version, substr + 3, fw_level_len);
+ strscpy(adapter->fw_version, substr + 3,
+ sizeof(adapter->fw_version));
} else {
dev_info(dev, "FW substr extrapolated VPD buff\n");
}
@@ -5180,15 +5377,13 @@ static void handle_query_ip_offload_rsp(struct ibmvnic_adapter *adapter)
{
struct device *dev = &adapter->vdev->dev;
struct ibmvnic_query_ip_offload_buffer *buf = &adapter->ip_offload_buf;
- int i;
dma_unmap_single(dev, adapter->ip_offload_tok,
sizeof(adapter->ip_offload_buf), DMA_FROM_DEVICE);
netdev_dbg(adapter->netdev, "Query IP Offload Buffer:\n");
- for (i = 0; i < (sizeof(adapter->ip_offload_buf) - 1) / 8 + 1; i++)
- netdev_dbg(adapter->netdev, "%016lx\n",
- ((unsigned long *)(buf))[i]);
+ ibmvnic_print_hex_dump(adapter->netdev, buf,
+ sizeof(adapter->ip_offload_buf));
netdev_dbg(adapter->netdev, "ipv4_chksum = %d\n", buf->ipv4_chksum);
netdev_dbg(adapter->netdev, "ipv6_chksum = %d\n", buf->ipv6_chksum);
@@ -5386,6 +5581,7 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq,
int num_tx_pools;
int num_rx_pools;
u64 *size_array;
+ u32 rsp_len;
int i;
/* CHECK: Test/set of login_pending does not need to be atomic
@@ -5397,11 +5593,6 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq,
}
adapter->login_pending = false;
- dma_unmap_single(dev, adapter->login_buf_token, adapter->login_buf_sz,
- DMA_TO_DEVICE);
- dma_unmap_single(dev, adapter->login_rsp_buf_token,
- adapter->login_rsp_buf_sz, DMA_FROM_DEVICE);
-
/* If the number of queues requested can't be allocated by the
* server, the login response will return with code 1. We will need
* to resend the login buffer with fewer queues requested.
@@ -5423,10 +5614,8 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq,
netdev->mtu = adapter->req_mtu - ETH_HLEN;
netdev_dbg(adapter->netdev, "Login Response Buffer:\n");
- for (i = 0; i < (adapter->login_rsp_buf_sz - 1) / 8 + 1; i++) {
- netdev_dbg(adapter->netdev, "%016lx\n",
- ((unsigned long *)(adapter->login_rsp_buf))[i]);
- }
+ ibmvnic_print_hex_dump(netdev, adapter->login_rsp_buf,
+ adapter->login_rsp_buf_sz);
/* Sanity checks */
if (login->num_txcomp_subcrqs != login_rsp->num_txsubm_subcrqs ||
@@ -5437,6 +5626,23 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq,
ibmvnic_reset(adapter, VNIC_RESET_FATAL);
return -EIO;
}
+
+ rsp_len = be32_to_cpu(login_rsp->len);
+ if (be32_to_cpu(login->login_rsp_len) < rsp_len ||
+ rsp_len <= be32_to_cpu(login_rsp->off_txsubm_subcrqs) ||
+ rsp_len <= be32_to_cpu(login_rsp->off_rxadd_subcrqs) ||
+ rsp_len <= be32_to_cpu(login_rsp->off_rxadd_buff_size) ||
+ rsp_len <= be32_to_cpu(login_rsp->off_supp_tx_desc)) {
+ /* This can happen if a login request times out and there are
+ * 2 outstanding login requests sent, the LOGIN_RSP crq
+ * could have been for the older login request. So we are
+ * parsing the newer response buffer which may be incomplete
+ */
+ dev_err(dev, "FATAL: Login rsp offsets/lengths invalid\n");
+ ibmvnic_reset(adapter, VNIC_RESET_FATAL);
+ return -EIO;
+ }
+
size_array = (u64 *)((u8 *)(adapter->login_rsp_buf) +
be32_to_cpu(adapter->login_rsp_buf->off_rxadd_buff_size));
/* variable buffer sizes are not supported, so just read the
@@ -6194,6 +6400,19 @@ static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter, bool reset)
rc = reset_sub_crq_queues(adapter);
}
} else {
+ if (adapter->reset_reason == VNIC_RESET_MOBILITY) {
+ /* After an LPM, reset the max number of indirect
+ * subcrq descriptors per H_SEND_SUB_CRQ_INDIRECT
+ * hcall to the default max (e.g POWER8 -> POWER10)
+ *
+ * If the new destination platform does not support
+ * the higher limit max (e.g. POWER10-> POWER8 LPM)
+ * H_PARAMETER will trigger automatic fallback to the
+ * safe minimum limit.
+ */
+ adapter->cur_max_ind_descs = IBMVNIC_MAX_IND_DESCS;
+ }
+
rc = init_sub_crqs(adapter);
}
@@ -6345,6 +6564,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
adapter->wait_for_reset = false;
adapter->last_reset_time = jiffies;
+ adapter->cur_max_ind_descs = IBMVNIC_MAX_IND_DESCS;
rc = register_netdev(netdev);
if (rc) {