diff options
Diffstat (limited to 'drivers/net/ethernet/amazon/ena/ena_netdev.c')
-rw-r--r-- | drivers/net/ethernet/amazon/ena/ena_netdev.c | 324 |
1 files changed, 211 insertions, 113 deletions
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 1c0a7828d397..c1295dfad0d0 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -32,7 +32,7 @@ MODULE_LICENSE("GPL"); #define ENA_MAX_RINGS min_t(unsigned int, ENA_MAX_NUM_IO_QUEUES, num_possible_cpus()) #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_IFUP | \ - NETIF_MSG_TX_DONE | NETIF_MSG_TX_ERR | NETIF_MSG_RX_ERR) + NETIF_MSG_IFDOWN | NETIF_MSG_TX_ERR | NETIF_MSG_RX_ERR) static struct ena_aenq_handlers aenq_handlers; @@ -42,24 +42,49 @@ MODULE_DEVICE_TABLE(pci, ena_pci_tbl); static int ena_rss_init_default(struct ena_adapter *adapter); static void check_for_admin_com_state(struct ena_adapter *adapter); -static void ena_destroy_device(struct ena_adapter *adapter, bool graceful); +static int ena_destroy_device(struct ena_adapter *adapter, bool graceful); static int ena_restore_device(struct ena_adapter *adapter); static void ena_tx_timeout(struct net_device *dev, unsigned int txqueue) { + enum ena_regs_reset_reason_types reset_reason = ENA_REGS_RESET_OS_NETDEV_WD; struct ena_adapter *adapter = netdev_priv(dev); + unsigned int time_since_last_napi, threshold; + struct ena_ring *tx_ring; + int napi_scheduled; + + if (txqueue >= adapter->num_io_queues) { + netdev_err(dev, "TX timeout on invalid queue %u\n", txqueue); + goto schedule_reset; + } + + threshold = jiffies_to_usecs(dev->watchdog_timeo); + tx_ring = &adapter->tx_ring[txqueue]; + time_since_last_napi = jiffies_to_usecs(jiffies - tx_ring->tx_stats.last_napi_jiffies); + napi_scheduled = !!(tx_ring->napi->state & NAPIF_STATE_SCHED); + + netdev_err(dev, + "TX q %d is paused for too long (threshold %u). Time since last napi %u usec. napi scheduled: %d\n", + txqueue, + threshold, + time_since_last_napi, + napi_scheduled); + + if (threshold < time_since_last_napi && napi_scheduled) { + netdev_err(dev, + "napi handler hasn't been called for a long time but is scheduled\n"); + reset_reason = ENA_REGS_RESET_SUSPECTED_POLL_STARVATION; + } +schedule_reset: /* Change the state of the device to trigger reset * Check that we are not in the middle or a trigger already */ - if (test_and_set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags)) return; - ena_reset_device(adapter, ENA_REGS_RESET_OS_NETDEV_WD); + ena_reset_device(adapter, reset_reason); ena_increase_stat(&adapter->dev_stats.tx_timeout, 1, &adapter->syncp); - - netif_err(adapter, tx_err, dev, "Transmit time out\n"); } static void update_rx_ring_mtu(struct ena_adapter *adapter, int mtu) @@ -79,7 +104,7 @@ static int ena_change_mtu(struct net_device *dev, int new_mtu) if (!ret) { netif_dbg(adapter, drv, dev, "Set MTU to %d\n", new_mtu); update_rx_ring_mtu(adapter, new_mtu); - dev->mtu = new_mtu; + WRITE_ONCE(dev->mtu, new_mtu); } else { netif_err(adapter, drv, dev, "Failed to set MTU to %d\n", new_mtu); @@ -116,11 +141,9 @@ int ena_xmit_common(struct ena_adapter *adapter, if (unlikely(rc)) { netif_err(adapter, tx_queued, adapter->netdev, "Failed to prepare tx bufs\n"); - ena_increase_stat(&ring->tx_stats.prepare_ctx_err, 1, - &ring->syncp); + ena_increase_stat(&ring->tx_stats.prepare_ctx_err, 1, &ring->syncp); if (rc != -ENOMEM) - ena_reset_device(adapter, - ENA_REGS_RESET_DRIVER_INVALID_STATE); + ena_reset_device(adapter, ENA_REGS_RESET_DRIVER_INVALID_STATE); return rc; } @@ -485,8 +508,7 @@ static struct page *ena_alloc_map_page(struct ena_ring *rx_ring, */ page = dev_alloc_page(); if (!page) { - ena_increase_stat(&rx_ring->rx_stats.page_alloc_fail, 1, - &rx_ring->syncp); + ena_increase_stat(&rx_ring->rx_stats.page_alloc_fail, 1, &rx_ring->syncp); return ERR_PTR(-ENOSPC); } @@ -523,7 +545,7 @@ static int ena_alloc_rx_buffer(struct ena_ring *rx_ring, /* We handle DMA here */ page = ena_alloc_map_page(rx_ring, &dma); - if (unlikely(IS_ERR(page))) + if (IS_ERR(page)) return PTR_ERR(page); netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, @@ -545,8 +567,8 @@ static void ena_unmap_rx_buff_attrs(struct ena_ring *rx_ring, struct ena_rx_buffer *rx_info, unsigned long attrs) { - dma_unmap_page_attrs(rx_ring->dev, rx_info->dma_addr, ENA_PAGE_SIZE, - DMA_BIDIRECTIONAL, attrs); + dma_unmap_page_attrs(rx_ring->dev, rx_info->dma_addr, ENA_PAGE_SIZE, DMA_BIDIRECTIONAL, + attrs); } static void ena_free_rx_page(struct ena_ring *rx_ring, @@ -696,8 +718,11 @@ void ena_unmap_tx_buff(struct ena_ring *tx_ring, static void ena_free_tx_bufs(struct ena_ring *tx_ring) { bool print_once = true; + bool is_xdp_ring; u32 i; + is_xdp_ring = ENA_IS_XDP_INDEX(tx_ring->adapter, tx_ring->qid); + for (i = 0; i < tx_ring->ring_size; i++) { struct ena_tx_buffer *tx_info = &tx_ring->tx_buffer_info[i]; @@ -717,10 +742,15 @@ static void ena_free_tx_bufs(struct ena_ring *tx_ring) ena_unmap_tx_buff(tx_ring, tx_info); - dev_kfree_skb_any(tx_info->skb); + if (is_xdp_ring) + xdp_return_frame(tx_info->xdpf); + else + dev_kfree_skb_any(tx_info->skb); } - netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev, - tx_ring->qid)); + + if (!is_xdp_ring) + netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev, + tx_ring->qid)); } static void ena_free_all_tx_bufs(struct ena_adapter *adapter) @@ -819,8 +849,7 @@ static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget) &req_id); if (rc) { if (unlikely(rc == -EINVAL)) - handle_invalid_req_id(tx_ring, req_id, NULL, - false); + handle_invalid_req_id(tx_ring, req_id, NULL, false); break; } @@ -856,7 +885,6 @@ static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget) tx_ring->next_to_clean = next_to_clean; ena_com_comp_ack(tx_ring->ena_com_io_sq, total_done); - ena_com_update_dev_comp_head(tx_ring->ena_com_io_cq); netdev_tx_completed_queue(txq, tx_pkts, tx_bytes); @@ -1046,8 +1074,7 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, DMA_FROM_DEVICE); if (!reuse_rx_buf_page) - ena_unmap_rx_buff_attrs(rx_ring, rx_info, - DMA_ATTR_SKIP_CPU_SYNC); + ena_unmap_rx_buff_attrs(rx_ring, rx_info, DMA_ATTR_SKIP_CPU_SYNC); skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_info->page, page_offset + buf_offset, len, buf_len); @@ -1303,10 +1330,8 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, ENA_RX_REFILL_THRESH_PACKET); /* Optimization, try to batch new rx buffers */ - if (refill_required > refill_threshold) { - ena_com_update_dev_comp_head(rx_ring->ena_com_io_cq); + if (refill_required > refill_threshold) ena_refill_rx_bufs(rx_ring, refill_required); - } if (xdp_flags & ENA_XDP_REDIRECT) xdp_do_flush(); @@ -1320,9 +1345,10 @@ error: adapter = netdev_priv(rx_ring->netdev); if (rc == -ENOSPC) { - ena_increase_stat(&rx_ring->rx_stats.bad_desc_num, 1, - &rx_ring->syncp); + ena_increase_stat(&rx_ring->rx_stats.bad_desc_num, 1, &rx_ring->syncp); ena_reset_device(adapter, ENA_REGS_RESET_TOO_MANY_RX_DESCS); + } else if (rc == -EFAULT) { + ena_reset_device(adapter, ENA_REGS_RESET_RX_DESCRIPTOR_MALFORMED); } else { ena_increase_stat(&rx_ring->rx_stats.bad_req_id, 1, &rx_ring->syncp); @@ -1357,7 +1383,7 @@ static void ena_adjust_adaptive_rx_intr_moderation(struct ena_napi *ena_napi) rx_ring->rx_stats.bytes, &dim_sample); - net_dim(&ena_napi->dim, dim_sample); + net_dim(&ena_napi->dim, &dim_sample); rx_ring->per_napi_packets = 0; } @@ -1651,9 +1677,9 @@ static int ena_request_mgmnt_irq(struct ena_adapter *adapter) static int ena_request_io_irq(struct ena_adapter *adapter) { u32 io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues; + int rc = 0, i, k, irq_idx; unsigned long flags = 0; struct ena_irq *irq; - int rc = 0, i, k; if (!test_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags)) { netif_err(adapter, ifup, adapter->netdev, @@ -1679,6 +1705,16 @@ static int ena_request_io_irq(struct ena_adapter *adapter) irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask); } + /* Now that IO IRQs have been successfully allocated map them to the + * corresponding IO NAPI instance. Note that the mgmnt IRQ does not + * have a NAPI, so care must be taken to correctly map IRQs to NAPIs. + */ + for (i = 0; i < io_queue_count; i++) { + irq_idx = ENA_IO_IRQ_IDX(i); + irq = &adapter->irq_tbl[irq_idx]; + netif_napi_set_irq(&adapter->ena_napi[i].napi, irq->vector); + } + return rc; err: @@ -1785,20 +1821,40 @@ static void ena_napi_disable_in_range(struct ena_adapter *adapter, int first_index, int count) { + struct napi_struct *napi; int i; - for (i = first_index; i < first_index + count; i++) - napi_disable(&adapter->ena_napi[i].napi); + for (i = first_index; i < first_index + count; i++) { + napi = &adapter->ena_napi[i].napi; + if (!ENA_IS_XDP_INDEX(adapter, i)) { + /* This API is supported for non-XDP queues only */ + netif_queue_set_napi(adapter->netdev, i, + NETDEV_QUEUE_TYPE_TX, NULL); + netif_queue_set_napi(adapter->netdev, i, + NETDEV_QUEUE_TYPE_RX, NULL); + } + napi_disable(napi); + } } static void ena_napi_enable_in_range(struct ena_adapter *adapter, int first_index, int count) { + struct napi_struct *napi; int i; - for (i = first_index; i < first_index + count; i++) - napi_enable(&adapter->ena_napi[i].napi); + for (i = first_index; i < first_index + count; i++) { + napi = &adapter->ena_napi[i].napi; + napi_enable(napi); + if (!ENA_IS_XDP_INDEX(adapter, i)) { + /* This API is supported for non-XDP queues only */ + netif_queue_set_napi(adapter->netdev, i, + NETDEV_QUEUE_TYPE_RX, napi); + netif_queue_set_napi(adapter->netdev, i, + NETDEV_QUEUE_TYPE_TX, napi); + } + } } /* Configure the Rx forwarding */ @@ -1811,8 +1867,7 @@ static int ena_rss_configure(struct ena_adapter *adapter) if (!ena_dev->rss.tbl_log_size) { rc = ena_rss_init_default(adapter); if (rc && (rc != -EOPNOTSUPP)) { - netif_err(adapter, ifup, adapter->netdev, - "Failed to init RSS rc: %d\n", rc); + netif_err(adapter, ifup, adapter->netdev, "Failed to init RSS rc: %d\n", rc); return rc; } } @@ -2134,6 +2189,12 @@ int ena_up(struct ena_adapter *adapter) */ ena_init_napi_in_range(adapter, 0, io_queue_count); + /* Enabling DIM needs to happen before enabling IRQs since DIM + * is run from napi routine + */ + if (ena_com_interrupt_moderation_supported(adapter->ena_dev)) + ena_com_enable_adaptive_moderation(adapter->ena_dev); + rc = ena_request_io_irq(adapter); if (rc) goto err_req_irq; @@ -2184,7 +2245,7 @@ void ena_down(struct ena_adapter *adapter) { int io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues; - netif_info(adapter, ifdown, adapter->netdev, "%s\n", __func__); + netif_dbg(adapter, ifdown, adapter->netdev, "%s\n", __func__); clear_bit(ENA_FLAG_DEV_UP, &adapter->flags); @@ -2197,8 +2258,6 @@ void ena_down(struct ena_adapter *adapter) /* After this point the napi handler won't enable the tx queue */ ena_napi_disable_in_range(adapter, 0, io_queue_count); - /* After destroy the queue there won't be any new interrupts */ - if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags)) { int rc; @@ -2588,8 +2647,6 @@ static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev) if (unlikely(rc)) goto error_drop_packet; - skb_tx_timestamp(skb); - next_to_use = tx_ring->next_to_use; req_id = tx_ring->free_ids[next_to_use]; tx_info = &tx_ring->tx_buffer_info[req_id]; @@ -2653,6 +2710,8 @@ static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev) } } + skb_tx_timestamp(skb); + if (netif_xmit_stopped(txq) || !netdev_xmit_more()) /* trigger the dma engine. ena_ring_tx_doorbell() * calls a memory barrier inside it. @@ -2670,26 +2729,11 @@ error_drop_packet: return NETDEV_TX_OK; } -static u16 ena_select_queue(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev) -{ - u16 qid; - /* we suspect that this is good for in--kernel network services that - * want to loop incoming skb rx to tx in normal user generated traffic, - * most probably we will not get to this - */ - if (skb_rx_queue_recorded(skb)) - qid = skb_get_rx_queue(skb); - else - qid = netdev_pick_tx(dev, skb, NULL); - - return qid; -} - static void ena_config_host_info(struct ena_com_dev *ena_dev, struct pci_dev *pdev) { struct device *dev = &pdev->dev; struct ena_admin_host_info *host_info; + ssize_t ret; int rc; /* Allocate only the host info */ @@ -2704,11 +2748,19 @@ static void ena_config_host_info(struct ena_com_dev *ena_dev, struct pci_dev *pd host_info->bdf = pci_dev_id(pdev); host_info->os_type = ENA_ADMIN_OS_LINUX; host_info->kernel_ver = LINUX_VERSION_CODE; - strscpy(host_info->kernel_ver_str, utsname()->version, - sizeof(host_info->kernel_ver_str) - 1); + ret = strscpy(host_info->kernel_ver_str, utsname()->version, + sizeof(host_info->kernel_ver_str)); + if (ret < 0) + dev_dbg(dev, + "kernel version string will be truncated, status = %zd\n", ret); + host_info->os_dist = 0; - strscpy(host_info->os_dist_str, utsname()->release, - sizeof(host_info->os_dist_str)); + ret = strscpy(host_info->os_dist_str, utsname()->release, + sizeof(host_info->os_dist_str)); + if (ret < 0) + dev_dbg(dev, + "OS distribution string will be truncated, status = %zd\n", ret); + host_info->driver_version = (DRV_MODULE_GEN_MAJOR) | (DRV_MODULE_GEN_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) | @@ -2764,8 +2816,7 @@ static void ena_config_debug_area(struct ena_adapter *adapter) rc = ena_com_set_host_attributes(adapter->ena_dev); if (rc) { if (rc == -EOPNOTSUPP) - netif_warn(adapter, drv, adapter->netdev, - "Cannot set host attributes\n"); + netif_warn(adapter, drv, adapter->netdev, "Cannot set host attributes\n"); else netif_err(adapter, drv, adapter->netdev, "Cannot set host attributes\n"); @@ -2777,19 +2828,6 @@ err: ena_com_delete_debug_area(adapter->ena_dev); } -int ena_update_hw_stats(struct ena_adapter *adapter) -{ - int rc; - - rc = ena_com_get_eni_stats(adapter->ena_dev, &adapter->eni_stats); - if (rc) { - netdev_err(adapter->netdev, "Failed to get ENI stats\n"); - return rc; - } - - return 0; -} - static void ena_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats) { @@ -2863,18 +2901,16 @@ static const struct net_device_ops ena_netdev_ops = { .ndo_open = ena_open, .ndo_stop = ena_close, .ndo_start_xmit = ena_start_xmit, - .ndo_select_queue = ena_select_queue, .ndo_get_stats64 = ena_get_stats64, .ndo_tx_timeout = ena_tx_timeout, .ndo_change_mtu = ena_change_mtu, - .ndo_set_mac_address = NULL, .ndo_validate_addr = eth_validate_addr, .ndo_bpf = ena_xdp, .ndo_xdp_xmit = ena_xdp_xmit, }; -static void ena_calc_io_queue_size(struct ena_adapter *adapter, - struct ena_com_dev_get_features_ctx *get_feat_ctx) +static int ena_calc_io_queue_size(struct ena_adapter *adapter, + struct ena_com_dev_get_features_ctx *get_feat_ctx) { struct ena_admin_feature_llq_desc *llq = &get_feat_ctx->llq; struct ena_com_dev *ena_dev = adapter->ena_dev; @@ -2933,6 +2969,18 @@ static void ena_calc_io_queue_size(struct ena_adapter *adapter, max_tx_queue_size = rounddown_pow_of_two(max_tx_queue_size); max_rx_queue_size = rounddown_pow_of_two(max_rx_queue_size); + if (max_tx_queue_size < ENA_MIN_RING_SIZE) { + netdev_err(adapter->netdev, "Device max TX queue size: %d < minimum: %d\n", + max_tx_queue_size, ENA_MIN_RING_SIZE); + return -EINVAL; + } + + if (max_rx_queue_size < ENA_MIN_RING_SIZE) { + netdev_err(adapter->netdev, "Device max RX queue size: %d < minimum: %d\n", + max_rx_queue_size, ENA_MIN_RING_SIZE); + return -EINVAL; + } + /* When forcing large headers, we multiply the entry size by 2, and therefore divide * the queue size by 2, leaving the amount of memory used by the queues unchanged. */ @@ -2963,6 +3011,8 @@ static void ena_calc_io_queue_size(struct ena_adapter *adapter, adapter->max_rx_ring_size = max_rx_queue_size; adapter->requested_tx_ring_size = tx_queue_size; adapter->requested_rx_ring_size = rx_queue_size; + + return 0; } static int ena_device_validate_params(struct ena_adapter *adapter, @@ -3070,6 +3120,7 @@ static int ena_device_init(struct ena_adapter *adapter, struct pci_dev *pdev, bool *wd_state) { struct ena_com_dev *ena_dev = adapter->ena_dev; + struct net_device *netdev = adapter->netdev; struct ena_llq_configurations llq_config; struct device *dev = &pdev->dev; bool readless_supported; @@ -3159,15 +3210,19 @@ static int ena_device_init(struct ena_adapter *adapter, struct pci_dev *pdev, rc = ena_set_queues_placement_policy(pdev, ena_dev, &get_feat_ctx->llq, &llq_config); if (rc) { - dev_err(dev, "ENA device init failed\n"); + netdev_err(netdev, "Cannot set queues placement policy rc= %d\n", rc); goto err_admin_init; } - ena_calc_io_queue_size(adapter, get_feat_ctx); + rc = ena_calc_io_queue_size(adapter, get_feat_ctx); + if (unlikely(rc)) + goto err_admin_init; return 0; err_admin_init: + ena_com_abort_admin_commands(ena_dev); + ena_com_wait_for_abort_completion(ena_dev); ena_com_delete_host_info(ena_dev); ena_com_admin_destroy(ena_dev); err_mmio_read_less: @@ -3208,14 +3263,15 @@ err_disable_msix: return rc; } -static void ena_destroy_device(struct ena_adapter *adapter, bool graceful) +static int ena_destroy_device(struct ena_adapter *adapter, bool graceful) { struct net_device *netdev = adapter->netdev; struct ena_com_dev *ena_dev = adapter->ena_dev; bool dev_up; + int rc = 0; if (!test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags)) - return; + return 0; netif_carrier_off(netdev); @@ -3226,14 +3282,14 @@ static void ena_destroy_device(struct ena_adapter *adapter, bool graceful) if (!graceful) ena_com_set_admin_running_state(ena_dev, false); - if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags)) + if (dev_up) ena_down(adapter); /* Stop the device from sending AENQ events (in case reset flag is set * and device is up, ena_down() already reset the device. */ if (!(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags) && dev_up)) - ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason); + rc = ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason); ena_free_mgmnt_irq(adapter); @@ -3252,6 +3308,8 @@ static void ena_destroy_device(struct ena_adapter *adapter, bool graceful) clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); clear_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags); + + return rc; } static int ena_restore_device(struct ena_adapter *adapter) @@ -3328,14 +3386,17 @@ err: static void ena_fw_reset_device(struct work_struct *work) { + int rc = 0; + struct ena_adapter *adapter = container_of(work, struct ena_adapter, reset_task); rtnl_lock(); if (likely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) { - ena_destroy_device(adapter, false); - ena_restore_device(adapter); + rc |= ena_destroy_device(adapter, false); + rc |= ena_restore_device(adapter); + adapter->dev_stats.reset_fail += !!rc; dev_err(&adapter->pdev->dev, "Device reset completed successfully\n"); } @@ -3372,14 +3433,18 @@ static int check_missing_comp_in_tx_queue(struct ena_adapter *adapter, struct ena_ring *tx_ring) { struct ena_napi *ena_napi = container_of(tx_ring->napi, struct ena_napi, napi); + enum ena_regs_reset_reason_types reset_reason = ENA_REGS_RESET_MISS_TX_CMPL; unsigned int time_since_last_napi; unsigned int missing_tx_comp_to; bool is_tx_comp_time_expired; struct ena_tx_buffer *tx_buf; unsigned long last_jiffies; + int napi_scheduled; u32 missed_tx = 0; int i, rc = 0; + missing_tx_comp_to = jiffies_to_msecs(adapter->missing_tx_completion_to); + for (i = 0; i < tx_ring->ring_size; i++) { tx_buf = &tx_ring->tx_buffer_info[i]; last_jiffies = tx_buf->last_jiffies; @@ -3406,25 +3471,45 @@ static int check_missing_comp_in_tx_queue(struct ena_adapter *adapter, adapter->missing_tx_completion_to); if (unlikely(is_tx_comp_time_expired)) { - if (!tx_buf->print_once) { - time_since_last_napi = jiffies_to_usecs(jiffies - tx_ring->tx_stats.last_napi_jiffies); - missing_tx_comp_to = jiffies_to_msecs(adapter->missing_tx_completion_to); - netif_notice(adapter, tx_err, adapter->netdev, - "Found a Tx that wasn't completed on time, qid %d, index %d. %u usecs have passed since last napi execution. Missing Tx timeout value %u msecs\n", - tx_ring->qid, i, time_since_last_napi, missing_tx_comp_to); + time_since_last_napi = + jiffies_to_usecs(jiffies - tx_ring->tx_stats.last_napi_jiffies); + napi_scheduled = !!(ena_napi->napi.state & NAPIF_STATE_SCHED); + + if (missing_tx_comp_to < time_since_last_napi && napi_scheduled) { + /* We suspect napi isn't called because the + * bottom half is not run. Require a bigger + * timeout for these cases + */ + if (!time_is_before_jiffies(last_jiffies + + 2 * adapter->missing_tx_completion_to)) + continue; + + reset_reason = ENA_REGS_RESET_SUSPECTED_POLL_STARVATION; } - tx_buf->print_once = 1; missed_tx++; + + if (tx_buf->print_once) + continue; + + netif_notice(adapter, tx_err, adapter->netdev, + "TX hasn't completed, qid %d, index %d. %u usecs from last napi execution, napi scheduled: %d\n", + tx_ring->qid, i, time_since_last_napi, napi_scheduled); + + tx_buf->print_once = 1; } } if (unlikely(missed_tx > adapter->missing_tx_completion_threshold)) { netif_err(adapter, tx_err, adapter->netdev, - "The number of lost tx completions is above the threshold (%d > %d). Reset the device\n", + "Lost TX completions are above the threshold (%d > %d). Completion transmission timeout: %u.\n", missed_tx, - adapter->missing_tx_completion_threshold); - ena_reset_device(adapter, ENA_REGS_RESET_MISS_TX_CMPL); + adapter->missing_tx_completion_threshold, + missing_tx_comp_to); + netif_err(adapter, tx_err, adapter->netdev, + "Resetting the device\n"); + + ena_reset_device(adapter, reset_reason); rc = -EIO; } @@ -3438,10 +3523,11 @@ static void check_for_missing_completions(struct ena_adapter *adapter) { struct ena_ring *tx_ring; struct ena_ring *rx_ring; - int i, budget, rc; + int qid, budget, rc; int io_queue_count; io_queue_count = adapter->xdp_num_queues + adapter->num_io_queues; + /* Make sure the driver doesn't turn the device in other process */ smp_rmb(); @@ -3454,27 +3540,29 @@ static void check_for_missing_completions(struct ena_adapter *adapter) if (adapter->missing_tx_completion_to == ENA_HW_HINTS_NO_TIMEOUT) return; - budget = ENA_MONITORED_TX_QUEUES; + budget = min_t(u32, io_queue_count, ENA_MONITORED_TX_QUEUES); - for (i = adapter->last_monitored_tx_qid; i < io_queue_count; i++) { - tx_ring = &adapter->tx_ring[i]; - rx_ring = &adapter->rx_ring[i]; + qid = adapter->last_monitored_tx_qid; + + while (budget) { + qid = (qid + 1) % io_queue_count; + + tx_ring = &adapter->tx_ring[qid]; + rx_ring = &adapter->rx_ring[qid]; rc = check_missing_comp_in_tx_queue(adapter, tx_ring); if (unlikely(rc)) return; - rc = !ENA_IS_XDP_INDEX(adapter, i) ? + rc = !ENA_IS_XDP_INDEX(adapter, qid) ? check_for_rx_interrupt_queue(adapter, rx_ring) : 0; if (unlikely(rc)) return; budget--; - if (!budget) - break; } - adapter->last_monitored_tx_qid = i % io_queue_count; + adapter->last_monitored_tx_qid = qid; } /* trigger napi schedule after 2 consecutive detections */ @@ -3762,8 +3850,8 @@ static int ena_rss_init_default(struct ena_adapter *adapter) } } - rc = ena_com_fill_hash_function(ena_dev, ENA_ADMIN_TOEPLITZ, NULL, - ENA_HASH_KEY_SIZE, 0xFFFFFFFF); + rc = ena_com_fill_hash_function(ena_dev, ENA_ADMIN_TOEPLITZ, NULL, ENA_HASH_KEY_SIZE, + 0xFFFFFFFF); if (unlikely(rc && (rc != -EOPNOTSUPP))) { dev_err(dev, "Cannot fill hash function\n"); goto err_fill_indir; @@ -3873,10 +3961,16 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_drvdata(pdev, adapter); + rc = ena_com_allocate_customer_metrics_buffer(ena_dev); + if (rc) { + netdev_err(netdev, "ena_com_allocate_customer_metrics_buffer failed\n"); + goto err_netdev_destroy; + } + rc = ena_map_llq_mem_bar(pdev, ena_dev, bars); if (rc) { dev_err(&pdev->dev, "ENA LLQ bar mapping failed\n"); - goto err_netdev_destroy; + goto err_metrics_destroy; } rc = ena_device_init(adapter, pdev, &get_feat_ctx, &wd_state); @@ -3884,7 +3978,7 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) dev_err(&pdev->dev, "ENA device init failed\n"); if (rc == -ETIME) rc = -EPROBE_DEFER; - goto err_netdev_destroy; + goto err_metrics_destroy; } /* Initial TX and RX interrupt delay. Assumes 1 usec granularity. @@ -4005,6 +4099,8 @@ err_worker_destroy: err_device_destroy: ena_com_delete_host_info(ena_dev); ena_com_admin_destroy(ena_dev); +err_metrics_destroy: + ena_com_delete_customer_metrics_buffer(ena_dev); err_netdev_destroy: free_netdev(netdev); err_free_region: @@ -4040,8 +4136,8 @@ static void __ena_shutoff(struct pci_dev *pdev, bool shutdown) free_irq_cpu_rmap(netdev->rx_cpu_rmap); netdev->rx_cpu_rmap = NULL; } -#endif /* CONFIG_RFS_ACCEL */ +#endif /* CONFIG_RFS_ACCEL */ /* Make sure timer and reset routine won't be called after * freeing device resources. */ @@ -4068,6 +4164,8 @@ static void __ena_shutoff(struct pci_dev *pdev, bool shutdown) ena_com_delete_host_info(ena_dev); + ena_com_delete_customer_metrics_buffer(ena_dev); + ena_release_bars(ena_dev, pdev); pci_disable_device(pdev); |