From ce1f352162828ba07470328828a32f47aa759020 Mon Sep 17 00:00:00 2001 From: Arthur Kiyanovski Date: Tue, 17 Mar 2020 09:06:39 +0200 Subject: net: ena: fix incorrect setting of the number of msix vectors Overview: We don't frequently change the msix vectors throughout the life cycle of the driver. We do so in two functions: ena_probe() and ena_restore(). ena_probe() is only called when the driver is loaded. ena_restore() on the other hand is called during device reset / resume operations. We use num_io_queues for calculating and allocating the number of msix vectors. At ena_probe() this value is equal to max_num_io_queues and thus this is not an issue, however ena_restore() might be called after the number of io queues has changed. A possible bug scenario is as follows: * Change number of queues from 8 to 4. (num_io_queues = 4, max_num_io_queues = 8, msix_vecs = 9,) * Trigger reset occurs -> ena_restore is called. (num_io_queues = 4, max_num_io_queues =8 , msix_vecs = 5) * Change number of queues from 4 to 6. (num_io_queues = 6, max_num_io_queues = 8, msix_vecs = 5) * The driver will reset due to failure of check_for_rx_interrupt_queue() Fix: This can be easily fixed by always using max_num_io_queues to init the msix_vecs, since this number won't change as opposed to num_io_queues. Fixes: 4d19266022ec ("net: ena: multiple queue creation related cleanups") Signed-off-by: Sameeh Jubran Signed-off-by: Arthur Kiyanovski Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet/amazon/ena/ena_netdev.c') diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 0b2fd96b93d7..39f290f4458f 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -1968,7 +1968,7 @@ static int ena_enable_msix(struct ena_adapter *adapter) } /* Reserved the max msix vectors we might need */ - msix_vecs = ENA_MAX_MSIX_VEC(adapter->num_io_queues); + msix_vecs = ENA_MAX_MSIX_VEC(adapter->max_num_io_queues); netif_dbg(adapter, probe, adapter->netdev, "trying to enable MSI-X, vectors %d\n", msix_vecs); -- cgit From e02ae6ed51be3d28923bfd318ae57000f5643da5 Mon Sep 17 00:00:00 2001 From: Arthur Kiyanovski Date: Tue, 17 Mar 2020 09:06:40 +0200 Subject: net: ena: fix request of incorrect number of IRQ vectors Bug: In short the main issue is caused by the fact that the number of queues is changed using ethtool after ena_probe() has been called and before ena_up() was executed. Here is the full scenario in detail: * ena_probe() is called when the driver is loaded, the driver is not up yet at the end of ena_probe(). * The number of queues is changed -> io_queue_count is changed as well - ena_up() is not called since the "dev_was_up" boolean in ena_update_queue_count() is false. * ena_up() is called by the kernel (it's called asynchronously some time after ena_probe()). ena_setup_io_intr() is called by ena_up() and it uses io_queue_count to get the suitable irq lines for each msix vector. The function ena_request_io_irq() is called right after that and it uses msix_vecs - This value only changes during ena_probe() and ena_restore() - to request the irq vectors. This results in "Failed to request I/O IRQ" error for i > io_queue_count. Numeric example: * After ena_probe() io_queue_count = 8, msix_vecs = 9. * The number of queues changes to 4 -> io_queue_count = 4, msix_vecs = 9. * ena_up() is executed for the first time: ** ena_setup_io_intr() inits the vectors only up to io_queue_count. ** ena_request_io_irq() calls request_irq() and fails for i = 5. How to reproduce: simply run the following commands: sudo rmmod ena && sudo insmod ena.ko; sudo ethtool -L eth1 combined 3; Fix: Use ENA_MAX_MSIX_VEC(adapter->num_io_queues + adapter->xdp_num_queues) instead of adapter->msix_vecs. We need to take XDP queues into consideration as they need to have msix vectors assigned to them as well. Note that the XDP cannot be attached before the driver is up and running but in XDP mode the issue might occur when the number of queues changes right after a reset trigger. The ENA_MAX_MSIX_VEC simply adds one to the argument since the first msix vector is reserved for management queue. Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)") Signed-off-by: Sameeh Jubran Signed-off-by: Arthur Kiyanovski Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'drivers/net/ethernet/amazon/ena/ena_netdev.c') diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 39f290f4458f..836fda585391 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -2068,6 +2068,7 @@ static int ena_request_mgmnt_irq(struct ena_adapter *adapter) static int ena_request_io_irq(struct ena_adapter *adapter) { + u32 io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues; unsigned long flags = 0; struct ena_irq *irq; int rc = 0, i, k; @@ -2078,7 +2079,7 @@ static int ena_request_io_irq(struct ena_adapter *adapter) return -EINVAL; } - for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) { + for (i = ENA_IO_IRQ_FIRST_IDX; i < ENA_MAX_MSIX_VEC(io_queue_count); i++) { irq = &adapter->irq_tbl[i]; rc = request_irq(irq->vector, irq->handler, flags, irq->name, irq->data); @@ -2119,6 +2120,7 @@ static void ena_free_mgmnt_irq(struct ena_adapter *adapter) static void ena_free_io_irq(struct ena_adapter *adapter) { + u32 io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues; struct ena_irq *irq; int i; @@ -2129,7 +2131,7 @@ static void ena_free_io_irq(struct ena_adapter *adapter) } #endif /* CONFIG_RFS_ACCEL */ - for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) { + for (i = ENA_IO_IRQ_FIRST_IDX; i < ENA_MAX_MSIX_VEC(io_queue_count); i++) { irq = &adapter->irq_tbl[i]; irq_set_affinity_hint(irq->vector, NULL); free_irq(irq->vector, irq->data); @@ -2144,12 +2146,13 @@ static void ena_disable_msix(struct ena_adapter *adapter) static void ena_disable_io_intr_sync(struct ena_adapter *adapter) { + u32 io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues; int i; if (!netif_running(adapter->netdev)) return; - for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) + for (i = ENA_IO_IRQ_FIRST_IDX; i < ENA_MAX_MSIX_VEC(io_queue_count); i++) synchronize_irq(adapter->irq_tbl[i].vector); } -- cgit From 30623e1ed116bcd1785217d0a98eec643687e091 Mon Sep 17 00:00:00 2001 From: Arthur Kiyanovski Date: Tue, 17 Mar 2020 09:06:41 +0200 Subject: net: ena: avoid memory access violation by validating req_id properly Rx req_id is an index in struct ena_eth_io_rx_cdesc_base. The driver should validate that the Rx req_id it received from the device is in range [0, ring_size -1]. Failure to do so could yield to potential memory access violoation. The validation was mistakenly done when refilling the Rx submission queue and not in Rx completion queue. Fixes: ad974baef2a1 ("net: ena: add support for out of order rx buffers refill") Signed-off-by: Noam Dagan Signed-off-by: Arthur Kiyanovski Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'drivers/net/ethernet/amazon/ena/ena_netdev.c') diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 836fda585391..51333a05c14d 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -1018,13 +1018,9 @@ static int ena_refill_rx_bufs(struct ena_ring *rx_ring, u32 num) struct ena_rx_buffer *rx_info; req_id = rx_ring->free_ids[next_to_use]; - rc = validate_rx_req_id(rx_ring, req_id); - if (unlikely(rc < 0)) - break; rx_info = &rx_ring->rx_buffer_info[req_id]; - rc = ena_alloc_rx_page(rx_ring, rx_info, GFP_ATOMIC | __GFP_COMP); if (unlikely(rc < 0)) { @@ -1379,9 +1375,15 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, struct ena_rx_buffer *rx_info; u16 len, req_id, buf = 0; void *va; + int rc; len = ena_bufs[buf].len; req_id = ena_bufs[buf].req_id; + + rc = validate_rx_req_id(rx_ring, req_id); + if (unlikely(rc < 0)) + return NULL; + rx_info = &rx_ring->rx_buffer_info[req_id]; if (unlikely(!rx_info->page)) { @@ -1454,6 +1456,11 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, buf++; len = ena_bufs[buf].len; req_id = ena_bufs[buf].req_id; + + rc = validate_rx_req_id(rx_ring, req_id); + if (unlikely(rc < 0)) + return NULL; + rx_info = &rx_ring->rx_buffer_info[req_id]; } while (1); -- cgit From dfdde1345bc124816f0fd42fa91b8748051e758e Mon Sep 17 00:00:00 2001 From: Arthur Kiyanovski Date: Tue, 17 Mar 2020 09:06:42 +0200 Subject: net: ena: fix continuous keep-alive resets last_keep_alive_jiffies is updated in probe and when a keep-alive event is received. In case the driver times-out on a keep-alive event, it has high chances of continuously timing-out on keep-alive events. This is because when the driver recovers from the keep-alive-timeout reset the value of last_keep_alive_jiffies is very old, and if a keep-alive event is not received before the next timer expires, the value of last_keep_alive_jiffies will cause another keep-alive-timeout reset and so forth in a loop. Solution: Update last_keep_alive_jiffies whenever the device is restored after reset. Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)") Signed-off-by: Noam Dagan Signed-off-by: Arthur Kiyanovski Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/net/ethernet/amazon/ena/ena_netdev.c') diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 51333a05c14d..4647d7656761 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -3486,6 +3486,7 @@ static int ena_restore_device(struct ena_adapter *adapter) netif_carrier_on(adapter->netdev); mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ)); + adapter->last_keep_alive_jiffies = jiffies; dev_err(&pdev->dev, "Device reset completed successfully, Driver info: %s\n", version); -- cgit From 428c491332bca498c8eb2127669af51506c346c7 Mon Sep 17 00:00:00 2001 From: "Guilherme G. Piccoli" Date: Fri, 20 Mar 2020 09:55:34 -0300 Subject: net: ena: Add PCI shutdown handler to allow safe kexec Currently ENA only provides the PCI remove() handler, used during rmmod for example. This is not called on shutdown/kexec path; we are potentially creating a failure scenario on kexec: (a) Kexec is triggered, no shutdown() / remove() handler is called for ENA; instead pci_device_shutdown() clears the master bit of the PCI device, stopping all DMA transactions; (b) Kexec reboot happens and the device gets enabled again, likely having its FW with that DMA transaction buffered; then it may trigger the (now invalid) memory operation in the new kernel, corrupting kernel memory area. This patch aims to prevent this, by implementing a shutdown() handler quite similar to the remove() one - the difference being the handling of the netdev, which is unregistered on remove(), but following the convention observed in other drivers, it's only detached on shutdown(). This prevents an odd issue in AWS Nitro instances, in which after the 2nd kexec the next one will fail with an initrd corruption, caused by a wild DMA write to invalid kernel memory. The lspci output for the adapter present in my instance is: 00:05.0 Ethernet controller [0200]: Amazon.com, Inc. Elastic Network Adapter (ENA) [1d0f:ec20] Suggested-by: Gavin Shan Signed-off-by: Guilherme G. Piccoli Acked-by: Sameeh Jubran Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 51 ++++++++++++++++++++++------ 1 file changed, 41 insertions(+), 10 deletions(-) (limited to 'drivers/net/ethernet/amazon/ena/ena_netdev.c') diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 4647d7656761..cada6e7e30f4 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -4336,13 +4336,15 @@ err_disable_device: /*****************************************************************************/ -/* ena_remove - Device Removal Routine +/* __ena_shutoff - Helper used in both PCI remove/shutdown routines * @pdev: PCI device information struct + * @shutdown: Is it a shutdown operation? If false, means it is a removal * - * ena_remove is called by the PCI subsystem to alert the driver - * that it should release a PCI device. + * __ena_shutoff is a helper routine that does the real work on shutdown and + * removal paths; the difference between those paths is with regards to whether + * dettach or unregister the netdevice. */ -static void ena_remove(struct pci_dev *pdev) +static void __ena_shutoff(struct pci_dev *pdev, bool shutdown) { struct ena_adapter *adapter = pci_get_drvdata(pdev); struct ena_com_dev *ena_dev; @@ -4361,13 +4363,17 @@ static void ena_remove(struct pci_dev *pdev) cancel_work_sync(&adapter->reset_task); - rtnl_lock(); + rtnl_lock(); /* lock released inside the below if-else block */ ena_destroy_device(adapter, true); - rtnl_unlock(); - - unregister_netdev(netdev); - - free_netdev(netdev); + if (shutdown) { + netif_device_detach(netdev); + dev_close(netdev); + rtnl_unlock(); + } else { + rtnl_unlock(); + unregister_netdev(netdev); + free_netdev(netdev); + } ena_com_rss_destroy(ena_dev); @@ -4382,6 +4388,30 @@ static void ena_remove(struct pci_dev *pdev) vfree(ena_dev); } +/* ena_remove - Device Removal Routine + * @pdev: PCI device information struct + * + * ena_remove is called by the PCI subsystem to alert the driver + * that it should release a PCI device. + */ + +static void ena_remove(struct pci_dev *pdev) +{ + __ena_shutoff(pdev, false); +} + +/* ena_shutdown - Device Shutdown Routine + * @pdev: PCI device information struct + * + * ena_shutdown is called by the PCI subsystem to alert the driver that + * a shutdown/reboot (or kexec) is happening and device must be disabled. + */ + +static void ena_shutdown(struct pci_dev *pdev) +{ + __ena_shutoff(pdev, true); +} + #ifdef CONFIG_PM /* ena_suspend - PM suspend callback * @pdev: PCI device information struct @@ -4431,6 +4461,7 @@ static struct pci_driver ena_pci_driver = { .id_table = ena_pci_tbl, .probe = ena_probe, .remove = ena_remove, + .shutdown = ena_shutdown, #ifdef CONFIG_PM .suspend = ena_suspend, .resume = ena_resume, -- cgit