From 5be5955425c22df5e25ee0628b57f523437af6dc Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 14 Dec 2016 15:05:30 -0800 Subject: igb: update driver to make use of DMA_ATTR_SKIP_CPU_SYNC The ARM architecture provides a mechanism for deferring cache line invalidation in the case of map/unmap. This patch makes use of this mechanism to avoid unnecessary synchronization. A secondary effect of this change is that the portion of the page that has been synchronized for use by the CPU should be writable and could be passed up the stack (at least on ARM). The last bit that occurred to me is that on architectures where the sync_for_cpu call invalidates cache lines we were prefetching and then invalidating the first 128 bytes of the packet. To avoid that I have moved the sync up to before we perform the prefetch and allocate the skbuff so that we can actually make use of it. Link: http://lkml.kernel.org/r/20161110113611.76501.98897.stgit@ahduyck-blue-test.jf.intel.com Signed-off-by: Alexander Duyck Acked-by: Jeff Kirsher Cc: "David S. Miller" Cc: "James E.J. Bottomley" Cc: Chris Metcalf Cc: David Howells Cc: Geert Uytterhoeven Cc: Hans-Christian Noren Egtvedt Cc: Helge Deller Cc: James Hogan Cc: Jonas Bonn Cc: Keguang Zhang Cc: Ley Foon Tan Cc: Mark Salter Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Simek Cc: Ralf Baechle Cc: Rich Felker Cc: Richard Kuo Cc: Russell King Cc: Steven Miao Cc: Tobias Klauser Cc: Vineet Gupta Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/net/ethernet/intel/igb/igb_main.c | 53 +++++++++++++++++++------------ 1 file changed, 33 insertions(+), 20 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index cae24a8ccf47..60f122531e29 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -3947,10 +3947,21 @@ static void igb_clean_rx_ring(struct igb_ring *rx_ring) if (!buffer_info->page) continue; - dma_unmap_page(rx_ring->dev, - buffer_info->dma, - PAGE_SIZE, - DMA_FROM_DEVICE); + /* Invalidate cache lines that may have been written to by + * device so that we avoid corrupting memory. + */ + dma_sync_single_range_for_cpu(rx_ring->dev, + buffer_info->dma, + buffer_info->page_offset, + IGB_RX_BUFSZ, + DMA_FROM_DEVICE); + + /* free resources associated with mapping */ + dma_unmap_page_attrs(rx_ring->dev, + buffer_info->dma, + PAGE_SIZE, + DMA_FROM_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC); __free_page(buffer_info->page); buffer_info->page = NULL; @@ -6812,12 +6823,6 @@ static void igb_reuse_rx_page(struct igb_ring *rx_ring, /* transfer page from old buffer to new buffer */ *new_buff = *old_buff; - - /* sync the buffer for use by the device */ - dma_sync_single_range_for_device(rx_ring->dev, old_buff->dma, - old_buff->page_offset, - IGB_RX_BUFSZ, - DMA_FROM_DEVICE); } static inline bool igb_page_is_reserved(struct page *page) @@ -6938,6 +6943,13 @@ static struct sk_buff *igb_fetch_rx_buffer(struct igb_ring *rx_ring, page = rx_buffer->page; prefetchw(page); + /* we are reusing so sync this buffer for CPU use */ + dma_sync_single_range_for_cpu(rx_ring->dev, + rx_buffer->dma, + rx_buffer->page_offset, + size, + DMA_FROM_DEVICE); + if (likely(!skb)) { void *page_addr = page_address(page) + rx_buffer->page_offset; @@ -6962,21 +6974,15 @@ static struct sk_buff *igb_fetch_rx_buffer(struct igb_ring *rx_ring, prefetchw(skb->data); } - /* we are reusing so sync this buffer for CPU use */ - dma_sync_single_range_for_cpu(rx_ring->dev, - rx_buffer->dma, - rx_buffer->page_offset, - size, - DMA_FROM_DEVICE); - /* pull page into skb */ if (igb_add_rx_frag(rx_ring, rx_buffer, size, rx_desc, skb)) { /* hand second half of page back to the ring */ igb_reuse_rx_page(rx_ring, rx_buffer); } else { /* we are not reusing the buffer so unmap it */ - dma_unmap_page(rx_ring->dev, rx_buffer->dma, - PAGE_SIZE, DMA_FROM_DEVICE); + dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, + PAGE_SIZE, DMA_FROM_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC); } /* clear contents of rx_buffer */ @@ -7234,7 +7240,8 @@ static bool igb_alloc_mapped_page(struct igb_ring *rx_ring, } /* map page for use */ - dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE); + dma = dma_map_page_attrs(rx_ring->dev, page, 0, PAGE_SIZE, + DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); /* if mapping failed free memory back to system since * there isn't much point in holding memory we can't use @@ -7275,6 +7282,12 @@ void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count) if (!igb_alloc_mapped_page(rx_ring, bi)) break; + /* sync the buffer for use by the device */ + dma_sync_single_range_for_device(rx_ring->dev, bi->dma, + bi->page_offset, + IGB_RX_BUFSZ, + DMA_FROM_DEVICE); + /* Refresh the desc even if buffer_addrs didn't change * because each write-back erases this info. */ -- cgit From bd4171a5d4c2827f4ae7a235389648d7c149a41b Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 14 Dec 2016 15:05:34 -0800 Subject: igb: update code to better handle incrementing page count Update the driver code so that we do bulk updates of the page reference count instead of just incrementing it by one reference at a time. The advantage to doing this is that we cut down on atomic operations and this in turn should give us a slight improvement in cycles per packet. In addition if we eventually move this over to using build_skb the gains will be more noticeable. Link: http://lkml.kernel.org/r/20161110113616.76501.17072.stgit@ahduyck-blue-test.jf.intel.com Signed-off-by: Alexander Duyck Acked-by: Jeff Kirsher Cc: "David S. Miller" Cc: "James E.J. Bottomley" Cc: Chris Metcalf Cc: David Howells Cc: Geert Uytterhoeven Cc: Hans-Christian Noren Egtvedt Cc: Helge Deller Cc: James Hogan Cc: Jonas Bonn Cc: Keguang Zhang Cc: Ley Foon Tan Cc: Mark Salter Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Simek Cc: Ralf Baechle Cc: Rich Felker Cc: Richard Kuo Cc: Russell King Cc: Steven Miao Cc: Tobias Klauser Cc: Vineet Gupta Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/net/ethernet/intel/igb/igb.h | 7 ++++++- drivers/net/ethernet/intel/igb/igb_main.c | 24 +++++++++++++++++------- 2 files changed, 23 insertions(+), 8 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index d11093dce1b9..acbc3abe2ddd 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -210,7 +210,12 @@ struct igb_tx_buffer { struct igb_rx_buffer { dma_addr_t dma; struct page *page; - unsigned int page_offset; +#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536) + __u32 page_offset; +#else + __u16 page_offset; +#endif + __u16 pagecnt_bias; }; struct igb_tx_queue_stats { diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 60f122531e29..a761001308dc 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -3962,7 +3962,8 @@ static void igb_clean_rx_ring(struct igb_ring *rx_ring) PAGE_SIZE, DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); - __free_page(buffer_info->page); + __page_frag_drain(buffer_info->page, 0, + buffer_info->pagecnt_bias); buffer_info->page = NULL; } @@ -6834,13 +6835,15 @@ static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer, struct page *page, unsigned int truesize) { + unsigned int pagecnt_bias = rx_buffer->pagecnt_bias--; + /* avoid re-using remote pages */ if (unlikely(igb_page_is_reserved(page))) return false; #if (PAGE_SIZE < 8192) /* if we are only owner of page we can reuse it */ - if (unlikely(page_count(page) != 1)) + if (unlikely(page_ref_count(page) != pagecnt_bias)) return false; /* flip page offset to other buffer */ @@ -6853,10 +6856,14 @@ static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer, return false; #endif - /* Even if we own the page, we are not allowed to use atomic_set() - * This would break get_page_unless_zero() users. + /* If we have drained the page fragment pool we need to update + * the pagecnt_bias and page count so that we fully restock the + * number of references the driver holds. */ - page_ref_inc(page); + if (unlikely(pagecnt_bias == 1)) { + page_ref_add(page, USHRT_MAX); + rx_buffer->pagecnt_bias = USHRT_MAX; + } return true; } @@ -6908,7 +6915,6 @@ static bool igb_add_rx_frag(struct igb_ring *rx_ring, return true; /* this page cannot be reused so discard it */ - __free_page(page); return false; } @@ -6979,10 +6985,13 @@ static struct sk_buff *igb_fetch_rx_buffer(struct igb_ring *rx_ring, /* hand second half of page back to the ring */ igb_reuse_rx_page(rx_ring, rx_buffer); } else { - /* we are not reusing the buffer so unmap it */ + /* We are not reusing the buffer so unmap it and free + * any references we are holding to it + */ dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, PAGE_SIZE, DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); + __page_frag_drain(page, 0, rx_buffer->pagecnt_bias); } /* clear contents of rx_buffer */ @@ -7256,6 +7265,7 @@ static bool igb_alloc_mapped_page(struct igb_ring *rx_ring, bi->dma = dma; bi->page = page; bi->page_offset = 0; + bi->pagecnt_bias = 1; return true; } -- cgit From b5fc8c6c000aa083a671cd8f1736d04081bf65aa Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 14 Dec 2016 15:06:15 -0800 Subject: drivers/net/wireless/intel/iwlwifi/dvm/calib.c: simplfy min() expression This cast is no longer needed. Cc: Johannes Berg Cc: Emmanuel Grumbach Cc: Intel Linux Wireless Cc: Kalle Valo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/net/wireless/intel/iwlwifi/dvm/calib.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/calib.c b/drivers/net/wireless/intel/iwlwifi/dvm/calib.c index e9cef9de9ed8..c96f9b1d948a 100644 --- a/drivers/net/wireless/intel/iwlwifi/dvm/calib.c +++ b/drivers/net/wireless/intel/iwlwifi/dvm/calib.c @@ -900,8 +900,7 @@ static void iwlagn_gain_computation(struct iwl_priv *priv, /* bound gain by 2 bits value max, 3rd bit is sign */ data->delta_gain_code[i] = - min(abs(delta_g), - (s32) CHAIN_NOISE_MAX_DELTA_GAIN_CODE); + min(abs(delta_g), CHAIN_NOISE_MAX_DELTA_GAIN_CODE); if (delta_g < 0) /* -- cgit