author    | Linus Torvalds <torvalds@linux-foundation.org> | 2015-06-24 16:49:49 -0700
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-06-24 16:49:49 -0700
commit    | e0456717e483bb8a9431b80a5bdc99a928b9b003 (patch)
tree      | 5eb5add2bafd1f20326d70f5cb3b711d00a40b10 /drivers/net/ethernet/via/via-rhine.c
parent    | 98ec21a01896751b673b6c731ca8881daa8b2c6d (diff)
parent    | 1ea2d020ba477cb7011a7174e8501a9e04a325d4 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
1) Add TX fast path in mac80211, from Johannes Berg.
2) Add TSO/GRO support to ibmveth, from Thomas Falcon
3) Move away from cached routes in ipv6, just like ipv4, from Martin
KaFai Lau.
4) Lots of new rhashtable tests, from Thomas Graf.
5) Run ingress qdisc lockless, from Alexei Starovoitov.
6) Allow servers to fetch TCP packet headers for SYN packets of new
connections, for fingerprinting. From Eric Dumazet.
7) Add mode parameter to pktgen, for testing receive. From Alexei
Starovoitov.
8) Cache access optimizations via simplifications of build_skb(), from
Alexander Duyck.
9) Move page frag allocator under mm/, also from Alexander.
10) Add xmit_more support to hv_netvsc, from KY Srinivasan.
11) Add a counter guard in case we try to perform endless reclassify
loops in the packet scheduler.
12) Extend flow dissector to be programmable and use it in new "Flower"
classifier. From Jiri Pirko.
13) AF_PACKET fanout rollover fixes, performance improvements, and new
statistics. From Willem de Bruijn.
14) Add netdev driver for GENEVE tunnels, from John W Linville.
15) Add ingress netfilter hooks and filtering, from Pablo Neira Ayuso.
16) Fix handling of epoll edge triggers in TCP, from Eric Dumazet.
17) Add an ECN retry fallback for the initial TCP handshake, from Daniel
Borkmann.
18) Add tail call support to BPF, from Alexei Starovoitov.
19) Add several pktgen helper scripts, from Jesper Dangaard Brouer.
20) Add zerocopy support to AF_UNIX, from Hannes Frederic Sowa.
21) Favor even port numbers for allocation to connect() requests, and
odd port numbers for bind(0), in an effort to help avoid
ip_local_port_range exhaustion. From Eric Dumazet.
22) Add Cavium ThunderX driver, from Sunil Goutham.
23) Allow bpf programs to access skb_iif and dev->ifindex SKB metadata,
from Alexei Starovoitov.
24) Add support for T6 chips in cxgb4vf driver, from Hariprasad Shenai.
25) Double TCP Small Queues default to 256K to accommodate situations
like the XEN driver and wireless aggregation. From Wei Liu.
26) Add more entropy inputs to flow dissector, from Tom Herbert.
27) Add CDG congestion control algorithm to TCP, from Kenneth Klette
Jonassen.
28) Convert ipset over to RCU locking, from Jozsef Kadlecsik.
29) Track and act upon link status of ipv4 route nexthops, from Andy
Gospodarek.
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1670 commits)
bridge: vlan: flush the dynamically learned entries on port vlan delete
bridge: multicast: add a comment to br_port_state_selection about blocking state
net: inet_diag: export IPV6_V6ONLY sockopt
stmmac: troubleshoot unexpected bits in des0 & des1
net: ipv4 sysctl option to ignore routes when nexthop link is down
net: track link-status of ipv4 nexthops
net: switchdev: ignore unsupported bridge flags
net: Cavium: Fix MAC address setting in shutdown state
drivers: net: xgene: fix for ACPI support without ACPI
ip: report the original address of ICMP messages
net/mlx5e: Prefetch skb data on RX
net/mlx5e: Pop cq outside mlx5e_get_cqe
net/mlx5e: Remove mlx5e_cq.sqrq back-pointer
net/mlx5e: Remove extra spaces
net/mlx5e: Avoid TX CQE generation if more xmit packets expected
net/mlx5e: Avoid redundant dev_kfree_skb() upon NOP completion
net/mlx5e: Remove re-assignment of wq type in mlx5e_enable_rq()
net/mlx5e: Use skb_shinfo(skb)->gso_segs rather than counting them
net/mlx5e: Static mapping of netdev priv resources to/from netdev TX queues
net/mlx4_en: Use HW counters for rx/tx bytes/packets in PF device
...
Diffstat (limited to 'drivers/net/ethernet/via/via-rhine.c')
-rw-r--r-- | drivers/net/ethernet/via/via-rhine.c | 250 |
1 file changed, 162 insertions, 88 deletions
diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c
index de2850497c09..a83263743665 100644
--- a/drivers/net/ethernet/via/via-rhine.c
+++ b/drivers/net/ethernet/via/via-rhine.c
@@ -96,7 +96,6 @@ static const int multicast_filter_limit = 32;
 #include <linux/ioport.h>
 #include <linux/interrupt.h>
 #include <linux/pci.h>
-#include <linux/of_address.h>
 #include <linux/of_device.h>
 #include <linux/of_irq.h>
 #include <linux/platform_device.h>
@@ -472,8 +471,7 @@ struct rhine_private {

 	/* Frequently used values: keep some adjacent for cache effect. */
 	u32 quirks;
-	struct rx_desc *rx_head_desc;
-	unsigned int cur_rx, dirty_rx;	/* Producer/consumer ring indices */
+	unsigned int cur_rx;
 	unsigned int cur_tx, dirty_tx;
 	unsigned int rx_buf_sz;		/* Based on MTU+slack. */
 	struct rhine_stats rx_stats;
@@ -1213,17 +1211,61 @@ static void free_ring(struct net_device* dev)

 }

-static void alloc_rbufs(struct net_device *dev)
+struct rhine_skb_dma {
+	struct sk_buff *skb;
+	dma_addr_t dma;
+};
+
+static inline int rhine_skb_dma_init(struct net_device *dev,
+				     struct rhine_skb_dma *sd)
 {
 	struct rhine_private *rp = netdev_priv(dev);
 	struct device *hwdev = dev->dev.parent;
-	dma_addr_t next;
+	const int size = rp->rx_buf_sz;
+
+	sd->skb = netdev_alloc_skb(dev, size);
+	if (!sd->skb)
+		return -ENOMEM;
+
+	sd->dma = dma_map_single(hwdev, sd->skb->data, size, DMA_FROM_DEVICE);
+	if (unlikely(dma_mapping_error(hwdev, sd->dma))) {
+		netif_err(rp, drv, dev, "Rx DMA mapping failure\n");
+		dev_kfree_skb_any(sd->skb);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static void rhine_reset_rbufs(struct rhine_private *rp)
+{
 	int i;

-	rp->dirty_rx = rp->cur_rx = 0;
+	rp->cur_rx = 0;
+
+	for (i = 0; i < RX_RING_SIZE; i++)
+		rp->rx_ring[i].rx_status = cpu_to_le32(DescOwn);
+}
+
+static inline void rhine_skb_dma_nic_store(struct rhine_private *rp,
+					   struct rhine_skb_dma *sd, int entry)
+{
+	rp->rx_skbuff_dma[entry] = sd->dma;
+	rp->rx_skbuff[entry] = sd->skb;
+
+	rp->rx_ring[entry].addr = cpu_to_le32(sd->dma);
+	dma_wmb();
+}
+
+static void free_rbufs(struct net_device* dev);
+
+static int alloc_rbufs(struct net_device *dev)
+{
+	struct rhine_private *rp = netdev_priv(dev);
+	dma_addr_t next;
+	int rc, i;

 	rp->rx_buf_sz = (dev->mtu <= 1500 ? PKT_BUF_SZ : dev->mtu + 32);
-	rp->rx_head_desc = &rp->rx_ring[0];
 	next = rp->rx_ring_dma;

 	/* Init the ring entries */
@@ -1239,23 +1281,20 @@ static void alloc_rbufs(struct net_device *dev)

 	/* Fill in the Rx buffers.  Handle allocation failure gracefully. */
 	for (i = 0; i < RX_RING_SIZE; i++) {
-		struct sk_buff *skb = netdev_alloc_skb(dev, rp->rx_buf_sz);
-		rp->rx_skbuff[i] = skb;
-		if (skb == NULL)
-			break;
+		struct rhine_skb_dma sd;

-		rp->rx_skbuff_dma[i] =
-			dma_map_single(hwdev, skb->data, rp->rx_buf_sz,
-				       DMA_FROM_DEVICE);
-		if (dma_mapping_error(hwdev, rp->rx_skbuff_dma[i])) {
-			rp->rx_skbuff_dma[i] = 0;
-			dev_kfree_skb(skb);
-			break;
+		rc = rhine_skb_dma_init(dev, &sd);
+		if (rc < 0) {
+			free_rbufs(dev);
+			goto out;
 		}
-		rp->rx_ring[i].addr = cpu_to_le32(rp->rx_skbuff_dma[i]);
-		rp->rx_ring[i].rx_status = cpu_to_le32(DescOwn);
+
+		rhine_skb_dma_nic_store(rp, &sd, i);
 	}
-	rp->dirty_rx = (unsigned int)(i - RX_RING_SIZE);
+
+	rhine_reset_rbufs(rp);
+out:
+	return rc;
 }

 static void free_rbufs(struct net_device* dev)
@@ -1659,16 +1698,18 @@ static int rhine_open(struct net_device *dev)
 	rc = request_irq(rp->irq, rhine_interrupt, IRQF_SHARED, dev->name,
 			dev);
 	if (rc)
-		return rc;
+		goto out;

 	netif_dbg(rp, ifup, dev, "%s() irq %d\n", __func__, rp->irq);

 	rc = alloc_ring(dev);
-	if (rc) {
-		free_irq(rp->irq, dev);
-		return rc;
-	}
-	alloc_rbufs(dev);
+	if (rc < 0)
+		goto out_free_irq;
+
+	rc = alloc_rbufs(dev);
+	if (rc < 0)
+		goto out_free_ring;
+
 	alloc_tbufs(dev);
 	rhine_chip_reset(dev);
 	rhine_task_enable(rp);
@@ -1680,7 +1721,14 @@ static int rhine_open(struct net_device *dev)

 	netif_start_queue(dev);

-	return 0;
+out:
+	return rc;
+
+out_free_ring:
+	free_ring(dev);
+out_free_irq:
+	free_irq(rp->irq, dev);
+	goto out;
 }

 static void rhine_reset_task(struct work_struct *work)
@@ -1700,9 +1748,9 @@ static void rhine_reset_task(struct work_struct *work)

 	/* clear all descriptors */
 	free_tbufs(dev);
-	free_rbufs(dev);
 	alloc_tbufs(dev);
-	alloc_rbufs(dev);
+
+	rhine_reset_rbufs(rp);

 	/* Reinitialize the hardware. */
 	rhine_chip_reset(dev);
@@ -1730,6 +1778,11 @@ static void rhine_tx_timeout(struct net_device *dev)
 	schedule_work(&rp->reset_task);
 }

+static inline bool rhine_tx_queue_full(struct rhine_private *rp)
+{
+	return (rp->cur_tx - rp->dirty_tx) >= TX_QUEUE_LEN;
+}
+
 static netdev_tx_t rhine_start_tx(struct sk_buff *skb,
 				  struct net_device *dev)
 {
@@ -1800,11 +1853,17 @@ static netdev_tx_t rhine_start_tx(struct sk_buff *skb,
 	netdev_sent_queue(dev, skb->len);

 	/* lock eth irq */
-	wmb();
+	dma_wmb();
 	rp->tx_ring[entry].tx_status |= cpu_to_le32(DescOwn);
 	wmb();

 	rp->cur_tx++;
+	/*
+	 * Nobody wants cur_tx write to rot for ages after the NIC will have
+	 * seen the transmit request, especially as the transmit completion
+	 * handler could miss it.
+	 */
+	smp_wmb();

 	/* Non-x86 Todo: explicitly flush cache lines here. */

@@ -1817,8 +1876,14 @@ static netdev_tx_t rhine_start_tx(struct sk_buff *skb,
 		       ioaddr + ChipCmd1);
 	IOSYNC;

-	if (rp->cur_tx == rp->dirty_tx + TX_QUEUE_LEN)
+	/* dirty_tx may be pessimistically out-of-sync. See rhine_tx. */
+	if (rhine_tx_queue_full(rp)) {
 		netif_stop_queue(dev);
+		smp_rmb();
+		/* Rejuvenate. */
+		if (!rhine_tx_queue_full(rp))
+			netif_wake_queue(dev);
+	}

 	netif_dbg(rp, tx_queued, dev, "Transmit frame #%d queued in slot %d\n",
 		  rp->cur_tx - 1, entry);
@@ -1866,13 +1931,24 @@ static void rhine_tx(struct net_device *dev)
 {
 	struct rhine_private *rp = netdev_priv(dev);
 	struct device *hwdev = dev->dev.parent;
-	int txstatus = 0, entry = rp->dirty_tx % TX_RING_SIZE;
 	unsigned int pkts_compl = 0, bytes_compl = 0;
+	unsigned int dirty_tx = rp->dirty_tx;
+	unsigned int cur_tx;
 	struct sk_buff *skb;

+	/*
+	 * The race with rhine_start_tx does not matter here as long as the
+	 * driver enforces a value of cur_tx that was relevant when the
+	 * packet was scheduled to the network chipset.
+	 * Executive summary: smp_rmb() balances smp_wmb() in rhine_start_tx.
+	 */
+	smp_rmb();
+	cur_tx = rp->cur_tx;
 	/* find and cleanup dirty tx descriptors */
-	while (rp->dirty_tx != rp->cur_tx) {
-		txstatus = le32_to_cpu(rp->tx_ring[entry].tx_status);
+	while (dirty_tx != cur_tx) {
+		unsigned int entry = dirty_tx % TX_RING_SIZE;
+		u32 txstatus = le32_to_cpu(rp->tx_ring[entry].tx_status);
+
 		netif_dbg(rp, tx_done, dev, "Tx scavenge %d status %08x\n",
 			  entry, txstatus);
 		if (txstatus & DescOwn)
@@ -1921,12 +1997,23 @@ static void rhine_tx(struct net_device *dev)
 		pkts_compl++;
 		dev_consume_skb_any(skb);
 		rp->tx_skbuff[entry] = NULL;
-		entry = (++rp->dirty_tx) % TX_RING_SIZE;
+		dirty_tx++;
 	}

+	rp->dirty_tx = dirty_tx;
+	/* Pity we can't rely on the nearby BQL completion implicit barrier. */
+	smp_wmb();
+
 	netdev_completed_queue(dev, pkts_compl, bytes_compl);
-	if ((rp->cur_tx - rp->dirty_tx) < TX_QUEUE_LEN - 4)
+
+	/* cur_tx may be optimistically out-of-sync. See rhine_start_tx. */
+	if (!rhine_tx_queue_full(rp) && netif_queue_stopped(dev)) {
 		netif_wake_queue(dev);
+		smp_rmb();
+		/* Rejuvenate. */
+		if (rhine_tx_queue_full(rp))
+			netif_stop_queue(dev);
+	}
 }

 /**
@@ -1944,22 +2031,33 @@ static inline u16 rhine_get_vlan_tci(struct sk_buff *skb, int data_size)
 	return be16_to_cpup((__be16 *)trailer);
 }

+static inline void rhine_rx_vlan_tag(struct sk_buff *skb, struct rx_desc *desc,
+				     int data_size)
+{
+	dma_rmb();
+	if (unlikely(desc->desc_length & cpu_to_le32(DescTag))) {
+		u16 vlan_tci;
+
+		vlan_tci = rhine_get_vlan_tci(skb, data_size);
+		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci);
+	}
+}
+
 /* Process up to limit frames from receive ring */
 static int rhine_rx(struct net_device *dev, int limit)
 {
 	struct rhine_private *rp = netdev_priv(dev);
 	struct device *hwdev = dev->dev.parent;
-	int count;
 	int entry = rp->cur_rx % RX_RING_SIZE;
+	int count;

 	netif_dbg(rp, rx_status, dev, "%s(), entry %d status %08x\n", __func__,
-		  entry, le32_to_cpu(rp->rx_head_desc->rx_status));
+		  entry, le32_to_cpu(rp->rx_ring[entry].rx_status));

 	/* If EOP is set on the next entry, it's a new packet. Send it up. */
 	for (count = 0; count < limit; ++count) {
-		struct rx_desc *desc = rp->rx_head_desc;
+		struct rx_desc *desc = rp->rx_ring + entry;
 		u32 desc_status = le32_to_cpu(desc->rx_status);
-		u32 desc_length = le32_to_cpu(desc->desc_length);
 		int data_size = desc_status >> 16;

 		if (desc_status & DescOwn)
@@ -1975,10 +2073,6 @@ static int rhine_rx(struct net_device *dev, int limit)
 					    "entry %#x length %d status %08x!\n",
 					    entry, data_size,
 					    desc_status);
-			netdev_warn(dev,
-				    "Oversized Ethernet frame %p vs %p\n",
-				    rp->rx_head_desc,
-				    &rp->rx_ring[entry]);
 			dev->stats.rx_length_errors++;
 		} else if (desc_status & RxErr) {
 			/* There was a error. */
@@ -2000,16 +2094,17 @@ static int rhine_rx(struct net_device *dev, int limit)
 				}
 			}
 		} else {
-			struct sk_buff *skb = NULL;
 			/* Length should omit the CRC */
 			int pkt_len = data_size - 4;
-			u16 vlan_tci = 0;
+			struct sk_buff *skb;

 			/* Check if the packet is long enough to accept without
			   copying to a minimally-sized skbuff. */
-			if (pkt_len < rx_copybreak)
+			if (pkt_len < rx_copybreak) {
 				skb = netdev_alloc_skb_ip_align(dev, pkt_len);
-			if (skb) {
+				if (unlikely(!skb))
+					goto drop;
+
 				dma_sync_single_for_cpu(hwdev,
							rp->rx_skbuff_dma[entry],
							rp->rx_buf_sz,
@@ -2018,32 +2113,31 @@ static int rhine_rx(struct net_device *dev, int limit)
 				skb_copy_to_linear_data(skb,
						 rp->rx_skbuff[entry]->data,
						 pkt_len);
-				skb_put(skb, pkt_len);
+
 				dma_sync_single_for_device(hwdev,
							   rp->rx_skbuff_dma[entry],
							   rp->rx_buf_sz,
							   DMA_FROM_DEVICE);
 			} else {
+				struct rhine_skb_dma sd;
+
+				if (unlikely(rhine_skb_dma_init(dev, &sd) < 0))
+					goto drop;
+
 				skb = rp->rx_skbuff[entry];
-				if (skb == NULL) {
-					netdev_err(dev, "Inconsistent Rx descriptor chain\n");
-					break;
-				}
-				rp->rx_skbuff[entry] = NULL;
-				skb_put(skb, pkt_len);
+
 				dma_unmap_single(hwdev,
						 rp->rx_skbuff_dma[entry],
						 rp->rx_buf_sz,
						 DMA_FROM_DEVICE);
+				rhine_skb_dma_nic_store(rp, &sd, entry);
 			}

-			if (unlikely(desc_length & DescTag))
-				vlan_tci = rhine_get_vlan_tci(skb, data_size);
-
+			skb_put(skb, pkt_len);
 			skb->protocol = eth_type_trans(skb, dev);

-			if (unlikely(desc_length & DescTag))
-				__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci);
+			rhine_rx_vlan_tag(skb, desc, data_size);
+
 			netif_receive_skb(skb);

 			u64_stats_update_begin(&rp->rx_stats.syncp);
@@ -2051,35 +2145,16 @@ static int rhine_rx(struct net_device *dev, int limit)
 			rp->rx_stats.packets++;
 			u64_stats_update_end(&rp->rx_stats.syncp);
 		}
+give_descriptor_to_nic:
+		desc->rx_status = cpu_to_le32(DescOwn);
 		entry = (++rp->cur_rx) % RX_RING_SIZE;
-		rp->rx_head_desc = &rp->rx_ring[entry];
-	}
-
-	/* Refill the Rx ring buffers. */
-	for (; rp->cur_rx - rp->dirty_rx > 0; rp->dirty_rx++) {
-		struct sk_buff *skb;
-		entry = rp->dirty_rx % RX_RING_SIZE;
-		if (rp->rx_skbuff[entry] == NULL) {
-			skb = netdev_alloc_skb(dev, rp->rx_buf_sz);
-			rp->rx_skbuff[entry] = skb;
-			if (skb == NULL)
-				break;	/* Better luck next round. */
-			rp->rx_skbuff_dma[entry] =
-				dma_map_single(hwdev, skb->data,
-					       rp->rx_buf_sz,
-					       DMA_FROM_DEVICE);
-			if (dma_mapping_error(hwdev,
-					      rp->rx_skbuff_dma[entry])) {
-				dev_kfree_skb(skb);
-				rp->rx_skbuff_dma[entry] = 0;
-				break;
-			}
-			rp->rx_ring[entry].addr = cpu_to_le32(rp->rx_skbuff_dma[entry]);
-		}
-		rp->rx_ring[entry].rx_status = cpu_to_le32(DescOwn);
 	}

 	return count;
+
+drop:
+	dev->stats.rx_dropped++;
+	goto give_descriptor_to_nic;
 }

 static void rhine_restart_tx(struct net_device *dev) {
@@ -2484,9 +2559,8 @@ static int rhine_resume(struct device *device)
 	enable_mmio(rp->pioaddr, rp->quirks);
 	rhine_power_init(dev);
 	free_tbufs(dev);
-	free_rbufs(dev);
 	alloc_tbufs(dev);
-	alloc_rbufs(dev);
+	rhine_reset_rbufs(rp);
 	rhine_task_enable(rp);
 	spin_lock_bh(&rp->lock);
 	init_registers(dev);