summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/net/Kconfig8
-rw-r--r--include/linux/etherdevice.h3
-rw-r--r--include/linux/netdevice.h80
-rw-r--r--include/linux/skbuff.h25
-rw-r--r--net/core/dev.c36
-rw-r--r--net/core/netpoll.c8
-rw-r--r--net/core/pktgen.c10
-rw-r--r--net/core/skbuff.c3
-rw-r--r--net/ethernet/eth.c9
-rw-r--r--net/sched/sch_teql.c6
10 files changed, 158 insertions, 30 deletions
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index c251cca295c1..d4e39ff1545b 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -25,6 +25,14 @@ menuconfig NETDEVICES
# that for each of the symbols.
if NETDEVICES
+config NETDEVICES_MULTIQUEUE
+ bool "Netdevice multiple hardware queue support"
+ ---help---
+ Say Y here if you want to allow the network stack to use multiple
+ hardware TX queues on an ethernet device.
+
+ Most people will say N here.
+
config IFB
tristate "Intermediate Functional Block support"
depends on NET_CLS_ACT
diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index f48eb89efd0f..6cdb97365e47 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -39,7 +39,8 @@ extern void eth_header_cache_update(struct hh_cache *hh, struct net_device *dev
extern int eth_header_cache(struct neighbour *neigh,
struct hh_cache *hh);
-extern struct net_device *alloc_etherdev(int sizeof_priv);
+extern struct net_device *alloc_etherdev_mq(int sizeof_priv, unsigned int queue_count);
+#define alloc_etherdev(sizeof_priv) alloc_etherdev_mq(sizeof_priv, 1)
/**
* is_zero_ether_addr - Determine if give Ethernet address is all zeros.
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2c0cc19edfb2..9817821729c4 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -108,6 +108,14 @@ struct wireless_dev;
#define MAX_HEADER (LL_MAX_HEADER + 48)
#endif
+struct net_device_subqueue
+{
+ /* Give a control state for each queue. This struct may contain
+ * per-queue locks in the future.
+ */
+ unsigned long state;
+};
+
/*
* Network device statistics. Akin to the 2.0 ether stats but
* with byte counters.
@@ -331,6 +339,7 @@ struct net_device
#define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */
#define NETIF_F_GSO 2048 /* Enable software GSO. */
#define NETIF_F_LLTX 4096 /* LockLess TX */
+#define NETIF_F_MULTI_QUEUE 16384 /* Has multiple TX/RX queues */
/* Segmentation offload features */
#define NETIF_F_GSO_SHIFT 16
@@ -557,6 +566,10 @@ struct net_device
/* rtnetlink link ops */
const struct rtnl_link_ops *rtnl_link_ops;
+
+ /* The TX queue control structures */
+ unsigned int egress_subqueue_count;
+ struct net_device_subqueue egress_subqueue[0];
};
#define to_net_dev(d) container_of(d, struct net_device, dev)
@@ -565,9 +578,7 @@ struct net_device
static inline void *netdev_priv(const struct net_device *dev)
{
- return (char *)dev + ((sizeof(struct net_device)
- + NETDEV_ALIGN_CONST)
- & ~NETDEV_ALIGN_CONST);
+ return dev->priv;
}
#define SET_MODULE_OWNER(dev) do { } while (0)
@@ -719,6 +730,62 @@ static inline int netif_running(const struct net_device *dev)
return test_bit(__LINK_STATE_START, &dev->state);
}
+/*
+ * Routines to manage the subqueues on a device. We only need start
+ * stop, and a check if it's stopped. All other device management is
+ * done at the overall netdevice level.
+ * Also test the device if we're multiqueue.
+ */
+static inline void netif_start_subqueue(struct net_device *dev, u16 queue_index)
+{
+#ifdef CONFIG_NETDEVICES_MULTIQUEUE
+ clear_bit(__LINK_STATE_XOFF, &dev->egress_subqueue[queue_index].state);
+#endif
+}
+
+static inline void netif_stop_subqueue(struct net_device *dev, u16 queue_index)
+{
+#ifdef CONFIG_NETDEVICES_MULTIQUEUE
+#ifdef CONFIG_NETPOLL_TRAP
+ if (netpoll_trap())
+ return;
+#endif
+ set_bit(__LINK_STATE_XOFF, &dev->egress_subqueue[queue_index].state);
+#endif
+}
+
+static inline int netif_subqueue_stopped(const struct net_device *dev,
+ u16 queue_index)
+{
+#ifdef CONFIG_NETDEVICES_MULTIQUEUE
+ return test_bit(__LINK_STATE_XOFF,
+ &dev->egress_subqueue[queue_index].state);
+#else
+ return 0;
+#endif
+}
+
+static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
+{
+#ifdef CONFIG_NETDEVICES_MULTIQUEUE
+#ifdef CONFIG_NETPOLL_TRAP
+ if (netpoll_trap())
+ return;
+#endif
+ if (test_and_clear_bit(__LINK_STATE_XOFF,
+ &dev->egress_subqueue[queue_index].state))
+ __netif_schedule(dev);
+#endif
+}
+
+static inline int netif_is_multiqueue(const struct net_device *dev)
+{
+#ifdef CONFIG_NETDEVICES_MULTIQUEUE
+ return (!!(NETIF_F_MULTI_QUEUE & dev->features));
+#else
+ return 0;
+#endif
+}
/* Use this variant when it is known for sure that it
* is executing from interrupt context.
@@ -1009,8 +1076,11 @@ static inline void netif_tx_disable(struct net_device *dev)
extern void ether_setup(struct net_device *dev);
/* Support for loadable net-drivers */
-extern struct net_device *alloc_netdev(int sizeof_priv, const char *name,
- void (*setup)(struct net_device *));
+extern struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
+ void (*setup)(struct net_device *),
+ unsigned int queue_count);
+#define alloc_netdev(sizeof_priv, name, setup) \
+ alloc_netdev_mq(sizeof_priv, name, setup, 1)
extern int register_netdev(struct net_device *dev);
extern void unregister_netdev(struct net_device *dev);
/* Functions used for secondary unicast and multicast support */
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 881fe80f01d0..2d6a14f5f2f1 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -196,7 +196,6 @@ typedef unsigned char *sk_buff_data_t;
* @sk: Socket we are owned by
* @tstamp: Time we arrived
* @dev: Device we arrived on/are leaving by
- * @iif: ifindex of device we arrived on
* @transport_header: Transport layer header
* @network_header: Network layer header
* @mac_header: Link layer header
@@ -231,6 +230,8 @@ typedef unsigned char *sk_buff_data_t;
* @nfctinfo: Relationship of this skb to the connection
* @nfct_reasm: netfilter conntrack re-assembly pointer
* @nf_bridge: Saved data about a bridged frame - see br_netfilter.c
+ * @iif: ifindex of device we arrived on
+ * @queue_mapping: Queue mapping for multiqueue devices
* @tc_index: Traffic control index
* @tc_verd: traffic control verdict
* @dma_cookie: a cookie to one of several possible DMA operations
@@ -246,8 +247,6 @@ struct sk_buff {
struct sock *sk;
ktime_t tstamp;
struct net_device *dev;
- int iif;
- /* 4 byte hole on 64 bit*/
struct dst_entry *dst;
struct sec_path *sp;
@@ -290,12 +289,18 @@ struct sk_buff {
#ifdef CONFIG_BRIDGE_NETFILTER
struct nf_bridge_info *nf_bridge;
#endif
+
+ int iif;
+ __u16 queue_mapping;
+
#ifdef CONFIG_NET_SCHED
__u16 tc_index; /* traffic control index */
#ifdef CONFIG_NET_CLS_ACT
__u16 tc_verd; /* traffic control verdict */
#endif
#endif
+ /* 2 byte hole */
+
#ifdef CONFIG_NET_DMA
dma_cookie_t dma_cookie;
#endif
@@ -1725,6 +1730,20 @@ static inline void skb_init_secmark(struct sk_buff *skb)
{ }
#endif
+static inline void skb_set_queue_mapping(struct sk_buff *skb, u16 queue_mapping)
+{
+#ifdef CONFIG_NETDEVICES_MULTIQUEUE
+ skb->queue_mapping = queue_mapping;
+#endif
+}
+
+static inline void skb_copy_queue_mapping(struct sk_buff *to, const struct sk_buff *from)
+{
+#ifdef CONFIG_NETDEVICES_MULTIQUEUE
+ to->queue_mapping = from->queue_mapping;
+#endif
+}
+
static inline int skb_is_gso(const struct sk_buff *skb)
{
return skb_shinfo(skb)->gso_size;
diff --git a/net/core/dev.c b/net/core/dev.c
index 6dce9d2d46f2..7ddf66d0ad5e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1429,7 +1429,9 @@ gso:
skb->next = nskb;
return rc;
}
- if (unlikely(netif_queue_stopped(dev) && skb->next))
+ if (unlikely((netif_queue_stopped(dev) ||
+ netif_subqueue_stopped(dev, skb->queue_mapping)) &&
+ skb->next))
return NETDEV_TX_BUSY;
} while (skb->next);
@@ -1547,6 +1549,8 @@ gso:
spin_lock(&dev->queue_lock);
q = dev->qdisc;
if (q->enqueue) {
+ /* reset queue_mapping to zero */
+ skb->queue_mapping = 0;
rc = q->enqueue(skb, q);
qdisc_run(dev);
spin_unlock(&dev->queue_lock);
@@ -1576,7 +1580,8 @@ gso:
HARD_TX_LOCK(dev, cpu);
- if (!netif_queue_stopped(dev)) {
+ if (!netif_queue_stopped(dev) &&
+ !netif_subqueue_stopped(dev, skb->queue_mapping)) {
rc = 0;
if (!dev_hard_start_xmit(skb, dev)) {
HARD_TX_UNLOCK(dev);
@@ -3539,16 +3544,18 @@ static struct net_device_stats *internal_stats(struct net_device *dev)
}
/**
- * alloc_netdev - allocate network device
+ * alloc_netdev_mq - allocate network device
* @sizeof_priv: size of private data to allocate space for
* @name: device name format string
* @setup: callback to initialize device
+ * @queue_count: the number of subqueues to allocate
*
* Allocates a struct net_device with private data area for driver use
- * and performs basic initialization.
+ * and performs basic initialization. Also allocates subquue structs
+ * for each queue on the device at the end of the netdevice.
*/
-struct net_device *alloc_netdev(int sizeof_priv, const char *name,
- void (*setup)(struct net_device *))
+struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
+ void (*setup)(struct net_device *), unsigned int queue_count)
{
void *p;
struct net_device *dev;
@@ -3557,7 +3564,9 @@ struct net_device *alloc_netdev(int sizeof_priv, const char *name,
BUG_ON(strlen(name) >= sizeof(dev->name));
/* ensure 32-byte alignment of both the device and private area */
- alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
+ alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST +
+ (sizeof(struct net_device_subqueue) * queue_count)) &
+ ~NETDEV_ALIGN_CONST;
alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
p = kzalloc(alloc_size, GFP_KERNEL);
@@ -3570,15 +3579,22 @@ struct net_device *alloc_netdev(int sizeof_priv, const char *name,
(((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
dev->padded = (char *)dev - (char *)p;
- if (sizeof_priv)
- dev->priv = netdev_priv(dev);
+ if (sizeof_priv) {
+ dev->priv = ((char *)dev +
+ ((sizeof(struct net_device) +
+ (sizeof(struct net_device_subqueue) *
+ queue_count) + NETDEV_ALIGN_CONST)
+ & ~NETDEV_ALIGN_CONST));
+ }
+
+ dev->egress_subqueue_count = queue_count;
dev->get_stats = internal_stats;
setup(dev);
strcpy(dev->name, name);
return dev;
}
-EXPORT_SYMBOL(alloc_netdev);
+EXPORT_SYMBOL(alloc_netdev_mq);
/**
* free_netdev - free network device
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index a0efdd7a6b37..4b06d1936375 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -66,8 +66,9 @@ static void queue_process(struct work_struct *work)
local_irq_save(flags);
netif_tx_lock(dev);
- if (netif_queue_stopped(dev) ||
- dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) {
+ if ((netif_queue_stopped(dev) ||
+ netif_subqueue_stopped(dev, skb->queue_mapping)) ||
+ dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) {
skb_queue_head(&npinfo->txq, skb);
netif_tx_unlock(dev);
local_irq_restore(flags);
@@ -254,7 +255,8 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;
tries > 0; --tries) {
if (netif_tx_trylock(dev)) {
- if (!netif_queue_stopped(dev))
+ if (!netif_queue_stopped(dev) &&
+ !netif_subqueue_stopped(dev, skb->queue_mapping))
status = dev->hard_start_xmit(skb, dev);
netif_tx_unlock(dev);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 9cd3a1cb60ef..dffe067e7a7b 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3139,7 +3139,9 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
}
}
- if (netif_queue_stopped(odev) || need_resched()) {
+ if ((netif_queue_stopped(odev) ||
+ netif_subqueue_stopped(odev, pkt_dev->skb->queue_mapping)) ||
+ need_resched()) {
idle_start = getCurUs();
if (!netif_running(odev)) {
@@ -3154,7 +3156,8 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
pkt_dev->idle_acc += getCurUs() - idle_start;
- if (netif_queue_stopped(odev)) {
+ if (netif_queue_stopped(odev) ||
+ netif_subqueue_stopped(odev, pkt_dev->skb->queue_mapping)) {
pkt_dev->next_tx_us = getCurUs(); /* TODO */
pkt_dev->next_tx_ns = 0;
goto out; /* Try the next interface */
@@ -3181,7 +3184,8 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
}
netif_tx_lock_bh(odev);
- if (!netif_queue_stopped(odev)) {
+ if (!netif_queue_stopped(odev) &&
+ !netif_subqueue_stopped(odev, pkt_dev->skb->queue_mapping)) {
atomic_inc(&(pkt_dev->skb->users));
retry_now:
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index c989c3a0f907..6a41b96b3d37 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -419,6 +419,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
n->nohdr = 0;
C(pkt_type);
C(ip_summed);
+ skb_copy_queue_mapping(n, skb);
C(priority);
#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
C(ipvs_property);
@@ -460,6 +461,7 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
#endif
new->sk = NULL;
new->dev = old->dev;
+ skb_copy_queue_mapping(new, old);
new->priority = old->priority;
new->protocol = old->protocol;
new->dst = dst_clone(old->dst);
@@ -1932,6 +1934,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
tail = nskb;
nskb->dev = skb->dev;
+ skb_copy_queue_mapping(nskb, skb);
nskb->priority = skb->priority;
nskb->protocol = skb->protocol;
nskb->dst = dst_clone(skb->dst);
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 0ac2524f3b68..1387e5411f77 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -316,9 +316,10 @@ void ether_setup(struct net_device *dev)
EXPORT_SYMBOL(ether_setup);
/**
- * alloc_etherdev - Allocates and sets up an Ethernet device
+ * alloc_etherdev_mq - Allocates and sets up an Ethernet device
* @sizeof_priv: Size of additional driver-private structure to be allocated
* for this Ethernet device
+ * @queue_count: The number of queues this device has.
*
* Fill in the fields of the device structure with Ethernet-generic
* values. Basically does everything except registering the device.
@@ -328,8 +329,8 @@ EXPORT_SYMBOL(ether_setup);
* this private data area.
*/
-struct net_device *alloc_etherdev(int sizeof_priv)
+struct net_device *alloc_etherdev_mq(int sizeof_priv, unsigned int queue_count)
{
- return alloc_netdev(sizeof_priv, "eth%d", ether_setup);
+ return alloc_netdev_mq(sizeof_priv, "eth%d", ether_setup, queue_count);
}
-EXPORT_SYMBOL(alloc_etherdev);
+EXPORT_SYMBOL(alloc_etherdev_mq);
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index f05ad9a30b4c..dfe7e4520988 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -277,6 +277,7 @@ static int teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
int busy;
int nores;
int len = skb->len;
+ int subq = skb->queue_mapping;
struct sk_buff *skb_res = NULL;
start = master->slaves;
@@ -293,7 +294,9 @@ restart:
if (slave->qdisc_sleeping != q)
continue;
- if (netif_queue_stopped(slave) || ! netif_running(slave)) {
+ if (netif_queue_stopped(slave) ||
+ netif_subqueue_stopped(slave, subq) ||
+ !netif_running(slave)) {
busy = 1;
continue;
}
@@ -302,6 +305,7 @@ restart:
case 0:
if (netif_tx_trylock(slave)) {
if (!netif_queue_stopped(slave) &&
+ !netif_subqueue_stopped(slave, subq) &&
slave->hard_start_xmit(skb, slave) == 0) {
netif_tx_unlock(slave);
master->slaves = NEXT_SLAVE(q);