summaryrefslogtreecommitdiff
path: root/net/core/dev.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/core/dev.c')
-rw-r--r--net/core/dev.c220
1 files changed, 173 insertions, 47 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index aa82f9ab6a36..6778a9999d52 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -135,6 +135,7 @@
#include <linux/if_macvlan.h>
#include <linux/errqueue.h>
#include <linux/hrtimer.h>
+#include <linux/netfilter_ingress.h>
#include "net-sysfs.h"
@@ -468,10 +469,14 @@ EXPORT_SYMBOL(dev_remove_pack);
*/
void dev_add_offload(struct packet_offload *po)
{
- struct list_head *head = &offload_base;
+ struct packet_offload *elem;
spin_lock(&offload_lock);
- list_add_rcu(&po->list, head);
+ list_for_each_entry(elem, &offload_base, list) {
+ if (po->priority < elem->priority)
+ break;
+ }
+ list_add_rcu(&po->list, elem->list.prev);
spin_unlock(&offload_lock);
}
EXPORT_SYMBOL(dev_add_offload);
@@ -1630,7 +1635,7 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
}
EXPORT_SYMBOL(call_netdevice_notifiers);
-#ifdef CONFIG_NET_CLS_ACT
+#ifdef CONFIG_NET_INGRESS
static struct static_key ingress_needed __read_mostly;
void net_inc_ingress_queue(void)
@@ -2343,6 +2348,34 @@ void netif_device_attach(struct net_device *dev)
}
EXPORT_SYMBOL(netif_device_attach);
+/*
+ * Returns a Tx hash based on the given packet descriptor a Tx queues' number
+ * to be used as a distribution range.
+ */
+u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb,
+ unsigned int num_tx_queues)
+{
+ u32 hash;
+ u16 qoffset = 0;
+ u16 qcount = num_tx_queues;
+
+ if (skb_rx_queue_recorded(skb)) {
+ hash = skb_get_rx_queue(skb);
+ while (unlikely(hash >= num_tx_queues))
+ hash -= num_tx_queues;
+ return hash;
+ }
+
+ if (dev->num_tc) {
+ u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
+ qoffset = dev->tc_to_txq[tc].offset;
+ qcount = dev->tc_to_txq[tc].count;
+ }
+
+ return (u16) reciprocal_scale(skb_get_hash(skb), qcount) + qoffset;
+}
+EXPORT_SYMBOL(__skb_tx_hash);
+
static void skb_warn_bad_offload(const struct sk_buff *skb)
{
static const netdev_features_t null_features = 0;
@@ -2901,6 +2934,84 @@ int dev_loopback_xmit(struct sock *sk, struct sk_buff *skb)
}
EXPORT_SYMBOL(dev_loopback_xmit);
+static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
+{
+#ifdef CONFIG_XPS
+ struct xps_dev_maps *dev_maps;
+ struct xps_map *map;
+ int queue_index = -1;
+
+ rcu_read_lock();
+ dev_maps = rcu_dereference(dev->xps_maps);
+ if (dev_maps) {
+ map = rcu_dereference(
+ dev_maps->cpu_map[skb->sender_cpu - 1]);
+ if (map) {
+ if (map->len == 1)
+ queue_index = map->queues[0];
+ else
+ queue_index = map->queues[reciprocal_scale(skb_get_hash(skb),
+ map->len)];
+ if (unlikely(queue_index >= dev->real_num_tx_queues))
+ queue_index = -1;
+ }
+ }
+ rcu_read_unlock();
+
+ return queue_index;
+#else
+ return -1;
+#endif
+}
+
+static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
+{
+ struct sock *sk = skb->sk;
+ int queue_index = sk_tx_queue_get(sk);
+
+ if (queue_index < 0 || skb->ooo_okay ||
+ queue_index >= dev->real_num_tx_queues) {
+ int new_index = get_xps_queue(dev, skb);
+ if (new_index < 0)
+ new_index = skb_tx_hash(dev, skb);
+
+ if (queue_index != new_index && sk &&
+ rcu_access_pointer(sk->sk_dst_cache))
+ sk_tx_queue_set(sk, new_index);
+
+ queue_index = new_index;
+ }
+
+ return queue_index;
+}
+
+struct netdev_queue *netdev_pick_tx(struct net_device *dev,
+ struct sk_buff *skb,
+ void *accel_priv)
+{
+ int queue_index = 0;
+
+#ifdef CONFIG_XPS
+ if (skb->sender_cpu == 0)
+ skb->sender_cpu = raw_smp_processor_id() + 1;
+#endif
+
+ if (dev->real_num_tx_queues != 1) {
+ const struct net_device_ops *ops = dev->netdev_ops;
+ if (ops->ndo_select_queue)
+ queue_index = ops->ndo_select_queue(dev, skb, accel_priv,
+ __netdev_pick_tx);
+ else
+ queue_index = __netdev_pick_tx(dev, skb);
+
+ if (!accel_priv)
+ queue_index = netdev_cap_txqueue(dev, queue_index);
+ }
+
+ skb_set_queue_mapping(skb, queue_index);
+ return netdev_get_tx_queue(dev, queue_index);
+}
+
/**
* __dev_queue_xmit - transmit a buffer
* @skb: buffer to transmit
@@ -3513,66 +3624,47 @@ int (*br_fdb_test_addr_hook)(struct net_device *dev,
EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
#endif
-#ifdef CONFIG_NET_CLS_ACT
-/* TODO: Maybe we should just force sch_ingress to be compiled in
- * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
- * a compare and 2 stores extra right now if we dont have it on
- * but have CONFIG_NET_CLS_ACT
- * NOTE: This doesn't stop any functionality; if you dont have
- * the ingress scheduler, you just can't add policies on ingress.
- *
- */
-static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq)
-{
- struct net_device *dev = skb->dev;
- u32 ttl = G_TC_RTTL(skb->tc_verd);
- int result = TC_ACT_OK;
- struct Qdisc *q;
-
- if (unlikely(MAX_RED_LOOP < ttl++)) {
- net_warn_ratelimited("Redir loop detected Dropping packet (%d->%d)\n",
- skb->skb_iif, dev->ifindex);
- return TC_ACT_SHOT;
- }
-
- skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
- skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
-
- q = rcu_dereference(rxq->qdisc);
- if (q != &noop_qdisc) {
- spin_lock(qdisc_lock(q));
- if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
- result = qdisc_enqueue_root(skb, q);
- spin_unlock(qdisc_lock(q));
- }
-
- return result;
-}
-
static inline struct sk_buff *handle_ing(struct sk_buff *skb,
struct packet_type **pt_prev,
int *ret, struct net_device *orig_dev)
{
- struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue);
+#ifdef CONFIG_NET_CLS_ACT
+ struct tcf_proto *cl = rcu_dereference_bh(skb->dev->ingress_cl_list);
+ struct tcf_result cl_res;
- if (!rxq || rcu_access_pointer(rxq->qdisc) == &noop_qdisc)
+ /* If there's at least one ingress present somewhere (so
+ * we get here via enabled static key), remaining devices
+ * that are not configured with an ingress qdisc will bail
+ * out here.
+ */
+ if (!cl)
return skb;
-
if (*pt_prev) {
*ret = deliver_skb(skb, *pt_prev, orig_dev);
*pt_prev = NULL;
}
- switch (ing_filter(skb, rxq)) {
+ qdisc_skb_cb(skb)->pkt_len = skb->len;
+ skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
+ qdisc_bstats_update_cpu(cl->q, skb);
+
+ switch (tc_classify(skb, cl, &cl_res)) {
+ case TC_ACT_OK:
+ case TC_ACT_RECLASSIFY:
+ skb->tc_index = TC_H_MIN(cl_res.classid);
+ break;
case TC_ACT_SHOT:
+ qdisc_qstats_drop_cpu(cl->q);
case TC_ACT_STOLEN:
+ case TC_ACT_QUEUED:
kfree_skb(skb);
return NULL;
+ default:
+ break;
}
-
+#endif /* CONFIG_NET_CLS_ACT */
return skb;
}
-#endif
/**
* netdev_rx_handler_register - register receive handler
@@ -3645,6 +3737,22 @@ static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
}
}
+static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev,
+ int *ret, struct net_device *orig_dev)
+{
+#ifdef CONFIG_NETFILTER_INGRESS
+ if (nf_hook_ingress_active(skb)) {
+ if (*pt_prev) {
+ *ret = deliver_skb(skb, *pt_prev, orig_dev);
+ *pt_prev = NULL;
+ }
+
+ return nf_hook_ingress(skb);
+ }
+#endif /* CONFIG_NETFILTER_INGRESS */
+ return 0;
+}
+
static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
{
struct packet_type *ptype, *pt_prev;
@@ -3704,13 +3812,17 @@ another_round:
}
skip_taps:
-#ifdef CONFIG_NET_CLS_ACT
+#ifdef CONFIG_NET_INGRESS
if (static_key_false(&ingress_needed)) {
skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
if (!skb)
goto unlock;
- }
+ if (nf_ingress(skb, &pt_prev, &ret, orig_dev) < 0)
+ goto unlock;
+ }
+#endif
+#ifdef CONFIG_NET_CLS_ACT
skb->tc_verd = 0;
ncls:
#endif
@@ -6313,6 +6425,17 @@ static int netif_alloc_netdev_queues(struct net_device *dev)
return 0;
}
+void netif_tx_stop_all_queues(struct net_device *dev)
+{
+ unsigned int i;
+
+ for (i = 0; i < dev->num_tx_queues; i++) {
+ struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
+ netif_tx_stop_queue(txq);
+ }
+}
+EXPORT_SYMBOL(netif_tx_stop_all_queues);
+
/**
* register_netdevice - register a network device
* @dev: device to register
@@ -6862,6 +6985,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev->group = INIT_NETDEV_GROUP;
if (!dev->ethtool_ops)
dev->ethtool_ops = &default_ethtool_ops;
+
+ nf_hook_ingress_init(dev);
+
return dev;
free_all: