summaryrefslogtreecommitdiff
path: root/net/sched/sch_generic.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/sched/sch_generic.c')
-rw-r--r--net/sched/sch_generic.c137
1 files changed, 76 insertions, 61 deletions
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 5d7e23f4cc0e..852e603c1755 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -24,9 +24,11 @@
#include <linux/if_vlan.h>
#include <linux/skb_array.h>
#include <linux/if_macvlan.h>
+#include <linux/bpf.h>
#include <net/sch_generic.h>
#include <net/pkt_sched.h>
#include <net/dst.h>
+#include <net/hotdata.h>
#include <trace/events/qdisc.h>
#include <trace/events/net.h>
#include <net/xfrm.h>
@@ -178,9 +180,10 @@ static inline void dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
static void try_bulk_dequeue_skb(struct Qdisc *q,
struct sk_buff *skb,
const struct netdev_queue *txq,
- int *packets)
+ int *packets, int budget)
{
int bytelimit = qdisc_avail_bulklimit(txq) - skb->len;
+ int cnt = 0;
while (bytelimit > 0) {
struct sk_buff *nskb = q->dequeue(q);
@@ -191,8 +194,10 @@ static void try_bulk_dequeue_skb(struct Qdisc *q,
bytelimit -= nskb->len; /* covers GSO len */
skb->next = nskb;
skb = nskb;
- (*packets)++; /* GSO counts as one pkt */
+ if (++cnt >= budget)
+ break;
}
+ (*packets) += cnt;
skb_mark_not_on_list(skb);
}
@@ -226,7 +231,7 @@ static void try_bulk_dequeue_skb_slow(struct Qdisc *q,
* A requeued skb (via q->gso_skb) can also be a SKB list.
*/
static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
- int *packets)
+ int *packets, int budget)
{
const struct netdev_queue *txq = q->dev_queue;
struct sk_buff *skb = NULL;
@@ -293,7 +298,7 @@ validate:
if (skb) {
bulk:
if (qdisc_may_bulk(q))
- try_bulk_dequeue_skb(q, skb, txq, packets);
+ try_bulk_dequeue_skb(q, skb, txq, packets, budget);
else
try_bulk_dequeue_skb_slow(q, skb, packets);
}
@@ -385,7 +390,7 @@ bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
* >0 - queue is not empty.
*
*/
-static inline bool qdisc_restart(struct Qdisc *q, int *packets)
+static inline bool qdisc_restart(struct Qdisc *q, int *packets, int budget)
{
spinlock_t *root_lock = NULL;
struct netdev_queue *txq;
@@ -394,7 +399,7 @@ static inline bool qdisc_restart(struct Qdisc *q, int *packets)
bool validate;
/* Dequeue packet */
- skb = dequeue_skb(q, &validate, packets);
+ skb = dequeue_skb(q, &validate, packets, budget);
if (unlikely(!skb))
return false;
@@ -409,10 +414,10 @@ static inline bool qdisc_restart(struct Qdisc *q, int *packets)
void __qdisc_run(struct Qdisc *q)
{
- int quota = READ_ONCE(dev_tx_weight);
+ int quota = READ_ONCE(net_hotdata.dev_tx_weight);
int packets;
- while (qdisc_restart(q, &packets)) {
+ while (qdisc_restart(q, &packets, quota)) {
quota -= packets;
if (quota <= 0) {
if (q->flags & TCQ_F_NOLOCK)
@@ -494,7 +499,7 @@ EXPORT_SYMBOL(netif_tx_unlock);
static void dev_watchdog(struct timer_list *t)
{
- struct net_device *dev = from_timer(dev, t, watchdog_timer);
+ struct net_device *dev = timer_container_of(dev, t, watchdog_timer);
bool release = true;
spin_lock(&dev->tx_global_lock);
@@ -505,31 +510,41 @@ static void dev_watchdog(struct timer_list *t)
unsigned int timedout_ms = 0;
unsigned int i;
unsigned long trans_start;
+ unsigned long oldest_start = jiffies;
for (i = 0; i < dev->num_tx_queues; i++) {
struct netdev_queue *txq;
txq = netdev_get_tx_queue(dev, i);
+ if (!netif_xmit_stopped(txq))
+ continue;
+
+ /* Paired with WRITE_ONCE() + smp_mb...() in
+ * netdev_tx_sent_queue() and netif_tx_stop_queue().
+ */
+ smp_mb();
trans_start = READ_ONCE(txq->trans_start);
- if (netif_xmit_stopped(txq) &&
- time_after(jiffies, (trans_start +
- dev->watchdog_timeo))) {
+
+ if (time_after(jiffies, trans_start + dev->watchdog_timeo)) {
timedout_ms = jiffies_to_msecs(jiffies - trans_start);
atomic_long_inc(&txq->trans_timeout);
break;
}
+ if (time_after(oldest_start, trans_start))
+ oldest_start = trans_start;
}
if (unlikely(timedout_ms)) {
trace_net_dev_xmit_timeout(dev, i);
- WARN_ONCE(1, "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out %u ms\n",
- dev->name, netdev_drivername(dev), i, timedout_ms);
+ netdev_crit(dev, "NETDEV WATCHDOG: CPU: %d: transmit queue %u timed out %u ms\n",
+ raw_smp_processor_id(),
+ i, timedout_ms);
netif_freeze_queues(dev);
dev->netdev_ops->ndo_tx_timeout(dev, i);
netif_unfreeze_queues(dev);
}
if (!mod_timer(&dev->watchdog_timer,
- round_jiffies(jiffies +
+ round_jiffies(oldest_start +
dev->watchdog_timeo)))
release = false;
}
@@ -540,28 +555,23 @@ static void dev_watchdog(struct timer_list *t)
netdev_put(dev, &dev->watchdog_dev_tracker);
}
-void __netdev_watchdog_up(struct net_device *dev)
-{
- if (dev->netdev_ops->ndo_tx_timeout) {
- if (dev->watchdog_timeo <= 0)
- dev->watchdog_timeo = 5*HZ;
- if (!mod_timer(&dev->watchdog_timer,
- round_jiffies(jiffies + dev->watchdog_timeo)))
- netdev_hold(dev, &dev->watchdog_dev_tracker,
- GFP_ATOMIC);
- }
-}
-EXPORT_SYMBOL_GPL(__netdev_watchdog_up);
-
-static void dev_watchdog_up(struct net_device *dev)
+void netdev_watchdog_up(struct net_device *dev)
{
- __netdev_watchdog_up(dev);
+ if (!dev->netdev_ops->ndo_tx_timeout)
+ return;
+ if (dev->watchdog_timeo <= 0)
+ dev->watchdog_timeo = 5*HZ;
+ if (!mod_timer(&dev->watchdog_timer,
+ round_jiffies(jiffies + dev->watchdog_timeo)))
+ netdev_hold(dev, &dev->watchdog_dev_tracker,
+ GFP_ATOMIC);
}
+EXPORT_SYMBOL_GPL(netdev_watchdog_up);
-static void dev_watchdog_down(struct net_device *dev)
+static void netdev_watchdog_down(struct net_device *dev)
{
netif_tx_lock_bh(dev);
- if (del_timer(&dev->watchdog_timer))
+ if (timer_delete(&dev->watchdog_timer))
netdev_put(dev, &dev->watchdog_dev_tracker);
netif_tx_unlock_bh(dev);
}
@@ -580,7 +590,7 @@ void netif_carrier_on(struct net_device *dev)
atomic_inc(&dev->carrier_up_count);
linkwatch_fire_event(dev);
if (netif_running(dev))
- __netdev_watchdog_up(dev);
+ netdev_watchdog_up(dev);
}
}
EXPORT_SYMBOL(netif_carrier_on);
@@ -628,6 +638,7 @@ EXPORT_SYMBOL_GPL(netif_carrier_event);
static int noop_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
struct sk_buff **to_free)
{
+ dev_core_stats_tx_dropped_inc(skb->dev);
__qdisc_drop(skb, to_free);
return NET_XMIT_CN;
}
@@ -658,7 +669,6 @@ struct Qdisc noop_qdisc = {
.ops = &noop_qdisc_ops,
.q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
.dev_queue = &noop_netdev_queue,
- .busylock = __SPIN_LOCK_UNLOCKED(noop_qdisc.busylock),
.gso_skb = {
.next = (struct sk_buff *)&noop_qdisc.gso_skb,
.prev = (struct sk_buff *)&noop_qdisc.gso_skb,
@@ -694,9 +704,10 @@ struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {
.owner = THIS_MODULE,
};
-static const u8 prio2band[TC_PRIO_MAX + 1] = {
- 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1
+const u8 sch_default_prio2band[TC_PRIO_MAX + 1] = {
+ 1, 2, 2, 2, 1, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1
};
+EXPORT_SYMBOL(sch_default_prio2band);
/* 3-band FIFO queue: old style, but should be a bit faster than
generic prio+fifo combination.
@@ -721,7 +732,7 @@ static inline struct skb_array *band2list(struct pfifo_fast_priv *priv,
static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
struct sk_buff **to_free)
{
- int band = prio2band[skb->priority & TC_PRIO_MAX];
+ int band = sch_default_prio2band[skb->priority & TC_PRIO_MAX];
struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
struct skb_array *q = band2list(priv, band);
unsigned int pkt_len = qdisc_pkt_len(skb);
@@ -730,6 +741,8 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
err = skb_array_produce(q, skb);
if (unlikely(err)) {
+ tcf_set_drop_reason(skb, SKB_DROP_REASON_QDISC_OVERLIMIT);
+
if (qdisc_is_percpu_stats(qdisc))
return qdisc_drop_cpu(skb, qdisc, to_free);
else
@@ -830,7 +843,7 @@ static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb)
{
struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS };
- memcpy(&opt.priomap, prio2band, TC_PRIO_MAX + 1);
+ memcpy(&opt.priomap, sch_default_prio2band, TC_PRIO_MAX + 1);
if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
goto nla_put_failure;
return skb->len;
@@ -897,8 +910,8 @@ static int pfifo_fast_change_tx_queue_len(struct Qdisc *sch,
bands[prio] = q;
}
- return skb_array_resize_multiple(bands, PFIFO_FAST_BANDS, new_len,
- GFP_KERNEL);
+ return skb_array_resize_multiple_bh(bands, PFIFO_FAST_BANDS, new_len,
+ GFP_KERNEL);
}
struct Qdisc_ops pfifo_fast_ops __read_mostly = {
@@ -942,7 +955,9 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
__skb_queue_head_init(&sch->gso_skb);
__skb_queue_head_init(&sch->skb_bad_txq);
gnet_stats_basic_sync_init(&sch->bstats);
+ lockdep_register_key(&sch->root_lock_key);
spin_lock_init(&sch->q.lock);
+ lockdep_set_class(&sch->q.lock, &sch->root_lock_key);
if (ops->static_flags & TCQ_F_CPUSTATS) {
sch->cpu_bstats =
@@ -957,10 +972,6 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
}
}
- spin_lock_init(&sch->busylock);
- lockdep_set_class(&sch->busylock,
- dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
-
/* seqlock has the same scope of busylock, for NOLOCK qdisc */
spin_lock_init(&sch->seqlock);
lockdep_set_class(&sch->seqlock,
@@ -976,6 +987,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
return sch;
errout1:
+ lockdep_unregister_key(&sch->root_lock_key);
kfree(sch);
errout:
return ERR_PTR(err);
@@ -988,14 +1000,14 @@ struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
{
struct Qdisc *sch;
- if (!try_module_get(ops->owner)) {
+ if (!bpf_try_module_get(ops, ops->owner)) {
NL_SET_ERR_MSG(extack, "Failed to increase module reference counter");
return NULL;
}
sch = qdisc_alloc(dev_queue, ops, extack);
if (IS_ERR(sch)) {
- module_put(ops->owner);
+ bpf_module_put(ops, ops->owner);
return NULL;
}
sch->parent = parentid;
@@ -1049,6 +1061,7 @@ static void qdisc_free_cb(struct rcu_head *head)
static void __qdisc_destroy(struct Qdisc *qdisc)
{
const struct Qdisc_ops *ops = qdisc->ops;
+ struct net_device *dev = qdisc_dev(qdisc);
#ifdef CONFIG_NET_SCHED
qdisc_hash_del(qdisc);
@@ -1059,11 +1072,13 @@ static void __qdisc_destroy(struct Qdisc *qdisc)
qdisc_reset(qdisc);
+
if (ops->destroy)
ops->destroy(qdisc);
- module_put(ops->owner);
- netdev_put(qdisc_dev(qdisc), &qdisc->dev_tracker);
+ lockdep_unregister_key(&qdisc->root_lock_key);
+ bpf_module_put(ops, ops->owner);
+ netdev_put(dev, &qdisc->dev_tracker);
trace_qdisc_destroy(qdisc);
@@ -1246,7 +1261,7 @@ void dev_activate(struct net_device *dev)
if (need_watchdog) {
netif_trans_update(dev);
- dev_watchdog_up(dev);
+ netdev_watchdog_up(dev);
}
}
EXPORT_SYMBOL(dev_activate);
@@ -1261,15 +1276,17 @@ static void qdisc_deactivate(struct Qdisc *qdisc)
static void dev_deactivate_queue(struct net_device *dev,
struct netdev_queue *dev_queue,
- void *_qdisc_default)
+ void *_sync_needed)
{
- struct Qdisc *qdisc_default = _qdisc_default;
+ bool *sync_needed = _sync_needed;
struct Qdisc *qdisc;
qdisc = rtnl_dereference(dev_queue->qdisc);
if (qdisc) {
+ if (qdisc->enqueue)
+ *sync_needed = true;
qdisc_deactivate(qdisc);
- rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
+ rcu_assign_pointer(dev_queue->qdisc, &noop_qdisc);
}
}
@@ -1336,24 +1353,22 @@ static bool some_qdisc_is_busy(struct net_device *dev)
*/
void dev_deactivate_many(struct list_head *head)
{
+ bool sync_needed = false;
struct net_device *dev;
list_for_each_entry(dev, head, close_list) {
netdev_for_each_tx_queue(dev, dev_deactivate_queue,
- &noop_qdisc);
+ &sync_needed);
if (dev_ingress_queue(dev))
dev_deactivate_queue(dev, dev_ingress_queue(dev),
- &noop_qdisc);
+ &sync_needed);
- dev_watchdog_down(dev);
+ netdev_watchdog_down(dev);
}
- /* Wait for outstanding qdisc-less dev_queue_xmit calls or
- * outstanding qdisc enqueuing calls.
- * This is avoided if all devices are in dismantle phase :
- * Caller will call synchronize_net() for us
- */
- synchronize_net();
+ /* Wait for outstanding qdisc enqueuing calls. */
+ if (sync_needed)
+ synchronize_net();
list_for_each_entry(dev, head, close_list) {
netdev_for_each_tx_queue(dev, dev_reset_queue, NULL);