diff options
Diffstat (limited to 'net/sched/sch_htb.c')
| -rw-r--r-- | net/sched/sch_htb.c | 1122 |
1 files changed, 845 insertions, 277 deletions
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index c2124ea29f45..b5e40c51655a 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1,11 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/sch_htb.c Hierarchical token bucket, feed tree version * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Authors: Martin Devera, <devik@cdi.cz> * * Credits (in time order) for older HTB versions: @@ -40,6 +36,7 @@ #include <net/netlink.h> #include <net/sch_generic.h> #include <net/pkt_sched.h> +#include <net/pkt_cls.h> /* HTB algorithm. Author: devik@cdi.cz @@ -55,7 +52,7 @@ */ static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */ -#define HTB_VER 0x30011 /* major must be matched with number suplied by TC as version */ +#define HTB_VER 0x30011 /* major must be matched with number supplied by TC as version */ #if HTB_VER >> 16 != TC_HTB_PROTOVER #error "Mismatched sch_htb.c and pkt_sch.h" @@ -100,24 +97,23 @@ struct htb_class { struct psched_ratecfg ceil; s64 buffer, cbuffer;/* token bucket depth/rate */ s64 mbuffer; /* max wait time */ - int prio; /* these two are used only by leaves... */ + u32 prio; /* these two are used only by leaves... */ int quantum; /* but stored for parent-to-leaf return */ - struct tcf_proto *filter_list; /* class attached filters */ - int filter_cnt; - int refcnt; /* usage count of this class */ + struct tcf_proto __rcu *filter_list; /* class attached filters */ + struct tcf_block *block; int level; /* our level (see above) */ unsigned int children; struct htb_class *parent; /* parent class */ - struct gnet_stats_rate_est64 rate_est; + struct net_rate_estimator __rcu *rate_est; /* * Written often fields */ - struct gnet_stats_basic_packed bstats; - struct gnet_stats_queue qstats; + struct gnet_stats_basic_sync bstats; + struct gnet_stats_basic_sync bstats_bias; struct tc_htb_xstats xstats; /* our special stats */ /* token bucket parameters */ @@ -126,20 +122,23 @@ struct htb_class { union { struct htb_class_leaf { - struct list_head drop_list; int deficit[TC_HTB_MAXDEPTH]; struct Qdisc *q; + struct netdev_queue *offload_queue; } leaf; struct htb_class_inner { struct htb_prio clprio[TC_HTB_NUMPRIO]; } inner; - } un; + }; s64 pq_key; int prio_activity; /* for which prios are we active */ enum htb_cmode cmode; /* current mode of the class */ struct rb_node pq_node; /* node for event queue */ struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */ + + unsigned int drops ____cacheline_aligned_in_smp; + unsigned int overlimits; }; struct htb_level { @@ -153,7 +152,8 @@ struct htb_sched { int rate2quantum; /* quant = rate / rate2quantum */ /* filters for qdisc itself */ - struct tcf_proto *filter_list; + struct tcf_proto __rcu *filter_list; + struct tcf_block *block; #define HTB_WARN_TOOMANYEVENTS 0x1 unsigned int warned; /* only one warning */ @@ -161,13 +161,13 @@ struct htb_sched { struct work_struct work; /* non shaped skbs; let them go directly thru */ - struct sk_buff_head direct_queue; - long direct_pkts; + struct qdisc_skb_head direct_queue; + u32 direct_pkts; + u32 overlimits; struct qdisc_watchdog watchdog; s64 now; /* cached dequeue time */ - struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */ /* time of nearest event per level (row) */ s64 near_ev_cache[TC_HTB_MAXDEPTH]; @@ -175,6 +175,11 @@ struct htb_sched { int row_mask[TC_HTB_MAXDEPTH]; struct htb_level hlevel[TC_HTB_MAXDEPTH]; + + struct Qdisc **direct_qdiscs; + unsigned int num_direct_qdiscs; + + bool offload; }; /* find class in global hash table using given handle */ @@ -189,8 +194,18 @@ static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch) return container_of(clc, struct htb_class, common); } +static unsigned long htb_search(struct Qdisc *sch, u32 handle) +{ + return (unsigned long)htb_find(handle, sch); +} + +#define HTB_DIRECT ((struct htb_class *)-1L) + /** * htb_classify - classify a packet into class + * @skb: the socket buffer + * @sch: the active queue discipline + * @qerr: pointer for returned status code * * It returns NULL if the packet should be dropped or -1 if the packet * should be passed directly thru. In all other cases leaf class is returned. @@ -201,8 +216,6 @@ static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch) * have no valid leaf we try to use MAJOR:default leaf. It still unsuccessful * then finish and return direct queue. */ -#define HTB_DIRECT ((struct htb_class *)-1L) - static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) { @@ -219,17 +232,24 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, if (skb->priority == sch->handle) return HTB_DIRECT; /* X:0 (direct flow) selected */ cl = htb_find(skb->priority, sch); - if (cl && cl->level == 0) - return cl; + if (cl) { + if (cl->level == 0) + return cl; + /* Start with inner filter chain if a non-leaf class is selected */ + tcf = rcu_dereference_bh(cl->filter_list); + } else { + tcf = rcu_dereference_bh(q->filter_list); + } *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; - tcf = q->filter_list; - while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) { + while (tcf && (result = tcf_classify(skb, NULL, tcf, &res, false)) >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { case TC_ACT_QUEUED: case TC_ACT_STOLEN: + case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; + fallthrough; case TC_ACT_SHOT: return NULL; } @@ -246,7 +266,7 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, return cl; /* we hit leaf; return it */ /* we have got inner class; apply inner filter chain */ - tcf = cl->filter_list; + tcf = rcu_dereference_bh(cl->filter_list); } /* classification failed; try to use default class */ cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch); @@ -257,6 +277,9 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, /** * htb_add_to_id_tree - adds class to the round robin list + * @root: the root of the tree + * @cl: the class to add + * @prio: the give prio in class * * Routine adds class to the list (actually tree) sorted by classid. * Make sure that class is not already on such list for given prio. @@ -282,6 +305,9 @@ static void htb_add_to_id_tree(struct rb_root *root, /** * htb_add_to_wait_tree - adds class to the event queue with delay + * @q: the priority event queue + * @cl: the class to add + * @delay: delay in microseconds * * The class is added to priority event queue to indicate that class will * change its mode in cl->pq_key microseconds. Make sure that class is not @@ -315,17 +341,22 @@ static void htb_add_to_wait_tree(struct htb_sched *q, /** * htb_next_rb_node - finds next node in binary tree + * @n: the current node in binary tree * * When we are past last key we return NULL. * Average complexity is 2 steps per call. */ static inline void htb_next_rb_node(struct rb_node **n) { - *n = rb_next(*n); + if (*n) + *n = rb_next(*n); } /** * htb_add_class_to_row - add class to its row + * @q: the priority event queue + * @cl: the class to add + * @mask: the given priorities in class in bitmap * * The class is added to row at priorities marked in mask. * It does nothing if mask == 0. @@ -355,6 +386,9 @@ static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root) /** * htb_remove_class_from_row - removes class from its row + * @q: the priority event queue + * @cl: the class to add + * @mask: the given priorities in class in bitmap * * The class is removed from row at priorities marked in mask. * It does nothing if mask == 0. @@ -382,6 +416,8 @@ static inline void htb_remove_class_from_row(struct htb_sched *q, /** * htb_activate_prios - creates active classe's feed chain + * @q: the priority event queue + * @cl: the class to activate * * The class is connected to ancestors and/or appropriate rows * for priorities it is participating on. cl->cmode must be new @@ -395,16 +431,19 @@ static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl) while (cl->cmode == HTB_MAY_BORROW && p && mask) { m = mask; while (m) { - int prio = ffz(~m); + unsigned int prio = ffz(~m); + + if (WARN_ON_ONCE(prio >= ARRAY_SIZE(p->inner.clprio))) + break; m &= ~(1 << prio); - if (p->un.inner.clprio[prio].feed.rb_node) + if (p->inner.clprio[prio].feed.rb_node) /* parent already has its feed in use so that * reset bit in mask as parent is already ok */ mask &= ~(1 << prio); - htb_add_to_id_tree(&p->un.inner.clprio[prio].feed, cl, prio); + htb_add_to_id_tree(&p->inner.clprio[prio].feed, cl, prio); } p->prio_activity |= mask; cl = p; @@ -417,6 +456,8 @@ static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl) /** * htb_deactivate_prios - remove class from feed chain + * @q: the priority event queue + * @cl: the class to deactivate * * cl->cmode must represent old mode (before deactivation). It does * nothing if cl->prio_activity == 0. Class is removed from all feed @@ -434,19 +475,19 @@ static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl) int prio = ffz(~m); m &= ~(1 << prio); - if (p->un.inner.clprio[prio].ptr == cl->node + prio) { + if (p->inner.clprio[prio].ptr == cl->node + prio) { /* we are removing child which is pointed to from * parent feed - forget the pointer but remember * classid */ - p->un.inner.clprio[prio].last_ptr_id = cl->common.classid; - p->un.inner.clprio[prio].ptr = NULL; + p->inner.clprio[prio].last_ptr_id = cl->common.classid; + p->inner.clprio[prio].ptr = NULL; } htb_safe_rb_erase(cl->node + prio, - &p->un.inner.clprio[prio].feed); + &p->inner.clprio[prio].feed); - if (!p->un.inner.clprio[prio].feed.rb_node) + if (!p->inner.clprio[prio].feed.rb_node) mask |= 1 << prio; } @@ -477,6 +518,8 @@ static inline s64 htb_hiwater(const struct htb_class *cl) /** * htb_class_mode - computes and returns current class mode + * @cl: the target class + * @diff: diff time in microseconds * * It computes cl's mode at time cl->t_c+diff and returns it. If mode * is not HTB_CAN_SEND then cl->pq_key is updated to time difference @@ -505,9 +548,12 @@ htb_class_mode(struct htb_class *cl, s64 *diff) /** * htb_change_class_mode - changes classe's mode + * @q: the priority event queue + * @cl: the target class + * @diff: diff time in microseconds * * This should be the only way how to change classe's mode under normal - * cirsumstances. Routine will update feed lists linkage, change mode + * circumstances. Routine will update feed lists linkage, change mode * and add class to the wait event queue if appropriate. New mode should * be different from old one and cl->pq_key has to be valid if changing * to mode other than HTB_CAN_SEND (see htb_add_to_wait_tree). @@ -520,6 +566,11 @@ htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff) if (new_mode == cl->cmode) return; + if (new_mode == HTB_CANT_SEND) { + cl->overlimits++; + q->overlimits++; + } + if (cl->prio_activity) { /* not necessary: speed optimization */ if (cl->cmode != HTB_CANT_SEND) htb_deactivate_prios(q, cl); @@ -532,6 +583,8 @@ htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff) /** * htb_activate - inserts leaf cl into appropriate active feeds + * @q: the priority event queue + * @cl: the target class * * Routine learns (new) priority of leaf and activates feed chain * for the prio. It can be called on already active leaf safely. @@ -539,62 +592,65 @@ htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff) */ static inline void htb_activate(struct htb_sched *q, struct htb_class *cl) { - WARN_ON(cl->level || !cl->un.leaf.q || !cl->un.leaf.q->q.qlen); + WARN_ON(cl->level || !cl->leaf.q); if (!cl->prio_activity) { cl->prio_activity = 1 << cl->prio; htb_activate_prios(q, cl); - list_add_tail(&cl->un.leaf.drop_list, - q->drops + cl->prio); } } /** * htb_deactivate - remove leaf cl from active feeds + * @q: the priority event queue + * @cl: the target class * * Make sure that leaf is active. In the other words it can't be called * with non-active leaf. It also removes class from the drop list. */ static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl) { - WARN_ON(!cl->prio_activity); - + if (!cl->prio_activity) + return; htb_deactivate_prios(q, cl); cl->prio_activity = 0; - list_del_init(&cl->un.leaf.drop_list); } -static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch) +static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff **to_free) { - int uninitialized_var(ret); + int ret; + unsigned int len = qdisc_pkt_len(skb); struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = htb_classify(skb, sch, &ret); if (cl == HTB_DIRECT) { /* enqueue to helper queue */ if (q->direct_queue.qlen < q->direct_qlen) { - __skb_queue_tail(&q->direct_queue, skb); + __qdisc_enqueue_tail(skb, &q->direct_queue); q->direct_pkts++; } else { - return qdisc_drop(skb, sch); + return qdisc_drop(skb, sch, to_free); } #ifdef CONFIG_NET_CLS_ACT } else if (!cl) { if (ret & __NET_XMIT_BYPASS) - sch->qstats.drops++; - kfree_skb(skb); + qdisc_qstats_drop(sch); + __qdisc_drop(skb, to_free); return ret; #endif - } else if ((ret = qdisc_enqueue(skb, cl->un.leaf.q)) != NET_XMIT_SUCCESS) { + } else if ((ret = qdisc_enqueue(skb, cl->leaf.q, + to_free)) != NET_XMIT_SUCCESS) { if (net_xmit_drop_count(ret)) { - sch->qstats.drops++; - cl->qstats.drops++; + qdisc_qstats_drop(sch); + cl->drops++; } return ret; } else { htb_activate(q, cl); } + sch->qstats.backlog += len; sch->q.qlen++; return NET_XMIT_SUCCESS; } @@ -627,6 +683,10 @@ static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, s64 diff) /** * htb_charge_class - charges amount "bytes" to leaf and ancestors + * @q: the priority event queue + * @cl: the class to start iterate + * @level: the minimum level to account + * @skb: the socket buffer * * Routine assumes that packet "bytes" long was dequeued from leaf cl * borrowing from "level". It accounts bytes to ceil leaky bucket for @@ -676,6 +736,9 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl, /** * htb_do_events - make mode changes to classes at the level + * @q: the priority event queue + * @level: which wait_pq in 'q->hlevel' + * @start: start jiffies * * Scans event queue for pending events and applies them. Returns time of * next pending event (0 for no event in pq, q->now for too many events). @@ -712,7 +775,7 @@ static s64 htb_do_events(struct htb_sched *q, const int level, /* too much load - let's continue after a break for scheduling */ if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) { - pr_warning("htb: too many events!\n"); + pr_warn("htb: too many events!\n"); q->warned |= HTB_WARN_TOOMANYEVENTS; } @@ -744,6 +807,8 @@ static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n, /** * htb_lookup_leaf - returns next leaf class in DRR order + * @hprio: the current one + * @prio: which prio in class * * Find leaf where current feed pointers points to. */ @@ -756,7 +821,9 @@ static struct htb_class *htb_lookup_leaf(struct htb_prio *hprio, const int prio) u32 *pid; } stk[TC_HTB_MAXDEPTH], *sp = stk; - BUG_ON(!hprio->row.rb_node); + if (unlikely(!hprio->row.rb_node)) + return NULL; + sp->root = hprio->row.rb_node; sp->pptr = &hprio->ptr; sp->pid = &hprio->last_ptr_id; @@ -791,7 +858,7 @@ static struct htb_class *htb_lookup_leaf(struct htb_prio *hprio, const int prio) cl = rb_entry(*sp->pptr, struct htb_class, node[prio]); if (!cl->level) return cl; - clp = &cl->un.inner.clprio[prio]; + clp = &cl->inner.clprio[prio]; (++sp)->root = clp->feed.rb_node; sp->pptr = &clp->ptr; sp->pid = &clp->last_ptr_id; @@ -825,7 +892,7 @@ next: * graft operation on the leaf since last dequeue; * simply deactivate and skip such class */ - if (unlikely(cl->un.leaf.q->q.qlen == 0)) { + if (unlikely(cl->leaf.q->q.qlen == 0)) { struct htb_class *next; htb_deactivate(q, cl); @@ -841,12 +908,12 @@ next: goto next; } - skb = cl->un.leaf.q->dequeue(cl->un.leaf.q); + skb = cl->leaf.q->dequeue(cl->leaf.q); if (likely(skb != NULL)) break; - qdisc_warn_nonwc("htb", cl->un.leaf.q); - htb_next_rb_node(level ? &cl->parent->un.inner.clprio[prio].ptr: + qdisc_warn_nonwc("htb", cl->leaf.q); + htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr: &q->hlevel[0].hprio[prio].ptr); cl = htb_lookup_leaf(hprio, prio); @@ -854,16 +921,16 @@ next: if (likely(skb != NULL)) { bstats_update(&cl->bstats, skb); - cl->un.leaf.deficit[level] -= qdisc_pkt_len(skb); - if (cl->un.leaf.deficit[level] < 0) { - cl->un.leaf.deficit[level] += cl->quantum; - htb_next_rb_node(level ? &cl->parent->un.inner.clprio[prio].ptr : + cl->leaf.deficit[level] -= qdisc_pkt_len(skb); + if (cl->leaf.deficit[level] < 0) { + cl->leaf.deficit[level] += cl->quantum; + htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr : &q->hlevel[0].hprio[prio].ptr); } /* this used to be after charge_class but this constelation * gives us slightly better performance */ - if (!cl->un.leaf.q->q.qlen) + if (!cl->leaf.q->q.qlen) htb_deactivate(q, cl); htb_charge_class(q, cl, level, skb); } @@ -879,18 +946,18 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) unsigned long start_at; /* try to dequeue direct packets as high prio (!) to minimize cpu work */ - skb = __skb_dequeue(&q->direct_queue); + skb = __qdisc_dequeue_head(&q->direct_queue); if (skb != NULL) { ok: qdisc_bstats_update(sch, skb); - qdisc_unthrottled(sch); + qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; return skb; } if (!sch->q.qlen) goto fin; - q->now = ktime_to_ns(ktime_get()); + q->now = ktime_get_ns(); start_at = jiffies; next_event = q->now + 5LLU * NSEC_PER_SEC; @@ -920,46 +987,14 @@ ok: goto ok; } } - sch->qstats.overlimits++; - if (likely(next_event > q->now)) { - if (!test_bit(__QDISC_STATE_DEACTIVATED, - &qdisc_root_sleeping(q->watchdog.qdisc)->state)) { - ktime_t time = ns_to_ktime(next_event); - qdisc_throttled(q->watchdog.qdisc); - hrtimer_start(&q->watchdog.timer, time, - HRTIMER_MODE_ABS); - } - } else { + if (likely(next_event > q->now)) + qdisc_watchdog_schedule_ns(&q->watchdog, next_event); + else schedule_work(&q->work); - } fin: return skb; } -/* try to drop from each class (by prio) until one succeed */ -static unsigned int htb_drop(struct Qdisc *sch) -{ - struct htb_sched *q = qdisc_priv(sch); - int prio; - - for (prio = TC_HTB_NUMPRIO - 1; prio >= 0; prio--) { - struct list_head *p; - list_for_each(p, q->drops + prio) { - struct htb_class *cl = list_entry(p, struct htb_class, - un.leaf.drop_list); - unsigned int len; - if (cl->un.leaf.q->ops->drop && - (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) { - sch->q.qlen--; - if (!cl->un.leaf.q->q.qlen) - htb_deactivate(q, cl); - return len; - } - } - } - return 0; -} - /* reset all classes */ /* always caled under BH & queue lock */ static void htb_reset(struct Qdisc *sch) @@ -971,24 +1006,19 @@ static void htb_reset(struct Qdisc *sch) for (i = 0; i < q->clhash.hashsize; i++) { hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) { if (cl->level) - memset(&cl->un.inner, 0, sizeof(cl->un.inner)); + memset(&cl->inner, 0, sizeof(cl->inner)); else { - if (cl->un.leaf.q) - qdisc_reset(cl->un.leaf.q); - INIT_LIST_HEAD(&cl->un.leaf.drop_list); + if (cl->leaf.q && !q->offload) + qdisc_reset(cl->leaf.q); } cl->prio_activity = 0; cl->cmode = HTB_CAN_SEND; - } } qdisc_watchdog_cancel(&q->watchdog); - __skb_queue_purge(&q->direct_queue); - sch->q.qlen = 0; + __qdisc_reset_queue(&q->direct_queue); memset(q->hlevel, 0, sizeof(q->hlevel)); memset(q->row_mask, 0, sizeof(q->row_mask)); - for (i = 0; i < TC_HTB_NUMPRIO; i++) - INIT_LIST_HEAD(q->drops + i); } static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = { @@ -997,6 +1027,9 @@ static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = { [TCA_HTB_CTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE }, [TCA_HTB_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE }, [TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 }, + [TCA_HTB_RATE64] = { .type = NLA_U64 }, + [TCA_HTB_CEIL64] = { .type = NLA_U64 }, + [TCA_HTB_OFFLOAD] = { .type = NLA_FLAG }, }; static void htb_work_func(struct work_struct *work) @@ -1004,21 +1037,40 @@ static void htb_work_func(struct work_struct *work) struct htb_sched *q = container_of(work, struct htb_sched, work); struct Qdisc *sch = q->watchdog.qdisc; + rcu_read_lock(); __netif_schedule(qdisc_root(sch)); + rcu_read_unlock(); } -static int htb_init(struct Qdisc *sch, struct nlattr *opt) +static int htb_offload(struct net_device *dev, struct tc_htb_qopt_offload *opt) { + return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_HTB, opt); +} + +static int htb_init(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) +{ + struct net_device *dev = qdisc_dev(sch); + struct tc_htb_qopt_offload offload_opt; struct htb_sched *q = qdisc_priv(sch); struct nlattr *tb[TCA_HTB_MAX + 1]; struct tc_htb_glob *gopt; + unsigned int ntx; + bool offload; int err; - int i; + + qdisc_watchdog_init(&q->watchdog, sch); + INIT_WORK(&q->work, htb_work_func); if (!opt) return -EINVAL; - err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy); + err = tcf_block_get(&q->block, &q->filter_list, sch, extack); + if (err) + return err; + + err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy, + NULL); if (err < 0) return err; @@ -1029,38 +1081,142 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt) if (gopt->version != HTB_VER >> 16) return -EINVAL; + offload = nla_get_flag(tb[TCA_HTB_OFFLOAD]); + + if (offload) { + if (sch->parent != TC_H_ROOT) { + NL_SET_ERR_MSG(extack, "HTB must be the root qdisc to use offload"); + return -EOPNOTSUPP; + } + + if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) { + NL_SET_ERR_MSG(extack, "hw-tc-offload ethtool feature flag must be on"); + return -EOPNOTSUPP; + } + + q->num_direct_qdiscs = dev->real_num_tx_queues; + q->direct_qdiscs = kcalloc(q->num_direct_qdiscs, + sizeof(*q->direct_qdiscs), + GFP_KERNEL); + if (!q->direct_qdiscs) + return -ENOMEM; + } + err = qdisc_class_hash_init(&q->clhash); if (err < 0) return err; - for (i = 0; i < TC_HTB_NUMPRIO; i++) - INIT_LIST_HEAD(q->drops + i); - - qdisc_watchdog_init(&q->watchdog, sch); - INIT_WORK(&q->work, htb_work_func); - skb_queue_head_init(&q->direct_queue); if (tb[TCA_HTB_DIRECT_QLEN]) q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]); - else { + else q->direct_qlen = qdisc_dev(sch)->tx_queue_len; - if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */ - q->direct_qlen = 2; - } + if ((q->rate2quantum = gopt->rate2quantum) < 1) q->rate2quantum = 1; q->defcls = gopt->defcls; + if (!offload) + return 0; + + for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) { + struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx); + struct Qdisc *qdisc; + + qdisc = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops, + TC_H_MAKE(sch->handle, 0), extack); + if (!qdisc) { + return -ENOMEM; + } + + q->direct_qdiscs[ntx] = qdisc; + qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; + } + + sch->flags |= TCQ_F_MQROOT; + + offload_opt = (struct tc_htb_qopt_offload) { + .command = TC_HTB_CREATE, + .parent_classid = TC_H_MAJ(sch->handle) >> 16, + .classid = TC_H_MIN(q->defcls), + .extack = extack, + }; + err = htb_offload(dev, &offload_opt); + if (err) + return err; + + /* Defer this assignment, so that htb_destroy skips offload-related + * parts (especially calling ndo_setup_tc) on errors. + */ + q->offload = true; + return 0; } +static void htb_attach_offload(struct Qdisc *sch) +{ + struct net_device *dev = qdisc_dev(sch); + struct htb_sched *q = qdisc_priv(sch); + unsigned int ntx; + + for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) { + struct Qdisc *old, *qdisc = q->direct_qdiscs[ntx]; + + old = dev_graft_qdisc(qdisc->dev_queue, qdisc); + qdisc_put(old); + qdisc_hash_add(qdisc, false); + } + for (ntx = q->num_direct_qdiscs; ntx < dev->num_tx_queues; ntx++) { + struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx); + struct Qdisc *old = dev_graft_qdisc(dev_queue, NULL); + + qdisc_put(old); + } + + kfree(q->direct_qdiscs); + q->direct_qdiscs = NULL; +} + +static void htb_attach_software(struct Qdisc *sch) +{ + struct net_device *dev = qdisc_dev(sch); + unsigned int ntx; + + /* Resemble qdisc_graft behavior. */ + for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { + struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx); + struct Qdisc *old = dev_graft_qdisc(dev_queue, sch); + + qdisc_refcount_inc(sch); + + qdisc_put(old); + } +} + +static void htb_attach(struct Qdisc *sch) +{ + struct htb_sched *q = qdisc_priv(sch); + + if (q->offload) + htb_attach_offload(sch); + else + htb_attach_software(sch); +} + static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) { - spinlock_t *root_lock = qdisc_root_sleeping_lock(sch); struct htb_sched *q = qdisc_priv(sch); struct nlattr *nest; struct tc_htb_glob gopt; - spin_lock_bh(root_lock); + if (q->offload) + sch->flags |= TCQ_F_OFFLOADED; + else + sch->flags &= ~TCQ_F_OFFLOADED; + + sch->qstats.overlimits = q->overlimits; + /* Its safe to not acquire qdisc lock. As we hold RTNL, + * no change can happen on the qdisc parameters. + */ gopt.direct_pkts = q->direct_pkts; gopt.version = HTB_VER; @@ -1068,19 +1224,18 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) gopt.defcls = q->defcls; gopt.debug = 0; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) || nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen)) goto nla_put_failure; - nla_nest_end(skb, nest); + if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD)) + goto nla_put_failure; - spin_unlock_bh(root_lock); - return skb->len; + return nla_nest_end(skb, nest); nla_put_failure: - spin_unlock_bh(root_lock); nla_nest_cancel(skb, nest); return -1; } @@ -1089,17 +1244,19 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb, struct tcmsg *tcm) { struct htb_class *cl = (struct htb_class *)arg; - spinlock_t *root_lock = qdisc_root_sleeping_lock(sch); + struct htb_sched *q = qdisc_priv(sch); struct nlattr *nest; struct tc_htb_opt opt; - spin_lock_bh(root_lock); + /* Its safe to not acquire qdisc lock. As we hold RTNL, + * no change can happen on the class parameters. + */ tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT; tcm->tcm_handle = cl->common.classid; - if (!cl->level && cl->un.leaf.q) - tcm->tcm_info = cl->un.leaf.q->handle; + if (!cl->level && cl->leaf.q) + tcm->tcm_info = cl->leaf.q->handle; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; @@ -1114,78 +1271,224 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg, opt.level = cl->level; if (nla_put(skb, TCA_HTB_PARMS, sizeof(opt), &opt)) goto nla_put_failure; + if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD)) + goto nla_put_failure; + if ((cl->rate.rate_bytes_ps >= (1ULL << 32)) && + nla_put_u64_64bit(skb, TCA_HTB_RATE64, cl->rate.rate_bytes_ps, + TCA_HTB_PAD)) + goto nla_put_failure; + if ((cl->ceil.rate_bytes_ps >= (1ULL << 32)) && + nla_put_u64_64bit(skb, TCA_HTB_CEIL64, cl->ceil.rate_bytes_ps, + TCA_HTB_PAD)) + goto nla_put_failure; - nla_nest_end(skb, nest); - spin_unlock_bh(root_lock); - return skb->len; + return nla_nest_end(skb, nest); nla_put_failure: - spin_unlock_bh(root_lock); nla_nest_cancel(skb, nest); return -1; } +static void htb_offload_aggregate_stats(struct htb_sched *q, + struct htb_class *cl) +{ + u64 bytes = 0, packets = 0; + struct htb_class *c; + unsigned int i; + + gnet_stats_basic_sync_init(&cl->bstats); + + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry(c, &q->clhash.hash[i], common.hnode) { + struct htb_class *p = c; + + while (p && p->level < cl->level) + p = p->parent; + + if (p != cl) + continue; + + bytes += u64_stats_read(&c->bstats_bias.bytes); + packets += u64_stats_read(&c->bstats_bias.packets); + if (c->level == 0) { + bytes += u64_stats_read(&c->leaf.q->bstats.bytes); + packets += u64_stats_read(&c->leaf.q->bstats.packets); + } + } + } + _bstats_update(&cl->bstats, bytes, packets); +} + static int htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) { struct htb_class *cl = (struct htb_class *)arg; + struct htb_sched *q = qdisc_priv(sch); + struct gnet_stats_queue qs = { + .drops = cl->drops, + .overlimits = cl->overlimits, + }; + __u32 qlen = 0; + + if (!cl->level && cl->leaf.q) + qdisc_qstats_qlen_backlog(cl->leaf.q, &qlen, &qs.backlog); + + cl->xstats.tokens = clamp_t(s64, PSCHED_NS2TICKS(cl->tokens), + INT_MIN, INT_MAX); + cl->xstats.ctokens = clamp_t(s64, PSCHED_NS2TICKS(cl->ctokens), + INT_MIN, INT_MAX); + + if (q->offload) { + if (!cl->level) { + if (cl->leaf.q) + cl->bstats = cl->leaf.q->bstats; + else + gnet_stats_basic_sync_init(&cl->bstats); + _bstats_update(&cl->bstats, + u64_stats_read(&cl->bstats_bias.bytes), + u64_stats_read(&cl->bstats_bias.packets)); + } else { + htb_offload_aggregate_stats(q, cl); + } + } - if (!cl->level && cl->un.leaf.q) - cl->qstats.qlen = cl->un.leaf.q->q.qlen; - cl->xstats.tokens = PSCHED_NS2TICKS(cl->tokens); - cl->xstats.ctokens = PSCHED_NS2TICKS(cl->ctokens); - - if (gnet_stats_copy_basic(d, &cl->bstats) < 0 || - gnet_stats_copy_rate_est(d, NULL, &cl->rate_est) < 0 || - gnet_stats_copy_queue(d, &cl->qstats) < 0) + if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 || + gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || + gnet_stats_copy_queue(d, NULL, &qs, qlen) < 0) return -1; return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats)); } +static struct netdev_queue * +htb_select_queue(struct Qdisc *sch, struct tcmsg *tcm) +{ + struct net_device *dev = qdisc_dev(sch); + struct tc_htb_qopt_offload offload_opt; + struct htb_sched *q = qdisc_priv(sch); + int err; + + if (!q->offload) + return sch->dev_queue; + + offload_opt = (struct tc_htb_qopt_offload) { + .command = TC_HTB_LEAF_QUERY_QUEUE, + .classid = TC_H_MIN(tcm->tcm_parent), + }; + err = htb_offload(dev, &offload_opt); + if (err || offload_opt.qid >= dev->num_tx_queues) + return NULL; + return netdev_get_tx_queue(dev, offload_opt.qid); +} + +static struct Qdisc * +htb_graft_helper(struct netdev_queue *dev_queue, struct Qdisc *new_q) +{ + struct net_device *dev = dev_queue->dev; + struct Qdisc *old_q; + + if (dev->flags & IFF_UP) + dev_deactivate(dev); + old_q = dev_graft_qdisc(dev_queue, new_q); + if (new_q) + new_q->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; + if (dev->flags & IFF_UP) + dev_activate(dev); + + return old_q; +} + +static struct netdev_queue *htb_offload_get_queue(struct htb_class *cl) +{ + struct netdev_queue *queue; + + queue = cl->leaf.offload_queue; + if (!(cl->leaf.q->flags & TCQ_F_BUILTIN)) + WARN_ON(cl->leaf.q->dev_queue != queue); + + return queue; +} + +static void htb_offload_move_qdisc(struct Qdisc *sch, struct htb_class *cl_old, + struct htb_class *cl_new, bool destroying) +{ + struct netdev_queue *queue_old, *queue_new; + struct net_device *dev = qdisc_dev(sch); + + queue_old = htb_offload_get_queue(cl_old); + queue_new = htb_offload_get_queue(cl_new); + + if (!destroying) { + struct Qdisc *qdisc; + + if (dev->flags & IFF_UP) + dev_deactivate(dev); + qdisc = dev_graft_qdisc(queue_old, NULL); + WARN_ON(qdisc != cl_old->leaf.q); + } + + if (!(cl_old->leaf.q->flags & TCQ_F_BUILTIN)) + cl_old->leaf.q->dev_queue = queue_new; + cl_old->leaf.offload_queue = queue_new; + + if (!destroying) { + struct Qdisc *qdisc; + + qdisc = dev_graft_qdisc(queue_new, cl_old->leaf.q); + if (dev->flags & IFF_UP) + dev_activate(dev); + WARN_ON(!(qdisc->flags & TCQ_F_BUILTIN)); + } +} + static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, - struct Qdisc **old) + struct Qdisc **old, struct netlink_ext_ack *extack) { + struct netdev_queue *dev_queue = sch->dev_queue; struct htb_class *cl = (struct htb_class *)arg; + struct htb_sched *q = qdisc_priv(sch); + struct Qdisc *old_q; if (cl->level) return -EINVAL; - if (new == NULL && - (new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, - cl->common.classid)) == NULL) - return -ENOBUFS; - sch_tree_lock(sch); - *old = cl->un.leaf.q; - cl->un.leaf.q = new; - if (*old != NULL) { - qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); - qdisc_reset(*old); + if (q->offload) + dev_queue = htb_offload_get_queue(cl); + + if (!new) { + new = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops, + cl->common.classid, extack); + if (!new) + return -ENOBUFS; } - sch_tree_unlock(sch); + + if (q->offload) { + /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */ + qdisc_refcount_inc(new); + old_q = htb_graft_helper(dev_queue, new); + } + + *old = qdisc_replace(sch, new, &cl->leaf.q); + + if (q->offload) { + WARN_ON(old_q != *old); + qdisc_put(old_q); + } + return 0; } static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg) { struct htb_class *cl = (struct htb_class *)arg; - return !cl->level ? cl->un.leaf.q : NULL; + return !cl->level ? cl->leaf.q : NULL; } static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg) { struct htb_class *cl = (struct htb_class *)arg; - if (cl->un.leaf.q->q.qlen == 0) - htb_deactivate(qdisc_priv(sch), cl); -} - -static unsigned long htb_get(struct Qdisc *sch, u32 classid) -{ - struct htb_class *cl = htb_find(classid, sch); - if (cl) - cl->refcnt++; - return (unsigned long)cl; + htb_deactivate(qdisc_priv(sch), cl); } static inline int htb_parent_last_child(struct htb_class *cl) @@ -1199,42 +1502,123 @@ static inline int htb_parent_last_child(struct htb_class *cl) return 1; } -static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl, +static void htb_parent_to_leaf(struct Qdisc *sch, struct htb_class *cl, struct Qdisc *new_q) { + struct htb_sched *q = qdisc_priv(sch); struct htb_class *parent = cl->parent; - WARN_ON(cl->level || !cl->un.leaf.q || cl->prio_activity); + WARN_ON(cl->level || !cl->leaf.q || cl->prio_activity); if (parent->cmode != HTB_CAN_SEND) htb_safe_rb_erase(&parent->pq_node, &q->hlevel[parent->level].wait_pq); parent->level = 0; - memset(&parent->un.inner, 0, sizeof(parent->un.inner)); - INIT_LIST_HEAD(&parent->un.leaf.drop_list); - parent->un.leaf.q = new_q ? new_q : &noop_qdisc; + memset(&parent->inner, 0, sizeof(parent->inner)); + parent->leaf.q = new_q ? new_q : &noop_qdisc; parent->tokens = parent->buffer; parent->ctokens = parent->cbuffer; - parent->t_c = ktime_to_ns(ktime_get()); + parent->t_c = ktime_get_ns(); parent->cmode = HTB_CAN_SEND; + if (q->offload) + parent->leaf.offload_queue = cl->leaf.offload_queue; +} + +static void htb_parent_to_leaf_offload(struct Qdisc *sch, + struct netdev_queue *dev_queue, + struct Qdisc *new_q) +{ + struct Qdisc *old_q; + + /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */ + if (new_q) + qdisc_refcount_inc(new_q); + old_q = htb_graft_helper(dev_queue, new_q); + WARN_ON(!(old_q->flags & TCQ_F_BUILTIN)); +} + +static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl, + bool last_child, bool destroying, + struct netlink_ext_ack *extack) +{ + struct tc_htb_qopt_offload offload_opt; + struct netdev_queue *dev_queue; + struct Qdisc *q = cl->leaf.q; + struct Qdisc *old; + int err; + + if (cl->level) + return -EINVAL; + + WARN_ON(!q); + dev_queue = htb_offload_get_queue(cl); + /* When destroying, caller qdisc_graft grafts the new qdisc and invokes + * qdisc_put for the qdisc being destroyed. htb_destroy_class_offload + * does not need to graft or qdisc_put the qdisc being destroyed. + */ + if (!destroying) { + old = htb_graft_helper(dev_queue, NULL); + /* Last qdisc grafted should be the same as cl->leaf.q when + * calling htb_delete. + */ + WARN_ON(old != q); + } + + if (cl->parent) { + _bstats_update(&cl->parent->bstats_bias, + u64_stats_read(&q->bstats.bytes), + u64_stats_read(&q->bstats.packets)); + } + + offload_opt = (struct tc_htb_qopt_offload) { + .command = !last_child ? TC_HTB_LEAF_DEL : + destroying ? TC_HTB_LEAF_DEL_LAST_FORCE : + TC_HTB_LEAF_DEL_LAST, + .classid = cl->common.classid, + .extack = extack, + }; + err = htb_offload(qdisc_dev(sch), &offload_opt); + + if (!destroying) { + if (!err) + qdisc_put(old); + else + htb_graft_helper(dev_queue, old); + } + + if (last_child) + return err; + + if (!err && offload_opt.classid != TC_H_MIN(cl->common.classid)) { + u32 classid = TC_H_MAJ(sch->handle) | + TC_H_MIN(offload_opt.classid); + struct htb_class *moved_cl = htb_find(classid, sch); + + htb_offload_move_qdisc(sch, moved_cl, cl, destroying); + } + + return err; } static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl) { if (!cl->level) { - WARN_ON(!cl->un.leaf.q); - qdisc_destroy(cl->un.leaf.q); + WARN_ON(!cl->leaf.q); + qdisc_put(cl->leaf.q); } - gen_kill_estimator(&cl->bstats, &cl->rate_est); - tcf_destroy_chain(&cl->filter_list); + gen_kill_estimator(&cl->rate_est); + tcf_block_put(cl->block); kfree(cl); } static void htb_destroy(struct Qdisc *sch) { + struct net_device *dev = qdisc_dev(sch); + struct tc_htb_qopt_offload offload_opt; struct htb_sched *q = qdisc_priv(sch); struct hlist_node *next; + bool nonempty, changed; struct htb_class *cl; unsigned int i; @@ -1245,98 +1629,155 @@ static void htb_destroy(struct Qdisc *sch) * because filter need its target class alive to be able to call * unbind_filter on it (without Oops). */ - tcf_destroy_chain(&q->filter_list); + tcf_block_put(q->block); for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) - tcf_destroy_chain(&cl->filter_list); - } - for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i], - common.hnode) - htb_destroy_class(sch, cl); + hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) { + tcf_block_put(cl->block); + cl->block = NULL; + } } + + do { + nonempty = false; + changed = false; + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i], + common.hnode) { + bool last_child; + + if (!q->offload) { + htb_destroy_class(sch, cl); + continue; + } + + nonempty = true; + + if (cl->level) + continue; + + changed = true; + + last_child = htb_parent_last_child(cl); + htb_destroy_class_offload(sch, cl, last_child, + true, NULL); + qdisc_class_hash_remove(&q->clhash, + &cl->common); + if (cl->parent) + cl->parent->children--; + if (last_child) + htb_parent_to_leaf(sch, cl, NULL); + htb_destroy_class(sch, cl); + } + } + } while (changed); + WARN_ON(nonempty); + qdisc_class_hash_destroy(&q->clhash); - __skb_queue_purge(&q->direct_queue); + __qdisc_reset_queue(&q->direct_queue); + + if (q->offload) { + offload_opt = (struct tc_htb_qopt_offload) { + .command = TC_HTB_DESTROY, + }; + htb_offload(dev, &offload_opt); + } + + if (!q->direct_qdiscs) + return; + for (i = 0; i < q->num_direct_qdiscs && q->direct_qdiscs[i]; i++) + qdisc_put(q->direct_qdiscs[i]); + kfree(q->direct_qdiscs); } -static int htb_delete(struct Qdisc *sch, unsigned long arg) +static int htb_delete(struct Qdisc *sch, unsigned long arg, + struct netlink_ext_ack *extack) { struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = (struct htb_class *)arg; - unsigned int qlen; struct Qdisc *new_q = NULL; int last_child = 0; + int err; - // TODO: why don't allow to delete subtree ? references ? does - // tc subsys quarantee us that in htb_destroy it holds no class - // refs so that we can remove children safely there ? - if (cl->children || cl->filter_cnt) + /* TODO: why don't allow to delete subtree ? references ? does + * tc subsys guarantee us that in htb_destroy it holds no class + * refs so that we can remove children safely there ? + */ + if (cl->children || qdisc_class_in_use(&cl->common)) { + NL_SET_ERR_MSG(extack, "HTB class in use"); return -EBUSY; + } - if (!cl->level && htb_parent_last_child(cl)) { - new_q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, - cl->parent->common.classid); + if (!cl->level && htb_parent_last_child(cl)) last_child = 1; + + if (q->offload) { + err = htb_destroy_class_offload(sch, cl, last_child, false, + extack); + if (err) + return err; } - sch_tree_lock(sch); + if (last_child) { + struct netdev_queue *dev_queue = sch->dev_queue; - if (!cl->level) { - qlen = cl->un.leaf.q->q.qlen; - qdisc_reset(cl->un.leaf.q); - qdisc_tree_decrease_qlen(cl->un.leaf.q, qlen); + if (q->offload) + dev_queue = htb_offload_get_queue(cl); + + new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops, + cl->parent->common.classid, + NULL); + if (q->offload) + htb_parent_to_leaf_offload(sch, dev_queue, new_q); } + sch_tree_lock(sch); + + if (!cl->level) + qdisc_purge_queue(cl->leaf.q); + /* delete from hash and active; remainder in destroy_class */ qdisc_class_hash_remove(&q->clhash, &cl->common); if (cl->parent) cl->parent->children--; - if (cl->prio_activity) - htb_deactivate(q, cl); + htb_deactivate(q, cl); if (cl->cmode != HTB_CAN_SEND) htb_safe_rb_erase(&cl->pq_node, &q->hlevel[cl->level].wait_pq); if (last_child) - htb_parent_to_leaf(q, cl, new_q); - - BUG_ON(--cl->refcnt == 0); - /* - * This shouldn't happen: we "hold" one cops->get() when called - * from tc_ctl_tclass; the destroy method is done from cops->put(). - */ + htb_parent_to_leaf(sch, cl, new_q); sch_tree_unlock(sch); - return 0; -} -static void htb_put(struct Qdisc *sch, unsigned long arg) -{ - struct htb_class *cl = (struct htb_class *)arg; - - if (--cl->refcnt == 0) - htb_destroy_class(sch, cl); + htb_destroy_class(sch, cl); + return 0; } static int htb_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca, - unsigned long *arg) + unsigned long *arg, struct netlink_ext_ack *extack) { int err = -EINVAL; struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = (struct htb_class *)*arg, *parent; + struct tc_htb_qopt_offload offload_opt; struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_HTB_MAX + 1]; + struct Qdisc *parent_qdisc = NULL; + struct netdev_queue *dev_queue; struct tc_htb_opt *hopt; + u64 rate64, ceil64; + int warn = 0; /* extract all subattrs from opt attr */ if (!opt) goto failure; - err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy); + err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy, + extack); if (err < 0) goto failure; @@ -1350,8 +1791,33 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if (!hopt->rate.rate || !hopt->ceil.rate) goto failure; + if (q->offload) { + /* Options not supported by the offload. */ + if (hopt->rate.overhead || hopt->ceil.overhead) { + NL_SET_ERR_MSG(extack, "HTB offload doesn't support the overhead parameter"); + goto failure; + } + if (hopt->rate.mpu || hopt->ceil.mpu) { + NL_SET_ERR_MSG(extack, "HTB offload doesn't support the mpu parameter"); + goto failure; + } + } + + /* Keeping backward compatible with rate_table based iproute2 tc */ + if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE) + qdisc_put_rtab(qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB], + NULL)); + + if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE) + qdisc_put_rtab(qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB], + NULL)); + + rate64 = nla_get_u64_default(tb[TCA_HTB_RATE64], 0); + ceil64 = nla_get_u64_default(tb[TCA_HTB_CEIL64], 0); + if (!cl) { /* new class */ - struct Qdisc *new_q; + struct net_device *dev = qdisc_dev(sch); + struct Qdisc *new_q, *old_q; int prio; struct { struct nlattr nla; @@ -1375,7 +1841,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, /* check maximal depth */ if (parent && parent->parent && parent->parent->level < 2) { - pr_err("htb: tree is too deep\n"); + NL_SET_ERR_MSG_MOD(extack, "tree is too deep"); goto failure; } err = -ENOBUFS; @@ -1383,40 +1849,107 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if (!cl) goto failure; + gnet_stats_basic_sync_init(&cl->bstats); + gnet_stats_basic_sync_init(&cl->bstats_bias); + + err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack); + if (err) { + kfree(cl); + goto failure; + } if (htb_rate_est || tca[TCA_RATE]) { - err = gen_new_estimator(&cl->bstats, &cl->rate_est, - qdisc_root_sleeping_lock(sch), + err = gen_new_estimator(&cl->bstats, NULL, + &cl->rate_est, + NULL, + true, tca[TCA_RATE] ? : &est.nla); - if (err) { - kfree(cl); - goto failure; - } + if (err) + goto err_block_put; } - cl->refcnt = 1; cl->children = 0; - INIT_LIST_HEAD(&cl->un.leaf.drop_list); RB_CLEAR_NODE(&cl->pq_node); for (prio = 0; prio < TC_HTB_NUMPRIO; prio++) RB_CLEAR_NODE(&cl->node[prio]); + cl->common.classid = classid; + + /* Make sure nothing interrupts us in between of two + * ndo_setup_tc calls. + */ + ASSERT_RTNL(); + /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL) * so that can't be used inside of sch_tree_lock * -- thanks to Karlis Peisenieks */ - new_q = qdisc_create_dflt(sch->dev_queue, - &pfifo_qdisc_ops, classid); + if (!q->offload) { + dev_queue = sch->dev_queue; + } else if (!(parent && !parent->level)) { + /* Assign a dev_queue to this classid. */ + offload_opt = (struct tc_htb_qopt_offload) { + .command = TC_HTB_LEAF_ALLOC_QUEUE, + .classid = cl->common.classid, + .parent_classid = parent ? + TC_H_MIN(parent->common.classid) : + TC_HTB_CLASSID_ROOT, + .rate = max_t(u64, hopt->rate.rate, rate64), + .ceil = max_t(u64, hopt->ceil.rate, ceil64), + .prio = hopt->prio, + .quantum = hopt->quantum, + .extack = extack, + }; + err = htb_offload(dev, &offload_opt); + if (err) { + NL_SET_ERR_MSG_WEAK(extack, + "Failed to offload TC_HTB_LEAF_ALLOC_QUEUE"); + goto err_kill_estimator; + } + dev_queue = netdev_get_tx_queue(dev, offload_opt.qid); + } else { /* First child. */ + dev_queue = htb_offload_get_queue(parent); + old_q = htb_graft_helper(dev_queue, NULL); + WARN_ON(old_q != parent->leaf.q); + offload_opt = (struct tc_htb_qopt_offload) { + .command = TC_HTB_LEAF_TO_INNER, + .classid = cl->common.classid, + .parent_classid = + TC_H_MIN(parent->common.classid), + .rate = max_t(u64, hopt->rate.rate, rate64), + .ceil = max_t(u64, hopt->ceil.rate, ceil64), + .prio = hopt->prio, + .quantum = hopt->quantum, + .extack = extack, + }; + err = htb_offload(dev, &offload_opt); + if (err) { + NL_SET_ERR_MSG_WEAK(extack, + "Failed to offload TC_HTB_LEAF_TO_INNER"); + htb_graft_helper(dev_queue, old_q); + goto err_kill_estimator; + } + _bstats_update(&parent->bstats_bias, + u64_stats_read(&old_q->bstats.bytes), + u64_stats_read(&old_q->bstats.packets)); + qdisc_put(old_q); + } + new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops, + classid, NULL); + if (q->offload) { + /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */ + if (new_q) + qdisc_refcount_inc(new_q); + old_q = htb_graft_helper(dev_queue, new_q); + /* No qdisc_put needed. */ + WARN_ON(!(old_q->flags & TCQ_F_BUILTIN)); + } sch_tree_lock(sch); if (parent && !parent->level) { - unsigned int qlen = parent->un.leaf.q->q.qlen; - /* turn parent into inner node */ - qdisc_reset(parent->un.leaf.q); - qdisc_tree_decrease_qlen(parent->un.leaf.q, qlen); - qdisc_destroy(parent->un.leaf.q); - if (parent->prio_activity) - htb_deactivate(q, parent); + qdisc_purge_queue(parent->leaf.q); + parent_qdisc = parent->leaf.q; + htb_deactivate(q, parent); /* remove from evt list because of level change */ if (parent->cmode != HTB_CAN_SEND) { @@ -1425,51 +1958,84 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, } parent->level = (parent->parent ? parent->parent->level : TC_HTB_MAXDEPTH) - 1; - memset(&parent->un.inner, 0, sizeof(parent->un.inner)); + memset(&parent->inner, 0, sizeof(parent->inner)); } + /* leaf (we) needs elementary qdisc */ - cl->un.leaf.q = new_q ? new_q : &noop_qdisc; + cl->leaf.q = new_q ? new_q : &noop_qdisc; + if (q->offload) + cl->leaf.offload_queue = dev_queue; - cl->common.classid = classid; cl->parent = parent; /* set class to be in HTB_CAN_SEND state */ cl->tokens = PSCHED_TICKS2NS(hopt->buffer); cl->ctokens = PSCHED_TICKS2NS(hopt->cbuffer); cl->mbuffer = 60ULL * NSEC_PER_SEC; /* 1min */ - cl->t_c = ktime_to_ns(ktime_get()); + cl->t_c = ktime_get_ns(); cl->cmode = HTB_CAN_SEND; /* attach to the hash list and parent's family */ qdisc_class_hash_insert(&q->clhash, &cl->common); if (parent) parent->children++; + if (cl->leaf.q != &noop_qdisc) + qdisc_hash_add(cl->leaf.q, true); } else { if (tca[TCA_RATE]) { - err = gen_replace_estimator(&cl->bstats, &cl->rate_est, - qdisc_root_sleeping_lock(sch), + err = gen_replace_estimator(&cl->bstats, NULL, + &cl->rate_est, + NULL, + true, tca[TCA_RATE]); if (err) return err; } + + if (q->offload) { + struct net_device *dev = qdisc_dev(sch); + + offload_opt = (struct tc_htb_qopt_offload) { + .command = TC_HTB_NODE_MODIFY, + .classid = cl->common.classid, + .rate = max_t(u64, hopt->rate.rate, rate64), + .ceil = max_t(u64, hopt->ceil.rate, ceil64), + .prio = hopt->prio, + .quantum = hopt->quantum, + .extack = extack, + }; + err = htb_offload(dev, &offload_opt); + if (err) + /* Estimator was replaced, and rollback may fail + * as well, so we don't try to recover it, and + * the estimator won't work property with the + * offload anyway, because bstats are updated + * only when the stats are queried. + */ + return err; + } + sch_tree_lock(sch); } + psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64); + psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64); + /* it used to be a nasty bug here, we have to check that node - * is really leaf before changing cl->un.leaf ! + * is really leaf before changing cl->leaf ! */ if (!cl->level) { - cl->quantum = hopt->rate.rate / q->rate2quantum; + u64 quantum = cl->rate.rate_bytes_ps; + + do_div(quantum, q->rate2quantum); + cl->quantum = min_t(u64, quantum, INT_MAX); + if (!hopt->quantum && cl->quantum < 1000) { - pr_warning( - "HTB: quantum of class %X is small. Consider r2q change.\n", - cl->common.classid); + warn = -1; cl->quantum = 1000; } if (!hopt->quantum && cl->quantum > 200000) { - pr_warning( - "HTB: quantum of class %X is big. Consider r2q change.\n", - cl->common.classid); + warn = 1; cl->quantum = 200000; } if (hopt->quantum) @@ -1478,30 +2044,38 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, cl->prio = TC_HTB_NUMPRIO - 1; } - psched_ratecfg_precompute(&cl->rate, &hopt->rate); - psched_ratecfg_precompute(&cl->ceil, &hopt->ceil); - cl->buffer = PSCHED_TICKS2NS(hopt->buffer); - cl->cbuffer = PSCHED_TICKS2NS(hopt->buffer); + cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer); sch_tree_unlock(sch); + qdisc_put(parent_qdisc); + + if (warn) + NL_SET_ERR_MSG_FMT_MOD(extack, + "quantum of class %X is %s. Consider r2q change.", + cl->common.classid, (warn == -1 ? "small" : "big")); qdisc_class_hash_grow(sch, &q->clhash); *arg = (unsigned long)cl; return 0; +err_kill_estimator: + gen_kill_estimator(&cl->rate_est); +err_block_put: + tcf_block_put(cl->block); + kfree(cl); failure: return err; } -static struct tcf_proto **htb_find_tcf(struct Qdisc *sch, unsigned long arg) +static struct tcf_block *htb_tcf_block(struct Qdisc *sch, unsigned long arg, + struct netlink_ext_ack *extack) { struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = (struct htb_class *)arg; - struct tcf_proto **fl = cl ? &cl->filter_list : &q->filter_list; - return fl; + return cl ? cl->block : q->block; } static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent, @@ -1519,7 +2093,7 @@ static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent, * be broken by class during destroy IIUC. */ if (cl) - cl->filter_cnt++; + qdisc_class_get(&cl->common); return (unsigned long)cl; } @@ -1527,8 +2101,7 @@ static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg) { struct htb_class *cl = (struct htb_class *)arg; - if (cl) - cl->filter_cnt--; + qdisc_class_put(&cl->common); } static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg) @@ -1542,29 +2115,22 @@ static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg) for (i = 0; i < q->clhash.hashsize; i++) { hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) { - if (arg->count < arg->skip) { - arg->count++; - continue; - } - if (arg->fn(sch, (unsigned long)cl, arg) < 0) { - arg->stop = 1; + if (!tc_qdisc_stats_dump(sch, (unsigned long)cl, arg)) return; - } - arg->count++; } } } static const struct Qdisc_class_ops htb_class_ops = { + .select_queue = htb_select_queue, .graft = htb_graft, .leaf = htb_leaf, .qlen_notify = htb_qlen_notify, - .get = htb_get, - .put = htb_put, + .find = htb_search, .change = htb_change_class, .delete = htb_delete, .walk = htb_walk, - .tcf_chain = htb_find_tcf, + .tcf_block = htb_tcf_block, .bind_tcf = htb_bind_filter, .unbind_tcf = htb_unbind_filter, .dump = htb_dump_class, @@ -1578,13 +2144,14 @@ static struct Qdisc_ops htb_qdisc_ops __read_mostly = { .enqueue = htb_enqueue, .dequeue = htb_dequeue, .peek = qdisc_peek_dequeued, - .drop = htb_drop, .init = htb_init, + .attach = htb_attach, .reset = htb_reset, .destroy = htb_destroy, .dump = htb_dump, .owner = THIS_MODULE, }; +MODULE_ALIAS_NET_SCH("htb"); static int __init htb_module_init(void) { @@ -1598,3 +2165,4 @@ static void __exit htb_module_exit(void) module_init(htb_module_init) module_exit(htb_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Hierarchical Token Bucket scheduler"); |
