diff options
Diffstat (limited to 'net/sched/sch_qfq.c')
| -rw-r--r-- | net/sched/sch_qfq.c | 458 |
1 files changed, 228 insertions, 230 deletions
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index a7ab323849b6..d920f57dc6d7 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * net/sched/sch_qfq.c Quick Fair Queueing Plus Scheduler. * * Copyright (c) 2009 Fabio Checconi, Luigi Rizzo, and Paolo Valente. * Copyright (c) 2012 Paolo Valente. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * version 2 as published by the Free Software Foundation. */ #include <linux/module.h> @@ -113,10 +110,10 @@ #define FRAC_BITS 30 /* fixed point arithmetic */ #define ONE_FP (1UL << FRAC_BITS) -#define IWSUM (ONE_FP/QFQ_MAX_WSUM) #define QFQ_MTU_SHIFT 16 /* to support TSO/GSO */ #define QFQ_MIN_LMAX 512 /* see qfq_slot_insert */ +#define QFQ_MAX_LMAX (1UL << QFQ_MTU_SHIFT) #define QFQ_MAX_AGG_CLASSES 8 /* max num classes per aggregate allowed */ @@ -133,12 +130,9 @@ struct qfq_aggregate; struct qfq_class { struct Qdisc_class_common common; - unsigned int refcnt; - unsigned int filter_cnt; - - struct gnet_stats_basic_packed bstats; + struct gnet_stats_basic_sync bstats; struct gnet_stats_queue qstats; - struct gnet_stats_rate_est64 rate_est; + struct net_rate_estimator __rcu *rate_est; struct Qdisc *qdisc; struct list_head alist; /* Link for active-classes list. */ struct qfq_aggregate *agg; /* Parent aggregate. */ @@ -182,13 +176,14 @@ struct qfq_group { }; struct qfq_sched { - struct tcf_proto *filter_list; + struct tcf_proto __rcu *filter_list; + struct tcf_block *block; struct Qdisc_class_hash clhash; u64 oldV, V; /* Precise virtual times. */ struct qfq_aggregate *in_serv_agg; /* Aggregate being served. */ - u32 num_active_agg; /* Num. of active aggregates */ u32 wsum; /* weight sum */ + u32 iwsum; /* inverse weight sum */ unsigned long bitmaps[QFQ_MAX_STATE]; /* Group bitmaps. */ struct qfq_group groups[QFQ_MAX_INDEX + 1]; /* The groups. */ @@ -207,6 +202,11 @@ struct qfq_sched { */ enum update_reason {enqueue, requeue}; +static bool cl_is_active(struct qfq_class *cl) +{ + return !list_empty(&cl->alist); +} + static struct qfq_class *qfq_find_class(struct Qdisc *sch, u32 classid) { struct qfq_sched *q = qdisc_priv(sch); @@ -218,17 +218,14 @@ static struct qfq_class *qfq_find_class(struct Qdisc *sch, u32 classid) return container_of(clc, struct qfq_class, common); } -static void qfq_purge_queue(struct qfq_class *cl) -{ - unsigned int len = cl->qdisc->q.qlen; - - qdisc_reset(cl->qdisc); - qdisc_tree_decrease_qlen(cl->qdisc, len); -} +static const struct netlink_range_validation lmax_range = { + .min = QFQ_MIN_LMAX, + .max = QFQ_MAX_LMAX, +}; static const struct nla_policy qfq_policy[TCA_QFQ_MAX + 1] = { - [TCA_QFQ_WEIGHT] = { .type = NLA_U32 }, - [TCA_QFQ_LMAX] = { .type = NLA_U32 }, + [TCA_QFQ_WEIGHT] = NLA_POLICY_RANGE(NLA_U32, 1, QFQ_MAX_WEIGHT), + [TCA_QFQ_LMAX] = NLA_POLICY_FULL_RANGE(NLA_U32, &lmax_range), }; /* @@ -314,6 +311,7 @@ static void qfq_update_agg(struct qfq_sched *q, struct qfq_aggregate *agg, q->wsum += (int) agg->class_weight * (new_num_classes - agg->num_classes); + q->iwsum = ONE_FP / q->wsum; agg->num_classes = new_num_classes; } @@ -338,8 +336,11 @@ static struct qfq_aggregate *qfq_choose_next_agg(struct qfq_sched *); static void qfq_destroy_agg(struct qfq_sched *q, struct qfq_aggregate *agg) { - if (!hlist_unhashed(&agg->nonfull_next)) - hlist_del_init(&agg->nonfull_next); + hlist_del_init(&agg->nonfull_next); + q->wsum -= agg->class_weight; + if (q->wsum != 0) + q->iwsum = ONE_FP / q->wsum; + if (q->in_serv_agg == agg) q->in_serv_agg = qfq_choose_next_agg(q); kfree(agg); @@ -351,7 +352,7 @@ static void qfq_deactivate_class(struct qfq_sched *q, struct qfq_class *cl) struct qfq_aggregate *agg = cl->agg; - list_del(&cl->alist); /* remove from RR queue of the aggregate */ + list_del_init(&cl->alist); /* remove from RR queue of the aggregate */ if (list_empty(&agg->active)) /* agg is now inactive */ qfq_deactivate_agg(q, agg); } @@ -383,8 +384,13 @@ static int qfq_change_agg(struct Qdisc *sch, struct qfq_class *cl, u32 weight, u32 lmax) { struct qfq_sched *q = qdisc_priv(sch); - struct qfq_aggregate *new_agg = qfq_find_agg(q, lmax, weight); + struct qfq_aggregate *new_agg; + + /* 'lmax' can range from [QFQ_MIN_LMAX, pktlen + stab overhead] */ + if (lmax > QFQ_MAX_LMAX) + return -EINVAL; + new_agg = qfq_find_agg(q, lmax, weight); if (new_agg == NULL) { /* create new aggregate */ new_agg = kzalloc(sizeof(*new_agg), GFP_ATOMIC); if (new_agg == NULL) @@ -398,64 +404,69 @@ static int qfq_change_agg(struct Qdisc *sch, struct qfq_class *cl, u32 weight, } static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, - struct nlattr **tca, unsigned long *arg) + struct nlattr **tca, unsigned long *arg, + struct netlink_ext_ack *extack) { struct qfq_sched *q = qdisc_priv(sch); struct qfq_class *cl = (struct qfq_class *)*arg; bool existing = false; struct nlattr *tb[TCA_QFQ_MAX + 1]; struct qfq_aggregate *new_agg = NULL; - u32 weight, lmax, inv_w; + u32 weight, lmax, inv_w, old_weight, old_lmax; int err; int delta_w; - if (tca[TCA_OPTIONS] == NULL) { - pr_notice("qfq: no options\n"); + if (NL_REQ_ATTR_CHECK(extack, NULL, tca, TCA_OPTIONS)) { + NL_SET_ERR_MSG_MOD(extack, "missing options"); return -EINVAL; } - err = nla_parse_nested(tb, TCA_QFQ_MAX, tca[TCA_OPTIONS], qfq_policy); + err = nla_parse_nested_deprecated(tb, TCA_QFQ_MAX, tca[TCA_OPTIONS], + qfq_policy, extack); if (err < 0) return err; - if (tb[TCA_QFQ_WEIGHT]) { - weight = nla_get_u32(tb[TCA_QFQ_WEIGHT]); - if (!weight || weight > (1UL << QFQ_MAX_WSHIFT)) { - pr_notice("qfq: invalid weight %u\n", weight); - return -EINVAL; - } - } else - weight = 1; + weight = nla_get_u32_default(tb[TCA_QFQ_WEIGHT], 1); if (tb[TCA_QFQ_LMAX]) { lmax = nla_get_u32(tb[TCA_QFQ_LMAX]); - if (lmax < QFQ_MIN_LMAX || lmax > (1UL << QFQ_MTU_SHIFT)) { - pr_notice("qfq: invalid max length %u\n", lmax); + } else { + /* MTU size is user controlled */ + lmax = psched_mtu(qdisc_dev(sch)); + if (lmax < QFQ_MIN_LMAX || lmax > QFQ_MAX_LMAX) { + NL_SET_ERR_MSG_MOD(extack, + "MTU size out of bounds for qfq"); return -EINVAL; } - } else - lmax = psched_mtu(qdisc_dev(sch)); + } inv_w = ONE_FP / weight; weight = ONE_FP / inv_w; - if (cl != NULL && - lmax == cl->agg->lmax && - weight == cl->agg->class_weight) - return 0; /* nothing to change */ + if (cl != NULL) { + sch_tree_lock(sch); + old_weight = cl->agg->class_weight; + old_lmax = cl->agg->lmax; + sch_tree_unlock(sch); + if (lmax == old_lmax && weight == old_weight) + return 0; /* nothing to change */ + } - delta_w = weight - (cl ? cl->agg->class_weight : 0); + delta_w = weight - (cl ? old_weight : 0); if (q->wsum + delta_w > QFQ_MAX_WSUM) { - pr_notice("qfq: total weight out of range (%d + %u)\n", - delta_w, q->wsum); + NL_SET_ERR_MSG_FMT_MOD(extack, + "total weight out of range (%d + %u)", + delta_w, q->wsum); return -EINVAL; } if (cl != NULL) { /* modify existing class */ if (tca[TCA_RATE]) { - err = gen_replace_estimator(&cl->bstats, &cl->rate_est, - qdisc_root_sleeping_lock(sch), + err = gen_replace_estimator(&cl->bstats, NULL, + &cl->rate_est, + NULL, + true, tca[TCA_RATE]); if (err) return err; @@ -469,28 +480,28 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (cl == NULL) return -ENOBUFS; - cl->refcnt = 1; + gnet_stats_basic_sync_init(&cl->bstats); cl->common.classid = classid; cl->deficit = lmax; + INIT_LIST_HEAD(&cl->alist); - cl->qdisc = qdisc_create_dflt(sch->dev_queue, - &pfifo_qdisc_ops, classid); + cl->qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, + classid, NULL); if (cl->qdisc == NULL) cl->qdisc = &noop_qdisc; if (tca[TCA_RATE]) { - err = gen_new_estimator(&cl->bstats, &cl->rate_est, - qdisc_root_sleeping_lock(sch), + err = gen_new_estimator(&cl->bstats, NULL, + &cl->rate_est, + NULL, + true, tca[TCA_RATE]); if (err) goto destroy_class; } - sch_tree_lock(sch); - qdisc_class_hash_insert(&q->clhash, &cl->common); - sch_tree_unlock(sch); - - qdisc_class_hash_grow(sch, &q->clhash); + if (cl->qdisc != &noop_qdisc) + qdisc_hash_add(cl->qdisc, true); set_change_agg: sch_tree_lock(sch); @@ -500,7 +511,7 @@ set_change_agg: new_agg = kzalloc(sizeof(*new_agg), GFP_KERNEL); if (new_agg == NULL) { err = -ENOBUFS; - gen_kill_estimator(&cl->bstats, &cl->rate_est); + gen_kill_estimator(&cl->rate_est); goto destroy_class; } sch_tree_lock(sch); @@ -508,77 +519,65 @@ set_change_agg: } if (existing) qfq_deact_rm_from_agg(q, cl); + else + qdisc_class_hash_insert(&q->clhash, &cl->common); qfq_add_to_agg(q, new_agg, cl); sch_tree_unlock(sch); + qdisc_class_hash_grow(sch, &q->clhash); *arg = (unsigned long)cl; return 0; destroy_class: - qdisc_destroy(cl->qdisc); + qdisc_put(cl->qdisc); kfree(cl); return err; } static void qfq_destroy_class(struct Qdisc *sch, struct qfq_class *cl) { - struct qfq_sched *q = qdisc_priv(sch); - - qfq_rm_from_agg(q, cl); - gen_kill_estimator(&cl->bstats, &cl->rate_est); - qdisc_destroy(cl->qdisc); + gen_kill_estimator(&cl->rate_est); + qdisc_put(cl->qdisc); kfree(cl); } -static int qfq_delete_class(struct Qdisc *sch, unsigned long arg) +static int qfq_delete_class(struct Qdisc *sch, unsigned long arg, + struct netlink_ext_ack *extack) { struct qfq_sched *q = qdisc_priv(sch); struct qfq_class *cl = (struct qfq_class *)arg; - if (cl->filter_cnt > 0) + if (qdisc_class_in_use(&cl->common)) { + NL_SET_ERR_MSG_MOD(extack, "QFQ class in use"); return -EBUSY; + } sch_tree_lock(sch); - qfq_purge_queue(cl); + qdisc_purge_queue(cl->qdisc); qdisc_class_hash_remove(&q->clhash, &cl->common); - - BUG_ON(--cl->refcnt == 0); - /* - * This shouldn't happen: we "hold" one cops->get() when called - * from tc_ctl_tclass; the destroy method is done from cops->put(). - */ + qfq_rm_from_agg(q, cl); sch_tree_unlock(sch); - return 0; -} - -static unsigned long qfq_get_class(struct Qdisc *sch, u32 classid) -{ - struct qfq_class *cl = qfq_find_class(sch, classid); - if (cl != NULL) - cl->refcnt++; - - return (unsigned long)cl; + qfq_destroy_class(sch, cl); + return 0; } -static void qfq_put_class(struct Qdisc *sch, unsigned long arg) +static unsigned long qfq_search_class(struct Qdisc *sch, u32 classid) { - struct qfq_class *cl = (struct qfq_class *)arg; - - if (--cl->refcnt == 0) - qfq_destroy_class(sch, cl); + return (unsigned long)qfq_find_class(sch, classid); } -static struct tcf_proto **qfq_tcf_chain(struct Qdisc *sch, unsigned long cl) +static struct tcf_block *qfq_tcf_block(struct Qdisc *sch, unsigned long cl, + struct netlink_ext_ack *extack) { struct qfq_sched *q = qdisc_priv(sch); if (cl) return NULL; - return &q->filter_list; + return q->block; } static unsigned long qfq_bind_tcf(struct Qdisc *sch, unsigned long parent, @@ -586,8 +585,8 @@ static unsigned long qfq_bind_tcf(struct Qdisc *sch, unsigned long parent, { struct qfq_class *cl = qfq_find_class(sch, classid); - if (cl != NULL) - cl->filter_cnt++; + if (cl) + qdisc_class_get(&cl->common); return (unsigned long)cl; } @@ -596,26 +595,23 @@ static void qfq_unbind_tcf(struct Qdisc *sch, unsigned long arg) { struct qfq_class *cl = (struct qfq_class *)arg; - cl->filter_cnt--; + qdisc_class_put(&cl->common); } static int qfq_graft_class(struct Qdisc *sch, unsigned long arg, - struct Qdisc *new, struct Qdisc **old) + struct Qdisc *new, struct Qdisc **old, + struct netlink_ext_ack *extack) { struct qfq_class *cl = (struct qfq_class *)arg; if (new == NULL) { - new = qdisc_create_dflt(sch->dev_queue, - &pfifo_qdisc_ops, cl->common.classid); + new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, + cl->common.classid, NULL); if (new == NULL) new = &noop_qdisc; } - sch_tree_lock(sch); - qfq_purge_queue(cl); - *old = cl->qdisc; - cl->qdisc = new; - sch_tree_unlock(sch); + *old = qdisc_replace(sch, new, &cl->qdisc); return 0; } @@ -631,16 +627,22 @@ static int qfq_dump_class(struct Qdisc *sch, unsigned long arg, { struct qfq_class *cl = (struct qfq_class *)arg; struct nlattr *nest; + u32 class_weight, lmax; tcm->tcm_parent = TC_H_ROOT; tcm->tcm_handle = cl->common.classid; tcm->tcm_info = cl->qdisc->handle; - nest = nla_nest_start(skb, TCA_OPTIONS); + nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; - if (nla_put_u32(skb, TCA_QFQ_WEIGHT, cl->agg->class_weight) || - nla_put_u32(skb, TCA_QFQ_LMAX, cl->agg->lmax)) + + sch_tree_lock(sch); + class_weight = cl->agg->class_weight; + lmax = cl->agg->lmax; + sch_tree_unlock(sch); + if (nla_put_u32(skb, TCA_QFQ_WEIGHT, class_weight) || + nla_put_u32(skb, TCA_QFQ_LMAX, lmax)) goto nla_put_failure; return nla_nest_end(skb, nest); @@ -656,14 +658,15 @@ static int qfq_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct tc_qfq_stats xstats; memset(&xstats, 0, sizeof(xstats)); - cl->qdisc->qstats.qlen = cl->qdisc->q.qlen; + sch_tree_lock(sch); xstats.weight = cl->agg->class_weight; xstats.lmax = cl->agg->lmax; + sch_tree_unlock(sch); - if (gnet_stats_copy_basic(d, &cl->bstats) < 0 || - gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 || - gnet_stats_copy_queue(d, &cl->qdisc->qstats) < 0) + if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 || + gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || + qdisc_qstats_copy(d, cl->qdisc) < 0) return -1; return gnet_stats_copy_app(d, &xstats, sizeof(xstats)); @@ -680,15 +683,8 @@ static void qfq_walk(struct Qdisc *sch, struct qdisc_walker *arg) for (i = 0; i < q->clhash.hashsize; i++) { hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) { - if (arg->count < arg->skip) { - arg->count++; - continue; - } - if (arg->fn(sch, (unsigned long)cl, arg) < 0) { - arg->stop = 1; + if (!tc_qdisc_stats_dump(sch, (unsigned long)cl, arg)) return; - } - arg->count++; } } } @@ -699,6 +695,7 @@ static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch, struct qfq_sched *q = qdisc_priv(sch); struct qfq_class *cl; struct tcf_result res; + struct tcf_proto *fl; int result; if (TC_H_MAJ(skb->priority ^ sch->handle) == 0) { @@ -709,13 +706,16 @@ static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch, } *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; - result = tc_classify(skb, q->filter_list, &res); + fl = rcu_dereference_bh(q->filter_list); + result = tcf_classify(skb, NULL, fl, &res, false); if (result >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { case TC_ACT_QUEUED: case TC_ACT_STOLEN: + case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; + fallthrough; case TC_ACT_SHOT: return NULL; } @@ -834,38 +834,60 @@ static void qfq_make_eligible(struct qfq_sched *q) } } - /* - * The index of the slot in which the aggregate is to be inserted must - * not be higher than QFQ_MAX_SLOTS-2. There is a '-2' and not a '-1' - * because the start time of the group may be moved backward by one - * slot after the aggregate has been inserted, and this would cause - * non-empty slots to be right-shifted by one position. + * The index of the slot in which the input aggregate agg is to be + * inserted must not be higher than QFQ_MAX_SLOTS-2. There is a '-2' + * and not a '-1' because the start time of the group may be moved + * backward by one slot after the aggregate has been inserted, and + * this would cause non-empty slots to be right-shifted by one + * position. + * + * QFQ+ fully satisfies this bound to the slot index if the parameters + * of the classes are not changed dynamically, and if QFQ+ never + * happens to postpone the service of agg unjustly, i.e., it never + * happens that the aggregate becomes backlogged and eligible, or just + * eligible, while an aggregate with a higher approximated finish time + * is being served. In particular, in this case QFQ+ guarantees that + * the timestamps of agg are low enough that the slot index is never + * higher than 2. Unfortunately, QFQ+ cannot provide the same + * guarantee if it happens to unjustly postpone the service of agg, or + * if the parameters of some class are changed. + * + * As for the first event, i.e., an out-of-order service, the + * upper bound to the slot index guaranteed by QFQ+ grows to + * 2 + + * QFQ_MAX_AGG_CLASSES * ((1<<QFQ_MTU_SHIFT)/QFQ_MIN_LMAX) * + * (current_max_weight/current_wsum) <= 2 + 8 * 128 * 1. * - * If the weight and lmax (max_pkt_size) of the classes do not change, - * then QFQ+ does meet the above contraint according to the current - * values of its parameters. In fact, if the weight and lmax of the - * classes do not change, then, from the theory, QFQ+ guarantees that - * the slot index is never higher than - * 2 + QFQ_MAX_AGG_CLASSES * ((1<<QFQ_MTU_SHIFT)/QFQ_MIN_LMAX) * - * (QFQ_MAX_WEIGHT/QFQ_MAX_WSUM) = 2 + 8 * 128 * (1 / 64) = 18 + * The following function deals with this problem by backward-shifting + * the timestamps of agg, if needed, so as to guarantee that the slot + * index is never higher than QFQ_MAX_SLOTS-2. This backward-shift may + * cause the service of other aggregates to be postponed, yet the + * worst-case guarantees of these aggregates are not violated. In + * fact, in case of no out-of-order service, the timestamps of agg + * would have been even lower than they are after the backward shift, + * because QFQ+ would have guaranteed a maximum value equal to 2 for + * the slot index, and 2 < QFQ_MAX_SLOTS-2. Hence the aggregates whose + * service is postponed because of the backward-shift would have + * however waited for the service of agg before being served. * - * When the weight of a class is increased or the lmax of the class is - * decreased, a new aggregate with smaller slot size than the original - * parent aggregate of the class may happen to be activated. The - * activation of this aggregate should be properly delayed to when the - * service of the class has finished in the ideal system tracked by - * QFQ+. If the activation of the aggregate is not delayed to this - * reference time instant, then this aggregate may be unjustly served - * before other aggregates waiting for service. This may cause the - * above bound to the slot index to be violated for some of these - * unlucky aggregates. + * The other event that may cause the slot index to be higher than 2 + * for agg is a recent change of the parameters of some class. If the + * weight of a class is increased or the lmax (max_pkt_size) of the + * class is decreased, then a new aggregate with smaller slot size + * than the original parent aggregate of the class may happen to be + * activated. The activation of this aggregate should be properly + * delayed to when the service of the class has finished in the ideal + * system tracked by QFQ+. If the activation of the aggregate is not + * delayed to this reference time instant, then this aggregate may be + * unjustly served before other aggregates waiting for service. This + * may cause the above bound to the slot index to be violated for some + * of these unlucky aggregates. * * Instead of delaying the activation of the new aggregate, which is - * quite complex, the following inaccurate but simple solution is used: - * if the slot index is higher than QFQ_MAX_SLOTS-2, then the - * timestamps of the aggregate are shifted backward so as to let the - * slot index become equal to QFQ_MAX_SLOTS-2. + * quite complex, the above-discussed capping of the slot index is + * used to handle also the consequences of a change of the parameters + * of a class. */ static void qfq_slot_insert(struct qfq_group *grp, struct qfq_aggregate *agg, u64 roundedS) @@ -965,19 +987,24 @@ static void qfq_update_eligible(struct qfq_sched *q) } /* Dequeue head packet of the head class in the DRR queue of the aggregate. */ -static void agg_dequeue(struct qfq_aggregate *agg, - struct qfq_class *cl, unsigned int len) +static struct sk_buff *agg_dequeue(struct qfq_aggregate *agg, + struct qfq_class *cl, unsigned int len) { - qdisc_dequeue_peeked(cl->qdisc); + struct sk_buff *skb = qdisc_dequeue_peeked(cl->qdisc); + + if (!skb) + return NULL; cl->deficit -= (int) len; if (cl->qdisc->q.qlen == 0) /* no more packets, remove from list */ - list_del(&cl->alist); - else if (cl->deficit < qdisc_pkt_len(cl->qdisc->ops->peek(cl->qdisc))) { + list_del_init(&cl->alist); + else if (cl->deficit < qdisc_peek_len(cl->qdisc)) { cl->deficit += agg->lmax; list_move_tail(&cl->alist, &agg->active); } + + return skb; } static inline struct sk_buff *qfq_peek_skb(struct qfq_aggregate *agg, @@ -989,7 +1016,7 @@ static inline struct sk_buff *qfq_peek_skb(struct qfq_aggregate *agg, *cl = list_first_entry(&agg->active, struct qfq_class, alist); skb = (*cl)->qdisc->ops->peek((*cl)->qdisc); if (skb == NULL) - WARN_ONCE(1, "qfq_dequeue: non-workconserving leaf\n"); + qdisc_warn_nonwc("qfq_dequeue", (*cl)->qdisc); else *len = qdisc_pkt_len(skb); @@ -1124,9 +1151,17 @@ static struct sk_buff *qfq_dequeue(struct Qdisc *sch) return NULL; sch->q.qlen--; + + skb = agg_dequeue(in_serv_agg, cl, len); + + if (!skb) { + sch->q.qlen++; + return NULL; + } + + qdisc_qstats_backlog_dec(sch, skb); qdisc_bstats_update(sch, skb); - agg_dequeue(in_serv_agg, cl, len); /* If lmax is lowered, through qfq_change_class, for a class * owning pending packets with larger size than the new value * of lmax, then the following condition may hold. @@ -1136,7 +1171,7 @@ static struct sk_buff *qfq_dequeue(struct Qdisc *sch) else in_serv_agg->budget -= len; - q->V += (u64)len * IWSUM; + q->V += (u64)len * q->iwsum; pr_debug("qfq dequeue: len %u F %lld now %lld\n", len, (unsigned long long) in_serv_agg->F, (unsigned long long) q->V); @@ -1187,8 +1222,10 @@ static struct qfq_aggregate *qfq_choose_next_agg(struct qfq_sched *q) return agg; } -static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) +static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff **to_free) { + unsigned int len = qdisc_pkt_len(skb), gso_segs; struct qfq_sched *q = qdisc_priv(sch); struct qfq_class *cl; struct qfq_aggregate *agg; @@ -1197,40 +1234,43 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) cl = qfq_classify(skb, sch, &err); if (cl == NULL) { if (err & __NET_XMIT_BYPASS) - sch->qstats.drops++; - kfree_skb(skb); + qdisc_qstats_drop(sch); + __qdisc_drop(skb, to_free); return err; } pr_debug("qfq_enqueue: cl = %x\n", cl->common.classid); - if (unlikely(cl->agg->lmax < qdisc_pkt_len(skb))) { + if (unlikely(cl->agg->lmax < len)) { pr_debug("qfq: increasing maxpkt from %u to %u for class %u", - cl->agg->lmax, qdisc_pkt_len(skb), cl->common.classid); - err = qfq_change_agg(sch, cl, cl->agg->class_weight, - qdisc_pkt_len(skb)); - if (err) - return err; + cl->agg->lmax, len, cl->common.classid); + err = qfq_change_agg(sch, cl, cl->agg->class_weight, len); + if (err) { + cl->qstats.drops++; + return qdisc_drop(skb, sch, to_free); + } } - err = qdisc_enqueue(skb, cl->qdisc); + gso_segs = qdisc_pkt_segs(skb); + err = qdisc_enqueue(skb, cl->qdisc, to_free); if (unlikely(err != NET_XMIT_SUCCESS)) { pr_debug("qfq_enqueue: enqueue failed %d\n", err); if (net_xmit_drop_count(err)) { cl->qstats.drops++; - sch->qstats.drops++; + qdisc_qstats_drop(sch); } return err; } - bstats_update(&cl->bstats, skb); + _bstats_update(&cl->bstats, len, gso_segs); + sch->qstats.backlog += len; ++sch->q.qlen; agg = cl->agg; - /* if the queue was not empty, then done here */ - if (cl->qdisc->q.qlen != 1) { + /* if the class is active, then done here */ + if (cl_is_active(cl)) { if (unlikely(skb == cl->qdisc->ops->peek(cl->qdisc)) && list_first_entry(&agg->active, struct qfq_class, alist) - == cl && cl->deficit < qdisc_pkt_len(skb)) + == cl && cl->deficit < len) list_move_tail(&cl->alist, &agg->active); return err; @@ -1389,71 +1429,29 @@ static void qfq_qlen_notify(struct Qdisc *sch, unsigned long arg) struct qfq_sched *q = qdisc_priv(sch); struct qfq_class *cl = (struct qfq_class *)arg; - if (cl->qdisc->q.qlen == 0) - qfq_deactivate_class(q, cl); -} - -static unsigned int qfq_drop_from_slot(struct qfq_sched *q, - struct hlist_head *slot) -{ - struct qfq_aggregate *agg; - struct qfq_class *cl; - unsigned int len; - - hlist_for_each_entry(agg, slot, next) { - list_for_each_entry(cl, &agg->active, alist) { - - if (!cl->qdisc->ops->drop) - continue; - - len = cl->qdisc->ops->drop(cl->qdisc); - if (len > 0) { - if (cl->qdisc->q.qlen == 0) - qfq_deactivate_class(q, cl); - - return len; - } - } - } - return 0; -} - -static unsigned int qfq_drop(struct Qdisc *sch) -{ - struct qfq_sched *q = qdisc_priv(sch); - struct qfq_group *grp; - unsigned int i, j, len; - - for (i = 0; i <= QFQ_MAX_INDEX; i++) { - grp = &q->groups[i]; - for (j = 0; j < QFQ_MAX_SLOTS; j++) { - len = qfq_drop_from_slot(q, &grp->slots[j]); - if (len > 0) { - sch->q.qlen--; - return len; - } - } - - } - - return 0; + if (list_empty(&cl->alist)) + return; + qfq_deactivate_class(q, cl); } -static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt) +static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) { struct qfq_sched *q = qdisc_priv(sch); struct qfq_group *grp; int i, j, err; u32 max_cl_shift, maxbudg_shift, max_classes; + err = tcf_block_get(&q->block, &q->filter_list, sch, extack); + if (err) + return err; + err = qdisc_class_hash_init(&q->clhash); if (err < 0) return err; - if (qdisc_dev(sch)->tx_queue_len + 1 > QFQ_MAX_AGG_CLASSES) - max_classes = QFQ_MAX_AGG_CLASSES; - else - max_classes = qdisc_dev(sch)->tx_queue_len + 1; + max_classes = min_t(u64, (u64)qdisc_dev(sch)->tx_queue_len + 1, + QFQ_MAX_AGG_CLASSES); /* max_cl_shift = floor(log_2(max_classes)) */ max_cl_shift = __fls(max_classes); q->max_agg_classes = 1<<max_cl_shift; @@ -1489,7 +1487,6 @@ static void qfq_reset_qdisc(struct Qdisc *sch) qdisc_reset(cl->qdisc); } } - sch->q.qlen = 0; } static void qfq_destroy_qdisc(struct Qdisc *sch) @@ -1499,11 +1496,12 @@ static void qfq_destroy_qdisc(struct Qdisc *sch) struct hlist_node *next; unsigned int i; - tcf_destroy_chain(&q->filter_list); + tcf_block_put(q->block); for (i = 0; i < q->clhash.hashsize; i++) { hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i], common.hnode) { + qfq_rm_from_agg(q, cl); qfq_destroy_class(sch, cl); } } @@ -1513,9 +1511,8 @@ static void qfq_destroy_qdisc(struct Qdisc *sch) static const struct Qdisc_class_ops qfq_class_ops = { .change = qfq_change_class, .delete = qfq_delete_class, - .get = qfq_get_class, - .put = qfq_put_class, - .tcf_chain = qfq_tcf_chain, + .find = qfq_search_class, + .tcf_block = qfq_tcf_block, .bind_tcf = qfq_bind_tcf, .unbind_tcf = qfq_unbind_tcf, .graft = qfq_graft_class, @@ -1533,12 +1530,12 @@ static struct Qdisc_ops qfq_qdisc_ops __read_mostly = { .enqueue = qfq_enqueue, .dequeue = qfq_dequeue, .peek = qdisc_peek_dequeued, - .drop = qfq_drop, .init = qfq_init_qdisc, .reset = qfq_reset_qdisc, .destroy = qfq_destroy_qdisc, .owner = THIS_MODULE, }; +MODULE_ALIAS_NET_SCH("qfq"); static int __init qfq_init(void) { @@ -1553,3 +1550,4 @@ static void __exit qfq_exit(void) module_init(qfq_init); module_exit(qfq_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Quick Fair Queueing Plus qdisc"); |
