Diffstat (limited to 'net/sched/sch_fq_codel.c')
-rw-r--r--	net/sched/sch_fq_codel.c	202
1 file changed, 115 insertions, 87 deletions
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 337f2d6d81e4..dc187c7f06b1 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Fair Queue CoDel discipline
  *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
  * Copyright (C) 2012,2015 Eric Dumazet <edumazet@google.com>
  */
 
@@ -18,7 +14,6 @@
 #include <linux/errno.h>
 #include <linux/init.h>
 #include <linux/skbuff.h>
-#include <linux/jhash.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <net/netlink.h>
@@ -49,7 +44,6 @@ struct fq_codel_flow {
 	struct sk_buff	  *tail;
 	struct list_head  flowchain;
 	int		  deficit;
-	u32		  dropped; /* number of drops (or ECN marks) on this flow */
 	struct codel_vars cvars;
 }; /* please try to keep this structure <= 64 bytes */
 
@@ -97,7 +91,7 @@ static unsigned int fq_codel_classify(struct sk_buff *skb, struct Qdisc *sch,
 		return fq_codel_hash(q, skb) + 1;
 
 	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
-	result = tcf_classify(skb, filter, &res, false);
+	result = tcf_classify(skb, NULL, filter, &res, false);
 	if (result >= 0) {
 #ifdef CONFIG_NET_CLS_ACT
 		switch (result) {
@@ -105,6 +99,7 @@ static unsigned int fq_codel_classify(struct sk_buff *skb, struct Qdisc *sch,
 		case TC_ACT_QUEUED:
 		case TC_ACT_TRAP:
 			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+			fallthrough;
 		case TC_ACT_SHOT:
 			return 0;
 		}
@@ -123,7 +118,7 @@ static inline struct sk_buff *dequeue_head(struct fq_codel_flow *flow)
 	struct sk_buff *skb = flow->head;
 
 	flow->head = skb->next;
-	skb->next = NULL;
+	skb_mark_not_on_list(skb);
 	return skb;
 }
 
@@ -173,10 +168,12 @@ static unsigned int fq_codel_drop(struct Qdisc *sch, unsigned int max_packets,
 		skb = dequeue_head(flow);
 		len += qdisc_pkt_len(skb);
 		mem += get_codel_cb(skb)->mem_usage;
+		tcf_set_drop_reason(skb, SKB_DROP_REASON_QDISC_OVERLIMIT);
 		__qdisc_drop(skb, to_free);
 	} while (++i < max_packets && len < threshold);
 
-	flow->dropped += i;
+	/* Tell codel to increase its signal strength also */
+	flow->cvars.count += i;
 	q->backlogs[idx] -= len;
 	q->memory_usage -= mem;
 	sch->qstats.drops += i;
@@ -191,7 +188,7 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 	struct fq_codel_sched_data *q = qdisc_priv(sch);
 	unsigned int idx, prev_backlog, prev_qlen;
 	struct fq_codel_flow *flow;
-	int uninitialized_var(ret);
+	int ret;
 	unsigned int pkt_len;
 	bool memory_limited;
 
@@ -214,7 +211,6 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 		list_add_tail(&flow->flowchain, &q->new_flows);
 		q->new_flow_count++;
 		flow->deficit = q->quantum;
-		flow->dropped = 0;
 	}
 	get_codel_cb(skb)->mem_usage = skb->truesize;
 	q->memory_usage += get_codel_cb(skb)->mem_usage;
@@ -279,7 +275,7 @@ static void drop_func(struct sk_buff *skb, void *ctx)
 {
 	struct Qdisc *sch = ctx;
 
-	kfree_skb(skb);
+	qdisc_dequeue_drop(sch, skb, SKB_DROP_REASON_QDISC_CONGESTED);
 	qdisc_qstats_drop(sch);
 }
 
@@ -289,7 +285,6 @@ static struct sk_buff *fq_codel_dequeue(struct Qdisc *sch)
 	struct sk_buff *skb;
 	struct fq_codel_flow *flow;
 	struct list_head *head;
-	u32 prev_drop_count, prev_ecn_mark;
 
 begin:
 	head = &q->new_flows;
@@ -306,16 +301,10 @@ begin:
 		goto begin;
 	}
 
-	prev_drop_count = q->cstats.drop_count;
-	prev_ecn_mark = q->cstats.ecn_mark;
-
 	skb = codel_dequeue(sch, &sch->qstats.backlog, &q->cparams,
 			    &flow->cvars, &q->cstats, qdisc_pkt_len,
 			    codel_get_enqueue_time, drop_func, dequeue_func);
 
-	flow->dropped += q->cstats.drop_count - prev_drop_count;
-	flow->dropped += q->cstats.ecn_mark - prev_ecn_mark;
-
 	if (!skb) {
 		/* force a pass through old_flows to prevent starvation */
 		if ((head == &q->new_flows) && !list_empty(&q->old_flows))
@@ -326,10 +315,8 @@ begin:
 	}
 	qdisc_bstats_update(sch, skb);
 	flow->deficit -= qdisc_pkt_len(skb);
-	/* We cant call qdisc_tree_reduce_backlog() if our qlen is 0,
-	 * or HTB crashes. Defer it for next round.
-	 */
-	if (q->cstats.drop_count && sch->q.qlen) {
+
+	if (q->cstats.drop_count) {
 		qdisc_tree_reduce_backlog(sch, q->cstats.drop_count,
 					  q->cstats.drop_len);
 		q->cstats.drop_count = 0;
@@ -359,8 +346,6 @@ static void fq_codel_reset(struct Qdisc *sch)
 		codel_vars_init(&flow->cvars);
 	}
 	memset(q->backlogs, 0, q->flows_cnt * sizeof(u32));
-	sch->q.qlen = 0;
-	sch->qstats.backlog = 0;
 	q->memory_usage = 0;
 }
 
@@ -374,19 +359,21 @@ static const struct nla_policy fq_codel_policy[TCA_FQ_CODEL_MAX + 1] = {
 	[TCA_FQ_CODEL_CE_THRESHOLD] = { .type = NLA_U32 },
 	[TCA_FQ_CODEL_DROP_BATCH_SIZE] = { .type = NLA_U32 },
 	[TCA_FQ_CODEL_MEMORY_LIMIT] = { .type = NLA_U32 },
+	[TCA_FQ_CODEL_CE_THRESHOLD_SELECTOR] = { .type = NLA_U8 },
+	[TCA_FQ_CODEL_CE_THRESHOLD_MASK] = { .type = NLA_U8 },
 };
 
-static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt)
+static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt,
+			   struct netlink_ext_ack *extack)
 {
+	unsigned int dropped_pkts = 0, dropped_bytes = 0;
 	struct fq_codel_sched_data *q = qdisc_priv(sch);
 	struct nlattr *tb[TCA_FQ_CODEL_MAX + 1];
+	u32 quantum = 0;
 	int err;
 
-	if (!opt)
-		return -EINVAL;
-
-	err = nla_parse_nested(tb, TCA_FQ_CODEL_MAX, opt, fq_codel_policy,
-			       NULL);
+	err = nla_parse_nested_deprecated(tb, TCA_FQ_CODEL_MAX, opt,
+					  fq_codel_policy, NULL);
 	if (err < 0)
 		return err;
 	if (tb[TCA_FQ_CODEL_FLOWS]) {
@@ -397,52 +384,74 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt)
 		    q->flows_cnt > 65536)
 			return -EINVAL;
 	}
+	if (tb[TCA_FQ_CODEL_QUANTUM]) {
+		quantum = max(256U, nla_get_u32(tb[TCA_FQ_CODEL_QUANTUM]));
+		if (quantum > FQ_CODEL_QUANTUM_MAX) {
+			NL_SET_ERR_MSG(extack, "Invalid quantum");
+			return -EINVAL;
+		}
+	}
 	sch_tree_lock(sch);
 
 	if (tb[TCA_FQ_CODEL_TARGET]) {
 		u64 target = nla_get_u32(tb[TCA_FQ_CODEL_TARGET]);
 
-		q->cparams.target = (target * NSEC_PER_USEC) >> CODEL_SHIFT;
+		WRITE_ONCE(q->cparams.target,
+			   (target * NSEC_PER_USEC) >> CODEL_SHIFT);
 	}
 
 	if (tb[TCA_FQ_CODEL_CE_THRESHOLD]) {
 		u64 val = nla_get_u32(tb[TCA_FQ_CODEL_CE_THRESHOLD]);
 
-		q->cparams.ce_threshold = (val * NSEC_PER_USEC) >> CODEL_SHIFT;
+		WRITE_ONCE(q->cparams.ce_threshold,
+			   (val * NSEC_PER_USEC) >> CODEL_SHIFT);
 	}
 
+	if (tb[TCA_FQ_CODEL_CE_THRESHOLD_SELECTOR])
+		WRITE_ONCE(q->cparams.ce_threshold_selector,
+			   nla_get_u8(tb[TCA_FQ_CODEL_CE_THRESHOLD_SELECTOR]));
+	if (tb[TCA_FQ_CODEL_CE_THRESHOLD_MASK])
+		WRITE_ONCE(q->cparams.ce_threshold_mask,
+			   nla_get_u8(tb[TCA_FQ_CODEL_CE_THRESHOLD_MASK]));
+
 	if (tb[TCA_FQ_CODEL_INTERVAL]) {
 		u64 interval = nla_get_u32(tb[TCA_FQ_CODEL_INTERVAL]);
 
-		q->cparams.interval = (interval * NSEC_PER_USEC) >> CODEL_SHIFT;
+		WRITE_ONCE(q->cparams.interval,
+			   (interval * NSEC_PER_USEC) >> CODEL_SHIFT);
 	}
 
 	if (tb[TCA_FQ_CODEL_LIMIT])
-		sch->limit = nla_get_u32(tb[TCA_FQ_CODEL_LIMIT]);
+		WRITE_ONCE(sch->limit,
+			   nla_get_u32(tb[TCA_FQ_CODEL_LIMIT]));
 
 	if (tb[TCA_FQ_CODEL_ECN])
-		q->cparams.ecn = !!nla_get_u32(tb[TCA_FQ_CODEL_ECN]);
+		WRITE_ONCE(q->cparams.ecn,
+			   !!nla_get_u32(tb[TCA_FQ_CODEL_ECN]));
 
-	if (tb[TCA_FQ_CODEL_QUANTUM])
-		q->quantum = max(256U, nla_get_u32(tb[TCA_FQ_CODEL_QUANTUM]));
+	if (quantum)
+		WRITE_ONCE(q->quantum, quantum);
 
 	if (tb[TCA_FQ_CODEL_DROP_BATCH_SIZE])
-		q->drop_batch_size = max(1U, nla_get_u32(tb[TCA_FQ_CODEL_DROP_BATCH_SIZE]));
+		WRITE_ONCE(q->drop_batch_size,
+			   max(1U, nla_get_u32(tb[TCA_FQ_CODEL_DROP_BATCH_SIZE])));
 
 	if (tb[TCA_FQ_CODEL_MEMORY_LIMIT])
-		q->memory_limit = min(1U << 31, nla_get_u32(tb[TCA_FQ_CODEL_MEMORY_LIMIT]));
+		WRITE_ONCE(q->memory_limit,
+			   min(1U << 31, nla_get_u32(tb[TCA_FQ_CODEL_MEMORY_LIMIT])));
 
 	while (sch->q.qlen > sch->limit ||
 	       q->memory_usage > q->memory_limit) {
-		struct sk_buff *skb = fq_codel_dequeue(sch);
+		struct sk_buff *skb = qdisc_dequeue_internal(sch, false);
 
-		q->cstats.drop_len += qdisc_pkt_len(skb);
+		if (!skb)
+			break;
+
+		dropped_pkts++;
+		dropped_bytes += qdisc_pkt_len(skb);
 		rtnl_kfree_skbs(skb, skb);
-		q->cstats.drop_count++;
 	}
-	qdisc_tree_reduce_backlog(sch, q->cstats.drop_count, q->cstats.drop_len);
-	q->cstats.drop_count = 0;
-	q->cstats.drop_len = 0;
+	qdisc_tree_reduce_backlog(sch, dropped_pkts, dropped_bytes);
 
 	sch_tree_unlock(sch);
 	return 0;
@@ -457,7 +466,8 @@ static void fq_codel_destroy(struct Qdisc *sch)
 	kvfree(q->flows);
 }
 
-static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt)
+static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt,
+			 struct netlink_ext_ack *extack)
 {
 	struct fq_codel_sched_data *q = qdisc_priv(sch);
 	int i;
@@ -476,24 +486,27 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt)
 	q->cparams.mtu = psched_mtu(qdisc_dev(sch));
 
 	if (opt) {
-		int err = fq_codel_change(sch, opt);
+		err = fq_codel_change(sch, opt, extack);
 		if (err)
-			return err;
+			goto init_failure;
 	}
 
-	err = tcf_block_get(&q->block, &q->filter_list);
+	err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
 	if (err)
-		return err;
+		goto init_failure;
 
 	if (!q->flows) {
-		q->flows = kvzalloc(q->flows_cnt *
-				    sizeof(struct fq_codel_flow), GFP_KERNEL);
-		if (!q->flows)
-			return -ENOMEM;
-		q->backlogs = kvzalloc(q->flows_cnt * sizeof(u32), GFP_KERNEL);
+		q->flows = kvcalloc(q->flows_cnt,
+				    sizeof(struct fq_codel_flow),
+				    GFP_KERNEL);
+		if (!q->flows) {
+			err = -ENOMEM;
+			goto init_failure;
+		}
+		q->backlogs = kvcalloc(q->flows_cnt, sizeof(u32), GFP_KERNEL);
 		if (!q->backlogs) {
-			kvfree(q->flows);
-			return -ENOMEM;
+			err = -ENOMEM;
+			goto alloc_failure;
 		}
 		for (i = 0; i < q->flows_cnt; i++) {
 			struct fq_codel_flow *flow = q->flows + i;
@@ -506,40 +519,59 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt)
 		sch->flags |= TCQ_F_CAN_BYPASS;
 	else
 		sch->flags &= ~TCQ_F_CAN_BYPASS;
+
+	sch->flags |= TCQ_F_DEQUEUE_DROPS;
+
 	return 0;
+
+alloc_failure:
+	kvfree(q->flows);
+	q->flows = NULL;
+init_failure:
+	q->flows_cnt = 0;
+	return err;
 }
 
 static int fq_codel_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct fq_codel_sched_data *q = qdisc_priv(sch);
+	codel_time_t ce_threshold;
 	struct nlattr *opts;
 
-	opts = nla_nest_start(skb, TCA_OPTIONS);
+	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
 	if (opts == NULL)
 		goto nla_put_failure;
 
 	if (nla_put_u32(skb, TCA_FQ_CODEL_TARGET,
-			codel_time_to_us(q->cparams.target)) ||
+			codel_time_to_us(READ_ONCE(q->cparams.target))) ||
 	    nla_put_u32(skb, TCA_FQ_CODEL_LIMIT,
-			sch->limit) ||
+			READ_ONCE(sch->limit)) ||
 	    nla_put_u32(skb, TCA_FQ_CODEL_INTERVAL,
-			codel_time_to_us(q->cparams.interval)) ||
+			codel_time_to_us(READ_ONCE(q->cparams.interval))) ||
 	    nla_put_u32(skb, TCA_FQ_CODEL_ECN,
-			q->cparams.ecn) ||
+			READ_ONCE(q->cparams.ecn)) ||
 	    nla_put_u32(skb, TCA_FQ_CODEL_QUANTUM,
-			q->quantum) ||
+			READ_ONCE(q->quantum)) ||
 	    nla_put_u32(skb, TCA_FQ_CODEL_DROP_BATCH_SIZE,
-			q->drop_batch_size) ||
+			READ_ONCE(q->drop_batch_size)) ||
 	    nla_put_u32(skb, TCA_FQ_CODEL_MEMORY_LIMIT,
-			q->memory_limit) ||
+			READ_ONCE(q->memory_limit)) ||
 	    nla_put_u32(skb, TCA_FQ_CODEL_FLOWS,
-			q->flows_cnt))
+			READ_ONCE(q->flows_cnt)))
 		goto nla_put_failure;
 
-	if (q->cparams.ce_threshold != CODEL_DISABLED_THRESHOLD &&
-	    nla_put_u32(skb, TCA_FQ_CODEL_CE_THRESHOLD,
-			codel_time_to_us(q->cparams.ce_threshold)))
-		goto nla_put_failure;
+	ce_threshold = READ_ONCE(q->cparams.ce_threshold);
+	if (ce_threshold != CODEL_DISABLED_THRESHOLD) {
+		if (nla_put_u32(skb, TCA_FQ_CODEL_CE_THRESHOLD,
+				codel_time_to_us(ce_threshold)))
+			goto nla_put_failure;
+		if (nla_put_u8(skb, TCA_FQ_CODEL_CE_THRESHOLD_SELECTOR,
+			       READ_ONCE(q->cparams.ce_threshold_selector)))
+			goto nla_put_failure;
+		if (nla_put_u8(skb, TCA_FQ_CODEL_CE_THRESHOLD_MASK,
			       READ_ONCE(q->cparams.ce_threshold_mask)))
+			goto nla_put_failure;
+	}
 
 	return nla_nest_end(skb, opts);
 
@@ -579,7 +611,7 @@ static struct Qdisc *fq_codel_leaf(struct Qdisc *sch, unsigned long arg)
 	return NULL;
 }
 
-static unsigned long fq_codel_get(struct Qdisc *sch, u32 classid)
+static unsigned long fq_codel_find(struct Qdisc *sch, u32 classid)
 {
 	return 0;
 }
@@ -587,16 +619,15 @@ static unsigned long fq_codel_get(struct Qdisc *sch, u32 classid)
 static unsigned long fq_codel_bind(struct Qdisc *sch, unsigned long parent,
 				   u32 classid)
 {
-	/* we cannot bypass queue discipline anymore */
-	sch->flags &= ~TCQ_F_CAN_BYPASS;
 	return 0;
 }
 
-static void fq_codel_put(struct Qdisc *q, unsigned long cl)
+static void fq_codel_unbind(struct Qdisc *q, unsigned long cl)
 {
 }
 
-static struct tcf_block *fq_codel_tcf_block(struct Qdisc *sch, unsigned long cl)
+static struct tcf_block *fq_codel_tcf_block(struct Qdisc *sch, unsigned long cl,
+					    struct netlink_ext_ack *extack)
 {
 	struct fq_codel_sched_data *q = qdisc_priv(sch);
 
@@ -650,7 +681,7 @@ static int fq_codel_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 			sch_tree_unlock(sch);
 		}
 		qs.backlog = q->backlogs[idx];
-		qs.drops = flow->dropped;
+		qs.drops = 0;
 	}
 	if (gnet_stats_copy_queue(d, NULL, &qs, qs.qlen) < 0)
 		return -1;
@@ -668,26 +699,21 @@ static void fq_codel_walk(struct Qdisc *sch, struct qdisc_walker *arg)
 		return;
 
 	for (i = 0; i < q->flows_cnt; i++) {
-		if (list_empty(&q->flows[i].flowchain) ||
-		    arg->count < arg->skip) {
+		if (list_empty(&q->flows[i].flowchain)) {
 			arg->count++;
 			continue;
 		}
-		if (arg->fn(sch, i + 1, arg) < 0) {
-			arg->stop = 1;
+		if (!tc_qdisc_stats_dump(sch, i + 1, arg))
 			break;
-		}
-		arg->count++;
 	}
 }
 
 static const struct Qdisc_class_ops fq_codel_class_ops = {
 	.leaf		=	fq_codel_leaf,
-	.get		=	fq_codel_get,
-	.put		=	fq_codel_put,
+	.find		=	fq_codel_find,
 	.tcf_block	=	fq_codel_tcf_block,
 	.bind_tcf	=	fq_codel_bind,
-	.unbind_tcf	=	fq_codel_put,
+	.unbind_tcf	=	fq_codel_unbind,
 	.dump		=	fq_codel_dump_class,
 	.dump_stats	=	fq_codel_dump_class_stats,
 	.walk		=	fq_codel_walk,
@@ -708,6 +734,7 @@ static struct Qdisc_ops fq_codel_qdisc_ops __read_mostly = {
 	.dump_stats =	fq_codel_dump_stats,
 	.owner		=	THIS_MODULE,
 };
+MODULE_ALIAS_NET_SCH("fq_codel");
 
 static int __init fq_codel_module_init(void)
 {
@@ -723,3 +750,4 @@ module_init(fq_codel_module_init)
 module_exit(fq_codel_module_exit)
 MODULE_AUTHOR("Eric Dumazet");
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Fair Queue CoDel discipline");
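
Note: much of the control-path churn above converts plain loads and stores of the fq_codel tunables (target, interval, limit, quantum, memory_limit, ...) into WRITE_ONCE()/READ_ONCE() pairs so that fq_codel_dump() can read them without holding the qdisc tree lock. Below is a minimal userspace sketch of that pairing, with simplified stand-ins for the kernel macros; the struct and values are illustrative only and are not taken from the scheduler itself.

/* Simplified stand-ins for the kernel's WRITE_ONCE()/READ_ONCE():
 * a volatile store publishes the value, a volatile load reads it,
 * so the compiler can neither tear, cache, nor elide the access.
 */
#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

#define WRITE_ONCE(x, val) (*(volatile __typeof__(x) *)&(x) = (val))
#define READ_ONCE(x)       (*(volatile __typeof__(x) *)&(x))

struct fq_params {            /* illustrative tunables, not the kernel struct */
	uint32_t quantum;
	uint32_t memory_limit;
};

int main(void)
{
	struct fq_params p = { .quantum = 1514, .memory_limit = 32 << 20 };

	/* control path: in the kernel this happens under the qdisc tree lock */
	WRITE_ONCE(p.quantum, 300);

	/* dump path: lockless read of a value a writer may change concurrently */
	printf("quantum=%" PRIu32 " memory_limit=%" PRIu32 "\n",
	       READ_ONCE(p.quantum), READ_ONCE(p.memory_limit));
	return 0;
}

The volatile cast is what keeps the compiler from reordering or splitting the access; the kernel's real macros add type and size checking on top, but the writer/reader discipline shown in the diff is the same idea.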
